├── .gitignore ├── .gitmodules ├── .style.yapf ├── CONTRIBUTING.md ├── DATASET.md ├── DOCKER.md ├── Dockerfile ├── GETTING_STARTED.md ├── INSTALL.md ├── ISSUES.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── compile.sh ├── configs ├── C3D │ ├── c3d_sports1m_3d_rgb_vgg_c3d_seg1_f16s1.py │ └── c3d_train01_16_128_171_mean.npy ├── I3D_RGB │ ├── i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2.py │ └── i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2_video.py ├── SlowOnly │ ├── slowonly_kinetics400_se_rgb_r50_seg1_4x16_finetune.py │ ├── slowonly_kinetics400_se_rgb_r50_seg1_4x16_scratch.py │ ├── slowonly_kinetics400_se_rgb_r50_seg1_8x8_finetune.py │ └── slowonly_kinetics400_se_rgb_r50_seg1_8x8_scratch.py ├── TSN │ ├── tsn_kinetics400_2d_rgb_r50_seg3_f1s1.py │ └── ucf101 │ │ ├── tsn_flow_bninception.py │ │ └── tsn_rgb_bninception.py ├── ava │ └── ava_fast_rcnn_nl_r50_c4_1x_kinetics_pretrain_crop.py ├── hmdb51 │ ├── tsn_flow_bninception.py │ └── tsn_rgb_bninception.py └── thumos14 │ └── ssn_thumos14_rgb_bn_inception.py ├── data └── .placeholder ├── data_tools ├── ava │ ├── PREPARING_AVA.md │ ├── download_annotations.sh │ ├── download_videos.sh │ ├── download_videos_parallel.sh │ ├── extract_frames.sh │ ├── extract_rgb_frames.sh │ ├── fetch_ava_proposals.sh │ ├── obtain_video_resolution.sh │ └── preprocess_videos.sh ├── build_file_list.py ├── build_rawframes.py ├── hmdb51 │ ├── PREPARING_HMDB51.md │ ├── download_annotations.sh │ ├── download_videos.sh │ ├── extract_frames.sh │ ├── extract_rgb_frames.sh │ └── generate_filelist.sh ├── kinetics400 │ ├── PREPARING_KINETICS400.md │ ├── download_annotations.sh │ ├── download_videos.sh │ ├── extract_frames.sh │ ├── extract_rgb_frames.sh │ ├── generate_rawframes_filelist.sh │ ├── generate_video_filelist.sh │ └── rename_classnames.sh ├── thumos14 │ ├── PREPARING_TH14.md │ ├── download_annotations.sh │ ├── download_videos.sh │ ├── extracted_frames.sh │ └── fetch_tag_proposals.sh └── ucf101 │ ├── PREPARING_UCF101.md │ ├── download_annotations.sh │ ├── download_videos.sh │ ├── extract_frames.sh │ └── generate_filelist.sh ├── mmaction ├── __init__.py ├── apis │ ├── __init__.py │ ├── env.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor2d │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ └── anchor_target.py │ ├── bbox1d │ │ ├── __init__.py │ │ └── geometry.py │ ├── bbox2d │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── assign_result.py │ │ │ ├── base_assigner.py │ │ │ └── max_iou_assigner.py │ │ ├── bbox_target.py │ │ ├── geometry.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ └── sampling_result.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── ava_utils.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ ├── localize_utils.py │ │ └── recall.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ └── utils │ │ ├── __init__.py │ │ └── dist_utils.py ├── datasets │ ├── __init__.py │ ├── ava_dataset.py │ ├── feature_dataset.py │ ├── lmdbframes_dataset.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── rawframes_dataset.py │ ├── ssn_dataset.py │ ├── transforms.py │ ├── utils.py │ └── video_dataset.py ├── losses │ ├── __init__.py │ ├── flow_losses.py │ ├── losses.py │ └── ssn_losses.py ├── models │ ├── __init__.py │ ├── builder.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ 
│ ├── fast_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── test_mixins.py │ │ └── two_stage.py │ ├── localizers │ │ ├── SSN2D.py │ │ ├── __init__.py │ │ └── base.py │ ├── recognizers │ │ ├── TSN2D.py │ │ ├── TSN3D.py │ │ ├── __init__.py │ │ └── base.py │ ├── registry.py │ └── tenons │ │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ └── rpn_head.py │ │ ├── backbones │ │ ├── __init__.py │ │ ├── bninception.py │ │ ├── c3d.py │ │ ├── inception_v1_i3d.py │ │ ├── resnet.py │ │ ├── resnet_i3d.py │ │ ├── resnet_i3d_slowfast.py │ │ ├── resnet_r3d.py │ │ └── resnet_s3d.py │ │ ├── bbox_heads │ │ ├── __init__.py │ │ └── bbox_head.py │ │ ├── cls_heads │ │ ├── __init__.py │ │ ├── cls_head.py │ │ └── ssn_head.py │ │ ├── flownets │ │ ├── __init__.py │ │ └── motionnet.py │ │ ├── necks │ │ ├── __init__.py │ │ └── fpn.py │ │ ├── roi_extractors │ │ ├── __init__.py │ │ ├── single_level.py │ │ └── single_level_straight3d.py │ │ ├── segmental_consensuses │ │ ├── TODO.md │ │ ├── __init__.py │ │ ├── simple_consensus.py │ │ └── stpp.py │ │ ├── shared_heads │ │ ├── __init__.py │ │ ├── res_i3d_layer.py │ │ └── res_layer.py │ │ ├── spatial_temporal_modules │ │ ├── __init__.py │ │ ├── non_local.py │ │ ├── simple_spatial_module.py │ │ ├── simple_spatial_temporal_module.py │ │ └── slowfast_spatial_temporal_module.py │ │ └── utils │ │ ├── __init__.py │ │ ├── conv_module.py │ │ ├── nonlocal_block.py │ │ ├── norm.py │ │ └── resnet_r3d_utils.py ├── ops │ ├── __init__.py │ ├── nms │ │ ├── __init__.py │ │ ├── nms_wrapper.py │ │ ├── setup.py │ │ └── src │ │ │ ├── nms_cpu.cpp │ │ │ ├── nms_cuda.cpp │ │ │ ├── nms_kernel.cu │ │ │ ├── soft_nms_cpu.cpp │ │ │ └── soft_nms_cpu.pyx │ ├── resample2d_package │ │ ├── __init__.py │ │ ├── resample2d.py │ │ ├── resample2d_cuda.cc │ │ ├── resample2d_kernel.cu │ │ ├── resample2d_kernel.cuh │ │ └── setup.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── gradcheck.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── setup.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── roi_pool │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── gradcheck.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── setup.py │ │ └── src │ │ │ ├── roi_pool_cuda.cpp │ │ │ └── roi_pool_kernel.cu │ └── trajectory_conv_package │ │ ├── __init__.py │ │ ├── deform_3d_conv_cuda_kernel.cu │ │ ├── deform_3d_conv_cuda_kernel.h │ │ ├── gradcheck.py │ │ ├── setup.py │ │ ├── traj_conv.py │ │ └── traj_conv_cuda.cpp └── utils │ └── misc.py ├── modelzoo └── .placeholder ├── setup.py ├── test_configs ├── CSN │ ├── ipcsn_kinetics400_se_rgb_r152_seg1_32x2.py │ └── ircsn_kinetics400_se_rgb_r152_seg1_32x2.py ├── I3D_Flow │ ├── i3d_hmdb51_3d_tvl1_inception_v1_seg1_f64s1.py │ ├── i3d_kinetics400_3d_tvl1_inception_v1_seg1_f64s1.py │ └── i3d_ucf101_3d_tvl1_inception_v1_seg1_f64s1.py ├── I3D_RGB │ ├── i3d_hmdb51_3d_rgb_inception_v1_seg1_f64s1.py │ ├── i3d_kinetics400_3d_rgb_inception_v1_seg1_f64s1.py │ ├── i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2.py │ └── i3d_ucf101_3d_rgb_inception_v1_seg1_f64s1.py ├── R2plus1D │ ├── r2plus1d_kinetics400_se_rgb_r34_seg1_32x2.py │ └── r2plus1d_kinetics400_se_rgb_r34_seg1_8x8.py ├── SlowFast │ └── slowfast_kinetics400_se_rgb_r50_seg1_4x16.py ├── SlowOnly │ ├── slowonly_kinetics400_se_rgb_r101_seg1_8x8.py │ ├── slowonly_kinetics400_se_rgb_r50_seg1_4x16.py │ └── slowonly_kinetics400_se_rgb_r50_seg1_8x8.py ├── TSN │ ├── 
tsn_kinetics400_2d_rgb_r50_seg3_f1s1.py │ └── ucf101 │ │ ├── tsn_flow_bninception.py │ │ └── tsn_rgb_bninception.py ├── ava │ └── ava_fast_rcnn_nl_r50_c4_1x_kinetics_pretrain_crop.py └── thumos14 │ └── ssn_thumos14_rgb_bn_inception.py └── tools ├── dist_test_detector.sh ├── dist_test_recognizer.sh ├── dist_test_recognizer_heavy.sh ├── dist_train_detector.sh ├── dist_train_localizer.sh ├── dist_train_recognizer.sh ├── eval_localize_results.py ├── generate_lmdb.py ├── test_detector.py ├── test_localizer.py ├── test_recognizer.py ├── test_recognizer_heavy.py ├── train_detector.py ├── train_localizer.py └── train_recognizer.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmaction/version.py 108 | data 109 | .vscode 110 | .idea 111 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/ActivityNet"] 2 | path = mmaction/third_party/ActivityNet 3 | url = https://github.com/zhaoyue-zephyrus/ActivityNet 4 | [submodule "mmaction/third_party/decord"] 5 | path = third_party/decord 6 | url = https://github.com/zhreshold/decord.git 7 | [submodule "mmaction/third_party/dense_flow"] 8 | path = third_party/dense_flow 9 | url = https://github.com/yjxiong/dense_flow 10 | branch = master 11 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | # Contributing to MMAction 2 | 3 | All kinds of contributions are welcome, including but not limited to the following. 4 | 5 | - Fixes (typos, bugs) 6 | - New features and components 7 | 8 | ## Workflow 9 | 10 | 1. fork and pull the latest mmaction 11 | 2. checkout a new branch (do not use the master branch for PRs) 12 | 3. commit your changes 13 | 4. create a PR 14 | 15 | Note 16 | - If you plan to add new features that involve large changes, it is encouraged to open an issue for discussion first. 17 | - If you are the author of some papers and would like to include your method in mmaction, 18 | please contact Yue Zhao (thuzhaoyue@gmail). We would much appreciate your contribution. 19 | 20 | ## Code style 21 | 22 | ### Python 23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. 24 | We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter. 25 | Please upgrade to the latest yapf (>=0.27.0) and refer to the [configuration](.style.yapf). 26 | 27 | >Before you create a PR, make sure that your code lints and is formatted by yapf. 28 | 29 | ### C++ and CUDA 30 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). 31 | -------------------------------------------------------------------------------- /DOCKER.md: -------------------------------------------------------------------------------- 1 | # Using Docker to set up the mmaction environment 2 | 3 | ## Requirements 4 | 5 | We have tested the build on Ubuntu 18.04 LTS with Docker 19.03.1 (Docker API version 1.40). To build the Docker image, you need: 6 | 7 | - Docker Engine 8 | - nvidia-docker (to start containers with GPU access) 9 | - A large amount of free disk space 10 | 11 | ## Install Docker Engine (Ubuntu) 12 | 13 | ``` 14 | $ curl -fsSL https://get.docker.com -o get-docker.sh 15 | $ sh get-docker.sh 16 | ``` 17 | 18 | ## Install Nvidia-Docker 19 | 20 | Please follow the instructions at [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). 21 | 22 | ## Build the image 23 | 24 | The ```Dockerfile``` is provided in [this](https://github.com/open-mmlab/mmaction) repository, so you can either copy that file and build the image manually, or clone the repository and build from it: 25 | 26 | ``` 27 | $ git clone --recursive https://github.com/open-mmlab/mmaction 28 | $ cd mmaction 29 | $ docker build -t mmaction . 30 | ``` 31 | 32 | If the image does not build successfully (for example, because the code needs to be modified), you can instead clone the repository inside the container manually, following the next step below. 33 | 34 | ## Run a container from the image 35 | 36 | ``` 37 | $ docker run --name mmaction --gpus all -it -v /path/to/your/data:/root mmaction 38 | ``` 39 | 40 | Once the container is running, please follow [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md) to use mmaction. -------------------------------------------------------------------------------- /ISSUES.md: -------------------------------------------------------------------------------- 1 | # Known Issues 2 | 3 | 1.
Error on RTX cards with CUDA 10.0 4 | 5 | Description: 6 | 7 | THCudaCheck FAIL file=/pytorch/aten/src/THC/THCGeneral.cpp line=405 error=11 : invalid argument #15797 8 | 9 | Solution: Install pytorch wheel with cuda 10.0 via 10 | 11 | ```bash 12 | pip install https://download.pytorch.org/whl/cu100/torch-1.0.0-cp36-cp36m-linux_x86_64.whl 13 | ``` 14 | -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | echo "Building package resample2d" 6 | cd ./mmaction/ops/resample2d_package 7 | if [ -d "build" ]; then 8 | rm -r build 9 | fi 10 | 11 | $PYTHON setup.py install --user 12 | 13 | echo "Building package trajectory_conv..." 14 | cd ../trajectory_conv_package 15 | if [ -d "build" ]; then 16 | rm -r build 17 | fi 18 | 19 | $PYTHON setup.py install --user 20 | 21 | echo "Building roi align op..." 22 | cd ../roi_align 23 | if [ -d "build" ]; then 24 | rm -r build 25 | fi 26 | $PYTHON setup.py build_ext --inplace 27 | 28 | echo "Building roi pool op..." 29 | cd ../roi_pool 30 | if [ -d "build" ]; then 31 | rm -r build 32 | fi 33 | $PYTHON setup.py build_ext --inplace 34 | 35 | echo "Building nms op..." 36 | cd ../nms 37 | if [ -d "build" ]; then 38 | rm -r build 39 | fi 40 | $PYTHON setup.py build_ext --inplace 41 | -------------------------------------------------------------------------------- /configs/C3D/c3d_train01_16_128_171_mean.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/configs/C3D/c3d_train01_16_128_171_mean.npy -------------------------------------------------------------------------------- /configs/hmdb51/tsn_rgb_bninception.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN2D', 4 | backbone=dict( 5 | type='BNInception', 6 | pretrained='open-mmlab://bninception_caffe', 7 | bn_eval=False, 8 | partial_bn=True), 9 | spatial_temporal_module=dict( 10 | type='SimpleSpatialModule', 11 | spatial_type='avg', 12 | spatial_size=7), 13 | segmental_consensus=dict( 14 | type='SimpleConsensus', 15 | consensus_type='avg'), 16 | cls_head=dict( 17 | type='ClsHead', 18 | with_avg_pool=False, 19 | temporal_feature_size=1, 20 | spatial_feature_size=1, 21 | dropout_ratio=0.8, 22 | in_channels=1024, 23 | init_std=0.001, 24 | num_classes=51)) 25 | train_cfg = None 26 | test_cfg = None 27 | # dataset settings 28 | dataset_type = 'RawFramesDataset' 29 | data_root = 'data/hmdb51/rawframes' 30 | img_norm_cfg = dict( 31 | mean=[104, 117, 128], std=[1, 1, 1], to_rgb=False) 32 | 33 | data = dict( 34 | videos_per_gpu=32, 35 | workers_per_gpu=2, 36 | train=dict( 37 | type=dataset_type, 38 | ann_file='data/hmdb51/hmdb51_train_split_1_rawframes.txt', 39 | img_prefix=data_root, 40 | img_norm_cfg=img_norm_cfg, 41 | num_segments=3, 42 | new_length=1, 43 | new_step=1, 44 | random_shift=True, 45 | modality='RGB', 46 | image_tmpl='img_{:05d}.jpg', 47 | img_scale=256, 48 | input_size=224, 49 | div_255=False, 50 | flip_ratio=0.5, 51 | resize_keep_ratio=True, 52 | oversample=None, 53 | random_crop=False, 54 | more_fix_crop=False, 55 | multiscale_crop=True, 56 | scales=[1, 0.875, 0.75, 0.66], 57 | max_distort=1, 58 | test_mode=False), 59 | val=dict( 60 | type=dataset_type, 61 | ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt', 
62 | img_prefix=data_root, 63 | img_norm_cfg=img_norm_cfg, 64 | num_segments=3, 65 | new_length=1, 66 | new_step=1, 67 | random_shift=False, 68 | modality='RGB', 69 | image_tmpl='img_{:05d}.jpg', 70 | img_scale=256, 71 | input_size=224, 72 | div_255=False, 73 | flip_ratio=0, 74 | resize_keep_ratio=True, 75 | oversample=None, 76 | random_crop=False, 77 | more_fix_crop=False, 78 | multiscale_crop=False, 79 | test_mode=False), 80 | test=dict( 81 | type=dataset_type, 82 | ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt', 83 | img_prefix=data_root, 84 | img_norm_cfg=img_norm_cfg, 85 | num_segments=25, 86 | new_length=1, 87 | new_step=1, 88 | random_shift=False, 89 | modality='RGB', 90 | image_tmpl='img_{:05d}.jpg', 91 | img_scale=256, 92 | input_size=224, 93 | div_255=False, 94 | flip_ratio=0, 95 | resize_keep_ratio=True, 96 | oversample='ten_crop', 97 | random_crop=False, 98 | more_fix_crop=False, 99 | multiscale_crop=False, 100 | test_mode=True)) 101 | # optimizer 102 | optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005) 103 | optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) 104 | # learning policy 105 | lr_config = dict( 106 | policy='step', 107 | step=[30, 60]) 108 | checkpoint_config = dict(interval=1) 109 | # workflow = [('train', 5), ('val', 1)] 110 | workflow = [('train', 1)] 111 | # yapf:disable 112 | log_config = dict( 113 | interval=20, 114 | hooks=[ 115 | dict(type='TextLoggerHook'), 116 | # dict(type='TensorboardLoggerHook') 117 | ]) 118 | # yapf:enable 119 | # runtime settings 120 | total_epochs = 80 121 | dist_params = dict(backend='nccl') 122 | log_level = 'INFO' 123 | work_dir = './work_dirs/tsn_2d_rgb_bninception_seg_3_f1s1_b32_g8' 124 | load_from = None 125 | resume_from = None 126 | -------------------------------------------------------------------------------- /data/.placeholder: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /data_tools/ava/PREPARING_AVA.md: -------------------------------------------------------------------------------- 1 | ## Preparing AVA 2 | 3 | For more details, please refer to the [official website](https://research.google.com/ava/). We provide scripts with documentations. Before we start, please make sure that the directory is located at `$MMACTION/data_tools/ava/`. 4 | 5 | ### Prepare annotations 6 | First of all, run the following script to prepare annotations. 7 | ```shell 8 | bash download_annotations.sh 9 | ``` 10 | 11 | ### Prepare videos 12 | Then, use the following script to prepare videos. The codes are adapted from the [official crawler](https://github.com/cvdfoundation/ava-dataset). Note that this might take a long time. 13 | ```shell 14 | bash download_videos.sh 15 | ``` 16 | Note that if you happen to have sudoer or have [GNU parallel](https://www.gnu.org/software/parallel/) [1](#1) on your machine, you can speed up the procedure by downloading in parallel. 17 | 18 | ```shell 19 | # sudo apt-get install parallel 20 | bash download_videos_parallel.sh 21 | ``` 22 | 23 | ### Preprocess videos 24 | The videos vary in length, while the annotations are from 15min to 30min. 25 | Therefore, we can preprocess videos to save storage and processing time afterward. 26 | Run the following scripts to trim the videos into 17-min segments (from 00:14:00 to 00:31:00) with FPS adjusted to 30 FPS and height to be 480. 
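In essence, the script runs two ffmpeg passes per video: one that trims the clip and forces the frame rate, and one that rescales it to a height of 480 while copying the audio stream. A sketch for a single video is shown below (the file name is illustrative; the full loop over all videos lives in `preprocess_videos.sh`):

```shell
# Trim to 00:14:00-00:31:00 and force 30 FPS (intermediate file, illustrative name)
ffmpeg -nostdin -i ./videos_trainval/053oq2xB3oU.mkv \
    -ss 00:14:00 -t 00:17:00 \
    -filter:v fps=fps=30 \
    ./053oq2xB3oU.tmp.mp4
# Rescale to height 480 (width chosen automatically, kept even) and copy the audio
ffmpeg -nostdin -i ./053oq2xB3oU.tmp.mp4 \
    -vf scale=-2:480 \
    -c:a copy \
    ./videos_trimmed_trainval/053oq2xB3oU.mp4
rm ./053oq2xB3oU.tmp.mp4
```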
27 | 28 | ```shell 29 | bash preprocess_videos.sh 30 | ``` 31 | 32 | 33 | ### Extract frames 34 | Now it is time to extract frames from videos. 35 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow). 36 | If you have some SSD, then we strongly recommend extracting frames there for better I/O performance. 37 | ```shell 38 | # execute these two line (Assume the SSD is mounted at "/mnt/SSD/") 39 | mkdir /mnt/SSD/ava_extracted/ 40 | ln -s /mnt/SSD/ava_extracted/ ../data/ava/rawframes/ 41 | ``` 42 | Afterwards, run the following script to extract frames. 43 | ```shell 44 | bash extract_frames.sh 45 | ``` 46 | If you only want to play with RGB frames (since extracting optical flow can be both time-comsuming and space-hogging), consider running the following script to extract **RGB-only** frames. 47 | ```shell 48 | bash extract_rgb_frames.sh 49 | ``` 50 | 51 | 52 | ### Fetching proposal files and other metadata file 53 | Run the follow scripts to fetch pre-computed proposal list. 54 | The proposals are adapted from FAIR's [Long-Term Feature Banks](https://github.com/facebookresearch/video-long-term-feature-banks). 55 | ```shell 56 | bash fetch_ava_proposals.sh 57 | ``` 58 | In addition, we use the following script to obtain the resolutions of all videos due to varying aspect ratio. 59 | ```shell 60 | bash obtain_video_resolution.sh 61 | ``` 62 | 63 | ### Folder structure 64 | In the context of the whole project (for ava only), the folder structure will look like: 65 | 66 | ``` 67 | mmaction 68 | ├── mmaction 69 | ├── tools 70 | ├── configs 71 | ├── data 72 | │ ├── ava 73 | │ │ ├── ava_video_resolution_stats.csv 74 | │ │ ├── ava_dense_proposals_train.FAIR.recall_93.9.pkl 75 | │ │ ├── ava_dense_proposals_val.FAIR.recall_93.9.pkl 76 | │ │ ├── annotations 77 | │ │ ├── videos_trainval 78 | │ │ │ ├── 053oq2xB3oU.mkv 79 | │ │ │ ├── 0f39OWEqJ24.mp4 80 | │ │ │ ├── ... 81 | │ │ ├── videos_trimmed_trainval 82 | │ │ │ ├── 053oq2xB3oU.mp4 83 | │ │ │ ├── 0f39OWEqJ24.mp4 84 | │ │ │ ├── ... 85 | │ │ ├── rawframes 86 | │ │ │ ├── 053oq2xB3oU.mp4 87 | | │ │ │ ├── img_00001.jpg 88 | | │ │ │ ├── img_00002.jpg 89 | | │ │ │ ├── ... 90 | ``` 91 | 92 | For training and evaluating on AVA, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md). 93 | 94 | 95 | Reference 96 | 97 | [1] O. Tange (2018): GNU Parallel 2018, March 2018, https://doi.org/10.5281/zenodo.1146014 -------------------------------------------------------------------------------- /data_tools/ava/download_annotations.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | DATA_DIR="../../data/ava/annotations" 4 | 5 | if [[ ! -d "${DATA_DIR}" ]]; then 6 | echo "${DATA_DIR} does not exist. Creating"; 7 | mkdir -p ${DATA_DIR} 8 | fi 9 | 10 | wget https://research.google.com/ava/download/ava_v2.1.zip 11 | unzip -j ava_v2.1.zip -d ${DATA_DIR}/ 12 | rm ava_v2.1.zip -------------------------------------------------------------------------------- /data_tools/ava/download_videos.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/bash env 2 | 3 | wget -c https://s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt -P ../../data/ava/annotations/ 4 | 5 | 6 | cat ../../data/ava/annotations/ava_file_names_trainval_v2.1.txt | while read vid; do wget -c "https://s3.amazonaws.com/ava-dataset/trainval/${vid}" -P ../../data/ava/videos_trainval/; done 7 | 8 | echo "Downloading finished." -------------------------------------------------------------------------------- /data_tools/ava/download_videos_parallel.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | wget -c https://s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt -P ../../data/ava/annotations/ 4 | 5 | 6 | # sudo apt-get install parallel 7 | # parallel downloading to speed up 8 | awk '{print "https://s3.amazonaws.com/ava-dataset/trainval/"$0}' ../../data/ava/annotations/ava_file_names_trainval_v2.1.txt | parallel -j 8 wget -c -q {} -P ../../data/ava/videos_trainval/ 9 | echo "Parallel downloading finished." -------------------------------------------------------------------------------- /data_tools/ava/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../ 4 | python build_rawframes.py ../data/ava/videos_trimmed_trainval/ ../data/ava/rawframes/ --level 1 --flow_type tvl1 --ext mp4 5 | echo "Raw frames (RGB and tv-l1) Generated for train+val set" 6 | 7 | cd ava/ 8 | -------------------------------------------------------------------------------- /data_tools/ava/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../ 4 | python build_rawframes.py ../data/ava/videos_trimmed_trainval/ ../data/ava/rawframes/ --level 1 --ext mp4 5 | echo "Raw frames (RGB only) generated for train and val set" 6 | 7 | cd ava/ -------------------------------------------------------------------------------- /data_tools/ava/fetch_ava_proposals.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/ava_dense_proposals_train.FAIR.recall_93.9.pkl -P ../../data/ava/ 4 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/ava_dense_proposals_val.FAIR.recall_93.9.pkl -P ../../data/ava/ 5 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/ava_dense_proposals_test.FAIR.recall_93.9.pkl -P ../../data/ava/ 6 | -------------------------------------------------------------------------------- /data_tools/ava/obtain_video_resolution.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../../data/ava/ 4 | 5 | ls ./videos_trimmed_trainval | while read filename; do \ 6 | vid="$(echo ${filename} | cut -d'.' -f1)"; 7 | resolution=`ffprobe -v error -select_streams v:0 -show_entries stream=width,height -of csv=s=x:p=0 ./videos_trimmed_trainval/${filename}` 8 | echo ${vid} ${resolution} 9 | done &> ava_video_resolution_stats.csv 10 | 11 | echo $PWD 12 | 13 | cd ../../data_tools/ava/ 14 | -------------------------------------------------------------------------------- /data_tools/ava/preprocess_videos.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/bash env 2 | 3 | cd ../../data/ava/ 4 | 5 | mkdir ./videos_trimmed_trainval/ 6 | ls videos_trainval/ | while read filename; do \ 7 | vid="$(echo ${filename} | cut -d'.' -f1)"; 8 | ffmpeg -nostdin -i "./videos_trainval/${filename}" \ 9 | -ss 00:14:00 -t 00:17:00 \ 10 | -filter:v fps=fps=30 \ 11 | "./${vid}.tmp.mp4"; 12 | ffmpeg -nostdin -i "./${vid}.tmp.mp4" \ 13 | -vf scale=-2:480 \ 14 | -c:a copy \ 15 | "./videos_trimmed_trainval/${vid}.mp4"; 16 | rm "./${vid}.tmp.mp4"; 17 | done 18 | 19 | cd ../../data_tools/ava/ 20 | -------------------------------------------------------------------------------- /data_tools/hmdb51/PREPARING_HMDB51.md: -------------------------------------------------------------------------------- 1 | ## Preparing HMDB51 2 | 3 | For more details, please refer to the official [website](http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/). We provide scripts with documentations. Before we start, please make sure that the directory is located at `$MMACTION/data_tools/hmdb51/`. 4 | 5 | ### Prepare annotations 6 | First of all, run the following script to prepare annotations. 7 | ```shell 8 | bash download_annotations.sh 9 | ``` 10 | 11 | ### Prepare videos 12 | Then, use the following script to prepare videos. 13 | ```shell 14 | bash download_videos.sh 15 | ``` 16 | 17 | ### Extract frames 18 | Now it is time to extract frames from videos. 19 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow). 20 | If you have some SSD, then we recommend extracting frames there for better I/O performance. The extracted frames (RGB + Flow) will take up ~24GB. 21 | ```shell 22 | # execute these two line (Assume the SSD is mounted at "/mnt/SSD/") 23 | mkdir /mnt/SSD/hmdb51_extracted/ 24 | ln -s /mnt/SSD/hmdb51_extracted/ ../data/hmdb51/rawframes 25 | ``` 26 | 27 | If you didn't install dense_flow in the installation or only want to play with RGB frames (since extracting optical flow can be both time-comsuming and space-hogging), consider running the following script to extract **RGB-only** frames. 28 | ```shell 29 | bash extract_rgb_frames.sh 30 | ``` 31 | 32 | If both rgb and optical flow are required, run the following script to extract frames alternatively. 33 | ```shell 34 | bash extract_frames.sh 35 | ``` 36 | 37 | ### Generate filelist 38 | Run the follow script to generate filelist in the format of rawframes and videos. 39 | ```shell 40 | bash generate_filelist.sh 41 | ``` 42 | 43 | ### Folder structure 44 | In the context of the whole project (for ucf101 only), the folder structure will look like: 45 | ``` 46 | mmaction 47 | ├── mmaction 48 | ├── tools 49 | ├── configs 50 | ├── data 51 | │ ├── hmdb51 52 | │ │ ├── hmdb51_{train,val}_split_{1,2,3}_rawframes.txt 53 | │ │ ├── hmdb51_{train,val}_split_{1,2,3}_videos.txt 54 | │ │ ├── annotations 55 | │ │ ├── videos 56 | │ │ │ ├── brush_hair 57 | │ │ │ │ ├── April_09_brush_hair_u_nm_np1_ba_goo_0.avi 58 | 59 | │ │ │ ├── wave 60 | │ │ │ │ ├── 20060723sfjffbartsinger_wave_f_cm_np1_ba_med_0.avi 61 | │ │ ├── rawframes 62 | │ │ │ ├── brush_hair 63 | │ │ │ │ ├── April_09_brush_hair_u_nm_np1_ba_goo_0 64 | │ │ │ │ │ ├── img_00001.jpg 65 | │ │ │ │ │ ├── img_00002.jpg 66 | │ │ │ │ │ ├── ... 67 | │ │ │ │ │ ├── flow_x_00001.jpg 68 | │ │ │ │ │ ├── flow_x_00002.jpg 69 | │ │ │ │ │ ├── ... 70 | │ │ │ │ │ ├── flow_y_00001.jpg 71 | │ │ │ │ │ ├── flow_y_00002.jpg 72 | │ │ │ ├── ... 
73 | │ │ │ ├── wave 74 | │ │ │ │ ├── 20060723sfjffbartsinger_wave_f_cm_np1_ba_med_0 75 | │ │ │ │ ├── ... 76 | │ │ │ │ ├── winKen_wave_u_cm_np1_ri_bad_1 77 | 78 | ``` 79 | 80 | For training and evaluating on HMDB51, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md). -------------------------------------------------------------------------------- /data_tools/hmdb51/download_annotations.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | DATA_DIR="../../data/hmdb51/annotations" 4 | 5 | if [[ ! -d "${DATA_DIR}" ]]; then 6 | echo "${DATA_DIR} does not exist. Creating"; 7 | mkdir -p ${DATA_DIR} 8 | fi 9 | 10 | cd ${DATA_DIR} 11 | wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar 12 | 13 | # sudo apt-get install unrar 14 | unrar x test_train_splits.rar 15 | rm test_train_splits.rar 16 | 17 | mv testTrainMulti_7030_splits/*.txt ./annotations 18 | rmdir testTrainMulti_7030_splits 19 | -------------------------------------------------------------------------------- /data_tools/hmdb51/download_videos.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | DATA_DIR="../../data/hmdb51/" 4 | 5 | cd ${DATA_DIR} 6 | 7 | mkdir -p ./videos 8 | cd ./videos 9 | 10 | wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar 11 | 12 | unrar x ./hmdb51_org.rar 13 | rm ./hmdb51_org.rar 14 | 15 | # extract all rar files with full path 16 | for file in *.rar; do unrar x $file; done 17 | 18 | rm ./*.rar 19 | cd "../../../data_tools/hmdb51" 20 | -------------------------------------------------------------------------------- /data_tools/hmdb51/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | num_gpu=($(nvidia-smi -L | wc -l)) 4 | num_worker=${num_gpu} 5 | 6 | cd ../ 7 | python build_rawframes.py ../data/hmdb51/videos/ ../data/hmdb51/rawframes/ --level 2 --flow_type tvl1 --num_gpu ${num_gpu} --num_worker ${num_worker} 8 | echo "Raw frames (RGB and tv-l1) Generated" 9 | 10 | cd hmdb51/ 11 | -------------------------------------------------------------------------------- /data_tools/hmdb51/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | num_gpu=($(nvidia-smi -L | wc -l)) 4 | num_worker=${num_gpu} 5 | 6 | cd ../ 7 | python build_rawframes.py ../data/hmdb51/videos/ ../data/hmdb51/rawframes/ --level 2 --ext avi --num_gpu ${num_gpu} --num_worker ${num_worker} 8 | echo "Raw frames (RGB only) generated for train and val set" 9 | 10 | cd hmdb51/ -------------------------------------------------------------------------------- /data_tools/hmdb51/generate_filelist.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../../ 4 | PYTHONPATH=. python data_tools/build_file_list.py hmdb51 data/hmdb51/rawframes/ --level 2 --format rawframes --shuffle 5 | echo "Filelist for rawframes generated." 6 | 7 | PYTHONPATH=. python data_tools/build_file_list.py hmdb51 data/hmdb51/videos/ --level 2 --format videos --shuffle 8 | echo "Filelist for videos generated." 
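# For reference, each line of the generated rawframes list is expected to look like
#   "<relative_dir> <num_frames> <label>", e.g. (illustrative values)
#   brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0 120 0
# and each line of the videos list is expected to look like
#   "<relative_path> <label>", e.g.
#   brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0.avi 0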
9 | 10 | cd data_tools/hmdb51/ -------------------------------------------------------------------------------- /data_tools/kinetics400/PREPARING_KINETICS400.md: -------------------------------------------------------------------------------- 1 | ## Preparing Kinetics-400 2 | 3 | For more details, please refer to the official [website](https://deepmind.com/research/open-source/open-source-datasets/kinetics/). We provide scripts with documentations. Before we start, please make sure that the directory is located at `$MMACTION/data_tools/kinetics400/`. 4 | 5 | ### Prepare annotations 6 | First of all, run the following script to prepare annotations. 7 | ```shell 8 | bash download_annotations.sh 9 | ``` 10 | 11 | ### Prepare videos 12 | Then, use the following script to prepare videos. The codes are adapted from the [official crawler](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). Note that this might take a long time. 13 | ```shell 14 | bash download_videos.sh 15 | ``` 16 | Note that some people may already have a backup of the kinetics-400 dataset using the [official crawler](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). 17 | If this is the case, then you only need to replace all whitespaces in the class name for ease of processing either by [detox](http://manpages.ubuntu.com/manpages/bionic/man1/detox.1.html) 18 | 19 | ```shell 20 | # sudo apt-get install detox 21 | detox -r ../../data/kinetics400/videos_train/ 22 | detox -r ../../data/kinetics400/videos_val/ 23 | ``` 24 | or running 25 | ```shell 26 | bash rename_classnames.sh 27 | ``` 28 | 29 | ### Extract frames 30 | Now it is time to extract frames from videos. 31 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow). 32 | If you have some SSD, then we strongly recommend extracting frames there for better I/O performance. 33 | ```shell 34 | # execute these two line (Assume the SSD is mounted at "/mnt/SSD/") 35 | mkdir /mnt/SSD/kinetics400_extracted_train/ 36 | ln -s /mnt/SSD/kinetics400_extracted_train/ ../data/kinetics400/rawframes_train/ 37 | mkdir /mnt/SSD/kinetics400_extracted_val/ 38 | ln -s /mnt/SSD/kinetics400_extracted_val/ ../data/kinetics400/rawframes_val/ 39 | ``` 40 | Afterwards, run the following script to extract frames. 41 | ```shell 42 | bash extract_frames.sh 43 | ``` 44 | If you only want to play with RGB frames (since extracting optical flow can be both time-comsuming and space-hogging), consider running the following script to extract **RGB-only** frames. 45 | ```shell 46 | bash extract_rgb_frames.sh 47 | ``` 48 | 49 | 50 | ### Generate filelist 51 | Run the follow scripts to generate filelist in the format of videos and rawframes, respectively. 52 | ```shell 53 | bash generate_video_filelist.sh 54 | # execute the command below when rawframes are ready 55 | bash generate_rawframes_filelist.sh 56 | ``` 57 | 58 | ### Folder structure 59 | In the context of the whole project (for kinetics400 only), the *minimal* folder structure will look like: (*minimal* means that some data are not necessary: for example, you may want to evaluate kinetics-400 using the original video format.) 
60 | 61 | ``` 62 | mmaction 63 | ├── mmaction 64 | ├── tools 65 | ├── configs 66 | ├── data 67 | │ ├── kinetics400 68 | │ │ ├── kinetics400_train_list_videos.txt 69 | │ │ ├── kinetics400_val_list_videos.txt 70 | │ │ ├── annotations 71 | │ │ ├── videos_train 72 | │ │ ├── videos_val 73 | │ │ │ ├── abseiling 74 | │ │ │ │ ├── 0wR5jVB-WPk_000417_000427.mp4 75 | │ │ │ │ ├── ... 76 | │ │ │ ├── ... 77 | │ │ │ ├── wrapping_present 78 | │ │ │ ├── ... 79 | │ │ │ ├── zumba 80 | │ │ ├── rawframes_train 81 | │ │ ├── rawframes_val 82 | 83 | ``` 84 | 85 | For training and evaluating on Kinetics-400, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md). -------------------------------------------------------------------------------- /data_tools/kinetics400/download_annotations.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | DATA_DIR="../../data/kinetics400/annotations" 4 | 5 | if [[ ! -d "${DATA_DIR}" ]]; then 6 | echo "${DATA_DIR} does not exist. Creating"; 7 | mkdir -p ${DATA_DIR} 8 | fi 9 | 10 | wget https://storage.googleapis.com/deepmind-media/Datasets/kinetics400.tar.gz 11 | tar -xf kinetics400.tar.gz -C ${DATA_DIR}/ 12 | rm kinetics400.tar.gz 13 | -------------------------------------------------------------------------------- /data_tools/kinetics400/download_videos.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../../mmaction/third_party/ActivityNet/Crawler/Kinetics 4 | 5 | # set up environment 6 | conda env create -f environment.yml 7 | source activate kinetics 8 | pip install --upgrade youtube-dl 9 | 10 | DATA_DIR="../../../../../data/kinetics400" 11 | ANNO_DIR="../../../../../data/kinetics400/annotations" 12 | python download.py ${ANNO_DIR}/kinetics400/train.csv ${DATA_DIR}/videos_train 13 | python download.py ${ANNO_DIR}/kinetics400/val.csv ${DATA_DIR}/videos_val 14 | 15 | cd ../../../../../data_tools/kinetics400 16 | -------------------------------------------------------------------------------- /data_tools/kinetics400/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../ 4 | python build_rawframes.py ../data/kinetics400/videos_train/ ../data/kinetics400/rawframes_train/ --level 2 --flow_type tvl1 --ext mp4 5 | echo "Raw frames (RGB and tv-l1) Generated for train set" 6 | 7 | python build_rawframes.py ../data/kinetics400/videos_val/ ../data/kinetics400/rawframes_val/ --level 2 --flow_type tvl1 --ext mp4 8 | echo "Raw frames (RGB and tv-l1) Generated for val set" 9 | 10 | cd kinetics400/ 11 | -------------------------------------------------------------------------------- /data_tools/kinetics400/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/bash env 2 | 3 | cd ../ 4 | python build_rawframes.py ../data/kinetics400/videos_train/ ../data/kinetics400/rawframes_train/ --level 2 --ext mp4 5 | echo "Raw frames (RGB only) generated for train set" 6 | 7 | python build_rawframes.py ../data/kinetics400/videos_val/ ../data/kinetics400/rawframes_val/ --level 2 --ext mp4 8 | echo "Raw frames (RGB only) generated for val set" 9 | 10 | cd kinetics400/ 11 | -------------------------------------------------------------------------------- /data_tools/kinetics400/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../../ 4 | PYTHONPATH=. python data_tools/build_file_list.py kinetics400 data/kinetics400/rawframes_train/ --level 2 --format rawframes --num_split 1 --subset train --shuffle 5 | echo "Train filelist for rawframes generated." 6 | 7 | PYTHONPATH=. python data_tools/build_file_list.py kinetics400 data/kinetics400/rawframes_val/ --level 2 --format rawframes --num_split 1 --subset val --shuffle 8 | echo "Val filelist for rawframes generated." 9 | cd data_tools/kinetics400/ -------------------------------------------------------------------------------- /data_tools/kinetics400/generate_video_filelist.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../../ 4 | PYTHONPATH=. python data_tools/build_file_list.py kinetics400 data/kinetics400/videos_train/ --level 2 --format videos --num_split 1 --subset train --shuffle 5 | echo "Train filelist for video generated." 6 | 7 | PYTHONPATH=. python data_tools/build_file_list.py kinetics400 data/kinetics400/videos_val/ --level 2 --format videos --num_split 1 --subset val --shuffle 8 | echo "Val filelist for video generated." 9 | cd data_tools/kinetics400/ -------------------------------------------------------------------------------- /data_tools/kinetics400/rename_classnames.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | # Rename classname for convenience 4 | cd ../../data/kinetics400/ 5 | ls ./videos_train | while read class; do \ 6 | newclass=`echo $class | tr " " "_" | tr "(" "-" | tr ")" "-" `; 7 | if [ "${class}" != "${newclass}" ] 8 | then 9 | mv "videos_train/${class}" "videos_train/${newclass}"; 10 | fi 11 | done 12 | 13 | ls ./videos_val | while read class; do \ 14 | newclass=`echo $class | tr " " "_" | tr "(" "-" | tr ")" "-" `; 15 | if [ "${class}" != "${newclass}" ] 16 | then 17 | mv "videos_val/${class}" "videos_val/${newclass}"; 18 | fi 19 | done 20 | 21 | cd ../../data_tools/kinetics400/ 22 | -------------------------------------------------------------------------------- /data_tools/thumos14/PREPARING_TH14.md: -------------------------------------------------------------------------------- 1 | ## Preparing THUMOS-14 2 | 3 | For more details, please refer to the [official website](https://www.crcv.ucf.edu/THUMOS14/download.html). We provide scripts with documentations. Before we start, please make sure that the directory is located at `$MMACTION/data_tools/thumos14/`. 4 | 5 | ### Prepare annotations 6 | First of all, run the following script to prepare annotations. 7 | ```shell 8 | bash download_annotations.sh 9 | ``` 10 | 11 | ### Prepare videos 12 | Then, use the following script to prepare videos. 13 | ```shell 14 | bash download_videos.sh 15 | ``` 16 | 17 | ### Extract frames 18 | Now it is time to extract frames from videos. 
19 | Before extraction, please refer to `DATASET.md` for installing [dense_flow]. 20 | If you have some SSD, then we strongly recommend extracting frames there for better I/O performance. 21 | ```shell 22 | # execute these two line (Assume the SSD is mounted at "/mnt/SSD/") 23 | mkdir /mnt/SSD/thumos14_extracted/ 24 | ln -s /mnt/SSD/thumos14_extracted/ ../data/thumos14/rawframes/ 25 | ``` 26 | Afterwards, run the following script to extract frames. 27 | ```shell 28 | bash extract_frames.sh 29 | ``` 30 | 31 | ### Fetching proposal files 32 | Run the follow scripts to fetch pre-computed tag proposals. 33 | ```shell 34 | bash fetch_tag_proposals.sh 35 | ``` 36 | 37 | ### Folder structure 38 | In the context of the whole project (for thumos14 only), the folder structure will look like: 39 | 40 | ``` 41 | mmaction 42 | ├── mmaction 43 | ├── tools 44 | ├── configs 45 | ├── data 46 | │ ├── thumos14 47 | │ │ ├── thumos14_tag_val_normalized_proposal_list.txt 48 | │ │ ├── thumos14_tag_test_normalized_proposal_list.txt 49 | │ │ ├── annotations 50 | │ │ ├── videos_val 51 | │ │ │ ├── video_validation_0000001.mp4 52 | │ │ │ ├── ... 53 | │ │ ├── videos_test 54 | │ │ │ ├── video_test_0000001.mp4 55 | │ │ ├── rawframes 56 | │ │ │ ├── video_validation_0000001 57 | | │ │ │ ├── img_00001.jpg 58 | | │ │ │ ├── img_00002.jpg 59 | | │ │ │ ├── ... 60 | | │ │ │ ├── flow_x_00001.jpg 61 | | │ │ │ ├── flow_x_00002.jpg 62 | | │ │ │ ├── ... 63 | | │ │ │ ├── flow_y_00001.jpg 64 | | │ │ │ ├── flow_y_00002.jpg 65 | | │ │ │ ├── ... 66 | │ │ │ ├── ... 67 | │ │ │ ├── video_test_0000001 68 | ``` 69 | 70 | For training and evaluating on THUMOS-14, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md). -------------------------------------------------------------------------------- /data_tools/thumos14/download_annotations.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | DATA_DIR="../../data/thumos14/" 4 | 5 | if [[ ! -d "${DATA_DIR}" ]]; then 6 | echo "${DATA_DIR} does not exist. Creating"; 7 | mkdir -p ${DATA_DIR} 8 | fi 9 | 10 | wget http://crcv.ucf.edu/THUMOS14/Validation_set/TH14_Temporal_annotations_validation.zip 11 | wget http://crcv.ucf.edu/THUMOS14/test_set/TH14_Temporal_annotations_test.zip 12 | 13 | unzip -j TH14_Temporal_annotations_validation.zip -d $DATA_DIR/annotations_val 14 | unzip -j TH14_Temporal_annotations_test.zip -d $DATA_DIR/annotations_test -------------------------------------------------------------------------------- /data_tools/thumos14/download_videos.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | DATA_DIR="../../data/thumos14/" 4 | 5 | cd ${DATA_DIR} 6 | 7 | wget https://storage.googleapis.com/thumos14_files/TH14_validation_set_mp4.zip 8 | wget https://storage.googleapis.com/thumos14_files/TH14_Test_set_mp4.zip 9 | 10 | unzip -j TH14_validation_set_mp4.zip -d videos_val/ 11 | 12 | unzip -P "THUMOS14_REGISTERED" TH14_Test_set_mp4.zip -d videos_test/ 13 | 14 | cd ../../data_tools/thumos14/ -------------------------------------------------------------------------------- /data_tools/thumos14/extracted_frames.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/bash env 2 | 3 | cd ../ 4 | python build_rawframes.py ../data/thumos14/videos_val/ ../data/thumos14/rawframes/ --level 1 --flow_type tvl1 --ext mp4 5 | echo "Raw frames (RGB and tv-l1) Generated for val set" 6 | 7 | python build_rawframes.py ../data/thumos14/videos_test/ ../data/thumos14/rawframes/ --level 1 --flow_type tvl1 --ext mp4 8 | echo "Raw frames (RGB and tv-l1) Generated for test set" 9 | 10 | cd thumos14/ 11 | -------------------------------------------------------------------------------- /data_tools/thumos14/fetch_tag_proposals.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/thumos14_tag_val_normalized_proposal_list.txt -P ../../data/thumos14/ 4 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/thumos14_tag_test_normalized_proposal_list.txt -P ../../data/thumos14/ 5 | -------------------------------------------------------------------------------- /data_tools/ucf101/PREPARING_UCF101.md: -------------------------------------------------------------------------------- 1 | ## Preparing UCF-101 2 | 3 | For more details, please refer to the official [website](https://www.crcv.ucf.edu/data/UCF101.php). We provide scripts with documentations. Before we start, please make sure that the directory is located at `$MMACTION/data_tools/ucf101/`. 4 | 5 | ### Prepare annotations 6 | First of all, run the following script to prepare annotations. 7 | ```shell 8 | bash download_annotations.sh 9 | ``` 10 | 11 | ### Prepare videos 12 | Then, use the following script to prepare videos. 13 | ```shell 14 | bash download_videos.sh 15 | ``` 16 | 17 | ### Extract frames 18 | Now it is time to extract frames from videos. 19 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow). 20 | If you have some SSD, then we recommend extracting frames there for better I/O performance. The extracted frames (RGB + Flow) will take up ~100GB. 21 | ```shell 22 | # execute these two line (Assume the SSD is mounted at "/mnt/SSD/") 23 | mkdir /mnt/SSD/ucf101_extracted/ 24 | ln -s /mnt/SSD/ucf101_extracted/ ../data/ucf101/rawframes 25 | ``` 26 | 27 | If you didn't install dense_flow in the installation or only want to play with RGB frames (since extracting optical flow can be both time-comsuming and space-hogging), consider running the following script to extract **RGB-only** frames. 28 | ```shell 29 | bash extract_rgb_frames.sh 30 | ``` 31 | 32 | If both rgb and optical flow are required, run the following script to extract frames alternatively. 33 | ```shell 34 | bash extract_frames.sh 35 | ``` 36 | 37 | ### Generate filelist 38 | Run the follow script to generate filelist in the format of rawframes and videos. 
39 | ```shell 40 | bash generate_filelist.sh 41 | ``` 42 | 43 | ### Folder structure 44 | In the context of the whole project (for ucf101 only), the folder structure will look like: 45 | ``` 46 | mmaction 47 | ├── mmaction 48 | ├── tools 49 | ├── configs 50 | ├── data 51 | │ ├── ucf101 52 | │ │ ├── ucf101_{train,val}_split_{1,2,3}_rawframes.txt 53 | │ │ ├── ucf101_{train,val}_split_{1,2,3}_videos.txt 54 | │ │ ├── annotations 55 | │ │ ├── videos 56 | │ │ │ ├── ApplyEyeMakeup 57 | │ │ │ │ ├── v_ApplyEyeMakeup_g01_c01.avi 58 | 59 | │ │ │ ├── YoYo 60 | │ │ │ │ ├── v_YoYo_g25_c05.avi 61 | │ │ ├── rawframes 62 | │ │ │ ├── ApplyEyeMakeup 63 | │ │ │ │ ├── v_ApplyEyeMakeup_g01_c01 64 | │ │ │ │ │ ├── img_00001.jpg 65 | │ │ │ │ │ ├── img_00002.jpg 66 | │ │ │ │ │ ├── ... 67 | │ │ │ │ │ ├── flow_x_00001.jpg 68 | │ │ │ │ │ ├── flow_x_00002.jpg 69 | │ │ │ │ │ ├── ... 70 | │ │ │ │ │ ├── flow_y_00001.jpg 71 | │ │ │ │ │ ├── flow_y_00002.jpg 72 | │ │ │ ├── ... 73 | │ │ │ ├── YoYo 74 | │ │ │ │ ├── v_YoYo_g01_c01 75 | │ │ │ │ ├── ... 76 | │ │ │ │ ├── v_YoYo_g25_c05 77 | 78 | ``` 79 | 80 | For training and evaluating on UCF101, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md). -------------------------------------------------------------------------------- /data_tools/ucf101/download_annotations.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | DATA_DIR="../../data/ucf101/annotations" 4 | 5 | if [[ ! -d "${DATA_DIR}" ]]; then 6 | echo "${DATA_DIR} does not exist. Creating"; 7 | mkdir -p ${DATA_DIR} 8 | fi 9 | 10 | wget "https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip" 11 | 12 | unzip -j UCF101TrainTestSplits-RecognitionTask.zip -d ${DATA_DIR}/ 13 | rm UCF101TrainTestSplits-RecognitionTask.zip 14 | -------------------------------------------------------------------------------- /data_tools/ucf101/download_videos.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | DATA_DIR="../../data/ucf101/" 4 | 5 | cd ${DATA_DIR} 6 | 7 | wget https://www.crcv.ucf.edu/data/UCF101/UCF101.rar 8 | unrar x UCF101.rar 9 | mv ./UCF-101 ./videos 10 | 11 | cd "../../data_tools/ucf101" 12 | -------------------------------------------------------------------------------- /data_tools/ucf101/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../ 4 | python build_rawframes.py ../data/ucf101/videos/ ../data/ucf101/rawframes/ --level 2 --flow_type tvl1 5 | echo "Raw frames (RGB and tv-l1) Generated" 6 | cd ucf101/ 7 | -------------------------------------------------------------------------------- /data_tools/ucf101/generate_filelist.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/bash env 2 | 3 | cd ../../ 4 | PYTHONPATH=. python data_tools/build_file_list.py ucf101 data/ucf101/rawframes/ --level 2 --format rawframes --shuffle 5 | echo "Filelist for rawframes generated." 6 | 7 | PYTHONPATH=. python data_tools/build_file_list.py ucf101 data/ucf101/videos/ --level 2 --format videos --shuffle 8 | echo "Filelist for videos generated." 
9 | 10 | cd data_tools/ucf101/ -------------------------------------------------------------------------------- /mmaction/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmaction/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_network 3 | 4 | __all__ = [ 5 | 'init_dist', 'get_root_logger', 'set_random_seed', 6 | 'train_network', 7 | ] 8 | -------------------------------------------------------------------------------- /mmaction/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | import torch.distributed as dist 8 | import torch.multiprocessing as mp 9 | from mmcv.runner import get_dist_info 10 | 11 | 12 | def init_dist(launcher, backend='nccl', **kwargs): 13 | if mp.get_start_method(allow_none=True) is None: 14 | mp.set_start_method('spawn') 15 | if launcher == 'pytorch': 16 | _init_dist_pytorch(backend, **kwargs) 17 | elif launcher == 'mpi': 18 | _init_dist_mpi(backend, **kwargs) 19 | elif launcher == 'slurm': 20 | _init_dist_slurm(backend, **kwargs) 21 | else: 22 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 23 | 24 | 25 | def _init_dist_pytorch(backend, **kwargs): 26 | # TODO: use local_rank instead of rank % num_gpus 27 | rank = int(os.environ['RANK']) 28 | num_gpus = torch.cuda.device_count() 29 | torch.cuda.set_device(rank % num_gpus) 30 | dist.init_process_group(backend=backend, **kwargs) 31 | 32 | 33 | def _init_dist_mpi(backend, **kwargs): 34 | raise NotImplementedError 35 | 36 | 37 | def _init_dist_slurm(backend, **kwargs): 38 | raise NotImplementedError 39 | 40 | 41 | def set_random_seed(seed): 42 | random.seed(seed) 43 | np.random.seed(seed) 44 | torch.manual_seed(seed) 45 | torch.cuda.manual_seed_all(seed) 46 | 47 | 48 | def get_root_logger(log_level=logging.INFO): 49 | logger = logging.getLogger() 50 | if not logger.hasHandlers(): 51 | logging.basicConfig( 52 | format='%(asctime)s - %(levelname)s - %(message)s', 53 | level=log_level) 54 | rank, _ = get_dist_info() 55 | if rank != 0: 56 | logger.setLevel('ERROR') 57 | return logger 58 | -------------------------------------------------------------------------------- /mmaction/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation import * 2 | from .utils import * 3 | -------------------------------------------------------------------------------- /mmaction/core/anchor2d/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_target 3 | 4 | __all__ = ['AnchorGenerator', 'anchor_target'] -------------------------------------------------------------------------------- /mmaction/core/anchor2d/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | 6 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 7 | self.base_size = base_size 8 | self.scales = torch.Tensor(scales) 9 | 
self.ratios = torch.Tensor(ratios) 10 | self.scale_major = scale_major 11 | self.ctr = ctr 12 | self.base_anchors = self.gen_base_anchors() 13 | 14 | @property 15 | def num_base_anchors(self): 16 | return self.base_anchors.size(0) 17 | 18 | def gen_base_anchors(self): 19 | w = self.base_size 20 | h = self.base_size 21 | if self.ctr is None: 22 | x_ctr = 0.5 * (w - 1) 23 | y_ctr = 0.5 * (h - 1) 24 | else: 25 | x_ctr, y_ctr = self.ctr 26 | 27 | h_ratios = torch.sqrt(self.ratios) 28 | w_ratios = 1 / h_ratios 29 | if self.scale_major: 30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 32 | else: 33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 35 | 36 | base_anchors = torch.stack( 37 | [ 38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 40 | ], 41 | dim=-1).round() 42 | 43 | return base_anchors 44 | 45 | def _meshgrid(self, x, y, row_major=True): 46 | xx = x.repeat(len(y)) 47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 48 | if row_major: 49 | return xx, yy 50 | else: 51 | return yy, xx 52 | 53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 54 | base_anchors = self.base_anchors.to(device) 55 | 56 | feat_h, feat_w = featmap_size 57 | shift_x = torch.arange(0, feat_w, device=device) * stride 58 | shift_y = torch.arange(0, feat_h, device=device) * stride 59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 61 | shifts = shifts.type_as(base_anchors) 62 | # first feat_w elements correspond to the first row of shifts 63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 64 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 65 | 66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 67 | all_anchors = all_anchors.view(-1, 4) 68 | # first A rows correspond to A anchors of (0, 0) in feature map, 69 | # then (0, 1), (0, 2), ... 
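        # Editor's note (worked example, not in the original source): with
        # featmap_size=(2, 2), stride=16 and A=3 base anchors there are K=4
        # shifts, so all_anchors has shape (K*A, 4) = (12, 4): the 3 base
        # anchors shifted to (0, 0), then to (16, 0), (0, 16) and (16, 16).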
70 | return all_anchors 71 | 72 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 73 | feat_h, feat_w = featmap_size 74 | valid_h, valid_w = valid_size 75 | assert valid_h <= feat_h and valid_w <= feat_w 76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 78 | valid_x[:valid_w] = 1 79 | valid_y[:valid_h] = 1 80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 81 | valid = valid_xx & valid_yy 82 | valid = valid[:, None].expand( 83 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 84 | return valid 85 | -------------------------------------------------------------------------------- /mmaction/core/bbox1d/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import temporal_iou 2 | 3 | __all__ = [ 4 | 'temporal_iou' 5 | ] 6 | -------------------------------------------------------------------------------- /mmaction/core/bbox1d/geometry.py: -------------------------------------------------------------------------------- 1 | 2 | def temporal_iou(span_A, span_B): 3 | """ 4 | Calculates the intersection over union of two temporal "bounding boxes" 5 | span_A: (start, end) 6 | span_B: (start, end) 7 | """ 8 | union = min(span_A[0], span_B[0]), max(span_A[1], span_B[1]) 9 | inter = max(span_A[0], span_B[0]), min(span_A[1], span_B[1]) 10 | 11 | if inter[0] >= inter[1]: 12 | return 0 13 | else: 14 | return float(inter[1] - inter[0]) / float(union[1] - union[0]) 15 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import BaseSampler, PseudoSampler, RandomSampler 4 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 5 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 6 | bbox_mapping_back, bbox2roi, bbox2result) 7 | from .bbox_target import bbox_target 8 | 9 | __all__ = [ 10 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'build_assigner', 'build_sampler', 'assign_and_sample', 13 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 14 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target' 15 | ] 16 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict( 11 | cfg, assigners, default_args=kwargs) 12 | else: 13 | raise TypeError('Invalid type {} for building a sampler'.format( 14 | type(cfg))) 15 | 16 | 17 | def build_sampler(cfg, **kwargs): 18 | if isinstance(cfg, samplers.BaseSampler): 19 | return cfg 20 | elif isinstance(cfg, dict): 21 | return mmcv.runner.obj_from_dict( 22 | cfg, samplers, default_args=kwargs) 23 | else: 24 | raise TypeError('Invalid type {} for building a sampler'.format( 25 | type(cfg))) 26 | 27 | 28 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 29 | bbox_assigner = build_assigner(cfg.assigner) 30 | bbox_sampler = build_sampler(cfg.sampler) 31 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 32 | gt_labels) 33 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 34 | gt_labels) 35 | return assign_result, sampling_result 36 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_assigner import BaseAssigner 2 | from .max_iou_assigner import MaxIoUAssigner 3 | from .assign_result import AssignResult 4 | 5 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] -------------------------------------------------------------------------------- /mmaction/core/bbox2d/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .transforms import bbox2delta 4 | from mmaction.utils.misc import multi_apply 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | (labels, label_weights, bbox_targets, 17 | bbox_weights, class_weights) = multi_apply( 18 | bbox_target_single, 19 | pos_bboxes_list, 20 | neg_bboxes_list, 21 | pos_gt_bboxes_list, 22 | pos_gt_labels_list, 23 | 
cfg=cfg, 24 | reg_classes=reg_classes, 25 | target_means=target_means, 26 | target_stds=target_stds) 27 | 28 | if concat: 29 | labels = torch.cat(labels, 0) 30 | label_weights = torch.cat(label_weights, 0) 31 | bbox_targets = torch.cat(bbox_targets, 0) 32 | bbox_weights = torch.cat(bbox_weights, 0) 33 | class_weights = torch.cat(class_weights, 0) 34 | return labels, label_weights, bbox_targets, bbox_weights, class_weights 35 | 36 | 37 | def bbox_target_single(pos_bboxes, 38 | neg_bboxes, 39 | pos_gt_bboxes, 40 | pos_gt_labels, 41 | cfg, 42 | reg_classes=1, 43 | target_means=[.0, .0, .0, .0], 44 | target_stds=[1.0, 1.0, 1.0, 1.0]): 45 | num_pos = pos_bboxes.size(0) 46 | num_neg = neg_bboxes.size(0) 47 | num_samples = num_pos + num_neg 48 | if len(pos_gt_labels[0]) == 1: 49 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 50 | else: 51 | labels = pos_bboxes.new_zeros( 52 | (num_samples, len(pos_gt_labels[0])), dtype=torch.long) 53 | label_weights = pos_bboxes.new_zeros(num_samples) 54 | if len(pos_gt_labels[0]) == 1: 55 | class_weights = pos_bboxes.new_zeros(num_samples) 56 | else: 57 | class_weights = pos_bboxes.new_zeros( 58 | num_samples, len(pos_gt_labels[0])) 59 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 60 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 61 | if num_pos > 0: 62 | labels[:num_pos] = pos_gt_labels 63 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 64 | label_weights[:num_pos] = pos_weight 65 | class_weight = 1.0 if not hasattr( 66 | cfg, 'cls_weight') or cfg.cls_weight <= 0 else cfg.cls_weight 67 | class_weights[:num_pos] = class_weight 68 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 69 | target_stds) 70 | bbox_targets[:num_pos, :] = pos_bbox_targets 71 | bbox_weights[:num_pos, :] = 1 72 | if num_neg > 0: 73 | label_weights[-num_neg:] = 1.0 74 | class_weights[-num_neg:] = 0.0 75 | 76 | return labels, label_weights, bbox_targets, bbox_weights, class_weights 77 | 78 | 79 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 80 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), 81 | 4 * num_classes)) 82 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), 83 | 4 * num_classes)) 84 | for i in torch.nonzero(labels > 0).squeeze(-1): 85 | start, end = labels[i] * 4, (labels[i] + 1) * 4 86 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 87 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 88 | return bbox_targets_expand, bbox_weights_expand 89 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two set of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or iof (intersection over 16 | foreground). 
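        Example (editor's illustration; boxes below use the inclusive "+1"
            convention of this implementation): a box [0, 0, 10, 10] (11x11
            pixels) against itself gives IoU 1.0, and against [0, 0, 20, 20]
            (21x21 pixels) gives 121 / (121 + 441 - 121) ≈ 0.274.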
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .sampling_result import SamplingResult 5 | 6 | __all__ = [ 7 | 'BaseSampler', 'PseudoSampler', 'RandomSampler' 8 | 'SamplingResult' 9 | ] -------------------------------------------------------------------------------- /mmaction/core/bbox2d/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | This is a simple implementation of bbox sampling given candidates, 39 | assigning results and ground truth bboxes. 40 | Args: 41 | assign_result (:obj:`AssignResult`): Bbox assigning results. 42 | bboxes (Tensor): Boxes to be sampled from. 43 | gt_bboxes (Tensor): Ground truth bboxes. 44 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 45 | Returns: 46 | :obj:`SamplingResult`: Sampling result. 
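        Example (editor's sketch; the config values are hypothetical):
            sampler = RandomSampler(num=512, pos_fraction=0.25,
                                    neg_pos_ub=-1, add_gt_as_proposals=True)
            result = sampler.sample(assign_result, proposals, gt_bboxes, gt_labels)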
47 | """ 48 | bboxes = bboxes[:, :4] 49 | 50 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 51 | if self.add_gt_as_proposals: 52 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 53 | assign_result.add_gt_(gt_labels) 54 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 55 | gt_flags = torch.cat([gt_ones, gt_flags]) 56 | 57 | num_expected_pos = int(self.num * self.pos_fraction) 58 | pos_inds = self.pos_sampler._sample_pos( 59 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 60 | # We found that sampled indices have duplicated items occasionally. 61 | # (may be a bug of PyTorch) 62 | pos_inds = pos_inds.unique() 63 | num_sampled_pos = pos_inds.numel() 64 | num_expected_neg = self.num - num_sampled_pos 65 | if self.neg_pos_ub >= 0: 66 | _pos = max(1, num_sampled_pos) 67 | neg_upper_bound = int(self.neg_pos_ub * _pos) 68 | if num_expected_neg > neg_upper_bound: 69 | num_expected_neg = neg_upper_bound 70 | neg_inds = self.neg_sampler._sample_neg( 71 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 72 | neg_inds = neg_inds.unique() 73 | 74 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 75 | assign_result, gt_flags) 76 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Random select some elements from the gallery. 21 | It seems that Pytorch's implementation is slower than numpy so we use 22 | numpy to randperm the indices. 
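        Example (editor's illustration): ``random_choice(torch.arange(100), 10)``
        returns 10 distinct elements of the gallery, drawn without replacement.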
23 | """ 24 | assert len(gallery) >= num 25 | if isinstance(gallery, list): 26 | gallery = np.array(gallery) 27 | cands = np.arange(len(gallery)) 28 | np.random.shuffle(cands) 29 | rand_inds = cands[:num] 30 | if not isinstance(gallery, np.ndarray): 31 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 32 | return gallery[rand_inds] 33 | 34 | def _sample_pos(self, assign_result, num_expected, **kwargs): 35 | """Randomly sample some positive samples.""" 36 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 37 | if pos_inds.numel() != 0: 38 | pos_inds = pos_inds.squeeze(1) 39 | if pos_inds.numel() <= num_expected: 40 | return pos_inds 41 | else: 42 | return self.random_choice(pos_inds, num_expected) 43 | 44 | def _sample_neg(self, assign_result, num_expected, **kwargs): 45 | """Randomly sample some negative samples.""" 46 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 47 | if neg_inds.numel() != 0: 48 | neg_inds = neg_inds.squeeze(1) 49 | if len(neg_inds) <= num_expected: 50 | return neg_inds 51 | else: 52 | return self.random_choice(neg_inds, num_expected) 53 | -------------------------------------------------------------------------------- /mmaction/core/bbox2d/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmaction/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (get_classes) 2 | from .eval_hooks import (DistEvalHook, DistEvalTopKAccuracyHook, 3 | AVADistEvalmAPHook) 4 | 5 | __all__ = [ 6 | 'get_classes', 7 | 'DistEvalHook', 'DistEvalTopKAccuracyHook', 8 | 'AVADistEvalmAPHook' 9 | ] 10 | -------------------------------------------------------------------------------- /mmaction/core/evaluation/accuracy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import confusion_matrix 3 | 4 | 5 | def softmax(x, dim=1): 6 | """Compute softmax values for each sets of scores in x.""" 7 | e_x = np.exp(x - np.max(x, axis=dim, keepdims=True)) 8 | return e_x / e_x.sum(axis=dim, keepdims=True) 9 | 10 | 11 | def mean_class_accuracy(scores, labels): 12 | pred = np.argmax(scores, axis=1) 13 | cf = confusion_matrix(labels, pred).astype(float) 14 | 15 | cls_cnt = cf.sum(axis=1) 16 | cls_hit = np.diag(cf) 17 | 18 | return np.mean(cls_hit/cls_cnt) 19 | 20 | 21 | def top_k_acc(score, lb_set, k=3): 22 | idx = np.argsort(score)[-k:] 23 | return len(lb_set.intersection(idx)), len(lb_set) 24 | 25 | 26 | def top_k_hit(score, lb_set, k=3): 27 | idx = np.argsort(score)[-k:] 28 | return len(lb_set.intersection(idx)) > 0, 1 29 | 30 | 31 | 
def top_k_accuracy(scores, labels, k=(1,)): 32 | res = [] 33 | for kk in k: 34 | hits = [] 35 | for x, y in zip(scores, labels): 36 | y = [y] if isinstance(y, int) else y 37 | hits.append(top_k_hit(x, set(y), k=kk)[0]) 38 | res.append(np.mean(hits)) 39 | return res 40 | -------------------------------------------------------------------------------- /mmaction/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | Args: 7 | bboxes1(ndarray): shape (n, 4) 8 | bboxes2(ndarray): shape (k, 4) 9 | mode(str): iou (intersection over union) or iof (intersection 10 | over foreground) 11 | Returns: 12 | ious(ndarray): shape (n, k) 13 | """ 14 | 15 | assert mode in ['iou', 'iof'] 16 | 17 | bboxes1 = bboxes1.astype(np.float32) 18 | bboxes2 = bboxes2.astype(np.float32) 19 | rows = bboxes1.shape[0] 20 | cols = bboxes2.shape[0] 21 | ious = np.zeros((rows, cols), dtype=np.float32) 22 | if rows * cols == 0: 23 | return ious 24 | exchange = False 25 | if bboxes1.shape[0] > bboxes2.shape[0]: 26 | bboxes1, bboxes2 = bboxes2, bboxes1 27 | ious = np.zeros((cols, rows), dtype=np.float32) 28 | exchange = True 29 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 30 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 31 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 32 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 33 | for i in range(bboxes1.shape[0]): 34 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 35 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 36 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 37 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 38 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 39 | y_end - y_start + 1, 0) 40 | if mode == 'iou': 41 | union = area1[i] + area2 - overlap 42 | else: 43 | union = area1[i] if not exchange else area2 44 | ious[i, :] = overlap / union 45 | if exchange: 46 | ious = ious.T 47 | return ious 48 | -------------------------------------------------------------------------------- /mmaction/core/evaluation/class_names.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def ava_classes(): 5 | return [ 6 | 'bend/bow (at the waist)', 'crawl', 'crouch/kneel', 'dance', 7 | 'fall down', 'get up', 'jump/leap', 'lie/sleep', 'martial art', 8 | 'run/jog', 'sit', 'stand', 'swim', 'walk', 'answer phone', 9 | 'brush teeth', 'carry/hold (an object)', 'catch (an object)', 'chop', 10 | 'climb (e.g., a mountain)', 11 | 'clink glass', 'close (e.g., a door, a box)', 'cook', 'cut', 'dig', 12 | 'dress/put on clothing', 'drink', 'driving (e.g., a car, a truck)', 13 | 'eat', 'enter', 'exit', 'extract', 'fishing', 'hit (an object)', 14 | 'kick (an object)', 'lift/pick up', 'listen (e.g., to music)', 15 | 'open (e.g., a window, a car door)', 'paint', 'play board game', 16 | 'play musical instrument', 'play with pets', 'point to (an object)', 17 | 'press', 'pull (an object)', 'push (an object)', 'put down', 'read', 18 | 'ride (e.g., a bike, a car, a horse)', 'row boat', 'sail boat', 19 | 'shoot', 'shovel', 'smoke', 'stir', 'take a photo', 20 | 'text on/look at a cellphone', 'throw', 'touch (an object)', 21 | ' (e.g., a screwdriver)', 'watch (e.g., TV)', 'work on a computer', 22 | 'write', 'fight/hit (a person)', 23 | 'give/serve (an object) to (a person)', 24 | 'grab (a person)', 'hand clap', 'hand shake', 'hand wave', 
25 | 'hug (a person)', 26 | 'kick (a person)', 'kiss (a person)', 'lift (a person)', 27 | 'listen to (a person)', 'play with kids', 'push (another person)', 28 | 'sing to (e.g., self, a person, a group)', 29 | 'take (an object) from (a person)', 30 | 'talk to (e.g., self, a person, a group)', 'watch (a person)' 31 | ] 32 | 33 | 34 | dataset_aliases = { 35 | 'ava': ['ava', 'ava2.1', 'ava2.2'], 36 | } 37 | 38 | 39 | def get_classes(dataset): 40 | """Get class names of a dataset.""" 41 | alias2name = {} 42 | for name, aliases in dataset_aliases.items(): 43 | for alias in aliases: 44 | alias2name[alias] = name 45 | 46 | if mmcv.is_str(dataset): 47 | if dataset in alias2name: 48 | labels = eval(alias2name[dataset] + '_classes()') 49 | else: 50 | raise ValueError('Unrecognized dataset: {}'.format(dataset)) 51 | else: 52 | raise TypeError('dataset must a str, but got {}'.format(type(dataset))) 53 | return labels 54 | -------------------------------------------------------------------------------- /mmaction/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms, singleclass_nms 2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, 3 | merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'singleclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores' 8 | ] 9 | -------------------------------------------------------------------------------- /mmaction/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | 5 | from mmaction.ops import nms 6 | from ..bbox2d import bbox_mapping_back 7 | 8 | 9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 10 | """Merge augmented proposals (multiscale, flip, etc.) 11 | 12 | Args: 13 | aug_proposals (list[Tensor]): proposals from different testing 14 | schemes, shape (n, 5). Note that they are not rescaled to the 15 | original image size. 16 | img_metas (list[dict]): image info including "shape_scale" and "flip". 17 | rpn_test_cfg (dict): rpn test config. 18 | 19 | Returns: 20 | Tensor: shape (n, 4), proposals corresponding to original image scale. 21 | """ 22 | recovered_proposals = [] 23 | for proposals, img_info in zip(aug_proposals, img_metas): 24 | img_shape = img_info['img_shape'] 25 | scale_factor = img_info['scale_factor'] 26 | flip = img_info['flip'] 27 | _proposals = proposals.clone() 28 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 29 | scale_factor, flip) 30 | recovered_proposals.append(_proposals) 31 | aug_proposals = torch.cat(recovered_proposals, dim=0) 32 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) 33 | scores = merged_proposals[:, 4] 34 | _, order = scores.sort(0, descending=True) 35 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 36 | order = order[:num] 37 | merged_proposals = merged_proposals[order, :] 38 | return merged_proposals 39 | 40 | 41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 42 | """Merge augmented detection bboxes and scores. 43 | 44 | Args: 45 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 46 | aug_scores (list[Tensor] or None): shape (n, #class) 47 | img_shapes (list[Tensor]): shape (3, ). 48 | rcnn_test_cfg (dict): rcnn test config. 
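        Note (editor's addition): in this implementation the second positional
        argument is ``img_metas`` rather than the ``img_shapes`` listed above;
        each element is read as ``img_info[0]['img_shape']``,
        ``img_info[0]['scale_factor']`` and ``img_info[0]['flip']`` below.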
49 | 50 | Returns: 51 | tuple: (bboxes, scores) 52 | """ 53 | recovered_bboxes = [] 54 | for bboxes, img_info in zip(aug_bboxes, img_metas): 55 | img_shape = img_info[0]['img_shape'] 56 | scale_factor = img_info[0]['scale_factor'] 57 | flip = img_info[0]['flip'] 58 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 59 | recovered_bboxes.append(bboxes) 60 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 61 | if aug_scores is None: 62 | return bboxes 63 | else: 64 | scores = torch.stack(aug_scores).mean(dim=0) 65 | return bboxes, scores 66 | 67 | 68 | def merge_aug_scores(aug_scores): 69 | """Merge augmented bbox scores.""" 70 | if isinstance(aug_scores[0], torch.Tensor): 71 | return torch.mean(torch.stack(aug_scores), dim=0) 72 | else: 73 | return np.mean(aug_scores, axis=0) 74 | -------------------------------------------------------------------------------- /mmaction/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import allreduce_grads, DistOptimizerHook 2 | 3 | __all__ = [ 4 | 'allreduce_grads', 'DistOptimizerHook', 5 | ] -------------------------------------------------------------------------------- /mmaction/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 5 | _take_tensors) 6 | from mmcv.runner import OptimizerHook 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in model.parameters() 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 55 | if self.grad_clip is not None: 56 | self.clip_grads(runner.model.parameters()) 57 | runner.optimizer.step() 58 | -------------------------------------------------------------------------------- /mmaction/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .rawframes_dataset import RawFramesDataset 2 | from .lmdbframes_dataset import LMDBFramesDataset 3 | from .video_dataset import 
VideoDataset 4 | from .ssn_dataset import SSNDataset 5 | from .ava_dataset import AVADataset 6 | from .utils import get_untrimmed_dataset, get_trimmed_dataset 7 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 8 | 9 | __all__ = [ 10 | 'RawFramesDataset', 'LMDBFramesDataset', 11 | 'VideoDataset', 'SSNDataset', 'AVADataset', 12 | 'get_trimmed_dataset', 'get_untrimmed_dataset', 13 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 14 | ] 15 | -------------------------------------------------------------------------------- /mmaction/datasets/feature_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/datasets/feature_dataset.py -------------------------------------------------------------------------------- /mmaction/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = [ 5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 6 | ] 7 | -------------------------------------------------------------------------------- /mmaction/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | **kwargs): 21 | shuffle = kwargs.get('shuffle', True) 22 | if dist: 23 | rank, world_size = get_dist_info() 24 | if shuffle: 25 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, rank) 26 | else: 27 | sampler = DistributedSampler(dataset, world_size, rank, shuffle=False) 28 | batch_size = imgs_per_gpu 29 | num_workers = workers_per_gpu 30 | else: 31 | if not kwargs.get('shuffle', True): 32 | sampler = None 33 | else: 34 | sampler = GroupSampler(dataset, imgs_per_gpu) 35 | batch_size = num_gpus * imgs_per_gpu 36 | num_workers = num_gpus * workers_per_gpu 37 | 38 | data_loader = DataLoader( 39 | dataset, 40 | batch_size=batch_size, 41 | sampler=sampler, 42 | num_workers=num_workers, 43 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 44 | pin_memory=False, 45 | **kwargs) 46 | 47 | return data_loader 48 | -------------------------------------------------------------------------------- /mmaction/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .flow_losses import charbonnier_loss, SSIM_loss 2 | from .losses import ( 3 | weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy, 4 | weighted_smoothl1, accuracy, 5 | weighted_multilabel_binary_cross_entropy, 6 | multilabel_accuracy) 7 | from .ssn_losses import (OHEMHingeLoss, completeness_loss, 8 | classwise_regression_loss) 9 | 10 | __all__ = [ 11 | 'charbonnier_loss', 'SSIM_loss', 12 | 'weighted_nll_loss', 'weighted_cross_entropy', 13 | 'weighted_binary_cross_entropy', 14 
| 'weighted_smoothl1', 'accuracy', 15 | 'weighted_multilabel_binary_cross_entropy', 16 | 'multilabel_accuracy', 17 | 'OHEMHingeLoss', 'completeness_loss', 18 | 'classwise_regression_loss' 19 | ] 20 | -------------------------------------------------------------------------------- /mmaction/losses/flow_losses.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | def charbonnier_loss(difference, mask, alpha=1, beta=1., epsilon=0.001): 7 | ''' 8 | : sum( (x*beta)^2 + epsilon^2)^alpha 9 | ''' 10 | if mask is not None: 11 | assert difference.size(0) == mask.size(0) 12 | assert difference.size(2) == mask.size(2) 13 | assert difference.size(3) == mask.size(3) 14 | res = torch.pow(torch.pow(difference * beta, 2) + epsilon ** 2, alpha) 15 | if mask is not None: 16 | batch_pixels = torch.sum(mask) 17 | return torch.sum(res * mask) / batch_pixels 18 | else: 19 | batch_pixels = torch.numel(res) 20 | return torch.sum(res) / batch_pixels 21 | 22 | 23 | def SSIM_loss(img1, img2, kernel_size=8, stride=8, c1=0.00001, c2=0.00001): 24 | num = img1.size(0) 25 | channels = img1.size(1) 26 | 27 | kernel_h = kernel_w = kernel_size 28 | sigma = (kernel_w + kernel_h) / 12. 29 | gauss_kernel = torch.zeros((1, 1, kernel_h, kernel_w)).type(img1.type()) 30 | for h in range(kernel_h): 31 | for w in range(kernel_w): 32 | gauss_kernel[0, 0, h, w] = math.exp( 33 | -(math.pow(h - kernel_h/2.0, 2) + math.pow(- kernel_w/2.0, 2)) 34 | / (2.0 * sigma ** 2)) / (2 * 3.14159 * sigma ** 2) 35 | gauss_kernel = gauss_kernel / torch.sum(gauss_kernel) 36 | gauss_kernel = gauss_kernel.repeat(channels, 1, 1, 1) 37 | 38 | gauss_filter = nn.Conv2d(channels, channels, kernel_size, 39 | stride=stride, padding=0, 40 | groups=channels, bias=False) 41 | gauss_filter.weight.data = gauss_kernel 42 | gauss_filter.weight.requires_grad = False 43 | 44 | ux = gauss_filter(img1) 45 | uy = gauss_filter(img2) 46 | sx2 = gauss_filter(img1 ** 2) 47 | sy2 = gauss_filter(img2 ** 2) 48 | sxy = gauss_filter(img1 * img2) 49 | 50 | ux2 = ux ** 2 51 | uy2 = uy ** 2 52 | sx2 = sx2 - ux2 53 | sy2 = sy2 - uy2 54 | sxy = sxy - ux * uy 55 | 56 | lp = (2 * ux * uy + c1) / (ux2 + uy2 + c1) 57 | sc = (2 * sxy + c2) / (sx2 + sy2 + c2) 58 | 59 | ssim = lp * sc 60 | return (lp.numel() - torch.sum(ssim)) / num 61 | -------------------------------------------------------------------------------- /mmaction/losses/ssn_losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | class OHEMHingeLoss(torch.autograd.Function): 6 | """ 7 | This class is the core implementation for the completeness loss in paper. 8 | It compute class-wise hinge loss and performs online hard negative mining 9 | (OHEM). 
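    In short (editor's summary of the forward pass below): each sample i
    contributes a hinge loss max(0, 1 - is_positive * pred[i, labels[i] - 1]),
    and within every group of `group_size` samples only the largest
    int(group_size * ohem_ratio) losses are kept and summed.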
10 | """ 11 | 12 | @staticmethod 13 | def forward(ctx, pred, labels, is_positive, ohem_ratio, group_size): 14 | n_sample = pred.size()[0] 15 | assert n_sample == len( 16 | labels), "mismatch between sample size and label size" 17 | losses = torch.zeros(n_sample) 18 | slopes = torch.zeros(n_sample) 19 | for i in range(n_sample): 20 | losses[i] = max(0, 1 - is_positive * pred[i, labels[i] - 1]) 21 | slopes[i] = -is_positive if losses[i] != 0 else 0 22 | 23 | losses = losses.view(-1, group_size).contiguous() 24 | sorted_losses, indices = torch.sort(losses, dim=1, descending=True) 25 | keep_num = int(group_size * ohem_ratio) 26 | loss = torch.zeros(1).cuda() 27 | for i in range(losses.size(0)): 28 | loss += sorted_losses[i, :keep_num].sum() 29 | ctx.loss_ind = indices[:, :keep_num] 30 | ctx.labels = labels 31 | ctx.slopes = slopes 32 | ctx.shape = pred.size() 33 | ctx.group_size = group_size 34 | ctx.num_group = losses.size(0) 35 | return loss 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | labels = ctx.labels 40 | slopes = ctx.slopes 41 | 42 | grad_in = torch.zeros(ctx.shape) 43 | for group in range(ctx.num_group): 44 | for idx in ctx.loss_ind[group]: 45 | loc = idx + group * ctx.group_size 46 | grad_in[loc, labels[loc] - 1] = slopes[loc] * \ 47 | grad_output.data[0] 48 | return torch.autograd.Variable(grad_in.cuda()), None, None, None, None 49 | 50 | 51 | def completeness_loss(pred, labels, sample_split, 52 | sample_group_size, ohem_ratio=0.17): 53 | pred_dim = pred.size()[1] 54 | pred = pred.view(-1, sample_group_size, pred_dim) 55 | labels = labels.view(-1, sample_group_size) 56 | 57 | pos_group_size = sample_split 58 | neg_group_size = sample_group_size - sample_split 59 | pos_prob = pred[:, :sample_split, :].contiguous().view(-1, pred_dim) 60 | neg_prob = pred[:, sample_split:, :].contiguous().view(-1, pred_dim) 61 | pos_ls = OHEMHingeLoss.apply(pos_prob, 62 | labels[:, :sample_split].contiguous( 63 | ).view(-1), 1, 64 | 1.0, pos_group_size) 65 | neg_ls = OHEMHingeLoss.apply(neg_prob, 66 | labels[:, sample_split:].contiguous( 67 | ).view(-1), -1, 68 | ohem_ratio, neg_group_size) 69 | pos_cnt = pos_prob.size(0) 70 | neg_cnt = int(neg_prob.size()[0] * ohem_ratio) 71 | 72 | return pos_ls / float(pos_cnt + neg_cnt) + \ 73 | neg_ls / float(pos_cnt + neg_cnt) 74 | 75 | 76 | def classwise_regression_loss(pred, labels, targets): 77 | indexer = labels.data - 1 78 | prep = pred[:, indexer, :] 79 | class_pred = torch.cat((torch.diag(prep[:, :, 0]).view(-1, 1), 80 | torch.diag(prep[:, :, 1]).view(-1, 1)), 81 | dim=1) 82 | loss = F.smooth_l1_loss(class_pred.view(-1), targets.view(-1)) * 2 83 | return loss 84 | -------------------------------------------------------------------------------- /mmaction/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .tenons.backbones import * 2 | from .tenons.spatial_temporal_modules import * 3 | from .tenons.segmental_consensuses import * 4 | from .tenons.cls_heads import * 5 | from .recognizers import * 6 | from .tenons.necks import * 7 | from .tenons.roi_extractors import * 8 | from .tenons.anchor_heads import * 9 | from .tenons.shared_heads import * 10 | from .tenons.bbox_heads import * 11 | from .detectors import * 12 | from .localizers import * 13 | 14 | 15 | from .registry import (BACKBONES, SPATIAL_TEMPORAL_MODULES, SEGMENTAL_CONSENSUSES, HEADS, 16 | RECOGNIZERS, LOCALIZERS, DETECTORS, ARCHITECTURES, 17 | NECKS, ROI_EXTRACTORS) 18 | from .builder import (build_backbone, 
build_spatial_temporal_module, build_segmental_consensus, 19 | build_head, build_recognizer, build_detector, 20 | build_localizer, build_architecture, 21 | build_neck, build_roi_extractor) 22 | 23 | __all__ = [ 24 | 'BACKBONES', 'SPATIAL_TEMPORAL_MODULES', 'SEGMENTAL_CONSENSUSES', 'HEADS', 25 | 'RECOGNIZERS', 'LOCALIZERS', 'DETECTORS', 'ARCHITECTURES', 26 | 'NECKS', 'ROI_EXTRACTORS', 27 | 'build_backbone', 'build_spatial_temporal_module', 'build_segmental_consensus', 28 | 'build_head', 'build_recognizer', 'build_detector', 29 | 'build_localizer', 'build_architecture', 30 | 'build_neck', 'build_roi_extractor' 31 | ] 32 | -------------------------------------------------------------------------------- /mmaction/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import (BACKBONES, FLOWNETS, SPATIAL_TEMPORAL_MODULES, 5 | SEGMENTAL_CONSENSUSES, HEADS, 6 | RECOGNIZERS, DETECTORS, LOCALIZERS, ARCHITECTURES, 7 | NECKS, ROI_EXTRACTORS) 8 | 9 | 10 | def _build_module(cfg, registry, default_args): 11 | assert isinstance(cfg, dict) and 'type' in cfg 12 | assert isinstance(default_args, dict) or default_args is None 13 | args = cfg.copy() 14 | obj_type = args.pop('type') 15 | if mmcv.is_str(obj_type): 16 | if obj_type not in registry.module_dict: 17 | raise KeyError('{} is not in the {} registry'.format( 18 | obj_type, registry.name)) 19 | obj_type = registry.module_dict[obj_type] 20 | elif not isinstance(obj_type, type): 21 | raise TypeError('type must be a str or valid type, but got {}'.format( 22 | type(obj_type))) 23 | if default_args is not None: 24 | for name, value in default_args.items(): 25 | args.setdefault(name, value) 26 | return obj_type(**args) 27 | 28 | 29 | def build(cfg, registry, default_args=None): 30 | if isinstance(cfg, list): 31 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 32 | return nn.Sequential(*modules) 33 | else: 34 | return _build_module(cfg, registry, default_args) 35 | 36 | 37 | def build_backbone(cfg): 38 | return build(cfg, BACKBONES) 39 | 40 | 41 | def build_flownet(cfg): 42 | return build(cfg, FLOWNETS) 43 | 44 | 45 | def build_spatial_temporal_module(cfg): 46 | return build(cfg, SPATIAL_TEMPORAL_MODULES) 47 | 48 | 49 | def build_segmental_consensus(cfg): 50 | return build(cfg, SEGMENTAL_CONSENSUSES) 51 | 52 | 53 | def build_head(cfg): 54 | return build(cfg, HEADS) 55 | 56 | 57 | def build_recognizer(cfg, train_cfg=None, test_cfg=None): 58 | return build(cfg, RECOGNIZERS, 59 | dict(train_cfg=train_cfg, test_cfg=test_cfg)) 60 | 61 | 62 | def build_localizer(cfg, train_cfg=None, test_cfg=None): 63 | return build(cfg, LOCALIZERS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 64 | 65 | 66 | def build_detector(cfg, train_cfg=None, test_cfg=None): 67 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 68 | 69 | 70 | def build_architecture(cfg, train_cfg=None, test_cfg=None): 71 | return build(cfg, ARCHITECTURES, 72 | dict(train_cfg=train_cfg, test_cfg=test_cfg)) 73 | 74 | 75 | def build_neck(cfg): 76 | return build(cfg, NECKS) 77 | 78 | 79 | def build_roi_extractor(cfg): 80 | return build(cfg, ROI_EXTRACTORS) 81 | -------------------------------------------------------------------------------- /mmaction/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .two_stage import TwoStageDetector 3 | from .fast_rcnn 
import FastRCNN 4 | from .faster_rcnn import FasterRCNN 5 | 6 | __all__ = [ 7 | 'BaseDetector', 'TwoStageDetector', 8 | 'FastRCNN', 'FasterRCNN', 9 | ] 10 | -------------------------------------------------------------------------------- /mmaction/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | dropout_ratio=0, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FastRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | bbox_roi_extractor=bbox_roi_extractor, 23 | dropout_ratio=dropout_ratio, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmaction/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | dropout_ratio=0, 16 | neck=None, 17 | shared_head=None, 18 | pretrained=None): 19 | super(FasterRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | rpn_head=rpn_head, 24 | bbox_roi_extractor=bbox_roi_extractor, 25 | dropout_ratio=dropout_ratio, 26 | bbox_head=bbox_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | -------------------------------------------------------------------------------- /mmaction/models/localizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseLocalizer 2 | from .SSN2D import SSN2D 3 | 4 | __all__ = [ 5 | 'BaseLocalizer', 'SSN2D' 6 | ] 7 | -------------------------------------------------------------------------------- /mmaction/models/localizers/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABCMeta, abstractmethod 3 | 4 | import torch.nn as nn 5 | 6 | class BaseLocalizer(nn.Module): 7 | """Base class for localizers""" 8 | 9 | __metaclass__ = ABCMeta 10 | 11 | def __init__(self): 12 | super(BaseLocalizer, self).__init__() 13 | 14 | @abstractmethod 15 | def forward_train(self, num_modalities, **kwargs): 16 | pass 17 | 18 | @abstractmethod 19 | def forward_test(self, num_modalities, **kwargs): 20 | pass 21 | 22 | def init_weights(self, pretrained=None): 23 | if pretrained is not None: 24 | logger = logging.getLogger() 25 | logger.info("load model from: {}".format(pretrained)) 26 | 27 | def forward(self, num_modalities, img_meta, return_loss=True, **kwargs): 28 | num_modalities = int(num_modalities[0]) 29 | if return_loss: 30 | return self.forward_train(num_modalities, img_meta, **kwargs) 31 | else: 32 | return self.forward_test(num_modalities, img_meta, **kwargs) 33 | -------------------------------------------------------------------------------- /mmaction/models/recognizers/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .base import BaseRecognizer 2 | from .TSN2D import TSN2D 3 | from .TSN3D import TSN3D 4 | 5 | __all__ = [ 6 | 'BaseRecognizer', 'TSN2D', 'TSN3D' 7 | ] -------------------------------------------------------------------------------- /mmaction/models/recognizers/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABCMeta, abstractmethod 3 | 4 | import torch.nn as nn 5 | 6 | 7 | class BaseRecognizer(nn.Module): 8 | """Base class for recognizers""" 9 | 10 | __metaclass__ = ABCMeta 11 | 12 | def __init__(self): 13 | super(BaseRecognizer, self).__init__() 14 | 15 | @property 16 | def with_tenon_list(self): 17 | return hasattr(self, 'tenon_list') and self.tenon_list is not None 18 | 19 | @property 20 | def with_cls(self): 21 | return hasattr(self, 'cls_head') and self.cls_head is not None 22 | 23 | @abstractmethod 24 | def forward_train(self, num_modalities, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def forward_test(self, num_modalities, **kwargs): 29 | pass 30 | 31 | def init_weights(self, pretrained=None): 32 | if pretrained is not None: 33 | logger = logging.getLogger() 34 | logger.info("load model from: {}".format(pretrained)) 35 | 36 | def forward(self, num_modalities, img_meta, return_loss=True, **kwargs): 37 | num_modalities = int(num_modalities[0]) 38 | if return_loss: 39 | return self.forward_train(num_modalities, img_meta, **kwargs) 40 | else: 41 | return self.forward_test(num_modalities, img_meta, **kwargs) 42 | -------------------------------------------------------------------------------- /mmaction/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module 20 | 21 | Args: 22 | module (:obj:`nn.Module`): Module to be registered. 
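        Example (editor's sketch; ``MyRecognizer`` is a made-up class name):
            @RECOGNIZERS.register_module
            class MyRecognizer(nn.Module):
                ...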
23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module must be a child of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | FLOWNETS = Registry('flownet') 41 | SPATIAL_TEMPORAL_MODULES = Registry('spatial_temporal_module') 42 | SEGMENTAL_CONSENSUSES = Registry('segmental_consensus') 43 | HEADS = Registry('head') 44 | RECOGNIZERS = Registry('recognizer') 45 | LOCALIZERS = Registry('localizer') 46 | DETECTORS = Registry('detector') 47 | ARCHITECTURES = Registry('architecture') 48 | NECKS = Registry('neck') 49 | ROI_EXTRACTORS = Registry('roi_extractor') 50 | -------------------------------------------------------------------------------- /mmaction/models/tenons/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | 4 | __all__ = ['AnchorHead', 'RPNHead'] -------------------------------------------------------------------------------- /mmaction/models/tenons/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .bninception import BNInception 2 | from .resnet import ResNet 3 | 4 | from .inception_v1_i3d import InceptionV1_I3D 5 | from .resnet_i3d import ResNet_I3D 6 | from .resnet_s3d import ResNet_S3D 7 | from .resnet_i3d_slowfast import ResNet_I3D_SlowFast 8 | from .resnet_r3d import ResNet_R3D 9 | from .c3d import C3D 10 | 11 | __all__ = [ 12 | 'BNInception', 13 | 'ResNet', 14 | 'InceptionV1_I3D', 15 | 'ResNet_I3D', 16 | 'ResNet_S3D', 17 | 'ResNet_I3D_SlowFast', 18 | 'ResNet_R3D', 19 | 'C3D' 20 | ] 21 | -------------------------------------------------------------------------------- /mmaction/models/tenons/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | 3 | __all__ = [ 4 | 'BBoxHead' 5 | ] -------------------------------------------------------------------------------- /mmaction/models/tenons/cls_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .cls_head import ClsHead 2 | from .ssn_head import SSNHead 3 | 4 | __all__ = [ 5 | 'ClsHead', 'SSNHead' 6 | ] 7 | -------------------------------------------------------------------------------- /mmaction/models/tenons/cls_heads/cls_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from ...registry import HEADS 5 | 6 | @HEADS.register_module 7 | class ClsHead(nn.Module): 8 | """Simplest classification head""" 9 | 10 | def __init__(self, 11 | with_avg_pool=True, 12 | temporal_feature_size=1, 13 | spatial_feature_size=7, 14 | dropout_ratio=0.8, 15 | in_channels=2048, 16 | num_classes=101, 17 | init_std=0.01, 18 | fcn_testing=False): 19 | 20 | super(ClsHead, self).__init__() 21 | 22 | self.with_avg_pool = with_avg_pool 23 | self.dropout_ratio = dropout_ratio 24 | self.in_channels = in_channels 25 | self.dropout_ratio = dropout_ratio 26 | self.temporal_feature_size = temporal_feature_size 27 | 
self.spatial_feature_size = spatial_feature_size 28 | self.init_std = init_std 29 | self.fcn_testing = fcn_testing 30 | self.num_classes = num_classes 31 | 32 | if self.dropout_ratio != 0: 33 | self.dropout = nn.Dropout(p=self.dropout_ratio) 34 | else: 35 | self.dropout = None 36 | if self.with_avg_pool: 37 | self.avg_pool = nn.AvgPool3d((temporal_feature_size, spatial_feature_size, spatial_feature_size)) 38 | 39 | self.fc_cls = nn.Linear(in_channels, num_classes) 40 | self.new_cls = None 41 | 42 | def init_weights(self): 43 | nn.init.normal_(self.fc_cls.weight, 0, self.init_std) 44 | nn.init.constant_(self.fc_cls.bias, 0) 45 | 46 | def forward(self, x): 47 | if not self.fcn_testing: 48 | if x.ndimension() == 4: 49 | x = x.unsqueeze(2) 50 | assert x.shape[1] == self.in_channels 51 | assert x.shape[2] == self.temporal_feature_size 52 | assert x.shape[3] == self.spatial_feature_size 53 | assert x.shape[4] == self.spatial_feature_size 54 | if self.with_avg_pool: 55 | x = self.avg_pool(x) 56 | if self.dropout is not None: 57 | x = self.dropout(x) 58 | x = x.view(x.size(0), -1) 59 | 60 | cls_score = self.fc_cls(x) 61 | return cls_score 62 | else: 63 | if x.ndimension() == 4: 64 | x = x.unsqueeze(2) 65 | if self.with_avg_pool: 66 | x = self.avg_pool(x) 67 | if self.new_cls is None: 68 | self.new_cls = nn.Conv3d(self.in_channels, self.num_classes, 1,1,0).cuda() 69 | self.new_cls.load_state_dict({'weight': self.fc_cls.weight.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1), 70 | 'bias': self.fc_cls.bias}) 71 | class_map = self.new_cls(x) 72 | return class_map 73 | 74 | def loss(self, 75 | cls_score, 76 | labels): 77 | losses = dict() 78 | losses['loss_cls'] = F.cross_entropy(cls_score, labels) 79 | 80 | return losses 81 | -------------------------------------------------------------------------------- /mmaction/models/tenons/flownets/__init__.py: -------------------------------------------------------------------------------- 1 | from .motionnet import MotionNet 2 | 3 | __all__ = [ 4 | "MotionNet", 5 | ] 6 | -------------------------------------------------------------------------------- /mmaction/models/tenons/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | 3 | __all__ = ['FPN'] -------------------------------------------------------------------------------- /mmaction/models/tenons/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | from .single_level_straight3d import SingleRoIStraight3DExtractor 3 | 4 | __all__ = [ 5 | 'SingleRoIExtractor', 'SingleRoIStraight3DExtractor' 6 | ] -------------------------------------------------------------------------------- /mmaction/models/tenons/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from mmaction import ops 5 | 6 | from ...registry import ROI_EXTRACTORS 7 | 8 | @ROI_EXTRACTORS.register_module 9 | class SingleRoIExtractor(nn.Module): 10 | """Extract RoI features from a single level feature map. 11 | 12 | If there are multiple input feature levels, each RoI is mapped to a level 13 | according to its scale. 14 | 15 | Args: 16 | roi_layer (dict): Specify RoI layer type and arguments. 17 | out_channels (int): Output channels of RoI layers. 18 | featmap_strides (int): Strides of input feature maps. 19 | finest_scale (int): Scale threshold of mapping to level 0. 
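    Example (editor's sketch; the RoI layer type and all values are hypothetical):
        roi_extractor = SingleRoIExtractor(
            roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32])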
20 | """ 21 | 22 | def __init__(self, 23 | roi_layer, 24 | out_channels, 25 | featmap_strides, 26 | finest_scale=56): 27 | super(SingleRoIExtractor, self).__init__() 28 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 29 | self.out_channels = out_channels 30 | self.featmap_strides = featmap_strides 31 | self.finest_scale = finest_scale 32 | 33 | @property 34 | def num_inputs(self): 35 | """int: Input feature map levels.""" 36 | return len(self.featmap_strides) 37 | 38 | def init_weights(self): 39 | pass 40 | 41 | def build_roi_layers(self, layer_cfg, featmap_strides): 42 | cfg = layer_cfg.copy() 43 | layer_type = cfg.pop('type') 44 | assert hasattr(ops, layer_type) 45 | layer_cls = getattr(ops, layer_type) 46 | roi_layers = nn.ModuleList( 47 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 48 | return roi_layers 49 | 50 | def map_roi_levels(self, rois, num_levels): 51 | """Map rois to corresponding feature levels by scales. 52 | 53 | - scale < finest_scale: level 0 54 | - finest_scale <= scale < finest_scale * 2: level 1 55 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 56 | - scale >= finest_scale * 4: level 3 57 | 58 | Args: 59 | rois (Tensor): Input RoIs, shape (k, 5). 60 | num_levels (int): Total level number. 61 | 62 | Returns: 63 | Tensor: Level index (0-based) of each RoI, shape (k, ) 64 | """ 65 | scale = torch.sqrt( 66 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 67 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 68 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 69 | return target_lvls 70 | 71 | def forward(self, feats, rois): 72 | if len(feats) == 1: 73 | return self.roi_layers[0](feats[0], rois) 74 | 75 | out_size = self.roi_layers[0].out_size 76 | num_levels = len(feats) 77 | target_lvls = self.map_roi_levels(rois, num_levels) 78 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, 79 | out_size, out_size).fill_(0) 80 | for i in range(num_levels): 81 | inds = target_lvls == i 82 | if inds.any(): 83 | rois_ = rois[inds, :] 84 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 85 | roi_feats[inds] += roi_feats_t 86 | return roi_feats 87 | -------------------------------------------------------------------------------- /mmaction/models/tenons/segmental_consensuses/TODO.md: -------------------------------------------------------------------------------- 1 | ### TODO 2 | 3 | [x] SimpleConsensus 4 | 5 | [ ] STPP 6 | 7 | [ ] TRN 8 | -------------------------------------------------------------------------------- /mmaction/models/tenons/segmental_consensuses/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_consensus import SimpleConsensus 2 | from .stpp import parse_stage_config 3 | from .stpp import StructuredTemporalPyramidPooling 4 | 5 | __all__ = [ 6 | 'SimpleConsensus', 7 | 'StructuredTemporalPyramidPooling', 8 | 'parse_stage_config' 9 | ] 10 | -------------------------------------------------------------------------------- /mmaction/models/tenons/segmental_consensuses/simple_consensus.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from ...registry import SEGMENTAL_CONSENSUSES 4 | 5 | @SEGMENTAL_CONSENSUSES.register_module 6 | class SimpleConsensus(nn.Module): 7 | def __init__(self, consensus_type, dim=1): 8 | super(SimpleConsensus, self).__init__() 9 | assert consensus_type in ['avg'] 10 | 
self.consensus_type = consensus_type 11 | self.dim = dim 12 | 13 | def init_weights(self): 14 | pass 15 | 16 | def forward(self, input): 17 | if self.consensus_type == 'avg': 18 | output = input.mean(dim=self.dim, keepdim=True) 19 | else: 20 | return None 21 | return output 22 | -------------------------------------------------------------------------------- /mmaction/models/tenons/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | from .res_i3d_layer import ResI3DLayer 3 | 4 | __all__ = [ 5 | 'ResLayer', 'ResI3DLayer' 6 | ] -------------------------------------------------------------------------------- /mmaction/models/tenons/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import constant_init, kaiming_init 5 | from mmcv.runner import load_checkpoint 6 | 7 | from ..backbones import ResNet 8 | from ..backbones.resnet import make_res_layer 9 | from ...registry import HEADS 10 | from ..spatial_temporal_modules.non_local import NonLocalModule 11 | 12 | 13 | @HEADS.register_module 14 | class ResLayer(nn.Module): 15 | 16 | def __init__(self, 17 | depth, 18 | pretrained=None, 19 | stage=3, 20 | stride=2, 21 | dilation=1, 22 | style='pytorch', 23 | bn_eval=True, 24 | bn_frozen=True, 25 | all_frozen=False, 26 | with_cp=False): 27 | super(ResLayer, self).__init__() 28 | self.bn_eval = bn_eval 29 | self.bn_frozen = bn_frozen 30 | self.all_frozen = all_frozen 31 | self.stage = stage 32 | block, stage_blocks = ResNet.arch_settings[depth] 33 | self.pretrained = pretrained 34 | stage_block = stage_blocks[stage] 35 | planes = 64 * 2**stage 36 | inplanes = 64 * 2**(stage - 1) * block.expansion 37 | 38 | res_layer = make_res_layer( 39 | block, 40 | inplanes, 41 | planes, 42 | stage_block, 43 | stride=stride, 44 | dilation=dilation, 45 | style=style, 46 | with_cp=with_cp) 47 | self.add_module('layer{}'.format(stage + 1), res_layer) 48 | 49 | def init_weights(self): 50 | if isinstance(self.pretrained, str): 51 | logger = logging.getLogger() 52 | load_checkpoint(self, self.pretrained, strict=False, logger=logger) 53 | elif self.pretrained is None: 54 | for m in self.modules(): 55 | if isinstance(m, nn.Conv2d): 56 | kaiming_init(m) 57 | elif isinstance(m, nn.BatchNorm2d): 58 | constant_init(m, 1) 59 | else: 60 | raise TypeError('pretrained must be a str or None') 61 | 62 | def forward(self, x): 63 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 64 | out = res_layer(x) 65 | return out 66 | 67 | def train(self, mode=True): 68 | super(ResLayer, self).train(mode) 69 | if self.bn_eval: 70 | for m in self.modules(): 71 | if isinstance(m, nn.BatchNorm2d): 72 | m.eval() 73 | if self.bn_frozen: 74 | for params in m.parameters(): 75 | params.requires_grad = False 76 | if self.bn_frozen: 77 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 78 | for m in res_layer: 79 | if isinstance(m, nn.BatchNorm2d): 80 | m.eval() 81 | m.weight.requires_grad = False 82 | m.bias.requires_grad = False 83 | if self.all_frozen: 84 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 85 | res_layer.eval() 86 | for param in mod.parameters(): 87 | param.requires_grad = False 88 | -------------------------------------------------------------------------------- /mmaction/models/tenons/spatial_temporal_modules/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .simple_spatial_module import SimpleSpatialModule 2 | from .simple_spatial_temporal_module import SimpleSpatialTemporalModule 3 | from .slowfast_spatial_temporal_module import SlowFastSpatialTemporalModule 4 | 5 | __all__ = [ 6 | 'SimpleSpatialModule', 7 | 'SimpleSpatialTemporalModule', 8 | 'SlowFastSpatialTemporalModule' 9 | ] 10 | -------------------------------------------------------------------------------- /mmaction/models/tenons/spatial_temporal_modules/simple_spatial_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from ...registry import SPATIAL_TEMPORAL_MODULES 5 | 6 | 7 | @SPATIAL_TEMPORAL_MODULES.register_module 8 | class SimpleSpatialModule(nn.Module): 9 | def __init__(self, spatial_type='avg', spatial_size=7): 10 | super(SimpleSpatialModule, self).__init__() 11 | 12 | assert spatial_type in ['avg'] 13 | self.spatial_type = spatial_type 14 | 15 | self.spatial_size = spatial_size if not isinstance(spatial_size, int) else (spatial_size, spatial_size) 16 | 17 | if self.spatial_type == 'avg': 18 | self.op = nn.AvgPool2d(self.spatial_size, stride=1, padding=0) 19 | 20 | 21 | def init_weights(self): 22 | pass 23 | 24 | def forward(self, input): 25 | return self.op(input) -------------------------------------------------------------------------------- /mmaction/models/tenons/spatial_temporal_modules/simple_spatial_temporal_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from ...registry import SPATIAL_TEMPORAL_MODULES 5 | 6 | 7 | @SPATIAL_TEMPORAL_MODULES.register_module 8 | class SimpleSpatialTemporalModule(nn.Module): 9 | def __init__(self, spatial_type='avg', spatial_size=7, temporal_size=1): 10 | super(SimpleSpatialTemporalModule, self).__init__() 11 | 12 | assert spatial_type in ['identity', 'avg', 'max'] 13 | self.spatial_type = spatial_type 14 | 15 | self.spatial_size = spatial_size 16 | if spatial_size != -1: 17 | self.spatial_size = (spatial_size, spatial_size) 18 | 19 | self.temporal_size = temporal_size 20 | 21 | assert not (self.spatial_size == -1) ^ (self.temporal_size == -1) 22 | 23 | if self.temporal_size == -1 and self.spatial_size == -1: 24 | self.pool_size = (1, 1, 1) 25 | if self.spatial_type == 'avg': 26 | self.pool_func = nn.AdaptiveAvgPool3d(self.pool_size) 27 | if self.spatial_type == 'max': 28 | self.pool_func = nn.AdaptiveMaxPool3d(self.pool_size) 29 | else: 30 | self.pool_size = (self.temporal_size, ) + self.spatial_size 31 | if self.spatial_type == 'avg': 32 | self.pool_func = nn.AvgPool3d(self.pool_size, stride=1, padding=0) 33 | if self.spatial_type == 'max': 34 | self.pool_func = nn.MaxPool3d(self.pool_size, stride=1, padding=0) 35 | 36 | 37 | def init_weights(self): 38 | pass 39 | 40 | def forward(self, input): 41 | if self.spatial_type == 'identity': 42 | return input 43 | else: 44 | return self.pool_func(input) 45 | -------------------------------------------------------------------------------- /mmaction/models/tenons/spatial_temporal_modules/slowfast_spatial_temporal_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from ...registry import SPATIAL_TEMPORAL_MODULES 5 | 6 | 7 | 
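# Pools the slow-pathway and fast-pathway features separately with the same
# (adaptive) average pooling op, then concatenates the two pooled results
# along the channel dimension (see forward() below).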
@SPATIAL_TEMPORAL_MODULES.register_module 8 | class SlowFastSpatialTemporalModule(nn.Module): 9 | def __init__(self, adaptive_pool=True, spatial_type='avg', spatial_size=1, temporal_size=1): 10 | super(SlowFastSpatialTemporalModule, self).__init__() 11 | 12 | self.adaptive_pool = adaptive_pool 13 | assert spatial_type in ['avg'] 14 | self.spatial_type = spatial_type 15 | 16 | self.spatial_size = spatial_size if not isinstance(spatial_size, int) else (spatial_size, spatial_size) 17 | self.temporal_size = temporal_size 18 | self.pool_size = (self.temporal_size, ) + self.spatial_size 19 | 20 | if self.adaptive_pool: 21 | if self.spatial_type == 'avg': 22 | self.op = nn.AdaptiveAvgPool3d(self.pool_size) 23 | else: 24 | raise NotImplementedError 25 | 26 | 27 | def init_weights(self): 28 | pass 29 | 30 | def forward(self, input): 31 | x_slow, x_fast = input 32 | x_slow = self.op(x_slow) 33 | x_fast = self.op(x_fast) 34 | return torch.cat((x_slow, x_fast), dim=1) 35 | -------------------------------------------------------------------------------- /mmaction/models/tenons/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule 2 | from .norm import build_norm_layer 3 | 4 | __all__ = [ 5 | 'ConvModule', 'build_norm_layer' 6 | ] -------------------------------------------------------------------------------- /mmaction/models/tenons/utils/conv_module.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import kaiming_init, constant_init 5 | 6 | from .norm import build_norm_layer 7 | 8 | 9 | class ConvModule(nn.Module): 10 | 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | kernel_size, 15 | stride=1, 16 | padding=0, 17 | dilation=1, 18 | groups=1, 19 | bias=True, 20 | normalize=None, 21 | activation='relu', 22 | inplace=True, 23 | activate_last=True): 24 | super(ConvModule, self).__init__() 25 | self.with_norm = normalize is not None 26 | self.with_activatation = activation is not None 27 | self.with_bias = bias 28 | self.activation = activation 29 | self.activate_last = activate_last 30 | 31 | if self.with_norm and self.with_bias: 32 | warnings.warn('ConvModule has norm and bias at the same time') 33 | 34 | self.conv = nn.Conv2d( 35 | in_channels, 36 | out_channels, 37 | kernel_size, 38 | stride, 39 | padding, 40 | dilation, 41 | groups, 42 | bias=bias) 43 | 44 | self.in_channels = self.conv.in_channels 45 | self.out_channels = self.conv.out_channels 46 | self.kernel_size = self.conv.kernel_size 47 | self.stride = self.conv.stride 48 | self.padding = self.conv.padding 49 | self.dilation = self.conv.dilation 50 | self.transposed = self.conv.transposed 51 | self.output_padding = self.conv.output_padding 52 | self.groups = self.conv.groups 53 | 54 | if self.with_norm: 55 | norm_channels = out_channels if self.activate_last else in_channels 56 | self.norm_name, norm = build_norm_layer(normalize, norm_channels) 57 | self.add_module(self.norm_name, norm) 58 | 59 | if self.with_activatation: 60 | assert activation in ['relu'], 'Only ReLU supported.' 
61 | if self.activation == 'relu': 62 | self.activate = nn.ReLU(inplace=inplace) 63 | 64 | # Default using msra init 65 | self.init_weights() 66 | 67 | @property 68 | def norm(self): 69 | return getattr(self, self.norm_name) 70 | 71 | def init_weights(self): 72 | nonlinearity = 'relu' if self.activation is None else self.activation 73 | kaiming_init(self.conv, nonlinearity=nonlinearity) 74 | if self.with_norm: 75 | constant_init(self.norm, 1, bias=0) 76 | 77 | def forward(self, x, activate=True, norm=True): 78 | if self.activate_last: 79 | x = self.conv(x) 80 | if norm and self.with_norm: 81 | x = self.norm(x) 82 | if activate and self.with_activatation: 83 | x = self.activate(x) 84 | else: 85 | if norm and self.with_norm: 86 | x = self.norm(x) 87 | if activate and self.with_activatation: 88 | x = self.activate(x) 89 | x = self.conv(x) 90 | return x 91 | -------------------------------------------------------------------------------- /mmaction/models/tenons/utils/nonlocal_block.py: -------------------------------------------------------------------------------- 1 | from ..spatial_temporal_modules.non_local import NonLocalModule 2 | 3 | 4 | def build_nonlocal_block(cfg): 5 | """ Build nonlocal block 6 | 7 | Args: 8 | """ 9 | assert isinstance(cfg, dict) 10 | cfg_ = cfg.copy() 11 | return NonLocalModule(**cfg_) 12 | -------------------------------------------------------------------------------- /mmaction/models/tenons/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | norm_cfg = { 5 | # format: layer_type: (abbreviation, module) 6 | 'BN': ('bn', nn.BatchNorm2d), 7 | 'SyncBN': ('bn', None), 8 | 'GN': ('gn', nn.GroupNorm), 9 | # and potentially 'SN' 10 | } 11 | 12 | 13 | def build_norm_layer(cfg, num_features, postfix=''): 14 | """ Build normalization layer 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | frozen (bool): [optional] whether stop gradient updates 20 | of norm layer, it is helpful to set frozen mode 21 | in backbone's norms. 22 | num_features (int): number of channels from input 23 | postfix (int, str): appended into norm abbreation to 24 | create named layer. 
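Example (a minimal illustrative call; the channel count is arbitrary):
            >>> name, layer = build_norm_layer(dict(type='BN'), 64, postfix=1)
            >>> name
            'bn1'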
25 | Returns: 26 | name (str): abbreation + postfix 27 | layer (nn.Module): created norm layer 28 | """ 29 | assert isinstance(cfg, dict) and 'type' in cfg 30 | cfg_ = cfg.copy() 31 | 32 | layer_type = cfg_.pop('type') 33 | if layer_type not in norm_cfg: 34 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 35 | else: 36 | abbr, norm_layer = norm_cfg[layer_type] 37 | if norm_layer is None: 38 | raise NotImplementedError 39 | 40 | assert isinstance(postfix, (int, str)) 41 | name = abbr + str(postfix) 42 | 43 | frozen = cfg_.pop('frozen', False) 44 | cfg_.setdefault('eps', 1e-5) 45 | if layer_type != 'GN': 46 | layer = norm_layer(num_features, **cfg_) 47 | else: 48 | assert 'num_groups' in cfg_ 49 | layer = norm_layer(num_channels=num_features, **cfg_) 50 | 51 | if frozen: 52 | for param in layer.parameters(): 53 | param.requires_grad = False 54 | 55 | return name, layer -------------------------------------------------------------------------------- /mmaction/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms import nms, soft_nms 2 | from .roi_align import RoIAlign, roi_align 3 | from .roi_pool import RoIPool, roi_pool 4 | 5 | __all__ = [ 6 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool' 7 | ] 8 | -------------------------------------------------------------------------------- /mmaction/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmaction/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cuda, nms_cpu 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
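Example (illustrative only; the boxes and scores below are made up):
        >>> import numpy as np
        >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
        ...                  [49.3, 32.9, 51.0, 35.3, 0.6],
        ...                  [35.3, 11.5, 39.9, 14.5, 0.4]], dtype=np.float32)
        >>> kept, inds = nms(dets, iou_thr=0.5)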
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /mmaction/ops/nms/setup.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from setuptools import setup, Extension 3 | 4 | import numpy as np 5 | from Cython.Build import cythonize 6 | from Cython.Distutils import build_ext 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | 9 | ext_args = dict( 10 | include_dirs=[np.get_include()], 11 | language='c++', 12 | extra_compile_args={ 13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'], 14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'], 15 | }, 16 | ) 17 | 18 | extensions = [ 19 | Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args), 20 | ] 21 | 22 | 23 | def customize_compiler_for_nvcc(self): 24 | """inject deep into distutils to customize how the dispatch 25 | to cc/nvcc works. 26 | If you subclass UnixCCompiler, it's not trivial to get your subclass 27 | injected in, and still have the right customizations (i.e. 28 | distutils.sysconfig.customize_compiler) run on it. So instead of going 29 | the OO route, I have this. Note, it's kindof like a wierd functional 30 | subclassing going on.""" 31 | 32 | # tell the compiler it can processes .cu 33 | self.src_extensions.append('.cu') 34 | 35 | # save references to the default compiler_so and _comple methods 36 | default_compiler_so = self.compiler_so 37 | super = self._compile 38 | 39 | # now redefine the _compile method. This gets executed for each 40 | # object but distutils doesn't have the ability to change compilers 41 | # based on source extension: we add it. 
42 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 43 | if osp.splitext(src)[1] == '.cu': 44 | # use the cuda for .cu files 45 | self.set_executable('compiler_so', 'nvcc') 46 | # use only a subset of the extra_postargs, which are 1-1 translated 47 | # from the extra_compile_args in the Extension class 48 | postargs = extra_postargs['nvcc'] 49 | else: 50 | postargs = extra_postargs['cc'] 51 | 52 | super(obj, src, ext, cc_args, postargs, pp_opts) 53 | # reset the default compiler_so, which we might have changed for cuda 54 | self.compiler_so = default_compiler_so 55 | 56 | # inject our redefined _compile method into the class 57 | self._compile = _compile 58 | 59 | 60 | class custom_build_ext(build_ext): 61 | 62 | def build_extensions(self): 63 | customize_compiler_for_nvcc(self.compiler) 64 | build_ext.build_extensions(self) 65 | 66 | 67 | setup( 68 | name='soft_nms', 69 | cmdclass={'build_ext': custom_build_ext}, 70 | ext_modules=cythonize(extensions), 71 | ) 72 | 73 | setup( 74 | name='nms_cuda', 75 | ext_modules=[ 76 | CUDAExtension('nms_cuda', [ 77 | 'src/nms_cuda.cpp', 78 | 'src/nms_kernel.cu', 79 | ]), 80 | CUDAExtension('nms_cpu', [ 81 | 'src/nms_cpu.cpp', 82 | ]), 83 | ], 84 | cmdclass={'build_ext': BuildExtension}) 85 | -------------------------------------------------------------------------------- /mmaction/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | template 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data(); 27 | auto order = order_t.data(); 28 | auto x1 = x1_t.data(); 29 | auto y1 = y1_t.data(); 30 | auto x2 = x2_t.data(); 31 | auto y2 = y2_t.data(); 32 | auto areas = areas_t.data(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | 
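// Dispatch on the floating-point dtype of `dets` so the templated
// nms_cpu_kernel above is instantiated with the matching scalar type.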
AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 64 | result = nms_cpu_kernel(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmaction/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmaction/ops/resample2d_package/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mmaction/ops/resample2d_package/resample2d.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.autograd import Function, Variable 3 | import resample2d_cuda 4 | 5 | 6 | class Resample2dFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, input1, input2, kernel_size=1): 10 | assert input1.is_contiguous() 11 | assert input2.is_contiguous() 12 | 13 | ctx.save_for_backward(input1, input2) 14 | ctx.kernel_size = kernel_size 15 | 16 | _, d, _, _ = input1.size() 17 | b, _, h, w = input2.size() 18 | output = input1.new(b, d, h, w).zero_() 19 | 20 | resample2d_cuda.forward(input1, input2, output, kernel_size) 21 | 22 | return output 23 | 24 | @staticmethod 25 | def backward(ctx, grad_output): 26 | grad_output = grad_output.contiguous() 27 | assert grad_output.is_contiguous() 28 | 29 | input1, input2 = ctx.saved_tensors 30 | 31 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 32 | grad_input2 = Variable(input1.new(input2.size()).zero_()) 33 | 34 | resample2d_cuda.backward(input1, input2, grad_output.data, 35 | grad_input1.data, grad_input2.data, 36 | ctx.kernel_size) 37 | 38 | return grad_input1, grad_input2, None 39 | 40 | 41 | class Resample2d(Module): 42 | 43 | def __init__(self, kernel_size=1): 44 | super(Resample2d, self).__init__() 45 | self.kernel_size = kernel_size 46 | 47 | def forward(self, input1, input2): 48 | input1_c = input1.contiguous() 49 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size) 50 | -------------------------------------------------------------------------------- /mmaction/ops/resample2d_package/resample2d_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "resample2d_kernel.cuh" 5 | 6 | int resample2d_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& input2, 9 | at::Tensor& output, 10 | int kernel_size) { 11 | resample2d_kernel_forward(input1, input2, output, kernel_size); 12 | return 1; 13 | } 14 | 15 | int resample2d_cuda_backward( 16 | at::Tensor& input1, 17 | at::Tensor& input2, 18 | at::Tensor& gradOutput, 19 | 
at::Tensor& gradInput1, 20 | at::Tensor& gradInput2, 21 | int kernel_size) { 22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size); 23 | return 1; 24 | } 25 | 26 | 27 | 28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)"); 30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)"); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /mmaction/ops/resample2d_package/resample2d_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void resample2d_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& input2, 8 | at::Tensor& output, 9 | int kernel_size); 10 | 11 | void resample2d_kernel_backward( 12 | at::Tensor& input1, 13 | at::Tensor& input2, 14 | at::Tensor& gradOutput, 15 | at::Tensor& gradInput1, 16 | at::Tensor& gradInput2, 17 | int kernel_size); 18 | -------------------------------------------------------------------------------- /mmaction/ops/resample2d_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++14'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70' 17 | ] 18 | 19 | setup( 20 | name='resample2d_cuda', 21 | ext_modules=[ 22 | CUDAExtension('resample2d_cuda', [ 23 | 'resample2d_cuda.cc', 24 | 'resample2d_kernel.cu' 25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | }) 30 | -------------------------------------------------------------------------------- /mmaction/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmaction/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /mmaction/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. 
import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmaction/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmaction/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /mmaction/ops/roi_align/modules/roi_align.py: 
-------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmaction/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmaction/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int 
data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmaction/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmaction/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/roi_pool/functions/__init__.py -------------------------------------------------------------------------------- /mmaction/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmaction/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | 
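# Each RoI row is (batch_index, x1, y1, x2, y2) in input-image coordinates;
# the spatial_scale passed to RoIPool below (1/8) maps them onto the 15x15 feature map.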
rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmaction/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/roi_pool/modules/__init__.py -------------------------------------------------------------------------------- /mmaction/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmaction/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmaction/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, 
width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmaction/ops/trajectory_conv_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/trajectory_conv_package/__init__.py -------------------------------------------------------------------------------- /mmaction/ops/trajectory_conv_package/deform_3d_conv_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | void deformable_im2col(const at::Tensor data_im, 2 | const at::Tensor data_offset, const int channels, 3 | const int time, const int height, const int width, 4 | const int ksize_t, const int ksize_h, const int ksize_w, 5 | const int pad_t, const int pad_h, const int pad_w, 6 | const int stride_t, const int stride_h, const int stride_w, 7 | const int dilation_t, const int dilation_h, const int dilation_w, 8 | const int parallel_imgs, 9 | const int deformable_groups, 10 | at::Tensor data_col); 11 | 12 | void deformable_col2im(const at::Tensor data_col, 13 | const at::Tensor data_offset, const int channels, 14 | const int time, const int height, const int width, 15 | const int ksize_t, const int ksize_h, const int ksize_w, 16 | const int pad_t, const int pad_h, const int pad_w, 17 | const int stride_t, const int stride_h, const int stride_w, 18 | const int dilation_t, const int dilation_h, const int dilation_w, 19 | const int parallel_imgs, 20 | const int deformable_groups, 21 | at::Tensor grad_im); 22 | 23 | void deformable_col2im_coord(const at::Tensor data_col, 24 | const at::Tensor data_im, const at::Tensor data_offset, const int channels, 25 | const int time, const int height, const int width, 26 | const int ksize_t, const int ksize_h, const int ksize_w, 27 | const int pad_t, const int pad_h, const int pad_w, 28 | const int stride_t, const int stride_h, const int stride_w, 29 | const int dilation_t, const int dilation_h, const int dilation_w, 30 | const int parallel_imgs, 31 | const int deformable_groups, 32 | at::Tensor grad_offset); 33 | 34 | -------------------------------------------------------------------------------- /mmaction/ops/trajectory_conv_package/gradcheck.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import gradcheck 5 | 6 | from traj_conv import TrajConv 7 | 8 | num_deformable_groups = 2 9 | 10 | N, inC, inT, inH, inW = 2, 8, 8, 4, 4 11 | outC, outT, outH, outW = 4, 8, 4, 4 12 | kT, kH, kW = 3, 3, 3 13 | 14 | conv = nn.Conv3d(inC, num_deformable_groups * 3 * kT * kH * kW, 15 | kernel_size=(kT, kH, kW), 16 | stride=(1,1,1), 17 | padding=(1,1,1), 18 | bias=False) 19 | 20 | conv_offset3d = TrajConv(inC, outC, (kT, kH, kW), 21 | stride=(1,1,1), padding=(1,1,1), 22 | num_deformable_groups=num_deformable_groups).double().cuda() 23 | 24 | input = torch.randn(N, inC, inT, inH, inW, requires_grad=True).double().cuda() 25 | offset = torch.rand(N, num_deformable_groups * 2 * kT * kH * kW, inT, inH, inW, requires_grad=True) * 1 - 0.5 26 | offset = offset.double().cuda() 27 | test = gradcheck(conv_offset3d, (input, offset), eps=1e-5, atol=1e-1, rtol=1e-5) 28 | print(test) 29 | -------------------------------------------------------------------------------- /mmaction/ops/trajectory_conv_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup, find_packages 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++14'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70', 17 | ] 18 | 19 | setup( 20 | name='traj_conv_cuda', 21 | ext_modules=[ 22 | CUDAExtension('traj_conv_cuda', [ 23 | 'traj_conv_cuda.cpp', 24 | 'deform_3d_conv_cuda_kernel.cu', 25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | }) 30 | -------------------------------------------------------------------------------- /mmaction/utils/misc.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def rsetattr(obj, attr, val): 7 | ''' 8 | See: 9 | https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects 10 | ''' 11 | pre, _, post = attr.rpartition('.') 12 | return setattr(rgetattr(obj, pre) if pre else obj, post, val) 13 | 14 | 15 | def rgetattr(obj, attr, *args): 16 | def _getattr(obj, attr): 17 | return getattr(obj, attr, *args) 18 | return functools.reduce(_getattr, [obj] + attr.split('.')) 19 | 20 | 21 | def rhasattr(obj, attr, *args): 22 | def _hasattr(obj, attr): 23 | if hasattr(obj, attr): 24 | return getattr(obj, attr) 25 | else: 26 | return None 27 | return functools.reduce(_hasattr, [obj] + attr.split('.')) is not None 28 | 29 | 30 | def tensor2video_snaps(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 31 | num_videos = tensor.size(0) 32 | num_frames = tensor.size(2) 33 | mean = np.array(mean, dtype=np.float32) 34 | std = np.array(std, dtype=np.float32) 35 | video_snaps = [] 36 | for vid_id in range(num_videos): 37 | img = tensor[vid_id, :, num_frames // 38 | 2, ...].cpu().numpy().transpose(1, 2, 0) 39 | img = mmcv.imdenormalize( 40 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 41 | 
video_snaps.append(np.ascontiguousarray(img)) 42 | return video_snaps 43 | 44 | 45 | def multi_apply(func, *args, **kwargs): 46 | pfunc = functools.partial(func, **kwargs) if kwargs else func 47 | map_results = map(pfunc, *args) 48 | return tuple(map(list, zip(*map_results))) 49 | -------------------------------------------------------------------------------- /modelzoo/.placeholder: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open('README.md', encoding='utf-8') as f: 9 | content = f.read() 10 | return content 11 | 12 | 13 | MAJOR = 0 14 | MINOR = 1 15 | PATCH = 'rc0' 16 | SUFFIX = '' 17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) 18 | 19 | version_file = 'mmaction/version.py' 20 | 21 | 22 | def get_git_hash(): 23 | 24 | def _minimal_ext_cmd(cmd): 25 | # construct minimal environment 26 | env = {} 27 | for k in ['SYSTEMROOT', 'PATH', 'HOME']: 28 | v = os.environ.get(k) 29 | if v is not None: 30 | env[k] = v 31 | # LANGUAGE is used on win32 32 | env['LANGUAGE'] = 'C' 33 | env['LANG'] = 'C' 34 | env['LC_ALL'] = 'C' 35 | out = subprocess.Popen( 36 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 37 | return out 38 | 39 | try: 40 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 41 | sha = out.strip().decode('ascii') 42 | except OSError: 43 | sha = 'unknown' 44 | 45 | return sha 46 | 47 | 48 | def get_hash(): 49 | if os.path.exists('.git'): 50 | sha = get_git_hash()[:7] 51 | elif os.path.exists(version_file): 52 | try: 53 | from mmaction.version import __version__ 54 | sha = __version__.split('+')[-1] 55 | except ImportError: 56 | raise ImportError('Unable to get git version') 57 | else: 58 | sha = 'unknown' 59 | 60 | return sha 61 | 62 | 63 | def write_version_py(): 64 | content = """# GENERATED VERSION FILE 65 | # TIME: {} 66 | 67 | __version__ = '{}' 68 | short_version = '{}' 69 | """ 70 | sha = get_hash() 71 | VERSION = SHORT_VERSION + '+' + sha 72 | 73 | with open(version_file, 'w') as f: 74 | f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) 75 | 76 | 77 | def get_version(): 78 | with open(version_file, 'r') as f: 79 | exec(compile(f.read(), version_file, 'exec')) 80 | return locals()['__version__'] 81 | 82 | 83 | if __name__ == '__main__': 84 | write_version_py() 85 | setup( 86 | name='mmaction', 87 | version=get_version(), 88 | description='Open MMLab Action Toolbox', 89 | long_description=readme(), 90 | keywords='computer vision, action recognition', 91 | url='https://github.com/open-mmlab/mmaction', 92 | packages=find_packages(exclude=('configs', 'tools', 'demo')), 93 | package_data={'mmaction.ops': ['*/*.so']}, 94 | classifiers=[ 95 | 'Development Status :: 4 - Beta', 96 | 'License :: OSI Approved :: Apache Software License', 97 | 'Operating System :: OS Independent', 98 | 'Programming Language :: Python :: 2', 99 | 'Programming Language :: Python :: 2.7', 100 | 'Programming Language :: Python :: 3', 101 | 'Programming Language :: Python :: 3.4', 102 | 'Programming Language :: Python :: 3.5', 103 | 'Programming Language :: Python :: 3.6', 104 | ], 105 | license='Apache License 2.0', 106 | setup_requires=['pytest-runner'], 107 | tests_require=['pytest'], 108 | install_requires=[ 109 | 
'mmcv', 'numpy', 'scipy', 'scikit-learn', 'terminaltables', 'lmdb', 'joblib' 110 | ], 111 | zip_safe=False) 112 | -------------------------------------------------------------------------------- /test_configs/CSN/ipcsn_kinetics400_se_rgb_r152_seg1_32x2.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='ResNet_3D', 6 | pretrained=None, 7 | depth=152, 8 | use_pool1=True, 9 | block_type='0.3d'), 10 | spatial_temporal_module=dict( 11 | type='SimpleSpatialTemporalModule', 12 | spatial_type='avg', 13 | temporal_size=-1, 14 | spatial_size=-1), 15 | segmental_consensus=dict( 16 | type='SimpleConsensus', 17 | consensus_type='avg'), 18 | cls_head=dict( 19 | type='ClsHead', 20 | with_avg_pool=False, 21 | temporal_feature_size=1, 22 | spatial_feature_size=1, 23 | dropout_ratio=0.5, 24 | in_channels=2048, 25 | num_classes=400)) 26 | 27 | train_cfg = None 28 | test_cfg = None 29 | # dataset settings 30 | dataset_type = 'RawFramesDataset' 31 | data_root_val = 'data/kinetics400/rawframes_val/' 32 | img_norm_cfg = dict( 33 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 34 | 35 | data = dict( 36 | test=dict( 37 | type=dataset_type, 38 | ann_file='data/kinetics400/kinetics400_train_list_rawframes.txt', 39 | img_prefix=data_root_val, 40 | img_norm_cfg=img_norm_cfg, 41 | input_format="NCTHW", 42 | num_segments=10, 43 | new_length=32, 44 | new_step=2, 45 | random_shift=True, 46 | modality='RGB', 47 | image_tmpl='img_{:05d}.jpg', 48 | img_scale=256, 49 | input_size=256, 50 | div_255=False, 51 | flip_ratio=0, 52 | resize_keep_ratio=True, 53 | oversample='three_crop', 54 | random_crop=False, 55 | more_fix_crop=False, 56 | multiscale_crop=False, 57 | test_mode=True)) 58 | 59 | dist_params = dict(backend='nccl', port=16187) 60 | -------------------------------------------------------------------------------- /test_configs/CSN/ircsn_kinetics400_se_rgb_r152_seg1_32x2.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='ResNet_3D', 6 | pretrained=None, 7 | depth=152, 8 | use_pool1=True, 9 | block_type='3d-sep'), 10 | spatial_temporal_module=dict( 11 | type='SimpleSpatialTemporalModule', 12 | spatial_type='avg', 13 | temporal_size=-1, 14 | spatial_size=-1), 15 | segmental_consensus=dict( 16 | type='SimpleConsensus', 17 | consensus_type='avg'), 18 | cls_head=dict( 19 | type='ClsHead', 20 | with_avg_pool=False, 21 | temporal_feature_size=1, 22 | spatial_feature_size=1, 23 | dropout_ratio=0.5, 24 | in_channels=2048, 25 | num_classes=400)) 26 | 27 | train_cfg = None 28 | test_cfg = None 29 | # dataset settings 30 | dataset_type = 'RawFramesDataset' 31 | data_root_val = 'data/kinetics400/rawframes_val/' 32 | img_norm_cfg = dict( 33 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 34 | 35 | data = dict( 36 | test=dict( 37 | type=dataset_type, 38 | ann_file='data/kinetics400/kinetics400_train_list_rawframes.txt', 39 | img_prefix=data_root_val, 40 | img_norm_cfg=img_norm_cfg, 41 | input_format="NCTHW", 42 | num_segments=10, 43 | new_length=32, 44 | new_step=2, 45 | random_shift=True, 46 | modality='RGB', 47 | image_tmpl='img_{:05d}.jpg', 48 | img_scale=256, 49 | input_size=256, 50 | div_255=False, 51 | flip_ratio=0, 52 | resize_keep_ratio=True, 53 | oversample='three_crop', 54 | random_crop=False, 55 | more_fix_crop=False, 56 | 
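# Test-time pipeline for this config: 10 temporal segments of 32 frames
# sampled with stride 2, three-crop spatial oversampling at 256x256.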
multiscale_crop=False, 57 | test_mode=True)) 58 | 59 | dist_params = dict(backend='nccl', port=16187) 60 | -------------------------------------------------------------------------------- /test_configs/I3D_Flow/i3d_hmdb51_3d_tvl1_inception_v1_seg1_f64s1.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='InceptionV1_I3D', 6 | pretrained=None, 7 | modality='Flow'), 8 | spatial_temporal_module=dict( 9 | type='SimpleSpatialTemporalModule', 10 | spatial_type='avg', 11 | temporal_size=-1, 12 | spatial_size=-1), 13 | segmental_consensus=dict( 14 | type='SimpleConsensus', 15 | consensus_type='avg'), 16 | cls_head=dict( 17 | type='ClsHead', 18 | with_avg_pool=False, 19 | temporal_feature_size=1, 20 | spatial_feature_size=1, 21 | dropout_ratio=0.5, 22 | in_channels=2048, 23 | num_classes=51)) 24 | train_cfg = None 25 | test_cfg = None 26 | # dataset settings 27 | dataset_type = 'RawFramesDataset' 28 | data_root_val = 'data/hmdb51/rawframes/' 29 | img_norm_cfg = dict( 30 | mean=[128, 128], std=[128, 128]) 31 | data = dict( 32 | test=dict( 33 | type=dataset_type, 34 | ann_file='data/hmdb51/hmdb51_train_split_1_rawframes.txt', 35 | img_prefix=data_root_val, 36 | img_norm_cfg=img_norm_cfg, 37 | input_format="NCTHW", 38 | num_segments=10, 39 | new_length=64, 40 | new_step=1, 41 | random_shift=True, 42 | modality='Flow', 43 | image_tmpl='{}_{:05d}.jpg', 44 | img_scale=256, 45 | input_size=256, 46 | div_255=False, 47 | flip_ratio=0, 48 | resize_keep_ratio=True, 49 | oversample='three_crop', 50 | random_crop=False, 51 | more_fix_crop=False, 52 | multiscale_crop=False, 53 | test_mode=True)) 54 | 55 | dist_params = dict(backend='nccl') 56 | -------------------------------------------------------------------------------- /test_configs/I3D_Flow/i3d_kinetics400_3d_tvl1_inception_v1_seg1_f64s1.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='InceptionV1_I3D', 6 | pretrained=None, 7 | modality='Flow'), 8 | spatial_temporal_module=dict( 9 | type='SimpleSpatialTemporalModule', 10 | spatial_type='avg', 11 | temporal_size=-1, 12 | spatial_size=-1), 13 | segmental_consensus=dict( 14 | type='SimpleConsensus', 15 | consensus_type='avg'), 16 | cls_head=dict( 17 | type='ClsHead', 18 | with_avg_pool=False, 19 | temporal_feature_size=1, 20 | spatial_feature_size=1, 21 | dropout_ratio=0.5, 22 | in_channels=2048, 23 | num_classes=400)) 24 | train_cfg = None 25 | test_cfg = None 26 | # dataset settings 27 | dataset_type = 'RawFramesDataset' 28 | data_root_val = 'data/kinetics400/rawframes_val/' 29 | img_norm_cfg = dict( 30 | mean=[128, 128], std=[128, 128]) 31 | data = dict( 32 | test=dict( 33 | type=dataset_type, 34 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt', 35 | img_prefix=data_root_val, 36 | img_norm_cfg=img_norm_cfg, 37 | input_format="NCTHW", 38 | num_segments=10, 39 | new_length=64, 40 | new_step=1, 41 | random_shift=True, 42 | modality='Flow', 43 | image_tmpl='{}_{:05d}.jpg', 44 | img_scale=256, 45 | input_size=256, 46 | div_255=False, 47 | flip_ratio=0, 48 | resize_keep_ratio=True, 49 | oversample='three_crop', 50 | random_crop=False, 51 | more_fix_crop=False, 52 | multiscale_crop=False, 53 | test_mode=True)) 54 | 55 | dist_params = dict(backend='nccl') 56 | -------------------------------------------------------------------------------- 
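Editor's note (an illustration added here, not a file from the repository): the Flow test configs above use image_tmpl='{}_{:05d}.jpg' with two positional slots; under the usual mmaction raw-frame convention the first slot is filled with a flow-direction prefix and the second with the frame index. A minimal sketch of that assumption:

image_tmpl = '{}_{:05d}.jpg'
# assumed direction prefixes; the exact prefix depends on how the flow frames were extracted
print(image_tmpl.format('flow_x', 1))  # flow_x_00001.jpg
print(image_tmpl.format('flow_y', 1))  # flow_y_00001.jpg
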
/test_configs/I3D_Flow/i3d_ucf101_3d_tvl1_inception_v1_seg1_f64s1.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='InceptionV1_I3D', 6 | pretrained=None, 7 | modality='Flow'), 8 | spatial_temporal_module=dict( 9 | type='SimpleSpatialTemporalModule', 10 | spatial_type='avg', 11 | temporal_size=-1, 12 | spatial_size=-1), 13 | segmental_consensus=dict( 14 | type='SimpleConsensus', 15 | consensus_type='avg'), 16 | cls_head=dict( 17 | type='ClsHead', 18 | with_avg_pool=False, 19 | temporal_feature_size=1, 20 | spatial_feature_size=1, 21 | dropout_ratio=0.5, 22 | in_channels=2048, 23 | num_classes=101)) 24 | train_cfg = None 25 | test_cfg = None 26 | # dataset settings 27 | dataset_type = 'RawFramesDataset' 28 | data_root_val = 'data/ucf101/rawframes/' 29 | img_norm_cfg = dict( 30 | mean=[128, 128], std=[128, 128]) 31 | data = dict( 32 | test=dict( 33 | type=dataset_type, 34 | ann_file='data/ucf101/ucf101_train_split_1_rawframes.txt', 35 | img_prefix=data_root_val, 36 | img_norm_cfg=img_norm_cfg, 37 | input_format="NCTHW", 38 | num_segments=10, 39 | new_length=64, 40 | new_step=1, 41 | random_shift=True, 42 | modality='Flow', 43 | image_tmpl='{}_{:05d}.jpg', 44 | img_scale=256, 45 | input_size=256, 46 | div_255=False, 47 | flip_ratio=0, 48 | resize_keep_ratio=True, 49 | oversample='three_crop', 50 | random_crop=False, 51 | more_fix_crop=False, 52 | multiscale_crop=False, 53 | test_mode=True)) 54 | 55 | dist_params = dict(backend='nccl') 56 | -------------------------------------------------------------------------------- /test_configs/I3D_RGB/i3d_hmdb51_3d_rgb_inception_v1_seg1_f64s1.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='InceptionV1_I3D', 6 | pretrained=None, 7 | modality='RGB'), 8 | spatial_temporal_module=dict( 9 | type='SimpleSpatialTemporalModule', 10 | spatial_type='avg', 11 | temporal_size=-1, 12 | spatial_size=-1), 13 | segmental_consensus=dict( 14 | type='SimpleConsensus', 15 | consensus_type='avg'), 16 | cls_head=dict( 17 | type='ClsHead', 18 | with_avg_pool=False, 19 | temporal_feature_size=1, 20 | spatial_feature_size=1, 21 | dropout_ratio=0.5, 22 | in_channels=2048, 23 | num_classes=51)) 24 | train_cfg = None 25 | test_cfg = None 26 | # dataset settings 27 | dataset_type = 'RawFramesDataset' 28 | data_root_val = 'data/hmdb51/rawframes/' 29 | img_norm_cfg = dict( 30 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 31 | data = dict( 32 | test=dict( 33 | type=dataset_type, 34 | ann_file='data/hmdb51/hmdb51_train_split_1_rawframes.txt', 35 | img_prefix=data_root_val, 36 | img_norm_cfg=img_norm_cfg, 37 | input_format="NCTHW", 38 | num_segments=10, 39 | new_length=64, 40 | new_step=1, 41 | random_shift=True, 42 | modality='RGB', 43 | image_tmpl='img_{:05d}.jpg', 44 | img_scale=256, 45 | input_size=256, 46 | div_255=False, 47 | flip_ratio=0, 48 | resize_keep_ratio=True, 49 | oversample='three_crop', 50 | random_crop=False, 51 | more_fix_crop=False, 52 | multiscale_crop=False, 53 | test_mode=True)) 54 | 55 | dist_params = dict(backend='nccl') 56 | -------------------------------------------------------------------------------- /test_configs/I3D_RGB/i3d_kinetics400_3d_rgb_inception_v1_seg1_f64s1.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = 
dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='InceptionV1_I3D', 6 | pretrained=None, 7 | modality='RGB'), 8 | spatial_temporal_module=dict( 9 | type='SimpleSpatialTemporalModule', 10 | spatial_type='avg', 11 | temporal_size=-1, 12 | spatial_size=-1), 13 | segmental_consensus=dict( 14 | type='SimpleConsensus', 15 | consensus_type='avg'), 16 | cls_head=dict( 17 | type='ClsHead', 18 | with_avg_pool=False, 19 | temporal_feature_size=1, 20 | spatial_feature_size=1, 21 | dropout_ratio=0.5, 22 | in_channels=2048, 23 | num_classes=400)) 24 | train_cfg = None 25 | test_cfg = None 26 | # dataset settings 27 | dataset_type = 'RawFramesDataset' 28 | data_root_val = 'data/kinetics400/rawframes_val/' 29 | img_norm_cfg = dict( 30 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 31 | data = dict( 32 | test=dict( 33 | type=dataset_type, 34 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt', 35 | img_prefix=data_root_val, 36 | img_norm_cfg=img_norm_cfg, 37 | input_format="NCTHW", 38 | num_segments=10, 39 | new_length=64, 40 | new_step=1, 41 | random_shift=True, 42 | modality='RGB', 43 | image_tmpl='img_{:05d}.jpg', 44 | img_scale=256, 45 | input_size=256, 46 | div_255=False, 47 | flip_ratio=0, 48 | resize_keep_ratio=True, 49 | oversample='three_crop', 50 | random_crop=False, 51 | more_fix_crop=False, 52 | multiscale_crop=False, 53 | test_mode=True)) 54 | 55 | dist_params = dict(backend='nccl') 56 | -------------------------------------------------------------------------------- /test_configs/I3D_RGB/i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='ResNet_I3D', 6 | pretrained=None, 7 | depth=50, 8 | num_stages=4, 9 | out_indices=[3], 10 | frozen_stages=-1, 11 | inflate_freq=((1,1,1), (1,0,1,0), (1,0,1,0,1,0), (0,1,0)), 12 | inflate_style='3x1x1', 13 | conv1_kernel_t=5, 14 | conv1_stride_t=2, 15 | pool1_kernel_t=1, 16 | pool1_stride_t=2, 17 | bn_eval=False, 18 | partial_bn=False, 19 | style='pytorch'), 20 | spatial_temporal_module=dict( 21 | type='SimpleSpatialTemporalModule', 22 | spatial_type='avg', 23 | temporal_size=4, 24 | spatial_size=7), 25 | segmental_consensus=dict( 26 | type='SimpleConsensus', 27 | consensus_type='avg'), 28 | cls_head=dict( 29 | type='ClsHead', 30 | with_avg_pool=False, 31 | temporal_feature_size=1, 32 | spatial_feature_size=1, 33 | dropout_ratio=0.5, 34 | in_channels=2048, 35 | num_classes=400)) 36 | train_cfg = None 37 | test_cfg = None 38 | # dataset settings 39 | dataset_type = 'RawFramesDataset' 40 | data_root_val = 'data/kinetics400/rawframes_val/' 41 | img_norm_cfg = dict( 42 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 43 | data = dict( 44 | test=dict( 45 | type=dataset_type, 46 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt', 47 | img_prefix=data_root_val, 48 | img_norm_cfg=img_norm_cfg, 49 | input_format="NCTHW", 50 | num_segments=10, 51 | new_length=32, 52 | new_step=2, 53 | random_shift=True, 54 | modality='RGB', 55 | image_tmpl='img_{:05d}.jpg', 56 | img_scale=256, 57 | input_size=256, 58 | div_255=False, 59 | flip_ratio=0, 60 | resize_keep_ratio=True, 61 | oversample='three_crop', 62 | random_crop=False, 63 | more_fix_crop=False, 64 | multiscale_crop=False, 65 | test_mode=True)) 66 | 67 | dist_params = dict(backend='nccl') 68 | 
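For orientation (a sketch added by the editor, not part of the repository): a test config such as the one above is consumed by loading it with mmcv's Config and passing its model section to the recognizer builder, mirroring the calls that appear in tools/train_recognizer.py further down in this dump.

from mmcv import Config

from mmaction.models import build_recognizer

cfg = Config.fromfile(
    'test_configs/I3D_RGB/i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2.py')
# train_cfg and test_cfg are both None in this config, so the recognizer is set up
# for plain inference.
model = build_recognizer(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
print(cfg.model.backbone.type, cfg.data.test.num_segments)  # ResNet_I3D 10
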
-------------------------------------------------------------------------------- /test_configs/I3D_RGB/i3d_ucf101_3d_rgb_inception_v1_seg1_f64s1.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='InceptionV1_I3D', 6 | pretrained=None, 7 | modality='RGB'), 8 | spatial_temporal_module=dict( 9 | type='SimpleSpatialTemporalModule', 10 | spatial_type='avg', 11 | temporal_size=-1, 12 | spatial_size=-1), 13 | segmental_consensus=dict( 14 | type='SimpleConsensus', 15 | consensus_type='avg'), 16 | cls_head=dict( 17 | type='ClsHead', 18 | with_avg_pool=False, 19 | temporal_feature_size=1, 20 | spatial_feature_size=1, 21 | dropout_ratio=0.5, 22 | in_channels=2048, 23 | num_classes=101)) 24 | train_cfg = None 25 | test_cfg = None 26 | # dataset settings 27 | dataset_type = 'RawFramesDataset' 28 | data_root_val = 'data/ucf101/rawframes/' 29 | img_norm_cfg = dict( 30 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 31 | data = dict( 32 | test=dict( 33 | type=dataset_type, 34 | ann_file='data/ucf101/ucf101_train_split_1_rawframes.txt', 35 | img_prefix=data_root_val, 36 | img_norm_cfg=img_norm_cfg, 37 | input_format="NCTHW", 38 | num_segments=10, 39 | new_length=64, 40 | new_step=1, 41 | random_shift=True, 42 | modality='RGB', 43 | image_tmpl='img_{:05d}.jpg', 44 | img_scale=256, 45 | input_size=256, 46 | div_255=False, 47 | flip_ratio=0, 48 | resize_keep_ratio=True, 49 | oversample='three_crop', 50 | random_crop=False, 51 | more_fix_crop=False, 52 | multiscale_crop=False, 53 | test_mode=True)) 54 | 55 | dist_params = dict(backend='nccl') 56 | -------------------------------------------------------------------------------- /test_configs/R2plus1D/r2plus1d_kinetics400_se_rgb_r34_seg1_32x2.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='ResNet_3D', 6 | pretrained=None, 7 | depth=34, 8 | use_pool1=True, 9 | block_type='2.5d', 10 | use_syncbn=True), 11 | spatial_temporal_module=dict( 12 | type='SimpleSpatialTemporalModule', 13 | spatial_type='avg', 14 | temporal_size=-1, 15 | spatial_size=-1), 16 | segmental_consensus=dict( 17 | type='SimpleConsensus', 18 | consensus_type='avg'), 19 | cls_head=dict( 20 | type='ClsHead', 21 | with_avg_pool=False, 22 | temporal_feature_size=1, 23 | spatial_feature_size=1, 24 | dropout_ratio=0.5, 25 | in_channels=512, 26 | num_classes=400)) 27 | train_cfg = None 28 | test_cfg = None 29 | # dataset settings 30 | dataset_type = 'RawFramesDataset' 31 | data_root_val = 'data/kinetics400/rawframes_val/' 32 | img_norm_cfg = dict( 33 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 34 | data = dict( 35 | test=dict( 36 | type=dataset_type, 37 | ann_file='data/kinetics400/kinetics400_train_list_rawframes.txt', 38 | img_prefix=data_root_val, 39 | img_norm_cfg=img_norm_cfg, 40 | input_format="NCTHW", 41 | num_segments=10, 42 | new_length=32, 43 | new_step=2, 44 | random_shift=True, 45 | modality='RGB', 46 | image_tmpl='img_{:05d}.jpg', 47 | img_scale=256, 48 | input_size=256, 49 | div_255=False, 50 | flip_ratio=0, 51 | resize_keep_ratio=True, 52 | oversample='three_crop', 53 | random_crop=False, 54 | more_fix_crop=False, 55 | multiscale_crop=False, 56 | test_mode=True)) 57 | 58 | dist_params = dict(backend='nccl', port=16187) 59 | 
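A quick sanity check on the test-time cost implied by the 32x2 configs above (this is the editor's reading of new_length, new_step and oversample, not code from the repo): each sampled segment feeds 32 frames taken at a stride of 2 raw frames, and three_crop evaluates three spatial crops per segment.

num_segments, new_length, new_step, spatial_crops = 10, 32, 2, 3
frames_per_clip = new_length                      # frames fed to the network per clip
temporal_span = new_length * new_step             # raw frames spanned by one clip: 64
clips_per_video = num_segments * spatial_crops    # forward passes per video: 30
print(frames_per_clip, temporal_span, clips_per_video)
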
-------------------------------------------------------------------------------- /test_configs/R2plus1D/r2plus1d_kinetics400_se_rgb_r34_seg1_8x8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='ResNet_3D', 6 | pretrained=None, 7 | depth=34, 8 | use_pool1=True, 9 | block_type='2.5d', 10 | use_syncbn=True), 11 | spatial_temporal_module=dict( 12 | type='SimpleSpatialTemporalModule', 13 | spatial_type='avg', 14 | temporal_size=-1, 15 | spatial_size=-1), 16 | segmental_consensus=dict( 17 | type='SimpleConsensus', 18 | consensus_type='avg'), 19 | cls_head=dict( 20 | type='ClsHead', 21 | with_avg_pool=False, 22 | temporal_feature_size=1, 23 | spatial_feature_size=1, 24 | dropout_ratio=0.5, 25 | in_channels=512, 26 | num_classes=400)) 27 | train_cfg = None 28 | test_cfg = None 29 | # dataset settings 30 | dataset_type = 'RawFramesDataset' 31 | data_root_val = 'data/kinetics400/rawframes_val/' 32 | img_norm_cfg = dict( 33 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 34 | data = dict( 35 | test=dict( 36 | type=dataset_type, 37 | ann_file='data/kinetics400/kinetics400_train_list_rawframes.txt', 38 | img_prefix=data_root_val, 39 | img_norm_cfg=img_norm_cfg, 40 | input_format="NCTHW", 41 | num_segments=10, 42 | new_length=8, 43 | new_step=8, 44 | random_shift=True, 45 | modality='RGB', 46 | image_tmpl='img_{:05d}.jpg', 47 | img_scale=256, 48 | input_size=256, 49 | div_255=False, 50 | flip_ratio=0, 51 | resize_keep_ratio=True, 52 | oversample='three_crop', 53 | random_crop=False, 54 | more_fix_crop=False, 55 | multiscale_crop=False, 56 | test_mode=True)) 57 | 58 | dist_params = dict(backend='nccl', port=16187) 59 | -------------------------------------------------------------------------------- /test_configs/SlowFast/slowfast_kinetics400_se_rgb_r50_seg1_4x16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN3D', 4 | backbone=dict( 5 | type='ResNet_I3D_SlowFast', 6 | pretrained_slow=None, 7 | pretrained_fast=None, 8 | depth=50, 9 | alpha=8, 10 | beta_inv=8, 11 | num_stages=4, 12 | out_indices=[3], 13 | frozen_stages=-1, 14 | slow_inflate_freq=(0, 0, 1, 1), 15 | fast_inflate_freq=(1, 1, 1, 1), 16 | inflate_style='3x1x1', 17 | bn_eval=False, 18 | partial_bn=False, 19 | style='pytorch'), 20 | spatial_temporal_module=dict( 21 | type='SlowFastSpatialTemporalModule', 22 | adaptive_pool=True, 23 | spatial_type='avg', 24 | temporal_size=1, 25 | spatial_size=1), 26 | segmental_consensus=dict( 27 | type='SimpleConsensus', 28 | consensus_type='avg'), 29 | cls_head=dict( 30 | type='ClsHead', 31 | with_avg_pool=False, 32 | temporal_feature_size=1, 33 | spatial_feature_size=1, 34 | dropout_ratio=0.5, 35 | in_channels=2304, # 2048+256 36 | num_classes=400)) 37 | train_cfg = None 38 | test_cfg = None 39 | # dataset settings 40 | dataset_type = 'RawFramesDataset' 41 | data_root_val = 'data/kinetics400/rawframes_val/' 42 | img_norm_cfg = dict( 43 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 44 | 45 | data = dict( 46 | test=dict( 47 | type=dataset_type, 48 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt', 49 | img_prefix=data_root_val, 50 | img_norm_cfg=img_norm_cfg, 51 | input_format="NCTHW", 52 | num_segments=10, 53 | new_length=32, 54 | new_step=2, 55 | modality='RGB', 56 | image_tmpl='img_{:05d}.jpg', 57 | img_scale=256, 58 | 
input_size=256, 59 | div_255=False, 60 | flip_ratio=0, 61 | resize_keep_ratio=True, 62 | oversample='three_crop', 63 | random_crop=False, 64 | more_fix_crop=False, 65 | multiscale_crop=False, 66 | test_mode=True) 67 | ) 68 | 69 | dist_params = dict(backend='nccl', port=16187) 70 | -------------------------------------------------------------------------------- /test_configs/SlowOnly/slowonly_kinetics400_se_rgb_r101_seg1_8x8.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='TSN3D', 3 | backbone=dict( 4 | type='ResNet_I3D', 5 | pretrained=None, 6 | depth=101, 7 | num_stages=4, 8 | out_indices=[3], 9 | frozen_stages=-1, 10 | inflate_freq=(0, 0, 1, 1), 11 | conv1_kernel_t=1, 12 | conv1_stride_t=1, 13 | pool1_kernel_t=1, 14 | pool1_stride_t=1, 15 | inflate_style='3x1x1', 16 | bn_eval=False, 17 | no_pool2=True, 18 | partial_bn=False, 19 | style='pytorch'), 20 | spatial_temporal_module=dict( 21 | type='SimpleSpatialTemporalModule', 22 | spatial_type='avg', 23 | temporal_size=-1, 24 | spatial_size=-1), 25 | segmental_consensus=dict( 26 | type='SimpleConsensus', 27 | consensus_type='avg'), 28 | cls_head=dict( 29 | type='ClsHead', 30 | with_avg_pool=False, 31 | temporal_feature_size=1, 32 | spatial_feature_size=1, 33 | dropout_ratio=0.5, 34 | in_channels=2048, 35 | num_classes=400)) 36 | train_cfg = None 37 | test_cfg = None 38 | # dataset settings 39 | dataset_type = 'RawFramesDataset' 40 | data_root_val = 'data/kinetics400/rawframes_val/' 41 | img_norm_cfg = dict( 42 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 43 | data = dict( 44 | test=dict( 45 | type=dataset_type, 46 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt', 47 | img_prefix=data_root_val, 48 | img_norm_cfg=img_norm_cfg, 49 | input_format="NCTHW", 50 | num_segments=10, 51 | new_length=8, 52 | new_step=8, 53 | random_shift=False, 54 | modality='RGB', 55 | image_tmpl='img_{:05d}.jpg', 56 | img_scale=256, 57 | input_size=256, 58 | div_255=False, 59 | flip_ratio=0, 60 | resize_keep_ratio=True, 61 | oversample='three_crop', 62 | random_crop=False, 63 | more_fix_crop=False, 64 | multiscale_crop=False, 65 | test_mode=True)) 66 | 67 | dist_params = dict(backend='nccl') 68 | -------------------------------------------------------------------------------- /test_configs/SlowOnly/slowonly_kinetics400_se_rgb_r50_seg1_4x16.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='TSN3D', 3 | backbone=dict( 4 | type='ResNet_I3D', 5 | pretrained=None, 6 | depth=50, 7 | num_stages=4, 8 | out_indices=[3], 9 | frozen_stages=-1, 10 | inflate_freq=(0, 0, 1, 1), 11 | conv1_kernel_t=1, 12 | conv1_stride_t=1, 13 | pool1_kernel_t=1, 14 | pool1_stride_t=1, 15 | inflate_style='3x1x1', 16 | bn_eval=False, 17 | no_pool2=True, 18 | partial_bn=False, 19 | style='pytorch'), 20 | spatial_temporal_module=dict( 21 | type='SimpleSpatialTemporalModule', 22 | spatial_type='avg', 23 | temporal_size=-1, 24 | spatial_size=-1), 25 | segmental_consensus=dict( 26 | type='SimpleConsensus', 27 | consensus_type='avg'), 28 | cls_head=dict( 29 | type='ClsHead', 30 | with_avg_pool=False, 31 | temporal_feature_size=1, 32 | spatial_feature_size=1, 33 | dropout_ratio=0.5, 34 | in_channels=2048, 35 | num_classes=400)) 36 | train_cfg = None 37 | test_cfg = None 38 | # dataset settings 39 | dataset_type = 'RawFramesDataset' 40 | data_root_val = 'data/kinetics400/rawframes_val/' 41 | img_norm_cfg = dict( 42 | mean=[123.675, 
116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 43 | data = dict( 44 | test=dict( 45 | type=dataset_type, 46 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt', 47 | img_prefix=data_root_val, 48 | img_norm_cfg=img_norm_cfg, 49 | input_format="NCTHW", 50 | num_segments=10, 51 | new_length=4, 52 | new_step=16, 53 | random_shift=False, 54 | modality='RGB', 55 | image_tmpl='img_{:05d}.jpg', 56 | img_scale=256, 57 | input_size=256, 58 | div_255=False, 59 | flip_ratio=0, 60 | resize_keep_ratio=True, 61 | oversample='three_crop', 62 | random_crop=False, 63 | more_fix_crop=False, 64 | multiscale_crop=False, 65 | test_mode=True)) 66 | 67 | dist_params = dict(backend='nccl') 68 | -------------------------------------------------------------------------------- /test_configs/SlowOnly/slowonly_kinetics400_se_rgb_r50_seg1_8x8.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='TSN3D', 3 | backbone=dict( 4 | type='ResNet_I3D', 5 | pretrained=None, 6 | depth=50, 7 | num_stages=4, 8 | out_indices=[3], 9 | frozen_stages=-1, 10 | inflate_freq=(0, 0, 1, 1), 11 | conv1_kernel_t=1, 12 | conv1_stride_t=1, 13 | pool1_kernel_t=1, 14 | pool1_stride_t=1, 15 | inflate_style='3x1x1', 16 | bn_eval=False, 17 | no_pool2=True, 18 | partial_bn=False, 19 | style='pytorch'), 20 | spatial_temporal_module=dict( 21 | type='SimpleSpatialTemporalModule', 22 | spatial_type='avg', 23 | temporal_size=-1, 24 | spatial_size=-1), 25 | segmental_consensus=dict( 26 | type='SimpleConsensus', 27 | consensus_type='avg'), 28 | cls_head=dict( 29 | type='ClsHead', 30 | with_avg_pool=False, 31 | temporal_feature_size=1, 32 | spatial_feature_size=1, 33 | dropout_ratio=0.5, 34 | in_channels=2048, 35 | num_classes=400)) 36 | train_cfg = None 37 | test_cfg = None 38 | # dataset settings 39 | dataset_type = 'RawFramesDataset' 40 | data_root_val = 'data/kinetics400/rawframes_val/' 41 | img_norm_cfg = dict( 42 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 43 | data = dict( 44 | test=dict( 45 | type=dataset_type, 46 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt', 47 | img_prefix=data_root_val, 48 | img_norm_cfg=img_norm_cfg, 49 | input_format="NCTHW", 50 | num_segments=10, 51 | new_length=8, 52 | new_step=8, 53 | random_shift=False, 54 | modality='RGB', 55 | image_tmpl='img_{:05d}.jpg', 56 | img_scale=256, 57 | input_size=256, 58 | div_255=False, 59 | flip_ratio=0, 60 | resize_keep_ratio=True, 61 | oversample='three_crop', 62 | random_crop=False, 63 | more_fix_crop=False, 64 | multiscale_crop=False, 65 | test_mode=True)) 66 | 67 | dist_params = dict(backend='nccl') 68 | -------------------------------------------------------------------------------- /test_configs/TSN/tsn_kinetics400_2d_rgb_r50_seg3_f1s1.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='TSN2D', 3 | backbone=dict( 4 | type='ResNet', 5 | pretrained=None, 6 | depth=50, 7 | out_indices=(3,), 8 | bn_eval=False, 9 | partial_bn=False), 10 | spatial_temporal_module=dict( 11 | type='SimpleSpatialModule', 12 | spatial_type='avg', 13 | spatial_size=7), 14 | segmental_consensus=dict( 15 | type='SimpleConsensus', 16 | consensus_type='avg'), 17 | cls_head=dict( 18 | type='ClsHead', 19 | with_avg_pool=False, 20 | temporal_feature_size=1, 21 | spatial_feature_size=1, 22 | dropout_ratio=0.4, 23 | in_channels=2048, 24 | num_classes=400)) 25 | train_cfg = None 26 | test_cfg = None 27 | # dataset settings 28 | 
dataset_type = 'RawFramesDataset' 29 | data_root_val = 'data/kinetics400/rawframes_val' 30 | img_norm_cfg = dict( 31 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 32 | 33 | data = dict( 34 | test=dict( 35 | type=dataset_type, 36 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt', 37 | img_prefix=data_root_val, 38 | img_norm_cfg=img_norm_cfg, 39 | num_segments=25, 40 | new_length=1, 41 | new_step=1, 42 | random_shift=False, 43 | modality='RGB', 44 | image_tmpl='img_{:05d}.jpg', 45 | img_scale=256, 46 | input_size=224, 47 | div_255=False, 48 | flip_ratio=0, 49 | resize_keep_ratio=True, 50 | oversample="ten_crop", 51 | random_crop=False, 52 | more_fix_crop=False, 53 | multiscale_crop=False, 54 | test_mode=True)) 55 | 56 | dist_params = dict(backend='nccl') 57 | -------------------------------------------------------------------------------- /test_configs/TSN/ucf101/tsn_flow_bninception.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN2D', 4 | modality='Flow', 5 | in_channels=10, 6 | backbone=dict( 7 | type='BNInception', 8 | pretrained=None, 9 | bn_eval=False, 10 | partial_bn=True), 11 | spatial_temporal_module=dict( 12 | type='SimpleSpatialModule', 13 | spatial_type='avg', 14 | spatial_size=7), 15 | segmental_consensus=dict( 16 | type='SimpleConsensus', 17 | consensus_type='avg'), 18 | cls_head=dict( 19 | type='ClsHead', 20 | with_avg_pool=False, 21 | temporal_feature_size=1, 22 | spatial_feature_size=1, 23 | dropout_ratio=0.7, 24 | in_channels=1024, 25 | num_classes=101)) 26 | train_cfg = None 27 | test_cfg = None 28 | # dataset settings 29 | dataset_type = 'RawFramesDataset' 30 | data_root = 'data/ucf101/rawframes' 31 | img_norm_cfg = dict( 32 | mean=[128], std=[1], to_rgb=False) 33 | data = dict( 34 | test=dict( 35 | type=dataset_type, 36 | ann_file='data/ucf101/ucf101_val_split_1_rawframes.txt', 37 | img_prefix=data_root, 38 | img_norm_cfg=img_norm_cfg, 39 | num_segments=25, 40 | new_length=5, 41 | new_step=1, 42 | random_shift=False, 43 | modality='Flow', 44 | image_tmpl='flow_{}_{:05d}.jpg', 45 | img_scale=256, 46 | input_size=224, 47 | div_255=False, 48 | flip_ratio=0, 49 | resize_keep_ratio=True, 50 | oversample='ten_crop', 51 | random_crop=False, 52 | more_fix_crop=False, 53 | multiscale_crop=False, 54 | test_mode=True)) 55 | 56 | dist_params = dict(backend='nccl') 57 | -------------------------------------------------------------------------------- /test_configs/TSN/ucf101/tsn_rgb_bninception.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='TSN2D', 4 | backbone=dict( 5 | type='BNInception', 6 | pretrained=None, 7 | bn_eval=False, 8 | partial_bn=True), 9 | spatial_temporal_module=dict( 10 | type='SimpleSpatialModule', 11 | spatial_type='avg', 12 | spatial_size=7), 13 | segmental_consensus=dict( 14 | type='SimpleConsensus', 15 | consensus_type='avg'), 16 | cls_head=dict( 17 | type='ClsHead', 18 | with_avg_pool=False, 19 | temporal_feature_size=1, 20 | spatial_feature_size=1, 21 | dropout_ratio=0.8, 22 | in_channels=1024, 23 | init_std=0.001, 24 | num_classes=101)) 25 | train_cfg = None 26 | test_cfg = None 27 | # dataset settings 28 | dataset_type = 'RawFramesDataset' 29 | data_root = 'data/ucf101/rawframes' 30 | img_norm_cfg = dict( 31 | mean=[104, 117, 128], std=[1, 1, 1], to_rgb=False) 32 | 33 | data = dict( 34 | test=dict( 35 | type=dataset_type, 36 | 
ann_file='data/ucf101/ucf101_val_split_1_rawframes.txt', 37 | img_prefix=data_root, 38 | img_norm_cfg=img_norm_cfg, 39 | num_segments=25, 40 | new_length=1, 41 | new_step=1, 42 | random_shift=False, 43 | modality='RGB', 44 | image_tmpl='img_{:05d}.jpg', 45 | img_scale=256, 46 | input_size=224, 47 | div_255=False, 48 | flip_ratio=0, 49 | resize_keep_ratio=True, 50 | oversample='ten_crop', 51 | random_crop=False, 52 | more_fix_crop=False, 53 | multiscale_crop=False, 54 | test_mode=True)) 55 | 56 | dist_params = dict(backend='nccl') 57 | -------------------------------------------------------------------------------- /test_configs/ava/ava_fast_rcnn_nl_r50_c4_1x_kinetics_pretrain_crop.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | backbone=dict( 5 | type='ResNet_I3D', 6 | pretrained=None, 7 | pretrained2d=False, 8 | depth=50, 9 | num_stages=3, 10 | spatial_strides=(1, 2, 2), 11 | temporal_strides=(1, 1, 1), 12 | dilations=(1, 1, 1), 13 | out_indices=(2,), 14 | frozen_stages=-1, 15 | inflate_freq=((1,1,1), (1,0,1,0), (1,0,1,0,1,0)), 16 | inflate_style='3x1x1', 17 | nonlocal_stages=(1, 2), 18 | # nonlocal_freq=((0,0,0), (0,1,0,1), (0,1,0,1,0,1)), 19 | nonlocal_cfg=dict(nonlocal_type="gaussian"), 20 | nonlocal_freq=((0,0,0), (0,1,0,1), (0,1,0,1,0,1), (0,0,0)), 21 | conv1_kernel_t=5, 22 | conv1_stride_t=1, 23 | pool1_kernel_t=1, 24 | pool1_stride_t=1, 25 | bn_eval=False, 26 | partial_bn=False, 27 | bn_frozen=True, 28 | style='pytorch'), 29 | shared_head=dict( 30 | type='ResI3DLayer', 31 | pretrained=None, 32 | pretrained2d=False, 33 | depth=50, 34 | stage=3, 35 | spatial_stride=2, 36 | temporal_stride=1, 37 | dilation=1, 38 | style='pytorch', 39 | inflate_freq=(0, 1, 0), 40 | inflate_style='3x1x1', 41 | bn_eval=False, 42 | bn_frozen=True), 43 | bbox_roi_extractor=dict( 44 | type='SingleRoIStraight3DExtractor', 45 | roi_layer=dict(type='RoIAlign', out_size=16, sample_num=2), 46 | out_channels=1024, 47 | featmap_strides=[16], 48 | with_temporal_pool=True), 49 | dropout_ratio=0.3, 50 | bbox_head=dict( 51 | type='BBoxHead', 52 | with_reg=False, 53 | with_temporal_pool=False, 54 | with_spatial_pool=True, 55 | spatial_pool_type='max', 56 | roi_feat_size=(1, 8, 8), 57 | in_channels=2048, 58 | num_classes=81, 59 | target_means=[0., 0., 0., 0.], 60 | target_stds=[0.1, 0.1, 0.2, 0.2], 61 | multilabel_classification=True, 62 | reg_class_agnostic=True, 63 | nms_class_agnostic=True)) 64 | # model training and testing settings 65 | test_cfg = dict( 66 | train_detector=False, 67 | person_det_score_thr=0.85, 68 | rcnn=dict( 69 | score_thr=0.00, nms=dict(type='nms', iou_thr=1.0), max_per_img=100, 70 | action_thr=0.00)) 71 | # dataset settings 72 | dataset_type = 'AVADataset' 73 | data_root = 'data/ava/rawframes/' 74 | img_norm_cfg = dict( 75 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 76 | data = dict( 77 | test=dict( 78 | type=dataset_type, 79 | ann_file='data/ava/annotations/ava_val_v2.1.csv', 80 | exclude_file='data/ava/annotations/ava_val_excluded_timestamps_v2.1.csv', 81 | label_file='data/ava/annotations/ava_action_list_v2.1_for_activitynet_2018.pbtxt', 82 | video_stat_file='data/ava/ava_video_resolution_stats.csv', 83 | proposal_file='data/ava/ava_dense_proposals_val.FAIR.recall_93.9.pkl', 84 | img_prefix=data_root, 85 | img_norm_cfg=img_norm_cfg, 86 | input_format='NCTHW', 87 | new_length=32, 88 | new_step=2, 89 | random_shift=False, 90 | modality='RGB', 91 | 
image_tmpl='img_{:05d}.jpg', 92 | img_scale=[(800, 256), ], 93 | input_size=None, 94 | div_255=False, 95 | size_divisor=32, 96 | flip_ratio=0, 97 | resize_keep_ratio=True, 98 | with_label=False, 99 | test_mode=True)) 100 | 101 | dist_params = dict(backend='nccl') 102 | -------------------------------------------------------------------------------- /test_configs/thumos14/ssn_thumos14_rgb_bn_inception.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='SSN2D', 4 | backbone=dict( 5 | type='BNInception', 6 | pretrained=None, 7 | bn_eval=False, 8 | partial_bn=True), 9 | spatial_temporal_module=dict( 10 | type='SimpleSpatialModule', 11 | spatial_type='avg', 12 | spatial_size=7), 13 | dropout_ratio=0.8, 14 | segmental_consensus=dict( 15 | type='StructuredTemporalPyramidPooling', 16 | standalong_classifier=True, 17 | stpp_cfg=(1, 1, 1), 18 | num_seg=(2, 5, 2)), 19 | cls_head=dict( 20 | type='SSNHead', 21 | dropout_ratio=0., 22 | in_channels_activity=1024, 23 | in_channels_complete=3072, 24 | num_classes=20, 25 | with_bg=False, 26 | with_reg=True)) 27 | # model training and testing settings 28 | test_cfg=dict( 29 | ssn=dict( 30 | sampler=dict( 31 | test_interval=6, 32 | batch_size=16), 33 | evaluater=dict( 34 | top_k=2000, 35 | nms=0.2, 36 | softmax_before_filter=True, 37 | cls_score_dict=None, 38 | cls_top_k=2))) 39 | # dataset settings 40 | dataset_type = 'SSNDataset' 41 | data_root = './data/thumos14/rawframes/' 42 | img_norm_cfg = dict( 43 | mean=[104, 117, 128], std=[1, 1, 1], to_rgb=False) 44 | data = dict( 45 | test=dict( 46 | type=dataset_type, 47 | ann_file='data/thumos14/thumos14_tag_test_normalized_proposal_list.txt', 48 | img_prefix=data_root, 49 | img_norm_cfg=img_norm_cfg, 50 | train_cfg=train_cfg, 51 | test_cfg=test_cfg, 52 | input_format='NCHW', 53 | aug_ratio=0.5, 54 | new_length=1, 55 | new_step=1, 56 | random_shift=False, 57 | modality='RGB', 58 | image_tmpl='img_{:05d}.jpg', 59 | img_scale=(340, 256), 60 | input_size=224, 61 | oversample=None, 62 | div_255=False, 63 | size_divisor=32, 64 | flip_ratio=0, 65 | resize_keep_ratio=True, 66 | test_mode=True)) 67 | 68 | dist_params = dict(backend='nccl') 69 | -------------------------------------------------------------------------------- /tools/dist_test_detector.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$3 $(dirname "$0")/test_detector.py $1 $2 --launcher pytorch --eval bbox ${@:4} 6 | 7 | -------------------------------------------------------------------------------- /tools/dist_test_recognizer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$3 $(dirname "$0")/test_recognizer.py $1 $2 --launcher pytorch ${@:4} 6 | -------------------------------------------------------------------------------- /tools/dist_test_recognizer_heavy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$3 $(dirname "$0")/test_recognizer_heavy.py $1 $2 --launcher pytorch ${@:4} 6 | -------------------------------------------------------------------------------- /tools/dist_train_detector.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train_detector.py $1 --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /tools/dist_train_localizer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train_localizer.py $1 --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /tools/dist_train_recognizer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train_recognizer.py $1 --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /tools/generate_lmdb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os.path as osp 4 | from mmcv.lmdb.io import create_rawimage_dataset 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='generate lmdb datasets from raw frames') 10 | parser.add_argument( 11 | 'root_dir', help='root directory to store the raw frames') 12 | parser.add_argument( 13 | 'target_dir', help='target directory to stored the generated lmdbs') 14 | parser.add_argument('--image_format', nargs='+', 15 | help='format of the images to be stored', 16 | default=['img*.jpg']) 17 | parser.add_argument('--lmdb_tmpl', type=str, 18 | help='template for the lmdb to be generated', 19 | default='{}_img_lmdb') 20 | parser.add_argument('--image_tmpl', type=str, 21 | help='template for the lmdb key', default=None) 22 | parser.add_argument('--modality', type=str, help='modality', 23 | choices=['RGB', 'Flow'], default='RGB') 24 | args = parser.parse_args() 25 | 26 | return args 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | video_path_list = glob.glob(osp.join(args.root_dir, '*')) 32 | for i, vv in enumerate(video_path_list): 33 | if not osp.isdir(vv): 34 | continue 35 | image_file_list = [] 36 | for image_format in args.image_format: 37 | image_file_list += glob.glob(osp.join(vv, image_format)) 38 | vid = vv.split('/')[-1] 39 | output_path = osp.join(args.target_dir, args.lmdb_tmpl.format(vid)) 40 | create_rawimage_dataset(output_path, image_file_list, 41 | image_tmpl=args.image_tmpl, 42 | flag='color' if args.modality == 'RGB' 43 | else 'grayscale', 44 | check_valid=True) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /tools/train_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | 6 | from mmaction import __version__ 7 | from mmaction.datasets import get_trimmed_dataset 8 | from mmaction.apis import (train_network, init_dist, get_root_logger, 9 | set_random_seed) 10 | from mmaction.models import build_detector 11 | import torch 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train an action recognizer') 16 | parser.add_argument('config', 
help='train config file path') 17 | parser.add_argument('--work_dir', help='the dir to save logs and models') 18 | parser.add_argument( 19 | '--resume_from', help='the checkpoint file to resume from') 20 | parser.add_argument( 21 | '--validate', 22 | action='store_true', 23 | help='whether to evaluate the checkpoint during training') 24 | parser.add_argument( 25 | '--gpus', 26 | type=int, 27 | default=1, 28 | help='number of gpus to use ' 29 | '(only applicable to non-distributed training)') 30 | parser.add_argument('--seed', type=int, default=None, help='random seed') 31 | parser.add_argument( 32 | '--launcher', 33 | choices=['none', 'pytorch', 'slurm', 'mpi'], 34 | default='none', 35 | help='job launcher') 36 | parser.add_argument('--local_rank', type=int, default=0) 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | cfg = Config.fromfile(args.config) 46 | # set cudnn_benchmark 47 | if cfg.get('cudnn_benchmark', False): 48 | torch.backends.cudnn.benchmark = True 49 | # update configs according to CLI args 50 | if args.work_dir is not None: 51 | cfg.work_dir = args.work_dir 52 | if args.resume_from is not None: 53 | cfg.resume_from = args.resume_from 54 | cfg.gpus = args.gpus 55 | if cfg.checkpoint_config is not None: 56 | # save mmaction version in checkpoints as meta data 57 | cfg.checkpoint_config.meta = dict( 58 | mmact_version=__version__, config=cfg.text) 59 | 60 | # init distributed env first, since logger depends on the dist info. 61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | # init logger before other steps 68 | logger = get_root_logger(cfg.log_level) 69 | logger.info('Distributed training: {}'.format(distributed)) 70 | 71 | # set random seeds 72 | if args.seed is not None: 73 | logger.info('Set random seed to {}'.format(args.seed)) 74 | set_random_seed(args.seed) 75 | 76 | model = build_detector( 77 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 78 | 79 | train_dataset = get_trimmed_dataset(cfg.data.train) 80 | train_network( 81 | model, 82 | train_dataset, 83 | cfg, 84 | distributed=distributed, 85 | validate=args.validate, 86 | logger=logger) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /tools/train_localizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | 6 | from mmaction import __version__ 7 | from mmaction.datasets import get_trimmed_dataset 8 | from mmaction.apis import (train_network, init_dist, get_root_logger, 9 | set_random_seed) 10 | from mmaction.models import build_localizer 11 | import torch 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train an action localizer') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument('--work_dir', help='the dir to save logs and models') 18 | parser.add_argument( 19 | '--resume_from', help='the checkpoint file to resume from') 20 | parser.add_argument( 21 | '--validate', 22 | action='store_true', 23 | help='whether to evaluate the checkpoint during training') 24 | parser.add_argument( 25 | '--gpus', 26 | type=int, 27 | default=1, 28 | help='number of gpus to use ' 29 | '(only applicable to non-distributed training)') 30 | parser.add_argument('--seed', 
type=int, default=None, help='random seed') 31 | parser.add_argument( 32 | '--launcher', 33 | choices=['none', 'pytorch', 'slurm', 'mpi'], 34 | default='none', 35 | help='job launcher') 36 | parser.add_argument('--local_rank', type=int, default=0) 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | cfg = Config.fromfile(args.config) 46 | # set cudnn_benchmark 47 | if cfg.get('cudnn_benchmark', False): 48 | torch.backends.cudnn.benchmark = True 49 | # update configs according to CLI args 50 | if args.work_dir is not None: 51 | cfg.work_dir = args.work_dir 52 | if args.resume_from is not None: 53 | cfg.resume_from = args.resume_from 54 | cfg.gpus = args.gpus 55 | if cfg.checkpoint_config is not None: 56 | # save mmaction version in checkpoints as meta data 57 | cfg.checkpoint_config.meta = dict( 58 | mmact_version=__version__, config=cfg.text) 59 | 60 | # init distributed env first, since logger depends on the dist info. 61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | # init logger before other steps 68 | logger = get_root_logger(cfg.log_level) 69 | logger.info('Distributed training: {}'.format(distributed)) 70 | 71 | # set random seeds 72 | if args.seed is not None: 73 | logger.info('Set random seed to {}'.format(args.seed)) 74 | set_random_seed(args.seed) 75 | 76 | model = build_localizer( 77 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 78 | 79 | train_dataset = get_trimmed_dataset(cfg.data.train) 80 | train_network( 81 | model, 82 | train_dataset, 83 | cfg, 84 | distributed=distributed, 85 | validate=args.validate, 86 | logger=logger) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /tools/train_recognizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | 6 | from mmaction import __version__ 7 | from mmaction.datasets import get_trimmed_dataset 8 | from mmaction.apis import (train_network, init_dist, get_root_logger, 9 | set_random_seed) 10 | from mmaction.models import build_recognizer 11 | import torch 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train an action recognizer') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument('--work_dir', help='the dir to save logs and models') 18 | parser.add_argument( 19 | '--resume_from', help='the checkpoint file to resume from') 20 | parser.add_argument( 21 | '--validate', 22 | action='store_true', 23 | help='whether to evaluate the checkpoint during training') 24 | parser.add_argument( 25 | '--gpus', 26 | type=int, 27 | default=1, 28 | help='number of gpus to use ' 29 | '(only applicable to non-distributed training)') 30 | parser.add_argument('--seed', type=int, default=None, help='random seed') 31 | parser.add_argument( 32 | '--launcher', 33 | choices=['none', 'pytorch', 'slurm', 'mpi'], 34 | default='none', 35 | help='job launcher') 36 | parser.add_argument('--local_rank', type=int, default=0) 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | cfg = Config.fromfile(args.config) 46 | # set cudnn_benchmark 47 | if cfg.get('cudnn_benchmark', False): 48 | torch.backends.cudnn.benchmark = True 49 | # update configs 
according to CLI args 50 | if args.work_dir is not None: 51 | cfg.work_dir = args.work_dir 52 | if args.resume_from is not None: 53 | cfg.resume_from = args.resume_from 54 | cfg.gpus = args.gpus 55 | if cfg.checkpoint_config is not None: 56 | # save mmaction version in checkpoints as meta data 57 | cfg.checkpoint_config.meta = dict( 58 | mmact_version=__version__, config=cfg.text) 59 | 60 | # init distributed env first, since logger depends on the dist info. 61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | # init logger before other steps 68 | logger = get_root_logger(cfg.log_level) 69 | logger.info('Distributed training: {}'.format(distributed)) 70 | 71 | # set random seeds 72 | if args.seed is not None: 73 | logger.info('Set random seed to {}'.format(args.seed)) 74 | set_random_seed(args.seed) 75 | 76 | model = build_recognizer( 77 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 78 | 79 | train_dataset = get_trimmed_dataset(cfg.data.train) 80 | train_network( 81 | model, 82 | train_dataset, 83 | cfg, 84 | distributed=distributed, 85 | validate=args.validate, 86 | logger=logger) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | --------------------------------------------------------------------------------
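Closing note (an invocation sketch from the editor, with a placeholder config path and GPU count; only flags defined in the scripts above are used): the training entry points can be run directly for single-machine jobs, or through the dist_train_*.sh wrappers, which forward the config and any extra arguments to torch.distributed.launch.

import subprocess

cfg = 'path/to/train_config.py'  # placeholder; any training config with a data.train section

# single machine, non-distributed (argparse defaults of tools/train_recognizer.py)
subprocess.run(['python', 'tools/train_recognizer.py', cfg, '--validate'], check=True)

# distributed over 8 processes via the wrapper: $1 = config, $2 = GPUs, rest is forwarded
subprocess.run(['bash', 'tools/dist_train_recognizer.sh', cfg, '8', '--validate'], check=True)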