├── ._CODE_OF_CONDUCT.md ├── ._CONTRIBUTING.md ├── ._README.md ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── code ├── dmcnet │ ├── ._train.py │ ├── combine.py │ ├── data_loader │ │ ├── coviar_data_loader.c │ │ ├── install.sh │ │ └── setup.py │ ├── dataset.py │ ├── exp │ ├── model.py │ ├── test.py │ ├── train.py │ ├── train_options.py │ ├── transforms.py │ └── utils.py ├── dmcnet_GAN │ ├── combine.py │ ├── data_loader │ │ ├── coviar_data_loader.c │ │ ├── install.sh │ │ └── setup.py │ ├── dataset.py │ ├── exp │ ├── model.py │ ├── test.py │ ├── train.py │ ├── train_options.py │ ├── transforms.py │ └── utils.py └── dmcnet_I3D │ ├── README.MD │ ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── image_transforms.cpython-36.pyc │ │ ├── iterator_factory.cpython-36.pyc │ │ ├── video_iterator.cpython-36.pyc │ │ ├── video_sampler.cpython-36.pyc │ │ └── video_transforms.cpython-36.pyc │ ├── image_iterator.py │ ├── image_transforms.py │ ├── iterator_factory.py │ ├── video_iterator.py │ ├── video_sampler.py │ └── video_transforms.py │ ├── dataset │ ├── HMDB51 │ │ ├── raw │ │ │ ├── data │ │ │ └── list_cvt │ │ │ │ ├── hmdb51_split1_others.txt │ │ │ │ ├── hmdb51_split1_test.txt │ │ │ │ ├── hmdb51_split1_test_info.txt │ │ │ │ ├── hmdb51_split1_test_info_step2.txt │ │ │ │ ├── hmdb51_split1_train.txt │ │ │ │ ├── hmdb51_split1_train_info.txt │ │ │ │ ├── hmdb51_split1_train_info_step2.txt │ │ │ │ ├── hmdb51_split2_others.txt │ │ │ │ ├── hmdb51_split2_test.txt │ │ │ │ ├── hmdb51_split2_test_info.txt │ │ │ │ ├── hmdb51_split2_train.txt │ │ │ │ ├── hmdb51_split2_train_info.txt │ │ │ │ ├── hmdb51_split3_others.txt │ │ │ │ ├── hmdb51_split3_test.txt │ │ │ │ ├── hmdb51_split3_test_info.txt │ │ │ │ ├── hmdb51_split3_train.txt │ │ │ │ ├── hmdb51_split3_train_info.txt │ │ │ │ └── mapping_table.txt │ │ └── scripts │ │ │ ├── README.md │ │ │ └── convert_videos.py │ ├── README.md │ ├── UCF101 │ │ └── raw │ │ │ ├── data │ │ │ └── list_cvt │ │ │ ├── hmdb51_split1_test_info.txt │ │ │ ├── hmdb51_split1_train_info.txt │ │ │ ├── testlist01.txt │ │ │ ├── testlist02.txt │ │ │ ├── testlist03.txt │ │ │ ├── trainlist01.txt │ │ │ ├── trainlist02.txt │ │ │ ├── trainlist03.txt │ │ │ ├── ucf101_split1_test_info.txt │ │ │ ├── ucf101_split1_test_info_step2.txt │ │ │ ├── ucf101_split1_train_info.txt │ │ │ ├── ucf101_split1_train_info_step2.txt │ │ │ ├── ucf101_split2_test_info.txt │ │ │ ├── ucf101_split2_train_info.txt │ │ │ ├── ucf101_split3_test_info.txt │ │ │ └── ucf101_split3_train_info.txt │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── config.cpython-36.pyc │ └── config.py │ ├── network │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── config.cpython-36.pyc │ │ ├── i3d.cpython-36.pyc │ │ ├── initializer.cpython-36.pyc │ │ ├── mfnet_3d.cpython-36.pyc │ │ └── symbol_builder.cpython-36.pyc │ ├── config.py │ ├── i3d.py │ ├── initializer.py │ ├── pretrained │ └── symbol_builder.py │ ├── test │ ├── evaluate_video_hmdb_i3d.py │ ├── evaluate_video_ucf101_i3d.py │ └── test.sh │ ├── train.sh │ ├── train │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── callback.cpython-36.pyc │ │ ├── lr_scheduler.cpython-36.pyc │ │ ├── metric.cpython-36.pyc │ │ └── model.cpython-36.pyc │ ├── callback.py │ ├── lr_scheduler.py │ ├── metric.py │ └── model.py │ ├── train_hmdb51.py │ ├── train_model.py │ └── train_ucf101.py └── exp_my ├── hmdb51_coviar ├── flow │ ├── split1 │ │ ├── combine.sh │ │ └── 
flow_score_model_best.npz │ ├── split2 │ │ ├── combine.sh │ │ └── flow_score_model_best.npz │ └── split3 │ │ ├── combine.sh │ │ └── flow_score_model_best.npz ├── iframe │ ├── split1 │ │ └── iframe_score_model_best.npz │ ├── split2 │ │ └── iframe_score_model_best.npz │ └── split3 │ │ └── iframe_score_model_best.npz ├── mv │ ├── split1 │ │ └── mv_score_model_best.npz │ ├── split2 │ │ └── mv_score_model_best.npz │ └── split3 │ │ └── mv_score_model_best.npz └── residual │ ├── split1 │ └── residual_score_model_best.npz │ ├── split2 │ └── residual_score_model_best.npz │ └── split3 │ └── residual_score_model_best.npz ├── hmdb51_gan ├── split1 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── split2 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh └── split3 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── hmdb51_gen_flow ├── split1 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── split2 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh └── split3 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── ucf101_coviar ├── ucf101_flow │ ├── split1 │ │ └── flow_score_model_best.npz │ ├── split2 │ │ └── flow_score_model_best.npz │ └── split3 │ │ └── flow_score_model_best.npz ├── ucf101_iframe │ ├── split1 │ │ └── iframe_score_model_best.npz │ ├── split2 │ │ └── iframe_score_model_best.npz │ └── split3 │ │ └── iframe_score_model_best.npz ├── ucf101_mv │ ├── split1 │ │ └── mv_score_model_best.npz │ ├── split2 │ │ └── mv_score_model_best.npz │ └── split3 │ │ └── mv_score_model_best.npz └── ucf101_residual │ ├── split1 │ └── residual_score_model_best.npz │ ├── split2 │ └── residual_score_model_best.npz │ └── split3 │ └── residual_score_model_best.npz ├── ucf101_gan ├── split1 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── split2 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh └── split3 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh └── ucf101_gen_flow ├── split1 ├── mv_score_model_best.npz ├── run.sh └── run_combine.sh ├── split2 ├── mv_score_model_best.npz ├── run.sh └── run_combine.sh └── split3 ├── mv_score_model_best.npz ├── run.sh └── run_combine.sh /._CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/._CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /._CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/._CONTRIBUTING.md -------------------------------------------------------------------------------- /._README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/._README.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | exp_my/hmdb51_gen_flow_tmp/ 2 | */data_loader/build/ 3 | */data 4 | */exp 5 | */__pycache__/ 6 | *.tar 7 | *.fuse 8 | .nfs* -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | 
Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to DMC-Net 2 | We want to make contributing to this project as easy and transparent as possible. 3 | 4 | ## Our Development Process 5 | Minor changes and improvements will be released on an ongoing basis. 6 | Larger changes (e.g., changesets implementing a new paper) will be released 7 | on a more periodic basis. 8 | 9 | 10 | ## Pull Requests 11 | We actively welcome your pull requests. 12 | 13 | 1. Fork the repo and create your branch from `master`. 14 | 2. If you've added code that should be tested, add tests. 15 | 3. If you've changed APIs, update the documentation. 16 | 4. Ensure the test suite passes. 17 | 5. Make sure your code lints. 18 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 19 | 20 | ## Contributor License Agreement ("CLA") 21 | In order to accept your pull request, we need you to submit a CLA. You only need 22 | to do this once to work on any of Facebook's open source projects. 23 | 24 | Complete your CLA here: <https://code.facebook.com/cla> 25 | 26 | ## Issues 27 | We use GitHub issues to track public bugs. Please ensure your description is 28 | clear and has sufficient instructions to be able to reproduce the issue. 29 | 30 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 31 | disclosure of security bugs. In those cases, please go through the process 32 | outlined on that page and do not file a public issue. 33 | 34 | ## Coding Style 35 | * 4 spaces for indentation rather than tabs 36 | 37 | ## License 38 | By contributing to DMC-Net, you agree that your contributions will 39 | be licensed under the LICENSE file in the root directory of this source tree. 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DMC-Net 2 | 3 | ### Citing 4 | If you find DMC-Net useful, please consider citing: 5 | 6 | @inproceedings{shou2019dmc, 7 | title={DMC-Net: Generating Discriminative Motion Cues for Fast Compressed Video Action Recognition}, 8 | author={Shou, Zheng and Lin, Xudong and Kalantidis, Yannis and Sevilla-Lara, Laura and Rohrbach, Marcus and Chang, Shih-Fu and Yan, Zhicheng}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 10 | year={2019} 11 | } 12 | 13 | ### Overview 14 | 15 | `./exp_my/` contains scripts for running experiments, together with our trained models and prediction results. 16 | 17 | `./code/` contains the implementations of 3 major models: 18 | 0. `dmcnet` is the version which does not include the adversarial loss during training and uses ResNet-18 for classifying DMC; 19 | 1. `dmcnet_GAN` is the version which includes the adversarial loss during training and uses ResNet-18 for classifying DMC; 20 | 2. `dmcnet_I3D` is the version which uses I3D for classifying DMC. 21 | 22 | **In the following, we present how to use `dmcnet` and `dmcnet_GAN`. Instructions for `dmcnet_I3D` can be found in `./code/dmcnet_I3D/`.** 23 | 24 | ## DMC-Net with ResNet-18 classifier 25 | 26 | ### Installation 27 | 28 | We implement `dmcnet` and `dmcnet_GAN` in PyTorch, building on [CoViAR](https://github.com/chaoyuaw/pytorch-coviar). Please refer to CoViAR for details of setup and installation (e.g., how to prepare input videos, how to set up the MPEG-4 compressed video data loader, etc.). Specifically, the released models were trained using Python 3.6, PyTorch 0.3.1, CUDA 9.0, and MPEG-4 video with GOP size 12 and macroblock size 16x16. 29 | 30 | Optical flow extraction: we extract optical flow with the TV-L1 algorithm implementation from [dense_flow](https://github.com/wanglimin/dense_flow), store the flow images beforehand, and load them during training. 31 | 32 | In both `./code/dmcnet/` and `./code/dmcnet_GAN/`, please first symlink `exp/` to the actual directory of `./exp_my/` so that all data will be stored in the experiment folder. 33 | 34 | ### Usage 35 | 36 | As stated in the paper, we first train DMC-Net with the classification loss and the flow-reconstruction MSE loss but without the adversarial loss (using `./code/dmcnet/`). A sample training script for HMDB-51 can be found at `exp_my/hmdb51_gen_flow/split1/run.sh`. Perform training and testing via `bash exp/hmdb51_gen_flow/split1/run.sh`. The trained model will be saved as `exp/hmdb51_gen_flow/split1/_mv_model_best.pth.tar`. 37 | 38 | Explanations of some key options used in the `run.sh` script (detailed descriptions can be found in `train_options.py`): 39 | 40 | 0. `data-root`: specifies the directory storing MPEG-4 videos; 41 | 1. `train-list` and `test-list`: specify the training and testing video lists. Some example lines in such list files (the format follows [CoViAR](https://github.com/chaoyuaw/pytorch-coviar): directory class class_index): 42 | 43 | smile/Me_smiling_smile_h_nm_np1_fr_goo_0.avi smile 0 44 | 45 | clap/Alex_applauding_himself_clap_u_nm_np1_fr_med_0.avi clap 1 46 | 47 | climb/Chiara_Kletterwand_climb_f_cm_np1_ba_bad_0.avi climb 2 48 | 49 | 2. `flow-root`: specifies the directory storing ground-truth optical flow images extracted by [dense_flow](https://github.com/wanglimin/dense_flow). Sample directory layout: 50 | 51 | flow-root/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0/flow_x_00001.jpg 52 | 53 | flow-root/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0/flow_y_00001.jpg 54 | 55 | flow-root/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0/flow_x_00002.jpg 56 | 57 | flow-root/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_1/flow_x_00001.jpg 58 | 59 | flow-root/climb_stairs/BIG_FISH_climb_stairs_f_nm_np1_fr_med_1/flow_x_00001.jpg 60 | 61 | Then we use the trained model above as initialization and train with the adversarial loss included (using `./code/dmcnet_GAN/`). A sample training script for HMDB-51 can be found at `exp_my/hmdb51_gan/split1/run.sh`. To reproduce the result on HMDB-51, simply run `bash exp/hmdb51_gan/split1/run.sh; bash ./exp/hmdb51_gan/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split1/acc.log`. The trained model will be saved as `exp/hmdb51_gan/split1/_mv_model_best.pth.tar`, the prediction results will be stored in `exp/hmdb51_gan/split1/mv_score_model_best.npz`, and `./exp/hmdb51_gan/split1/acc.log` records the accuracy after fusing all modalities. 62 | 
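The per-modality score fusion in `run_combine.sh` is performed by `combine.py`. A hedged sketch of such an invocation, assuming the HMDB-51 split1 score files shipped in `./exp_my/` (the weights shown are just the defaults in `combine.py`; the exact arguments and weights we used live in each `run_combine.sh`):

    python combine.py \
        --iframe exp/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz \
        --mv exp/hmdb51_coviar/mv/split1/mv_score_model_best.npz \
        --res exp/hmdb51_coviar/residual/split1/residual_score_model_best.npz \
        --flow exp/hmdb51_gan/split1/mv_score_model_best.npz \
        --wi 2.0 --wm 1.0 --wr 1.0 --wf 1.0

Passing the DMC-Net score file via `--flow` (i.e. letting the generated-flow stream take the optical-flow slot at fusion time) is an assumption based on the file layout in `./exp_my/`; check `run_combine.sh` for the exact usage.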
63 | ### Our trained models 64 | 65 | We provide our trained models and prediction results at `dl.fbaipublicfiles.com/dmc-net/models.zip`. The directory structure of `./models/` mirrors that of `./exp_my/`. For each dataset and split, please put the trained model and prediction result from `./models/` into the corresponding experiment folder in `./exp_my/`. 66 | 67 | ### Results 68 | 69 | Accuracy (%) | HMDB-51 | UCF-101 70 | ---------|--------|----- 71 | [EMV-CNN](https://ieeexplore.ieee.org/abstract/document/7780666) | 51.2 (split1) | 86.4 72 | [DTMV-CNN](https://zbwglory.github.io/papers/08249882.pdf) | 55.3 | 87.5 73 | [CoViAR](https://github.com/chaoyuaw/pytorch-coviar) | 59.1 | 90.4 74 | DMC-Net (ResNet-18) | 62.8 | 90.9 75 | DMC-Net (I3D) | 71.8 | 92.3 76 | DMC-Net (I3D) + I3D RGB | 77.8 | 96.5 77 | 78 | ## License 79 | DMC-Net is MIT licensed, as found in the LICENSE file. 80 | -------------------------------------------------------------------------------- /code/dmcnet/._train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet/._train.py -------------------------------------------------------------------------------- /code/dmcnet/combine.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | """Combine testing results of the three models to get final accuracy.""" 9 | 10 | import argparse 11 | import numpy as np 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser(description="combine predictions") 15 | parser.add_argument('--iframe', type=str, required=True, 16 | help='iframe score file.') 17 | parser.add_argument('--mv', type=str, required=True, 18 | help='motion vector score file.') 19 | parser.add_argument('--res', type=str, required=True, 20 | help='residual score file.') 21 | parser.add_argument('--flow', type=str, default=None, 22 | help='residual score file.') 23 | 24 | parser.add_argument('--wi', type=float, default=2.0, 25 | help='iframe weight.') 26 | parser.add_argument('--wm', type=float, default=1.0, 27 | help='motion vector weight.') 28 | parser.add_argument('--wr', type=float, default=1.0, 29 | help='residual weight.') 30 | parser.add_argument('--wf', type=float, default=1.0, 31 | help='flow weight.') 32 | 33 | args = parser.parse_args() 34 | 35 | with np.load(args.iframe) as iframe: 36 | with np.load(args.mv) as mv: 37 | with np.load(args.res) as residual: 38 | n = len(mv['names']) 39 | 40 | i_score = np.array([score[0][0] for score in iframe['scores']]) 41 | mv_score = np.array([score[0][0] for score in mv['scores']]) 42 | res_score = np.array([score[0][0] for score in residual['scores']]) 43 | 44 | i_label = np.array([score[1] for score in iframe['scores']]) 45 | mv_label = np.array([score[1] for score in mv['scores']]) 46 | res_label = np.array([score[1] for score in residual['scores']]) 47 | assert np.alltrue(i_label == mv_label) and np.alltrue(i_label == res_label) 48 | 49 | combined_score = i_score * args.wi + mv_score * args.wm + res_score * args.wr 50 | 51 | if args.flow is not None: 52 | flow = np.load(args.flow) 53 | flow_score = np.array([score[0][0] for score in flow['scores']]) 54 | combined_score += args.wf * flow_score 55 | 56 | accuracy = float(sum(np.argmax(combined_score, axis=1) == i_label)) / n 57 | print('Accuracy: %f (%d).' 
% (accuracy, n)) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /code/dmcnet/data_loader/install.sh: -------------------------------------------------------------------------------- 1 | rm -rf build 2 | python setup.py build_ext 3 | python setup.py install --user 4 | -------------------------------------------------------------------------------- /code/dmcnet/data_loader/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | import numpy as np 3 | 4 | coviar_utils_module = Extension('coviar', 5 | sources = ['coviar_data_loader.c'], 6 | include_dirs=[np.get_include(), '/mnt/homedir/zshou/code/FFmpeg/include/'], 7 | extra_compile_args=['-DNDEBUG', '-O3', '-std=c99'], 8 | extra_link_args=['-lavutil', '-lavcodec', '-lavformat', '-lswscale', '-L/mnt/homedir/zshou/code/FFmpeg/lib/'] 9 | ) 10 | 11 | setup ( name = 'coviar', 12 | version = '0.1', 13 | description = 'Utils for coviar training.', 14 | ext_modules = [ coviar_utils_module ] 15 | ) 16 | -------------------------------------------------------------------------------- /code/dmcnet/exp: -------------------------------------------------------------------------------- 1 | /private/home/zshou/exp/coviar_exp/ -------------------------------------------------------------------------------- /code/dmcnet/test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | """Run testing given a trained model.""" 9 | 10 | import argparse 11 | import time 12 | import os 13 | 14 | from dataset import CoviarDataSet 15 | from model import Model 16 | from transforms import GroupCenterCrop 17 | from transforms import GroupOverSample 18 | from transforms import GroupScale 19 | 20 | import numpy as np 21 | import torch.nn.parallel 22 | import torch.optim 23 | import torchvision 24 | 25 | parser = argparse.ArgumentParser( 26 | description="Standard video-level testing") 27 | parser.add_argument('--data-name', type=str, choices=['ucf101', 'hmdb51']) 28 | parser.add_argument('--representation', type=str, choices=['iframe', 'residual', 'mv', 'flow']) 29 | parser.add_argument('--no-accumulation', action='store_true', 30 | help='disable accumulation of motion vectors and residuals.') 31 | parser.add_argument('--new_length', type=int, default=1, 32 | help='number of MV/OF stacked to be processed together.') 33 | parser.add_argument('--use_databn', type=int, default=1, 34 | help='add databn for mv, residual, flow or not.') 35 | parser.add_argument('--flow_ds_factor', type=int, default=0, 36 | help='flow downsample factor.') 37 | parser.add_argument('--upsample_interp', type=bool, default=False, 38 | help='upsample via interpolation or not.') 39 | parser.add_argument('--data-root', type=str) 40 | parser.add_argument('--flow-root', type=str, help='directory of storing pre-extracted optical flow images.') 41 | parser.add_argument('--data-flow', type=str, default='tvl1') 42 | parser.add_argument('--test-list', type=str) 43 | parser.add_argument('--weights', type=str) 44 | parser.add_argument('--batch-size', default=1, type=int, help='batch size.') 45 | parser.add_argument('--arch', type=str) 46 | parser.add_argument('--arch_estimator', type=str, 
default="ContextNetwork", help='estimator architecture.') 47 | parser.add_argument('--save-scores', type=str, default=None) 48 | parser.add_argument('--test_segments', type=int, default=25) 49 | parser.add_argument('--test-crops', type=int, default=10) 50 | parser.add_argument('--input_size', type=int, default=224) 51 | parser.add_argument('-j', '--workers', default=1, type=int, metavar='N', 52 | help='number of workers for data loader.') 53 | parser.add_argument('--gpus', nargs='+', type=int, default=None) 54 | parser.add_argument('--gop', type=int, default=12, help='size of GOP.') 55 | parser.add_argument('--viz', type=bool, default=False, help='visualize or not.') 56 | parser.add_argument('--gen_flow_or_delta', type=int, default=0, help='0: generate flow; 1: generate flow delta.') 57 | parser.add_argument('--gen_flow_ds_factor', type=int, default=0, help='the downsample factor used in generating flow of small size') 58 | parser.add_argument('--att', type=int, default=0, help='0: no attention; 1: pixel-level attention.') 59 | 60 | args = parser.parse_args() 61 | 62 | if args.data_name == 'ucf101': 63 | num_class = 101 64 | elif args.data_name == 'hmdb51': 65 | num_class = 51 66 | else: 67 | raise ValueError('Unknown dataset '+args.data_name) 68 | 69 | 70 | def main(): 71 | # define the model 72 | net = Model(num_class, args.test_segments, args.representation, 73 | base_model=args.arch, 74 | new_length=args.new_length, 75 | use_databn=args.use_databn, 76 | gen_flow_or_delta=args.gen_flow_or_delta, 77 | gen_flow_ds_factor=args.gen_flow_ds_factor, 78 | arch_estimator=args.arch_estimator, 79 | att=args.att) 80 | 81 | # load the trained model 82 | checkpoint = torch.load(args.weights, map_location=lambda storage, loc: storage) 83 | print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) 84 | 85 | base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} 86 | net.load_state_dict(base_dict, strict=False) 87 | 88 | # setup the data loader 89 | if args.test_crops == 1: 90 | cropping = torchvision.transforms.Compose([ 91 | GroupScale(net.scale_size), 92 | GroupCenterCrop(net.crop_size), 93 | ]) 94 | elif args.test_crops == 10: 95 | cropping = torchvision.transforms.Compose([ 96 | GroupOverSample(net.crop_size, net.scale_size) 97 | ]) 98 | else: 99 | raise ValueError("Only 1 and 10 crops are supported, but got {}.".format(args.test_crops)) 100 | 101 | data_loader = torch.utils.data.DataLoader( 102 | CoviarDataSet( 103 | args.data_root, 104 | args.flow_root, 105 | args.data_name, 106 | video_list=args.test_list, 107 | num_segments=args.test_segments, 108 | representation=args.representation, 109 | new_length=args.new_length, 110 | flow_ds_factor=args.flow_ds_factor, 111 | upsample_interp=args.upsample_interp, 112 | transform=cropping, 113 | is_train=False, 114 | accumulate=(not args.no_accumulation), 115 | gop=args.gop, 116 | flow_folder=args.data_flow, 117 | viz=args.viz 118 | ), 119 | batch_size=1, shuffle=False, 120 | num_workers=args.workers * 2, pin_memory=True) 121 | 122 | # deploy model on gpu 123 | if args.gpus is not None: 124 | devices = [args.gpus[i] for i in range(args.workers)] 125 | else: 126 | devices = list(range(args.workers)) 127 | 128 | net.cuda(devices[0]) 129 | #net.base_model.cuda(devices[-1]) 130 | net = torch.nn.DataParallel(net, device_ids=devices) 131 | 132 | # switch to inference model and start to iterate over the test set 133 | net.eval() 134 | 135 | total_num = len(data_loader.dataset) 136 | 
output = [] 137 | 138 | # process each video to obtain its predictions 139 | def forward_video(input_mv, input_residual, att=0): 140 | input_mv_var = torch.autograd.Variable(input_mv, volatile=True) 141 | input_residual_var = torch.autograd.Variable(input_residual, volatile=True) 142 | if att == 0: 143 | scores, gen_flow = net(input_mv_var, input_residual_var) 144 | if att == 1: 145 | scores, gen_flow, att_flow = net(input_mv_var, input_residual_var) 146 | scores = scores.view((-1, args.test_segments * args.test_crops) + scores.size()[1:]) 147 | scores = torch.mean(scores, dim=1) 148 | if att == 0: 149 | return scores.data.cpu().numpy().copy(), gen_flow 150 | if att == 1: 151 | return scores.data.cpu().numpy().copy(), gen_flow, att_flow 152 | 153 | proc_start_time = time.time() 154 | 155 | # iterate over the whole test set 156 | for i, (input_flow, input_mv, input_residual, label) in enumerate(data_loader): 157 | input_mv = input_mv.cuda(args.gpus[-1], async=True) 158 | input_residual = input_residual.cuda(args.gpus[0], async=True) 159 | input_flow = input_flow.cuda(args.gpus[-1], async=True) 160 | 161 | # print("input_flow shape:") 162 | # print(input_flow.shape) # torch.Size([batch_size, num_crops*num_segments, 2, 224, 224]) 163 | # print("input_flow type:") # print(input_flow.type()) # torch.cuda.FloatTensor 164 | if args.att == 0: 165 | video_scores, gen_flow = forward_video(input_mv, input_residual) 166 | if args.att == 1: 167 | video_scores, gen_flow, att_flow = forward_video(input_mv, input_residual, args.att) 168 | output.append((video_scores, label[0])) 169 | cnt_time = time.time() - proc_start_time 170 | if (i + 1) % 100 == 0: 171 | print('video {} done, total {}/{}, average {} sec/video'.format(i, i+1, 172 | total_num, 173 | float(cnt_time) / (i+1))) 174 | 175 | video_pred = [np.argmax(x[0]) for x in output] 176 | video_labels = [x[1] for x in output] 177 | 178 | print('Accuracy {:.02f}% ({})'.format( 179 | float(np.sum(np.array(video_pred) == np.array(video_labels))) / len(video_pred) * 100.0, 180 | len(video_pred))) 181 | 182 | 183 | if args.save_scores is not None: 184 | 185 | name_list = [x.strip().split()[0] for x in open(args.test_list)] 186 | order_dict = {e:i for i, e in enumerate(sorted(name_list))} 187 | 188 | reorder_output = [None] * len(output) 189 | reorder_label = [None] * len(output) 190 | reorder_name = [None] * len(output) 191 | 192 | for i in range(len(output)): 193 | idx = order_dict[name_list[i]] 194 | reorder_output[idx] = output[i] 195 | reorder_label[idx] = video_labels[i] 196 | reorder_name[idx] = name_list[i] 197 | 198 | np.savez(args.save_scores, scores=reorder_output, labels=reorder_label, names=reorder_name) 199 | 200 | 201 | if __name__ == '__main__': 202 | main() 203 | -------------------------------------------------------------------------------- /code/dmcnet/train_options.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | """Training options.""" 9 | 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser(description="CoViAR") 13 | 14 | # Data. 
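# A hedged sketch of how the data options below are typically supplied on the
# command line (the paths are hypothetical placeholders; see the run.sh
# scripts under exp_my/ for the exact invocations used in our experiments):
#
#   python train.py --data-name hmdb51 \
#       --data-root /path/to/mpeg4_videos \
#       --flow-root /path/to/flow_images \
#       --train-list hmdb51_split1_train.txt \
#       --test-list hmdb51_split1_test.txt \
#       --gop 12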
15 | parser.add_argument('--data-name', type=str, choices=['ucf101', 'hmdb51', 'kinetics400'], 16 | help='dataset name.') 17 | parser.add_argument('--data-root', type=str, 18 | help='root of data directory.') 19 | parser.add_argument('--flow-root', type=str, 20 | help='directory storing pre-extracted optical flow images.') 21 | parser.add_argument('--data-flow', type=str, default='tvl1', 22 | help='variant of pre-extracted optical flow to load; used as the flow folder name (e.g. tvl1).') 23 | parser.add_argument('--train-list', type=str, 24 | help='training example list.') 25 | parser.add_argument('--test-list', type=str, 26 | help='testing example list.') 27 | parser.add_argument('--gop', type=int, default=12, 28 | help='size of GOP.') 29 | 30 | # Model. 31 | parser.add_argument('--representation', type=str, choices=['iframe', 'mv', 'residual', 'flow'], 32 | help='data representation.') 33 | parser.add_argument('--arch', type=str, default="resnet152", 34 | help='base architecture.') 35 | parser.add_argument('--arch_estimator', type=str, default="ContextNetwork", 36 | help='estimator architecture.') 37 | parser.add_argument('--num_segments', type=int, default=3, 38 | help='number of TSN segments.') 39 | parser.add_argument('--no-accumulation', action='store_true', 40 | help='disable accumulation of motion vectors and residuals.') 41 | parser.add_argument('--new_length', type=int, default=1, 42 | help='number of MV/OF stacked to be processed together.') 43 | parser.add_argument('--flow_ds_factor', type=int, default=0, 44 | help='flow downsample factor.') 45 | parser.add_argument('--gen_flow_ds_factor', type=int, default=0, 46 | help='the downsample factor used when generating flow at a smaller size.') 47 | parser.add_argument('--upsample_interp', type=bool, default=False, 48 | help='upsample via interpolation or not.') 49 | parser.add_argument('--use_databn', type=int, default=1, 50 | help='add data batchnorm for mv, residual, flow or not. 1: yes; 0: no.') 51 | parser.add_argument('--gen_flow_or_delta', type=int, default=0, 52 | help='0: generate flow; 1: generate flow delta.') 53 | parser.add_argument('--att', type=int, default=0, 54 | help='0: no attention; 1: pixel-level attention.') 55 | parser.add_argument('--mv_minmaxnorm', type=int, default=0, 56 | help='use min-max normalization to map MV values from roughly 128+-20 to the full 128+-127 range.') 57 | 58 | # Training. 
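# How the loss-weight options below are assumed to combine (a sketch; the
# exact formulation lives in train.py). The README trains this variant with a
# classification loss plus a flow-reconstruction MSE loss, so roughly:
#
#   total_loss = lr_cls * cross_entropy(class_scores, labels)
#              + lr_mse * mse_loss(generated_flow, gt_flow)
#
# lr_cls_mult and lr_mse_mult are assumed to further scale the learning rates
# of the classifier and flow-estimator parameter groups.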
59 | parser.add_argument('--weights', default=None, type=str) 60 | parser.add_argument('--resume', default=None, type=str) 61 | parser.add_argument('--epochs', default=500, type=int, 62 | help='number of training epochs.') 63 | parser.add_argument('--epoch-thre', default=500, type=int, 64 | help='epoch threshold used by the training schedule (see train.py for how it is applied).') 65 | parser.add_argument('--batch-size', default=40, type=int, 66 | help='batch size.') 67 | parser.add_argument('--lr', default=0.001, type=float, 68 | help='base learning rate.') 69 | parser.add_argument('--lr-cls', default=1, type=float, 70 | help='cls loss weight.') 71 | parser.add_argument('--loss-mse', default='MSELoss', type=str) 72 | parser.add_argument('--lr-mse', default=0.1, type=float, 73 | help='mse loss weight.') 74 | parser.add_argument('--lr_cls_mult', default=0.01, type=float, help='cls learning multiplier.') 75 | parser.add_argument('--lr_mse_mult', default=0.01, type=float, help='mse learning multiplier.') 76 | parser.add_argument('--lr-steps', default=[200, 300, 400], type=float, nargs="+", 77 | help='epochs to decay learning rate.') 78 | parser.add_argument('--lr-decay', default=0.1, type=float, 79 | help='lr decay factor.') 80 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, 81 | help='weight decay.') 82 | 83 | # Log. 84 | parser.add_argument('--eval-freq', default=5, type=int, 85 | help='evaluation frequency (epochs).') 86 | parser.add_argument('--workers', default=8, type=int, 87 | help='number of data loader workers.') 88 | parser.add_argument('--model-prefix', type=str, default="model", 89 | help="prefix of model name.") 90 | parser.add_argument('--gpus', nargs='+', type=int, default=None, 91 | help='gpu ids.') 92 | -------------------------------------------------------------------------------- /code/dmcnet/transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | """Functions for data augmentation and related preprocessing.""" 9 | 10 | import random 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def color_aug(img, random_h=36, random_l=50, random_s=50): 16 | img = cv2.cvtColor(img, cv2.COLOR_BGR2HLS).astype(float) 17 | 18 | h = (random.random() * 2 - 1.0) * random_h 19 | l = (random.random() * 2 - 1.0) * random_l 20 | s = (random.random() * 2 - 1.0) * random_s 21 | 22 | img[..., 0] += h 23 | img[..., 0] = np.minimum(img[..., 0], 180) 24 | 25 | img[..., 1] += l 26 | img[..., 1] = np.minimum(img[..., 1], 255) 27 | 28 | img[..., 2] += s 29 | img[..., 2] = np.minimum(img[..., 2], 255) 30 | 31 | img = np.maximum(img, 0) 32 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_HLS2BGR) 33 | return img 34 | 35 | 36 | class GroupCenterCrop(object): 37 | def __init__(self, size): 38 | self._size = size 39 | 40 | def __call__(self, img_group): 41 | h, w, _ = img_group[0].shape 42 | hs = (h - self._size) // 2 43 | ws = (w - self._size) // 2 44 | return [img[hs:hs+self._size, ws:ws+self._size] for img in img_group] 45 | 46 | 47 | class GroupRandomHorizontalFlip(object): 48 | def __call__(self, img_group, is_mv_or_flow=False): 49 | if random.random() < 0.5: 50 | ret = [img[:, ::-1, :].astype(np.int32) for img in img_group] # residual and then flow and MV 51 | for i in range(len(ret)): 52 | ret[i][:, :, :4] -= 128 53 | ret[i][..., 0] *= (-1) 54 | ret[i][..., 2] *= (-1) 55 | ret[i][:, :, :4] += 128 56 | return ret 57 | else: 58 | return img_group 59 | 60 | class GroupScale(object): 61 | def __init__(self, size): 62 | self._size = (size, size) 63 | 64 | def __call__(self, img_group): 65 | # resize flow then MV then residual 66 | ret_img_group = [] 67 | for idx in range(0, len(img_group)): 68 | #print('Input frames shape %s:' % str(img_group[idx].shape)) 69 | #print('Input frames after transform mv flow shape %s:' % str(resize_mv(img_group[idx][:, :, :4], self._size, cv2.INTER_LINEAR).shape)) 70 | #print('Input frames after transform residual shape %s:' % str(cv2.resize(img_group[idx][:, :, 4:], self._size, cv2.INTER_LINEAR).shape)) 71 | ret_img_group.append( 72 | np.concatenate((resize_mv(img_group[idx][:, :, :4], self._size, cv2.INTER_LINEAR), 73 | cv2.resize(img_group[idx][:, :, 4:], self._size, cv2.INTER_LINEAR)), axis=2)) 74 | 75 | return ret_img_group 76 | 77 | class GroupOverSample(object): 78 | def __init__(self, crop_size, scale_size=None): 79 | self.crop_size = crop_size if not isinstance(crop_size, int) else (crop_size, crop_size) 80 | 81 | if scale_size is not None: 82 | self.scale_worker = GroupScale(scale_size) 83 | else: 84 | self.scale_worker = None 85 | 86 | def __call__(self, img_group): 87 | 88 | if self.scale_worker is not None: 89 | img_group = self.scale_worker(img_group) 90 | 91 | image_w, image_h, _ = img_group[0].shape 92 | crop_w, crop_h = self.crop_size 93 | 94 | offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h, crop_w, crop_h) 95 | oversample_group = list() 96 | 97 | for o_w, o_h in offsets: 98 | for img in img_group: 99 | 100 | crop = img[o_w:o_w+crop_w, o_h:o_h+crop_h] 101 | oversample_group.append(crop) 102 | 103 | flip_crop = crop[:, ::-1, :].astype(np.int32) # residual 104 | flip_crop[:, :, :4] -= 128 # flow and MV 105 | flip_crop[..., 0] *= (-1) 106 | flip_crop[..., 2] *= (-1) 107 | flip_crop[:, :, :4] += 128 108 | oversample_group.append(flip_crop) 109 | 110 | return oversample_group 111 | 112 | def resize_mv(img, shape, interpolation): 113 | return np.stack([cv2.resize(img[..., i], 
shape, interpolation=interpolation) # pass interpolation by keyword: the third positional argument of cv2.resize is dst 114 | for i in range(img.shape[-1])], axis=2) 115 | 116 | 117 | class GroupMultiScaleCrop(object): 118 | def __init__(self, input_size, scales=None, max_distort=1, fix_crop=False, more_fix_crop=True): 119 | self.scales = scales if scales is not None else [1, .875, .75, .66] 120 | self.max_distort = max_distort 121 | self.fix_crop = fix_crop 122 | self.more_fix_crop = more_fix_crop 123 | self.input_size = input_size if not isinstance(input_size, int) else [input_size, input_size] # 224x224 124 | 125 | def __call__(self, img_group): 126 | 127 | im_size = img_group[0].shape 128 | crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size) 129 | crop_img_group = [img[offset_w:offset_w + crop_w, offset_h:offset_h + crop_h] for img in img_group] 130 | 131 | # resize flow -> MV -> residual 132 | ret_img_group = [] 133 | for idx in range(0, len(img_group)): 134 | ret_img_group.append( 135 | np.concatenate( 136 | (resize_mv(crop_img_group[idx][:, :, :4], (self.input_size[0], self.input_size[1]), cv2.INTER_LINEAR), 137 | cv2.resize(crop_img_group[idx][:, :, 4:], (self.input_size[0], self.input_size[1]), cv2.INTER_LINEAR)), axis=2)) 138 | 139 | return ret_img_group 140 | 141 | def _sample_crop_size(self, im_size): 142 | image_w, image_h = im_size[0], im_size[1] 143 | 144 | base_size = min(image_w, image_h) 145 | crop_sizes = [int(base_size * x) for x in self.scales] 146 | crop_h = [self.input_size[1] if abs(x - self.input_size[1]) < 3 else x for x in crop_sizes] 147 | crop_w = [self.input_size[0] if abs(x - self.input_size[0]) < 3 else x for x in crop_sizes] 148 | 149 | pairs = [] 150 | for i, h in enumerate(crop_h): 151 | for j, w in enumerate(crop_w): 152 | if abs(i - j) <= self.max_distort: 153 | pairs.append((w, h)) 154 | 155 | crop_pair = random.choice(pairs) 156 | if not self.fix_crop: 157 | w_offset = random.randint(0, image_w - crop_pair[0]) 158 | h_offset = random.randint(0, image_h - crop_pair[1]) 159 | else: 160 | w_offset, h_offset = self._sample_fix_offset(image_w, image_h, crop_pair[0], crop_pair[1]) 161 | 162 | return crop_pair[0], crop_pair[1], w_offset, h_offset 163 | 164 | def _sample_fix_offset(self, image_w, image_h, crop_w, crop_h): 165 | offsets = self.fill_fix_offset(self.more_fix_crop, image_w, image_h, crop_w, crop_h) 166 | return random.choice(offsets) 167 | 168 | @staticmethod 169 | def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h): 170 | w_step = (image_w - crop_w) // 4 171 | h_step = (image_h - crop_h) // 4 172 | 173 | ret = list() 174 | ret.append((0, 0)) # upper left 175 | ret.append((4 * w_step, 0)) # upper right 176 | ret.append((0, 4 * h_step)) # lower left 177 | ret.append((4 * w_step, 4 * h_step)) # lower right 178 | ret.append((2 * w_step, 2 * h_step)) # center 179 | 180 | if more_fix_crop: 181 | ret.append((0, 2 * h_step)) # center left 182 | ret.append((4 * w_step, 2 * h_step)) # center right 183 | ret.append((2 * w_step, 4 * h_step)) # lower center 184 | ret.append((2 * w_step, 0 * h_step)) # upper center 185 | 186 | ret.append((1 * w_step, 1 * h_step)) # upper left quarter 187 | ret.append((3 * w_step, 1 * h_step)) # upper right quarter 188 | ret.append((1 * w_step, 3 * h_step)) # lower left quarter 189 | ret.append((3 * w_step, 3 * h_step)) # lower right quarter 190 | 191 | return ret 192 | -------------------------------------------------------------------------------- /code/dmcnet/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, 
Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import numpy as np 9 | 10 | def viz_flow(u,v,logscale=True,scaledown=6,output=False): 11 | """ 12 | topleft is zero, u is horiz/x/width, v is vertical/y/height 13 | red is 3 o'clock, yellow is 6, light blue is 9, blue/purple is 12 14 | """ 15 | colorwheel = makecolorwheel() 16 | ncols = colorwheel.shape[0] 17 | 18 | radius = np.sqrt(u**2 + v**2) 19 | if output: 20 | print("Maximum flow magnitude: %04f" % np.max(radius)) 21 | if logscale: 22 | radius = np.log(radius + 1) 23 | if output: 24 | print("Maximum flow magnitude (after log): %0.4f" % np.max(radius)) 25 | radius = radius / scaledown 26 | if output: 27 | print("Maximum flow magnitude (after scaledown): %0.4f" % np.max(radius)) 28 | rot = np.arctan2(-v, -u) / np.pi 29 | 30 | fk = (rot+1)/2 * (ncols-1) # -1~1 mapped to 0~ncols-1 31 | k0 = fk.astype(np.uint8) # 0, 1, 2, ..., ncols-1 32 | 33 | k1 = k0+1 34 | k1[k1 == ncols] = 0 35 | 36 | f = fk - k0 37 | 38 | ncolors = colorwheel.shape[1] 39 | img = np.zeros(u.shape+(ncolors,)) 40 | for i in range(ncolors): 41 | tmp = colorwheel[:,i] 42 | col0 = tmp[k0] 43 | col1 = tmp[k1] 44 | col = (1-f)*col0 + f*col1 45 | 46 | idx = radius <= 1 47 | # increase saturation with radius 48 | col[idx] = 1 - radius[idx]*(1-col[idx]) 49 | # out of range 50 | col[~idx] *= 0.75 51 | img[:,:,i] = np.floor(255*col).astype(np.uint8) 52 | 53 | return img.astype(np.uint8) 54 | 55 | 56 | 57 | def makecolorwheel(): 58 | # Create a colorwheel for visualization 59 | RY = 15 60 | YG = 6 61 | GC = 4 62 | CB = 11 63 | BM = 13 64 | MR = 6 65 | 66 | ncols = RY + YG + GC + CB + BM + MR 67 | 68 | colorwheel = np.zeros((ncols,3)) 69 | 70 | col = 0 71 | # RY 72 | colorwheel[0:RY,0] = 1 73 | colorwheel[0:RY,1] = np.arange(0,1,1./RY) 74 | col += RY 75 | 76 | # YG 77 | colorwheel[col:col+YG,0] = np.arange(1,0,-1./YG) 78 | colorwheel[col:col+YG,1] = 1 79 | col += YG 80 | 81 | # GC 82 | colorwheel[col:col+GC,1] = 1 83 | colorwheel[col:col+GC,2] = np.arange(0,1,1./GC) 84 | col += GC 85 | 86 | # CB 87 | colorwheel[col:col+CB,1] = np.arange(1,0,-1./CB) 88 | colorwheel[col:col+CB,2] = 1 89 | col += CB 90 | 91 | # BM 92 | colorwheel[col:col+BM,2] = 1 93 | colorwheel[col:col+BM,0] = np.arange(0,1,1./BM) 94 | col += BM 95 | 96 | # MR 97 | colorwheel[col:col+MR,2] = np.arange(1,0,-1./MR) 98 | colorwheel[col:col+MR,0] = 1 99 | 100 | return colorwheel 101 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/combine.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | """Combine testing results of the three models to get final accuracy.""" 9 | 10 | import argparse 11 | import numpy as np 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser(description="combine predictions") 15 | parser.add_argument('--iframe', type=str, required=True, 16 | help='iframe score file.') 17 | parser.add_argument('--mv', type=str, required=True, 18 | help='motion vector score file.') 19 | parser.add_argument('--res', type=str, required=True, 20 | help='residual score file.') 21 | parser.add_argument('--flow', type=str, default=None, 22 | help='residual score file.') 23 | 24 | parser.add_argument('--wi', type=float, default=2.0, 25 | help='iframe weight.') 26 | parser.add_argument('--wm', type=float, default=1.0, 27 | help='motion vector weight.') 28 | parser.add_argument('--wr', type=float, default=1.0, 29 | help='residual weight.') 30 | parser.add_argument('--wf', type=float, default=1.0, 31 | help='flow weight.') 32 | 33 | args = parser.parse_args() 34 | 35 | with np.load(args.iframe) as iframe: 36 | with np.load(args.mv) as mv: 37 | with np.load(args.res) as residual: 38 | n = len(mv['names']) 39 | 40 | i_score = np.array([score[0][0] for score in iframe['scores']]) 41 | mv_score = np.array([score[0][0] for score in mv['scores']]) 42 | res_score = np.array([score[0][0] for score in residual['scores']]) 43 | 44 | i_label = np.array([score[1] for score in iframe['scores']]) 45 | mv_label = np.array([score[1] for score in mv['scores']]) 46 | res_label = np.array([score[1] for score in residual['scores']]) 47 | assert np.alltrue(i_label == mv_label) and np.alltrue(i_label == res_label) 48 | 49 | combined_score = i_score * args.wi + mv_score * args.wm + res_score * args.wr 50 | 51 | if args.flow is not None: 52 | flow = np.load(args.flow) 53 | flow_score = np.array([score[0][0] for score in flow['scores']]) 54 | combined_score += args.wf * flow_score 55 | 56 | accuracy = float(sum(np.argmax(combined_score, axis=1) == i_label)) / n 57 | print('Accuracy: %f (%d).' 
% (accuracy, n)) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/data_loader/install.sh: -------------------------------------------------------------------------------- 1 | rm -rf build 2 | python setup.py build_ext 3 | python setup.py install --user 4 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/data_loader/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | import numpy as np 3 | 4 | coviar_utils_module = Extension('coviar', 5 | sources = ['coviar_data_loader.c'], 6 | include_dirs=[np.get_include(), '/mnt/homedir/zshou/code/FFmpeg/include/'], 7 | extra_compile_args=['-DNDEBUG', '-O3', '-std=c99'], 8 | extra_link_args=['-lavutil', '-lavcodec', '-lavformat', '-lswscale', '-L/mnt/homedir/zshou/code/FFmpeg/lib/'] 9 | ) 10 | 11 | setup ( name = 'coviar', 12 | version = '0.1', 13 | description = 'Utils for coviar training.', 14 | ext_modules = [ coviar_utils_module ] 15 | ) 16 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/exp: -------------------------------------------------------------------------------- 1 | /private/home/zshou/exp/coviar_exp/ -------------------------------------------------------------------------------- /code/dmcnet_GAN/test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | """Run testing given a trained model.""" 9 | 10 | import argparse 11 | import time 12 | import os 13 | 14 | from dataset import CoviarDataSet 15 | from model import Model 16 | from transforms import GroupCenterCrop 17 | from transforms import GroupOverSample 18 | from transforms import GroupScale 19 | 20 | import numpy as np 21 | import torch.nn.parallel 22 | import torch.optim 23 | import torchvision 24 | 25 | 26 | parser = argparse.ArgumentParser( 27 | description="Standard video-level testing") 28 | parser.add_argument('--data-name', type=str, choices=['ucf101', 'hmdb51', 'kinetics400']) 29 | parser.add_argument('--representation', type=str, choices=['iframe', 'residual', 'mv', 'flow']) 30 | parser.add_argument('--no-accumulation', action='store_true', 31 | help='disable accumulation of motion vectors and residuals.') 32 | parser.add_argument('--new_length', type=int, default=1, 33 | help='number of MV/OF stacked to be processed together.') 34 | parser.add_argument('--use_databn', type=int, default=1, 35 | help='add databn for mv, residual, flow or not.') 36 | parser.add_argument('--flow_ds_factor', type=int, default=0, 37 | help='flow downsample factor.') 38 | parser.add_argument('--upsample_interp', type=bool, default=False, 39 | help='upsample via interpolation or not.') 40 | parser.add_argument('--data-root', type=str) 41 | parser.add_argument('--flow-root', type=str, help='directory of storing pre-extracted optical flow images') 42 | parser.add_argument('--data-flow', type=str, default='tvl1') 43 | parser.add_argument('--test-list', type=str) 44 | parser.add_argument('--weights', type=str) 45 | parser.add_argument('--batch-size', default=1, type=int, help='batch size.') 46 | parser.add_argument('--arch', type=str) 47 | 
parser.add_argument('--arch_estimator', type=str, default="ContextNetwork", help='estimator architecture.') 48 | parser.add_argument('--arch_d', type=str, default="Discriminator", help='discriminator architecture.') 49 | parser.add_argument('--save-scores', type=str, default=None) 50 | parser.add_argument('--test_segments', type=int, default=25) 51 | parser.add_argument('--test-crops', type=int, default=10) 52 | parser.add_argument('--input_size', type=int, default=224) 53 | parser.add_argument('-j', '--workers', default=1, type=int, metavar='N', 54 | help='number of workers for data loader.') 55 | parser.add_argument('--gpus', nargs='+', type=int, default=None) 56 | parser.add_argument('--gop', type=int, default=12, help='size of GOP.') 57 | parser.add_argument('--viz', type=bool, default=False, help='visualize or not.') 58 | parser.add_argument('--gen_flow_or_delta', type=int, default=0, help='0: generate flow; 1: generate flow delta') 59 | parser.add_argument('--gen_flow_ds_factor', type=int, default=0, help='the downsample factor used in generating flow of small size') 60 | parser.add_argument('--att', type=int, default=0, help='0: no attention; 1: pixel-level attention.') 61 | parser.add_argument('--mv_minmaxnorm', type=int, default=1, 62 | help='use min max normalization for mv value to map from 128+-20 to 128+-127 something.') 63 | 64 | args = parser.parse_args() 65 | 66 | if args.data_name == 'ucf101': 67 | num_class = 101 68 | elif args.data_name == 'hmdb51': 69 | num_class = 51 70 | elif args.data_name == 'kinetics400': 71 | num_class = 400 72 | else: 73 | raise ValueError('Unknown dataset '+args.data_name) 74 | 75 | 76 | def main(): 77 | # define the whole model network architecture 78 | net = Model(num_class, args.test_segments, args.representation, 79 | base_model=args.arch, 80 | new_length=args.new_length, 81 | use_databn=args.use_databn, 82 | gen_flow_or_delta=args.gen_flow_or_delta, 83 | gen_flow_ds_factor=args.gen_flow_ds_factor, 84 | arch_estimator=args.arch_estimator, 85 | arch_d=args.arch_d, 86 | att=args.att) 87 | 88 | # load the trained model 89 | checkpoint = torch.load(args.weights, map_location=lambda storage, loc: storage) 90 | print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) 91 | 92 | base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} 93 | net.load_state_dict(base_dict, strict=False) 94 | 95 | # setup the data loader 96 | if args.test_crops == 1: 97 | cropping = torchvision.transforms.Compose([ 98 | GroupScale(net.scale_size), 99 | GroupCenterCrop(net.crop_size), 100 | ]) 101 | elif args.test_crops == 10: 102 | cropping = torchvision.transforms.Compose([ 103 | GroupOverSample(net.crop_size, net.scale_size) 104 | ]) 105 | else: 106 | raise ValueError("Only 1 and 10 crops are supported, but got {}.".format(args.test_crops)) 107 | 108 | data_loader = torch.utils.data.DataLoader( 109 | CoviarDataSet( 110 | args.data_root, 111 | args.flow_root, 112 | args.data_name, 113 | video_list=args.test_list, 114 | num_segments=args.test_segments, 115 | representation=args.representation, 116 | new_length=args.new_length, 117 | flow_ds_factor=args.flow_ds_factor, 118 | upsample_interp=args.upsample_interp, 119 | transform=cropping, 120 | is_train=False, 121 | accumulate=(not args.no_accumulation), 122 | gop=args.gop, 123 | flow_folder=args.data_flow, 124 | mv_minmaxnorm=args.mv_minmaxnorm, 125 | viz=args.viz 126 | ), 127 | batch_size=1, shuffle=False, 128 | num_workers=args.workers * 2, 
pin_memory=True) 129 | 130 | if args.gpus is not None: 131 | devices = [args.gpus[i] for i in range(args.workers)] 132 | else: 133 | devices = list(range(args.workers)) 134 | 135 | net.cuda(devices[0]) 136 | #net.base_model.cuda(devices[-1]) 137 | net = torch.nn.DataParallel(net, device_ids=devices) 138 | 139 | # switch to inference model and start to iterate over the test set 140 | net.eval() 141 | 142 | total_num = len(data_loader.dataset) 143 | output = [] 144 | 145 | # process each video to obtain its predictions 146 | def forward_video(input_mv, input_residual, att=0): 147 | input_mv_var = torch.autograd.Variable(input_mv, volatile=True) 148 | input_residual_var = torch.autograd.Variable(input_residual, volatile=True) 149 | if att == 0: 150 | scores, validity, gen_flow = net(input_mv_var, input_residual_var) 151 | if att == 1: 152 | scores, validity, gen_flow, att_flow = net(input_mv_var, input_residual_var) 153 | scores = scores.view((-1, args.test_segments * args.test_crops) + scores.size()[1:]) 154 | scores = torch.mean(scores, dim=1) 155 | if att == 0: 156 | return scores.data.cpu().numpy().copy(), validity.data.cpu().numpy().copy(), gen_flow 157 | if att == 1: 158 | return scores.data.cpu().numpy().copy(), validity.data.cpu().numpy().copy(), gen_flow, att_flow 159 | 160 | proc_start_time = time.time() 161 | 162 | # iterate over the whole test set 163 | for i, (input_flow, input_mv, input_residual, label) in enumerate(data_loader): 164 | input_mv = input_mv.cuda(args.gpus[-1], async=True) 165 | input_residual = input_residual.cuda(args.gpus[0], async=True) 166 | input_flow = input_flow.cuda(args.gpus[-1], async=True) 167 | 168 | # print("input_flow shape:") 169 | # print(input_flow.shape) # torch.Size([batch_size, num_crops*num_segments, 2, 224, 224]) 170 | # print("input_flow type:") # print(input_flow.type()) # torch.cuda.FloatTensor 171 | if args.att == 0: 172 | video_scores, validity, gen_flow = forward_video(input_mv, input_residual) 173 | if args.att == 1: 174 | video_scores, validity, gen_flow, att_flow = forward_video(input_mv, input_residual, args.att) 175 | output.append((video_scores, label[0], validity)) 176 | cnt_time = time.time() - proc_start_time 177 | if (i + 1) % 100 == 0: 178 | print('video {} done, total {}/{}, average {} sec/video'.format(i, i+1, 179 | total_num, 180 | float(cnt_time) / (i+1))) 181 | 182 | video_pred = [np.argmax(x[0]) for x in output] 183 | video_labels = [x[1] for x in output] 184 | video_validity = [np.argmax(x[2]) for x in output] 185 | 186 | print('Accuracy cls {:.02f}% ({})'.format( 187 | float(np.sum(np.array(video_pred) == np.array(video_labels))) / len(video_pred) * 100.0, 188 | len(video_pred))) 189 | 190 | print('Accuracy adv G {:.02f}% ({})'.format( 191 | float(np.sum(np.array(video_validity))) / len(video_validity) * 100.0, 192 | len(video_validity))) 193 | 194 | if args.save_scores is not None: 195 | 196 | name_list = [x.strip().split()[0] for x in open(args.test_list)] 197 | order_dict = {e:i for i, e in enumerate(sorted(name_list))} 198 | 199 | reorder_output = [None] * len(output) 200 | reorder_label = [None] * len(output) 201 | reorder_name = [None] * len(output) 202 | 203 | for i in range(len(output)): 204 | idx = order_dict[name_list[i]] 205 | reorder_output[idx] = output[i] 206 | reorder_label[idx] = video_labels[i] 207 | reorder_name[idx] = name_list[i] 208 | 209 | np.savez(args.save_scores, scores=reorder_output, labels=reorder_label, names=reorder_name) 210 | 211 | 212 | if __name__ == '__main__': 213 | main() 214 | 
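A note on the `*_score_model_best.npz` files saved above (the same files shipped under `exp_my/`): each entry of `scores` is indexed as `score[0][0]` for the per-class scores and `score[1]` for the label, which is exactly the layout `combine.py` reads back. A minimal sketch of loading such a file and recomputing top-1 accuracy (`allow_pickle=True` is needed on newer NumPy because the entries are pickled object arrays):

    import numpy as np
    data = np.load('mv_score_model_best.npz', allow_pickle=True)
    cls_scores = np.array([s[0][0] for s in data['scores']])  # (num_videos, num_classes)
    labels = np.array([s[1] for s in data['scores']])
    print('Accuracy: %f' % (np.argmax(cls_scores, axis=1) == labels).mean())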
-------------------------------------------------------------------------------- /code/dmcnet_GAN/train_options.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | """Training options.""" 9 | 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser(description="CoViAR") 13 | 14 | # Data. 15 | parser.add_argument('--data-name', type=str, choices=['ucf101', 'hmdb51', 'kinetics400'], 16 | help='dataset name.') 17 | parser.add_argument('--data-root', type=str, 18 | help='root of data directory.') 19 | parser.add_argument('--flow-root', type=str, 20 | help='directory storing pre-extracted optical flow images.') 21 | parser.add_argument('--data-flow', type=str, default='tvl1', 22 | help='variant of pre-extracted optical flow to load; used as the flow folder name (e.g. tvl1).') 23 | parser.add_argument('--train-list', type=str, 24 | help='training example list.') 25 | parser.add_argument('--test-list', type=str, 26 | help='testing example list.') 27 | parser.add_argument('--gop', type=int, default=12, 28 | help='size of GOP.') 29 | 30 | # Model. 31 | parser.add_argument('--representation', type=str, choices=['iframe', 'mv', 'residual', 'flow'], 32 | help='data representation.') 33 | parser.add_argument('--arch', type=str, default="resnet152", 34 | help='base architecture.') 35 | parser.add_argument('--arch_estimator', type=str, default="ContextNetwork", 36 | help='estimator architecture.') 37 | parser.add_argument('--arch_d', type=str, default="Discriminator", 38 | help='discriminator architecture.') 39 | parser.add_argument('--num_segments', type=int, default=3, 40 | help='number of TSN segments.') 41 | parser.add_argument('--no-accumulation', action='store_true', 42 | help='disable accumulation of motion vectors and residuals.') 43 | parser.add_argument('--new_length', type=int, default=1, 44 | help='number of MV/OF stacked to be processed together.') 45 | parser.add_argument('--flow_ds_factor', type=int, default=0, 46 | help='flow downsample factor.') 47 | parser.add_argument('--gen_flow_ds_factor', type=int, default=0, 48 | help='the downsample factor used when generating flow at a smaller size.') 49 | parser.add_argument('--upsample_interp', type=bool, default=False, 50 | help='upsample via interpolation or not.') 51 | parser.add_argument('--use_databn', type=int, default=1, 52 | help='add data batchnorm for mv, residual, flow or not. 1: yes; 0: no.') 53 | parser.add_argument('--gen_flow_or_delta', type=int, default=0, 54 | help='0: generate flow; 1: generate flow delta.') 55 | parser.add_argument('--att', type=int, default=0, 56 | help='0: no attention; 1: pixel-level attention.') 57 | parser.add_argument('--mv_minmaxnorm', type=int, default=1, 58 | help='use min-max normalization to map MV values from roughly 128+-20 to the full 128+-127 range.') 59 | 60 | # Training. 
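# How the adversarial weights below are assumed to enter the two objectives
# (a sketch; the exact formulation lives in train.py). On top of the dmcnet
# objective (classification loss plus flow-reconstruction MSE), this variant
# plays a GAN game between the flow generator G and the discriminator D:
#
#   loss_G = lr_cls * CE + lr_mse * MSE + lr_adv_g * adv(D(gen_flow), real)
#   loss_D = lr_adv_d * (adv(D(gt_flow), real) + adv(D(gen_flow), fake))
#
# The names adv, gen_flow and gt_flow are illustrative, not identifiers
# taken from train.py.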
61 | parser.add_argument('--weights', default=None, type=str) 62 | parser.add_argument('--resume', default=None, type=str) 63 | parser.add_argument('--epochs', default=500, type=int, 64 | help='number of training epochs.') 65 | parser.add_argument('--epoch-thre', default=500, type=int, 66 | help='epoch threshold: train only the generator for this many epochs before jointly updating the classifier.') 67 | parser.add_argument('--batch-size', default=40, type=int, 68 | help='batch size.') 69 | parser.add_argument('--lr', default=0.001, type=float, 70 | help='base learning rate.') 71 | parser.add_argument('--lr-cls', default=1, type=float, 72 | help='cls loss weight.') 73 | parser.add_argument('--loss-mse', default='MSELoss', type=str) 74 | parser.add_argument('--lr-adv-g', default=1, type=float, 75 | help='adv loss weight during training G.') 76 | parser.add_argument('--lr-adv-d', default=1, type=float, 77 | help='adv loss weight during training D.') 78 | parser.add_argument('--lr-mse', default=0.1, type=float, 79 | help='mse loss weight.') 80 | parser.add_argument('--lr-steps', default=[200, 300, 400], type=float, nargs="+", 81 | help='epochs to decay learning rate.') 82 | parser.add_argument('--lr-decay', default=0.1, type=float, 83 | help='lr decay factor.') 84 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, 85 | help='weight decay.') 86 | parser.add_argument('--lr_cls_mult', default=0.01, type=float, help='cls learning multiplier.') 87 | parser.add_argument('--lr_mse_mult', default=0.01, type=float, help='mse learning multiplier.') 88 | parser.add_argument('--lr_d_mult', default=0.01, type=float, help='discriminator learning multiplier.') 89 | 90 | # Log. 91 | parser.add_argument('--eval-freq', default=5, type=int, 92 | help='evaluation frequency (epochs).') 93 | parser.add_argument('--workers', default=8, type=int, 94 | help='number of data loader workers.') 95 | parser.add_argument('--model-prefix', type=str, default="model", 96 | help="prefix of model name.") 97 | parser.add_argument('--gpus', nargs='+', type=int, default=None, 98 | help='gpu ids.') 99 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | """Functions for data augmentation and related preprocessing.""" 9 | 10 | import random 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def color_aug(img, random_h=36, random_l=50, random_s=50): 16 | img = cv2.cvtColor(img, cv2.COLOR_BGR2HLS).astype(float) 17 | 18 | h = (random.random() * 2 - 1.0) * random_h 19 | l = (random.random() * 2 - 1.0) * random_l 20 | s = (random.random() * 2 - 1.0) * random_s 21 | 22 | img[..., 0] += h 23 | img[..., 0] = np.minimum(img[..., 0], 180) 24 | 25 | img[..., 1] += l 26 | img[..., 1] = np.minimum(img[..., 1], 255) 27 | 28 | img[..., 2] += s 29 | img[..., 2] = np.minimum(img[..., 2], 255) 30 | 31 | img = np.maximum(img, 0) 32 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_HLS2BGR) 33 | return img 34 | 35 | 36 | class GroupCenterCrop(object): 37 | def __init__(self, size): 38 | self._size = size 39 | 40 | def __call__(self, img_group): 41 | h, w, _ = img_group[0].shape 42 | hs = (h - self._size) // 2 43 | ws = (w - self._size) // 2 44 | return [img[hs:hs+self._size, ws:ws+self._size] for img in img_group] 45 | 46 | 47 | class GroupRandomHorizontalFlip(object): 48 | def __call__(self, img_group, is_mv_or_flow=False): 49 | if random.random() < 0.5: 50 | ret = [img[:, ::-1, :].astype(np.int32) for img in img_group] # residual and then flow and MV 51 | for i in range(len(ret)): 52 | ret[i][:, :, :4] -= 128 53 | ret[i][..., 0] *= (-1) 54 | ret[i][..., 2] *= (-1) 55 | ret[i][:, :, :4] += 128 56 | return ret 57 | else: 58 | return img_group 59 | 60 | class GroupScale(object): 61 | def __init__(self, size): 62 | self._size = (size, size) 63 | 64 | def __call__(self, img_group): 65 | # resize flow then MV then residual 66 | ret_img_group = [] 67 | for idx in range(0, len(img_group)): 68 | #print('Input frames shape %s:' % str(img_group[idx].shape)) 69 | #print('Input frames after transform mv flow shape %s:' % str(resize_mv(img_group[idx][:, :, :4], self._size, cv2.INTER_LINEAR).shape)) 70 | #print('Input frames after transform residual shape %s:' % str(cv2.resize(img_group[idx][:, :, 4:], self._size, cv2.INTER_LINEAR).shape)) 71 | ret_img_group.append( 72 | np.concatenate((resize_mv(img_group[idx][:, :, :4], self._size, cv2.INTER_LINEAR), 73 | cv2.resize(img_group[idx][:, :, 4:], self._size, cv2.INTER_LINEAR)), axis=2)) 74 | 75 | return ret_img_group 76 | 77 | class GroupOverSample(object): 78 | def __init__(self, crop_size, scale_size=None): 79 | self.crop_size = crop_size if not isinstance(crop_size, int) else (crop_size, crop_size) 80 | 81 | if scale_size is not None: 82 | self.scale_worker = GroupScale(scale_size) 83 | else: 84 | self.scale_worker = None 85 | 86 | def __call__(self, img_group): 87 | 88 | if self.scale_worker is not None: 89 | img_group = self.scale_worker(img_group) 90 | 91 | image_w, image_h, _ = img_group[0].shape 92 | crop_w, crop_h = self.crop_size 93 | 94 | offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h, crop_w, crop_h) 95 | oversample_group = list() 96 | 97 | for o_w, o_h in offsets: 98 | for img in img_group: 99 | 100 | crop = img[o_w:o_w+crop_w, o_h:o_h+crop_h] 101 | oversample_group.append(crop) 102 | 103 | flip_crop = crop[:, ::-1, :].astype(np.int32) # residual 104 | flip_crop[:, :, :4] -= 128 # flow and MV 105 | flip_crop[..., 0] *= (-1) 106 | flip_crop[..., 2] *= (-1) 107 | flip_crop[:, :, :4] += 128 108 | oversample_group.append(flip_crop) 109 | 110 | return oversample_group 111 | 112 | def resize_mv(img, shape, interpolation): 113 | return np.stack([cv2.resize(img[..., i], 
shape, interpolation) 114 | for i in range(img.shape[-1])], axis=2) 115 | 116 | 117 | class GroupMultiScaleCrop(object): 118 | def __init__(self, input_size, scales=None, max_distort=1, fix_crop=False, more_fix_crop=True): 119 | self.scales = scales if scales is not None else [1, .875, .75, .66] 120 | self.max_distort = max_distort 121 | self.fix_crop = fix_crop 122 | self.more_fix_crop = more_fix_crop 123 | self.input_size = input_size if not isinstance(input_size, int) else [input_size, input_size] # 224x224 124 | 125 | def __call__(self, img_group): 126 | 127 | im_size = img_group[0].shape 128 | crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size) 129 | crop_img_group = [img[offset_w:offset_w + crop_w, offset_h:offset_h + crop_h] for img in img_group] 130 | 131 | # resize flow -> MV -> residual 132 | ret_img_group = [] 133 | for idx in range(0, len(img_group)): 134 | ret_img_group.append( 135 | np.concatenate( 136 | (resize_mv(crop_img_group[idx][:, :, :4], (self.input_size[0], self.input_size[1]), cv2.INTER_LINEAR), 137 | cv2.resize(crop_img_group[idx][:, :, 4:], (self.input_size[0], self.input_size[1]), cv2.INTER_LINEAR)), axis=2)) 138 | 139 | return ret_img_group 140 | 141 | def _sample_crop_size(self, im_size): 142 | image_w, image_h = im_size[0], im_size[1] 143 | 144 | base_size = min(image_w, image_h) 145 | crop_sizes = [int(base_size * x) for x in self.scales] 146 | crop_h = [self.input_size[1] if abs(x - self.input_size[1]) < 3 else x for x in crop_sizes] 147 | crop_w = [self.input_size[0] if abs(x - self.input_size[0]) < 3 else x for x in crop_sizes] 148 | 149 | pairs = [] 150 | for i, h in enumerate(crop_h): 151 | for j, w in enumerate(crop_w): 152 | if abs(i - j) <= self.max_distort: 153 | pairs.append((w, h)) 154 | 155 | crop_pair = random.choice(pairs) 156 | if not self.fix_crop: 157 | w_offset = random.randint(0, image_w - crop_pair[0]) 158 | h_offset = random.randint(0, image_h - crop_pair[1]) 159 | else: 160 | w_offset, h_offset = self._sample_fix_offset(image_w, image_h, crop_pair[0], crop_pair[1]) 161 | 162 | return crop_pair[0], crop_pair[1], w_offset, h_offset 163 | 164 | def _sample_fix_offset(self, image_w, image_h, crop_w, crop_h): 165 | offsets = self.fill_fix_offset(self.more_fix_crop, image_w, image_h, crop_w, crop_h) 166 | return random.choice(offsets) 167 | 168 | @staticmethod 169 | def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h): 170 | w_step = (image_w - crop_w) // 4 171 | h_step = (image_h - crop_h) // 4 172 | 173 | ret = list() 174 | ret.append((0, 0)) # upper left 175 | ret.append((4 * w_step, 0)) # upper right 176 | ret.append((0, 4 * h_step)) # lower left 177 | ret.append((4 * w_step, 4 * h_step)) # lower right 178 | ret.append((2 * w_step, 2 * h_step)) # center 179 | 180 | if more_fix_crop: 181 | ret.append((0, 2 * h_step)) # center left 182 | ret.append((4 * w_step, 2 * h_step)) # center right 183 | ret.append((2 * w_step, 4 * h_step)) # lower center 184 | ret.append((2 * w_step, 0 * h_step)) # upper center 185 | 186 | ret.append((1 * w_step, 1 * h_step)) # upper left quarter 187 | ret.append((3 * w_step, 1 * h_step)) # upper right quarter 188 | ret.append((1 * w_step, 3 * h_step)) # lower left quarter 189 | ret.append((3 * w_step, 3 * h_step)) # lower right quarter 190 | 191 | return ret 192 | --------------------------------------------------------------------------------
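As a quick illustration of the group transforms above, here is a minimal sketch on dummy data (not part of the original file); it assumes the channel layout used throughout this file, i.e. the first four channels are flow/MV (sliced as `[..., :4]`) and the remaining channels are the residual:

```
# illustrative usage of the group transforms on dummy 7-channel frames:
# flow(2) + MV(2) + residual(3), matching the [..., :4] / [..., 4:] slicing.
import numpy as np

frames = [np.random.randint(0, 256, (256, 340, 7)).astype(np.float32)
          for _ in range(3)]

crop = GroupMultiScaleCrop(224)      # random multi-scale crop, resized to 224
flip = GroupRandomHorizontalFlip()   # flips and negates the x-displacement channels

out = flip(crop(frames))
print(out[0].shape)                  # -> (224, 224, 7)
```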
/code/dmcnet_GAN/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import numpy as np 9 | 10 | def viz_flow(u,v,logscale=True,scaledown=6,output=False): 11 | """ 12 | topleft is zero, u is horiz/x/width, v is vertical/y/height 13 | red is 3 o'clock, yellow is 6, light blue is 9, blue/purple is 12 14 | """ 15 | colorwheel = makecolorwheel() 16 | ncols = colorwheel.shape[0] 17 | 18 | radius = np.sqrt(u**2 + v**2) 19 | if output: 20 | print("Maximum flow magnitude: %0.4f" % np.max(radius)) 21 | if logscale: 22 | radius = np.log(radius + 1) 23 | if output: 24 | print("Maximum flow magnitude (after log): %0.4f" % np.max(radius)) 25 | radius = radius / scaledown 26 | if output: 27 | print("Maximum flow magnitude (after scaledown): %0.4f" % np.max(radius)) 28 | rot = np.arctan2(-v, -u) / np.pi 29 | 30 | fk = (rot+1)/2 * (ncols-1) # -1~1 mapped to 0~ncols-1 31 | k0 = fk.astype(np.uint8) # 0, 1, 2, ..., ncols-1 32 | 33 | k1 = k0+1 34 | k1[k1 == ncols] = 0 35 | 36 | f = fk - k0 37 | 38 | ncolors = colorwheel.shape[1] 39 | img = np.zeros(u.shape+(ncolors,)) 40 | for i in range(ncolors): 41 | tmp = colorwheel[:,i] 42 | col0 = tmp[k0] 43 | col1 = tmp[k1] 44 | col = (1-f)*col0 + f*col1 45 | 46 | idx = radius <= 1 47 | # increase saturation with radius 48 | col[idx] = 1 - radius[idx]*(1-col[idx]) 49 | # out of range 50 | col[~idx] *= 0.75 51 | img[:,:,i] = np.floor(255*col).astype(np.uint8) 52 | 53 | return img.astype(np.uint8) 54 | 55 | 56 | 57 | def makecolorwheel(): 58 | # Create a colorwheel for visualization 59 | RY = 15 60 | YG = 6 61 | GC = 4 62 | CB = 11 63 | BM = 13 64 | MR = 6 65 | 66 | ncols = RY + YG + GC + CB + BM + MR 67 | 68 | colorwheel = np.zeros((ncols,3)) 69 | 70 | col = 0 71 | # RY 72 | colorwheel[0:RY,0] = 1 73 | colorwheel[0:RY,1] = np.arange(0,1,1./RY) 74 | col += RY 75 | 76 | # YG 77 | colorwheel[col:col+YG,0] = np.arange(1,0,-1./YG) 78 | colorwheel[col:col+YG,1] = 1 79 | col += YG 80 | 81 | # GC 82 | colorwheel[col:col+GC,1] = 1 83 | colorwheel[col:col+GC,2] = np.arange(0,1,1./GC) 84 | col += GC 85 | 86 | # CB 87 | colorwheel[col:col+CB,1] = np.arange(1,0,-1./CB) 88 | colorwheel[col:col+CB,2] = 1 89 | col += CB 90 | 91 | # BM 92 | colorwheel[col:col+BM,2] = 1 93 | colorwheel[col:col+BM,0] = np.arange(0,1,1./BM) 94 | col += BM 95 | 96 | # MR 97 | colorwheel[col:col+MR,2] = np.arange(1,0,-1./MR) 98 | colorwheel[col:col+MR,0] = 1 99 | 100 | return colorwheel 101 | --------------------------------------------------------------------------------
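For reference, `viz_flow` can be exercised on a synthetic flow field like this (illustrative sketch; the output file name is made up):

```
# visualize a synthetic radial flow field with viz_flow (illustrative only)
import cv2
import numpy as np

h, w = 224, 224
ys, xs = np.mgrid[0:h, 0:w].astype(np.float64)
u = (xs - w / 2.0) / 16.0   # horizontal displacement
v = (ys - h / 2.0) / 16.0   # vertical displacement

img = viz_flow(u, v, logscale=True, scaledown=6, output=True)
cv2.imwrite('flow_vis.png', img)
```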
/code/dmcnet_I3D/README.MD: -------------------------------------------------------------------------------- 1 | # DMC-Net + I3D 2 | 3 | 4 | This directory contains the code used for the experiment of combining our DMC-Net and I3D. 5 | 6 | 7 | ## Dependencies 8 | Our code is built on the following (but not limited to) packages: 9 | 1. PyTorch 0.4.0 10 | 2. Python 3.6, numpy 11 | 3. [coviar](https://github.com/chaoyuaw/pytorch-coviar/blob/master/GETTING_STARTED.md) 12 | 13 | ## Data 14 | The experiments are run on the mpeg4 videos of [HMDB51](http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/) and [UCF101](https://www.crcv.ucf.edu/data/UCF101.php). 15 | 16 | The path to the videos or to the stored MV and residuals must be set manually in data/video_iterator.py. 17 | 18 | ## Training 19 | Please first go to [here](https://github.com/hassony2/kinetics_i3d_pytorch) to download the pretrained flow model of I3D. 20 | We have two separate files for HMDB51 and UCF101. We first train the generator without updating the weights of the classifier (I3D), using a reconstruction loss, an adversarial loss and a classification loss; we train this way for the first `epoch-thre` epochs. Then we jointly update the generator and the classifier. The discriminator is updated in both stages (a toy sketch of this schedule is given right after this file). 21 | The details of how to use them are shown in the following sample training script. 22 | - Sample script for training on HMDB51 split 1 23 | ``` 24 | bash train.sh 25 | ``` 26 | ## Testing 27 | Go to the `test` directory. We have two separate files for HMDB51 and UCF101. The details of how to use them are shown in the following sample testing code. Please put the model you want to evaluate in ./exps/models/. 28 | - Sample script for testing on HMDB51 split 1 29 | ``` 30 | bash test.sh 31 | ``` 32 | We provide models that produce the results reported in our paper [here](). 33 | 34 | ## Acknowledgment 35 | Our training and testing code is mainly built on [MF-Net](https://github.com/cypw/PyTorch-MFNet). Our I3D model is borrowed from this pytorch [implementation](https://github.com/hassony2/kinetics_i3d_pytorch). Our dataloader also borrows code from [CoViAR](https://github.com/chaoyuaw/pytorch-coviar). Thanks a lot! 36 | 37 | 38 | 39 | 40 | 41 | --------------------------------------------------------------------------------
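To make the two-stage schedule described in the Training section concrete, here is a toy sketch with stand-in modules. None of the names below are the repo's actual classes; the loss weights simply mirror the defaults of the GAN training options (`--lr-mse 0.1`, `--lr-cls 1`, `--lr-adv-g 1`):

```
# toy two-stage schedule: generator-only first, joint G+C afterwards; D always.
import torch
import torch.nn as nn

G = nn.Conv2d(5, 2, 3, padding=1)  # stand-in generator: MV+residual -> flow
C = nn.Sequential(nn.Flatten(), nn.Linear(2 * 8 * 8, 51))  # stand-in classifier
D = nn.Sequential(nn.Flatten(), nn.Linear(2 * 8 * 8, 1))   # stand-in discriminator
mse, ce, bce = nn.MSELoss(), nn.CrossEntropyLoss(), nn.BCEWithLogitsLoss()
opt_g = torch.optim.Adam(G.parameters(), lr=1e-3)
opt_gc = torch.optim.Adam(list(G.parameters()) + list(C.parameters()), lr=1e-3)
opt_d = torch.optim.Adam(D.parameters(), lr=1e-3)

epoch_thre, epochs = 2, 4
for epoch in range(epochs):
    joint = epoch >= epoch_thre          # stage 2: classifier is updated too
    mv_res = torch.randn(4, 5, 8, 8)     # dummy batch of MV+residual inputs
    flow = torch.randn(4, 2, 8, 8)       # dummy target optical flow
    label = torch.randint(0, 51, (4,))

    dmc = G(mv_res)
    loss_g = 0.1 * mse(dmc, flow) \
             + bce(D(dmc), torch.ones(4, 1)) \
             + ce(C(dmc), label)         # reconstruction + adversarial + cls
    opt = opt_gc if joint else opt_g
    opt.zero_grad(); loss_g.backward(); opt.step()

    # the discriminator is updated in both stages
    loss_d = bce(D(flow), torch.ones(4, 1)) + bce(D(dmc.detach()), torch.zeros(4, 1))
    opt_d.zero_grad(); loss_d.backward(); opt_d.step()
```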
/code/dmcnet_I3D/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__init__.py -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/image_transforms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/image_transforms.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/iterator_factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/iterator_factory.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/video_iterator.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/video_iterator.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/video_sampler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/video_sampler.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/video_transforms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/video_transforms.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/image_iterator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import os 8 | import cv2 9 | import numpy as np 10 | 11 | import torch.utils.data as data 12 | import logging 13 | 14 | 15 | class ImageListIter(data.Dataset): 16 | 17 | def __init__(self, 18 | image_prefix, 19 | txt_list, 20 | image_transform, 21 | name="", 22 | force_color=True): 23 | super(ImageListIter, self).__init__() 24 | 25 | # load image list 26 | self.image_list = self._get_video_list(txt_list=txt_list) 27 | 28 | # load params 29 | self.force_color = force_color 30 | self.image_prefix = image_prefix 31 | self.image_transform = image_transform 32 | logging.info("ImageListIter ({:s}) initialized, num: {:d}".format(name, 33 | len(self.image_list))) 34 | 35 | def get_image(self, index): 36 | # get current video info 37 | im_id, label, img_subpath = self.image_list[index] 38 | 39 | # load image 40 | image_path = os.path.join(self.image_prefix, img_subpath) 41 | if self.force_color: 42 | cv_read_flag = cv2.IMREAD_COLOR 43 | else: 44 | cv_read_flag = cv2.IMREAD_GRAYSCALE 45 | cv_img = cv2.imread(image_path, cv_read_flag) 46 | image_input = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB) 47 | 48 | # apply image augmentation 49 | if self.image_transform is not None: 50 | image_input = self.image_transform(image_input) 51 | return image_input, label, img_subpath 52 | 53 | 54 | def __getitem__(self, index): 55 | image_input, label, img_subpath = self.get_image(index) 56 | return image_input, label 57 | 58 | 59 | def __len__(self): 60 | return len(self.image_list) 61 | 62 | 63 | def _get_video_list(self, txt_list): 64 | # format of each line: 65 | # [im_id, label, image_subpath] 66 | assert os.path.exists(txt_list), "Failed to locate: {}".format(txt_list) 67 | 68 | # building dataset 69 | logging.info("Building dataset ...") 70 | image_list = [] 71 | with open(txt_list) as f: 72 | lines = f.read().splitlines() 73 | logging.info("Found {} images in '{}'".format(len(lines), txt_list)) 74 | for i, line in enumerate(lines): 75 | im_id, label, image_subpath = line.split() 76 | info = [int(im_id), int(label), image_subpath] 77 | image_list.append(info) 78 | 79 | return image_list --------------------------------------------------------------------------------
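The list file consumed by `_get_video_list` has one image per line in the form `<im_id> <label> <image_subpath>`. A small usage sketch (illustrative only; all paths and file contents are made up):

```
# illustrative only: the list file and the iterator that reads it.
# /path/to/train_list.txt might contain lines such as:
#   0 12 brush_hair/clip_0001/frame0001.jpg
#   1 37 sit/clip_0042/frame0003.jpg
it = ImageListIter(image_prefix='/path/to/frames',      # assumed frame root
                   txt_list='/path/to/train_list.txt',  # assumed list file
                   image_transform=None,
                   name='train')
image, label = it[0]    # image decoded with cv2, converted BGR -> RGB
```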
/code/dmcnet_I3D/data/image_transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | # most of the code is from: 8 | # https://github.com/bryanyzhu/two-stream-pytorch/blob/master/video_transforms.py 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | 14 | class Compose(object): 15 | """Composes several video_transforms together. 16 | 17 | Args: 18 | transforms (List[Transform]): list of transforms to compose. 19 | 20 | Example: 21 | >>> video_transforms.Compose([ 22 | >>> video_transforms.CenterCrop(10), 23 | >>> video_transforms.ToTensor(), 24 | >>> ]) 25 | """ 26 | def __init__(self, transforms, aug_seed=0): 27 | self.transforms = transforms 28 | for i, t in enumerate(self.transforms): 29 | t.set_random_state(seed=(aug_seed+i)) 30 | #print(aug_seed+i) 31 | 32 | def __call__(self, data): 33 | for t in self.transforms: 34 | data = t(data) 35 | return data 36 | 37 | class Transform(object): 38 | """base class for all transformations""" 39 | def set_random_state(self, seed=None): 40 | self.rng = np.random.RandomState(seed) 41 | 42 | 43 | #################################### 44 | # Customized Transformations 45 | #################################### 46 | 47 | class Normalize(Transform): 48 | """Given mean: (R, G, B) and std: (R, G, B), 49 | will normalize each channel of the torch.*Tensor, i.e. 50 | channel = (channel - mean) / std 51 | """ 52 | def __init__(self, mean, std): 53 | self.mean = mean 54 | self.std = std 55 | 56 | def __call__(self, tensor): 57 | for t, m, s in zip(tensor, self.mean, self.std): 58 | t.sub_(m).div_(s) 59 | return tensor 60 | 61 | 62 | class Resize(Transform): 63 | """ Rescales the input numpy array to the given 'size'. 64 | 'size' will be the size of the smaller edge. 65 | For example, if height > width, then image will be 66 | rescaled to (size * height / width, size) 67 | size: size of the smaller edge 68 | interpolation: Default: cv2.INTER_LINEAR 69 | """ 70 | def __init__(self, size, interpolation=cv2.INTER_LINEAR): 71 | self.size = size # [w, h] 72 | self.interpolation = interpolation 73 | 74 | def __call__(self, data): 75 | h, w, c = data.shape 76 | 77 | if isinstance(self.size, int): 78 | slen = self.size 79 | if min(w, h) == slen: 80 | return data 81 | if w < h: 82 | new_w = self.size 83 | new_h = int(self.size * h / w) 84 | else: 85 | new_w = int(self.size * w / h) 86 | new_h = self.size 87 | else: 88 | new_w = self.size[0] 89 | new_h = self.size[1] 90 | 91 | if (h != new_h) or (w != new_w): 92 | scaled_data = cv2.resize(data, (new_w, new_h), self.interpolation) 93 | else: 94 | scaled_data = data 95 | 96 | return scaled_data 97 | 98 | 99 | class RandomScale(Transform): 100 | """ Rescales the input numpy array to the given 'size'. 101 | 'size' will be the size of the smaller edge. 
102 | For example, if height > width, then image will be 103 | rescaled to (size * height / width, size) 104 | size: size of the smaller edge 105 | interpolation: Default: cv2.INTER_LINEAR 106 | """ 107 | def __init__(self, make_square=False, 108 | aspect_ratio=[1.0, 1.0], 109 | slen=[224, 288], 110 | interpolation=cv2.INTER_LINEAR): 111 | assert slen[1] >= slen[0], \ 112 | "slen ({}) should be in increasing order".format(slen) 113 | assert aspect_ratio[1] >= aspect_ratio[0], \ 114 | "aspect_ratio ({}) should be in increasing order".format(aspect_ratio) 115 | self.slen = slen # [min factor, max factor] 116 | self.aspect_ratio = aspect_ratio 117 | self.make_square = make_square 118 | self.interpolation = interpolation 119 | self.rng = np.random.RandomState(0) 120 | 121 | def __call__(self, data): 122 | h, w, c = data.shape 123 | new_w = w 124 | new_h = h if not self.make_square else w 125 | if self.aspect_ratio: 126 | random_aspect_ratio = self.rng.uniform(self.aspect_ratio[0], self.aspect_ratio[1]) 127 | if self.rng.rand() > 0.5: 128 | random_aspect_ratio = 1.0 / random_aspect_ratio 129 | new_w *= random_aspect_ratio 130 | new_h /= random_aspect_ratio 131 | resize_factor = self.rng.uniform(self.slen[0], self.slen[1]) / min(new_w, new_h) 132 | new_w *= resize_factor 133 | new_h *= resize_factor 134 | scaled_data = cv2.resize(data, (int(new_w+1), int(new_h+1)), self.interpolation) 135 | return scaled_data 136 | 137 | 138 | class CenterCrop(Transform): 139 | """Crops the given numpy array at the center to have a region of 140 | the given size. size can be a tuple (target_height, target_width) 141 | or an integer, in which case the target will be of a square shape (size, size) 142 | """ 143 | def __init__(self, size): 144 | if isinstance(size, int): 145 | self.size = (size, size) 146 | else: 147 | self.size = size 148 | 149 | def __call__(self, data): 150 | h, w, c = data.shape 151 | th, tw = self.size 152 | x1 = int(round((w - tw) / 2.)) 153 | y1 = int(round((h - th) / 2.)) 154 | cropped_data = data[y1:(y1+th), x1:(x1+tw), :] 155 | return cropped_data 156 | 157 | class RandomCrop(Transform): 158 | """Crops the given numpy array at the random location to have a region of 159 | the given size. 
size can be a tuple (target_height, target_width) 160 | or an integer, in which case the target will be of a square shape (size, size) 161 | """ 162 | def __init__(self, size): 163 | if isinstance(size, int): 164 | self.size = (size, size) 165 | else: 166 | self.size = size 167 | self.rng = np.random.RandomState(0) 168 | 169 | def __call__(self, data): 170 | h, w, c = data.shape 171 | th, tw = self.size 172 | x1 = self.rng.choice(range(w - tw)) 173 | y1 = self.rng.choice(range(h - th)) 174 | #print(x1,y1,data.shape) 175 | cropped_data = data[y1:(y1+th), x1:(x1+tw), :] 176 | return cropped_data 177 | 178 | class RandomHorizontalFlip(Transform): 179 | """Randomly horizontally flips the given numpy array with a probability of 0.5 180 | """ 181 | def __init__(self, modality = 'rgb'): 182 | self.modality = modality 183 | self.rng = np.random.RandomState(0) 184 | 185 | def __call__(self, data): 186 | if self.rng.rand() < 0.5: 187 | data = np.fliplr(data) 188 | data = np.ascontiguousarray(data).astype(np.int32) 189 | if self.modality in ['mv', 'flow']: 190 | data[:, :, 0::2] += -128 191 | data[:, :, 0::2] *= (-1) 192 | data[:, :, 0::2] += 128 193 | elif self.modality == 'flow+mp4': 194 | data[:, :, 0::7] += -128 195 | data[:, :, 2::7] += -128 196 | data[:, :, 0::7] *= (-1) 197 | data[:, :, 2::7] *= (-1) 198 | data[:, :, 0::7] += 128 199 | data[:, :, 2::7] += 128 200 | return data.astype(np.uint8) 201 | 202 | class RandomVerticalFlip(Transform): 203 | """Randomly vertically flips the given numpy array with a probability of 0.5 204 | """ 205 | def __init__(self): 206 | self.rng = np.random.RandomState(0) 207 | 208 | def __call__(self, data): 209 | if self.rng.rand() < 0.5: 210 | data = np.flipud(data) 211 | data = np.ascontiguousarray(data) 212 | return data 213 | 214 | class RandomRGB(Transform): 215 | def __init__(self, vars=[10, 10, 10]): 216 | self.vars = vars 217 | self.rng = np.random.RandomState(0) 218 | 219 | def __call__(self, data): 220 | h, w, c = data.shape 221 | 222 | random_vars = [int(round(self.rng.uniform(-x, x))) for x in self.vars] 223 | 224 | base = len(random_vars) 225 | augmented_data = np.zeros(data.shape) 226 | for ic in range(0, c): 227 | var = random_vars[ic%base] 228 | augmented_data[:,:,ic] = np.minimum(np.maximum(data[:,:,ic] + var, 0), 255) 229 | return augmented_data 230 | 231 | class RandomHLS(Transform): 232 | def __init__(self, vars=[15, 35, 25]): 233 | self.vars = vars 234 | self.rng = np.random.RandomState(0) 235 | 236 | def __call__(self, data): 237 | h, w, c = data.shape 238 | 239 | assert c%3 == 0, "input channel = %d, illegal"%c 240 | random_vars = [int(round(self.rng.uniform(-x, x))) for x in self.vars] 241 | 242 | base = len(random_vars) 243 | augmented_data = np.zeros(data.shape, ) 244 | for i_im in range(0, int(c/3)): 245 | augmented_data[:,:,3*i_im:(3*i_im+3)] = \ 246 | cv2.cvtColor(data[:,:,3*i_im:(3*i_im+3)], cv2.COLOR_RGB2HLS) 247 | 248 | hls_limits = [180, 255, 255] 249 | for ic in range(0, c): 250 | var = random_vars[ic%base] 251 | limit = hls_limits[ic%base] 252 | augmented_data[:,:,ic] = np.minimum(np.maximum(augmented_data[:,:,ic] + var, 0), limit) 253 | 254 | for i_im in range(0, int(c/3)): 255 | augmented_data[:,:,3*i_im:(3*i_im+3)] = \ 256 | cv2.cvtColor(augmented_data[:,:,3*i_im:(3*i_im+3)].astype(np.uint8), \ 257 | cv2.COLOR_HLS2RGB) 258 | 259 | return augmented_data 260 | 261 | 262 | class ToTensor(Transform): 263 | """Converts a numpy.ndarray (H x W x C) in the range 264 | [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range 
[0.0, 1.0]. 265 | """ 266 | def __init__(self, dim=3): 267 | self.dim = dim 268 | 269 | def __call__(self, image): 270 | if isinstance(image, np.ndarray): 271 | # H, W, C = image.shape 272 | # handle numpy array 273 | image = torch.from_numpy(image.transpose((2, 0, 1))) 274 | # backward compatibility 275 | return image.float() / 255.0 -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/iterator_factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import os 8 | import logging 9 | 10 | import torch 11 | 12 | from . import video_sampler as sampler 13 | from . import video_transforms as transforms 14 | from .video_iterator import VideoIter 15 | 16 | def get_hmdb51(data_root='./dataset/HMDB51', 17 | clip_length=8, 18 | train_interval=2, 19 | val_interval=2, 20 | mean=[0.485, 0.456, 0.406], 21 | std=[0.229, 0.224, 0.225], 22 | seed=torch.distributed.get_rank() if torch.distributed._initialized else 0, 23 | modality = 'rgb', 24 | split = 1, 25 | net_name = 'I3D', 26 | accumulate = True, 27 | ds_factor = 16, 28 | mv_minmaxnorm = False, 29 | mv_loadimg = False, 30 | **kwargs): 31 | """ data iter for hmdb51 32 | """ 33 | logging.debug("VideoIter:: clip_length = {}, interval = [train: {}, val: {}], seed = {}".format( \ 34 | clip_length, train_interval, val_interval, seed)) 35 | 36 | normalize = transforms.Normalize(mean=mean, std=std) 37 | if modality in ['rgb', 'I']: 38 | tran = transforms.Compose([ 39 | transforms.RandomScale(make_square=True, 40 | aspect_ratio=[0.8, 1./0.8], 41 | slen=[224, 288]), 42 | transforms.RandomCrop((224, 224)), # insert a resize if needed 43 | transforms.RandomHorizontalFlip(), 44 | transforms.RandomHLS(vars=[15, 35, 25]), 45 | transforms.ToTensor(modality), 46 | normalize, 47 | ], 48 | aug_seed=(seed+1)) 49 | elif modality in ['flow', 'mv', 'res', 'flow+mp4']: 50 | tran = transforms.Compose([ 51 | transforms.RandomCrop((224, 224)), # insert a resize if needed 52 | transforms.RandomHorizontalFlip(modality), 53 | transforms.ToTensor(modality, ds_factor), 54 | normalize, 55 | ], 56 | aug_seed=(seed+1)) 57 | train_sampler = sampler.RandomSampling(num=clip_length, 58 | interval=train_interval, 59 | speed=[1.0, 1.0], 60 | seed=(seed+0)) 61 | train = VideoIter(video_prefix='/HMDB51/fb/TSN_input/', #change accordingly 62 | txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'hmdb51_split{}_train.txt'.format(split)), 63 | sampler=train_sampler, 64 | force_color=True, 65 | video_transform=tran, 66 | name='train', 67 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'hmdb51_split{}_train_info.txt'.format(split)), 68 | shuffle_list_seed=(seed+2), 69 | check_video = True, 70 | load_from_img = True, 71 | modality = modality, accumulate = accumulate, ds_factor = ds_factor, mv_minmaxnorm = mv_minmaxnorm,mv_loadimg=mv_loadimg) 72 | if net_name == 'I3D': 73 | tran = transforms.Compose([ 74 | transforms.CenterCrop((224, 224)), 75 | transforms.ToTensor(modality), 76 | normalize, 77 | ]) 78 | else: 79 | raise ValueError('Unknown net_name') 80 | val_sampler = sampler.SequentialSampling(num=clip_length, 81 | interval=val_interval, 82 | fix_cursor=True, 83 | shuffle=True) 84 | val = VideoIter(video_prefix='/HMDB51/fb/TSN_input/', #change accordingly 85 | txt_list=os.path.join(data_root, 
'raw', 'list_cvt', 'hmdb51_split{}_test.txt'.format(split)), 86 | sampler=val_sampler, 87 | force_color=True, 88 | video_transform=tran, 89 | name='test', 90 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'hmdb51_split{}_test_info.txt'.format(split)), 91 | check_video = True, 92 | load_from_img = True, 93 | modality = modality, accumulate = accumulate, ds_factor = ds_factor, mv_minmaxnorm = mv_minmaxnorm,mv_loadimg=mv_loadimg) 94 | 95 | return (train, val) 96 | 97 | def get_ucf101(data_root='./dataset/UCF101', 98 | clip_length=8, 99 | train_interval=2, 100 | val_interval=2, 101 | mean=[0.485, 0.456, 0.406], 102 | std=[0.229, 0.224, 0.225], 103 | seed=torch.distributed.get_rank() if torch.distributed._initialized else 0, 104 | modality = 'rgb', 105 | split = 1, 106 | net_name = 'I3D', 107 | accumulate = True, 108 | ds_factor = 16, 109 | mv_minmaxnorm = False, 110 | mv_loadimg=False, 111 | **kwargs): 112 | """ data iter for ucf-101 113 | """ 114 | logging.debug("VideoIter:: clip_length = {}, interval = [train: {}, val: {}], seed = {}".format( \ 115 | clip_length, train_interval, val_interval, seed)) 116 | 117 | normalize = transforms.Normalize(mean=mean, std=std) 118 | 119 | if modality in ['rgb', 'I']: 120 | tran = transforms.Compose([ 121 | transforms.RandomScale(make_square=True, 122 | aspect_ratio=[0.8, 1./0.8], 123 | slen=[224, 288]), 124 | transforms.RandomCrop((224, 224)), # insert a resize if needed 125 | transforms.RandomHorizontalFlip(), 126 | transforms.RandomHLS(vars=[15, 35, 25]), 127 | transforms.ToTensor(modality), 128 | normalize, 129 | ], 130 | aug_seed=(seed+1)) 131 | elif modality in ['flow', 'mv', 'res', 'flow+mp4']: 132 | tran = transforms.Compose([ 133 | transforms.RandomCrop((224, 224)), # insert a resize if needed 134 | transforms.RandomHorizontalFlip(modality), 135 | transforms.ToTensor(modality, ds_factor), 136 | normalize, 137 | ], 138 | aug_seed=(seed+1)) 139 | train_sampler = sampler.RandomSampling(num=clip_length, 140 | interval=train_interval, 141 | speed=[1.0, 1.0], 142 | seed=(seed+0)) 143 | train = VideoIter(video_prefix='/HMDB51/fb/TSN_input/', #change accordingly 144 | txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'trainlist0{}.txt'.format(split)), 145 | sampler=train_sampler, 146 | force_color=True, 147 | video_transform=tran, 148 | name='train', 149 | shuffle_list_seed=(seed+2), 150 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'ucf101_split{}_train_info.txt'.format(split)), 151 | check_video = True, 152 | load_from_img = True, 153 | modality = modality, accumulate = accumulate, ds_factor = ds_factor, mv_minmaxnorm = mv_minmaxnorm,mv_loadimg=mv_loadimg) 154 | 155 | if net_name == 'I3D': 156 | tran = transforms.Compose([ 157 | transforms.CenterCrop((224, 224)), 158 | transforms.ToTensor(modality), 159 | normalize, 160 | ]) 161 | else: 162 | raise ValueError('Unknown net_name') 163 | val_sampler = sampler.SequentialSampling(num=clip_length, 164 | interval=val_interval, 165 | fix_cursor=True, 166 | shuffle=True) 167 | val = VideoIter(video_prefix='/HMDB51/fb/TSN_input/', #change accordingly 168 | txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'testlist0{}.txt'.format(split)), 169 | sampler=val_sampler, 170 | force_color=True, 171 | video_transform=tran, 172 | name='test', 173 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'ucf101_split{}_test_info.txt'.format(split)), 174 | check_video = True, 175 | load_from_img = True, 176 | modality = modality, accumulate = accumulate, ds_factor = ds_factor, 
mv_minmaxnorm = mv_minmaxnorm,mv_loadimg=mv_loadimg 177 | ) 178 | 179 | return (train, val) 180 | 181 | 182 | 183 | 184 | def creat(name, batch_size, num_workers=8, **kwargs): 185 | 186 | if name.upper() == 'UCF101': 187 | train, val = get_ucf101(**kwargs) 188 | elif name.upper() == 'HMDB51': 189 | train, val = get_hmdb51(**kwargs) 190 | else: 191 | raise NotImplementedError("iter {} not found".format(name)) 192 | 193 | 194 | train_loader = torch.utils.data.DataLoader(train, 195 | batch_size=batch_size, shuffle=True, 196 | num_workers=num_workers, pin_memory=False) 197 | 198 | val_loader = torch.utils.data.DataLoader(val, 199 | batch_size = batch_size, shuffle=False, 200 | num_workers=num_workers, pin_memory=False) 201 | 202 | return (train_loader, val_loader) 203 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/video_sampler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import math 8 | import numpy as np 9 | 10 | 11 | class RandomSampling(object): 12 | def __init__(self, num, interval=1, speed=[1.0, 1.0], seed=0): 13 | assert num > 0, "at least sampling 1 frame" 14 | self.num = num 15 | self.interval = interval if type(interval) == list else [interval] 16 | self.speed = speed 17 | self.rng = np.random.RandomState(seed) 18 | 19 | def sampling(self, range_max, v_id=None, prev_failed=False): 20 | assert range_max > 0, \ 21 | ValueError("range_max = {}".format(range_max)) 22 | interval = self.rng.choice(self.interval) 23 | if self.num == 1: 24 | return [self.rng.choice(range(0, range_max))] 25 | # sampling 26 | speed_min = self.speed[0] 27 | speed_max = min(self.speed[1], (range_max-1)/((self.num-1)*interval)) 28 | if speed_max < speed_min: 29 | return (np.remainder(np.arange(0, self.num * interval, interval), range_max)).tolist() 30 | random_interval = self.rng.uniform(speed_min, speed_max) * interval 31 | frame_range = (self.num-1) * random_interval 32 | clip_start = self.rng.uniform(0, (range_max-1) - frame_range) 33 | clip_end = clip_start + frame_range 34 | return np.linspace(clip_start, clip_end, self.num).astype(dtype=np.int).tolist() 35 | 36 | 37 | class SequentialSampling(object): 38 | def __init__(self, num, interval=1, shuffle=False, fix_cursor=False, seed=0): 39 | self.memory = {} 40 | self.num = num 41 | self.interval = interval if type(interval) == list else [interval] 42 | self.shuffle = shuffle 43 | self.fix_cursor = fix_cursor 44 | self.rng = np.random.RandomState(seed) 45 | 46 | def sampling(self, range_max, v_id, prev_failed=False): 47 | assert range_max > 0, \ 48 | ValueError("range_max = {}".format(range_max)) 49 | num = self.num 50 | interval = self.rng.choice(self.interval) 51 | frame_range = (num - 1) * interval + 1 52 | # sampling clips 53 | if v_id not in self.memory: 54 | clips = list(range(0, range_max-(frame_range-1), frame_range)) 55 | if self.shuffle: 56 | self.rng.shuffle(clips) 57 | self.memory[v_id] = [-1, clips] 58 | # pick up a clip 59 | cursor, clips = self.memory[v_id] 60 | if not clips: 61 | return (np.remainder(np.arange(0, self.num * interval, interval), range_max)).tolist() 62 | cursor = (cursor + 1) % len(clips) 63 | if prev_failed or not self.fix_cursor: 64 | self.memory[v_id][0] = cursor 65 | # sampling within clip 66 | idxs = range(clips[cursor], 
clips[cursor]+frame_range, interval) 67 | return idxs 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | import logging 73 | logging.getLogger().setLevel(logging.DEBUG) 74 | 75 | """ test RandomSampling() """ 76 | 77 | random_sampler = RandomSampling(num=8, interval=2, speed=[0.5, 2]) 78 | 79 | logging.info("RandomSampling(): range_max < num") 80 | for i in range(10): 81 | logging.info("{:d}: {}".format(i, random_sampler.sampling(range_max=2, v_id=1))) 82 | 83 | logging.info("RandomSampling(): range_max == num") 84 | for i in range(10): 85 | logging.info("{:d}: {}".format(i, random_sampler.sampling(range_max=8, v_id=1))) 86 | 87 | logging.info("RandomSampling(): range_max > num") 88 | for i in range(90): 89 | logging.info("{:d}: {}".format(i, random_sampler.sampling(range_max=30, v_id=1))) 90 | 91 | 92 | """ test SequentialSampling() """ 93 | sequential_sampler = SequentialSampling(num=3, interval=3, fix_cursor=False) 94 | 95 | logging.info("SequentialSampling():") 96 | for i in range(10): 97 | logging.info("{:d}: v_id = {}: {}".format(i, 0, list(sequential_sampler.sampling(range_max=14, v_id=0)))) 98 | # logging.info("{:d}: v_id = {}: {}".format(i, 1, sequential_sampler.sampling(range_max=9, v_id=1))) 99 | # logging.info("{:d}: v_id = {}: {}".format(i, 2, sequential_sampler.sampling(range_max=2, v_id=2))) 100 | # logging.info("{:d}: v_id = {}: {}".format(i, 3, sequential_sampler.sampling(range_max=3, v_id=3))) -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/video_transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import torch 8 | import numpy as np 9 | from skimage.measure import block_reduce 10 | from scipy import interpolate 11 | from .image_transforms import Compose, \ 12 | Transform, \ 13 | Normalize, \ 14 | Resize, \ 15 | RandomScale, \ 16 | CenterCrop, \ 17 | RandomCrop, \ 18 | RandomHorizontalFlip, \ 19 | RandomRGB, \ 20 | RandomHLS 21 | 22 | 23 | class ToTensor(Transform): 24 | """Converts a numpy.ndarray (H x W x (T x C)) in the range 25 | [0, 255] to a torch.FloatTensor of shape (C x T x H x W) in the range [0.0, 1.0]. 26 | """ 27 | def __init__(self, modality = 'rgb', flow_ds_factor = 1, interp = False): 28 | self.modality = modality 29 | self._flow_ds_factor = flow_ds_factor 30 | self._upsample_interp = interp 31 | if modality == 'rgb': 32 | self.dim = 3 33 | elif modality in ['flow', 'mv']: 34 | self.dim = 2 35 | elif modality in ['res', 'I']: 36 | self.dim = 3 37 | elif modality == 'flow+mp4': 38 | self.dim = 7 39 | 40 | def __call__(self, clips): 41 | if isinstance(clips, np.ndarray): 42 | H, W, _ = clips.shape 43 | # handle numpy array 44 | clips = clips.reshape((H,W,-1,self.dim)).transpose((3, 2, 0, 1)) 45 | if self.modality == 'flow+mp4': 46 | if self._flow_ds_factor not in (0, 1): 47 | clips = np.transpose(clips, (1,0,2,3)) 48 | # downsample to make OF blocky 49 | factor = self._flow_ds_factor 50 | w_max = H 51 | h_max = W 52 | input_flow = block_reduce(clips[:,0:2, :, :], block_size=(1, 1, factor, factor), func=np.mean) 53 | # resize to original size by repeating or interpolation 54 | if self._upsample_interp is False: 55 | input_flow = input_flow.repeat(factor, axis=2).repeat(factor, axis=3) 56 | else: 57 | # interpolate along certain dimension? 
only interp1d can do so 58 | w_max_ds = input_flow.shape[2] 59 | h_max_ds = input_flow.shape[3] 60 | f_out = interpolate.interp1d(np.linspace(0, 1, w_max_ds), input_flow, kind='linear', axis=2) 61 | input_flow = f_out(np.linspace(0, 1, w_max_ds * factor)) 62 | f_out = interpolate.interp1d(np.linspace(0, 1, h_max_ds), input_flow, kind='linear', axis=3) 63 | input_flow = f_out(np.linspace(0, 1, h_max_ds * factor)) 64 | clips[:,0:2, :, :] = input_flow[:, :, :w_max, :h_max] 65 | clips = np.transpose(clips, (1,0,2,3)) 66 | 67 | clips = torch.from_numpy(clips) 68 | #print(clips.shape) 69 | # backward compatibility 70 | return clips.float() / 255.0 -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/HMDB51/raw/data: -------------------------------------------------------------------------------- 1 | ../../../../DATA/HMDB51/raw/data -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/HMDB51/raw/list_cvt/mapping_table.txt: -------------------------------------------------------------------------------- 1 | 0 brush_hair 2 | 1 cartwheel 3 | 2 catch 4 | 3 chew 5 | 4 clap 6 | 5 climb 7 | 6 climb_stairs 8 | 7 dive 9 | 8 draw_sword 10 | 9 dribble 11 | 10 drink 12 | 11 eat 13 | 12 fall_floor 14 | 13 fencing 15 | 14 flic_flac 16 | 15 golf 17 | 16 handstand 18 | 17 hit 19 | 18 hug 20 | 19 jump 21 | 20 kick 22 | 21 kick_ball 23 | 22 kiss 24 | 23 laugh 25 | 24 pick 26 | 25 pour 27 | 26 pullup 28 | 27 punch 29 | 28 push 30 | 29 pushup 31 | 30 ride_bike 32 | 31 ride_horse 33 | 32 run 34 | 33 shake_hands 35 | 34 shoot_ball 36 | 35 shoot_bow 37 | 36 shoot_gun 38 | 37 sit 39 | 38 situp 40 | 39 smile 41 | 40 smoke 42 | 41 somersault 43 | 42 stand 44 | 43 swing_baseball 45 | 44 sword 46 | 45 sword_exercise 47 | 46 talk 48 | 47 throw 49 | 48 turn 50 | 49 walk 51 | 50 wave 52 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/HMDB51/scripts/README.md: -------------------------------------------------------------------------------- 1 | Here, I provide an example code for converting raw HMDB51 videos. 2 | ``` 3 | python convert_videos.py 4 | ``` 5 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/HMDB51/scripts/convert_videos.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | import os 9 | import logging 10 | import subprocess 11 | 12 | from joblib import delayed 13 | from joblib import Parallel 14 | 15 | def exe_cmd(cmd): 16 | try: 17 | dst_file = cmd.split()[-1] 18 | if os.path.exists(dst_file): 19 | return "exist" 20 | cmd = cmd.replace('(', '\(').replace(')', '\)').replace('\'', '\\\'') 21 | output = subprocess.check_output(cmd, shell=True, 22 | stderr=subprocess.STDOUT) 23 | except subprocess.CalledProcessError as err: 24 | logging.warning("failed: {}".format(cmd)) 25 | # logging.warning("failed: {}: {}".format(cmd, err.output.decode("utf-8"))) # more details 26 | return False 27 | return output 28 | 29 | def convert_video_wrapper(src_videos, 30 | dst_videos, 31 | cmd_format, 32 | in_parallel=True): 33 | commands = [] 34 | for src, dst in zip(src_videos, dst_videos): 35 | cmd = cmd_format.format(src, dst) 36 | commands.append(cmd) 37 | 38 | logging.info("- {} commands to execute".format(len(commands))) 39 | 40 | if not in_parallel: 41 | for i, cmd in enumerate(commands): 42 | # if i % 100 == 0: 43 | # logging.info("{} / {}: '{}'".format(i, len(commands), cmd)) 44 | exe_cmd(cmd=cmd) 45 | else: 46 | num_jobs = 24 47 | logging.info("processing videos in parallel, num_jobs={}".format(num_jobs)) 48 | Parallel(n_jobs=num_jobs)(delayed(exe_cmd)(cmd) for cmd in commands) 49 | 50 | 51 | if __name__ == "__main__": 52 | logging.getLogger().setLevel(logging.DEBUG) 53 | 54 | # resize to slen = x360 55 | cmd_format = 'ffmpeg -y -i {} -c:v mpeg4 -filter:v "scale=min(iw\,(360*iw)/min(iw\,ih)):-1" -b:v 640k -an {}' 56 | 57 | src_root = '../raw/data' 58 | dst_root = '../raw/data-x360' 59 | assert os.path.exists(dst_root), "cannot locate `{}'".format(dst_root) 60 | 61 | classname = [name for name in os.listdir(src_root) \ 62 | if os.path.isdir(os.path.join(src_root,name))] 63 | classname.sort() 64 | 65 | for cls_name in classname: 66 | src_folder = os.path.join(src_root, cls_name) 67 | dst_folder = os.path.join(dst_root, cls_name) 68 | assert os.path.exists(src_folder), "failed to locate: `{}'.".format(src_folder) 69 | if not os.path.exists(dst_folder): 70 | os.makedirs(dst_folder) 71 | 72 | video_names = [name for name in os.listdir(src_folder) \ 73 | if os.path.isfile(os.path.join(src_folder, name))] 74 | 75 | src_videos = [os.path.join(src_folder, vid_name.replace(";", "\;").replace("&", "\&")) for vid_name in video_names] 76 | dst_videos = [os.path.join(dst_folder, vid_name.replace(";", "\;").replace("&", "\&")) for vid_name in video_names] 77 | 78 | convert_video_wrapper(src_videos=src_videos, 79 | dst_videos=dst_videos, 80 | cmd_format=cmd_format) 81 | 82 | logging.info("- Done.") 83 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/README.md: -------------------------------------------------------------------------------- 1 | Please organize this folder as follows: 2 | ``` 3 | ./ 4 | ├── config.py 5 | ├── HMDB51 6 | │   ├── raw 7 | │   │   ├── data -> ../../../../DATA/HMDB51/raw/data 8 | │   │   │   ├── brush_hair 9 | │   │   │   ├── cartwheel 10 | │   │   │   ├── catch 11 | │   │   │   ├── chew 12 | │   │   │   ├── clap 13 | │   │   │   ├── climb 14 | │   │   │   ├── climb_stairs 15 | │   │   │   ├── dive 16 | │   │   │   ├── draw_sword 17 | │   │   │   ├── dribble 18 | │   │   │   ├── drink 19 | │   │   │   ├── eat 20 | │   │   │   ├── fall_floor 21 | │   │   │   ├── fencing 22 | │   │   │   ├── flic_flac 23 | │   │   │   ├── golf 24 | │   │   │   ├── handstand 25 | │   │   │   ├── hit 26 | │   │  
 │   ├── hug 27 | │   │   │   ├── jump 28 | │   │   │   ├── kick 29 | │   │   │   ├── kick_ball 30 | │   │   │   ├── kiss 31 | │   │   │   ├── laugh 32 | │   │   │   ├── pick 33 | │   │   │   ├── pour 34 | │   │   │   ├── pullup 35 | │   │   │   ├── punch 36 | │   │   │   ├── push 37 | │   │   │   ├── pushup 38 | │   │   │   ├── ride_bike 39 | │   │   │   ├── ride_horse 40 | │   │   │   ├── run 41 | │   │   │   ├── shake_hands 42 | │   │   │   ├── shoot_ball 43 | │   │   │   ├── shoot_bow 44 | │   │   │   ├── shoot_gun 45 | │   │   │   ├── sit 46 | │   │   │   ├── situp 47 | │   │   │   ├── smile 48 | │   │   │   ├── smoke 49 | │   │   │   ├── somersault 50 | │   │   │   ├── stand 51 | │   │   │   ├── swing_baseball 52 | │   │   │   ├── sword 53 | │   │   │   ├── sword_exercise 54 | │   │   │   ├── talk 55 | │   │   │   ├── throw 56 | │   │   │   ├── turn 57 | │   │   │   ├── walk 58 | │   │   │   └── wave 59 | │   │   └── list_cvt 60 | │   │   ├── hmdb51_split1_others.txt 61 | │   │   ├── hmdb51_split1_test.txt 62 | │   │   ├── hmdb51_split1_train.txt 63 | │   │   ├── hmdb51_split2_others.txt 64 | │   │   ├── hmdb51_split2_test.txt 65 | │   │   ├── hmdb51_split2_train.txt 66 | │   │   ├── hmdb51_split3_others.txt 67 | │   │   ├── hmdb51_split3_test.txt 68 | │   │   ├── hmdb51_split3_train.txt 69 | │   │   └── mapping_table.txt 70 | │   └── scripts 71 | │   ├── convert_list.py 72 | │   └── resave_videos.py 73 | ├── __init__.py 74 | ├── Kinetics 75 | │   ├── raw 76 | │   │   ├── data -> ../../../../DATA/Kinetics/raw/data 77 | │   │   │   ├── test 78 | │   │   │   ├── train 79 | │   │   │   └── val 80 | │   │   └── list_cvt 81 | │   │   ├── kinetics_test.txt 82 | │   │   ├── kinetics_test_avi.txt 83 | │   │   ├── kinetics_train.txt 84 | │   │   ├── kinetics_train_avi.txt 85 | │   │   ├── kinetics_val.txt 86 | │   │   ├── kinetics_val_avi.txt 87 | │   │   └── mapping_table.txt 88 | │   └── scripts 89 | │   ├── convert_video.py 90 | │   └── remove_spaces.py 91 | ├── README.md 92 | └── UCF101 93 | └── raw 94 | ├── data -> ../../../../DATA/UCF101/raw/data 95 | │   ├── ApplyEyeMakeup 96 | │   ├── ApplyLipstick 97 | │   ├── Archery 98 | │   ├── BabyCrawling 99 | │   ├── BalanceBeam 100 | │   ├── BandMarching 101 | │   ├── BaseballPitch 102 | │   ├── Basketball 103 | │   ├── BasketballDunk 104 | │   ├── BenchPress 105 | │   ├── Biking 106 | │   ├── Billiards 107 | │   ├── BlowDryHair 108 | │   ├── BlowingCandles 109 | │   ├── BodyWeightSquats 110 | │   ├── Bowling 111 | │   ├── BoxingPunchingBag 112 | │   ├── BoxingSpeedBag 113 | │   ├── BreastStroke 114 | │   ├── BrushingTeeth 115 | │   ├── CleanAndJerk 116 | │   ├── CliffDiving 117 | │   ├── CricketBowling 118 | │   ├── CricketShot 119 | │   ├── CuttingInKitchen 120 | │   ├── Diving 121 | │   ├── Drumming 122 | │   ├── Fencing 123 | │   ├── FieldHockeyPenalty 124 | │   ├── FloorGymnastics 125 | │   ├── FrisbeeCatch 126 | │   ├── FrontCrawl 127 | │   ├── GolfSwing 128 | │   ├── Haircut 129 | │   ├── Hammering 130 | │   ├── HammerThrow 131 | │   ├── HandstandPushups 132 | │   ├── HandstandWalking 133 | │   ├── HeadMassage 134 | │   ├── HighJump 135 | │   ├── HorseRace 136 | │   ├── HorseRiding 137 | │   ├── HulaHoop 138 | │   ├── IceDancing 139 | │   ├── JavelinThrow 140 | │   ├── JugglingBalls 141 | │   ├── JumpingJack 142 | │   ├── JumpRope 143 | │   ├── Kayaking 144 | │   ├── Knitting 145 | │   ├── LongJump 146 | │   ├── Lunges 147 | │   ├── MilitaryParade 148 | │   ├── Mixing 149 | │   ├── MoppingFloor 150 | │   ├── Nunchucks 151 | │   ├── 
ParallelBars 152 | │   ├── PizzaTossing 153 | │   ├── PlayingCello 154 | │   ├── PlayingDaf 155 | │   ├── PlayingDhol 156 | │   ├── PlayingFlute 157 | │   ├── PlayingGuitar 158 | │   ├── PlayingPiano 159 | │   ├── PlayingSitar 160 | │   ├── PlayingTabla 161 | │   ├── PlayingViolin 162 | │   ├── PoleVault 163 | │   ├── PommelHorse 164 | │   ├── PullUps 165 | │   ├── Punch 166 | │   ├── PushUps 167 | │   ├── Rafting 168 | │   ├── RockClimbingIndoor 169 | │   ├── RopeClimbing 170 | │   ├── Rowing 171 | │   ├── SalsaSpin 172 | │   ├── ShavingBeard 173 | │   ├── Shotput 174 | │   ├── SkateBoarding 175 | │   ├── Skiing 176 | │   ├── Skijet 177 | │   ├── SkyDiving 178 | │   ├── SoccerJuggling 179 | │   ├── SoccerPenalty 180 | │   ├── StillRings 181 | │   ├── SumoWrestling 182 | │   ├── Surfing 183 | │   ├── Swing 184 | │   ├── TableTennisShot 185 | │   ├── TaiChi 186 | │   ├── TennisSwing 187 | │   ├── ThrowDiscus 188 | │   ├── TrampolineJumping 189 | │   ├── Typing 190 | │   ├── UnevenBars 191 | │   ├── VolleyballSpiking 192 | │   ├── WalkingWithDog 193 | │   ├── WallPushups 194 | │   ├── WritingOnBoard 195 | │   └── YoYo 196 | └── list_cvt 197 | ├── testlist01.txt 198 | ├── testlist02.txt 199 | ├── testlist03.txt 200 | ├── trainlist01.txt 201 | ├── trainlist02.txt 202 | └── trainlist03.txt 203 | ``` 204 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/UCF101/raw/data: -------------------------------------------------------------------------------- 1 | ../../../../DATA/UCF101/raw/data -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_config 2 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/dataset/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/dataset/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 
5 | """ 6 | 7 | import logging 8 | 9 | def get_config(name): 10 | 11 | config = {} 12 | 13 | if name.upper() == 'UCF101': 14 | config['num_classes'] = 101 15 | elif name.upper() == 'HMDB51': 16 | config['num_classes'] = 51 17 | else: 18 | logging.error("Configs for dataset '{}' not found".format(name)) 19 | raise NotImplementedError 20 | 21 | logging.debug("Target dataset: '{}', configs: {}".format(name.upper(), config)) 22 | 23 | return config 24 | 25 | 26 | if __name__ == "__main__": 27 | logging.getLogger().setLevel(logging.DEBUG) 28 | 29 | logging.info(get_config("ucf101")) 30 | logging.info(get_config("HMDB51")) 31 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__init__.py -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/i3d.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/i3d.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/initializer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/initializer.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/mfnet_3d.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/mfnet_3d.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/symbol_builder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/symbol_builder.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 
3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import logging 9 | 10 | def get_config(name, modality = 'rgb', **kwargs): 11 | 12 | logging.debug("loading network configs of: {}".format(name.upper())) 13 | 14 | config = {} 15 | 16 | if name.upper() == "I3D": 17 | config['mean'] = [0.5] * 3 18 | config['std'] = [0.5] * 3 19 | else: 20 | config['mean'] = [0.485, 0.456, 0.406] 21 | config['std'] = [0.229, 0.224, 0.225] 22 | 23 | # else: 24 | # raise NotImplemented("Configs for {} not implemented".format(name)) 25 | 26 | logging.info("data:: {}".format(config)) 27 | return config -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/initializer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import json 8 | import logging 9 | 10 | import numpy as np 11 | import torch 12 | 13 | 14 | def xavier(net): 15 | def weights_init(m): 16 | classname = m.__class__.__name__ 17 | if classname.find('Conv') != -1 and hasattr(m, 'weight'): 18 | torch.nn.init.xavier_uniform(m.weight.data, gain=1.) 19 | if m.bias is not None: 20 | m.bias.data.zero_() 21 | elif classname.find('BatchNorm') != -1: 22 | m.weight.data.fill_(1.0) 23 | if m.bias is not None: 24 | m.bias.data.zero_() 25 | elif classname.find('Linear') != -1: 26 | torch.nn.init.xavier_uniform(m.weight.data, gain=1.) 27 | if m.bias is not None: 28 | m.bias.data.zero_() 29 | elif classname.find('Unit3Dpy') != -1: 30 | torch.nn.init.xavier_uniform(m.conv3d.weight.data, gain=1.) 
31 | elif classname in ['Sequential', 'AvgPool3d', 'MaxPool3d', 'MaxPool3dTFPadding', \ 32 | 'Dropout', 'ReLU', 'Softmax', 'BnActConv3d', 'ConstantPad3d'] \ 33 | or 'Block' in classname: 34 | pass 35 | else: 36 | if classname != classname.upper(): 37 | logging.warning("Initializer:: '{}' is uninitialized.".format(classname)) 38 | net.apply(weights_init) 39 | 40 | 41 | 42 | def init_from_dict(net, state_dict, strict=False): 43 | logging.debug("Initializer:: loading from `state_dict', strict = {} ...".format(strict)) 44 | 45 | if strict: 46 | net.load_state_dict(state_dict=state_dict) 47 | else: 48 | # customized partially load function 49 | net_state_keys = list(net.state_dict().keys()) 50 | for name, param in state_dict.items(): 51 | if name in net_state_keys: 52 | dst_param_shape = net.state_dict()[name].shape 53 | net.state_dict()[name].copy_(param.view(dst_param_shape)) 54 | net_state_keys.remove(name) 55 | 56 | # indicating missing keys 57 | if net_state_keys: 58 | logging.info("Initializer:: failed to load: \n{}".format( 59 | json.dumps(net_state_keys, indent=4, sort_keys=True))) 60 | 61 | 62 | def init_3d_from_2d_dict(net, state_dict, method='inflation'): 63 | logging.debug("Initializer:: loading from 2D neural network, filling method: `{}' ...".format(method)) 64 | 65 | # filling method 66 | def filling_kernel(src, dshape, method): 67 | assert method in ['inflation', 'random'], \ 68 | "filling method: {} is unknown!".format(method) 69 | src_np = src.numpy() 70 | 71 | if method == 'inflation': 72 | dst = torch.FloatTensor(dshape) 73 | # normalize 74 | src = src/float(dshape[2]) 75 | src = src.view(dshape[0],dshape[1], 1, dshape[3],dshape[4]) 76 | dst.copy_(src, broadcast=True) 77 | elif method == 'random': 78 | dst = torch.FloatTensor(dshape) 79 | tmp = torch.FloatTensor(src.shape) 80 | # normalize 81 | src = src/float(dshape[2]) 82 | # random range 83 | scale = src.abs().mean() 84 | # filling 85 | dst[:,:,0,:,:].copy_(src) 86 | i = 1 87 | while i < dshape[2]: 88 | if i+2 < dshape[2]: 89 | torch.nn.init.uniform(tmp, a=-scale, b=scale) 90 | dst[:,:,i,:,:].copy_(tmp) 91 | dst[:,:,i+1,:,:].copy_(src) 92 | dst[:,:,i+2,:,:].copy_(-tmp) 93 | i += 3 94 | elif i+1 < dshape[2]: 95 | torch.nn.init.uniform(tmp, a=-scale, b=scale) 96 | dst[:,:,i,:,:].copy_(tmp) 97 | dst[:,:,i+1,:,:].copy_(-tmp) 98 | i += 2 99 | else: 100 | dst[:,:,i,:,:].copy_(src) 101 | i += 1 102 | # shuffle 103 | tmp = dst.numpy().swapaxes(2, -1) 104 | shp = tmp.shape[:-1] 105 | for ndx in np.ndindex(shp): 106 | np.random.shuffle(tmp[ndx]) 107 | dst = torch.from_numpy(tmp) 108 | else: 109 | raise NotImplementedError 110 | 111 | return dst 112 | 113 | 114 | # customized partially loading function 115 | src_state_keys = list(state_dict.keys()) 116 | dst_state_keys = list(net.state_dict().keys()) 117 | for name, param in state_dict.items(): 118 | if name in dst_state_keys: 119 | src_param_shape = param.shape 120 | dst_param_shape = net.state_dict()[name].shape 121 | if src_param_shape != dst_param_shape: 122 | if name.startswith('classifier'): 123 | continue 124 | assert len(src_param_shape) == 4 and len(dst_param_shape) == 5, "{} mismatch".format(name) 125 | if list(src_param_shape) == [dst_param_shape[i] for i in [0, 1, 3, 4]]: 126 | if dst_param_shape[2] != 1: 127 | param = filling_kernel(src=param, dshape=dst_param_shape, method=method) 128 | else: 129 | param = param.view(dst_param_shape) 130 | assert dst_param_shape == param.shape, \ 131 | "Initializer:: error({}): {} != {}".format(name, dst_param_shape, param.shape) 132 | 
net.state_dict()[name].copy_(param, broadcast=False) 133 | src_state_keys.remove(name) 134 | dst_state_keys.remove(name) 135 | 136 | # indicate missing / ignored keys 137 | if src_state_keys: 138 | out = "[\'" + '\', \''.join(src_state_keys) + "\']" 139 | logging.info("Initializer:: >> {} params are unused: {}".format(len(src_state_keys), 140 | out if len(out) < 300 else out[0:150] + " ... " + out[-150:])) 141 | if dst_state_keys: 142 | logging.info("Initializer:: >> failed to load: \n{}".format( 143 | json.dumps(dst_state_keys, indent=4, sort_keys=True))) 144 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/pretrained: -------------------------------------------------------------------------------- 1 | ../exps/pretrained -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/symbol_builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import logging 8 | 9 | from .i3d import I3D 10 | from .config import get_config 11 | 12 | def get_symbol(name, modality = 'rgb', drop_out = 0.5, print_net=False, arch_estimator = None, arch_d = None, **kwargs): 13 | 14 | if name.upper() == "I3D": 15 | net = I3D(modality = modality, dropout_prob = drop_out, arch_estimator = arch_estimator, arch_d = arch_d, **kwargs) 16 | else: 17 | logging.error("network '{}' not implemented".format(name)) 18 | raise NotImplementedError() 19 | 20 | if print_net: 21 | logging.debug("Symbol:: Network Architecture:") 22 | logging.debug(net) 23 | 24 | input_conf = get_config(name, modality = modality, **kwargs) 25 | return net, input_conf 26 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/test/evaluate_video_ucf101_i3d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree.
6 | """ 7 | 8 | import sys 9 | sys.path.append("..") 10 | 11 | import os 12 | import time 13 | import json 14 | import logging 15 | import argparse 16 | import numpy as np 17 | import torch 18 | import torch.backends.cudnn as cudnn 19 | 20 | import dataset 21 | from train.model import static_model 22 | from train import metric 23 | from data import video_sampler as sampler 24 | from data import video_transforms as transforms 25 | from data.video_iterator import VideoIter 26 | from network.symbol_builder import get_symbol 27 | 28 | 29 | parser = argparse.ArgumentParser(description="PyTorch Video Recognition Parser (Evaluation)") 30 | # debug 31 | parser.add_argument('--debug-mode', type=bool, default=False, 32 | help="print all setting for debugging.") 33 | # io 34 | parser.add_argument('--dataset', default='UCF101', choices=['UCF101','HMDB51'], 35 | help="path to dataset") 36 | parser.add_argument('--clip-length', type = int, default=250, 37 | help="define the length of each input sample.") 38 | parser.add_argument('--split', type = int, default=1, 39 | help="which split to train on") 40 | parser.add_argument('--frame-interval', type=int, default=1, 41 | help="define the sampling interval between frames.") 42 | parser.add_argument('--task-name', type=str, default='Pytorch-MF-Net', 43 | help="name of current task, leave it empty for using folder name") 44 | parser.add_argument('--model-dir', type=str, default="../exps/models/", 45 | help="set logging file.") 46 | parser.add_argument('--log-file', type=str, default="./eval-ucf101-split1_96_flow_add_drop.log", 47 | help="set logging file.") 48 | parser.add_argument('--accumulate', type=int, default=1, 49 | help="accumulate mv and res") 50 | parser.add_argument('--mv-minmaxnorm', type=int, default=0, 51 | help="minmaxnorm for mv") 52 | parser.add_argument('--ds_factor', type=int, default=16, 53 | help="downsampling the flow by ds_factor") 54 | parser.add_argument('--mv-loadimg', type=int, default=0, 55 | help="load img mv") 56 | 57 | # device 58 | parser.add_argument('--gpus', type=str, default='0', 59 | help="define gpu id") 60 | # algorithm 61 | parser.add_argument('--network', type=str, default='i3d', 62 | choices=['i3d'], 63 | help="chose the base network") 64 | parser.add_argument('--arch-estimator', type=str, default = None, 65 | choices=['DenseNet','DenseNetSmall', 'DenseNetTiny'], 66 | help="chose the generator") 67 | # evaluation 68 | parser.add_argument('--load-epoch', type=int, default=0, 69 | help="resume trained model") 70 | parser.add_argument('--batch-size', type=int, default=1, 71 | help="batch size") 72 | parser.add_argument('--modality', type=str, default='rgb', 73 | choices=['rgb', 'flow', 'mv', 'res', 'flow+mp4', 'I'], 74 | help="chose input type") 75 | 76 | def autofill(args): 77 | # customized 78 | if not args.task_name: 79 | args.task_name = os.path.basename(os.getcwd()) 80 | # fixed 81 | args.model_prefix = os.path.join(args.model_dir, args.task_name) 82 | return args 83 | 84 | def set_logger(log_file='', debug_mode=False): 85 | if log_file: 86 | if not os.path.exists("./"+os.path.dirname(log_file)): 87 | os.makedirs("./"+os.path.dirname(log_file)) 88 | handlers = [logging.FileHandler(log_file), logging.StreamHandler()] 89 | else: 90 | handlers = [logging.StreamHandler()] 91 | 92 | """ add '%(filename)s' to format show source file """ 93 | logging.basicConfig(level=logging.DEBUG if debug_mode else logging.INFO, 94 | format='%(asctime)s %(levelname)s: %(message)s', 95 | datefmt='%Y-%m-%d %H:%M:%S', 96 | handlers = 
handlers) 97 | 98 | 99 | if __name__ == '__main__': 100 | 101 | # set args 102 | args = parser.parse_args() 103 | args = autofill(args) 104 | 105 | set_logger(log_file=args.log_file, debug_mode=args.debug_mode) 106 | logging.info("Start evaluation with args:\n" + 107 | json.dumps(vars(args), indent=4, sort_keys=True)) 108 | 109 | # set device states 110 | os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpus) # before using torch 111 | assert torch.cuda.is_available(), "CUDA is not available" 112 | 113 | # load dataset related configuration 114 | dataset_cfg = dataset.get_config(name=args.dataset) 115 | 116 | # create model 117 | sym_net, input_config = get_symbol(name=args.network, modality = args.modality, arch_estimator = args.arch_estimator, **dataset_cfg) 118 | 119 | # network 120 | if torch.cuda.is_available(): 121 | cudnn.benchmark = True 122 | sym_net = torch.nn.DataParallel(sym_net).cuda() 123 | criterion = torch.nn.CrossEntropyLoss().cuda() 124 | else: 125 | sym_net = torch.nn.DataParallel(sym_net) 126 | criterion = torch.nn.CrossEntropyLoss() 127 | net = static_model(net=sym_net, 128 | criterion=criterion, 129 | model_prefix=args.model_prefix, 130 | criterion2 = torch.nn.MSELoss().cuda() if args.modality == 'flow+mp4' else None) 131 | net.load_checkpoint(epoch=args.load_epoch) 132 | 133 | # data iterator: 134 | data_root = "../dataset/{}".format(args.dataset) 135 | normalize = transforms.Normalize(mean=input_config['mean'], std=input_config['std']) 136 | val_sampler = sampler.RandomSampling(num=args.clip_length, 137 | interval=args.frame_interval, 138 | speed=[1.0, 1.0]) 139 | val_loader = VideoIter(video_prefix='/UCF101/TSN_input/', # change this part accordingly 140 | txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'testlist0{}.txt'.format(args.split)), 141 | sampler=val_sampler, 142 | force_color=True, 143 | video_transform=transforms.Compose([ 144 | #transforms.Resize((256,256)), 145 | #transforms.RandomCrop((224,224)), 146 | transforms.CenterCrop((224, 224)), # we did not use center crop in our paper 147 | # transforms.RandomHorizontalFlip(), # we did not use mirror in our paper 148 | transforms.ToTensor(args.modality), 149 | normalize, 150 | ]), 151 | name='test', 152 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'ucf101_split{}_test_info.txt'.format(args.split)), 153 | return_item_subpath=True, 154 | check_video = True, 155 | load_from_img = True, 156 | modality = args.modality, accumulate = args.accumulate, ds_factor = args.ds_factor, mv_minmaxnorm = args.mv_minmaxnorm, 157 | mv_loadimg = args.mv_loadimg 158 | ) 159 | 160 | eval_iter = torch.utils.data.DataLoader(val_loader, 161 | batch_size=args.batch_size, 162 | shuffle=False, 163 | num_workers=12, # change this part accordingly 164 | pin_memory=True) 165 | 166 | # eval metrics 167 | metrics = metric.MetricList(metric.Loss(name="loss-ce"), 168 | metric.Accuracy(topk=1, name="top1"), 169 | metric.Accuracy(topk=5, name="top5")) 170 | metrics.reset() 171 | 172 | # main loop 173 | net.net.eval() 174 | avg_score = {} 175 | sum_batch_elapse = 0.
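# The evaluation loop below scores one clip per batch element and accumulates each clip's softmax output in avg_score, keyed by video subpath; clips belonging to the same video are summed and counted, so the reported metrics are computed on video-level averaged predictions.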
176 | sum_batch_inst = 0 177 | duplication = 1 178 | softmax = torch.nn.Softmax(dim=1) 179 | scores = [] 180 | label = [] 181 | total_round = 1 # change this part accordingly if you do not want an inf loop 182 | for i_round in range(total_round): 183 | i_batch = 0 184 | logging.info("round #{}/{}".format(i_round, total_round)) 185 | for data, target, video_subpath in eval_iter: 186 | batch_start_time = time.time() 187 | 188 | outputs, losses = net.forward(data, target, node = 'logit') 189 | 190 | sum_batch_elapse += time.time() - batch_start_time 191 | sum_batch_inst += 1 192 | 193 | # recording 194 | output = softmax(outputs[0]).data.cpu() 195 | scores.append(output) 196 | target = target.cpu() 197 | label.append(target) 198 | losses = losses[0].data.cpu() 199 | del outputs 200 | #torch.cuda.empty_cache() 201 | for i_item in range(0, output.shape[0]): 202 | output_i = output[i_item,:].view(1, -1) 203 | target_i = torch.LongTensor([target[i_item]]) 204 | loss_i = losses 205 | video_subpath_i = video_subpath[i_item] 206 | if video_subpath_i in avg_score: 207 | avg_score[video_subpath_i][2] += output_i 208 | avg_score[video_subpath_i][3] += 1 209 | duplication = 0.92 * duplication + 0.08 * avg_score[video_subpath_i][3] 210 | else: 211 | avg_score[video_subpath_i] = [torch.LongTensor(target_i.numpy().copy()), 212 | torch.FloatTensor(loss_i.numpy().copy()), 213 | torch.FloatTensor(output_i.numpy().copy()), 214 | 1] # the last one is counter 215 | 216 | # show progress 217 | if (i_batch % 100) == 99: 218 | metrics.reset() 219 | for _, video_info in avg_score.items(): 220 | target, loss, pred, _ = video_info 221 | metrics.update([pred], target, [loss]) 222 | name_value = metrics.get_name_value() 223 | logging.info("{:.1f}%, {:.1f} \t| Batch [0,{}] \tAvg: {} = {:.5f}, {} = {:.5f}, {} = {:.5f}".format( 224 | float(100*i_batch) / eval_iter.__len__(), \ 225 | duplication, \ 226 | i_batch, \ 227 | name_value[0][0][0], name_value[0][0][1], \ 228 | name_value[1][0][0], name_value[1][0][1], \ 229 | name_value[2][0][0], name_value[2][0][1])) 230 | np.savez(os.path.join('./{}_{}/'.format(args.dataset, args.split),args.task_name+'_{}'.format(args.clip_length)),scores=np.concatenate(scores, axis = 0), labels=np.concatenate(label, axis = 0)) 231 | i_batch += 1 232 | 233 | 234 | # finished 235 | logging.info("Evaluation Finished!") 236 | #print(np.concatenate(scores, axis = 0).shape, np.concatenate(label, axis = 0).shape) 237 | np.savez(os.path.join('./{}_{}/'.format(args.dataset, args.split),args.task_name+'_{}'.format(args.clip_length)),scores=np.concatenate(scores, axis = 0), labels=np.concatenate(label, axis = 0)) 238 | metrics.reset() 239 | for _, video_info in avg_score.items(): 240 | target, loss, pred, _ = video_info 241 | metrics.update([pred], target, [loss]) 242 | 243 | logging.info("Total time cost: {:.1f} sec".format(sum_batch_elapse)) 244 | logging.info("Speed: {:.4f} samples/sec".format( 245 | args.batch_size * sum_batch_inst / sum_batch_elapse )) 246 | logging.info("Accuracy:") 247 | logging.info(json.dumps(metrics.get_name_value(), indent=4, sort_keys=True)) 248 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/test/test.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | python evaluate_video_hmdb_i3d.py --task-name hmdb1\ 7 | --split 1\ 8 | --load-epoch 10\ 9 | --modality flow+mp4\ 10 | --log-file ./eval_hmdb1.log \ 11 | --gpus 0,1\ 12 | --batch-size 2 --clip-length 250\ 13 | --arch-estimator DenseNetTiny\ 14 | --accumulate 0\ 15 | --mv-minmaxnorm 1 16 | 17 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/train.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | python train_hmdb51.py --task-name hmdb_1\ 7 | --split 1\ 8 | --network I3D \ 9 | --clip-length 64 \ 10 | --pretrained_3d ./exps/models/model_flow.pth\ 11 | --iter-size 32 --batch-size 3\ 12 | --optimizer adam\ 13 | --gpus 1,3\ 14 | --modality flow+mp4\ 15 | --train-frame-interval 1 \ 16 | --val-frame-interval 1\ 17 | --lr-base 0.0004\ 18 | --lr-base2 0.0004\ 19 | --lr-d 0.002\ 20 | --detach 1\ 21 | --lr-factor 0.2\ 22 | --dataset HMDB51\ 23 | --drop-out 0.85\ 24 | --fine_tune 0\ 25 | --arch-estimator DenseNetTiny\ 26 | --arch-d Discriminator\ 27 | --adv 1\ 28 | --epoch-thre 6\ 29 | --ds_factor 16\ 30 | --mv-minmaxnorm 1\ 31 | --accumulate 0 -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__init__.py -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/callback.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/callback.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/lr_scheduler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/lr_scheduler.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/metric.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/metric.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/model.cpython-36.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/callback.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import logging 9 | 10 | class Callback(object): 11 | 12 | def __init__(self, with_header=False): 13 | self.with_header = with_header 14 | 15 | def __call__(self): 16 | raise NotImplementedError("To be implemented") 17 | 18 | def header(self, epoch=None, batch=None): 19 | str_out = "" 20 | if self.with_header: 21 | if epoch is not None: 22 | str_out += "Epoch {:s} ".format(("[%d]"%epoch).ljust(5, ' ')) 23 | if batch is not None: 24 | str_out += "Batch {:s} ".format(("[%d]"%batch).ljust(6, ' ')) 25 | return str_out 26 | 27 | class CallbackList(Callback): 28 | 29 | def __init__(self, *args, with_header=True): 30 | super(CallbackList, self).__init__(with_header=with_header) 31 | assert all([issubclass(type(x), Callback) for x in args]), \ 32 | "Callback inputs illegal: {}".format(args) 33 | self.callbacks = [callback for callback in args] 34 | 35 | def __call__(self, epoch=None, batch=None, silent=False, **kwargs): 36 | str_out = self.header(epoch, batch) 37 | 38 | for callback in self.callbacks: 39 | str_out += callback(**kwargs, silent=True) + " " 40 | 41 | if not silent: 42 | logging.info(str_out) 43 | return str_out 44 | 45 | 46 | #################### 47 | # CUSTOMIZED CALLBACKS 48 | #################### 49 | 50 | class SpeedMonitor(Callback): 51 | 52 | def __init__(self, with_header=False): 53 | super(SpeedMonitor, self).__init__(with_header=with_header) 54 | 55 | def __call__(self, sample_elapse, update_elapse=None, epoch=None, batch=None, silent=False, **kwargs): 56 | str_out = self.header(epoch, batch) 57 | 58 | if sample_elapse is not None: 59 | sample_freq = 1./sample_elapse 60 | if update_elapse is not None: 61 | update_freq = 1./update_elapse 62 | str_out += "Speed {: >5.1f} (+{: >2.0f}) sample/sec ".format(sample_freq, update_freq-sample_freq) 63 | else: 64 | str_out += "Speed {:.2f} sample/sec ".format(sample_freq) 65 | 66 | if not silent: 67 | logging.info(str_out) 68 | return str_out 69 | 70 | class MetricPrinter(Callback): 71 | 72 | def __init__(self, with_header=False): 73 | super(MetricPrinter, self).__init__(with_header=with_header) 74 | 75 | def __call__(self, namevals, epoch=None, batch=None, silent=False, **kwargs): 76 | str_out = self.header(epoch, batch) 77 | 78 | if namevals is not None: 79 | for i, nameval in enumerate(namevals): 80 | name, value = nameval[0] 81 | str_out += "{} = {:.5f}".format(name, value) 82 | str_out += ", " if i != (len(namevals)-1) else " " 83 | 84 | if not silent: 85 | logging.info(str_out) 86 | return str_out 87 | 88 | 89 | #################### 90 | # TESTING CASES 91 | #################### 92 | 93 | if __name__ == "__main__": 94 | 95 | logging.getLogger().setLevel(logging.DEBUG) 96 | 97 | # Test each function 98 | # [1] Callback 99 | logging.info("- testing base callback class:") 100 | c = Callback(with_header=True) 101 | logging.info(c.header(epoch=1, batch=123)) 102 | 103 | # [2] SpeedMonitor 104 | logging.info("- testing speedmonitor:") 105 | s = SpeedMonitor(with_header=True) 106 | 
s(sample_elapse=0.3, epoch=10, batch=31) 107 | s = SpeedMonitor(with_header=False) 108 | s(sample_elapse=0.3) 109 | 110 | # [3] MetricPrinter 111 | logging.info("- test metric printer") 112 | d = MetricPrinter(with_header=True) 113 | d(namevals=[[('acc1',0.123)], [("acc5",0.4453232)]], epoch=10, batch=31) 114 | d = MetricPrinter(with_header=False) 115 | d(namevals=[[('acc1',0.123)], [("acc5",0.4453232)]]) 116 | 117 | # [4] CallbackList 118 | logging.info("- test callback list") 119 | c = CallbackList() 120 | c = CallbackList(SpeedMonitor(), MetricPrinter()) 121 | c(epoch=10, batch=31, sample_elapse=0.3, namevals=[[('acc1',0.123)], [("acc5",0.4453232)]]) -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import logging 9 | 10 | class LRScheduler(object): 11 | 12 | def __init__(self, step_counter=0, base_lr=0.01): 13 | self.step_counter = step_counter 14 | self.base_lr = base_lr 15 | 16 | def update(self): 17 | raise NotImplementedError("must override this") 18 | 19 | def get_lr(self): 20 | return self.lr 21 | 22 | class MultiFactorScheduler(LRScheduler): 23 | 24 | def __init__(self, steps, base_lr=0.01, factor=0.1, step_counter=0): 25 | super(MultiFactorScheduler, self).__init__(step_counter, base_lr) 26 | assert isinstance(steps, list) and len(steps) > 0 27 | for i, _step in enumerate(steps): 28 | if i != 0 and steps[i] <= steps[i-1]: 29 | raise ValueError("Schedule step must be an increasing integer list") 30 | if _step < 1: 31 | raise ValueError("Schedule step must be greater than or equal to 1") 32 | if factor > 1.0: 33 | raise ValueError("Factor must be no more than 1 to make lr reduce") 34 | 35 | logging.info("Iter %d: start with learning rate: %0.5e (next lr step: %d)" \ 36 | % (self.step_counter, self.base_lr, steps[0])) 37 | self.steps = steps 38 | self.factor = factor 39 | self.lr = self.base_lr 40 | self.cursor = 0 41 | 42 | def update(self): 43 | self.step_counter += 1 44 | 45 | if self.cursor >= len(self.steps): 46 | return self.lr 47 | while self.steps[self.cursor] < self.step_counter: 48 | self.lr *= self.factor 49 | self.cursor += 1 50 | # message 51 | if self.cursor >= len(self.steps): 52 | logging.info("Iter: %d, change learning rate to %0.5e for step [%d:Inf)" \ 53 | % (self.step_counter-1, self.lr, self.step_counter-1)) 54 | return self.lr 55 | else: 56 | logging.info("Iter: %d, change learning rate to %0.5e for step [%d:%d)" \ 57 | % (self.step_counter-1, self.lr, self.step_counter-1, \ 58 | self.steps[self.cursor])) 59 | if self.step_counter < 100: 60 | return self.lr/2.0 61 | return self.lr 62 | 63 | 64 | if __name__ == "__main__": 65 | 66 | logging.getLogger().setLevel(logging.DEBUG) 67 | 68 | # test LRScheduler() 69 | logging.info("testing basic class: LRScheduler()") 70 | LRScheduler() 71 | 72 | # test MultiFactorScheduler() 73 | logging.info("testing basic class: MultiFactorScheduler()") 74 | start_point = 2 75 | lr_scheduler = MultiFactorScheduler(step_counter=start_point, 76 | base_lr=0.1, 77 | steps=[2, 14, 18], 78 | factor=0.1) 79 | for i in range(start_point, 22): 80 | logging.info("id = {}, lr = {:f}".format(i, lr_scheduler.update()))
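For reference, a minimal sketch of how the MultiFactorScheduler above can drive a PyTorch optimizer; the model, optimizer and step counts here are hypothetical stand-ins, not part of this repository, and `update()` is assumed to be called once per effective batch:

```python
# Illustrative sketch only -- the model and optimizer below are hypothetical stand-ins.
import torch
from train.lr_scheduler import MultiFactorScheduler

model = torch.nn.Linear(10, 2)                             # stand-in network
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = MultiFactorScheduler(steps=[3000, 6000],       # decay boundaries, in updates
                                 base_lr=0.1, factor=0.1)

for step in range(9000):
    lr = scheduler.update()     # advances step_counter; multiplies lr by `factor` at each boundary
    for group in optimizer.param_groups:
        group['lr'] = lr        # push the scheduled rate into the optimizer
    # ... forward / backward / optimizer.step() would go here ...
```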
-------------------------------------------------------------------------------- /code/dmcnet_I3D/train/metric.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import logging 9 | import numpy as np 10 | 11 | class EvalMetric(object): 12 | 13 | def __init__(self, name, **kwargs): 14 | self.name = str(name) 15 | self.reset() 16 | 17 | def update(self, preds, labels, losses): 18 | raise NotImplementedError() 19 | 20 | def reset(self): 21 | self.num_inst = 0 22 | self.sum_metric = 0.0 23 | 24 | def get(self): 25 | if self.num_inst == 0: 26 | return (self.name, float('nan')) 27 | else: 28 | return (self.name, self.sum_metric / self.num_inst) 29 | 30 | def get_name_value(self): 31 | name, value = self.get() 32 | if not isinstance(name, list): 33 | name = [name] 34 | if not isinstance(value, list): 35 | value = [value] 36 | return list(zip(name, value)) 37 | 38 | def check_label_shapes(self, preds, labels): 39 | # raise if the shape is inconsistent 40 | if (type(labels) is list) and (type(preds) is list): 41 | label_shape, pred_shape = len(labels), len(preds) 42 | else: 43 | label_shape, pred_shape = labels.shape[0], preds.shape[0] 44 | #print(preds, labels, label_shape, pred_shape) 45 | if label_shape != pred_shape: 46 | raise NotImplementedError("") 47 | 48 | 49 | class MetricList(EvalMetric): 50 | """Handle multiple evaluation metrics 51 | """ 52 | def __init__(self, *args, name="metric_list"): 53 | assert all([issubclass(type(x), EvalMetric) for x in args]), \ 54 | "MetricList input is illegal: {}".format(args) 55 | self.metrics = [metric for metric in args] 56 | super(MetricList, self).__init__(name=name) 57 | 58 | def update(self, preds, labels, losses=None): 59 | preds = [preds] if type(preds) is not list else preds 60 | labels = [labels] if type(labels) is not list else labels 61 | losses = [losses] if type(losses) is not list else losses 62 | i = 0 63 | for metric in self.metrics: 64 | if type(metric) is type(Loss()): 65 | #handle multiple losses 66 | metric.update(preds, labels, [losses[i]]) 67 | i = i + 1 68 | else: 69 | metric.update(preds, labels, losses) 70 | 71 | def reset(self): 72 | if hasattr(self, 'metrics'): 73 | for metric in self.metrics: 74 | metric.reset() 75 | else: 76 | logging.warning("No metric defined.") 77 | 78 | def get(self): 79 | outputs = [] 80 | for metric in self.metrics: 81 | outputs.append(metric.get()) 82 | return outputs 83 | 84 | def get_name_value(self): 85 | outputs = [] 86 | for metric in self.metrics: 87 | outputs.append(metric.get_name_value()) 88 | return outputs 89 | 90 | 91 | #################### 92 | # COMMON METRICS 93 | #################### 94 | 95 | class Accuracy(EvalMetric): 96 | """Computes accuracy classification score.
97 | """ 98 | def __init__(self, name='accuracy', topk=1): 99 | super(Accuracy, self).__init__(name) 100 | self.topk = topk 101 | 102 | def update(self, preds, labels, losses): 103 | preds = [preds] if type(preds) is not list else preds 104 | labels = [labels] if type(labels) is not list else labels 105 | 106 | self.check_label_shapes(preds, labels) 107 | for pred, label in zip(preds, labels): 108 | assert self.topk <= pred.shape[1], \ 109 | "topk({}) should no larger than the pred dim({})".format(self.topk, pred.shape[1]) 110 | _, pred_topk = pred.topk(self.topk, 1, True, True) 111 | 112 | pred_topk = pred_topk.t() 113 | correct = pred_topk.eq(label.view(1, -1).expand_as(pred_topk)) 114 | 115 | self.sum_metric += float(correct.view(-1).float().sum(0, keepdim=True).numpy()) 116 | self.num_inst += label.shape[0] 117 | 118 | 119 | class Loss(EvalMetric): 120 | """Dummy metric for directly printing loss. 121 | """ 122 | def __init__(self, name='loss'): 123 | super(Loss, self).__init__(name) 124 | 125 | def update(self, preds, labels, losses): 126 | assert losses is not None, "Loss undefined." 127 | for loss in losses: 128 | self.sum_metric += float(loss.numpy().sum()) 129 | #print(loss,loss.shape) 130 | if loss.shape == (): 131 | self.num_inst += 1 132 | else: 133 | self.num_inst += loss.shape[0] 134 | 135 | 136 | if __name__ == "__main__": 137 | import torch 138 | 139 | # Test Accuracy 140 | predicts = [torch.from_numpy(np.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]))] 141 | labels = [torch.from_numpy(np.array([ 0, 1, 1 ]))] 142 | losses = [torch.from_numpy(np.array([ 0.3, 0.4, 0.5 ])),torch.from_numpy(np.array([ 0., 0.4, 0.5 ]))] 143 | 144 | logging.getLogger().setLevel(logging.DEBUG) 145 | logging.debug("input pred: {}".format(predicts)) 146 | logging.debug("input label: {}".format(labels)) 147 | logging.debug("input loss: {}".format(losses)) 148 | 149 | acc = Accuracy() 150 | 151 | acc.update(preds=predicts, labels=labels, losses=losses) 152 | 153 | logging.info(acc.get()) 154 | 155 | # Test MetricList 156 | metrics = MetricList(Loss(name="ce-loss"), 157 | Loss(name="mse"), 158 | Accuracy(topk=1, name="acc-top1"), 159 | Accuracy(topk=2, name="acc-top2"), 160 | ) 161 | metrics.update(preds=predicts, labels=labels, losses=losses) 162 | 163 | logging.info("------------") 164 | logging.info(metrics.get()) 165 | acc.get_name_value() 166 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/train_hmdb51.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | import os 9 | import json 10 | import socket 11 | import logging 12 | import argparse 13 | 14 | import torch 15 | import torch.nn.parallel 16 | import torch.distributed as dist 17 | 18 | import dataset 19 | from train_model import train_model 20 | from network.symbol_builder import get_symbol 21 | 22 | 23 | parser = argparse.ArgumentParser(description="DMC-Net Parser") 24 | # debug 25 | parser.add_argument('--debug-mode', type=bool, default=True, 26 | help="print all setting for debugging.") 27 | # io 28 | parser.add_argument('--dataset', default='UCF101', choices=['UCF101', 'HMDB51'], 29 | help="path to dataset") 30 | parser.add_argument('--split', type = int, default=1, 31 | help="which split to train on") 32 | parser.add_argument('--clip-length',type=int, default=16, 33 | help="define the length of each input sample.") 34 | parser.add_argument('--train-frame-interval', type=int, default=2, 35 | help="define the sampling interval between frames.") 36 | parser.add_argument('--val-frame-interval', type=int, default=2, 37 | help="define the sampling interval between frames.") 38 | parser.add_argument('--task-name', type=str, default='', 39 | help="name of current task, leave it empty for using folder name") 40 | parser.add_argument('--model-dir', type=str, default="./exps/models", 41 | help="set logging file.") 42 | parser.add_argument('--log-file', type=str, default="", 43 | help="set logging file.") 44 | parser.add_argument('--accumulate', type=int, default=1, 45 | help="accumulate mv and res") 46 | parser.add_argument('--mv-minmaxnorm', type=int, default=0, 47 | help="minmaxnorm for mv") 48 | parser.add_argument('--mv-loadimg', type=int, default=0, 49 | help="load img mv") 50 | parser.add_argument('--detach', type=int, default=0, 51 | help="whether not update i3d") 52 | parser.add_argument('--ds_factor', type=int, default=16, 53 | help="downsampling the flow by ds_factor") 54 | # device 55 | parser.add_argument('--gpus', type=str, default="0,1,2,3,4,5,6,7", 56 | help="define gpu id") 57 | # algorithm 58 | parser.add_argument('--network', type=str, default='I3D', 59 | choices=['I3D'], 60 | help="choose the base network") 61 | parser.add_argument('--arch-estimator', type=str, default = None, 62 | choices=['DenseNet','DenseNetSmall', 'DenseNetTiny'], 63 | help="choose the generator") 64 | parser.add_argument('--arch-d', type=str, default=None, 65 | help="choose the D") 66 | # initialization with priority (the next step will overwrite the previous step) 67 | # - step 1: random initialize 68 | # - step 2: load the 2D pretrained model if `pretrained_2d' is True 69 | # - step 3: load the 3D pretrained model if `pretrained_3d' is defined 70 | # - step 4: resume if `resume_epoch' >= 0 71 | parser.add_argument('--pretrained_2d', type=bool, default=False, 72 | help="load default 2D pretrained model.") 73 | parser.add_argument('--pretrained_3d', type=str, 74 | default='./network/pretrained/MFNet3D_Kinetics-400_72.8.pth', 75 | help="load default 3D pretrained model.") 76 | parser.add_argument('--new_classifier', type=bool, default=False, 77 | help="whether use mode_flow to initialize classifier weights") 78 | parser.add_argument('--resume-epoch', type=int, default=-1, 79 | help="resume train") 80 | # flow+mp4 is the modality we used for generating DMC 81 | parser.add_argument('--modality', type=str, default='rgb', 82 | choices=['rgb', 'flow', 'mv', 'res', 'flow+mp4', 'I'], 83 | help="choose input type") 84 | parser.add_argument('--drop-out', type=float, default=0.5, 85 | 
help="drop-out probability") 86 | parser.add_argument('--adv', type=float, default=0., 87 | help="weight for adversirial loss") 88 | # optimization 89 | parser.add_argument('--epoch-thre', type=int, default=1, 90 | help="the epoch classifier begins to be optimized when with gen") 91 | parser.add_argument('--optimizer', type=str, default='sgd', 92 | choices=['sgd', 'adam'], 93 | help="optimizer") 94 | parser.add_argument('--fine_tune', type=int, default=1, 95 | help="apply different learning rate for different layers") 96 | parser.add_argument('--batch-size', type=int, default=32, 97 | help="batch size") 98 | parser.add_argument('--iter-size', type=int, default=1, 99 | help="iteration size which is for accumalation of gradients") 100 | parser.add_argument('--lr-base', type=float, default=0.005, 101 | help="learning rate") 102 | parser.add_argument('--lr-base2', type=float, default=0.002, 103 | help="learning rate for stage 2") 104 | parser.add_argument('--lr-d', type=float, default=None, 105 | help="learning rate for discriminator") 106 | parser.add_argument('--lr-steps', type=list, default=[int(1e4*x) for x in [3.5, 6, 8.5, 11, 13.5, 16]], 107 | help="number of samples to pass before changing learning rate") # 1e6 million 108 | #parser.add_argument('--lr-steps', type=list, default=[int(1e4*x) for x in [4.5, 7, 9.5, 12, 14.5, 17]], 109 | # help="number of samples to pass before changing learning rate") # 1e6 million 110 | #parser.add_argument('--lr-steps', type=list, default=[int(1e4*x) for x in [10, 20, 30, 40, 50, 60]], 111 | # help="number of samples to pass before changing learning rate") # 1e6 million 112 | parser.add_argument('--lr-factor', type=float, default=0.1, 113 | help="reduce the learning with factor") 114 | parser.add_argument('--save-frequency', type=float, default=1, 115 | help="save once after N epochs") 116 | parser.add_argument('--end-epoch', type=int, default=50, 117 | help="maxmium number of training epoch") 118 | parser.add_argument('--random-seed', type=int, default=1, 119 | help='random seed (default: 1)') 120 | 121 | def autofill(args): 122 | # customized 123 | if not args.task_name: 124 | args.task_name = os.path.basename(os.getcwd()) 125 | if not args.log_file: 126 | if os.path.exists("./exps/logs"): 127 | args.log_file = "./exps/logs/{}_at-{}.log".format(args.task_name, socket.gethostname()) 128 | else: 129 | args.log_file = ".{}_at-{}.log".format(args.task_name, socket.gethostname()) 130 | # fixed 131 | args.model_prefix = os.path.join(args.model_dir, args.task_name) 132 | args.score_dir = './exps/score' + '/{}_{}/'.format(args.dataset, args.split) + args.task_name 133 | return args 134 | 135 | def set_logger(log_file='', debug_mode=False): 136 | if log_file: 137 | if not os.path.exists("./"+os.path.dirname(log_file)): 138 | os.makedirs("./"+os.path.dirname(log_file)) 139 | handlers = [logging.FileHandler(log_file), logging.StreamHandler()] 140 | else: 141 | handlers = [logging.StreamHandler()] 142 | 143 | """ add '%(filename)s:%(lineno)d %(levelname)s:' to format show source file """ 144 | logging.basicConfig(level=logging.DEBUG if debug_mode else logging.INFO, 145 | format='%(asctime)s: %(message)s', 146 | datefmt='%Y-%m-%d %H:%M:%S', 147 | handlers = handlers) 148 | 149 | if __name__ == "__main__": 150 | 151 | # set args 152 | args = parser.parse_args() 153 | args = autofill(args) 154 | 155 | set_logger(log_file=args.log_file, debug_mode=args.debug_mode) 156 | logging.info("Using pytorch {} ({})".format(torch.__version__, torch.__path__)) 157 | 
logging.info("Start training with args:\n" + 158 | json.dumps(vars(args), indent=4, sort_keys=True)) 159 | 160 | # set device states 161 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus # before using torch 162 | assert torch.cuda.is_available(), "CUDA is not available" 163 | torch.manual_seed(args.random_seed) 164 | torch.cuda.manual_seed(args.random_seed) 165 | 166 | 167 | # load dataset related configuration 168 | dataset_cfg = dataset.get_config(name=args.dataset) 169 | 170 | # creat model with all parameters initialized 171 | net, input_conf = get_symbol(name=args.network, 172 | pretrained=args.pretrained_2d if args.resume_epoch < 0 else None, 173 | modality = args.modality, 174 | drop_out = args.drop_out, 175 | arch_estimator = args.arch_estimator, 176 | arch_d = args.arch_d, 177 | print_net = False, 178 | **dataset_cfg) 179 | 180 | # training 181 | kwargs = {} 182 | kwargs.update(dataset_cfg) 183 | kwargs.update({'input_conf': input_conf}) 184 | kwargs.update(vars(args)) 185 | train_model(args.network, sym_net=net, optim = args.optimizer, **kwargs) 186 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/train_ucf101.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import os 9 | import json 10 | import socket 11 | import logging 12 | import argparse 13 | 14 | import torch 15 | import torch.nn.parallel 16 | import torch.distributed as dist 17 | 18 | import dataset 19 | from train_model import train_model 20 | from network.symbol_builder import get_symbol 21 | 22 | parser = argparse.ArgumentParser(description="DMC-Net Parser") 23 | # debug 24 | parser.add_argument('--debug-mode', type=bool, default=True, 25 | help="print all setting for debugging.") 26 | # io 27 | parser.add_argument('--dataset', default='UCF101', choices=['UCF101', 'HMDB51'], 28 | help="path to dataset") 29 | parser.add_argument('--split', type = int, default=1, 30 | help="which split to train on") 31 | parser.add_argument('--clip-length',type=int, default=16, 32 | help="define the length of each input sample.") 33 | parser.add_argument('--train-frame-interval', type=int, default=2, 34 | help="define the sampling interval between frames.") 35 | parser.add_argument('--val-frame-interval', type=int, default=2, 36 | help="define the sampling interval between frames.") 37 | parser.add_argument('--task-name', type=str, default='', 38 | help="name of current task, leave it empty for using folder name") 39 | parser.add_argument('--model-dir', type=str, default="./exps/models", 40 | help="set logging file.") 41 | parser.add_argument('--log-file', type=str, default="", 42 | help="set logging file.") 43 | parser.add_argument('--accumulate', type=int, default=1, 44 | help="accumulate mv and res") 45 | parser.add_argument('--mv-minmaxnorm', type=int, default=0, 46 | help="minmaxnorm for mv") 47 | parser.add_argument('--mv-loadimg', type=int, default=0, 48 | help="load img mv") 49 | parser.add_argument('--detach', type=int, default=0, 50 | help="whether not update i3d") 51 | parser.add_argument('--ds_factor', type=int, default=16, 52 | help="downsampling the flow by ds_factor") 53 | # device 54 | parser.add_argument('--gpus', type=str, default="0,1,2,3,4,5,6,7", 55 | help="define gpu id") 56 | # algorithm 57 | parser.add_argument('--network', 
type=str, default='I3D', 58 | choices=['I3D'], 59 | help="choose the base network") 60 | parser.add_argument('--arch-estimator', type=str, default=None, 61 | choices=['DenseNet','DenseNetSmall', 'DenseNetTiny'], 62 | help="choose the generator") 63 | parser.add_argument('--arch-d', type=str, default=None, 64 | help="choose the discriminator") 65 | # initialization with priority (the next step will overwrite the previous step) 66 | # - step 1: random initialize 67 | # - step 2: load the 2D pretrained model if `pretrained_2d' is True 68 | # - step 3: load the 3D pretrained model if `pretrained_3d' is defined 69 | # - step 4: resume if `resume_epoch' >= 0 70 | parser.add_argument('--pretrained_2d', type=bool, default=False, 71 | help="load default 2D pretrained model.") 72 | parser.add_argument('--pretrained_3d', type=str, 73 | default='./network/pretrained/MFNet3D_Kinetics-400_72.8.pth', 74 | help="load default 3D pretrained model.") 75 | parser.add_argument('--new_classifier', type=bool, default=False, 76 | help="whether to use model_flow to initialize classifier weights") 77 | parser.add_argument('--resume-epoch', type=int, default=-1, 78 | help="resume training") 79 | # flow+mp4 is the modality we used for generating DMC 80 | parser.add_argument('--modality', type=str, default='rgb', 81 | choices=['rgb', 'flow', 'mv', 'res', 'flow+mp4', 'I'], 82 | help="choose input type") 83 | parser.add_argument('--drop-out', type=float, default=0.5, 84 | help="drop-out probability") 85 | parser.add_argument('--adv', type=float, default=0., 86 | help="weight for adversarial loss") 87 | # optimization 88 | parser.add_argument('--epoch-thre', type=int, default=1, 89 | help="the epoch at which the classifier begins to be optimized when training with the generator") 90 | parser.add_argument('--optimizer', type=str, default='sgd', 91 | choices=['sgd', 'adam'], 92 | help="optimizer") 93 | parser.add_argument('--fine_tune', type=int, default=1, 94 | help="apply different learning rates for different layers") 95 | parser.add_argument('--batch-size', type=int, default=32, 96 | help="batch size") 97 | parser.add_argument('--iter-size', type=int, default=1, 98 | help="iteration size, used for accumulation of gradients") 99 | parser.add_argument('--lr-base', type=float, default=0.005, 100 | help="learning rate") 101 | parser.add_argument('--lr-base2', type=float, default=0.001, 102 | help="learning rate for stage 2") 103 | parser.add_argument('--lr-d', type=float, default=None, 104 | help="learning rate for discriminator") 105 | parser.add_argument('--lr-steps', type=list, default=[int(1e5*x) for x in [0.5, 0.7, 1.0, 4, 5]], 106 | help="number of samples to pass before changing learning rate") # 1e6 million 107 | #parser.add_argument('--lr-steps', type=list, default=[int(1e4*x) for x in [12, 18, 24, 30, 40, 60]], 108 | # help="number of samples to pass before changing learning rate") # 1e6 million 109 | parser.add_argument('--lr-factor', type=float, default=0.1, 110 | help="reduce the learning rate by this factor") 111 | parser.add_argument('--save-frequency', type=float, default=1, 112 | help="save once after N epochs") 113 | parser.add_argument('--end-epoch', type=int, default=50, 114 | help="maximum number of training epochs") 115 | parser.add_argument('--random-seed', type=int, default=1, 116 | help='random seed (default: 1)') 117 | 118 | 119 | def autofill(args): 120 | # customized 121 | if not args.task_name: 122 | args.task_name = os.path.basename(os.getcwd()) 123 | if not args.log_file: 124 | if os.path.exists("./exps/logs"): 125 | args.log_file = 
"./exps/logs/{}_at-{}.log".format(args.task_name, socket.gethostname()) 126 | else: 127 | args.log_file = ".{}_at-{}.log".format(args.task_name, socket.gethostname()) 128 | # fixed 129 | args.model_prefix = os.path.join(args.model_dir, args.task_name) 130 | args.score_dir = './exps/score' + '/{}_{}/'.format(args.dataset, args.split) + args.task_name 131 | return args 132 | 133 | def set_logger(log_file='', debug_mode=False): 134 | if log_file: 135 | if not os.path.exists("./"+os.path.dirname(log_file)): 136 | os.makedirs("./"+os.path.dirname(log_file)) 137 | handlers = [logging.FileHandler(log_file), logging.StreamHandler()] 138 | else: 139 | handlers = [logging.StreamHandler()] 140 | 141 | """ add '%(filename)s:%(lineno)d %(levelname)s:' to format show source file """ 142 | logging.basicConfig(level=logging.DEBUG if debug_mode else logging.INFO, 143 | format='%(asctime)s: %(message)s', 144 | datefmt='%Y-%m-%d %H:%M:%S', 145 | handlers = handlers) 146 | 147 | if __name__ == "__main__": 148 | 149 | # set args 150 | args = parser.parse_args() 151 | args = autofill(args) 152 | 153 | set_logger(log_file=args.log_file, debug_mode=args.debug_mode) 154 | logging.info("Using pytorch {} ({})".format(torch.__version__, torch.__path__)) 155 | logging.info("Start training with args:\n" + 156 | json.dumps(vars(args), indent=4, sort_keys=True)) 157 | 158 | # set device states 159 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus # before using torch 160 | assert torch.cuda.is_available(), "CUDA is not available" 161 | torch.manual_seed(args.random_seed) 162 | torch.cuda.manual_seed(args.random_seed) 163 | 164 | 165 | 166 | # load dataset related configuration 167 | dataset_cfg = dataset.get_config(name=args.dataset) 168 | 169 | # creat model with all parameters initialized 170 | net, input_conf = get_symbol(name=args.network, 171 | pretrained=args.pretrained_2d if args.resume_epoch < 0 else None, 172 | modality = args.modality, 173 | drop_out = args.drop_out, 174 | arch_estimator = args.arch_estimator, 175 | arch_d = args.arch_d, 176 | print_net= False, 177 | **dataset_cfg) 178 | 179 | # training 180 | kwargs = {} 181 | kwargs.update(dataset_cfg) 182 | kwargs.update({'input_conf': input_conf}) 183 | kwargs.update(vars(args)) 184 | train_model(args.network, sym_net=net, optim = args.optimizer, **kwargs) 185 | -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split1/combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 
4 | 5 | 6 | # ./exp/hmdb51_coviar/flow/split1/combine.sh 2>&1 | tee ./exp/hmdb51_coviar/flow/split1/acc.log 7 | 8 | expdir=hmdb51_coviar 9 | expname=flow/split1 10 | 11 | python combine.py --iframe exp/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz --res exp/hmdb51_coviar/residual/split1/residual_score_model_best.npz \ 12 | --mv exp/hmdb51_coviar/mv/split1/mv_score_model_best.npz --flow exp/${expdir}/${expname}/flow_score_model_best.npz 13 | -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split1/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/flow/split1/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split2/combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | # ./exp/hmdb51_coviar/flow/split2/combine.sh 2>&1 | tee ./exp/hmdb51_coviar/flow/split2/acc.log 6 | 7 | expdir=hmdb51_coviar 8 | expname=flow/split2 9 | 10 | python combine.py --iframe exp/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz --res exp/hmdb51_coviar/residual/split2/residual_score_model_best.npz \ 11 | --mv exp/hmdb51_coviar/mv/split2/mv_score_model_best.npz --flow exp/${expdir}/${expname}/flow_score_model_best.npz 12 | -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split2/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/flow/split2/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split3/combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 
4 | 5 | # ./exp/hmdb51_coviar/flow/split3/combine.sh 2>&1 | tee ./exp/hmdb51_coviar/flow/split3/acc.log 6 | 7 | expdir=hmdb51_coviar 8 | expname=flow/split3 9 | 10 | python combine.py --iframe exp/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz --res exp/hmdb51_coviar/residual/split3/residual_score_model_best.npz \ 11 | --mv exp/hmdb51_coviar/mv/split3/mv_score_model_best.npz --flow exp/${expdir}/${expname}/flow_score_model_best.npz 12 | -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split3/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/flow/split3/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/mv/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/mv/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/mv/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/mv/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/mv/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/mv/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/residual/split1/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/residual/split1/residual_score_model_best.npz 
-------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/residual/split2/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/residual/split2/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/residual/split3/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/residual/split3/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gan/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split1/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gan/split1 8 | representation=mv 9 | 10 | # exp/hmdb51_gan/split1/run.sh; ./exp/hmdb51_gan/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split1/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name hmdb51 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 26 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_train_rename.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_test_rename.txt \ 29 | --weights exp/hmdb51_gen_flow/split1/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name hmdb51 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 49 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_test_rename.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee exp/${expdir}/test.log 
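The run_combine.sh entry that follows feeds these per-modality score files to combine.py. As a rough sketch of what such late fusion amounts to (the file paths, npz keys, and equal weights below are assumptions for illustration; combine.py may weight the streams differently):

```python
# Hedged late-fusion sketch over saved score files -- NOT the actual combine.py.
# Assumes each .npz holds 'scores' (N x num_classes) and 'labels' (N,), the keys
# the I3D evaluation script writes via np.savez(..., scores=..., labels=...).
import numpy as np

paths = ['iframe_score_model_best.npz',      # hypothetical locations
         'residual_score_model_best.npz',
         'mv_score_model_best.npz']

runs = [np.load(p) for p in paths]
labels = runs[0]['labels']
fused = sum(r['scores'] for r in runs) / len(runs)   # equal-weight average (assumption)

top1 = float((fused.argmax(axis=1) == labels).mean())
print('fused top-1 accuracy: {:.4f}'.format(top1))
```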
-------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split1/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gan/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split1/acc.log 8 | 9 | expdir=hmdb51_gan/split1 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split1/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split1/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gan/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split2/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gan/split2 8 | representation=mv 9 | 10 | # exp/hmdb51_gan/split2/run.sh; ./exp/hmdb51_gan/split2/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split2/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name hmdb51 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 26 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_train_rename.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_test_rename.txt \ 29 | --weights exp/hmdb51_gen_flow/split2/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name hmdb51 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 49 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_test_rename.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee 
exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split2/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gan/split2/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split2/acc.log 8 | 9 | expdir=hmdb51_gan/split2 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split2/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split2/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gan/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split3/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gan/split3 8 | representation=mv 9 | 10 | # exp/hmdb51_gan/split3/run.sh; ./exp/hmdb51_gan/split3/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split3/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name hmdb51 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 26 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_train_rename.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_test_rename.txt \ 29 | --weights exp/hmdb51_gen_flow/split3/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name hmdb51 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 49 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_test_rename.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | 
tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split3/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gan/split3/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split3/acc.log 8 | 9 | expdir=hmdb51_gan/split3 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split3/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split3/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gen_flow/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split1/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gen_flow/split1 8 | representation=mv 9 | 10 | # exp/hmdb51_gen_flow/split1/run.sh; ./exp/hmdb51_gen_flow/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split1/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name hmdb51 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 20 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_train_rename.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_test_rename.txt \ 23 | --weights ./exp/hmdb51_coviar/flow/split1/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name hmdb51 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 45 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_test_rename.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log 
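These hmdb51_gen_flow runs differ from the hmdb51_gan runs above mainly in dropping the adversarial terms and setting --flow_ds_factor 16 instead of 0, i.e. the optical-flow target is spatially downsampled by a factor of 16 before the MSE loss, so only coarse flow supervision remains. A sketch of one plausible reading of that flag (the actual downsampling op in train.py may differ):

import torch
import torch.nn.functional as F

def downsample_flow_target(flow, ds_factor=16):
    # Factor 0 means the flow target is used at full resolution.
    if ds_factor <= 0:
        return flow
    coarse = F.avg_pool2d(flow, kernel_size=ds_factor)
    # Restore the original spatial size so the MSE shapes still match.
    return F.interpolate(coarse, scale_factor=ds_factor, mode='nearest')

flow = torch.randn(4, 2, 224, 224)         # (batch, dx/dy channels, H, W)
target = downsample_flow_target(flow, 16)  # 14x14 content at 224x224 shape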
-------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split1/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gen_flow/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split1/acc.log 8 | 9 | expdir=hmdb51_gen_flow/split1 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split1/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split1/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gen_flow/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split2/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gen_flow/split2 8 | representation=mv 9 | 10 | # exp/hmdb51_gen_flow/split2/run.sh; ./exp/hmdb51_gen_flow/split2/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split2/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name hmdb51 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 20 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_train_rename.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_test_rename.txt \ 23 | --weights ./exp/hmdb51_coviar/flow/split2/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name hmdb51 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 45 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_test_rename.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log 
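Every test.py call in these scripts evaluates with --test_segments 25 and --test-crops 1, i.e. 25 temporally spread samples per video and a single center crop, whose scores are averaged into the saved .npz file. A sketch of TSN-style uniform segment sampling under that assumption:

def segment_indices(n_frames, n_segments=25):
    # Center of each of n_segments equal temporal chunks.
    step = n_frames / n_segments
    return [min(int(step * i + step / 2), n_frames - 1)
            for i in range(n_segments)]

print(segment_indices(300))  # 25 roughly evenly spaced frame indices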
-------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split2/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gen_flow/split2/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split2/acc.log 8 | 9 | expdir=hmdb51_gen_flow/split2 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split2/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split2/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gen_flow/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split3/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gen_flow/split3 8 | representation=mv 9 | 10 | # exp/hmdb51_gen_flow/split3/run.sh; ./exp/hmdb51_gen_flow/split3/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split3/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name hmdb51 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 20 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_train_rename.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_test_rename.txt \ 23 | --weights ./exp/hmdb51_coviar/flow/split3/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name hmdb51 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 45 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_test_rename.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log 
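Another flag shared by all of these runs is --mv_minmaxnorm 1, which by its name rescales the decoded motion-vector field with a min-max normalization before it enters the network; the authoritative implementation is in dataset.py and transforms.py. One plausible sketch:

import numpy as np

def minmax_norm(mv, eps=1e-8):
    # Per-sample min-max scaling of the motion-vector array to [0, 1].
    lo, hi = mv.min(), mv.max()
    return (mv - lo) / (hi - lo + eps)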
-------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split3/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gen_flow/split3/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split3/acc.log 8 | 9 | expdir=hmdb51_gen_flow/split3 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split3/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split3/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_flow/split1/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_flow/split1/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_flow/split2/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_flow/split2/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_flow/split3/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_flow/split3/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_iframe/split1/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_iframe/split1/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_iframe/split2/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_iframe/split2/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_iframe/split3/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_iframe/split3/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_mv/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_mv/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_mv/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_mv/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_mv/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_mv/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_residual/split1/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_residual/split1/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_residual/split2/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_residual/split2/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_residual/split3/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_residual/split3/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gan/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split1/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! 
/bin/bash 6 | 7 | expdir=ucf101_gan/split1 8 | representation=mv 9 | 10 | # exp/ucf101_gan/split1/run.sh; ./exp/ucf101_gan/split1/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split1/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name ucf101 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 26 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_train.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_test.txt \ 29 | --weights exp/ucf101_gen_flow/split1/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name ucf101 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 49 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_test.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split1/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gan/split1/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split1/acc.log 8 | 9 | expdir=ucf101_gan/split1 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split1/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split1/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split1/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gan/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split2/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 
4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gan/split2 8 | representation=mv 9 | 10 | # exp/ucf101_gan/split2/run.sh; ./exp/ucf101_gan/split2/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split2/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name ucf101 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 26 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_train.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_test.txt \ 29 | --weights exp/ucf101_gen_flow/split2/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name ucf101 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 49 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_test.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split2/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gan/split2/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split2/acc.log 8 | 9 | expdir=ucf101_gan/split2 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split2/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split2/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split2/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gan/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split3/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gan/split3 8 | representation=mv 9 | 10 | # exp/ucf101_gan/split3/run.sh; ./exp/ucf101_gan/split3/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split3/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name ucf101 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 26 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_train.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_test.txt \ 29 | --weights exp/ucf101_gen_flow/split3/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name ucf101 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 49 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_test.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split3/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gan/split3/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split3/acc.log 8 | 9 | expdir=ucf101_gan/split3 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split3/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split3/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split3/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gen_flow/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split1/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gen_flow/split1 8 | representation=mv 9 | 10 | # exp/ucf101_gen_flow/split1/run.sh; ./exp/ucf101_gen_flow/split1/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split1/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name ucf101 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 20 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_train.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_test.txt \ 23 | --weights ./exp/ucf101_coviar/ucf101_flow/split1/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name ucf101 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 45 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_test.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split1/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gen_flow/split1/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split1/acc.log 8 | 9 | expdir=ucf101_gen_flow/split1 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split1/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split1/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split1/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gen_flow/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split2/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gen_flow/split2 8 | representation=mv 9 | 10 | # exp/ucf101_gen_flow/split2/run.sh; ./exp/ucf101_gen_flow/split2/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split2/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name ucf101 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 20 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_train.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_test.txt \ 23 | --weights ./exp/ucf101_coviar/ucf101_flow/split2/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name ucf101 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 45 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_test.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split2/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gen_flow/split2/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split2/acc.log 8 | 9 | expdir=ucf101_gen_flow/split2 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split2/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split2/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split2/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz \ 17 | --wf 0.25 -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gen_flow/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split3/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gen_flow/split3 8 | representation=mv 9 | 10 | # exp/ucf101_gen_flow/split3/run.sh; ./exp/ucf101_gen_flow/split3/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split3/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name ucf101 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 20 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_train.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_test.txt \ 23 | --weights ./exp/ucf101_coviar/ucf101_flow/split3/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name ucf101 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 45 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_test.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split3/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gen_flow/split3/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split3/acc.log 8 | 9 | expdir=ucf101_gen_flow/split3 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split3/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split3/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split3/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz \ 17 | --wf 0.25 --------------------------------------------------------------------------------
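Unlike the split1 script, the split2 and split3 run_combine.sh calls above pass --wf 0.25, which reads as the fusion weight applied to the --flow (generated-flow) stream, down-weighting it relative to the other three streams instead of using combine.py's default. Under that assumption, the fusion from the earlier sketch becomes:

import numpy as np

def fuse_with_wf(s_iframe, s_res, s_mv, s_flow, wf=0.25):
    # --wf scales only the generated-flow scores before summation (assumed).
    return s_iframe + s_res + s_mv + wf * np.asarray(s_flow)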