├── ._CODE_OF_CONDUCT.md ├── ._CONTRIBUTING.md ├── ._README.md ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── code ├── dmcnet │ ├── ._train.py │ ├── combine.py │ ├── data_loader │ │ ├── coviar_data_loader.c │ │ ├── install.sh │ │ └── setup.py │ ├── dataset.py │ ├── exp │ ├── model.py │ ├── test.py │ ├── train.py │ ├── train_options.py │ ├── transforms.py │ └── utils.py ├── dmcnet_GAN │ ├── combine.py │ ├── data_loader │ │ ├── coviar_data_loader.c │ │ ├── install.sh │ │ └── setup.py │ ├── dataset.py │ ├── exp │ ├── model.py │ ├── test.py │ ├── train.py │ ├── train_options.py │ ├── transforms.py │ └── utils.py └── dmcnet_I3D │ ├── README.MD │ ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── image_transforms.cpython-36.pyc │ │ ├── iterator_factory.cpython-36.pyc │ │ ├── video_iterator.cpython-36.pyc │ │ ├── video_sampler.cpython-36.pyc │ │ └── video_transforms.cpython-36.pyc │ ├── image_iterator.py │ ├── image_transforms.py │ ├── iterator_factory.py │ ├── video_iterator.py │ ├── video_sampler.py │ └── video_transforms.py │ ├── dataset │ ├── HMDB51 │ │ ├── raw │ │ │ ├── data │ │ │ └── list_cvt │ │ │ │ ├── hmdb51_split1_others.txt │ │ │ │ ├── hmdb51_split1_test.txt │ │ │ │ ├── hmdb51_split1_test_info.txt │ │ │ │ ├── hmdb51_split1_test_info_step2.txt │ │ │ │ ├── hmdb51_split1_train.txt │ │ │ │ ├── hmdb51_split1_train_info.txt │ │ │ │ ├── hmdb51_split1_train_info_step2.txt │ │ │ │ ├── hmdb51_split2_others.txt │ │ │ │ ├── hmdb51_split2_test.txt │ │ │ │ ├── hmdb51_split2_test_info.txt │ │ │ │ ├── hmdb51_split2_train.txt │ │ │ │ ├── hmdb51_split2_train_info.txt │ │ │ │ ├── hmdb51_split3_others.txt │ │ │ │ ├── hmdb51_split3_test.txt │ │ │ │ ├── hmdb51_split3_test_info.txt │ │ │ │ ├── hmdb51_split3_train.txt │ │ │ │ ├── hmdb51_split3_train_info.txt │ │ │ │ └── mapping_table.txt │ │ └── scripts │ │ │ ├── README.md │ │ │ └── convert_videos.py │ ├── README.md │ ├── UCF101 │ │ └── raw │ │ │ ├── data │ │ │ └── list_cvt │ │ │ ├── hmdb51_split1_test_info.txt │ │ │ ├── hmdb51_split1_train_info.txt │ │ │ ├── testlist01.txt │ │ │ ├── testlist02.txt │ │ │ ├── testlist03.txt │ │ │ ├── trainlist01.txt │ │ │ ├── trainlist02.txt │ │ │ ├── trainlist03.txt │ │ │ ├── ucf101_split1_test_info.txt │ │ │ ├── ucf101_split1_test_info_step2.txt │ │ │ ├── ucf101_split1_train_info.txt │ │ │ ├── ucf101_split1_train_info_step2.txt │ │ │ ├── ucf101_split2_test_info.txt │ │ │ ├── ucf101_split2_train_info.txt │ │ │ ├── ucf101_split3_test_info.txt │ │ │ └── ucf101_split3_train_info.txt │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── config.cpython-36.pyc │ └── config.py │ ├── network │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── config.cpython-36.pyc │ │ ├── i3d.cpython-36.pyc │ │ ├── initializer.cpython-36.pyc │ │ ├── mfnet_3d.cpython-36.pyc │ │ └── symbol_builder.cpython-36.pyc │ ├── config.py │ ├── i3d.py │ ├── initializer.py │ ├── pretrained │ └── symbol_builder.py │ ├── test │ ├── evaluate_video_hmdb_i3d.py │ ├── evaluate_video_ucf101_i3d.py │ └── test.sh │ ├── train.sh │ ├── train │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── callback.cpython-36.pyc │ │ ├── lr_scheduler.cpython-36.pyc │ │ ├── metric.cpython-36.pyc │ │ └── model.cpython-36.pyc │ ├── callback.py │ ├── lr_scheduler.py │ ├── metric.py │ └── model.py │ ├── train_hmdb51.py │ ├── train_model.py │ └── train_ucf101.py └── exp_my ├── hmdb51_coviar ├── flow │ ├── split1 │ │ ├── combine.sh │ │ └── 
flow_score_model_best.npz │ ├── split2 │ │ ├── combine.sh │ │ └── flow_score_model_best.npz │ └── split3 │ │ ├── combine.sh │ │ └── flow_score_model_best.npz ├── iframe │ ├── split1 │ │ └── iframe_score_model_best.npz │ ├── split2 │ │ └── iframe_score_model_best.npz │ └── split3 │ │ └── iframe_score_model_best.npz ├── mv │ ├── split1 │ │ └── mv_score_model_best.npz │ ├── split2 │ │ └── mv_score_model_best.npz │ └── split3 │ │ └── mv_score_model_best.npz └── residual │ ├── split1 │ └── residual_score_model_best.npz │ ├── split2 │ └── residual_score_model_best.npz │ └── split3 │ └── residual_score_model_best.npz ├── hmdb51_gan ├── split1 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── split2 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh └── split3 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── hmdb51_gen_flow ├── split1 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── split2 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh └── split3 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── ucf101_coviar ├── ucf101_flow │ ├── split1 │ │ └── flow_score_model_best.npz │ ├── split2 │ │ └── flow_score_model_best.npz │ └── split3 │ │ └── flow_score_model_best.npz ├── ucf101_iframe │ ├── split1 │ │ └── iframe_score_model_best.npz │ ├── split2 │ │ └── iframe_score_model_best.npz │ └── split3 │ │ └── iframe_score_model_best.npz ├── ucf101_mv │ ├── split1 │ │ └── mv_score_model_best.npz │ ├── split2 │ │ └── mv_score_model_best.npz │ └── split3 │ │ └── mv_score_model_best.npz └── ucf101_residual │ ├── split1 │ └── residual_score_model_best.npz │ ├── split2 │ └── residual_score_model_best.npz │ └── split3 │ └── residual_score_model_best.npz ├── ucf101_gan ├── split1 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh ├── split2 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh └── split3 │ ├── mv_score_model_best.npz │ ├── run.sh │ └── run_combine.sh └── ucf101_gen_flow ├── split1 ├── mv_score_model_best.npz ├── run.sh └── run_combine.sh ├── split2 ├── mv_score_model_best.npz ├── run.sh └── run_combine.sh └── split3 ├── mv_score_model_best.npz ├── run.sh └── run_combine.sh /._CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/._CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /._CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/._CONTRIBUTING.md -------------------------------------------------------------------------------- /._README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/._README.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | exp_my/hmdb51_gen_flow_tmp/ 2 | */data_loader/build/ 3 | */data 4 | */exp 5 | */__pycache__/ 6 | *.tar 7 | *.fuse 8 | .nfs* -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | 
Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to DMC-Net 2 | We want to make contributing to this project as easy and transparent as possible. 3 | 4 | ## Our Development Process 5 | Minor changes and improvements will be released on an ongoing basis. 6 | Larger changes (e.g., changesets implementing a new paper) will be released 7 | on a more periodic basis. 8 | 9 | 10 | ## Pull Requests 11 | We actively welcome your pull requests. 12 | 13 | 1. Fork the repo and create your branch from `master`. 14 | 2. If you've added code that should be tested, add tests. 15 | 3. If you've changed APIs, update the documentation. 16 | 4. Ensure the test suite passes. 17 | 5. Make sure your code lints. 18 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 19 | 20 | ## Contributor License Agreement ("CLA") 21 | In order to accept your pull request, we need you to submit a CLA. You only need 22 | to do this once to work on any of Facebook's open source projects. 23 | 24 | Complete your CLA here: <https://code.facebook.com/cla> 25 | 26 | ## Issues 27 | We use GitHub issues to track public bugs. Please ensure your description is 28 | clear and has sufficient instructions to be able to reproduce the issue. 29 | 30 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 31 | disclosure of security bugs. In those cases, please go through the process 32 | outlined on that page and do not file a public issue. 33 | 34 | ## Coding Style 35 | * 4 spaces for indentation rather than tabs 36 | 37 | ## License 38 | By contributing to DMC-Net, you agree that your contributions will 39 | be licensed under the LICENSE file in the root directory of this source tree. 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DMC-Net 2 | 3 | ### Citing 4 | If you find DMC-Net useful, please consider citing: 5 | 6 | @inproceedings{shou2019dmc, 7 | title={DMC-Net: Generating Discriminative Motion Cues for Fast Compressed Video Action Recognition}, 8 | author={Shou, Zheng and Lin, Xudong and Kalantidis, Yannis and Sevilla-Lara, Laura and Rohrbach, Marcus and Chang, Shih-Fu and Yan, Zhicheng}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 10 | year={2019} 11 | } 12 | 13 | ### Overview 14 | 15 | `./exp_my/` contains scripts for running experiments, together with our trained models and prediction results. 16 | 17 | `./code/` contains the implementations of 3 major models: 18 | 0. `dmcnet` is the version which does not include the adversarial loss during training and uses ResNet-18 for classifying DMC; 19 | 1. `dmcnet_GAN` is the version which includes the adversarial loss during training and uses ResNet-18 for classifying DMC; 20 | 2. `dmcnet_I3D` is the version which uses I3D for classifying DMC. 21 | 22 | **In the following, we present how to use `dmcnet` and `dmcnet_GAN`. Instructions for `dmcnet_I3D` can be found in `./code/dmcnet_I3D/`.** 23 | 24 | ## DMC-Net with ResNet-18 classifier 25 | 26 | ### Installation 27 | 28 | We implement `dmcnet` and `dmcnet_GAN` in PyTorch, building on [CoViAR](https://github.com/chaoyuaw/pytorch-coviar). Please refer to CoViAR for details of setup and installation (e.g., how to prepare input videos, how to set up the MPEG-4 compressed video data loader, etc.). Specifically, the released models were trained using Python 3.6, PyTorch 0.3.1, CUDA 9.0, and MPEG-4 video with GOP size 12 and macroblock size 16x16. 29 | 30 | Optical flow extraction: we extract optical flow with the TV-L1 algorithm implementation from [dense_flow](https://github.com/wanglimin/dense_flow), store the flow images beforehand, and load them during training. 31 | 32 | In both `./code/dmcnet/` and `./code/dmcnet_GAN/`, please first symlink `exp/` to the actual directory of `./exp_my/` so that all data will be stored in the experiment folder. 33 | 34 | ### Usage 35 | 36 | As stated in the paper, we first train DMC-Net with the classification loss and the flow-reconstruction MSE loss but without the adversarial loss (using `./code/dmcnet/`). A sample training script for HMDB-51 can be found at `exp_my/hmdb51_gen_flow/split1/run.sh`. Perform training and testing via `bash exp/hmdb51_gen_flow/split1/run.sh`. The trained model will be saved as `exp/hmdb51_gen_flow/split1/_mv_model_best.pth.tar`. 37 | 38 | Explanations of some key options used in the `run.sh` script (detailed descriptions can be found in `train_options.py`): 39 | 40 | 0. `data-root`: specifies the directory storing MPEG-4 videos; 41 | 1. `train-list` and `test-list`: specify the training and testing video lists. Some example lines in such list files (the format follows [CoViAR](https://github.com/chaoyuaw/pytorch-coviar): directory class class_index): 42 | 43 | smile/Me_smiling_smile_h_nm_np1_fr_goo_0.avi smile 0 44 | 45 | clap/Alex_applauding_himself_clap_u_nm_np1_fr_med_0.avi clap 1 46 | 47 | climb/Chiara_Kletterwand_climb_f_cm_np1_ba_bad_0.avi climb 2 48 | 49 | 2. `flow-root`: specifies the directory storing ground-truth optical flow images extracted by [dense_flow](https://github.com/wanglimin/dense_flow). Sample directory layout: 50 | 51 | flow-root/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0/flow_x_00001.jpg 52 | 53 | flow-root/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0/flow_y_00001.jpg 54 | 55 | flow-root/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0/flow_x_00002.jpg 56 | 57 | flow-root/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_1/flow_x_00001.jpg 58 | 59 | flow-root/climb_stairs/BIG_FISH_climb_stairs_f_nm_np1_fr_med_1/flow_x_00001.jpg 60 | 61 | Then we use the trained model above as initialization and train with the adversarial loss included (using `./code/dmcnet_GAN/`). A sample training script for HMDB-51 can be found at `exp_my/hmdb51_gan/split1/run.sh`. To reproduce the result on HMDB-51, simply run `bash exp/hmdb51_gan/split1/run.sh; bash ./exp/hmdb51_gan/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split1/acc.log`. The trained model will be saved as `exp/hmdb51_gan/split1/_mv_model_best.pth.tar`, the prediction results will be stored in `exp/hmdb51_gan/split1/mv_score_model_best.npz`, and `./exp/hmdb51_gan/split1/acc.log` records the accuracy after fusing all modalities. 62 | 
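The per-modality score fusion in `run_combine.sh` is performed by `combine.py`. A hedged sketch of such an invocation, assuming the HMDB-51 split1 score files shipped in `./exp_my/` (the weights shown are just the defaults in `combine.py`; the exact arguments and weights we used live in each `run_combine.sh`):

    python combine.py \
        --iframe exp/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz \
        --mv exp/hmdb51_coviar/mv/split1/mv_score_model_best.npz \
        --res exp/hmdb51_coviar/residual/split1/residual_score_model_best.npz \
        --flow exp/hmdb51_gan/split1/mv_score_model_best.npz \
        --wi 2.0 --wm 1.0 --wr 1.0 --wf 1.0

Passing the DMC-Net score file via `--flow` (i.e. letting the generated-flow stream take the optical-flow slot at fusion time) is an assumption based on the file layout in `./exp_my/`; check `run_combine.sh` for the exact usage.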
63 | ### Our trained models 64 | 65 | We provide our trained models and prediction results at `dl.fbaipublicfiles.com/dmc-net/models.zip`. The directory structure of `./models/` mirrors that of `./exp_my/`. For each dataset and split, please put the trained model and prediction result from `./models/` into the corresponding experiment folder in `./exp_my/`. 66 | 67 | ### Results 68 | 69 | Accuracy (%) | HMDB-51 | UCF-101 70 | ---------|--------|----- 71 | [EMV-CNN](https://ieeexplore.ieee.org/abstract/document/7780666) | 51.2 (split1) | 86.4 72 | [DTMV-CNN](https://zbwglory.github.io/papers/08249882.pdf) | 55.3 | 87.5 73 | [CoViAR](https://github.com/chaoyuaw/pytorch-coviar) | 59.1 | 90.4 74 | DMC-Net (ResNet-18) | 62.8 | 90.9 75 | DMC-Net (I3D) | 71.8 | 92.3 76 | DMC-Net (I3D) + I3D RGB | 77.8 | 96.5 77 | 78 | ## License 79 | DMC-Net is MIT licensed, as found in the LICENSE file. 80 | -------------------------------------------------------------------------------- /code/dmcnet/._train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet/._train.py -------------------------------------------------------------------------------- /code/dmcnet/combine.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | """Combine testing results of the three models to get final accuracy.""" 9 | 10 | import argparse 11 | import numpy as np 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser(description="combine predictions") 15 | parser.add_argument('--iframe', type=str, required=True, 16 | help='iframe score file.') 17 | parser.add_argument('--mv', type=str, required=True, 18 | help='motion vector score file.') 19 | parser.add_argument('--res', type=str, required=True, 20 | help='residual score file.') 21 | parser.add_argument('--flow', type=str, default=None, 22 | help='residual score file.') 23 | 24 | parser.add_argument('--wi', type=float, default=2.0, 25 | help='iframe weight.') 26 | parser.add_argument('--wm', type=float, default=1.0, 27 | help='motion vector weight.') 28 | parser.add_argument('--wr', type=float, default=1.0, 29 | help='residual weight.') 30 | parser.add_argument('--wf', type=float, default=1.0, 31 | help='flow weight.') 32 | 33 | args = parser.parse_args() 34 | 35 | with np.load(args.iframe) as iframe: 36 | with np.load(args.mv) as mv: 37 | with np.load(args.res) as residual: 38 | n = len(mv['names']) 39 | 40 | i_score = np.array([score[0][0] for score in iframe['scores']]) 41 | mv_score = np.array([score[0][0] for score in mv['scores']]) 42 | res_score = np.array([score[0][0] for score in residual['scores']]) 43 | 44 | i_label = np.array([score[1] for score in iframe['scores']]) 45 | mv_label = np.array([score[1] for score in mv['scores']]) 46 | res_label = np.array([score[1] for score in residual['scores']]) 47 | assert np.alltrue(i_label == mv_label) and np.alltrue(i_label == res_label) 48 | 49 | combined_score = i_score * args.wi + mv_score * args.wm + res_score * args.wr 50 | 51 | if args.flow is not None: 52 | flow = np.load(args.flow) 53 | flow_score = np.array([score[0][0] for score in flow['scores']]) 54 | combined_score += args.wf * flow_score 55 | 56 | accuracy = float(sum(np.argmax(combined_score, axis=1) == i_label)) / n 57 | print('Accuracy: %f (%d).' 
% (accuracy, n)) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /code/dmcnet/data_loader/install.sh: -------------------------------------------------------------------------------- 1 | rm -rf build 2 | python setup.py build_ext 3 | python setup.py install --user 4 | -------------------------------------------------------------------------------- /code/dmcnet/data_loader/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | import numpy as np 3 | 4 | coviar_utils_module = Extension('coviar', 5 | sources = ['coviar_data_loader.c'], 6 | include_dirs=[np.get_include(), '/mnt/homedir/zshou/code/FFmpeg/include/'], 7 | extra_compile_args=['-DNDEBUG', '-O3', '-std=c99'], 8 | extra_link_args=['-lavutil', '-lavcodec', '-lavformat', '-lswscale', '-L/mnt/homedir/zshou/code/FFmpeg/lib/'] 9 | ) 10 | 11 | setup ( name = 'coviar', 12 | version = '0.1', 13 | description = 'Utils for coviar training.', 14 | ext_modules = [ coviar_utils_module ] 15 | ) 16 | -------------------------------------------------------------------------------- /code/dmcnet/exp: -------------------------------------------------------------------------------- 1 | /private/home/zshou/exp/coviar_exp/ -------------------------------------------------------------------------------- /code/dmcnet/test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | """Run testing given a trained model.""" 9 | 10 | import argparse 11 | import time 12 | import os 13 | 14 | from dataset import CoviarDataSet 15 | from model import Model 16 | from transforms import GroupCenterCrop 17 | from transforms import GroupOverSample 18 | from transforms import GroupScale 19 | 20 | import numpy as np 21 | import torch.nn.parallel 22 | import torch.optim 23 | import torchvision 24 | 25 | parser = argparse.ArgumentParser( 26 | description="Standard video-level testing") 27 | parser.add_argument('--data-name', type=str, choices=['ucf101', 'hmdb51']) 28 | parser.add_argument('--representation', type=str, choices=['iframe', 'residual', 'mv', 'flow']) 29 | parser.add_argument('--no-accumulation', action='store_true', 30 | help='disable accumulation of motion vectors and residuals.') 31 | parser.add_argument('--new_length', type=int, default=1, 32 | help='number of MV/OF stacked to be processed together.') 33 | parser.add_argument('--use_databn', type=int, default=1, 34 | help='add databn for mv, residual, flow or not.') 35 | parser.add_argument('--flow_ds_factor', type=int, default=0, 36 | help='flow downsample factor.') 37 | parser.add_argument('--upsample_interp', type=bool, default=False, 38 | help='upsample via interpolation or not.') 39 | parser.add_argument('--data-root', type=str) 40 | parser.add_argument('--flow-root', type=str, help='directory of storing pre-extracted optical flow images.') 41 | parser.add_argument('--data-flow', type=str, default='tvl1') 42 | parser.add_argument('--test-list', type=str) 43 | parser.add_argument('--weights', type=str) 44 | parser.add_argument('--batch-size', default=1, type=int, help='batch size.') 45 | parser.add_argument('--arch', type=str) 46 | parser.add_argument('--arch_estimator', type=str, 
default="ContextNetwork", help='estimator architecture.') 47 | parser.add_argument('--save-scores', type=str, default=None) 48 | parser.add_argument('--test_segments', type=int, default=25) 49 | parser.add_argument('--test-crops', type=int, default=10) 50 | parser.add_argument('--input_size', type=int, default=224) 51 | parser.add_argument('-j', '--workers', default=1, type=int, metavar='N', 52 | help='number of workers for data loader.') 53 | parser.add_argument('--gpus', nargs='+', type=int, default=None) 54 | parser.add_argument('--gop', type=int, default=12, help='size of GOP.') 55 | parser.add_argument('--viz', type=bool, default=False, help='visualize or not.') 56 | parser.add_argument('--gen_flow_or_delta', type=int, default=0, help='0: generate flow; 1: generate flow delta.') 57 | parser.add_argument('--gen_flow_ds_factor', type=int, default=0, help='the downsample factor used in generating flow of small size') 58 | parser.add_argument('--att', type=int, default=0, help='0: no attention; 1: pixel-level attention.') 59 | 60 | args = parser.parse_args() 61 | 62 | if args.data_name == 'ucf101': 63 | num_class = 101 64 | elif args.data_name == 'hmdb51': 65 | num_class = 51 66 | else: 67 | raise ValueError('Unknown dataset '+args.data_name) 68 | 69 | 70 | def main(): 71 | # define the model 72 | net = Model(num_class, args.test_segments, args.representation, 73 | base_model=args.arch, 74 | new_length=args.new_length, 75 | use_databn=args.use_databn, 76 | gen_flow_or_delta=args.gen_flow_or_delta, 77 | gen_flow_ds_factor=args.gen_flow_ds_factor, 78 | arch_estimator=args.arch_estimator, 79 | att=args.att) 80 | 81 | # load the trained model 82 | checkpoint = torch.load(args.weights, map_location=lambda storage, loc: storage) 83 | print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) 84 | 85 | base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} 86 | net.load_state_dict(base_dict, strict=False) 87 | 88 | # setup the data loader 89 | if args.test_crops == 1: 90 | cropping = torchvision.transforms.Compose([ 91 | GroupScale(net.scale_size), 92 | GroupCenterCrop(net.crop_size), 93 | ]) 94 | elif args.test_crops == 10: 95 | cropping = torchvision.transforms.Compose([ 96 | GroupOverSample(net.crop_size, net.scale_size) 97 | ]) 98 | else: 99 | raise ValueError("Only 1 and 10 crops are supported, but got {}.".format(args.test_crops)) 100 | 101 | data_loader = torch.utils.data.DataLoader( 102 | CoviarDataSet( 103 | args.data_root, 104 | args.flow_root, 105 | args.data_name, 106 | video_list=args.test_list, 107 | num_segments=args.test_segments, 108 | representation=args.representation, 109 | new_length=args.new_length, 110 | flow_ds_factor=args.flow_ds_factor, 111 | upsample_interp=args.upsample_interp, 112 | transform=cropping, 113 | is_train=False, 114 | accumulate=(not args.no_accumulation), 115 | gop=args.gop, 116 | flow_folder=args.data_flow, 117 | viz=args.viz 118 | ), 119 | batch_size=1, shuffle=False, 120 | num_workers=args.workers * 2, pin_memory=True) 121 | 122 | # deploy model on gpu 123 | if args.gpus is not None: 124 | devices = [args.gpus[i] for i in range(args.workers)] 125 | else: 126 | devices = list(range(args.workers)) 127 | 128 | net.cuda(devices[0]) 129 | #net.base_model.cuda(devices[-1]) 130 | net = torch.nn.DataParallel(net, device_ids=devices) 131 | 132 | # switch to inference model and start to iterate over the test set 133 | net.eval() 134 | 135 | total_num = len(data_loader.dataset) 136 | 
output = [] 137 | 138 | # process each video to obtain its predictions 139 | def forward_video(input_mv, input_residual, att=0): 140 | input_mv_var = torch.autograd.Variable(input_mv, volatile=True) 141 | input_residual_var = torch.autograd.Variable(input_residual, volatile=True) 142 | if att == 0: 143 | scores, gen_flow = net(input_mv_var, input_residual_var) 144 | if att == 1: 145 | scores, gen_flow, att_flow = net(input_mv_var, input_residual_var) 146 | scores = scores.view((-1, args.test_segments * args.test_crops) + scores.size()[1:]) 147 | scores = torch.mean(scores, dim=1) 148 | if att == 0: 149 | return scores.data.cpu().numpy().copy(), gen_flow 150 | if att == 1: 151 | return scores.data.cpu().numpy().copy(), gen_flow, att_flow 152 | 153 | proc_start_time = time.time() 154 | 155 | # iterate over the whole test set 156 | for i, (input_flow, input_mv, input_residual, label) in enumerate(data_loader): 157 | input_mv = input_mv.cuda(args.gpus[-1], async=True) 158 | input_residual = input_residual.cuda(args.gpus[0], async=True) 159 | input_flow = input_flow.cuda(args.gpus[-1], async=True) 160 | 161 | # print("input_flow shape:") 162 | # print(input_flow.shape) # torch.Size([batch_size, num_crops*num_segments, 2, 224, 224]) 163 | # print("input_flow type:") # print(input_flow.type()) # torch.cuda.FloatTensor 164 | if args.att == 0: 165 | video_scores, gen_flow = forward_video(input_mv, input_residual) 166 | if args.att == 1: 167 | video_scores, gen_flow, att_flow = forward_video(input_mv, input_residual, args.att) 168 | output.append((video_scores, label[0])) 169 | cnt_time = time.time() - proc_start_time 170 | if (i + 1) % 100 == 0: 171 | print('video {} done, total {}/{}, average {} sec/video'.format(i, i+1, 172 | total_num, 173 | float(cnt_time) / (i+1))) 174 | 175 | video_pred = [np.argmax(x[0]) for x in output] 176 | video_labels = [x[1] for x in output] 177 | 178 | print('Accuracy {:.02f}% ({})'.format( 179 | float(np.sum(np.array(video_pred) == np.array(video_labels))) / len(video_pred) * 100.0, 180 | len(video_pred))) 181 | 182 | 183 | if args.save_scores is not None: 184 | 185 | name_list = [x.strip().split()[0] for x in open(args.test_list)] 186 | order_dict = {e:i for i, e in enumerate(sorted(name_list))} 187 | 188 | reorder_output = [None] * len(output) 189 | reorder_label = [None] * len(output) 190 | reorder_name = [None] * len(output) 191 | 192 | for i in range(len(output)): 193 | idx = order_dict[name_list[i]] 194 | reorder_output[idx] = output[i] 195 | reorder_label[idx] = video_labels[i] 196 | reorder_name[idx] = name_list[i] 197 | 198 | np.savez(args.save_scores, scores=reorder_output, labels=reorder_label, names=reorder_name) 199 | 200 | 201 | if __name__ == '__main__': 202 | main() 203 | -------------------------------------------------------------------------------- /code/dmcnet/train_options.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | """Training options.""" 9 | 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser(description="CoViAR") 13 | 14 | # Data. 
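# A hedged sketch of how the data options below are typically supplied on the
# command line (the paths are hypothetical placeholders; see the run.sh
# scripts under exp_my/ for the exact invocations used in our experiments):
#
#   python train.py --data-name hmdb51 \
#       --data-root /path/to/mpeg4_videos \
#       --flow-root /path/to/flow_images \
#       --train-list hmdb51_split1_train.txt \
#       --test-list hmdb51_split1_test.txt \
#       --gop 12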
15 | parser.add_argument('--data-name', type=str, choices=['ucf101', 'hmdb51', 'kinetics400'], 16 | help='dataset name.') 17 | parser.add_argument('--data-root', type=str, 18 | help='root of data directory.') 19 | parser.add_argument('--flow-root', type=str, 20 | help='directory storing pre-extracted optical flow images.') 21 | parser.add_argument('--data-flow', type=str, default='tvl1', 22 | help='variant of pre-extracted optical flow to load; used as the flow folder name (e.g. tvl1).') 23 | parser.add_argument('--train-list', type=str, 24 | help='training example list.') 25 | parser.add_argument('--test-list', type=str, 26 | help='testing example list.') 27 | parser.add_argument('--gop', type=int, default=12, 28 | help='size of GOP.') 29 | 30 | # Model. 31 | parser.add_argument('--representation', type=str, choices=['iframe', 'mv', 'residual', 'flow'], 32 | help='data representation.') 33 | parser.add_argument('--arch', type=str, default="resnet152", 34 | help='base architecture.') 35 | parser.add_argument('--arch_estimator', type=str, default="ContextNetwork", 36 | help='estimator architecture.') 37 | parser.add_argument('--num_segments', type=int, default=3, 38 | help='number of TSN segments.') 39 | parser.add_argument('--no-accumulation', action='store_true', 40 | help='disable accumulation of motion vectors and residuals.') 41 | parser.add_argument('--new_length', type=int, default=1, 42 | help='number of MV/OF stacked to be processed together.') 43 | parser.add_argument('--flow_ds_factor', type=int, default=0, 44 | help='flow downsample factor.') 45 | parser.add_argument('--gen_flow_ds_factor', type=int, default=0, 46 | help='the downsample factor used when generating flow at a smaller size.') 47 | parser.add_argument('--upsample_interp', type=bool, default=False, 48 | help='upsample via interpolation or not.') 49 | parser.add_argument('--use_databn', type=int, default=1, 50 | help='add data batchnorm for mv, residual, flow or not. 1: yes; 0: no.') 51 | parser.add_argument('--gen_flow_or_delta', type=int, default=0, 52 | help='0: generate flow; 1: generate flow delta.') 53 | parser.add_argument('--att', type=int, default=0, 54 | help='0: no attention; 1: pixel-level attention.') 55 | parser.add_argument('--mv_minmaxnorm', type=int, default=0, 56 | help='use min-max normalization to map MV values from roughly 128+-20 to the full 128+-127 range.') 57 | 58 | # Training. 
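# How the loss-weight options below are assumed to combine (a sketch; the
# exact formulation lives in train.py). The README trains this variant with a
# classification loss plus a flow-reconstruction MSE loss, so roughly:
#
#   total_loss = lr_cls * cross_entropy(class_scores, labels)
#              + lr_mse * mse_loss(generated_flow, gt_flow)
#
# lr_cls_mult and lr_mse_mult are assumed to further scale the learning rates
# of the classifier and flow-estimator parameter groups.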
59 | parser.add_argument('--weights', default=None, type=str) 60 | parser.add_argument('--resume', default=None, type=str) 61 | parser.add_argument('--epochs', default=500, type=int, 62 | help='number of training epochs.') 63 | parser.add_argument('--epoch-thre', default=500, type=int, 64 | help='epoch threshold used by the training schedule (see train.py for how it is applied).') 65 | parser.add_argument('--batch-size', default=40, type=int, 66 | help='batch size.') 67 | parser.add_argument('--lr', default=0.001, type=float, 68 | help='base learning rate.') 69 | parser.add_argument('--lr-cls', default=1, type=float, 70 | help='cls loss weight.') 71 | parser.add_argument('--loss-mse', default='MSELoss', type=str) 72 | parser.add_argument('--lr-mse', default=0.1, type=float, 73 | help='mse loss weight.') 74 | parser.add_argument('--lr_cls_mult', default=0.01, type=float, help='cls learning multiplier.') 75 | parser.add_argument('--lr_mse_mult', default=0.01, type=float, help='mse learning multiplier.') 76 | parser.add_argument('--lr-steps', default=[200, 300, 400], type=float, nargs="+", 77 | help='epochs to decay learning rate.') 78 | parser.add_argument('--lr-decay', default=0.1, type=float, 79 | help='lr decay factor.') 80 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, 81 | help='weight decay.') 82 | 83 | # Log. 84 | parser.add_argument('--eval-freq', default=5, type=int, 85 | help='evaluation frequency (epochs).') 86 | parser.add_argument('--workers', default=8, type=int, 87 | help='number of data loader workers.') 88 | parser.add_argument('--model-prefix', type=str, default="model", 89 | help="prefix of model name.") 90 | parser.add_argument('--gpus', nargs='+', type=int, default=None, 91 | help='gpu ids.') 92 | -------------------------------------------------------------------------------- /code/dmcnet/transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | """Functions for data augmentation and related preprocessing.""" 9 | 10 | import random 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def color_aug(img, random_h=36, random_l=50, random_s=50): 16 | img = cv2.cvtColor(img, cv2.COLOR_BGR2HLS).astype(float) 17 | 18 | h = (random.random() * 2 - 1.0) * random_h 19 | l = (random.random() * 2 - 1.0) * random_l 20 | s = (random.random() * 2 - 1.0) * random_s 21 | 22 | img[..., 0] += h 23 | img[..., 0] = np.minimum(img[..., 0], 180) 24 | 25 | img[..., 1] += l 26 | img[..., 1] = np.minimum(img[..., 1], 255) 27 | 28 | img[..., 2] += s 29 | img[..., 2] = np.minimum(img[..., 2], 255) 30 | 31 | img = np.maximum(img, 0) 32 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_HLS2BGR) 33 | return img 34 | 35 | 36 | class GroupCenterCrop(object): 37 | def __init__(self, size): 38 | self._size = size 39 | 40 | def __call__(self, img_group): 41 | h, w, _ = img_group[0].shape 42 | hs = (h - self._size) // 2 43 | ws = (w - self._size) // 2 44 | return [img[hs:hs+self._size, ws:ws+self._size] for img in img_group] 45 | 46 | 47 | class GroupRandomHorizontalFlip(object): 48 | def __call__(self, img_group, is_mv_or_flow=False): 49 | if random.random() < 0.5: 50 | ret = [img[:, ::-1, :].astype(np.int32) for img in img_group] # residual and then flow and MV 51 | for i in range(len(ret)): 52 | ret[i][:, :, :4] -= 128 53 | ret[i][..., 0] *= (-1) 54 | ret[i][..., 2] *= (-1) 55 | ret[i][:, :, :4] += 128 56 | return ret 57 | else: 58 | return img_group 59 | 60 | class GroupScale(object): 61 | def __init__(self, size): 62 | self._size = (size, size) 63 | 64 | def __call__(self, img_group): 65 | # resize flow then MV then residual 66 | ret_img_group = [] 67 | for idx in range(0, len(img_group)): 68 | #print('Input frames shape %s:' % str(img_group[idx].shape)) 69 | #print('Input frames after transform mv flow shape %s:' % str(resize_mv(img_group[idx][:, :, :4], self._size, cv2.INTER_LINEAR).shape)) 70 | #print('Input frames after transform residual shape %s:' % str(cv2.resize(img_group[idx][:, :, 4:], self._size, cv2.INTER_LINEAR).shape)) 71 | ret_img_group.append( 72 | np.concatenate((resize_mv(img_group[idx][:, :, :4], self._size, cv2.INTER_LINEAR), 73 | cv2.resize(img_group[idx][:, :, 4:], self._size, cv2.INTER_LINEAR)), axis=2)) 74 | 75 | return ret_img_group 76 | 77 | class GroupOverSample(object): 78 | def __init__(self, crop_size, scale_size=None): 79 | self.crop_size = crop_size if not isinstance(crop_size, int) else (crop_size, crop_size) 80 | 81 | if scale_size is not None: 82 | self.scale_worker = GroupScale(scale_size) 83 | else: 84 | self.scale_worker = None 85 | 86 | def __call__(self, img_group): 87 | 88 | if self.scale_worker is not None: 89 | img_group = self.scale_worker(img_group) 90 | 91 | image_w, image_h, _ = img_group[0].shape 92 | crop_w, crop_h = self.crop_size 93 | 94 | offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h, crop_w, crop_h) 95 | oversample_group = list() 96 | 97 | for o_w, o_h in offsets: 98 | for img in img_group: 99 | 100 | crop = img[o_w:o_w+crop_w, o_h:o_h+crop_h] 101 | oversample_group.append(crop) 102 | 103 | flip_crop = crop[:, ::-1, :].astype(np.int32) # residual 104 | flip_crop[:, :, :4] -= 128 # flow and MV 105 | flip_crop[..., 0] *= (-1) 106 | flip_crop[..., 2] *= (-1) 107 | flip_crop[:, :, :4] += 128 108 | oversample_group.append(flip_crop) 109 | 110 | return oversample_group 111 | 112 | def resize_mv(img, shape, interpolation): 113 | return np.stack([cv2.resize(img[..., i], 
shape, interpolation=interpolation) # pass interpolation by keyword: the third positional argument of cv2.resize is dst 114 | for i in range(img.shape[-1])], axis=2) 115 | 116 | 117 | class GroupMultiScaleCrop(object): 118 | def __init__(self, input_size, scales=None, max_distort=1, fix_crop=False, more_fix_crop=True): 119 | self.scales = scales if scales is not None else [1, .875, .75, .66] 120 | self.max_distort = max_distort 121 | self.fix_crop = fix_crop 122 | self.more_fix_crop = more_fix_crop 123 | self.input_size = input_size if not isinstance(input_size, int) else [input_size, input_size] # 224x224 124 | 125 | def __call__(self, img_group): 126 | 127 | im_size = img_group[0].shape 128 | crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size) 129 | crop_img_group = [img[offset_w:offset_w + crop_w, offset_h:offset_h + crop_h] for img in img_group] 130 | 131 | # resize flow -> MV -> residual 132 | ret_img_group = [] 133 | for idx in range(0, len(img_group)): 134 | ret_img_group.append( 135 | np.concatenate( 136 | (resize_mv(crop_img_group[idx][:, :, :4], (self.input_size[0], self.input_size[1]), cv2.INTER_LINEAR), 137 | cv2.resize(crop_img_group[idx][:, :, 4:], (self.input_size[0], self.input_size[1]), cv2.INTER_LINEAR)), axis=2)) 138 | 139 | return ret_img_group 140 | 141 | def _sample_crop_size(self, im_size): 142 | image_w, image_h = im_size[0], im_size[1] 143 | 144 | base_size = min(image_w, image_h) 145 | crop_sizes = [int(base_size * x) for x in self.scales] 146 | crop_h = [self.input_size[1] if abs(x - self.input_size[1]) < 3 else x for x in crop_sizes] 147 | crop_w = [self.input_size[0] if abs(x - self.input_size[0]) < 3 else x for x in crop_sizes] 148 | 149 | pairs = [] 150 | for i, h in enumerate(crop_h): 151 | for j, w in enumerate(crop_w): 152 | if abs(i - j) <= self.max_distort: 153 | pairs.append((w, h)) 154 | 155 | crop_pair = random.choice(pairs) 156 | if not self.fix_crop: 157 | w_offset = random.randint(0, image_w - crop_pair[0]) 158 | h_offset = random.randint(0, image_h - crop_pair[1]) 159 | else: 160 | w_offset, h_offset = self._sample_fix_offset(image_w, image_h, crop_pair[0], crop_pair[1]) 161 | 162 | return crop_pair[0], crop_pair[1], w_offset, h_offset 163 | 164 | def _sample_fix_offset(self, image_w, image_h, crop_w, crop_h): 165 | offsets = self.fill_fix_offset(self.more_fix_crop, image_w, image_h, crop_w, crop_h) 166 | return random.choice(offsets) 167 | 168 | @staticmethod 169 | def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h): 170 | w_step = (image_w - crop_w) // 4 171 | h_step = (image_h - crop_h) // 4 172 | 173 | ret = list() 174 | ret.append((0, 0)) # upper left 175 | ret.append((4 * w_step, 0)) # upper right 176 | ret.append((0, 4 * h_step)) # lower left 177 | ret.append((4 * w_step, 4 * h_step)) # lower right 178 | ret.append((2 * w_step, 2 * h_step)) # center 179 | 180 | if more_fix_crop: 181 | ret.append((0, 2 * h_step)) # center left 182 | ret.append((4 * w_step, 2 * h_step)) # center right 183 | ret.append((2 * w_step, 4 * h_step)) # lower center 184 | ret.append((2 * w_step, 0 * h_step)) # upper center 185 | 186 | ret.append((1 * w_step, 1 * h_step)) # upper left quarter 187 | ret.append((3 * w_step, 1 * h_step)) # upper right quarter 188 | ret.append((1 * w_step, 3 * h_step)) # lower left quarter 189 | ret.append((3 * w_step, 3 * h_step)) # lower right quarter 190 | 191 | return ret 192 | -------------------------------------------------------------------------------- /code/dmcnet/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, 
Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import numpy as np 9 | 10 | def viz_flow(u,v,logscale=True,scaledown=6,output=False): 11 | """ 12 | topleft is zero, u is horiz/x/width, v is vertical/y/height 13 | red is 3 o'clock, yellow is 6, light blue is 9, blue/purple is 12 14 | """ 15 | colorwheel = makecolorwheel() 16 | ncols = colorwheel.shape[0] 17 | 18 | radius = np.sqrt(u**2 + v**2) 19 | if output: 20 | print("Maximum flow magnitude: %04f" % np.max(radius)) 21 | if logscale: 22 | radius = np.log(radius + 1) 23 | if output: 24 | print("Maximum flow magnitude (after log): %0.4f" % np.max(radius)) 25 | radius = radius / scaledown 26 | if output: 27 | print("Maximum flow magnitude (after scaledown): %0.4f" % np.max(radius)) 28 | rot = np.arctan2(-v, -u) / np.pi 29 | 30 | fk = (rot+1)/2 * (ncols-1) # -1~1 mapped to 0~ncols-1 31 | k0 = fk.astype(np.uint8) # 0, 1, 2, ..., ncols-1 32 | 33 | k1 = k0+1 34 | k1[k1 == ncols] = 0 35 | 36 | f = fk - k0 37 | 38 | ncolors = colorwheel.shape[1] 39 | img = np.zeros(u.shape+(ncolors,)) 40 | for i in range(ncolors): 41 | tmp = colorwheel[:,i] 42 | col0 = tmp[k0] 43 | col1 = tmp[k1] 44 | col = (1-f)*col0 + f*col1 45 | 46 | idx = radius <= 1 47 | # increase saturation with radius 48 | col[idx] = 1 - radius[idx]*(1-col[idx]) 49 | # out of range 50 | col[~idx] *= 0.75 51 | img[:,:,i] = np.floor(255*col).astype(np.uint8) 52 | 53 | return img.astype(np.uint8) 54 | 55 | 56 | 57 | def makecolorwheel(): 58 | # Create a colorwheel for visualization 59 | RY = 15 60 | YG = 6 61 | GC = 4 62 | CB = 11 63 | BM = 13 64 | MR = 6 65 | 66 | ncols = RY + YG + GC + CB + BM + MR 67 | 68 | colorwheel = np.zeros((ncols,3)) 69 | 70 | col = 0 71 | # RY 72 | colorwheel[0:RY,0] = 1 73 | colorwheel[0:RY,1] = np.arange(0,1,1./RY) 74 | col += RY 75 | 76 | # YG 77 | colorwheel[col:col+YG,0] = np.arange(1,0,-1./YG) 78 | colorwheel[col:col+YG,1] = 1 79 | col += YG 80 | 81 | # GC 82 | colorwheel[col:col+GC,1] = 1 83 | colorwheel[col:col+GC,2] = np.arange(0,1,1./GC) 84 | col += GC 85 | 86 | # CB 87 | colorwheel[col:col+CB,1] = np.arange(1,0,-1./CB) 88 | colorwheel[col:col+CB,2] = 1 89 | col += CB 90 | 91 | # BM 92 | colorwheel[col:col+BM,2] = 1 93 | colorwheel[col:col+BM,0] = np.arange(0,1,1./BM) 94 | col += BM 95 | 96 | # MR 97 | colorwheel[col:col+MR,2] = np.arange(1,0,-1./MR) 98 | colorwheel[col:col+MR,0] = 1 99 | 100 | return colorwheel 101 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/combine.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | """Combine testing results of the three models to get final accuracy.""" 9 | 10 | import argparse 11 | import numpy as np 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser(description="combine predictions") 15 | parser.add_argument('--iframe', type=str, required=True, 16 | help='iframe score file.') 17 | parser.add_argument('--mv', type=str, required=True, 18 | help='motion vector score file.') 19 | parser.add_argument('--res', type=str, required=True, 20 | help='residual score file.') 21 | parser.add_argument('--flow', type=str, default=None, 22 | help='residual score file.') 23 | 24 | parser.add_argument('--wi', type=float, default=2.0, 25 | help='iframe weight.') 26 | parser.add_argument('--wm', type=float, default=1.0, 27 | help='motion vector weight.') 28 | parser.add_argument('--wr', type=float, default=1.0, 29 | help='residual weight.') 30 | parser.add_argument('--wf', type=float, default=1.0, 31 | help='flow weight.') 32 | 33 | args = parser.parse_args() 34 | 35 | with np.load(args.iframe) as iframe: 36 | with np.load(args.mv) as mv: 37 | with np.load(args.res) as residual: 38 | n = len(mv['names']) 39 | 40 | i_score = np.array([score[0][0] for score in iframe['scores']]) 41 | mv_score = np.array([score[0][0] for score in mv['scores']]) 42 | res_score = np.array([score[0][0] for score in residual['scores']]) 43 | 44 | i_label = np.array([score[1] for score in iframe['scores']]) 45 | mv_label = np.array([score[1] for score in mv['scores']]) 46 | res_label = np.array([score[1] for score in residual['scores']]) 47 | assert np.alltrue(i_label == mv_label) and np.alltrue(i_label == res_label) 48 | 49 | combined_score = i_score * args.wi + mv_score * args.wm + res_score * args.wr 50 | 51 | if args.flow is not None: 52 | flow = np.load(args.flow) 53 | flow_score = np.array([score[0][0] for score in flow['scores']]) 54 | combined_score += args.wf * flow_score 55 | 56 | accuracy = float(sum(np.argmax(combined_score, axis=1) == i_label)) / n 57 | print('Accuracy: %f (%d).' 
% (accuracy, n)) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/data_loader/install.sh: -------------------------------------------------------------------------------- 1 | rm -rf build 2 | python setup.py build_ext 3 | python setup.py install --user 4 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/data_loader/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | import numpy as np 3 | 4 | coviar_utils_module = Extension('coviar', 5 | sources = ['coviar_data_loader.c'], 6 | include_dirs=[np.get_include(), '/mnt/homedir/zshou/code/FFmpeg/include/'], 7 | extra_compile_args=['-DNDEBUG', '-O3', '-std=c99'], 8 | extra_link_args=['-lavutil', '-lavcodec', '-lavformat', '-lswscale', '-L/mnt/homedir/zshou/code/FFmpeg/lib/'] 9 | ) 10 | 11 | setup ( name = 'coviar', 12 | version = '0.1', 13 | description = 'Utils for coviar training.', 14 | ext_modules = [ coviar_utils_module ] 15 | ) 16 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/exp: -------------------------------------------------------------------------------- 1 | /private/home/zshou/exp/coviar_exp/ -------------------------------------------------------------------------------- /code/dmcnet_GAN/test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | """Run testing given a trained model.""" 9 | 10 | import argparse 11 | import time 12 | import os 13 | 14 | from dataset import CoviarDataSet 15 | from model import Model 16 | from transforms import GroupCenterCrop 17 | from transforms import GroupOverSample 18 | from transforms import GroupScale 19 | 20 | import numpy as np 21 | import torch.nn.parallel 22 | import torch.optim 23 | import torchvision 24 | 25 | 26 | parser = argparse.ArgumentParser( 27 | description="Standard video-level testing") 28 | parser.add_argument('--data-name', type=str, choices=['ucf101', 'hmdb51', 'kinetics400']) 29 | parser.add_argument('--representation', type=str, choices=['iframe', 'residual', 'mv', 'flow']) 30 | parser.add_argument('--no-accumulation', action='store_true', 31 | help='disable accumulation of motion vectors and residuals.') 32 | parser.add_argument('--new_length', type=int, default=1, 33 | help='number of MV/OF stacked to be processed together.') 34 | parser.add_argument('--use_databn', type=int, default=1, 35 | help='add databn for mv, residual, flow or not.') 36 | parser.add_argument('--flow_ds_factor', type=int, default=0, 37 | help='flow downsample factor.') 38 | parser.add_argument('--upsample_interp', type=bool, default=False, 39 | help='upsample via interpolation or not.') 40 | parser.add_argument('--data-root', type=str) 41 | parser.add_argument('--flow-root', type=str, help='directory of storing pre-extracted optical flow images') 42 | parser.add_argument('--data-flow', type=str, default='tvl1') 43 | parser.add_argument('--test-list', type=str) 44 | parser.add_argument('--weights', type=str) 45 | parser.add_argument('--batch-size', default=1, type=int, help='batch size.') 46 | parser.add_argument('--arch', type=str) 47 | 
parser.add_argument('--arch_estimator', type=str, default="ContextNetwork", help='estimator architecture.') 48 | parser.add_argument('--arch_d', type=str, default="Discriminator", help='discriminator architecture.') 49 | parser.add_argument('--save-scores', type=str, default=None) 50 | parser.add_argument('--test_segments', type=int, default=25) 51 | parser.add_argument('--test-crops', type=int, default=10) 52 | parser.add_argument('--input_size', type=int, default=224) 53 | parser.add_argument('-j', '--workers', default=1, type=int, metavar='N', 54 | help='number of workers for data loader.') 55 | parser.add_argument('--gpus', nargs='+', type=int, default=None) 56 | parser.add_argument('--gop', type=int, default=12, help='size of GOP.') 57 | parser.add_argument('--viz', type=bool, default=False, help='visualize or not.') 58 | parser.add_argument('--gen_flow_or_delta', type=int, default=0, help='0: generate flow; 1: generate flow delta') 59 | parser.add_argument('--gen_flow_ds_factor', type=int, default=0, help='the downsample factor used in generating flow of small size') 60 | parser.add_argument('--att', type=int, default=0, help='0: no attention; 1: pixel-level attention.') 61 | parser.add_argument('--mv_minmaxnorm', type=int, default=1, 62 | help='use min max normalization for mv value to map from 128+-20 to 128+-127 something.') 63 | 64 | args = parser.parse_args() 65 | 66 | if args.data_name == 'ucf101': 67 | num_class = 101 68 | elif args.data_name == 'hmdb51': 69 | num_class = 51 70 | elif args.data_name == 'kinetics400': 71 | num_class = 400 72 | else: 73 | raise ValueError('Unknown dataset '+args.data_name) 74 | 75 | 76 | def main(): 77 | # define the whole model network architecture 78 | net = Model(num_class, args.test_segments, args.representation, 79 | base_model=args.arch, 80 | new_length=args.new_length, 81 | use_databn=args.use_databn, 82 | gen_flow_or_delta=args.gen_flow_or_delta, 83 | gen_flow_ds_factor=args.gen_flow_ds_factor, 84 | arch_estimator=args.arch_estimator, 85 | arch_d=args.arch_d, 86 | att=args.att) 87 | 88 | # load the trained model 89 | checkpoint = torch.load(args.weights, map_location=lambda storage, loc: storage) 90 | print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) 91 | 92 | base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} 93 | net.load_state_dict(base_dict, strict=False) 94 | 95 | # setup the data loader 96 | if args.test_crops == 1: 97 | cropping = torchvision.transforms.Compose([ 98 | GroupScale(net.scale_size), 99 | GroupCenterCrop(net.crop_size), 100 | ]) 101 | elif args.test_crops == 10: 102 | cropping = torchvision.transforms.Compose([ 103 | GroupOverSample(net.crop_size, net.scale_size) 104 | ]) 105 | else: 106 | raise ValueError("Only 1 and 10 crops are supported, but got {}.".format(args.test_crops)) 107 | 108 | data_loader = torch.utils.data.DataLoader( 109 | CoviarDataSet( 110 | args.data_root, 111 | args.flow_root, 112 | args.data_name, 113 | video_list=args.test_list, 114 | num_segments=args.test_segments, 115 | representation=args.representation, 116 | new_length=args.new_length, 117 | flow_ds_factor=args.flow_ds_factor, 118 | upsample_interp=args.upsample_interp, 119 | transform=cropping, 120 | is_train=False, 121 | accumulate=(not args.no_accumulation), 122 | gop=args.gop, 123 | flow_folder=args.data_flow, 124 | mv_minmaxnorm=args.mv_minmaxnorm, 125 | viz=args.viz 126 | ), 127 | batch_size=1, shuffle=False, 128 | num_workers=args.workers * 2, 
pin_memory=True) 129 | 130 | if args.gpus is not None: 131 | devices = [args.gpus[i] for i in range(args.workers)] 132 | else: 133 | devices = list(range(args.workers)) 134 | 135 | net.cuda(devices[0]) 136 | #net.base_model.cuda(devices[-1]) 137 | net = torch.nn.DataParallel(net, device_ids=devices) 138 | 139 | # switch to inference model and start to iterate over the test set 140 | net.eval() 141 | 142 | total_num = len(data_loader.dataset) 143 | output = [] 144 | 145 | # process each video to obtain its predictions 146 | def forward_video(input_mv, input_residual, att=0): 147 | input_mv_var = torch.autograd.Variable(input_mv, volatile=True) 148 | input_residual_var = torch.autograd.Variable(input_residual, volatile=True) 149 | if att == 0: 150 | scores, validity, gen_flow = net(input_mv_var, input_residual_var) 151 | if att == 1: 152 | scores, validity, gen_flow, att_flow = net(input_mv_var, input_residual_var) 153 | scores = scores.view((-1, args.test_segments * args.test_crops) + scores.size()[1:]) 154 | scores = torch.mean(scores, dim=1) 155 | if att == 0: 156 | return scores.data.cpu().numpy().copy(), validity.data.cpu().numpy().copy(), gen_flow 157 | if att == 1: 158 | return scores.data.cpu().numpy().copy(), validity.data.cpu().numpy().copy(), gen_flow, att_flow 159 | 160 | proc_start_time = time.time() 161 | 162 | # iterate over the whole test set 163 | for i, (input_flow, input_mv, input_residual, label) in enumerate(data_loader): 164 | input_mv = input_mv.cuda(args.gpus[-1], async=True) 165 | input_residual = input_residual.cuda(args.gpus[0], async=True) 166 | input_flow = input_flow.cuda(args.gpus[-1], async=True) 167 | 168 | # print("input_flow shape:") 169 | # print(input_flow.shape) # torch.Size([batch_size, num_crops*num_segments, 2, 224, 224]) 170 | # print("input_flow type:") # print(input_flow.type()) # torch.cuda.FloatTensor 171 | if args.att == 0: 172 | video_scores, validity, gen_flow = forward_video(input_mv, input_residual) 173 | if args.att == 1: 174 | video_scores, validity, gen_flow, att_flow = forward_video(input_mv, input_residual, args.att) 175 | output.append((video_scores, label[0], validity)) 176 | cnt_time = time.time() - proc_start_time 177 | if (i + 1) % 100 == 0: 178 | print('video {} done, total {}/{}, average {} sec/video'.format(i, i+1, 179 | total_num, 180 | float(cnt_time) / (i+1))) 181 | 182 | video_pred = [np.argmax(x[0]) for x in output] 183 | video_labels = [x[1] for x in output] 184 | video_validity = [np.argmax(x[2]) for x in output] 185 | 186 | print('Accuracy cls {:.02f}% ({})'.format( 187 | float(np.sum(np.array(video_pred) == np.array(video_labels))) / len(video_pred) * 100.0, 188 | len(video_pred))) 189 | 190 | print('Accuracy adv G {:.02f}% ({})'.format( 191 | float(np.sum(np.array(video_validity))) / len(video_validity) * 100.0, 192 | len(video_validity))) 193 | 194 | if args.save_scores is not None: 195 | 196 | name_list = [x.strip().split()[0] for x in open(args.test_list)] 197 | order_dict = {e:i for i, e in enumerate(sorted(name_list))} 198 | 199 | reorder_output = [None] * len(output) 200 | reorder_label = [None] * len(output) 201 | reorder_name = [None] * len(output) 202 | 203 | for i in range(len(output)): 204 | idx = order_dict[name_list[i]] 205 | reorder_output[idx] = output[i] 206 | reorder_label[idx] = video_labels[i] 207 | reorder_name[idx] = name_list[i] 208 | 209 | np.savez(args.save_scores, scores=reorder_output, labels=reorder_label, names=reorder_name) 210 | 211 | 212 | if __name__ == '__main__': 213 | main() 214 | 
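A note on the `*_score_model_best.npz` files saved above (the same files shipped under `exp_my/`): each entry of `scores` is indexed as `score[0][0]` for the per-class scores and `score[1]` for the label, which is exactly the layout `combine.py` reads back. A minimal sketch of loading such a file and recomputing top-1 accuracy (`allow_pickle=True` is needed on newer NumPy because the entries are pickled object arrays):

    import numpy as np
    data = np.load('mv_score_model_best.npz', allow_pickle=True)
    cls_scores = np.array([s[0][0] for s in data['scores']])  # (num_videos, num_classes)
    labels = np.array([s[1] for s in data['scores']])
    print('Accuracy: %f' % (np.argmax(cls_scores, axis=1) == labels).mean())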
-------------------------------------------------------------------------------- /code/dmcnet_GAN/train_options.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | """Training options.""" 9 | 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser(description="CoViAR") 13 | 14 | # Data. 15 | parser.add_argument('--data-name', type=str, choices=['ucf101', 'hmdb51', 'kinetics400'], 16 | help='dataset name.') 17 | parser.add_argument('--data-root', type=str, 18 | help='root of data directory.') 19 | parser.add_argument('--flow-root', type=str, 20 | help='directory storing pre-extracted optical flow images.') 21 | parser.add_argument('--data-flow', type=str, default='tvl1', 22 | help='variant of pre-extracted optical flow to load; used as the flow folder name (e.g. tvl1).') 23 | parser.add_argument('--train-list', type=str, 24 | help='training example list.') 25 | parser.add_argument('--test-list', type=str, 26 | help='testing example list.') 27 | parser.add_argument('--gop', type=int, default=12, 28 | help='size of GOP.') 29 | 30 | # Model. 31 | parser.add_argument('--representation', type=str, choices=['iframe', 'mv', 'residual', 'flow'], 32 | help='data representation.') 33 | parser.add_argument('--arch', type=str, default="resnet152", 34 | help='base architecture.') 35 | parser.add_argument('--arch_estimator', type=str, default="ContextNetwork", 36 | help='estimator architecture.') 37 | parser.add_argument('--arch_d', type=str, default="Discriminator", 38 | help='discriminator architecture.') 39 | parser.add_argument('--num_segments', type=int, default=3, 40 | help='number of TSN segments.') 41 | parser.add_argument('--no-accumulation', action='store_true', 42 | help='disable accumulation of motion vectors and residuals.') 43 | parser.add_argument('--new_length', type=int, default=1, 44 | help='number of MV/OF stacked to be processed together.') 45 | parser.add_argument('--flow_ds_factor', type=int, default=0, 46 | help='flow downsample factor.') 47 | parser.add_argument('--gen_flow_ds_factor', type=int, default=0, 48 | help='the downsample factor used when generating flow at a smaller size.') 49 | parser.add_argument('--upsample_interp', type=bool, default=False, 50 | help='upsample via interpolation or not.') 51 | parser.add_argument('--use_databn', type=int, default=1, 52 | help='add data batchnorm for mv, residual, flow or not. 1: yes; 0: no.') 53 | parser.add_argument('--gen_flow_or_delta', type=int, default=0, 54 | help='0: generate flow; 1: generate flow delta.') 55 | parser.add_argument('--att', type=int, default=0, 56 | help='0: no attention; 1: pixel-level attention.') 57 | parser.add_argument('--mv_minmaxnorm', type=int, default=1, 58 | help='use min-max normalization to map MV values from roughly 128+-20 to the full 128+-127 range.') 59 | 60 | # Training. 
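# How the adversarial weights below are assumed to enter the two objectives
# (a sketch; the exact formulation lives in train.py). On top of the dmcnet
# objective (classification loss plus flow-reconstruction MSE), this variant
# plays a GAN game between the flow generator G and the discriminator D:
#
#   loss_G = lr_cls * CE + lr_mse * MSE + lr_adv_g * adv(D(gen_flow), real)
#   loss_D = lr_adv_d * (adv(D(gt_flow), real) + adv(D(gen_flow), fake))
#
# The names adv, gen_flow and gt_flow are illustrative, not identifiers
# taken from train.py.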
61 | parser.add_argument('--weights', default=None, type=str) 62 | parser.add_argument('--resume', default=None, type=str) 63 | parser.add_argument('--epochs', default=500, type=int, 64 | help='number of training epochs.') 65 | parser.add_argument('--epoch-thre', default=500, type=int, 66 | help='epoch threshold: train only the generator for this many epochs before jointly updating the classifier.') 67 | parser.add_argument('--batch-size', default=40, type=int, 68 | help='batch size.') 69 | parser.add_argument('--lr', default=0.001, type=float, 70 | help='base learning rate.') 71 | parser.add_argument('--lr-cls', default=1, type=float, 72 | help='cls loss weight.') 73 | parser.add_argument('--loss-mse', default='MSELoss', type=str) 74 | parser.add_argument('--lr-adv-g', default=1, type=float, 75 | help='adv loss weight during training G.') 76 | parser.add_argument('--lr-adv-d', default=1, type=float, 77 | help='adv loss weight during training D.') 78 | parser.add_argument('--lr-mse', default=0.1, type=float, 79 | help='mse loss weight.') 80 | parser.add_argument('--lr-steps', default=[200, 300, 400], type=float, nargs="+", 81 | help='epochs to decay learning rate.') 82 | parser.add_argument('--lr-decay', default=0.1, type=float, 83 | help='lr decay factor.') 84 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, 85 | help='weight decay.') 86 | parser.add_argument('--lr_cls_mult', default=0.01, type=float, help='cls learning multiplier.') 87 | parser.add_argument('--lr_mse_mult', default=0.01, type=float, help='mse learning multiplier.') 88 | parser.add_argument('--lr_d_mult', default=0.01, type=float, help='discriminator learning multiplier.') 89 | 90 | # Log. 91 | parser.add_argument('--eval-freq', default=5, type=int, 92 | help='evaluation frequency (epochs).') 93 | parser.add_argument('--workers', default=8, type=int, 94 | help='number of data loader workers.') 95 | parser.add_argument('--model-prefix', type=str, default="model", 96 | help="prefix of model name.") 97 | parser.add_argument('--gpus', nargs='+', type=int, default=None, 98 | help='gpu ids.') 99 | -------------------------------------------------------------------------------- /code/dmcnet_GAN/transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | """Functions for data augmentation and related preprocessing.""" 9 | 10 | import random 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def color_aug(img, random_h=36, random_l=50, random_s=50): 16 | img = cv2.cvtColor(img, cv2.COLOR_BGR2HLS).astype(float) 17 | 18 | h = (random.random() * 2 - 1.0) * random_h 19 | l = (random.random() * 2 - 1.0) * random_l 20 | s = (random.random() * 2 - 1.0) * random_s 21 | 22 | img[..., 0] += h 23 | img[..., 0] = np.minimum(img[..., 0], 180) 24 | 25 | img[..., 1] += l 26 | img[..., 1] = np.minimum(img[..., 1], 255) 27 | 28 | img[..., 2] += s 29 | img[..., 2] = np.minimum(img[..., 2], 255) 30 | 31 | img = np.maximum(img, 0) 32 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_HLS2BGR) 33 | return img 34 | 35 | 36 | class GroupCenterCrop(object): 37 | def __init__(self, size): 38 | self._size = size 39 | 40 | def __call__(self, img_group): 41 | h, w, _ = img_group[0].shape 42 | hs = (h - self._size) // 2 43 | ws = (w - self._size) // 2 44 | return [img[hs:hs+self._size, ws:ws+self._size] for img in img_group] 45 | 46 | 47 | class GroupRandomHorizontalFlip(object): 48 | def __call__(self, img_group, is_mv_or_flow=False): 49 | if random.random() < 0.5: 50 | ret = [img[:, ::-1, :].astype(np.int32) for img in img_group] # residual and then flow and MV 51 | for i in range(len(ret)): 52 | ret[i][:, :, :4] -= 128 53 | ret[i][..., 0] *= (-1) 54 | ret[i][..., 2] *= (-1) 55 | ret[i][:, :, :4] += 128 56 | return ret 57 | else: 58 | return img_group 59 | 60 | class GroupScale(object): 61 | def __init__(self, size): 62 | self._size = (size, size) 63 | 64 | def __call__(self, img_group): 65 | # resize flow then MV then residual 66 | ret_img_group = [] 67 | for idx in range(0, len(img_group)): 68 | #print('Input frames shape %s:' % str(img_group[idx].shape)) 69 | #print('Input frames after transform mv flow shape %s:' % str(resize_mv(img_group[idx][:, :, :4], self._size, cv2.INTER_LINEAR).shape)) 70 | #print('Input frames after transform residual shape %s:' % str(cv2.resize(img_group[idx][:, :, 4:], self._size, cv2.INTER_LINEAR).shape)) 71 | ret_img_group.append( 72 | np.concatenate((resize_mv(img_group[idx][:, :, :4], self._size, cv2.INTER_LINEAR), 73 | cv2.resize(img_group[idx][:, :, 4:], self._size, cv2.INTER_LINEAR)), axis=2)) 74 | 75 | return ret_img_group 76 | 77 | class GroupOverSample(object): 78 | def __init__(self, crop_size, scale_size=None): 79 | self.crop_size = crop_size if not isinstance(crop_size, int) else (crop_size, crop_size) 80 | 81 | if scale_size is not None: 82 | self.scale_worker = GroupScale(scale_size) 83 | else: 84 | self.scale_worker = None 85 | 86 | def __call__(self, img_group): 87 | 88 | if self.scale_worker is not None: 89 | img_group = self.scale_worker(img_group) 90 | 91 | image_w, image_h, _ = img_group[0].shape 92 | crop_w, crop_h = self.crop_size 93 | 94 | offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h, crop_w, crop_h) 95 | oversample_group = list() 96 | 97 | for o_w, o_h in offsets: 98 | for img in img_group: 99 | 100 | crop = img[o_w:o_w+crop_w, o_h:o_h+crop_h] 101 | oversample_group.append(crop) 102 | 103 | flip_crop = crop[:, ::-1, :].astype(np.int32) # residual 104 | flip_crop[:, :, :4] -= 128 # flow and MV 105 | flip_crop[..., 0] *= (-1) 106 | flip_crop[..., 2] *= (-1) 107 | flip_crop[:, :, :4] += 128 108 | oversample_group.append(flip_crop) 109 | 110 | return oversample_group 111 | 112 | def resize_mv(img, shape, interpolation): 113 | return np.stack([cv2.resize(img[..., i], 
shape, interpolation) 114 | for i in range(img.shape[-1])], axis=2) 115 | 116 | 117 | class GroupMultiScaleCrop(object): 118 | def __init__(self, input_size, scales=None, max_distort=1, fix_crop=False, more_fix_crop=True): 119 | self.scales = scales if scales is not None else [1, .875, .75, .66] 120 | self.max_distort = max_distort 121 | self.fix_crop = fix_crop 122 | self.more_fix_crop = more_fix_crop 123 | self.input_size = input_size if not isinstance(input_size, int) else [input_size, input_size] # 224x224 124 | 125 | def __call__(self, img_group): 126 | 127 | im_size = img_group[0].shape 128 | crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size) 129 | crop_img_group = [img[offset_w:offset_w + crop_w, offset_h:offset_h + crop_h] for img in img_group] 130 | 131 | # resize flow -> MV -> residual 132 | ret_img_group = [] 133 | for idx in range(0, len(img_group)): 134 | ret_img_group.append( 135 | np.concatenate( 136 | (resize_mv(crop_img_group[idx][:, :, :4], (self.input_size[0], self.input_size[1]), cv2.INTER_LINEAR), 137 | cv2.resize(crop_img_group[idx][:, :, 4:], (self.input_size[0], self.input_size[1]), cv2.INTER_LINEAR)), axis=2)) 138 | 139 | return ret_img_group 140 | 141 | def _sample_crop_size(self, im_size): 142 | image_w, image_h = im_size[0], im_size[1] 143 | 144 | base_size = min(image_w, image_h) 145 | crop_sizes = [int(base_size * x) for x in self.scales] 146 | crop_h = [self.input_size[1] if abs(x - self.input_size[1]) < 3 else x for x in crop_sizes] 147 | crop_w = [self.input_size[0] if abs(x - self.input_size[0]) < 3 else x for x in crop_sizes] 148 | 149 | pairs = [] 150 | for i, h in enumerate(crop_h): 151 | for j, w in enumerate(crop_w): 152 | if abs(i - j) <= self.max_distort: 153 | pairs.append((w, h)) 154 | 155 | crop_pair = random.choice(pairs) 156 | if not self.fix_crop: 157 | w_offset = random.randint(0, image_w - crop_pair[0]) 158 | h_offset = random.randint(0, image_h - crop_pair[1]) 159 | else: 160 | w_offset, h_offset = self._sample_fix_offset(image_w, image_h, crop_pair[0], crop_pair[1]) 161 | 162 | return crop_pair[0], crop_pair[1], w_offset, h_offset 163 | 164 | def _sample_fix_offset(self, image_w, image_h, crop_w, crop_h): 165 | offsets = self.fill_fix_offset(self.more_fix_crop, image_w, image_h, crop_w, crop_h) 166 | return random.choice(offsets) 167 | 168 | @staticmethod 169 | def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h): 170 | w_step = (image_w - crop_w) // 4 171 | h_step = (image_h - crop_h) // 4 172 | 173 | ret = list() 174 | ret.append((0, 0)) # upper left 175 | ret.append((4 * w_step, 0)) # upper right 176 | ret.append((0, 4 * h_step)) # lower left 177 | ret.append((4 * w_step, 4 * h_step)) # lower right 178 | ret.append((2 * w_step, 2 * h_step)) # center 179 | 180 | if more_fix_crop: 181 | ret.append((0, 2 * h_step)) # center left 182 | ret.append((4 * w_step, 2 * h_step)) # center right 183 | ret.append((2 * w_step, 4 * h_step)) # lower center 184 | ret.append((2 * w_step, 0 * h_step)) # upper center 185 | 186 | ret.append((1 * w_step, 1 * h_step)) # upper left quarter 187 | ret.append((3 * w_step, 1 * h_step)) # upper right quarter 188 | ret.append((1 * w_step, 3 * h_step)) # lower left quarter 189 | ret.append((3 * w_step, 3 * h_step)) # lower right quarter 190 | 191 | return ret 192 | --------------------------------------------------------------------------------
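As a quick illustration of the group transforms above, here is a minimal sketch on dummy data (not part of the original file); it assumes the channel layout used throughout this file, i.e. the first four channels are flow/MV (sliced as `[..., :4]`) and the remaining channels are the residual:

```
# illustrative usage of the group transforms on dummy 7-channel frames:
# flow(2) + MV(2) + residual(3), matching the [..., :4] / [..., 4:] slicing.
import numpy as np

frames = [np.random.randint(0, 256, (256, 340, 7)).astype(np.float32)
          for _ in range(3)]

crop = GroupMultiScaleCrop(224)      # random multi-scale crop, resized to 224
flip = GroupRandomHorizontalFlip()   # flips and negates the x-displacement channels

out = flip(crop(frames))
print(out[0].shape)                  # -> (224, 224, 7)
```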
/code/dmcnet_GAN/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import numpy as np 9 | 10 | def viz_flow(u,v,logscale=True,scaledown=6,output=False): 11 | """ 12 | topleft is zero, u is horiz/x/width, v is vertical/y/height 13 | red is 3 o'clock, yellow is 6, light blue is 9, blue/purple is 12 14 | """ 15 | colorwheel = makecolorwheel() 16 | ncols = colorwheel.shape[0] 17 | 18 | radius = np.sqrt(u**2 + v**2) 19 | if output: 20 | print("Maximum flow magnitude: %0.4f" % np.max(radius)) 21 | if logscale: 22 | radius = np.log(radius + 1) 23 | if output: 24 | print("Maximum flow magnitude (after log): %0.4f" % np.max(radius)) 25 | radius = radius / scaledown 26 | if output: 27 | print("Maximum flow magnitude (after scaledown): %0.4f" % np.max(radius)) 28 | rot = np.arctan2(-v, -u) / np.pi 29 | 30 | fk = (rot+1)/2 * (ncols-1) # -1~1 mapped to 0~ncols-1 31 | k0 = fk.astype(np.uint8) # 0, 1, 2, ..., ncols-1 32 | 33 | k1 = k0+1 34 | k1[k1 == ncols] = 0 35 | 36 | f = fk - k0 37 | 38 | ncolors = colorwheel.shape[1] 39 | img = np.zeros(u.shape+(ncolors,)) 40 | for i in range(ncolors): 41 | tmp = colorwheel[:,i] 42 | col0 = tmp[k0] 43 | col1 = tmp[k1] 44 | col = (1-f)*col0 + f*col1 45 | 46 | idx = radius <= 1 47 | # increase saturation with radius 48 | col[idx] = 1 - radius[idx]*(1-col[idx]) 49 | # out of range 50 | col[~idx] *= 0.75 51 | img[:,:,i] = np.floor(255*col).astype(np.uint8) 52 | 53 | return img.astype(np.uint8) 54 | 55 | 56 | 57 | def makecolorwheel(): 58 | # Create a colorwheel for visualization 59 | RY = 15 60 | YG = 6 61 | GC = 4 62 | CB = 11 63 | BM = 13 64 | MR = 6 65 | 66 | ncols = RY + YG + GC + CB + BM + MR 67 | 68 | colorwheel = np.zeros((ncols,3)) 69 | 70 | col = 0 71 | # RY 72 | colorwheel[0:RY,0] = 1 73 | colorwheel[0:RY,1] = np.arange(0,1,1./RY) 74 | col += RY 75 | 76 | # YG 77 | colorwheel[col:col+YG,0] = np.arange(1,0,-1./YG) 78 | colorwheel[col:col+YG,1] = 1 79 | col += YG 80 | 81 | # GC 82 | colorwheel[col:col+GC,1] = 1 83 | colorwheel[col:col+GC,2] = np.arange(0,1,1./GC) 84 | col += GC 85 | 86 | # CB 87 | colorwheel[col:col+CB,1] = np.arange(1,0,-1./CB) 88 | colorwheel[col:col+CB,2] = 1 89 | col += CB 90 | 91 | # BM 92 | colorwheel[col:col+BM,2] = 1 93 | colorwheel[col:col+BM,0] = np.arange(0,1,1./BM) 94 | col += BM 95 | 96 | # MR 97 | colorwheel[col:col+MR,2] = np.arange(1,0,-1./MR) 98 | colorwheel[col:col+MR,0] = 1 99 | 100 | return colorwheel 101 | --------------------------------------------------------------------------------
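For reference, `viz_flow` can be exercised on a synthetic flow field like this (illustrative sketch; the output file name is made up):

```
# visualize a synthetic radial flow field with viz_flow (illustrative only)
import cv2
import numpy as np

h, w = 224, 224
ys, xs = np.mgrid[0:h, 0:w].astype(np.float64)
u = (xs - w / 2.0) / 16.0   # horizontal displacement
v = (ys - h / 2.0) / 16.0   # vertical displacement

img = viz_flow(u, v, logscale=True, scaledown=6, output=True)
cv2.imwrite('flow_vis.png', img)
```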
/code/dmcnet_I3D/README.MD: -------------------------------------------------------------------------------- 1 | # DMC-Net + I3D 2 | 3 | 4 | This directory contains the code used for the experiment of combining our DMC-Net and I3D. 5 | 6 | 7 | ## Dependencies 8 | Our code is built on the following (but not limited to) packages: 9 | 1. PyTorch 0.4.0 10 | 2. Python 3.6, numpy 11 | 3. [coviar](https://github.com/chaoyuaw/pytorch-coviar/blob/master/GETTING_STARTED.md) 12 | 13 | ## Data 14 | The experiments are run on the mpeg4 videos of [HMDB51](http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/) and [UCF101](https://www.crcv.ucf.edu/data/UCF101.php). 15 | 16 | The path to the videos or to the stored MV and residuals must be set manually in data/video_iterator.py. 17 | 18 | ## Training 19 | Please first go to [here](https://github.com/hassony2/kinetics_i3d_pytorch) to download the pretrained flow model of I3D. 20 | We have two separate files for HMDB51 and UCF101. We first train the generator without updating the weights of the classifier (I3D), using a reconstruction loss, an adversarial loss and a classification loss; we train this way for the first `epoch-thre` epochs. Then we jointly update the generator and the classifier. The discriminator is updated in both stages (a toy sketch of this schedule is given right after this file). 21 | The details of how to use them are shown in the following sample training script. 22 | - Sample script for training on HMDB51 split 1 23 | ``` 24 | bash train.sh 25 | ``` 26 | ## Testing 27 | Go to the `test` directory. We have two separate files for HMDB51 and UCF101. The details of how to use them are shown in the following sample testing code. Please put the model you want to evaluate in ./exps/models/. 28 | - Sample script for testing on HMDB51 split 1 29 | ``` 30 | bash test.sh 31 | ``` 32 | We provide models that produce the results reported in our paper [here](). 33 | 34 | ## Acknowledgment 35 | Our training and testing code is mainly built on [MF-Net](https://github.com/cypw/PyTorch-MFNet). Our I3D model is borrowed from this pytorch [implementation](https://github.com/hassony2/kinetics_i3d_pytorch). Our dataloader also borrows code from [CoViAR](https://github.com/chaoyuaw/pytorch-coviar). Thanks a lot! 36 | 37 | 38 | 39 | 40 | 41 | --------------------------------------------------------------------------------
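To make the two-stage schedule described in the Training section concrete, here is a toy sketch with stand-in modules. None of the names below are the repo's actual classes; the loss weights simply mirror the defaults of the GAN training options (`--lr-mse 0.1`, `--lr-cls 1`, `--lr-adv-g 1`):

```
# toy two-stage schedule: generator-only first, joint G+C afterwards; D always.
import torch
import torch.nn as nn

G = nn.Conv2d(5, 2, 3, padding=1)  # stand-in generator: MV+residual -> flow
C = nn.Sequential(nn.Flatten(), nn.Linear(2 * 8 * 8, 51))  # stand-in classifier
D = nn.Sequential(nn.Flatten(), nn.Linear(2 * 8 * 8, 1))   # stand-in discriminator
mse, ce, bce = nn.MSELoss(), nn.CrossEntropyLoss(), nn.BCEWithLogitsLoss()
opt_g = torch.optim.Adam(G.parameters(), lr=1e-3)
opt_gc = torch.optim.Adam(list(G.parameters()) + list(C.parameters()), lr=1e-3)
opt_d = torch.optim.Adam(D.parameters(), lr=1e-3)

epoch_thre, epochs = 2, 4
for epoch in range(epochs):
    joint = epoch >= epoch_thre          # stage 2: classifier is updated too
    mv_res = torch.randn(4, 5, 8, 8)     # dummy batch of MV+residual inputs
    flow = torch.randn(4, 2, 8, 8)       # dummy target optical flow
    label = torch.randint(0, 51, (4,))

    dmc = G(mv_res)
    loss_g = 0.1 * mse(dmc, flow) \
             + bce(D(dmc), torch.ones(4, 1)) \
             + ce(C(dmc), label)         # reconstruction + adversarial + cls
    opt = opt_gc if joint else opt_g
    opt.zero_grad(); loss_g.backward(); opt.step()

    # the discriminator is updated in both stages
    loss_d = bce(D(flow), torch.ones(4, 1)) + bce(D(dmc.detach()), torch.zeros(4, 1))
    opt_d.zero_grad(); loss_d.backward(); opt_d.step()
```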
/code/dmcnet_I3D/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__init__.py -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/image_transforms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/image_transforms.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/iterator_factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/iterator_factory.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/video_iterator.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/video_iterator.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/video_sampler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/video_sampler.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/__pycache__/video_transforms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/data/__pycache__/video_transforms.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/image_iterator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import os 8 | import cv2 9 | import numpy as np 10 | 11 | import torch.utils.data as data 12 | import logging 13 | 14 | 15 | class ImageListIter(data.Dataset): 16 | 17 | def __init__(self, 18 | image_prefix, 19 | txt_list, 20 | image_transform, 21 | name="", 22 | force_color=True): 23 | super(ImageListIter, self).__init__() 24 | 25 | # load image list 26 | self.image_list = self._get_video_list(txt_list=txt_list) 27 | 28 | # load params 29 | self.force_color = force_color 30 | self.image_prefix = image_prefix 31 | self.image_transform = image_transform 32 | logging.info("ImageListIter ({:s}) initialized, num: {:d}".format(name, 33 | len(self.image_list))) 34 | 35 | def get_image(self, index): 36 | # get current video info 37 | im_id, label, img_subpath = self.image_list[index] 38 | 39 | # load image 40 | image_path = os.path.join(self.image_prefix, img_subpath) 41 | if self.force_color: 42 | cv_read_flag = cv2.IMREAD_COLOR 43 | else: 44 | cv_read_flag = cv2.IMREAD_GRAYSCALE 45 | cv_img = cv2.imread(image_path, cv_read_flag) 46 | image_input = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB) 47 | 48 | # apply image augmentation 49 | if self.image_transform is not None: 50 | image_input = self.image_transform(image_input) 51 | return image_input, label, img_subpath 52 | 53 | 54 | def __getitem__(self, index): 55 | image_input, label, img_subpath = self.get_image(index) 56 | return image_input, label 57 | 58 | 59 | def __len__(self): 60 | return len(self.image_list) 61 | 62 | 63 | def _get_video_list(self, txt_list): 64 | # format of each line: 65 | # [im_id, label, image_subpath] 66 | assert os.path.exists(txt_list), "Failed to locate: {}".format(txt_list) 67 | 68 | # building dataset 69 | logging.info("Building dataset ...") 70 | image_list = [] 71 | with open(txt_list) as f: 72 | lines = f.read().splitlines() 73 | logging.info("Found {} images in '{}'".format(len(lines), txt_list)) 74 | for i, line in enumerate(lines): 75 | im_id, label, image_subpath = line.split() 76 | info = [int(im_id), int(label), image_subpath] 77 | image_list.append(info) 78 | 79 | return image_list --------------------------------------------------------------------------------
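The list file consumed by `_get_video_list` has one image per line in the form `<im_id> <label> <image_subpath>`. A small usage sketch (illustrative only; all paths and file contents are made up):

```
# illustrative only: the list file and the iterator that reads it.
# /path/to/train_list.txt might contain lines such as:
#   0 12 brush_hair/clip_0001/frame0001.jpg
#   1 37 sit/clip_0042/frame0003.jpg
it = ImageListIter(image_prefix='/path/to/frames',      # assumed frame root
                   txt_list='/path/to/train_list.txt',  # assumed list file
                   image_transform=None,
                   name='train')
image, label = it[0]    # image decoded with cv2, converted BGR -> RGB
```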
/code/dmcnet_I3D/data/image_transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | # most of the code is from: 8 | # https://github.com/bryanyzhu/two-stream-pytorch/blob/master/video_transforms.py 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | 14 | class Compose(object): 15 | """Composes several video_transforms together. 16 | 17 | Args: 18 | transforms (List[Transform]): list of transforms to compose. 19 | 20 | Example: 21 | >>> video_transforms.Compose([ 22 | >>> video_transforms.CenterCrop(10), 23 | >>> video_transforms.ToTensor(), 24 | >>> ]) 25 | """ 26 | def __init__(self, transforms, aug_seed=0): 27 | self.transforms = transforms 28 | for i, t in enumerate(self.transforms): 29 | t.set_random_state(seed=(aug_seed+i)) 30 | #print(aug_seed+i) 31 | 32 | def __call__(self, data): 33 | for t in self.transforms: 34 | data = t(data) 35 | return data 36 | 37 | class Transform(object): 38 | """base class for all transformations""" 39 | def set_random_state(self, seed=None): 40 | self.rng = np.random.RandomState(seed) 41 | 42 | 43 | #################################### 44 | # Customized Transformations 45 | #################################### 46 | 47 | class Normalize(Transform): 48 | """Given mean: (R, G, B) and std: (R, G, B), 49 | will normalize each channel of the torch.*Tensor, i.e. 50 | channel = (channel - mean) / std 51 | """ 52 | def __init__(self, mean, std): 53 | self.mean = mean 54 | self.std = std 55 | 56 | def __call__(self, tensor): 57 | for t, m, s in zip(tensor, self.mean, self.std): 58 | t.sub_(m).div_(s) 59 | return tensor 60 | 61 | 62 | class Resize(Transform): 63 | """ Rescales the input numpy array to the given 'size'. 64 | 'size' will be the size of the smaller edge. 65 | For example, if height > width, then image will be 66 | rescaled to (size * height / width, size) 67 | size: size of the smaller edge 68 | interpolation: Default: cv2.INTER_LINEAR 69 | """ 70 | def __init__(self, size, interpolation=cv2.INTER_LINEAR): 71 | self.size = size # [w, h] 72 | self.interpolation = interpolation 73 | 74 | def __call__(self, data): 75 | h, w, c = data.shape 76 | 77 | if isinstance(self.size, int): 78 | slen = self.size 79 | if min(w, h) == slen: 80 | return data 81 | if w < h: 82 | new_w = self.size 83 | new_h = int(self.size * h / w) 84 | else: 85 | new_w = int(self.size * w / h) 86 | new_h = self.size 87 | else: 88 | new_w = self.size[0] 89 | new_h = self.size[1] 90 | 91 | if (h != new_h) or (w != new_w): 92 | scaled_data = cv2.resize(data, (new_w, new_h), self.interpolation) 93 | else: 94 | scaled_data = data 95 | 96 | return scaled_data 97 | 98 | 99 | class RandomScale(Transform): 100 | """ Rescales the input numpy array to the given 'size'. 101 | 'size' will be the size of the smaller edge. 
102 | For example, if height > width, then image will be 103 | rescaled to (size * height / width, size) 104 | size: size of the smaller edge 105 | interpolation: Default: cv2.INTER_LINEAR 106 | """ 107 | def __init__(self, make_square=False, 108 | aspect_ratio=[1.0, 1.0], 109 | slen=[224, 288], 110 | interpolation=cv2.INTER_LINEAR): 111 | assert slen[1] >= slen[0], \ 112 | "slen ({}) should be in increasing order".format(slen) 113 | assert aspect_ratio[1] >= aspect_ratio[0], \ 114 | "aspect_ratio ({}) should be in increasing order".format(aspect_ratio) 115 | self.slen = slen # [min factor, max factor] 116 | self.aspect_ratio = aspect_ratio 117 | self.make_square = make_square 118 | self.interpolation = interpolation 119 | self.rng = np.random.RandomState(0) 120 | 121 | def __call__(self, data): 122 | h, w, c = data.shape 123 | new_w = w 124 | new_h = h if not self.make_square else w 125 | if self.aspect_ratio: 126 | random_aspect_ratio = self.rng.uniform(self.aspect_ratio[0], self.aspect_ratio[1]) 127 | if self.rng.rand() > 0.5: 128 | random_aspect_ratio = 1.0 / random_aspect_ratio 129 | new_w *= random_aspect_ratio 130 | new_h /= random_aspect_ratio 131 | resize_factor = self.rng.uniform(self.slen[0], self.slen[1]) / min(new_w, new_h) 132 | new_w *= resize_factor 133 | new_h *= resize_factor 134 | scaled_data = cv2.resize(data, (int(new_w+1), int(new_h+1)), self.interpolation) 135 | return scaled_data 136 | 137 | 138 | class CenterCrop(Transform): 139 | """Crops the given numpy array at the center to have a region of 140 | the given size. size can be a tuple (target_height, target_width) 141 | or an integer, in which case the target will be of a square shape (size, size) 142 | """ 143 | def __init__(self, size): 144 | if isinstance(size, int): 145 | self.size = (size, size) 146 | else: 147 | self.size = size 148 | 149 | def __call__(self, data): 150 | h, w, c = data.shape 151 | th, tw = self.size 152 | x1 = int(round((w - tw) / 2.)) 153 | y1 = int(round((h - th) / 2.)) 154 | cropped_data = data[y1:(y1+th), x1:(x1+tw), :] 155 | return cropped_data 156 | 157 | class RandomCrop(Transform): 158 | """Crops the given numpy array at the random location to have a region of 159 | the given size. 
size can be a tuple (target_height, target_width) 160 | or an integer, in which case the target will be of a square shape (size, size) 161 | """ 162 | def __init__(self, size): 163 | if isinstance(size, int): 164 | self.size = (size, size) 165 | else: 166 | self.size = size 167 | self.rng = np.random.RandomState(0) 168 | 169 | def __call__(self, data): 170 | h, w, c = data.shape 171 | th, tw = self.size 172 | x1 = self.rng.choice(range(w - tw)) 173 | y1 = self.rng.choice(range(h - th)) 174 | #print(x1,y1,data.shape) 175 | cropped_data = data[y1:(y1+th), x1:(x1+tw), :] 176 | return cropped_data 177 | 178 | class RandomHorizontalFlip(Transform): 179 | """Randomly horizontally flips the given numpy array with a probability of 0.5 180 | """ 181 | def __init__(self, modality = 'rgb'): 182 | self.modality = modality 183 | self.rng = np.random.RandomState(0) 184 | 185 | def __call__(self, data): 186 | if self.rng.rand() < 0.5: 187 | data = np.fliplr(data) 188 | data = np.ascontiguousarray(data).astype(np.int32) 189 | if self.modality in ['mv', 'flow']: 190 | data[:, :, 0::2] += -128 191 | data[:, :, 0::2] *= (-1) 192 | data[:, :, 0::2] += 128 193 | elif self.modality == 'flow+mp4': 194 | data[:, :, 0::7] += -128 195 | data[:, :, 2::7] += -128 196 | data[:, :, 0::7] *= (-1) 197 | data[:, :, 2::7] *= (-1) 198 | data[:, :, 0::7] += 128 199 | data[:, :, 2::7] += 128 200 | return data.astype(np.uint8) 201 | 202 | class RandomVerticalFlip(Transform): 203 | """Randomly vertically flips the given numpy array with a probability of 0.5 204 | """ 205 | def __init__(self): 206 | self.rng = np.random.RandomState(0) 207 | 208 | def __call__(self, data): 209 | if self.rng.rand() < 0.5: 210 | data = np.flipud(data) 211 | data = np.ascontiguousarray(data) 212 | return data 213 | 214 | class RandomRGB(Transform): 215 | def __init__(self, vars=[10, 10, 10]): 216 | self.vars = vars 217 | self.rng = np.random.RandomState(0) 218 | 219 | def __call__(self, data): 220 | h, w, c = data.shape 221 | 222 | random_vars = [int(round(self.rng.uniform(-x, x))) for x in self.vars] 223 | 224 | base = len(random_vars) 225 | augmented_data = np.zeros(data.shape) 226 | for ic in range(0, c): 227 | var = random_vars[ic%base] 228 | augmented_data[:,:,ic] = np.minimum(np.maximum(data[:,:,ic] + var, 0), 255) 229 | return augmented_data 230 | 231 | class RandomHLS(Transform): 232 | def __init__(self, vars=[15, 35, 25]): 233 | self.vars = vars 234 | self.rng = np.random.RandomState(0) 235 | 236 | def __call__(self, data): 237 | h, w, c = data.shape 238 | 239 | assert c%3 == 0, "input channel = %d, illegal"%c 240 | random_vars = [int(round(self.rng.uniform(-x, x))) for x in self.vars] 241 | 242 | base = len(random_vars) 243 | augmented_data = np.zeros(data.shape, ) 244 | for i_im in range(0, int(c/3)): 245 | augmented_data[:,:,3*i_im:(3*i_im+3)] = \ 246 | cv2.cvtColor(data[:,:,3*i_im:(3*i_im+3)], cv2.COLOR_RGB2HLS) 247 | 248 | hls_limits = [180, 255, 255] 249 | for ic in range(0, c): 250 | var = random_vars[ic%base] 251 | limit = hls_limits[ic%base] 252 | augmented_data[:,:,ic] = np.minimum(np.maximum(augmented_data[:,:,ic] + var, 0), limit) 253 | 254 | for i_im in range(0, int(c/3)): 255 | augmented_data[:,:,3*i_im:(3*i_im+3)] = \ 256 | cv2.cvtColor(augmented_data[:,:,3*i_im:(3*i_im+3)].astype(np.uint8), \ 257 | cv2.COLOR_HLS2RGB) 258 | 259 | return augmented_data 260 | 261 | 262 | class ToTensor(Transform): 263 | """Converts a numpy.ndarray (H x W x C) in the range 264 | [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range 
[0.0, 1.0]. 265 | """ 266 | def __init__(self, dim=3): 267 | self.dim = dim 268 | 269 | def __call__(self, image): 270 | if isinstance(image, np.ndarray): 271 | # H, W, C = image.shape 272 | # handle numpy array 273 | image = torch.from_numpy(image.transpose((2, 0, 1))) 274 | # backward compatibility 275 | return image.float() / 255.0 -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/iterator_factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import os 8 | import logging 9 | 10 | import torch 11 | 12 | from . import video_sampler as sampler 13 | from . import video_transforms as transforms 14 | from .video_iterator import VideoIter 15 | 16 | def get_hmdb51(data_root='./dataset/HMDB51', 17 | clip_length=8, 18 | train_interval=2, 19 | val_interval=2, 20 | mean=[0.485, 0.456, 0.406], 21 | std=[0.229, 0.224, 0.225], 22 | seed=torch.distributed.get_rank() if torch.distributed._initialized else 0, 23 | modality = 'rgb', 24 | split = 1, 25 | net_name = 'I3D', 26 | accumulate = True, 27 | ds_factor = 16, 28 | mv_minmaxnorm = False, 29 | mv_loadimg = False, 30 | **kwargs): 31 | """ data iter for hmdb51 32 | """ 33 | logging.debug("VideoIter:: clip_length = {}, interval = [train: {}, val: {}], seed = {}".format( \ 34 | clip_length, train_interval, val_interval, seed)) 35 | 36 | normalize = transforms.Normalize(mean=mean, std=std) 37 | if modality in ['rgb', 'I']: 38 | tran = transforms.Compose([ 39 | transforms.RandomScale(make_square=True, 40 | aspect_ratio=[0.8, 1./0.8], 41 | slen=[224, 288]), 42 | transforms.RandomCrop((224, 224)), # insert a resize if needed 43 | transforms.RandomHorizontalFlip(), 44 | transforms.RandomHLS(vars=[15, 35, 25]), 45 | transforms.ToTensor(modality), 46 | normalize, 47 | ], 48 | aug_seed=(seed+1)) 49 | elif modality in ['flow', 'mv', 'res', 'flow+mp4']: 50 | tran = transforms.Compose([ 51 | transforms.RandomCrop((224, 224)), # insert a resize if needed 52 | transforms.RandomHorizontalFlip(modality), 53 | transforms.ToTensor(modality, ds_factor), 54 | normalize, 55 | ], 56 | aug_seed=(seed+1)) 57 | train_sampler = sampler.RandomSampling(num=clip_length, 58 | interval=train_interval, 59 | speed=[1.0, 1.0], 60 | seed=(seed+0)) 61 | train = VideoIter(video_prefix='/HMDB51/fb/TSN_input/', #change accordingly 62 | txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'hmdb51_split{}_train.txt'.format(split)), 63 | sampler=train_sampler, 64 | force_color=True, 65 | video_transform=tran, 66 | name='train', 67 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'hmdb51_split{}_train_info.txt'.format(split)), 68 | shuffle_list_seed=(seed+2), 69 | check_video = True, 70 | load_from_img = True, 71 | modality = modality, accumulate = accumulate, ds_factor = ds_factor, mv_minmaxnorm = mv_minmaxnorm,mv_loadimg=mv_loadimg) 72 | if net_name == 'I3D': 73 | tran = transforms.Compose([ 74 | transforms.CenterCrop((224, 224)), 75 | transforms.ToTensor(modality), 76 | normalize, 77 | ]) 78 | else: 79 | raise ValueError('Unknown net_name') 80 | val_sampler = sampler.SequentialSampling(num=clip_length, 81 | interval=val_interval, 82 | fix_cursor=True, 83 | shuffle=True) 84 | val = VideoIter(video_prefix='/HMDB51/fb/TSN_input/', #change accordingly 85 | txt_list=os.path.join(data_root, 
'raw', 'list_cvt', 'hmdb51_split{}_test.txt'.format(split)), 86 | sampler=val_sampler, 87 | force_color=True, 88 | video_transform=tran, 89 | name='test', 90 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'hmdb51_split{}_test_info.txt'.format(split)), 91 | check_video = True, 92 | load_from_img = True, 93 | modality = modality, accumulate = accumulate, ds_factor = ds_factor, mv_minmaxnorm = mv_minmaxnorm,mv_loadimg=mv_loadimg) 94 | 95 | return (train, val) 96 | 97 | def get_ucf101(data_root='./dataset/UCF101', 98 | clip_length=8, 99 | train_interval=2, 100 | val_interval=2, 101 | mean=[0.485, 0.456, 0.406], 102 | std=[0.229, 0.224, 0.225], 103 | seed=torch.distributed.get_rank() if torch.distributed._initialized else 0, 104 | modality = 'rgb', 105 | split = 1, 106 | net_name = 'I3D', 107 | accumulate = True, 108 | ds_factor = 16, 109 | mv_minmaxnorm = False, 110 | mv_loadimg=False, 111 | **kwargs): 112 | """ data iter for ucf-101 113 | """ 114 | logging.debug("VideoIter:: clip_length = {}, interval = [train: {}, val: {}], seed = {}".format( \ 115 | clip_length, train_interval, val_interval, seed)) 116 | 117 | normalize = transforms.Normalize(mean=mean, std=std) 118 | 119 | if modality in ['rgb', 'I']: 120 | tran = transforms.Compose([ 121 | transforms.RandomScale(make_square=True, 122 | aspect_ratio=[0.8, 1./0.8], 123 | slen=[224, 288]), 124 | transforms.RandomCrop((224, 224)), # insert a resize if needed 125 | transforms.RandomHorizontalFlip(), 126 | transforms.RandomHLS(vars=[15, 35, 25]), 127 | transforms.ToTensor(modality), 128 | normalize, 129 | ], 130 | aug_seed=(seed+1)) 131 | elif modality in ['flow', 'mv', 'res', 'flow+mp4']: 132 | tran = transforms.Compose([ 133 | transforms.RandomCrop((224, 224)), # insert a resize if needed 134 | transforms.RandomHorizontalFlip(modality), 135 | transforms.ToTensor(modality, ds_factor), 136 | normalize, 137 | ], 138 | aug_seed=(seed+1)) 139 | train_sampler = sampler.RandomSampling(num=clip_length, 140 | interval=train_interval, 141 | speed=[1.0, 1.0], 142 | seed=(seed+0)) 143 | train = VideoIter(video_prefix='/HMDB51/fb/TSN_input/', #change accordingly 144 | txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'trainlist0{}.txt'.format(split)), 145 | sampler=train_sampler, 146 | force_color=True, 147 | video_transform=tran, 148 | name='train', 149 | shuffle_list_seed=(seed+2), 150 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'ucf101_split{}_train_info.txt'.format(split)), 151 | check_video = True, 152 | load_from_img = True, 153 | modality = modality, accumulate = accumulate, ds_factor = ds_factor, mv_minmaxnorm = mv_minmaxnorm,mv_loadimg=mv_loadimg) 154 | 155 | if net_name == 'I3D': 156 | tran = transforms.Compose([ 157 | transforms.CenterCrop((224, 224)), 158 | transforms.ToTensor(modality), 159 | normalize, 160 | ]) 161 | else: 162 | raise ValueError('Unknown net_name') 163 | val_sampler = sampler.SequentialSampling(num=clip_length, 164 | interval=val_interval, 165 | fix_cursor=True, 166 | shuffle=True) 167 | val = VideoIter(video_prefix='/HMDB51/fb/TSN_input/', #change accordingly 168 | txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'testlist0{}.txt'.format(split)), 169 | sampler=val_sampler, 170 | force_color=True, 171 | video_transform=tran, 172 | name='test', 173 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'ucf101_split{}_test_info.txt'.format(split)), 174 | check_video = True, 175 | load_from_img = True, 176 | modality = modality, accumulate = accumulate, ds_factor = ds_factor, 
mv_minmaxnorm = mv_minmaxnorm,mv_loadimg=mv_loadimg 177 | ) 178 | 179 | return (train, val) 180 | 181 | 182 | 183 | 184 | def creat(name, batch_size, num_workers=8, **kwargs): 185 | 186 | if name.upper() == 'UCF101': 187 | train, val = get_ucf101(**kwargs) 188 | elif name.upper() == 'HMDB51': 189 | train, val = get_hmdb51(**kwargs) 190 | else: 191 | raise NotImplementedError("iter {} not found".format(name)) 192 | 193 | 194 | train_loader = torch.utils.data.DataLoader(train, 195 | batch_size=batch_size, shuffle=True, 196 | num_workers=num_workers, pin_memory=False) 197 | 198 | val_loader = torch.utils.data.DataLoader(val, 199 | batch_size = batch_size, shuffle=False, 200 | num_workers=num_workers, pin_memory=False) 201 | 202 | return (train_loader, val_loader) 203 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/video_sampler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import math 8 | import numpy as np 9 | 10 | 11 | class RandomSampling(object): 12 | def __init__(self, num, interval=1, speed=[1.0, 1.0], seed=0): 13 | assert num > 0, "at least sampling 1 frame" 14 | self.num = num 15 | self.interval = interval if type(interval) == list else [interval] 16 | self.speed = speed 17 | self.rng = np.random.RandomState(seed) 18 | 19 | def sampling(self, range_max, v_id=None, prev_failed=False): 20 | assert range_max > 0, \ 21 | ValueError("range_max = {}".format(range_max)) 22 | interval = self.rng.choice(self.interval) 23 | if self.num == 1: 24 | return [self.rng.choice(range(0, range_max))] 25 | # sampling 26 | speed_min = self.speed[0] 27 | speed_max = min(self.speed[1], (range_max-1)/((self.num-1)*interval)) 28 | if speed_max < speed_min: 29 | return (np.remainder(np.arange(0, self.num * interval, interval), range_max)).tolist() 30 | random_interval = self.rng.uniform(speed_min, speed_max) * interval 31 | frame_range = (self.num-1) * random_interval 32 | clip_start = self.rng.uniform(0, (range_max-1) - frame_range) 33 | clip_end = clip_start + frame_range 34 | return np.linspace(clip_start, clip_end, self.num).astype(dtype=np.int).tolist() 35 | 36 | 37 | class SequentialSampling(object): 38 | def __init__(self, num, interval=1, shuffle=False, fix_cursor=False, seed=0): 39 | self.memory = {} 40 | self.num = num 41 | self.interval = interval if type(interval) == list else [interval] 42 | self.shuffle = shuffle 43 | self.fix_cursor = fix_cursor 44 | self.rng = np.random.RandomState(seed) 45 | 46 | def sampling(self, range_max, v_id, prev_failed=False): 47 | assert range_max > 0, \ 48 | ValueError("range_max = {}".format(range_max)) 49 | num = self.num 50 | interval = self.rng.choice(self.interval) 51 | frame_range = (num - 1) * interval + 1 52 | # sampling clips 53 | if v_id not in self.memory: 54 | clips = list(range(0, range_max-(frame_range-1), frame_range)) 55 | if self.shuffle: 56 | self.rng.shuffle(clips) 57 | self.memory[v_id] = [-1, clips] 58 | # pick up a clip 59 | cursor, clips = self.memory[v_id] 60 | if not clips: 61 | return (np.remainder(np.arange(0, self.num * interval, interval), range_max)).tolist() 62 | cursor = (cursor + 1) % len(clips) 63 | if prev_failed or not self.fix_cursor: 64 | self.memory[v_id][0] = cursor 65 | # sampling within clip 66 | idxs = range(clips[cursor], 
clips[cursor]+frame_range, interval) 67 | return idxs 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | import logging 73 | logging.getLogger().setLevel(logging.DEBUG) 74 | 75 | """ test RandomSampling() """ 76 | 77 | random_sampler = RandomSampling(num=8, interval=2, speed=[0.5, 2]) 78 | 79 | logging.info("RandomSampling(): range_max < num") 80 | for i in range(10): 81 | logging.info("{:d}: {}".format(i, random_sampler.sampling(range_max=2, v_id=1))) 82 | 83 | logging.info("RandomSampling(): range_max == num") 84 | for i in range(10): 85 | logging.info("{:d}: {}".format(i, random_sampler.sampling(range_max=8, v_id=1))) 86 | 87 | logging.info("RandomSampling(): range_max > num") 88 | for i in range(90): 89 | logging.info("{:d}: {}".format(i, random_sampler.sampling(range_max=30, v_id=1))) 90 | 91 | 92 | """ test SequentialSampling() """ 93 | sequential_sampler = SequentialSampling(num=3, interval=3, fix_cursor=False) 94 | 95 | logging.info("SequentialSampling():") 96 | for i in range(10): 97 | logging.info("{:d}: v_id = {}: {}".format(i, 0, list(sequential_sampler.sampling(range_max=14, v_id=0)))) 98 | # logging.info("{:d}: v_id = {}: {}".format(i, 1, sequential_sampler.sampling(range_max=9, v_id=1))) 99 | # logging.info("{:d}: v_id = {}: {}".format(i, 2, sequential_sampler.sampling(range_max=2, v_id=2))) 100 | # logging.info("{:d}: v_id = {}: {}".format(i, 3, sequential_sampler.sampling(range_max=3, v_id=3))) -------------------------------------------------------------------------------- /code/dmcnet_I3D/data/video_transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import torch 8 | import numpy as np 9 | from skimage.measure import block_reduce 10 | from scipy import interpolate 11 | from .image_transforms import Compose, \ 12 | Transform, \ 13 | Normalize, \ 14 | Resize, \ 15 | RandomScale, \ 16 | CenterCrop, \ 17 | RandomCrop, \ 18 | RandomHorizontalFlip, \ 19 | RandomRGB, \ 20 | RandomHLS 21 | 22 | 23 | class ToTensor(Transform): 24 | """Converts a numpy.ndarray (H x W x (T x C)) in the range 25 | [0, 255] to a torch.FloatTensor of shape (C x T x H x W) in the range [0.0, 1.0]. 26 | """ 27 | def __init__(self, modality = 'rgb', flow_ds_factor = 1, interp = False): 28 | self.modality = modality 29 | self._flow_ds_factor = flow_ds_factor 30 | self._upsample_interp = interp 31 | if modality == 'rgb': 32 | self.dim = 3 33 | elif modality in ['flow', 'mv']: 34 | self.dim = 2 35 | elif modality in ['res', 'I']: 36 | self.dim = 3 37 | elif modality == 'flow+mp4': 38 | self.dim = 7 39 | 40 | def __call__(self, clips): 41 | if isinstance(clips, np.ndarray): 42 | H, W, _ = clips.shape 43 | # handle numpy array 44 | clips = clips.reshape((H,W,-1,self.dim)).transpose((3, 2, 0, 1)) 45 | if self.modality == 'flow+mp4': 46 | if self._flow_ds_factor not in (0, 1): 47 | clips = np.transpose(clips, (1,0,2,3)) 48 | # downsample to make OF blocky 49 | factor = self._flow_ds_factor 50 | w_max = H 51 | h_max = W 52 | input_flow = block_reduce(clips[:,0:2, :, :], block_size=(1, 1, factor, factor), func=np.mean) 53 | # resize to original size by repeating or interpolation 54 | if self._upsample_interp is False: 55 | input_flow = input_flow.repeat(factor, axis=2).repeat(factor, axis=3) 56 | else: 57 | # interpolate along certain dimension? 
only interp1d can do so 58 | w_max_ds = input_flow.shape[2] 59 | h_max_ds = input_flow.shape[3] 60 | f_out = interpolate.interp1d(np.linspace(0, 1, w_max_ds), input_flow, kind='linear', axis=2) 61 | input_flow = f_out(np.linspace(0, 1, w_max_ds * factor)) 62 | f_out = interpolate.interp1d(np.linspace(0, 1, h_max_ds), input_flow, kind='linear', axis=3) 63 | input_flow = f_out(np.linspace(0, 1, h_max_ds * factor)) 64 | clips[:,0:2, :, :] = input_flow[:, :, :w_max, :h_max] 65 | clips = np.transpose(clips, (1,0,2,3)) 66 | 67 | clips = torch.from_numpy(clips) 68 | #print(clips.shape) 69 | # backward compatibility 70 | return clips.float() / 255.0 -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/HMDB51/raw/data: -------------------------------------------------------------------------------- 1 | ../../../../DATA/HMDB51/raw/data -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/HMDB51/raw/list_cvt/mapping_table.txt: -------------------------------------------------------------------------------- 1 | 0 brush_hair 2 | 1 cartwheel 3 | 2 catch 4 | 3 chew 5 | 4 clap 6 | 5 climb 7 | 6 climb_stairs 8 | 7 dive 9 | 8 draw_sword 10 | 9 dribble 11 | 10 drink 12 | 11 eat 13 | 12 fall_floor 14 | 13 fencing 15 | 14 flic_flac 16 | 15 golf 17 | 16 handstand 18 | 17 hit 19 | 18 hug 20 | 19 jump 21 | 20 kick 22 | 21 kick_ball 23 | 22 kiss 24 | 23 laugh 25 | 24 pick 26 | 25 pour 27 | 26 pullup 28 | 27 punch 29 | 28 push 30 | 29 pushup 31 | 30 ride_bike 32 | 31 ride_horse 33 | 32 run 34 | 33 shake_hands 35 | 34 shoot_ball 36 | 35 shoot_bow 37 | 36 shoot_gun 38 | 37 sit 39 | 38 situp 40 | 39 smile 41 | 40 smoke 42 | 41 somersault 43 | 42 stand 44 | 43 swing_baseball 45 | 44 sword 46 | 45 sword_exercise 47 | 46 talk 48 | 47 throw 49 | 48 turn 50 | 49 walk 51 | 50 wave 52 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/HMDB51/scripts/README.md: -------------------------------------------------------------------------------- 1 | Here, I provide an example code for converting raw HMDB51 videos. 2 | ``` 3 | python convert_videos.py 4 | ``` 5 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/HMDB51/scripts/convert_videos.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | import os 9 | import logging 10 | import subprocess 11 | 12 | from joblib import delayed 13 | from joblib import Parallel 14 | 15 | def exe_cmd(cmd): 16 | try: 17 | dst_file = cmd.split()[-1] 18 | if os.path.exists(dst_file): 19 | return "exist" 20 | cmd = cmd.replace('(', '\(').replace(')', '\)').replace('\'', '\\\'') 21 | output = subprocess.check_output(cmd, shell=True, 22 | stderr=subprocess.STDOUT) 23 | except subprocess.CalledProcessError as err: 24 | logging.warning("failed: {}".format(cmd)) 25 | # logging.warning("failed: {}: {}".format(cmd, err.output.decode("utf-8"))) # more details 26 | return False 27 | return output 28 | 29 | def convert_video_wrapper(src_videos, 30 | dst_videos, 31 | cmd_format, 32 | in_parallel=True): 33 | commands = [] 34 | for src, dst in zip(src_videos, dst_videos): 35 | cmd = cmd_format.format(src, dst) 36 | commands.append(cmd) 37 | 38 | logging.info("- {} commands to execute".format(len(commands))) 39 | 40 | if not in_parallel: 41 | for i, cmd in enumerate(commands): 42 | # if i % 100 == 0: 43 | # logging.info("{} / {}: '{}'".format(i, len(commands), cmd)) 44 | exe_cmd(cmd=cmd) 45 | else: 46 | num_jobs = 24 47 | logging.info("processing videos in parallel, num_jobs={}".format(num_jobs)) 48 | Parallel(n_jobs=num_jobs)(delayed(exe_cmd)(cmd) for cmd in commands) 49 | 50 | 51 | if __name__ == "__main__": 52 | logging.getLogger().setLevel(logging.DEBUG) 53 | 54 | # resize to slen = x360 55 | cmd_format = 'ffmpeg -y -i {} -c:v mpeg4 -filter:v "scale=min(iw\,(360*iw)/min(iw\,ih)):-1" -b:v 640k -an {}' 56 | 57 | src_root = '../raw/data' 58 | dst_root = '../raw/data-x360' 59 | assert os.path.exists(dst_root), "cannot locate `{}'".format(dst_root) 60 | 61 | classname = [name for name in os.listdir(src_root) \ 62 | if os.path.isdir(os.path.join(src_root,name))] 63 | classname.sort() 64 | 65 | for cls_name in classname: 66 | src_folder = os.path.join(src_root, cls_name) 67 | dst_folder = os.path.join(dst_root, cls_name) 68 | assert os.path.exists(src_folder), "failed to locate: `{}'.".format(src_folder) 69 | if not os.path.exists(dst_folder): 70 | os.makedirs(dst_folder) 71 | 72 | video_names = [name for name in os.listdir(src_folder) \ 73 | if os.path.isfile(os.path.join(src_folder, name))] 74 | 75 | src_videos = [os.path.join(src_folder, vid_name.replace(";", "\;").replace("&", "\&")) for vid_name in video_names] 76 | dst_videos = [os.path.join(dst_folder, vid_name.replace(";", "\;").replace("&", "\&")) for vid_name in video_names] 77 | 78 | convert_video_wrapper(src_videos=src_videos, 79 | dst_videos=dst_videos, 80 | cmd_format=cmd_format) 81 | 82 | logging.info("- Done.") 83 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/README.md: -------------------------------------------------------------------------------- 1 | Please organize this folder as follows: 2 | ``` 3 | ./ 4 | ├── config.py 5 | ├── HMDB51 6 | │   ├── raw 7 | │   │   ├── data -> ../../../../DATA/HMDB51/raw/data 8 | │   │   │   ├── brush_hair 9 | │   │   │   ├── cartwheel 10 | │   │   │   ├── catch 11 | │   │   │   ├── chew 12 | │   │   │   ├── clap 13 | │   │   │   ├── climb 14 | │   │   │   ├── climb_stairs 15 | │   │   │   ├── dive 16 | │   │   │   ├── draw_sword 17 | │   │   │   ├── dribble 18 | │   │   │   ├── drink 19 | │   │   │   ├── eat 20 | │   │   │   ├── fall_floor 21 | │   │   │   ├── fencing 22 | │   │   │   ├── flic_flac 23 | │   │   │   ├── golf 24 | │   │   │   ├── handstand 25 | │   │   │   ├── hit 26 | │   │  
 │   ├── hug 27 | │   │   │   ├── jump 28 | │   │   │   ├── kick 29 | │   │   │   ├── kick_ball 30 | │   │   │   ├── kiss 31 | │   │   │   ├── laugh 32 | │   │   │   ├── pick 33 | │   │   │   ├── pour 34 | │   │   │   ├── pullup 35 | │   │   │   ├── punch 36 | │   │   │   ├── push 37 | │   │   │   ├── pushup 38 | │   │   │   ├── ride_bike 39 | │   │   │   ├── ride_horse 40 | │   │   │   ├── run 41 | │   │   │   ├── shake_hands 42 | │   │   │   ├── shoot_ball 43 | │   │   │   ├── shoot_bow 44 | │   │   │   ├── shoot_gun 45 | │   │   │   ├── sit 46 | │   │   │   ├── situp 47 | │   │   │   ├── smile 48 | │   │   │   ├── smoke 49 | │   │   │   ├── somersault 50 | │   │   │   ├── stand 51 | │   │   │   ├── swing_baseball 52 | │   │   │   ├── sword 53 | │   │   │   ├── sword_exercise 54 | │   │   │   ├── talk 55 | │   │   │   ├── throw 56 | │   │   │   ├── turn 57 | │   │   │   ├── walk 58 | │   │   │   └── wave 59 | │   │   └── list_cvt 60 | │   │   ├── hmdb51_split1_others.txt 61 | │   │   ├── hmdb51_split1_test.txt 62 | │   │   ├── hmdb51_split1_train.txt 63 | │   │   ├── hmdb51_split2_others.txt 64 | │   │   ├── hmdb51_split2_test.txt 65 | │   │   ├── hmdb51_split2_train.txt 66 | │   │   ├── hmdb51_split3_others.txt 67 | │   │   ├── hmdb51_split3_test.txt 68 | │   │   ├── hmdb51_split3_train.txt 69 | │   │   └── mapping_table.txt 70 | │   └── scripts 71 | │   ├── convert_list.py 72 | │   └── resave_videos.py 73 | ├── __init__.py 74 | ├── Kinetics 75 | │   ├── raw 76 | │   │   ├── data -> ../../../../DATA/Kinetics/raw/data 77 | │   │   │   ├── test 78 | │   │   │   ├── train 79 | │   │   │   └── val 80 | │   │   └── list_cvt 81 | │   │   ├── kinetics_test.txt 82 | │   │   ├── kinetics_test_avi.txt 83 | │   │   ├── kinetics_train.txt 84 | │   │   ├── kinetics_train_avi.txt 85 | │   │   ├── kinetics_val.txt 86 | │   │   ├── kinetics_val_avi.txt 87 | │   │   └── mapping_table.txt 88 | │   └── scripts 89 | │   ├── convert_video.py 90 | │   └── remove_spaces.py 91 | ├── README.md 92 | └── UCF101 93 | └── raw 94 | ├── data -> ../../../../DATA/UCF101/raw/data 95 | │   ├── ApplyEyeMakeup 96 | │   ├── ApplyLipstick 97 | │   ├── Archery 98 | │   ├── BabyCrawling 99 | │   ├── BalanceBeam 100 | │   ├── BandMarching 101 | │   ├── BaseballPitch 102 | │   ├── Basketball 103 | │   ├── BasketballDunk 104 | │   ├── BenchPress 105 | │   ├── Biking 106 | │   ├── Billiards 107 | │   ├── BlowDryHair 108 | │   ├── BlowingCandles 109 | │   ├── BodyWeightSquats 110 | │   ├── Bowling 111 | │   ├── BoxingPunchingBag 112 | │   ├── BoxingSpeedBag 113 | │   ├── BreastStroke 114 | │   ├── BrushingTeeth 115 | │   ├── CleanAndJerk 116 | │   ├── CliffDiving 117 | │   ├── CricketBowling 118 | │   ├── CricketShot 119 | │   ├── CuttingInKitchen 120 | │   ├── Diving 121 | │   ├── Drumming 122 | │   ├── Fencing 123 | │   ├── FieldHockeyPenalty 124 | │   ├── FloorGymnastics 125 | │   ├── FrisbeeCatch 126 | │   ├── FrontCrawl 127 | │   ├── GolfSwing 128 | │   ├── Haircut 129 | │   ├── Hammering 130 | │   ├── HammerThrow 131 | │   ├── HandstandPushups 132 | │   ├── HandstandWalking 133 | │   ├── HeadMassage 134 | │   ├── HighJump 135 | │   ├── HorseRace 136 | │   ├── HorseRiding 137 | │   ├── HulaHoop 138 | │   ├── IceDancing 139 | │   ├── JavelinThrow 140 | │   ├── JugglingBalls 141 | │   ├── JumpingJack 142 | │   ├── JumpRope 143 | │   ├── Kayaking 144 | │   ├── Knitting 145 | │   ├── LongJump 146 | │   ├── Lunges 147 | │   ├── MilitaryParade 148 | │   ├── Mixing 149 | │   ├── MoppingFloor 150 | │   ├── Nunchucks 151 | │   ├── 
ParallelBars 152 | │   ├── PizzaTossing 153 | │   ├── PlayingCello 154 | │   ├── PlayingDaf 155 | │   ├── PlayingDhol 156 | │   ├── PlayingFlute 157 | │   ├── PlayingGuitar 158 | │   ├── PlayingPiano 159 | │   ├── PlayingSitar 160 | │   ├── PlayingTabla 161 | │   ├── PlayingViolin 162 | │   ├── PoleVault 163 | │   ├── PommelHorse 164 | │   ├── PullUps 165 | │   ├── Punch 166 | │   ├── PushUps 167 | │   ├── Rafting 168 | │   ├── RockClimbingIndoor 169 | │   ├── RopeClimbing 170 | │   ├── Rowing 171 | │   ├── SalsaSpin 172 | │   ├── ShavingBeard 173 | │   ├── Shotput 174 | │   ├── SkateBoarding 175 | │   ├── Skiing 176 | │   ├── Skijet 177 | │   ├── SkyDiving 178 | │   ├── SoccerJuggling 179 | │   ├── SoccerPenalty 180 | │   ├── StillRings 181 | │   ├── SumoWrestling 182 | │   ├── Surfing 183 | │   ├── Swing 184 | │   ├── TableTennisShot 185 | │   ├── TaiChi 186 | │   ├── TennisSwing 187 | │   ├── ThrowDiscus 188 | │   ├── TrampolineJumping 189 | │   ├── Typing 190 | │   ├── UnevenBars 191 | │   ├── VolleyballSpiking 192 | │   ├── WalkingWithDog 193 | │   ├── WallPushups 194 | │   ├── WritingOnBoard 195 | │   └── YoYo 196 | └── list_cvt 197 | ├── testlist01.txt 198 | ├── testlist02.txt 199 | ├── testlist03.txt 200 | ├── trainlist01.txt 201 | ├── trainlist02.txt 202 | └── trainlist03.txt 203 | ``` 204 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/UCF101/raw/data: -------------------------------------------------------------------------------- 1 | ../../../../DATA/UCF101/raw/data -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_config 2 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/dataset/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/dataset/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/dataset/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 
5 | """ 6 | 7 | import logging 8 | 9 | def get_config(name): 10 | 11 | config = {} 12 | 13 | if name.upper() == 'UCF101': 14 | config['num_classes'] = 101 15 | elif name.upper() == 'HMDB51': 16 | config['num_classes'] = 51 17 | else: 18 | logging.error("Configs for dataset '{}' not found".format(name)) 19 | raise NotImplementedError 20 | 21 | logging.debug("Target dataset: '{}', configs: {}".format(name.upper(), config)) 22 | 23 | return config 24 | 25 | 26 | if __name__ == "__main__": 27 | logging.getLogger().setLevel(logging.DEBUG) 28 | 29 | logging.info(get_config("ucf101")) 30 | logging.info(get_config("HMDB51")) 31 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__init__.py -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/i3d.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/i3d.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/initializer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/initializer.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/mfnet_3d.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/mfnet_3d.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/__pycache__/symbol_builder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/network/__pycache__/symbol_builder.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 
3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import logging 9 | 10 | def get_config(name, modality = 'rgb', **kwargs): 11 | 12 | logging.debug("loading network configs of: {}".format(name.upper())) 13 | 14 | config = {} 15 | 16 | if name.upper() == "I3D": 17 | config['mean'] = [0.5] * 3 18 | config['std'] = [0.5] * 3 19 | else: 20 | config['mean'] = [0.485, 0.456, 0.406] 21 | config['std'] = [0.229, 0.224, 0.225] 22 | 23 | # else: 24 | # raise NotImplemented("Configs for {} not implemented".format(name)) 25 | 26 | logging.info("data:: {}".format(config)) 27 | return config -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/initializer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import json 8 | import logging 9 | 10 | import numpy as np 11 | import torch 12 | 13 | 14 | def xavier(net): 15 | def weights_init(m): 16 | classname = m.__class__.__name__ 17 | if classname.find('Conv') != -1 and hasattr(m, 'weight'): 18 | torch.nn.init.xavier_uniform(m.weight.data, gain=1.) 19 | if m.bias is not None: 20 | m.bias.data.zero_() 21 | elif classname.find('BatchNorm') != -1: 22 | m.weight.data.fill_(1.0) 23 | if m.bias is not None: 24 | m.bias.data.zero_() 25 | elif classname.find('Linear') != -1: 26 | torch.nn.init.xavier_uniform(m.weight.data, gain=1.) 27 | if m.bias is not None: 28 | m.bias.data.zero_() 29 | elif classname.find('Unit3Dpy') != -1: 30 | torch.nn.init.xavier_uniform(m.conv3d.weight.data, gain=1.) 
31 | elif classname in ['Sequential', 'AvgPool3d', 'MaxPool3d', 'MaxPool3dTFPadding', \ 32 | 'Dropout', 'ReLU', 'Softmax', 'BnActConv3d', 'ConstantPad3d'] \ 33 | or 'Block' in classname: 34 | pass 35 | else: 36 | if classname != classname.upper(): 37 | logging.warning("Initializer:: '{}' is uninitialized.".format(classname)) 38 | net.apply(weights_init) 39 | 40 | 41 | 42 | def init_from_dict(net, state_dict, strict=False): 43 | logging.debug("Initializer:: loading from `state_dict', strict = {} ...".format(strict)) 44 | 45 | if strict: 46 | net.load_state_dict(state_dict=state_dict) 47 | else: 48 | # customized partially load function 49 | net_state_keys = list(net.state_dict().keys()) 50 | for name, param in state_dict.items(): 51 | if name in net_state_keys: 52 | dst_param_shape = net.state_dict()[name].shape 53 | net.state_dict()[name].copy_(param.view(dst_param_shape)) 54 | net_state_keys.remove(name) 55 | 56 | # indicating missing keys 57 | if net_state_keys: 58 | logging.info("Initializer:: failed to load: \n{}".format( 59 | json.dumps(net_state_keys, indent=4, sort_keys=True))) 60 | 61 | 62 | def init_3d_from_2d_dict(net, state_dict, method='inflation'): 63 | logging.debug("Initializer:: loading from 2D neural network, filling method: `{}' ...".format(method)) 64 | 65 | # filling method 66 | def filling_kernel(src, dshape, method): 67 | assert method in ['inflation', 'random'], \ 68 | "filling method: {} is unknown!".format(method) 69 | src_np = src.numpy() 70 | 71 | if method == 'inflation': 72 | dst = torch.FloatTensor(dshape) 73 | # normalize 74 | src = src/float(dshape[2]) 75 | src = src.view(dshape[0],dshape[1], 1, dshape[3],dshape[4]) 76 | dst.copy_(src, broadcast=True) 77 | elif method == 'random': 78 | dst = torch.FloatTensor(dshape) 79 | tmp = torch.FloatTensor(src.shape) 80 | # normalize 81 | src = src/float(dshape[2]) 82 | # random range 83 | scale = src.abs().mean() 84 | # filling 85 | dst[:,:,0,:,:].copy_(src) 86 | i = 1 87 | while i < dshape[2]: 88 | if i+2 < dshape[2]: 89 | torch.nn.init.uniform(tmp, a=-scale, b=scale) 90 | dst[:,:,i,:,:].copy_(tmp) 91 | dst[:,:,i+1,:,:].copy_(src) 92 | dst[:,:,i+2,:,:].copy_(-tmp) 93 | i += 3 94 | elif i+1 < dshape[2]: 95 | torch.nn.init.uniform(tmp, a=-scale, b=scale) 96 | dst[:,:,i,:,:].copy_(tmp) 97 | dst[:,:,i+1,:,:].copy_(-tmp) 98 | i += 2 99 | else: 100 | dst[:,:,i,:,:].copy_(src) 101 | i += 1 102 | # shuffle 103 | tmp = dst.numpy().swapaxes(2, -1) 104 | shp = tmp.shape[:-1] 105 | for ndx in np.ndindex(shp): 106 | np.random.shuffle(tmp[ndx]) 107 | dst = torch.from_numpy(tmp) 108 | else: 109 | raise NotImplementedError 110 | 111 | return dst 112 | 113 | 114 | # customized partially loading function 115 | src_state_keys = list(state_dict.keys()) 116 | dst_state_keys = list(net.state_dict().keys()) 117 | for name, param in state_dict.items(): 118 | if name in dst_state_keys: 119 | src_param_shape = param.shape 120 | dst_param_shape = net.state_dict()[name].shape 121 | if src_param_shape != dst_param_shape: 122 | if name.startswith('classifier'): 123 | continue 124 | assert len(src_param_shape) == 4 and len(dst_param_shape) == 5, "{} mismatch".format(name) 125 | if list(src_param_shape) == [dst_param_shape[i] for i in [0, 1, 3, 4]]: 126 | if dst_param_shape[2] != 1: 127 | param = filling_kernel(src=param, dshape=dst_param_shape, method=method) 128 | else: 129 | param = param.view(dst_param_shape) 130 | assert dst_param_shape == param.shape, \ 131 | "Initializer:: error({}): {} != {}".format(name, dst_param_shape, param.shape) 132 | 
net.state_dict()[name].copy_(param, broadcast=False) 133 | src_state_keys.remove(name) 134 | dst_state_keys.remove(name) 135 | 136 | # indicate missing / ignored keys 137 | if src_state_keys: 138 | out = "[\'" + '\', \''.join(src_state_keys) + "\']" 139 | logging.info("Initializer:: >> {} params are unused: {}".format(len(src_state_keys), 140 | out if len(out) < 300 else out[0:150] + " ... " + out[-150:])) 141 | if dst_state_keys: 142 | logging.info("Initializer:: >> failed to load: \n{}".format( 143 | json.dumps(dst_state_keys, indent=4, sort_keys=True))) 144 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/pretrained: -------------------------------------------------------------------------------- 1 | ../exps/pretrained -------------------------------------------------------------------------------- /code/dmcnet_I3D/network/symbol_builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | """ 6 | 7 | import logging 8 | 9 | from .i3d import I3D 10 | from .config import get_config 11 | 12 | def get_symbol(name, modality = 'rgb', drop_out = 0.5, print_net=False, arch_estimator = None, arch_d = None, **kwargs): 13 | 14 | if name.upper() == "I3D": 15 | net = I3D(modality = modality, dropout_prob = drop_out, arch_estimator = arch_estimator, arch_d = arch_d, **kwargs) 16 | else: 17 | logging.error("network '{}' not implemented".format(name)) 18 | raise NotImplementedError() 19 | 20 | if print_net: 21 | logging.debug("Symbol:: Network Architecture:") 22 | logging.debug(net) 23 | 24 | input_conf = get_config(name, modality = modality, **kwargs) 25 | return net, input_conf 26 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/test/evaluate_video_ucf101_i3d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree.
6 | """ 7 | 8 | import sys 9 | sys.path.append("..") 10 | 11 | import os 12 | import time 13 | import json 14 | import logging 15 | import argparse 16 | import numpy as np 17 | import torch 18 | import torch.backends.cudnn as cudnn 19 | 20 | import dataset 21 | from train.model import static_model 22 | from train import metric 23 | from data import video_sampler as sampler 24 | from data import video_transforms as transforms 25 | from data.video_iterator import VideoIter 26 | from network.symbol_builder import get_symbol 27 | 28 | 29 | parser = argparse.ArgumentParser(description="PyTorch Video Recognition Parser (Evaluation)") 30 | # debug 31 | parser.add_argument('--debug-mode', type=bool, default=False, 32 | help="print all setting for debugging.") 33 | # io 34 | parser.add_argument('--dataset', default='UCF101', choices=['UCF101','HMDB51'], 35 | help="path to dataset") 36 | parser.add_argument('--clip-length', type = int, default=250, 37 | help="define the length of each input sample.") 38 | parser.add_argument('--split', type = int, default=1, 39 | help="which split to train on") 40 | parser.add_argument('--frame-interval', type=int, default=1, 41 | help="define the sampling interval between frames.") 42 | parser.add_argument('--task-name', type=str, default='Pytorch-MF-Net', 43 | help="name of current task, leave it empty for using folder name") 44 | parser.add_argument('--model-dir', type=str, default="../exps/models/", 45 | help="set logging file.") 46 | parser.add_argument('--log-file', type=str, default="./eval-ucf101-split1_96_flow_add_drop.log", 47 | help="set logging file.") 48 | parser.add_argument('--accumulate', type=int, default=1, 49 | help="accumulate mv and res") 50 | parser.add_argument('--mv-minmaxnorm', type=int, default=0, 51 | help="minmaxnorm for mv") 52 | parser.add_argument('--ds_factor', type=int, default=16, 53 | help="downsampling the flow by ds_factor") 54 | parser.add_argument('--mv-loadimg', type=int, default=0, 55 | help="load img mv") 56 | 57 | # device 58 | parser.add_argument('--gpus', type=str, default='0', 59 | help="define gpu id") 60 | # algorithm 61 | parser.add_argument('--network', type=str, default='i3d', 62 | choices=['i3d'], 63 | help="chose the base network") 64 | parser.add_argument('--arch-estimator', type=str, default = None, 65 | choices=['DenseNet','DenseNetSmall', 'DenseNetTiny'], 66 | help="chose the generator") 67 | # evaluation 68 | parser.add_argument('--load-epoch', type=int, default=0, 69 | help="resume trained model") 70 | parser.add_argument('--batch-size', type=int, default=1, 71 | help="batch size") 72 | parser.add_argument('--modality', type=str, default='rgb', 73 | choices=['rgb', 'flow', 'mv', 'res', 'flow+mp4', 'I'], 74 | help="chose input type") 75 | 76 | def autofill(args): 77 | # customized 78 | if not args.task_name: 79 | args.task_name = os.path.basename(os.getcwd()) 80 | # fixed 81 | args.model_prefix = os.path.join(args.model_dir, args.task_name) 82 | return args 83 | 84 | def set_logger(log_file='', debug_mode=False): 85 | if log_file: 86 | if not os.path.exists("./"+os.path.dirname(log_file)): 87 | os.makedirs("./"+os.path.dirname(log_file)) 88 | handlers = [logging.FileHandler(log_file), logging.StreamHandler()] 89 | else: 90 | handlers = [logging.StreamHandler()] 91 | 92 | """ add '%(filename)s' to format show source file """ 93 | logging.basicConfig(level=logging.DEBUG if debug_mode else logging.INFO, 94 | format='%(asctime)s %(levelname)s: %(message)s', 95 | datefmt='%Y-%m-%d %H:%M:%S', 96 | handlers = 
handlers) 97 | 98 | 99 | if __name__ == '__main__': 100 | 101 | # set args 102 | args = parser.parse_args() 103 | args = autofill(args) 104 | 105 | set_logger(log_file=args.log_file, debug_mode=args.debug_mode) 106 | logging.info("Start evaluation with args:\n" + 107 | json.dumps(vars(args), indent=4, sort_keys=True)) 108 | 109 | # set device states 110 | os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpus) # before using torch 111 | assert torch.cuda.is_available(), "CUDA is not available" 112 | 113 | # load dataset related configuration 114 | dataset_cfg = dataset.get_config(name=args.dataset) 115 | 116 | # create model 117 | sym_net, input_config = get_symbol(name=args.network, modality = args.modality, arch_estimator = args.arch_estimator, **dataset_cfg) 118 | 119 | # network 120 | if torch.cuda.is_available(): 121 | cudnn.benchmark = True 122 | sym_net = torch.nn.DataParallel(sym_net).cuda() 123 | criterion = torch.nn.CrossEntropyLoss().cuda() 124 | else: 125 | sym_net = torch.nn.DataParallel(sym_net) 126 | criterion = torch.nn.CrossEntropyLoss() 127 | net = static_model(net=sym_net, 128 | criterion=criterion, 129 | model_prefix=args.model_prefix, 130 | criterion2 = torch.nn.MSELoss().cuda() if args.modality == 'flow+mp4' else None) 131 | net.load_checkpoint(epoch=args.load_epoch) 132 | 133 | # data iterator: 134 | data_root = "../dataset/{}".format(args.dataset) 135 | normalize = transforms.Normalize(mean=input_config['mean'], std=input_config['std']) 136 | val_sampler = sampler.RandomSampling(num=args.clip_length, 137 | interval=args.frame_interval, 138 | speed=[1.0, 1.0]) 139 | val_loader = VideoIter(video_prefix='/UCF101/TSN_input/', # change this part accordingly 140 | txt_list=os.path.join(data_root, 'raw', 'list_cvt', 'testlist0{}.txt'.format(args.split)), 141 | sampler=val_sampler, 142 | force_color=True, 143 | video_transform=transforms.Compose([ 144 | #transforms.Resize((256,256)), 145 | #transforms.RandomCrop((224,224)), 146 | transforms.CenterCrop((224, 224)), # we did not use center crop in our paper 147 | # transforms.RandomHorizontalFlip(), # we did not use mirror in our paper 148 | transforms.ToTensor(args.modality), 149 | normalize, 150 | ]), 151 | name='test', 152 | cached_info_path = os.path.join(data_root, 'raw', 'list_cvt', 'ucf101_split{}_test_info.txt'.format(args.split)), 153 | return_item_subpath=True, 154 | check_video = True, 155 | load_from_img = True, 156 | modality = args.modality, accumulate = args.accumulate, ds_factor = args.ds_factor, mv_minmaxnorm = args.mv_minmaxnorm, 157 | mv_loadimg = args.mv_loadimg 158 | ) 159 | 160 | eval_iter = torch.utils.data.DataLoader(val_loader, 161 | batch_size=args.batch_size, 162 | shuffle=False, 163 | num_workers=12, # change this part accordingly 164 | pin_memory=True) 165 | 166 | # eval metrics 167 | metrics = metric.MetricList(metric.Loss(name="loss-ce"), 168 | metric.Accuracy(topk=1, name="top1"), 169 | metric.Accuracy(topk=5, name="top5")) 170 | metrics.reset() 171 | 172 | # main loop 173 | net.net.eval() 174 | avg_score = {} 175 | sum_batch_elapse = 0.
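# The evaluation loop below scores one clip per batch element and accumulates each clip's softmax output in avg_score, keyed by video subpath; clips belonging to the same video are summed and counted, so the reported metrics are computed on video-level averaged predictions.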
176 | sum_batch_inst = 0 177 | duplication = 1 178 | softmax = torch.nn.Softmax(dim=1) 179 | scores = [] 180 | label = [] 181 | total_round = 1 # change this part accordingly if you do not want an inf loop 182 | for i_round in range(total_round): 183 | i_batch = 0 184 | logging.info("round #{}/{}".format(i_round, total_round)) 185 | for data, target, video_subpath in eval_iter: 186 | batch_start_time = time.time() 187 | 188 | outputs, losses = net.forward(data, target, node = 'logit') 189 | 190 | sum_batch_elapse += time.time() - batch_start_time 191 | sum_batch_inst += 1 192 | 193 | # recording 194 | output = softmax(outputs[0]).data.cpu() 195 | scores.append(output) 196 | target = target.cpu() 197 | label.append(target) 198 | losses = losses[0].data.cpu() 199 | del outputs 200 | #torch.cuda.empty_cache() 201 | for i_item in range(0, output.shape[0]): 202 | output_i = output[i_item,:].view(1, -1) 203 | target_i = torch.LongTensor([target[i_item]]) 204 | loss_i = losses 205 | video_subpath_i = video_subpath[i_item] 206 | if video_subpath_i in avg_score: 207 | avg_score[video_subpath_i][2] += output_i 208 | avg_score[video_subpath_i][3] += 1 209 | duplication = 0.92 * duplication + 0.08 * avg_score[video_subpath_i][3] 210 | else: 211 | avg_score[video_subpath_i] = [torch.LongTensor(target_i.numpy().copy()), 212 | torch.FloatTensor(loss_i.numpy().copy()), 213 | torch.FloatTensor(output_i.numpy().copy()), 214 | 1] # the last one is counter 215 | 216 | # show progress 217 | if (i_batch % 100) == 99: 218 | metrics.reset() 219 | for _, video_info in avg_score.items(): 220 | target, loss, pred, _ = video_info 221 | metrics.update([pred], target, [loss]) 222 | name_value = metrics.get_name_value() 223 | logging.info("{:.1f}%, {:.1f} \t| Batch [0,{}] \tAvg: {} = {:.5f}, {} = {:.5f}, {} = {:.5f}".format( 224 | float(100*i_batch) / eval_iter.__len__(), \ 225 | duplication, \ 226 | i_batch, \ 227 | name_value[0][0][0], name_value[0][0][1], \ 228 | name_value[1][0][0], name_value[1][0][1], \ 229 | name_value[2][0][0], name_value[2][0][1])) 230 | np.savez(os.path.join('./{}_{}/'.format(args.dataset, args.split),args.task_name+'_{}'.format(args.clip_length)),scores=np.concatenate(scores, axis = 0), labels=np.concatenate(label, axis = 0)) 231 | i_batch += 1 232 | 233 | 234 | # finished 235 | logging.info("Evaluation Finished!") 236 | #print(np.concatenate(scores, axis = 0).shape, np.concatenate(label, axis = 0).shape) 237 | np.savez(os.path.join('./{}_{}/'.format(args.dataset, args.split),args.task_name+'_{}'.format(args.clip_length)),scores=np.concatenate(scores, axis = 0), labels=np.concatenate(label, axis = 0)) 238 | metrics.reset() 239 | for _, video_info in avg_score.items(): 240 | target, loss, pred, _ = video_info 241 | metrics.update([pred], target, [loss]) 242 | 243 | logging.info("Total time cost: {:.1f} sec".format(sum_batch_elapse)) 244 | logging.info("Speed: {:.4f} samples/sec".format( 245 | args.batch_size * sum_batch_inst / sum_batch_elapse )) 246 | logging.info("Accuracy:") 247 | logging.info(json.dumps(metrics.get_name_value(), indent=4, sort_keys=True)) 248 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/test/test.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | python evaluate_video_hmdb_i3d.py --task-name hmdb1\ 7 | --split 1\ 8 | --load-epoch 10\ 9 | --modality flow+mp4\ 10 | --log-file ./eval_hmdb1.log \ 11 | --gpus 0,1\ 12 | --batch-size 2 --clip-length 250\ 13 | --arch-estimator DenseNetTiny\ 14 | --accumulate 0\ 15 | --mv-minmaxnorm 1 16 | 17 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/train.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | python train_hmdb51.py --task-name hmdb_1\ 7 | --split 1\ 8 | --network I3D \ 9 | --clip-length 64 \ 10 | --pretrained_3d ./exps/models/model_flow.pth\ 11 | --iter-size 32 --batch-size 3\ 12 | --optimizer adam\ 13 | --gpus 1,3\ 14 | --modality flow+mp4\ 15 | --train-frame-interval 1 \ 16 | --val-frame-interval 1\ 17 | --lr-base 0.0004\ 18 | --lr-base2 0.0004\ 19 | --lr-d 0.002\ 20 | --detach 1\ 21 | --lr-factor 0.2\ 22 | --dataset HMDB51\ 23 | --drop-out 0.85\ 24 | --fine_tune 0\ 25 | --arch-estimator DenseNetTiny\ 26 | --arch-d Discriminator\ 27 | --adv 1\ 28 | --epoch-thre 6\ 29 | --ds_factor 16\ 30 | --mv-minmaxnorm 1\ 31 | --accumulate 0 -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__init__.py -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/callback.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/callback.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/lr_scheduler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/lr_scheduler.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/metric.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/metric.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/__pycache__/model.cpython-36.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/code/dmcnet_I3D/train/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/callback.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import logging 9 | 10 | class Callback(object): 11 | 12 | def __init__(self, with_header=False): 13 | self.with_header = with_header 14 | 15 | def __call__(self): 16 | raise NotImplementedError("To be implemented") 17 | 18 | def header(self, epoch=None, batch=None): 19 | str_out = "" 20 | if self.with_header: 21 | if epoch is not None: 22 | str_out += "Epoch {:s} ".format(("[%d]"%epoch).ljust(5, ' ')) 23 | if batch is not None: 24 | str_out += "Batch {:s} ".format(("[%d]"%batch).ljust(6, ' ')) 25 | return str_out 26 | 27 | class CallbackList(Callback): 28 | 29 | def __init__(self, *args, with_header=True): 30 | super(CallbackList, self).__init__(with_header=with_header) 31 | assert all([issubclass(type(x), Callback) for x in args]), \ 32 | "Callback inputs illegal: {}".format(args) 33 | self.callbacks = [callback for callback in args] 34 | 35 | def __call__(self, epoch=None, batch=None, silent=False, **kwargs): 36 | str_out = self.header(epoch, batch) 37 | 38 | for callback in self.callbacks: 39 | str_out += callback(**kwargs, silent=True) + " " 40 | 41 | if not silent: 42 | logging.info(str_out) 43 | return str_out 44 | 45 | 46 | #################### 47 | # CUSTOMIZED CALLBACKS 48 | #################### 49 | 50 | class SpeedMonitor(Callback): 51 | 52 | def __init__(self, with_header=False): 53 | super(SpeedMonitor, self).__init__(with_header=with_header) 54 | 55 | def __call__(self, sample_elapse, update_elapse=None, epoch=None, batch=None, silent=False, **kwargs): 56 | str_out = self.header(epoch, batch) 57 | 58 | if sample_elapse is not None: 59 | sample_freq = 1./sample_elapse 60 | if update_elapse is not None: 61 | update_freq = 1./update_elapse 62 | str_out += "Speed {: >5.1f} (+{: >2.0f}) sample/sec ".format(sample_freq, update_freq-sample_freq) 63 | else: 64 | str_out += "Speed {:.2f} sample/sec ".format(sample_freq) 65 | 66 | if not silent: 67 | logging.info(str_out) 68 | return str_out 69 | 70 | class MetricPrinter(Callback): 71 | 72 | def __init__(self, with_header=False): 73 | super(MetricPrinter, self).__init__(with_header=with_header) 74 | 75 | def __call__(self, namevals, epoch=None, batch=None, silent=False, **kwargs): 76 | str_out = self.header(epoch, batch) 77 | 78 | if namevals is not None: 79 | for i, nameval in enumerate(namevals): 80 | name, value = nameval[0] 81 | str_out += "{} = {:.5f}".format(name, value) 82 | str_out += ", " if i != (len(namevals)-1) else " " 83 | 84 | if not silent: 85 | logging.info(str_out) 86 | return str_out 87 | 88 | 89 | #################### 90 | # TESTING CASES 91 | #################### 92 | 93 | if __name__ == "__main__": 94 | 95 | logging.getLogger().setLevel(logging.DEBUG) 96 | 97 | # Test each function 98 | # [1] Callback 99 | logging.info("- testing base callback class:") 100 | c = Callback(with_header=True) 101 | logging.info(c.header(epoch=1, batch=123)) 102 | 103 | # [2] SpeedMonitor 104 | logging.info("- testing speedmonitor:") 105 | s = SpeedMonitor(with_header=True) 106 | 
s(sample_elapse=0.3, epoch=10, batch=31) 107 | s = SpeedMonitor(with_header=False) 108 | s(sample_elapse=0.3) 109 | 110 | # [3] MetricPrinter 111 | logging.info("- test metric printer") 112 | d = MetricPrinter(with_header=True) 113 | d(namevals=[[('acc1',0.123)], [("acc5",0.4453232)]], epoch=10, batch=31) 114 | d = MetricPrinter(with_header=False) 115 | d(namevals=[[('acc1',0.123)], [("acc5",0.4453232)]]) 116 | 117 | # [4] CallbackList 118 | logging.info("- test callback list") 119 | c = CallbackList() 120 | c = CallbackList(SpeedMonitor(), MetricPrinter()) 121 | c(epoch=10, batch=31, sample_elapse=0.3, namevals=[[('acc1',0.123)], [("acc5",0.4453232)]]) -------------------------------------------------------------------------------- /code/dmcnet_I3D/train/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import logging 9 | 10 | class LRScheduler(object): 11 | 12 | def __init__(self, step_counter=0, base_lr=0.01): 13 | self.step_counter = step_counter 14 | self.base_lr = base_lr 15 | 16 | def update(self): 17 | raise NotImplementedError("must override this") 18 | 19 | def get_lr(self): 20 | return self.lr 21 | 22 | class MultiFactorScheduler(LRScheduler): 23 | 24 | def __init__(self, steps, base_lr=0.01, factor=0.1, step_counter=0): 25 | super(MultiFactorScheduler, self).__init__(step_counter, base_lr) 26 | assert isinstance(steps, list) and len(steps) > 0 27 | for i, _step in enumerate(steps): 28 | if i != 0 and steps[i] <= steps[i-1]: 29 | raise ValueError("Schedule step must be an increasing integer list") 30 | if _step < 1: 31 | raise ValueError("Schedule step must be greater than or equal to 1") 32 | if factor > 1.0: 33 | raise ValueError("Factor must be no more than 1 to make lr reduce") 34 | 35 | logging.info("Iter %d: start with learning rate: %0.5e (next lr step: %d)" \ 36 | % (self.step_counter, self.base_lr, steps[0])) 37 | self.steps = steps 38 | self.factor = factor 39 | self.lr = self.base_lr 40 | self.cursor = 0 41 | 42 | def update(self): 43 | self.step_counter += 1 44 | 45 | if self.cursor >= len(self.steps): 46 | return self.lr 47 | while self.steps[self.cursor] < self.step_counter: 48 | self.lr *= self.factor 49 | self.cursor += 1 50 | # message 51 | if self.cursor >= len(self.steps): 52 | logging.info("Iter: %d, change learning rate to %0.5e for step [%d:Inf)" \ 53 | % (self.step_counter-1, self.lr, self.step_counter-1)) 54 | return self.lr 55 | else: 56 | logging.info("Iter: %d, change learning rate to %0.5e for step [%d:%d)" \ 57 | % (self.step_counter-1, self.lr, self.step_counter-1, \ 58 | self.steps[self.cursor])) 59 | if self.step_counter < 100: 60 | return self.lr/2.0 61 | return self.lr 62 | 63 | 64 | if __name__ == "__main__": 65 | 66 | logging.getLogger().setLevel(logging.DEBUG) 67 | 68 | # test LRScheduler() 69 | logging.info("testing basic class: LRScheduler()") 70 | LRScheduler() 71 | 72 | # test MultiFactorScheduler() 73 | logging.info("testing basic class: MultiFactorScheduler()") 74 | start_point = 2 75 | lr_scheduler = MultiFactorScheduler(step_counter=start_point, 76 | base_lr=0.1, 77 | steps=[2, 14, 18], 78 | factor=0.1) 79 | for i in range(start_point, 22): 80 | logging.info("id = {}, lr = {:f}".format(i, lr_scheduler.update()))
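For reference, a minimal sketch of how the MultiFactorScheduler above can drive a PyTorch optimizer; the model, optimizer and step counts here are hypothetical stand-ins, not part of this repository, and `update()` is assumed to be called once per effective batch:

```python
# Illustrative sketch only -- the model and optimizer below are hypothetical stand-ins.
import torch
from train.lr_scheduler import MultiFactorScheduler

model = torch.nn.Linear(10, 2)                             # stand-in network
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = MultiFactorScheduler(steps=[3000, 6000],       # decay boundaries, in updates
                                 base_lr=0.1, factor=0.1)

for step in range(9000):
    lr = scheduler.update()     # advances step_counter; multiplies lr by `factor` at each boundary
    for group in optimizer.param_groups:
        group['lr'] = lr        # push the scheduled rate into the optimizer
    # ... forward / backward / optimizer.step() would go here ...
```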
-------------------------------------------------------------------------------- /code/dmcnet_I3D/train/metric.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import logging 9 | import numpy as np 10 | 11 | class EvalMetric(object): 12 | 13 | def __init__(self, name, **kwargs): 14 | self.name = str(name) 15 | self.reset() 16 | 17 | def update(self, preds, labels, losses): 18 | raise NotImplementedError() 19 | 20 | def reset(self): 21 | self.num_inst = 0 22 | self.sum_metric = 0.0 23 | 24 | def get(self): 25 | if self.num_inst == 0: 26 | return (self.name, float('nan')) 27 | else: 28 | return (self.name, self.sum_metric / self.num_inst) 29 | 30 | def get_name_value(self): 31 | name, value = self.get() 32 | if not isinstance(name, list): 33 | name = [name] 34 | if not isinstance(value, list): 35 | value = [value] 36 | return list(zip(name, value)) 37 | 38 | def check_label_shapes(self, preds, labels): 39 | # raise if the shape is inconsistent 40 | if (type(labels) is list) and (type(preds) is list): 41 | label_shape, pred_shape = len(labels), len(preds) 42 | else: 43 | label_shape, pred_shape = labels.shape[0], preds.shape[0] 44 | #print(preds, labels, label_shape, pred_shape) 45 | if label_shape != pred_shape: 46 | raise NotImplementedError("") 47 | 48 | 49 | class MetricList(EvalMetric): 50 | """Handle multiple evaluation metrics 51 | """ 52 | def __init__(self, *args, name="metric_list"): 53 | assert all([issubclass(type(x), EvalMetric) for x in args]), \ 54 | "MetricList input is illegal: {}".format(args) 55 | self.metrics = [metric for metric in args] 56 | super(MetricList, self).__init__(name=name) 57 | 58 | def update(self, preds, labels, losses=None): 59 | preds = [preds] if type(preds) is not list else preds 60 | labels = [labels] if type(labels) is not list else labels 61 | losses = [losses] if type(losses) is not list else losses 62 | i = 0 63 | for metric in self.metrics: 64 | if type(metric) is type(Loss()): 65 | #handle multiple losses 66 | metric.update(preds, labels, [losses[i]]) 67 | i = i + 1 68 | else: 69 | metric.update(preds, labels, losses) 70 | 71 | def reset(self): 72 | if hasattr(self, 'metrics'): 73 | for metric in self.metrics: 74 | metric.reset() 75 | else: 76 | logging.warning("No metric defined.") 77 | 78 | def get(self): 79 | outputs = [] 80 | for metric in self.metrics: 81 | outputs.append(metric.get()) 82 | return outputs 83 | 84 | def get_name_value(self): 85 | outputs = [] 86 | for metric in self.metrics: 87 | outputs.append(metric.get_name_value()) 88 | return outputs 89 | 90 | 91 | #################### 92 | # COMMON METRICS 93 | #################### 94 | 95 | class Accuracy(EvalMetric): 96 | """Computes accuracy classification score.
97 | """ 98 | def __init__(self, name='accuracy', topk=1): 99 | super(Accuracy, self).__init__(name) 100 | self.topk = topk 101 | 102 | def update(self, preds, labels, losses): 103 | preds = [preds] if type(preds) is not list else preds 104 | labels = [labels] if type(labels) is not list else labels 105 | 106 | self.check_label_shapes(preds, labels) 107 | for pred, label in zip(preds, labels): 108 | assert self.topk <= pred.shape[1], \ 109 | "topk({}) should no larger than the pred dim({})".format(self.topk, pred.shape[1]) 110 | _, pred_topk = pred.topk(self.topk, 1, True, True) 111 | 112 | pred_topk = pred_topk.t() 113 | correct = pred_topk.eq(label.view(1, -1).expand_as(pred_topk)) 114 | 115 | self.sum_metric += float(correct.view(-1).float().sum(0, keepdim=True).numpy()) 116 | self.num_inst += label.shape[0] 117 | 118 | 119 | class Loss(EvalMetric): 120 | """Dummy metric for directly printing loss. 121 | """ 122 | def __init__(self, name='loss'): 123 | super(Loss, self).__init__(name) 124 | 125 | def update(self, preds, labels, losses): 126 | assert losses is not None, "Loss undefined." 127 | for loss in losses: 128 | self.sum_metric += float(loss.numpy().sum()) 129 | #print(loss,loss.shape) 130 | if loss.shape == (): 131 | self.num_inst += 1 132 | else: 133 | self.num_inst += loss.shape[0] 134 | 135 | 136 | if __name__ == "__main__": 137 | import torch 138 | 139 | # Test Accuracy 140 | predicts = [torch.from_numpy(np.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]))] 141 | labels = [torch.from_numpy(np.array([ 0, 1, 1 ]))] 142 | losses = [torch.from_numpy(np.array([ 0.3, 0.4, 0.5 ])),torch.from_numpy(np.array([ 0., 0.4, 0.5 ]))] 143 | 144 | logging.getLogger().setLevel(logging.DEBUG) 145 | logging.debug("input pred: {}".format(predicts)) 146 | logging.debug("input label: {}".format(labels)) 147 | logging.debug("input loss: {}".format(losses)) 148 | 149 | acc = Accuracy() 150 | 151 | acc.update(preds=predicts, labels=labels, losses=losses) 152 | 153 | logging.info(acc.get()) 154 | 155 | # Test MetricList 156 | metrics = MetricList(Loss(name="ce-loss"), 157 | Loss(name="mse"), 158 | Accuracy(topk=1, name="acc-top1"), 159 | Accuracy(topk=2, name="acc-top2"), 160 | ) 161 | metrics.update(preds=predicts, labels=labels, losses=losses) 162 | 163 | logging.info("------------") 164 | logging.info(metrics.get()) 165 | acc.get_name_value() 166 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/train_hmdb51.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 
6 | """ 7 | 8 | import os 9 | import json 10 | import socket 11 | import logging 12 | import argparse 13 | 14 | import torch 15 | import torch.nn.parallel 16 | import torch.distributed as dist 17 | 18 | import dataset 19 | from train_model import train_model 20 | from network.symbol_builder import get_symbol 21 | 22 | 23 | parser = argparse.ArgumentParser(description="DMC-Net Parser") 24 | # debug 25 | parser.add_argument('--debug-mode', type=bool, default=True, 26 | help="print all setting for debugging.") 27 | # io 28 | parser.add_argument('--dataset', default='UCF101', choices=['UCF101', 'HMDB51'], 29 | help="path to dataset") 30 | parser.add_argument('--split', type = int, default=1, 31 | help="which split to train on") 32 | parser.add_argument('--clip-length',type=int, default=16, 33 | help="define the length of each input sample.") 34 | parser.add_argument('--train-frame-interval', type=int, default=2, 35 | help="define the sampling interval between frames.") 36 | parser.add_argument('--val-frame-interval', type=int, default=2, 37 | help="define the sampling interval between frames.") 38 | parser.add_argument('--task-name', type=str, default='', 39 | help="name of current task, leave it empty for using folder name") 40 | parser.add_argument('--model-dir', type=str, default="./exps/models", 41 | help="set logging file.") 42 | parser.add_argument('--log-file', type=str, default="", 43 | help="set logging file.") 44 | parser.add_argument('--accumulate', type=int, default=1, 45 | help="accumulate mv and res") 46 | parser.add_argument('--mv-minmaxnorm', type=int, default=0, 47 | help="minmaxnorm for mv") 48 | parser.add_argument('--mv-loadimg', type=int, default=0, 49 | help="load img mv") 50 | parser.add_argument('--detach', type=int, default=0, 51 | help="whether not update i3d") 52 | parser.add_argument('--ds_factor', type=int, default=16, 53 | help="downsampling the flow by ds_factor") 54 | # device 55 | parser.add_argument('--gpus', type=str, default="0,1,2,3,4,5,6,7", 56 | help="define gpu id") 57 | # algorithm 58 | parser.add_argument('--network', type=str, default='I3D', 59 | choices=['I3D'], 60 | help="choose the base network") 61 | parser.add_argument('--arch-estimator', type=str, default = None, 62 | choices=['DenseNet','DenseNetSmall', 'DenseNetTiny'], 63 | help="choose the generator") 64 | parser.add_argument('--arch-d', type=str, default=None, 65 | help="choose the D") 66 | # initialization with priority (the next step will overwrite the previous step) 67 | # - step 1: random initialize 68 | # - step 2: load the 2D pretrained model if `pretrained_2d' is True 69 | # - step 3: load the 3D pretrained model if `pretrained_3d' is defined 70 | # - step 4: resume if `resume_epoch' >= 0 71 | parser.add_argument('--pretrained_2d', type=bool, default=False, 72 | help="load default 2D pretrained model.") 73 | parser.add_argument('--pretrained_3d', type=str, 74 | default='./network/pretrained/MFNet3D_Kinetics-400_72.8.pth', 75 | help="load default 3D pretrained model.") 76 | parser.add_argument('--new_classifier', type=bool, default=False, 77 | help="whether use mode_flow to initialize classifier weights") 78 | parser.add_argument('--resume-epoch', type=int, default=-1, 79 | help="resume train") 80 | # flow+mp4 is the modality we used for generating DMC 81 | parser.add_argument('--modality', type=str, default='rgb', 82 | choices=['rgb', 'flow', 'mv', 'res', 'flow+mp4', 'I'], 83 | help="choose input type") 84 | parser.add_argument('--drop-out', type=float, default=0.5, 85 | 
help="drop-out probability") 86 | parser.add_argument('--adv', type=float, default=0., 87 | help="weight for adversirial loss") 88 | # optimization 89 | parser.add_argument('--epoch-thre', type=int, default=1, 90 | help="the epoch classifier begins to be optimized when with gen") 91 | parser.add_argument('--optimizer', type=str, default='sgd', 92 | choices=['sgd', 'adam'], 93 | help="optimizer") 94 | parser.add_argument('--fine_tune', type=int, default=1, 95 | help="apply different learning rate for different layers") 96 | parser.add_argument('--batch-size', type=int, default=32, 97 | help="batch size") 98 | parser.add_argument('--iter-size', type=int, default=1, 99 | help="iteration size which is for accumalation of gradients") 100 | parser.add_argument('--lr-base', type=float, default=0.005, 101 | help="learning rate") 102 | parser.add_argument('--lr-base2', type=float, default=0.002, 103 | help="learning rate for stage 2") 104 | parser.add_argument('--lr-d', type=float, default=None, 105 | help="learning rate for discriminator") 106 | parser.add_argument('--lr-steps', type=list, default=[int(1e4*x) for x in [3.5, 6, 8.5, 11, 13.5, 16]], 107 | help="number of samples to pass before changing learning rate") # 1e6 million 108 | #parser.add_argument('--lr-steps', type=list, default=[int(1e4*x) for x in [4.5, 7, 9.5, 12, 14.5, 17]], 109 | # help="number of samples to pass before changing learning rate") # 1e6 million 110 | #parser.add_argument('--lr-steps', type=list, default=[int(1e4*x) for x in [10, 20, 30, 40, 50, 60]], 111 | # help="number of samples to pass before changing learning rate") # 1e6 million 112 | parser.add_argument('--lr-factor', type=float, default=0.1, 113 | help="reduce the learning with factor") 114 | parser.add_argument('--save-frequency', type=float, default=1, 115 | help="save once after N epochs") 116 | parser.add_argument('--end-epoch', type=int, default=50, 117 | help="maxmium number of training epoch") 118 | parser.add_argument('--random-seed', type=int, default=1, 119 | help='random seed (default: 1)') 120 | 121 | def autofill(args): 122 | # customized 123 | if not args.task_name: 124 | args.task_name = os.path.basename(os.getcwd()) 125 | if not args.log_file: 126 | if os.path.exists("./exps/logs"): 127 | args.log_file = "./exps/logs/{}_at-{}.log".format(args.task_name, socket.gethostname()) 128 | else: 129 | args.log_file = ".{}_at-{}.log".format(args.task_name, socket.gethostname()) 130 | # fixed 131 | args.model_prefix = os.path.join(args.model_dir, args.task_name) 132 | args.score_dir = './exps/score' + '/{}_{}/'.format(args.dataset, args.split) + args.task_name 133 | return args 134 | 135 | def set_logger(log_file='', debug_mode=False): 136 | if log_file: 137 | if not os.path.exists("./"+os.path.dirname(log_file)): 138 | os.makedirs("./"+os.path.dirname(log_file)) 139 | handlers = [logging.FileHandler(log_file), logging.StreamHandler()] 140 | else: 141 | handlers = [logging.StreamHandler()] 142 | 143 | """ add '%(filename)s:%(lineno)d %(levelname)s:' to format show source file """ 144 | logging.basicConfig(level=logging.DEBUG if debug_mode else logging.INFO, 145 | format='%(asctime)s: %(message)s', 146 | datefmt='%Y-%m-%d %H:%M:%S', 147 | handlers = handlers) 148 | 149 | if __name__ == "__main__": 150 | 151 | # set args 152 | args = parser.parse_args() 153 | args = autofill(args) 154 | 155 | set_logger(log_file=args.log_file, debug_mode=args.debug_mode) 156 | logging.info("Using pytorch {} ({})".format(torch.__version__, torch.__path__)) 157 | 
logging.info("Start training with args:\n" + 158 | json.dumps(vars(args), indent=4, sort_keys=True)) 159 | 160 | # set device states 161 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus # before using torch 162 | assert torch.cuda.is_available(), "CUDA is not available" 163 | torch.manual_seed(args.random_seed) 164 | torch.cuda.manual_seed(args.random_seed) 165 | 166 | 167 | # load dataset related configuration 168 | dataset_cfg = dataset.get_config(name=args.dataset) 169 | 170 | # creat model with all parameters initialized 171 | net, input_conf = get_symbol(name=args.network, 172 | pretrained=args.pretrained_2d if args.resume_epoch < 0 else None, 173 | modality = args.modality, 174 | drop_out = args.drop_out, 175 | arch_estimator = args.arch_estimator, 176 | arch_d = args.arch_d, 177 | print_net = False, 178 | **dataset_cfg) 179 | 180 | # training 181 | kwargs = {} 182 | kwargs.update(dataset_cfg) 183 | kwargs.update({'input_conf': input_conf}) 184 | kwargs.update(vars(args)) 185 | train_model(args.network, sym_net=net, optim = args.optimizer, **kwargs) 186 | -------------------------------------------------------------------------------- /code/dmcnet_I3D/train_ucf101.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | This source code is licensed under the MIT license found in the 5 | LICENSE file in the root directory of this source tree. 6 | """ 7 | 8 | import os 9 | import json 10 | import socket 11 | import logging 12 | import argparse 13 | 14 | import torch 15 | import torch.nn.parallel 16 | import torch.distributed as dist 17 | 18 | import dataset 19 | from train_model import train_model 20 | from network.symbol_builder import get_symbol 21 | 22 | parser = argparse.ArgumentParser(description="DMC-Net Parser") 23 | # debug 24 | parser.add_argument('--debug-mode', type=bool, default=True, 25 | help="print all setting for debugging.") 26 | # io 27 | parser.add_argument('--dataset', default='UCF101', choices=['UCF101', 'HMDB51'], 28 | help="path to dataset") 29 | parser.add_argument('--split', type = int, default=1, 30 | help="which split to train on") 31 | parser.add_argument('--clip-length',type=int, default=16, 32 | help="define the length of each input sample.") 33 | parser.add_argument('--train-frame-interval', type=int, default=2, 34 | help="define the sampling interval between frames.") 35 | parser.add_argument('--val-frame-interval', type=int, default=2, 36 | help="define the sampling interval between frames.") 37 | parser.add_argument('--task-name', type=str, default='', 38 | help="name of current task, leave it empty for using folder name") 39 | parser.add_argument('--model-dir', type=str, default="./exps/models", 40 | help="set logging file.") 41 | parser.add_argument('--log-file', type=str, default="", 42 | help="set logging file.") 43 | parser.add_argument('--accumulate', type=int, default=1, 44 | help="accumulate mv and res") 45 | parser.add_argument('--mv-minmaxnorm', type=int, default=0, 46 | help="minmaxnorm for mv") 47 | parser.add_argument('--mv-loadimg', type=int, default=0, 48 | help="load img mv") 49 | parser.add_argument('--detach', type=int, default=0, 50 | help="whether not update i3d") 51 | parser.add_argument('--ds_factor', type=int, default=16, 52 | help="downsampling the flow by ds_factor") 53 | # device 54 | parser.add_argument('--gpus', type=str, default="0,1,2,3,4,5,6,7", 55 | help="define gpu id") 56 | # algorithm 57 | parser.add_argument('--network', 
type=str, default='I3D', 58 | choices=['I3D'], 59 | help="choose the base network") 60 | parser.add_argument('--arch-estimator', type=str, default=None, 61 | choices=['DenseNet','DenseNetSmall', 'DenseNetTiny'], 62 | help="choose the generator") 63 | parser.add_argument('--arch-d', type=str, default=None, 64 | help="choose the discriminator") 65 | # initialization with priority (the next step will overwrite the previous step) 66 | # - step 1: random initialize 67 | # - step 2: load the 2D pretrained model if `pretrained_2d' is True 68 | # - step 3: load the 3D pretrained model if `pretrained_3d' is defined 69 | # - step 4: resume if `resume_epoch' >= 0 70 | parser.add_argument('--pretrained_2d', type=bool, default=False, 71 | help="load default 2D pretrained model.") 72 | parser.add_argument('--pretrained_3d', type=str, 73 | default='./network/pretrained/MFNet3D_Kinetics-400_72.8.pth', 74 | help="load default 3D pretrained model.") 75 | parser.add_argument('--new_classifier', type=bool, default=False, 76 | help="whether to use model_flow to initialize classifier weights") 77 | parser.add_argument('--resume-epoch', type=int, default=-1, 78 | help="resume training") 79 | # flow+mp4 is the modality we used for generating DMC 80 | parser.add_argument('--modality', type=str, default='rgb', 81 | choices=['rgb', 'flow', 'mv', 'res', 'flow+mp4', 'I'], 82 | help="choose input type") 83 | parser.add_argument('--drop-out', type=float, default=0.5, 84 | help="drop-out probability") 85 | parser.add_argument('--adv', type=float, default=0., 86 | help="weight for adversarial loss") 87 | # optimization 88 | parser.add_argument('--epoch-thre', type=int, default=1, 89 | help="the epoch at which the classifier begins to be optimized when training with the generator") 90 | parser.add_argument('--optimizer', type=str, default='sgd', 91 | choices=['sgd', 'adam'], 92 | help="optimizer") 93 | parser.add_argument('--fine_tune', type=int, default=1, 94 | help="apply different learning rates for different layers") 95 | parser.add_argument('--batch-size', type=int, default=32, 96 | help="batch size") 97 | parser.add_argument('--iter-size', type=int, default=1, 98 | help="iteration size, used for accumulation of gradients") 99 | parser.add_argument('--lr-base', type=float, default=0.005, 100 | help="learning rate") 101 | parser.add_argument('--lr-base2', type=float, default=0.001, 102 | help="learning rate for stage 2") 103 | parser.add_argument('--lr-d', type=float, default=None, 104 | help="learning rate for discriminator") 105 | parser.add_argument('--lr-steps', type=list, default=[int(1e5*x) for x in [0.5, 0.7, 1.0, 4, 5]], 106 | help="number of samples to pass before changing learning rate") # 1e6 million 107 | #parser.add_argument('--lr-steps', type=list, default=[int(1e4*x) for x in [12, 18, 24, 30, 40, 60]], 108 | # help="number of samples to pass before changing learning rate") # 1e6 million 109 | parser.add_argument('--lr-factor', type=float, default=0.1, 110 | help="reduce the learning rate by this factor") 111 | parser.add_argument('--save-frequency', type=float, default=1, 112 | help="save once after N epochs") 113 | parser.add_argument('--end-epoch', type=int, default=50, 114 | help="maximum number of training epochs") 115 | parser.add_argument('--random-seed', type=int, default=1, 116 | help='random seed (default: 1)') 117 | 118 | 119 | def autofill(args): 120 | # customized 121 | if not args.task_name: 122 | args.task_name = os.path.basename(os.getcwd()) 123 | if not args.log_file: 124 | if os.path.exists("./exps/logs"): 125 | args.log_file = 
"./exps/logs/{}_at-{}.log".format(args.task_name, socket.gethostname()) 126 | else: 127 | args.log_file = ".{}_at-{}.log".format(args.task_name, socket.gethostname()) 128 | # fixed 129 | args.model_prefix = os.path.join(args.model_dir, args.task_name) 130 | args.score_dir = './exps/score' + '/{}_{}/'.format(args.dataset, args.split) + args.task_name 131 | return args 132 | 133 | def set_logger(log_file='', debug_mode=False): 134 | if log_file: 135 | if not os.path.exists("./"+os.path.dirname(log_file)): 136 | os.makedirs("./"+os.path.dirname(log_file)) 137 | handlers = [logging.FileHandler(log_file), logging.StreamHandler()] 138 | else: 139 | handlers = [logging.StreamHandler()] 140 | 141 | """ add '%(filename)s:%(lineno)d %(levelname)s:' to format show source file """ 142 | logging.basicConfig(level=logging.DEBUG if debug_mode else logging.INFO, 143 | format='%(asctime)s: %(message)s', 144 | datefmt='%Y-%m-%d %H:%M:%S', 145 | handlers = handlers) 146 | 147 | if __name__ == "__main__": 148 | 149 | # set args 150 | args = parser.parse_args() 151 | args = autofill(args) 152 | 153 | set_logger(log_file=args.log_file, debug_mode=args.debug_mode) 154 | logging.info("Using pytorch {} ({})".format(torch.__version__, torch.__path__)) 155 | logging.info("Start training with args:\n" + 156 | json.dumps(vars(args), indent=4, sort_keys=True)) 157 | 158 | # set device states 159 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus # before using torch 160 | assert torch.cuda.is_available(), "CUDA is not available" 161 | torch.manual_seed(args.random_seed) 162 | torch.cuda.manual_seed(args.random_seed) 163 | 164 | 165 | 166 | # load dataset related configuration 167 | dataset_cfg = dataset.get_config(name=args.dataset) 168 | 169 | # creat model with all parameters initialized 170 | net, input_conf = get_symbol(name=args.network, 171 | pretrained=args.pretrained_2d if args.resume_epoch < 0 else None, 172 | modality = args.modality, 173 | drop_out = args.drop_out, 174 | arch_estimator = args.arch_estimator, 175 | arch_d = args.arch_d, 176 | print_net= False, 177 | **dataset_cfg) 178 | 179 | # training 180 | kwargs = {} 181 | kwargs.update(dataset_cfg) 182 | kwargs.update({'input_conf': input_conf}) 183 | kwargs.update(vars(args)) 184 | train_model(args.network, sym_net=net, optim = args.optimizer, **kwargs) 185 | -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split1/combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 
4 | 5 | 6 | # ./exp/hmdb51_coviar/flow/split1/combine.sh 2>&1 | tee ./exp/hmdb51_coviar/flow/split1/acc.log 7 | 8 | expdir=hmdb51_coviar 9 | expname=flow/split1 10 | 11 | python combine.py --iframe exp/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz --res exp/hmdb51_coviar/residual/split1/residual_score_model_best.npz \ 12 | --mv exp/hmdb51_coviar/mv/split1/mv_score_model_best.npz --flow exp/${expdir}/${expname}/flow_score_model_best.npz 13 | -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split1/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/flow/split1/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split2/combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | # ./exp/hmdb51_coviar/flow/split2/combine.sh 2>&1 | tee ./exp/hmdb51_coviar/flow/split2/acc.log 6 | 7 | expdir=hmdb51_coviar 8 | expname=flow/split2 9 | 10 | python combine.py --iframe exp/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz --res exp/hmdb51_coviar/residual/split2/residual_score_model_best.npz \ 11 | --mv exp/hmdb51_coviar/mv/split2/mv_score_model_best.npz --flow exp/${expdir}/${expname}/flow_score_model_best.npz 12 | -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split2/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/flow/split2/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split3/combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 
4 | 5 | # ./exp/hmdb51_coviar/flow/split3/combine.sh 2>&1 | tee ./exp/hmdb51_coviar/flow/split3/acc.log 6 | 7 | expdir=hmdb51_coviar 8 | expname=flow/split3 9 | 10 | python combine.py --iframe exp/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz --res exp/hmdb51_coviar/residual/split3/residual_score_model_best.npz \ 11 | --mv exp/hmdb51_coviar/mv/split3/mv_score_model_best.npz --flow exp/${expdir}/${expname}/flow_score_model_best.npz 12 | -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/flow/split3/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/flow/split3/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/mv/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/mv/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/mv/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/mv/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/mv/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/mv/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/residual/split1/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/residual/split1/residual_score_model_best.npz 
-------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/residual/split2/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/residual/split2/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_coviar/residual/split3/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_coviar/residual/split3/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gan/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split1/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gan/split1 8 | representation=mv 9 | 10 | # exp/hmdb51_gan/split1/run.sh; ./exp/hmdb51_gan/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split1/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name hmdb51 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 26 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_train_rename.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_test_rename.txt \ 29 | --weights exp/hmdb51_gen_flow/split1/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name hmdb51 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 49 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_test_rename.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee exp/${expdir}/test.log 
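The run_combine.sh entry that follows feeds these per-modality score files to combine.py. As a rough sketch of what such late fusion amounts to (the file paths, npz keys, and equal weights below are assumptions for illustration; combine.py may weight the streams differently):

```python
# Hedged late-fusion sketch over saved score files -- NOT the actual combine.py.
# Assumes each .npz holds 'scores' (N x num_classes) and 'labels' (N,), the keys
# the I3D evaluation script writes via np.savez(..., scores=..., labels=...).
import numpy as np

paths = ['iframe_score_model_best.npz',      # hypothetical locations
         'residual_score_model_best.npz',
         'mv_score_model_best.npz']

runs = [np.load(p) for p in paths]
labels = runs[0]['labels']
fused = sum(r['scores'] for r in runs) / len(runs)   # equal-weight average (assumption)

top1 = float((fused.argmax(axis=1) == labels).mean())
print('fused top-1 accuracy: {:.4f}'.format(top1))
```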
-------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split1/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gan/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split1/acc.log 8 | 9 | expdir=hmdb51_gan/split1 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split1/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split1/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gan/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split2/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gan/split2 8 | representation=mv 9 | 10 | # exp/hmdb51_gan/split2/run.sh; ./exp/hmdb51_gan/split2/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split2/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name hmdb51 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 26 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_train_rename.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_test_rename.txt \ 29 | --weights exp/hmdb51_gen_flow/split2/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name hmdb51 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 49 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_test_rename.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee 
exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split2/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gan/split2/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split2/acc.log 8 | 9 | expdir=hmdb51_gan/split2 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split2/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split2/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gan/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split3/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gan/split3 8 | representation=mv 9 | 10 | # exp/hmdb51_gan/split3/run.sh; ./exp/hmdb51_gan/split3/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split3/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name hmdb51 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 26 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_train_rename.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_test_rename.txt \ 29 | --weights exp/hmdb51_gen_flow/split3/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name hmdb51 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 49 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_test_rename.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | 
tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/hmdb51_gan/split3/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gan/split3/run_combine.sh 2>&1 | tee ./exp/hmdb51_gan/split3/acc.log 8 | 9 | expdir=hmdb51_gan/split3 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split3/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split3/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gen_flow/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split1/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gen_flow/split1 8 | representation=mv 9 | 10 | # exp/hmdb51_gen_flow/split1/run.sh; ./exp/hmdb51_gen_flow/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split1/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name hmdb51 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 20 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_train_rename.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_test_rename.txt \ 23 | --weights ./exp/hmdb51_coviar/flow/split1/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name hmdb51 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 45 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split1_test_rename.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log 
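These hmdb51_gen_flow runs differ from the hmdb51_gan runs above mainly in dropping the adversarial terms and setting --flow_ds_factor 16 instead of 0, i.e. the optical-flow target is spatially downsampled by a factor of 16 before the MSE loss, so only coarse flow supervision remains. A sketch of one plausible reading of that flag (the actual downsampling op in train.py may differ):

import torch
import torch.nn.functional as F

def downsample_flow_target(flow, ds_factor=16):
    # Factor 0 means the flow target is used at full resolution.
    if ds_factor <= 0:
        return flow
    coarse = F.avg_pool2d(flow, kernel_size=ds_factor)
    # Restore the original spatial size so the MSE shapes still match.
    return F.interpolate(coarse, scale_factor=ds_factor, mode='nearest')

flow = torch.randn(4, 2, 224, 224)         # (batch, dx/dy channels, H, W)
target = downsample_flow_target(flow, 16)  # 14x14 content at 224x224 shape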
-------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split1/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gen_flow/split1/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split1/acc.log 8 | 9 | expdir=hmdb51_gen_flow/split1 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split1/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split1/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split1/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gen_flow/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split2/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gen_flow/split2 8 | representation=mv 9 | 10 | # exp/hmdb51_gen_flow/split2/run.sh; ./exp/hmdb51_gen_flow/split2/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split2/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name hmdb51 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 20 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_train_rename.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_test_rename.txt \ 23 | --weights ./exp/hmdb51_coviar/flow/split2/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name hmdb51 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 45 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split2_test_rename.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log 
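Every test.py call in these scripts evaluates with --test_segments 25 and --test-crops 1, i.e. 25 temporally spread samples per video and a single center crop, whose scores are averaged into the saved .npz file. A sketch of TSN-style uniform segment sampling under that assumption:

def segment_indices(n_frames, n_segments=25):
    # Center of each of n_segments equal temporal chunks.
    step = n_frames / n_segments
    return [min(int(step * i + step / 2), n_frames - 1)
            for i in range(n_segments)]

print(segment_indices(300))  # 25 roughly evenly spaced frame indices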
-------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split2/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gen_flow/split2/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split2/acc.log 8 | 9 | expdir=hmdb51_gen_flow/split2 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split2/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split2/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split2/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/hmdb51_gen_flow/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split3/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=hmdb51_gen_flow/split3 8 | representation=mv 9 | 10 | # exp/hmdb51_gen_flow/split3/run.sh; ./exp/hmdb51_gen_flow/split3/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split3/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name hmdb51 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 20 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_train_rename.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_test_rename.txt \ 23 | --weights ./exp/hmdb51_coviar/flow/split3/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name hmdb51 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/HMDB51/fb/videos_mpeg4 \ 45 | --flow-root /projects/eventnet/dataset/HMDB51/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/hmdb51_split3_test_rename.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log 
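Another flag shared by all of these runs is --mv_minmaxnorm 1, which by its name rescales the decoded motion-vector field with a min-max normalization before it enters the network; the authoritative implementation is in dataset.py and transforms.py. One plausible sketch:

import numpy as np

def minmax_norm(mv, eps=1e-8):
    # Per-sample min-max scaling of the motion-vector array to [0, 1].
    lo, hi = mv.min(), mv.max()
    return (mv - lo) / (hi - lo + eps)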
-------------------------------------------------------------------------------- /exp_my/hmdb51_gen_flow/split3/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/hmdb51_gen_flow/split3/run_combine.sh 2>&1 | tee ./exp/hmdb51_gen_flow/split3/acc.log 8 | 9 | expdir=hmdb51_gen_flow/split3 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/hmdb51_coviar/iframe/split3/iframe_score_model_best.npz \ 14 | --res exp/hmdb51_coviar/residual/split3/residual_score_model_best.npz \ 15 | --mv exp/hmdb51_coviar/mv/split3/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_flow/split1/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_flow/split1/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_flow/split2/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_flow/split2/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_flow/split3/flow_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_flow/split3/flow_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_iframe/split1/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_iframe/split1/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_iframe/split2/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_iframe/split2/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_iframe/split3/iframe_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_iframe/split3/iframe_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_mv/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_mv/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_mv/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_mv/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_mv/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_mv/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_residual/split1/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_residual/split1/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_residual/split2/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_residual/split2/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_coviar/ucf101_residual/split3/residual_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_coviar/ucf101_residual/split3/residual_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gan/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split1/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! 
/bin/bash 6 | 7 | expdir=ucf101_gan/split1 8 | representation=mv 9 | 10 | # exp/ucf101_gan/split1/run.sh; ./exp/ucf101_gan/split1/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split1/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name ucf101 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 26 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_train.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_test.txt \ 29 | --weights exp/ucf101_gen_flow/split1/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name ucf101 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 49 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_test.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split1/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gan/split1/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split1/acc.log 8 | 9 | expdir=ucf101_gan/split1 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split1/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split1/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split1/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gan/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split2/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 
4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gan/split2 8 | representation=mv 9 | 10 | # exp/ucf101_gan/split2/run.sh; ./exp/ucf101_gan/split2/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split2/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name ucf101 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 26 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_train.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_test.txt \ 29 | --weights exp/ucf101_gen_flow/split2/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name ucf101 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 49 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_test.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split2/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gan/split2/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split2/acc.log 8 | 9 | expdir=ucf101_gan/split2 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split2/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split2/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split2/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gan/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split3/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gan/split3 8 | representation=mv 9 | 10 | # exp/ucf101_gan/split3/run.sh; ./exp/ucf101_gan/split3/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split3/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --lr-adv-g 1 \ 15 | --lr-adv-d 0.01 \ 16 | --lr-mse 10 \ 17 | --lr_mse_mult 1 \ 18 | --lr_d_mult 1 \ 19 | --batch-size 30 \ 20 | --arch resnet18 \ 21 | --arch_estimator DenseNetTiny \ 22 | --arch_d Discriminator3 \ 23 | --data-name ucf101 \ 24 | --representation ${representation} \ 25 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 26 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 27 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_train.txt \ 28 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_test.txt \ 29 | --weights exp/ucf101_gen_flow/split3/_mv_model_best.pth.tar \ 30 | --model-prefix exp/${expdir}/ \ 31 | --lr-steps 20 35 45 \ 32 | --use_databn 0 \ 33 | --epochs 50 \ 34 | --epoch-thre 0 \ 35 | --flow_ds_factor 0 \ 36 | --gen_flow_or_delta 1 \ 37 | --mv_minmaxnorm 1 \ 38 | --no-accumulation \ 39 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 40 | 41 | python test.py \ 42 | --arch resnet18 \ 43 | --arch_estimator DenseNetTiny \ 44 | --data-name ucf101 \ 45 | --representation mv \ 46 | --test-crops 1 \ 47 | --test_segments 25 \ 48 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 49 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 50 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_test.txt \ 51 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 52 | --use_databn 0 \ 53 | --flow_ds_factor 0 \ 54 | --gen_flow_or_delta 1 \ 55 | --mv_minmaxnorm 1 \ 56 | --no-accumulation \ 57 | --save-scores exp/${expdir}/${representation}_score_model_best \ 58 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gan/split3/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gan/split3/run_combine.sh 2>&1 | tee ./exp/ucf101_gan/split3/acc.log 8 | 9 | expdir=ucf101_gan/split3 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split3/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split3/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split3/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split1/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gen_flow/split1/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split1/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gen_flow/split1 8 | representation=mv 9 | 10 | # exp/ucf101_gen_flow/split1/run.sh; ./exp/ucf101_gen_flow/split1/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split1/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name ucf101 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 20 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_train.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_test.txt \ 23 | --weights ./exp/ucf101_coviar/ucf101_flow/split1/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name ucf101 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 45 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split1_test.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split1/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gen_flow/split1/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split1/acc.log 8 | 9 | expdir=ucf101_gen_flow/split1 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split1/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split1/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split1/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split2/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gen_flow/split2/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split2/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gen_flow/split2 8 | representation=mv 9 | 10 | # exp/ucf101_gen_flow/split2/run.sh; ./exp/ucf101_gen_flow/split2/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split2/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name ucf101 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 20 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_train.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_test.txt \ 23 | --weights ./exp/ucf101_coviar/ucf101_flow/split2/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name ucf101 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 45 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split2_test.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split2/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gen_flow/split2/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split2/acc.log 8 | 9 | expdir=ucf101_gen_flow/split2 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split2/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split2/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split2/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz \ 17 | --wf 0.25 -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split3/mv_score_model_best.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/dmc-net/323d1600f09204a8f28e7bbabd71626405f6ac0d/exp_my/ucf101_gen_flow/split3/mv_score_model_best.npz -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split3/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | expdir=ucf101_gen_flow/split3 8 | representation=mv 9 | 10 | # exp/ucf101_gen_flow/split3/run.sh; ./exp/ucf101_gen_flow/split3/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split3/acc.log 11 | 12 | python train.py \ 13 | --lr 0.01 \ 14 | --batch-size 45 \ 15 | --arch resnet18 \ 16 | --arch_estimator DenseNetTiny \ 17 | --data-name ucf101 \ 18 | --representation ${representation} \ 19 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 20 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 21 | --train-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_train.txt \ 22 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_test.txt \ 23 | --weights ./exp/ucf101_coviar/ucf101_flow/split3/_flow_model_best.pth.tar \ 24 | --model-prefix exp/${expdir}/ \ 25 | --lr-steps 20 35 45 \ 26 | --lr-mse 10 \ 27 | --lr_mse_mult 1 \ 28 | --use_databn 0 \ 29 | --epochs 50 \ 30 | --epoch-thre 1 \ 31 | --flow_ds_factor 16 \ 32 | --gen_flow_or_delta 1 \ 33 | --no-accumulation \ 34 | --mv_minmaxnorm 1 \ 35 | --gpus 0 2>&1 | tee exp/${expdir}/train.log 36 | 37 | python test.py \ 38 | --arch resnet18 \ 39 | --arch_estimator DenseNetTiny \ 40 | --data-name ucf101 \ 41 | --representation mv \ 42 | --test-crops 1 \ 43 | --test_segments 25 \ 44 | --data-root /projects/eventnet/dataset/UCF101/fb/mpeg4_videos \ 45 | --flow-root /projects/eventnet/dataset/UCF101/fb/TSN_input \ 46 | --test-list /projects/LSDE/work03/FB/data_preprocess/datalists/ucf101_split3_test.txt \ 47 | --weights exp/${expdir}/_${representation}_model_best.pth.tar \ 48 | --use_databn 0 \ 49 | --flow_ds_factor 16 \ 50 | --gen_flow_or_delta 1 \ 51 | --no-accumulation \ 52 | --mv_minmaxnorm 1 \ 53 | --save-scores exp/${expdir}/${representation}_score_model_best \ 54 | --gpus 0 2>&1 | tee exp/${expdir}/test.log -------------------------------------------------------------------------------- /exp_my/ucf101_gen_flow/split3/run_combine.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | #! /bin/bash 6 | 7 | # ./exp/ucf101_gen_flow/split3/run_combine.sh 2>&1 | tee ./exp/ucf101_gen_flow/split3/acc.log 8 | 9 | expdir=ucf101_gen_flow/split3 10 | representation=mv 11 | 12 | python combine.py \ 13 | --iframe exp/ucf101_coviar/ucf101_iframe/split3/iframe_score_model_best.npz \ 14 | --res exp/ucf101_coviar/ucf101_residual/split3/residual_score_model_best.npz \ 15 | --mv exp/ucf101_coviar/ucf101_mv/split3/mv_score_model_best.npz \ 16 | --flow exp/${expdir}/mv_score_model_best.npz \ 17 | --wf 0.25 --------------------------------------------------------------------------------
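Unlike the split1 script, the split2 and split3 run_combine.sh calls above pass --wf 0.25, which reads as the fusion weight applied to the --flow (generated-flow) stream, down-weighting it relative to the other three streams instead of using combine.py's default. Under that assumption, the fusion from the earlier sketch becomes:

import numpy as np

def fuse_with_wf(s_iframe, s_res, s_mv, s_flow, wf=0.25):
    # --wf scales only the generated-flow scores before summation (assumed).
    return s_iframe + s_res + s_mv + wf * np.asarray(s_flow)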