├── pt_loader ├── __init__.py ├── config.py └── opn_datasets.py ├── tf_model ├── __init__.py ├── model │ ├── __init__.py │ ├── memory_bank.py │ ├── cluster_km.py │ ├── dataset_utils.py │ ├── rot_model.py │ ├── preprocessing.py │ ├── trn_model.py │ ├── opn_model.py │ ├── self_loss.py │ └── resnet_th_preprocessing.py ├── saved_settings │ ├── __init__.py │ ├── basics.py │ ├── vd_vanilla3D.py │ ├── vd_binned_slow.py │ ├── vd_prep.py │ ├── vd_binned_single_frame.py │ ├── vd_rot.py │ ├── vd_3dresnet_fx.py │ ├── vd_binned_single_frame_fx.py │ ├── vd_fast.py │ ├── vd_slow_fx.py │ ├── vd_opn.py │ ├── vd_finetune_HMDB.py │ ├── vd_slowfast_fx.py │ ├── vd_trn.py │ ├── vd_single_frame_fx.py │ ├── vd_test_kinetics.py │ ├── vd_super_fx.py │ ├── vd_finetune_UCF_fx.py │ ├── vd_finetune_HMDB_fx.py │ └── vd_transfer_IN.py ├── run_finetune.sh ├── run_transfer_IN.sh ├── utils.py ├── run_transfer_KN.sh ├── run_finetune_HMDB.sh ├── run_finetune_UCF.sh ├── run_training.sh ├── generate_resps_from_ckpt.py ├── rot_data.py ├── opn_data.py ├── README.md ├── train_rot.py ├── config.py ├── train_opn.py ├── load_param_dict.pkl ├── train_transfer_KN.py ├── framework.py └── data.py ├── README.md ├── .gitignore ├── misc └── combine_ckpts.py └── notebook ├── tfutils_reader.py └── jupyter_utils.py /pt_loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tf_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tf_model/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tf_model/saved_settings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tf_model/run_finetune.sh: -------------------------------------------------------------------------------- 1 | python train_transfer_KN.py --setting vd_finetune_UCF_fx.vd_slowfast_single_a4_sc --gpu 0,1,5,8 2 | -------------------------------------------------------------------------------- /tf_model/saved_settings/basics.py: -------------------------------------------------------------------------------- 1 | def bs128(args): 2 | args['batch_size'] = 128 3 | args['test_batch_size'] = 64 4 | args['test_no_frames'] = 5 5 | args['kNN_val'] = 10 6 | args['fre_filter'] = 50000 7 | args['fre_cache_filter'] = 5000 8 | args['fre_valid'] = 5000 9 | return args 10 | 11 | 12 | def basic_fix(args): 13 | args['port'] = 27006 14 | args['db_name'] = 'vd_unsup_fx' 15 | args['col_name'] = 'dyn_clstr' 16 | return args 17 | -------------------------------------------------------------------------------- /tf_model/run_transfer_IN.sh: -------------------------------------------------------------------------------- 1 | # Directory hosting ImageNet tfrecords 2 | image_dir=/mnt/fs1/Dataset/TFRecord_Imagenet_standard/image_label_full_widx 3 | # Directory to host your saved models and logs 4 | cache_dir=/mnt/fs4/chengxuz/video_pub_cache 5 | # Your gpu number 6 | gpu=0 7 | 8 | main_setting=vd_transfer_IN.vd_3dresnet_trans_all 9 | python train_transfer.py \ 10 | --setting ${main_setting} \ 11 | --cache_dir ${cache_dir} \ 12 | --gpu ${gpu} \ 13 | --image_dir ${image_dir} "$@" 14 | 
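The shell scripts above select an experiment through a `--setting module.function` flag, where the module lives in `saved_settings` and the function builds an `args` dict by composing small helpers such as `bs128` and `basic_fix` from `basics.py`. A minimal sketch of how such a flag can be resolved — `get_setting_func` here is a hypothetical helper for illustration; the actual resolution logic lives in the training scripts:

```python
# Illustrative sketch: resolving a `--setting module.function` string into an
# args dict. `get_setting_func` is a hypothetical helper, not part of the repo.
import importlib


def get_setting_func(setting):
    module_name, func_name = setting.rsplit('.', 1)
    module = importlib.import_module('saved_settings.' + module_name)
    return getattr(module, func_name)


args_func = get_setting_func('vd_3dresnet_fx.vd_3dresnet_IR')
args = args_func()
print(args['exp_id'])  # 'vd_3dresnet_IR'
```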
-------------------------------------------------------------------------------- /tf_model/saved_settings/vd_vanilla3D.py: -------------------------------------------------------------------------------- 1 | from basics import basic 2 | from vd_single_frame import vd_basic_color 3 | from vd_fast import fast_bs 4 | 5 | 6 | def vd_vanilla3D_IR(): 7 | args = {} 8 | 9 | args = basic(args) 10 | args = fast_bs(args) 11 | args = vd_basic_color(args) 12 | 13 | args['exp_id'] = 'vd_vanilla3D_IR' 14 | args['model_type'] = 'vanilla3D' 15 | args['task'] = 'IR' 16 | args['train_num_workers'] = 20 17 | args['val_num_workers'] = 10 18 | return args 19 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_binned_slow.py: -------------------------------------------------------------------------------- 1 | from basics import basic 2 | from vd_single_frame import vd_basic_color 3 | from vd_slow import slow_bs 4 | 5 | 6 | def vd_bin_slow_basic(args): 7 | args = vd_basic_color(args) 8 | args['data_len'] = 1145906 9 | args['bin_interval'] = 42 10 | return args 11 | 12 | 13 | def vd_bin_slow_IR(): 14 | args = {} 15 | 16 | args = basic(args) 17 | args = slow_bs(args) 18 | args = vd_bin_slow_basic(args) 19 | 20 | args['exp_id'] = 'vd_bin_slow_IR' 21 | args['model_type'] = 'slow' 22 | args['task'] = 'IR' 23 | return args 24 | -------------------------------------------------------------------------------- /tf_model/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys, datetime 3 | import numpy as np 4 | import tensorflow as tf 5 | import copy 6 | import pdb 7 | from model.self_loss import DATA_LEN_IMAGENET_FULL, assert_shape 8 | DATA_LEN_KINETICS_400 = 239888 9 | VAL_DATA_LEN_KINETICS_400 = 19653 10 | 11 | 12 | def online_keep_all(agg_res, res, step): 13 | if agg_res is None: 14 | agg_res = {k: [] for k in res} 15 | for k, v in res.items(): 16 | agg_res[k].append(v) 17 | return agg_res 18 | 19 | 20 | def tuple_get_one(x): 21 | if isinstance(x, tuple) or isinstance(x, list): 22 | return x[0] 23 | return x 24 | -------------------------------------------------------------------------------- /tf_model/run_transfer_KN.sh: -------------------------------------------------------------------------------- 1 | # Directory hosting your extracted frames 2 | image_dir=/data5/chengxuz/Dataset/kinetics/comp_jpgs_extracted 3 | # Directory to host your saved models and logs 4 | cache_dir=/mnt/fs4/chengxuz/video_pub_cache 5 | # If you are using metas provided by us, this should be /repo_dir/build_data/kinetics 6 | meta_dir=/home/chengxuz/video_unsup/build_data/kinetics 7 | # Your gpu number 8 | gpu=0 9 | 10 | main_setting=vd_transfer_KN.vd_3dresnet_trans_all 11 | python train_transfer_KN.py \ 12 | --setting ${main_setting} \ 13 | --cache_dir ${cache_dir} \ 14 | --gpu ${gpu} \ 15 | --metafile_root ${meta_dir} \ 16 | --image_dir ${image_dir} \ 17 | --val_image_dir ${image_dir} "$@" 18 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_prep.py: -------------------------------------------------------------------------------- 1 | from basics import basic, bs128 2 | from vd_single_frame import vd_basic 3 | 4 | 5 | def vd_ctl_IR_rdsz(): 6 | args = {} 7 | 8 | args = basic(args) 9 | args = bs128(args) 10 | args = vd_basic(args) 11 | 12 | args['exp_id'] = 'vd_ctl_IR_rdsz' 13 | args['task'] = 'IR' 14 | 
args['train_prep'] = 'RandomSized' 15 | return args 16 | 17 | 18 | def vd_ctl_IR_color(): 19 | args = {} 20 | 21 | args = basic(args) 22 | args = bs128(args) 23 | args = vd_basic(args) 24 | 25 | args['exp_id'] = 'vd_ctl_IR_color' 26 | args['task'] = 'IR' 27 | args['train_prep'] = 'ColorJitter' 28 | args['lr_boundaries'] = '690011,1070011' 29 | return args 30 | -------------------------------------------------------------------------------- /tf_model/run_finetune_HMDB.sh: -------------------------------------------------------------------------------- 1 | # Directory hosting your extracted frames 2 | image_dir=/data5/shetw/HMDB51/extracted_frames 3 | # Directory to host your saved models and logs 4 | cache_dir=/mnt/fs4/chengxuz/video_pub_cache 5 | # If you are using metas provided by us, this should be /repo_dir/build_data/kinetics 6 | meta_dir=/home/chengxuz/video_unsup/build_data/HMDB51 7 | # Your gpu number 8 | gpu=0 9 | 10 | main_setting=vd_finetune_HMDB_fx.vd_3dresnet_sc # Table 2 result 11 | #main_setting=vd_finetune_HMDB_fx.vd_3dresnet_cj # Table 3 result 12 | 13 | python train_transfer_KN.py \ 14 | --setting ${main_setting} \ 15 | --cache_dir ${cache_dir} \ 16 | --gpu ${gpu} \ 17 | --metafile_root ${meta_dir} \ 18 | --image_dir ${image_dir} \ 19 | --val_image_dir ${image_dir} "$@" 20 | -------------------------------------------------------------------------------- /tf_model/run_finetune_UCF.sh: -------------------------------------------------------------------------------- 1 | # Directory hosting your extracted frames 2 | image_dir=/data5/shetw/UCF101/extracted_frames 3 | # Directory to host your saved models and logs 4 | cache_dir=/mnt/fs4/chengxuz/video_pub_cache 5 | # If you are using metas provided by us, this should be /repo_dir/build_data/kinetics 6 | meta_dir=/home/chengxuz/video_unsup/build_data/UCF101 7 | # Your gpu number 8 | gpu=0 9 | 10 | main_setting=vd_finetune_UCF_fx.vd_3dresnet_sc # Table 2 result 11 | #main_setting=vd_finetune_UCF_fx.vd_3dresnet_cj # Table 3 result 12 | 13 | python train_transfer_KN.py \ 14 | --setting ${main_setting} \ 15 | --cache_dir ${cache_dir} \ 16 | --gpu ${gpu} \ 17 | --metafile_root ${meta_dir} \ 18 | --image_dir ${image_dir} \ 19 | --val_image_dir ${image_dir} "$@" 20 | -------------------------------------------------------------------------------- /tf_model/run_training.sh: -------------------------------------------------------------------------------- 1 | # Directory hosting your extracted frames 2 | image_dir=/data5/chengxuz/Dataset/kinetics/comp_jpgs_extracted 3 | # Directory to host your saved models and logs 4 | cache_dir=/mnt/fs4/chengxuz/video_pub_cache 5 | # If you are using metas provided by us, this should be /repo_dir/build_data/kinetics 6 | meta_dir=/home/chengxuz/video_unsup/build_data/kinetics 7 | # Your gpu number 8 | gpu=0 9 | 10 | IR_setting=vd_3dresnet_fx.vd_3dresnet_IR 11 | python train_vie.py \ 12 | --setting ${IR_setting} \ 13 | --cache_dir ${cache_dir} \ 14 | --gpu ${gpu} \ 15 | --metafile_root ${meta_dir} \ 16 | --image_dir ${image_dir} \ 17 | --val_image_dir ${image_dir} "$@" 18 | 19 | main_setting=vd_3dresnet_fx.vd_3dresnet 20 | python train_vie.py \ 21 | --setting ${main_setting} \ 22 | --cache_dir ${cache_dir} \ 23 | --gpu ${gpu} \ 24 | --metafile_root ${meta_dir} \ 25 | --image_dir ${image_dir} \ 26 | --val_image_dir ${image_dir} "$@" 27 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_binned_single_frame.py: 
--------------------------------------------------------------------------------
from basics import basic, bs128
from vd_single_frame import vd_basic_color


def vd_bin_basic(args):
    args = vd_basic_color(args)
    args['data_len'] = 1157650
    args['bin_interval'] = 52
    return args


def vd_ctl_bin_IR():
    args = {}

    args = basic(args)
    args = bs128(args)
    args = vd_bin_basic(args)

    args['exp_id'] = 'vd_ctl_bin_IR'
    args['task'] = 'IR'
    return args


def load_from_bin_IR(args):
    args['load_exp'] = 'vd_unsup/dyn_clstr/vd_ctl_bin_IR'
    args['load_step'] = 50000
    return args


def vd_ctl_bin():
    args = {}

    args = basic(args)
    args = bs128(args)
    args = vd_bin_basic(args)
    args = load_from_bin_IR(args)

    args['exp_id'] = 'vd_ctl_bin'
    args['kmeans_k'] = '10000'
    return args
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Unsupervised Learning from Video with Deep Neural Embeddings

Please see the code in `build_data` to prepare the different datasets; you need at least Kinetics to run the training.
After that, see the code in `tf_model` to train and evaluate the model.
Finally, check `show_results.ipynb` in the `notebook` folder to see how training progress can be monitored and compared against our training trajectory.

## Pretrained weights for VIE-3DResNet (updated 12/31/2020)

Weights can be downloaded at [this link](http://visualmaster-models.s3.amazonaws.com/vie/3dresnet_112/checkpoint-1450000.tar).

## How to get responses from intermediate layers

Check the function `test_video_model` in the script `tf_model/generate_resps_from_ckpt.py`.
The outputs will be stored in a dictionary, with keys like `encode_x` (x ranges from 1 to 10).
Layer `encode_1` is the output of the first pooling layer.
The other layers are the outputs of the subsequent residual blocks (ResNet18 has 9 residual blocks in total).
Each output is of shape `(batch_size, channels, temporal_dim, spatial_dim, spatial_dim)`.
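Below is a minimal sketch of iterating over such a response dictionary; `outputs_np` stands in for the dictionary returned by `SESS.run` inside `test_video_model`, and the shapes used here are placeholders rather than the model's actual values:

```python
import numpy as np

# Placeholder responses; in practice this dict comes from SESS.run in
# tf_model/generate_resps_from_ckpt.py. Shapes below are illustrative only.
outputs_np = {
    'encode_1': np.zeros([1, 64, 8, 28, 28], dtype=np.float32),
    'encode_10': np.zeros([1, 512, 2, 4, 4], dtype=np.float32),
}
for name in sorted(outputs_np):
    batch_size, channels, temporal_dim, sp_dim, _ = outputs_np[name].shape
    print('%s: %d channels, temporal dim %d, spatial dim %d' % (
        name, channels, temporal_dim, sp_dim))
```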
18 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_rot.py: -------------------------------------------------------------------------------- 1 | def basic_rot(args): 2 | args['port'] = 27006 3 | args['db_name'] = 'vd_unsup_fx' 4 | args['col_name'] = 'rot' 5 | args['init_lr'] = 0.1 6 | return args 7 | 8 | 9 | def bs128_rot(args): 10 | args['batch_size'] = 32 11 | args['test_batch_size'] = 32 12 | args['fre_filter'] = 50000 13 | args['fre_cache_filter'] = 5000 14 | args['fre_valid'] = 5000 15 | return args 16 | 17 | 18 | def vd_basic_rot(args): 19 | args['image_dir'] = '/data5/chengxuz/Dataset/kinetics/comp_jpgs_extracted' 20 | args['val_image_dir'] = args['image_dir'] 21 | return args 22 | 23 | 24 | def rot_3dresnet(): 25 | args = {} 26 | 27 | args = basic_rot(args) 28 | args = bs128_rot(args) 29 | args = vd_basic_rot(args) 30 | args['exp_id'] = 'rot_3dresnet' 31 | return args 32 | 33 | 34 | def rot_3dresnet_re(): 35 | args = {} 36 | 37 | args = basic_rot(args) 38 | args = bs128_rot(args) 39 | args = vd_basic_rot(args) 40 | args['exp_id'] = 'rot_3dresnet_re' 41 | args['rot_real_prep'] = True 42 | args['lr_boundaries'] = '25000,50000,75000' 43 | return args 44 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_3dresnet_fx.py: -------------------------------------------------------------------------------- 1 | from saved_settings.basics import basic_fix 2 | from saved_settings.vd_single_frame_fx import vd_basic 3 | from saved_settings.vd_slow_fx import slow_bs 4 | 5 | 6 | def vd_3dresnet_IR(): 7 | args = {} 8 | 9 | args = basic_fix(args) 10 | args = slow_bs(args) 11 | args = vd_basic(args) 12 | args['train_prep'] = 'ColorJitter_112' 13 | args['port'] = 27007 14 | 15 | args['exp_id'] = 'vd_3dresnet_IR' 16 | args['model_type'] = '3dresnet' 17 | args['task'] = 'IR' 18 | args['train_num_workers'] = 40 19 | args['val_num_workers'] = 20 20 | args['train_num_steps'] = 50000 21 | return args 22 | 23 | 24 | def load_from_3dresnet_IR(args): 25 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_3dresnet_IR' 26 | args['load_step'] = 50000 27 | return args 28 | 29 | 30 | def vd_3dresnet(): 31 | args = {} 32 | 33 | args = basic_fix(args) 34 | args = slow_bs(args) 35 | args = vd_basic(args) 36 | args = load_from_3dresnet_IR(args) 37 | args['train_prep'] = 'ColorJitter_112' 38 | args['port'] = 27007 39 | 40 | args['exp_id'] = 'vd_3dresnet' 41 | args['model_type'] = '3dresnet' 42 | args['lr_boundaries'] = '974946,1304998' 43 | args['train_num_steps'] = 1600000 44 | args['train_num_workers'] = 40 45 | args['val_num_workers'] = 20 46 | return args 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | .idea 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | 92 | data/coco 93 | data/pretrained_models 94 | tags 95 | output 96 | 97 | *.swp 98 | *.gz 99 | 100 | # emacs autosave files 101 | *~ 102 | 103 | # VSCode 104 | .vscode/ 105 | 106 | # Running script 107 | # tf_model/run_*.sh 108 | -------------------------------------------------------------------------------- /tf_model/model/memory_bank.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | class MemoryBank(object): 7 | def __init__(self, size, dim, seed=None): 8 | self.size = size 9 | self.dim = dim 10 | self.seed = seed or 0 11 | self._bank = self._create() 12 | 13 | def _create(self): 14 | mb_init = tf.random_uniform( 15 | shape=(self.size, self.dim), 16 | seed=self.seed, 17 | ) 18 | std_dev = 1. / np.sqrt(self.dim/3) 19 | mb_init = mb_init * (2*std_dev) - std_dev 20 | return tf.get_variable( 21 | 'memory_bank', 22 | initializer=mb_init, 23 | dtype=tf.float32, 24 | trainable=False, 25 | ) 26 | 27 | def as_tensor(self): 28 | return self._bank 29 | 30 | def at_idxs(self, idxs): 31 | return tf.gather(self._bank, idxs, axis=0) 32 | 33 | def get_all_dot_products(self, vec): 34 | vec_shape = vec.get_shape().as_list() 35 | # [bs, dim] 36 | assert len(vec_shape) == 2 37 | return tf.matmul(vec, tf.transpose(self._bank, [1, 0])) 38 | 39 | def get_dot_products(self, vec, idxs): 40 | vec_shape = vec.get_shape().as_list() 41 | # [bs, dim] 42 | idxs_shape = idxs.get_shape().as_list() 43 | # [bs, ...] 
44 | assert len(vec_shape) == 2 45 | assert vec_shape[0] == idxs_shape[0] 46 | 47 | memory_vecs = tf.gather(self._bank, idxs, axis=0) 48 | memory_vecs_shape = memory_vecs.get_shape().as_list() 49 | # [bs, ..., dim] 50 | assert memory_vecs_shape[:-1] == idxs_shape 51 | 52 | vec_shape[1:1] = [1] * (len(idxs_shape) - 1) 53 | vec = tf.reshape(vec, vec_shape) 54 | # [bs, 1,...,1, dim] 55 | 56 | prods = tf.multiply(memory_vecs, vec) 57 | assert prods.get_shape().as_list() == memory_vecs_shape 58 | return tf.reduce_sum(prods, axis=-1) 59 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_binned_single_frame_fx.py: -------------------------------------------------------------------------------- 1 | from basics import basic_fix, bs128 2 | from vd_single_frame_fx import vd_basic 3 | 4 | 5 | def vd_bin_basic(args): 6 | args = vd_basic(args) 7 | args['data_len'] = 1157650 8 | args['bin_interval'] = 52 9 | return args 10 | 11 | 12 | def vd_ctl_bin_IR(): 13 | args = {} 14 | 15 | args = basic_fix(args) 16 | args = bs128(args) 17 | args = vd_bin_basic(args) 18 | 19 | args['exp_id'] = 'vd_ctl_bin_IR' 20 | args['task'] = 'IR' 21 | args['lr_boundaries'] = '1125011,1535011' 22 | return args 23 | 24 | 25 | def load_from_bin_IR(args): 26 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_ctl_bin_IR' 27 | args['load_step'] = 50000 28 | return args 29 | 30 | 31 | def vd_ctl_bin(): 32 | args = {} 33 | 34 | args = basic_fix(args) 35 | args = bs128(args) 36 | args = vd_bin_basic(args) 37 | args = load_from_bin_IR(args) 38 | 39 | args['exp_id'] = 'vd_ctl_bin' 40 | args['kmeans_k'] = '10000' 41 | args['lr_boundaries'] = '1220011,1505011' 42 | return args 43 | 44 | 45 | def vd_big_bin_basic(args): 46 | args = vd_basic(args) 47 | args['data_len'] = 473532 48 | args['bin_interval'] = 130 49 | return args 50 | 51 | 52 | def vd_ctl_big_bin_IR(): 53 | args = {} 54 | 55 | args = basic_fix(args) 56 | args = bs128(args) 57 | args = vd_big_bin_basic(args) 58 | 59 | args['exp_id'] = 'vd_ctl_big_bin_IR' 60 | args['task'] = 'IR' 61 | args['lr_boundaries'] = '1350011,1960011' 62 | return args 63 | 64 | 65 | def load_from_big_bin_IR(args): 66 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_ctl_big_bin_IR' 67 | args['load_step'] = 50000 68 | return args 69 | 70 | 71 | def vd_ctl_big_bin(): 72 | args = {} 73 | 74 | args = basic_fix(args) 75 | args = bs128(args) 76 | args = vd_big_bin_basic(args) 77 | args = load_from_big_bin_IR(args) 78 | 79 | args['exp_id'] = 'vd_ctl_big_bin' 80 | args['kmeans_k'] = '5000' 81 | args['lr_boundaries'] = '1375011,1600011' 82 | return args 83 | -------------------------------------------------------------------------------- /tf_model/generate_resps_from_ckpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pdb 4 | import tensorflow as tf 5 | from argparse import Namespace 6 | from collections import OrderedDict 7 | import model.instance_model as vd_inst_model 8 | import numpy as np 9 | SETTINGS = { 10 | '3dresnet': { 11 | 'size': 112, 12 | 'num_frames': 16}, 13 | } 14 | 15 | 16 | def get_network_outputs( 17 | input_images, 18 | model_type): 19 | all_outs = vd_inst_model.resnet_embedding( 20 | input_images, 21 | get_all_layers='all_raw', 22 | skip_final_dense=True, 23 | model_type=model_type) 24 | return all_outs 25 | 26 | 27 | def build_graph(model_type, batch_size): 28 | nf = SETTINGS[model_type]['num_frames'] 29 | size = SETTINGS[model_type]['size'] 30 | img_placeholder = 
tf.placeholder( 31 | dtype=tf.uint8, 32 | shape=[batch_size, nf, size, size, 3]) 33 | network_outputs = get_network_outputs(img_placeholder, model_type) 34 | return img_placeholder, network_outputs 35 | 36 | 37 | def test_video_model(): 38 | batch_size = 1 39 | img_placeholder, network_outputs = build_graph( 40 | model_type='3dresnet', 41 | batch_size=batch_size, 42 | ) 43 | 44 | saver = tf.train.Saver() 45 | gpu_options = tf.GPUOptions(allow_growth=True) 46 | SESS = tf.Session(config=tf.ConfigProto( 47 | allow_soft_placement=True, 48 | gpu_options=gpu_options, 49 | )) 50 | # Change this to your ckpt path 51 | model_ckpt_path = '/mnt/fs4/chengxuz/brainscore_model_caches/vd_unsup_fx/dyn_clstr/vd_3dresnet/checkpoint-1450000' 52 | # This should be the actual input clips 53 | input_images = np.zeros([batch_size, 16, 112, 112, 3], dtype=np.uint8) 54 | saver.restore(SESS, model_ckpt_path) 55 | outputs_np = SESS.run( 56 | network_outputs, 57 | feed_dict={img_placeholder: input_images}) 58 | pdb.set_trace() 59 | pass 60 | 61 | 62 | if __name__ == '__main__': 63 | test_video_model() 64 | -------------------------------------------------------------------------------- /tf_model/rot_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys 3 | import torch 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | sys.path.append(os.path.abspath('../')) 8 | from pt_loader import transforms, datasets 9 | import data 10 | FPS_FACTOR = (25 / 16) 11 | 12 | 13 | def get_train_rot_pt_loader(args): 14 | cfg, _ = data.get_cfg_transform(args) 15 | if not args.rot_real_prep: 16 | transform = transforms.video_3DRot_transform() 17 | else: 18 | transform = transforms.video_3DRot_transform_real_resize() 19 | dataset = datasets.RotVideoDataset( 20 | cfg['root'], cfg['train_metafile'], 21 | num_frames=16, frame_interval=1, 22 | transform=transform, 23 | frame_start='RANDOM', 24 | fps_conversion_factor=FPS_FACTOR) 25 | return data.get_train_dataloader(args, dataset) 26 | 27 | 28 | def get_val_rot_pt_loader(args): 29 | cfg, _ = data.get_val_cfg_transform(args) 30 | if not args.rot_real_prep: 31 | transform = transforms.video_3DRot_transform_val() 32 | else: 33 | transform = transforms.video_3DRot_transform_val((136, 136)) 34 | 35 | dataset = datasets.RotVideoDataset( 36 | cfg['root'], cfg['val_metafile'], 37 | num_frames=16, frame_interval=1, 38 | transform=transform, 39 | fps_conversion_factor=FPS_FACTOR) 40 | return data.get_val_dataloader(args, dataset) 41 | 42 | 43 | def get_rot_placeholders( 44 | batch_size, 45 | crop_size=112, num_channels=3, 46 | name_prefix='TRAIN'): 47 | num_frames = 64 48 | image_placeholder = tf.placeholder( 49 | tf.uint8, 50 | (batch_size, num_frames, crop_size, crop_size, num_channels), 51 | name='%s_IMAGE_PLACEHOLDER' % name_prefix) 52 | inputs = {'image': image_placeholder} 53 | return inputs 54 | 55 | 56 | def get_feeddict(image, name_prefix='TRAIN'): 57 | image_placeholder = tf.get_default_graph().get_tensor_by_name( 58 | '%s_IMAGE_PLACEHOLDER:0' % name_prefix) 59 | feed_dict = {image_placeholder: image[0].numpy()} 60 | return feed_dict 61 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_fast.py: -------------------------------------------------------------------------------- 1 | from basics import basic 2 | from vd_single_frame import vd_basic_color, res18_la_one_bigk, \ 3 | load_from_IR_color, 
res18_la_one_bigk_smN, res18_la_one_bigk_ssmN 4 | 5 | 6 | def fast_bs(args): 7 | args['batch_size'] = 64 8 | args['test_batch_size'] = 32 9 | args['test_no_frames'] = 5 10 | args['kNN_val'] = 10 11 | args['fre_filter'] = 50000 12 | args['fre_cache_filter'] = 5000 13 | args['fre_valid'] = 5000 14 | return args 15 | 16 | 17 | def vd_fast_color_IR(): 18 | args = {} 19 | 20 | args = basic(args) 21 | args = fast_bs(args) 22 | args = vd_basic_color(args) 23 | 24 | args['exp_id'] = 'vd_fast_color_IR_fx' 25 | args['model_type'] = 'fast' 26 | args['task'] = 'IR' 27 | args['train_num_workers'] = 30 28 | return args 29 | 30 | 31 | def fast_a4(args): 32 | args['model_type'] = 'fast_a4' 33 | args['train_num_workers'] = 30 34 | args['val_num_workers'] = 10 35 | return args 36 | 37 | 38 | def vd_fast_a4_color_IR(): 39 | args = {} 40 | 41 | args = basic(args) 42 | args = fast_bs(args) 43 | args = vd_basic_color(args) 44 | args = fast_a4(args) 45 | 46 | args['exp_id'] = 'vd_fast_a4_color_IR' 47 | args['task'] = 'IR' 48 | args['lr_boundaries'] = '325011' 49 | return args 50 | 51 | 52 | def load_from_fast_IR(args): 53 | args['load_exp'] = 'vd_unsup/dyn_clstr/vd_fast_a4_color_IR' 54 | args['load_step'] = 50000 55 | return args 56 | 57 | 58 | def vd_fast_a4_LA(): 59 | args = {} 60 | 61 | args = basic(args) 62 | args = fast_bs(args) 63 | args = vd_basic_color(args) 64 | args = load_from_fast_IR(args) 65 | args = res18_la_one_bigk_smN(args) 66 | args = fast_a4(args) 67 | 68 | args['exp_id'] = 'vd_fast_a4_LA' 69 | return args 70 | 71 | 72 | def vd_fast_a4_LA_ssmN(): 73 | args = {} 74 | 75 | args = basic(args) 76 | args = fast_bs(args) 77 | args = vd_basic_color(args) 78 | args = load_from_fast_IR(args) 79 | args = res18_la_one_bigk_ssmN(args) 80 | args = fast_a4(args) 81 | 82 | args['exp_id'] = 'vd_fast_a4_LA_ssmN' 83 | return args 84 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_slow_fx.py: -------------------------------------------------------------------------------- 1 | from saved_settings.basics import basic_fix 2 | from saved_settings.vd_single_frame_fx import vd_basic, res18_la 3 | 4 | 5 | def slow_bs(args): 6 | args['batch_size'] = 64 7 | args['test_batch_size'] = 32 8 | args['test_no_frames'] = 5 9 | args['kNN_val'] = 10 10 | args['fre_filter'] = 50000 11 | args['fre_cache_filter'] = 5000 12 | args['fre_valid'] = 5000 13 | return args 14 | 15 | 16 | def vd_slow_IR(): 17 | args = {} 18 | 19 | args = basic_fix(args) 20 | args = slow_bs(args) 21 | args = vd_basic(args) 22 | 23 | args['exp_id'] = 'vd_slow_IR' 24 | args['model_type'] = 'slow' 25 | args['task'] = 'IR' 26 | args['lr_boundaries'] = '1280011,1480011' 27 | return args 28 | 29 | 30 | def load_from_slow_IR(args): 31 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_slow_IR' 32 | args['load_step'] = 50000 33 | return args 34 | 35 | 36 | def vd_slow(): 37 | args = {} 38 | 39 | args = basic_fix(args) 40 | args = slow_bs(args) 41 | args = vd_basic(args) 42 | args = load_from_slow_IR(args) 43 | args = res18_la(args) 44 | 45 | args['exp_id'] = 'vd_slow' 46 | args['model_type'] = 'slow' 47 | args['lr_boundaries'] = '1170011,1380011' 48 | return args 49 | 50 | 51 | def vd_slow_test_noimg(): 52 | args = {} 53 | 54 | args = basic_fix(args) 55 | args = slow_bs(args) 56 | args = vd_basic(args) 57 | args = res18_la(args) 58 | 59 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_slow' 60 | args['exp_id'] = 'vd_slow_test_noimg' 61 | args['model_type'] = 'slow' 62 | args['lr_boundaries'] = 
'1170011,1380011' 63 | args['plot_val'] = True 64 | args['pure_test'] = True 65 | args['plot_val_no_image'] = True 66 | return args 67 | 68 | 69 | def vd_slow_test_noimg_mre(): 70 | args = {} 71 | 72 | args = basic_fix(args) 73 | args = slow_bs(args) 74 | args = vd_basic(args) 75 | args = res18_la(args) 76 | 77 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_slow' 78 | args['exp_id'] = 'vd_slow_test_noimg_mre' 79 | args['model_type'] = 'slow' 80 | args['lr_boundaries'] = '1170011,1380011' 81 | args['plot_val'] = True 82 | args['pure_test'] = True 83 | args['plot_val_no_image'] = True 84 | args['test_no_frames'] = 10 85 | args['val_num_workers'] = 40 86 | return args 87 | -------------------------------------------------------------------------------- /tf_model/opn_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys 3 | import torch 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | sys.path.append(os.path.abspath('../')) 8 | from pt_loader import transforms, opn_datasets 9 | import data 10 | 11 | 12 | def get_train_opn_pt_loader(args): 13 | cfg, _ = data.get_cfg_transform(args) 14 | transform = transforms.video_OPN_transform_color( 15 | crop_size=args.opn_crop_size) 16 | if args.opn_transform == 'Sep': 17 | transform = transforms.video_OPN_transform_sep_color( 18 | crop_size=args.opn_crop_size) 19 | 20 | if args.opn_flow_folder is None: 21 | dataset = opn_datasets.OPNVideoDataset( 22 | cfg['root'], cfg['train_metafile'], 23 | transform=transform) 24 | else: 25 | dataset = opn_datasets.MotionAwareOPNVideoDataset( 26 | cfg['root'], args.opn_flow_folder, 27 | cfg['train_metafile'], 28 | transform=transform) 29 | return data.get_train_dataloader(args, dataset) 30 | 31 | 32 | def get_val_opn_pt_loader(args): 33 | cfg, _ = data.get_val_cfg_transform(args) 34 | transform = transforms.video_transform_val(crop_size=args.opn_crop_size) 35 | 36 | if args.opn_flow_folder is None: 37 | dataset = opn_datasets.OPNVideoDataset( 38 | cfg['root'], cfg['val_metafile'], 39 | transform=transform) 40 | else: 41 | dataset = opn_datasets.MotionAwareOPNVideoDataset( 42 | cfg['root'], args.opn_flow_folder, 43 | cfg['val_metafile'], 44 | transform=transform) 45 | return data.get_val_dataloader(args, dataset) 46 | 47 | 48 | def get_opn_placeholders( 49 | batch_size, 50 | crop_size=80, num_channels=3, 51 | name_prefix='TRAIN'): 52 | num_frames = 4 53 | image_placeholder = tf.placeholder( 54 | tf.uint8, 55 | (batch_size, num_frames, crop_size, crop_size, num_channels), 56 | name='%s_IMAGE_PLACEHOLDER' % name_prefix) 57 | inputs = {'image': image_placeholder} 58 | return inputs 59 | 60 | 61 | def get_feeddict(image, name_prefix='TRAIN'): 62 | image_placeholder = tf.get_default_graph().get_tensor_by_name( 63 | '%s_IMAGE_PLACEHOLDER:0' % name_prefix) 64 | feed_dict = {image_placeholder: image.numpy()} 65 | return feed_dict 66 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_opn.py: -------------------------------------------------------------------------------- 1 | def basic_opn(args): 2 | args['port'] = 27006 3 | args['db_name'] = 'vd_unsup_fx' 4 | args['col_name'] = 'opn' 5 | return args 6 | 7 | 8 | def bs128_opn(args): 9 | args['batch_size'] = 128 10 | args['test_batch_size'] = 64 11 | args['fre_filter'] = 50000 12 | args['fre_cache_filter'] = 5000 13 | args['fre_valid'] = 5000 14 | return args 15 | 16 | 17 | def 
vd_basic_opn(args): 18 | args['image_dir'] = '/data5/chengxuz/Dataset/kinetics/comp_jpgs_extracted' 19 | args['val_image_dir'] = args['image_dir'] 20 | return args 21 | 22 | 23 | def opn_random(): 24 | args = {} 25 | 26 | args = basic_opn(args) 27 | args = bs128_opn(args) 28 | args = vd_basic_opn(args) 29 | 30 | args['exp_id'] = 'opn_random' 31 | args['train_num_workers'] = 30 32 | args['lr_boundaries'] = '145011,215011' 33 | return args 34 | 35 | 36 | def opn_random_224(): 37 | args = {} 38 | 39 | args = basic_opn(args) 40 | args = bs128_opn(args) 41 | args = vd_basic_opn(args) 42 | 43 | args['opn_crop_size'] = 224 44 | args['exp_id'] = 'opn_random_224' 45 | args['train_num_workers'] = 30 46 | return args 47 | 48 | 49 | def opn_random_224_sep(): 50 | args = {} 51 | 52 | args = basic_opn(args) 53 | args = bs128_opn(args) 54 | args = vd_basic_opn(args) 55 | 56 | args['opn_crop_size'] = 224 57 | args['exp_id'] = 'opn_random_224_sep' 58 | args['train_num_workers'] = 30 59 | args['opn_transform'] = 'Sep' 60 | return args 61 | 62 | 63 | def opn_random_sep_flow(): 64 | args = {} 65 | 66 | args = basic_opn(args) 67 | args = bs128_opn(args) 68 | args = vd_basic_opn(args) 69 | 70 | args['exp_id'] = 'opn_random_sep_flow' 71 | args['train_num_workers'] = 30 72 | args['opn_transform'] = 'Sep' 73 | args['opn_flow_folder'] = '/data5/chengxuz/Dataset/kinetics/kinetics_flow25' 74 | args['lr_boundaries'] = '224998,269998' 75 | return args 76 | 77 | 78 | def opn_random_224_sep_flow(): 79 | args = {} 80 | 81 | args = basic_opn(args) 82 | args = bs128_opn(args) 83 | args = vd_basic_opn(args) 84 | 85 | args['opn_crop_size'] = 224 86 | args['exp_id'] = 'opn_random_224_sep_flow' 87 | args['train_num_workers'] = 30 88 | args['opn_transform'] = 'Sep' 89 | args['opn_flow_folder'] = '/data5/chengxuz/Dataset/kinetics/kinetics_flow25' 90 | args['lr_boundaries'] = '279998,344998' 91 | return args 92 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_finetune_HMDB.py: -------------------------------------------------------------------------------- 1 | def finetune_HMDB_bs128(args): 2 | args['batch_size'] = 128 3 | args['test_batch_size'] = 64 4 | args['test_no_frames'] = 5 5 | args['fre_filter'] = 10000 6 | args['fre_cache_filter'] = 5000 7 | args['fre_valid'] = 1000 8 | return args 9 | 10 | 11 | def finetune_basics(args): 12 | # args['port'] = 27006 13 | args['port'] = 27007 14 | args['finetune_conv'] = True 15 | args['cache_dir'] = "/mnt/fs4/shetw/tfutils_cache" 16 | return args 17 | 18 | 19 | def HMDB_basics(args): 20 | args['image_dir'] = '/data5/shetw/HMDB51/extracted_frames' 21 | args['val_image_dir'] = args['image_dir'] 22 | args['metafile_root'] = '/data5/shetw/HMDB51/metafiles' 23 | args['dataset'] = 'HMDB51' 24 | args['train_len'] = 3570 25 | args['val_len'] = 1530 26 | args['train_prep'] = 'ColorJitter' 27 | args['num_classes'] = 51 28 | args['HMDB_sample'] = True 29 | return args 30 | 31 | 32 | def vd_ctl_pool1(): 33 | args = {} 34 | 35 | args = finetune_basics(args) 36 | args = HMDB_basics(args) 37 | args = finetune_HMDB_bs128(args) 38 | 39 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl" 40 | args['load_port'] = 27006 41 | args["save_exp"] = "vd_finetune/HMDB/vd_ctl_pool1" 42 | args['final_pooling'] = 1 43 | args["train_num_workers"] = 10 44 | args["lr_boundaries"] = '1750000,1850000' 45 | return args 46 | 47 | 48 | def vd_tsrn_pool1(): 49 | args = {} 50 | 51 | args = finetune_basics(args) 52 | args = HMDB_basics(args) 53 | args = 
finetune_HMDB_bs128(args) 54 | 55 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_tsrn_f4_pret" 56 | args['load_port'] = 27006 57 | args["save_exp"] = "vd_finetune/HMDB/vd_tsrn_pool1" 58 | args['model_type'] = 'tsrn' 59 | args['trn_num_frames'] = 4 60 | args['get_all_layers'] = '9-time-avg' 61 | args["train_num_workers"] = 30 62 | args['final_pooling'] = 1 63 | 64 | args['lr_boundaries'] = '1755000,1855000' 65 | return args 66 | 67 | 68 | def vd_slow_pool1(): 69 | args = {} 70 | 71 | args = finetune_basics(args) 72 | args = HMDB_basics(args) 73 | args = finetune_HMDB_bs128(args) 74 | 75 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slow" 76 | args['load_port'] = 27006 77 | args["save_exp"] = "vd_finetune/HMDB/vd_slow_pool1" 78 | args['model_type'] = 'slow' 79 | args["train_num_workers"] = 40 80 | args['final_pooling'] = 1 81 | args["lr_boundaries"] = '1460000,1475000' 82 | return args -------------------------------------------------------------------------------- /tf_model/model/cluster_km.py: -------------------------------------------------------------------------------- 1 | import time 2 | import faiss 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | DEFAULT_SEED = 1234 7 | 8 | 9 | def run_kmeans(x, nmb_clusters, verbose=False, seed=DEFAULT_SEED): 10 | """Runs kmeans on 1 GPU. 11 | Args: 12 | x: data 13 | nmb_clusters (int): number of clusters 14 | Returns: 15 | list: ids of data in each cluster 16 | """ 17 | n_data, d = x.shape 18 | 19 | # faiss implementation of k-means 20 | clus = faiss.Clustering(d, nmb_clusters) 21 | clus.niter = 20 22 | clus.max_points_per_centroid = 10000000 23 | clus.seed = seed 24 | res = faiss.StandardGpuResources() 25 | flat_config = faiss.GpuIndexFlatConfig() 26 | flat_config.useFloat16 = False 27 | flat_config.device = 0 28 | index = faiss.GpuIndexFlatL2(res, d, flat_config) 29 | 30 | # perform the training 31 | clus.train(x, index) 32 | _, I = index.search(x, 1) 33 | losses = faiss.vector_to_array(clus.obj) 34 | if verbose: 35 | print('k-means loss evolution: {0}'.format(losses)) 36 | 37 | return [int(n[0]) for n in I], losses[-1] 38 | 39 | 40 | class Kmeans: 41 | def __init__(self, k, memory_bank, cluster_labels): 42 | self.k = k 43 | self.memory_bank = memory_bank 44 | self.cluster_labels = cluster_labels 45 | 46 | self.new_cluster_feed = tf.placeholder( 47 | tf.int64, shape=self.cluster_labels.get_shape().as_list()) 48 | self.update_clusters_op = tf.assign( 49 | self.cluster_labels, self.new_cluster_feed) 50 | 51 | def recompute_clusters(self, sess, verbose=True): 52 | """Performs k-means clustering. 
        Reads the current memory bank out of the session, clusters it
        once for every k in self.k, and returns the stacked labels.
        """
        end = time.time()

        data = sess.run(self.memory_bank.as_tensor())

        all_labels = []
        for k_idx, each_k in enumerate(self.k):
            # cluster the data
            I, _ = run_kmeans(data, each_k,
                              verbose, seed=k_idx + DEFAULT_SEED)
            new_clust_labels = np.asarray(I)
            all_labels.append(new_clust_labels)
        new_clust_labels = np.stack(all_labels, axis=0)

        if verbose:
            print('k-means time: {0:.0f} s'.format(time.time() - end))
        return new_clust_labels

    def apply_clusters(self, sess, new_clust_labels):
        sess.run(self.update_clusters_op, feed_dict={
            self.new_cluster_feed: new_clust_labels
        })
--------------------------------------------------------------------------------
/tf_model/saved_settings/vd_slowfast_fx.py:
--------------------------------------------------------------------------------
from basics import basic_fix
from vd_single_frame_fx import vd_basic, res18_la
from vd_slow_fx import slow_bs


def slowfast_a4_basic(args):
    args['model_type'] = 'slowfast_a4'
    args['train_num_workers'] = 30
    args['val_num_workers'] = 10
    return args


def vd_slowfast_a4_IR():
    args = {}

    args = basic_fix(args)
    args = slow_bs(args)
    args = vd_basic(args)
    args = slowfast_a4_basic(args)

    args['exp_id'] = 'vd_slowfast_a4_IR'
    args['task'] = 'IR'
    return args


def load_from_slowfast_a4_IR(args):
    args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_slowfast_a4_IR'
    args['load_step'] = 50000
    return args


def vd_slowfast_a4():
    args = {}

    args = basic_fix(args)
    args = slow_bs(args)
    args = vd_basic(args)
    args = load_from_slowfast_a4_IR(args)
    args = res18_la(args)
    args = slowfast_a4_basic(args)

    args['exp_id'] = 'vd_slowfast_a4'
    args['lr_boundaries'] = '1120011,1294998'
    return args


def vd_slowfast_a4_test():
    args = {}

    args = basic_fix(args)
    args = slow_bs(args)
    args = vd_basic(args)
    args = res18_la(args)
    args = slowfast_a4_basic(args)

    args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_slowfast_a4'
    args['exp_id'] = 'vd_slowfast_a4_test'
    args['lr_boundaries'] = '1120011,1294998'
    args['plot_val'] = True
    args['pure_test'] = True
    return args


def vd_slowfast_a4_test_noimg():
    args = {}

    args = basic_fix(args)
    args = slow_bs(args)
    args = vd_basic(args)
    args = res18_la(args)
    args = slowfast_a4_basic(args)

    args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_slowfast_a4'
    args['exp_id'] = 'vd_slowfast_a4_test_noimg'
    args['lr_boundaries'] = '1120011,1294998'
    args['plot_val'] = True
    args['pure_test'] = True
    args['plot_val_no_image'] = True
    return args


def vd_slowfast_a4_test_noimg_mre():
    args = {}

    args = basic_fix(args)
    args = slow_bs(args)
    args = vd_basic(args)
    args = res18_la(args)
    args = slowfast_a4_basic(args)

    args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_slowfast_a4'
    args['exp_id'] = 'vd_slowfast_a4_test_noimg_mre'
    args['lr_boundaries'] = '1120011,1294998'
    args['plot_val'] = True
    args['pure_test'] = True
    args['plot_val_no_image'] = True
    args['test_no_frames'] = 10
    args['val_num_workers'] = 40
    return args
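Stepping back to `model/cluster_km.py` above: the `Kmeans` helper keeps the clustering itself outside the TensorFlow graph (faiss k-means on the memory bank) and writes the result back through a placeholder-fed `tf.assign`. A minimal sketch of that recompute/apply cycle — the sizes and variable names here are toy values for illustration, not the training defaults:

```python
# Illustrative recompute/apply cycle for model/cluster_km.py; requires
# faiss-gpu. Sizes are toy values, not the training configuration.
import tensorflow as tf
from model.memory_bank import MemoryBank
from model.cluster_km import Kmeans

data_len, emb_dim, kmeans_k = 1000, 128, [10]
memory_bank = MemoryBank(data_len, emb_dim)
cluster_labels = tf.get_variable(
    'cluster_labels',
    initializer=tf.zeros([len(kmeans_k), data_len], tf.int64),
    trainable=False)
km = Kmeans(kmeans_k, memory_bank, cluster_labels)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # During training this would be done periodically, e.g. once per "epoch".
    new_labels = km.recompute_clusters(sess)
    km.apply_clusters(sess, new_labels)
```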
-------------------------------------------------------------------------------- /tf_model/model/dataset_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys 3 | import functools 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | 8 | def image_dir_to_tfrecords_dataset(image_dir, is_train): 9 | pattern = 'train-*' if is_train else 'validation-*' 10 | pattern = os.path.join(image_dir, pattern) 11 | datasource = tf.gfile.Glob(pattern) 12 | datasource.sort() 13 | tfr_list = np.asarray(datasource) 14 | dataset = tf.data.Dataset.list_files(tfr_list) 15 | 16 | if is_train: 17 | dataset = dataset.apply( 18 | tf.contrib.data.shuffle_and_repeat(len(tfr_list)) 19 | ) 20 | else: 21 | dataset = dataset.repeat() 22 | 23 | def fetch(filename): 24 | buffer_size = 32 * 1024 * 1024 # 32 MiB per file 25 | return tf.data.TFRecordDataset(filename, buffer_size=buffer_size) 26 | 27 | dataset = dataset.apply( 28 | tf.contrib.data.parallel_interleave( 29 | fetch, cycle_length=8, sloppy=True)) 30 | return dataset 31 | 32 | 33 | def data_parser(record_str_tensor, process_img_func, 34 | is_train=True, with_indx=True, num_tile=None): 35 | ''' 36 | Takes a TFRecord string and outputs a dictionary ready to use 37 | as input to the model. 38 | ''' 39 | 40 | # Parse the TFRecord 41 | keys_to_features = { 42 | 'images': tf.FixedLenFeature((), tf.string, ''), 43 | 'labels': tf.FixedLenFeature([], tf.int64, -1)} 44 | if with_indx: 45 | keys_to_features['index'] = tf.FixedLenFeature([], tf.int64, -1) 46 | parsed = tf.parse_single_example(record_str_tensor, keys_to_features) 47 | image_string = parsed['images'] 48 | image_label = parsed['labels'] 49 | image_index = parsed.get('index', None) 50 | 51 | # Process the image 52 | image = process_img_func(image_string) 53 | if num_tile is not None: 54 | curr_shape = image.get_shape().as_list() 55 | image = tf.expand_dims(image, axis=0) 56 | image = tf.tile(image, [num_tile] + [1] * len(curr_shape)) 57 | ret_dict = {'image': image, 'label': image_label} 58 | if with_indx: 59 | ret_dict['index'] = image_index 60 | return ret_dict 61 | 62 | 63 | def dataset_func( 64 | image_dir, process_img_func, is_train, batch_size, q_cap, 65 | num_tile=None): 66 | dataset = image_dir_to_tfrecords_dataset(image_dir, is_train=is_train) 67 | if is_train: 68 | dataset = dataset.shuffle(buffer_size=q_cap) 69 | dataset = dataset.prefetch(batch_size * 4) 70 | dataset = dataset.map(functools.partial( 71 | data_parser, process_img_func=process_img_func, 72 | is_train=is_train, with_indx=is_train, 73 | num_tile=num_tile, 74 | ), num_parallel_calls=64) 75 | dataset = dataset.apply( 76 | tf.contrib.data.batch_and_drop_remainder(batch_size)) 77 | dataset = dataset.prefetch(4) 78 | next_element = dataset.make_one_shot_iterator().get_next() 79 | return next_element 80 | -------------------------------------------------------------------------------- /misc/combine_ckpts.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import tensorflow as tf 4 | import pdb 5 | 6 | sys.path.append(os.path.abspath('../')) 7 | from tf_model.model import resnet_model_slowfast as sf_model 8 | 9 | 10 | SINGLE_MODEL_PATH = '/data/chengxuz/.tfutils/localhost:27006/vd_unsup_fx/dyn_clstr/vd_ctl/checkpoint-1375000' 11 | SLOW_MODEL_PATH = '/data/chengxuz/.tfutils/localhost:27006/vd_unsup_fx/dyn_clstr/vd_slow/checkpoint-1440000' 12 | 
SLOWFAST_MODEL_PATH = '/data/chengxuz/.tfutils/localhost:27006/vd_unsup_fx/dyn_clstr/vd_slowfast_a4/checkpoint-1350000' 13 | OUTPUT_DIR = '/mnt/fs3/chengxuz/vd_relat/slow_single_model' 14 | 15 | 16 | def main(): 17 | os.system('mkdir -p %s' % OUTPUT_DIR) 18 | 19 | slow_single_model = sf_model.SlowSingleModel(resnet_size=18) 20 | input_image = tf.zeros([1, 4, 224, 224, 3], dtype=tf.float32) 21 | _, _ = slow_single_model(input_image, False, get_all_layers=True) 22 | 23 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 24 | 25 | single_vars = tf.get_collection( 26 | tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_model/') 27 | single_saver = tf.train.Saver(single_vars) 28 | 29 | slow_vars = tf.get_collection( 30 | tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_model_slow/') 31 | var_dict = {} 32 | for each_var in slow_vars: 33 | new_name = each_var.op.name.replace( 34 | 'resnet_model_slow/', 'resnet_model/') 35 | var_dict[new_name] = each_var 36 | slow_saver = tf.train.Saver(var_dict) 37 | 38 | final_saver = tf.train.Saver() 39 | 40 | #reader = tf.train.NewCheckpointReader(SLOW_MODEL_PATH) 41 | #var_shapes = reader.get_variable_to_shape_map() 42 | #print('Saved vars and shapes:\n' + str(var_shapes)) 43 | 44 | with tf.Session() as sess: 45 | single_saver.restore(sess, SINGLE_MODEL_PATH) 46 | slow_saver.restore(sess, SLOW_MODEL_PATH) 47 | assert len(sess.run(tf.report_uninitialized_variables())) == 0 48 | final_saver.save(sess, os.path.join(OUTPUT_DIR, 'model')) 49 | 50 | 51 | OUTPUT_SF_DIR = '/mnt/fs3/chengxuz/vd_relat/slowfast_single_model' 52 | 53 | 54 | def main_sf(): 55 | os.system('mkdir -p %s' % OUTPUT_SF_DIR) 56 | 57 | slowfast_single_model = sf_model.SlowFastSingleModel(resnet_size=18) 58 | input_image = tf.zeros([1, 16, 224, 224, 3], dtype=tf.float32) 59 | _, _ = slowfast_single_model(input_image, False, get_all_layers=True) 60 | 61 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 62 | 63 | single_vars = tf.get_collection( 64 | tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_model/') 65 | single_saver = tf.train.Saver(single_vars) 66 | 67 | slowfast_vars = tf.get_collection( 68 | tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_model_') 69 | slowfast_saver = tf.train.Saver(slowfast_vars) 70 | 71 | final_saver = tf.train.Saver() 72 | 73 | with tf.Session() as sess: 74 | single_saver.restore(sess, SINGLE_MODEL_PATH) 75 | slowfast_saver.restore(sess, SLOWFAST_MODEL_PATH) 76 | assert len(sess.run(tf.report_uninitialized_variables())) == 0 77 | final_saver.save(sess, os.path.join(OUTPUT_SF_DIR, 'model')) 78 | 79 | 80 | if __name__ == "__main__": 81 | #main() 82 | main_sf() 83 | -------------------------------------------------------------------------------- /notebook/tfutils_reader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import pymongo as pm 4 | import gridfs 5 | from tensorflow.core.protobuf import saver_pb2 6 | import tarfile 7 | import cPickle 8 | 9 | import numpy as np 10 | from scipy import misc 11 | import os 12 | import time 13 | import sklearn.linear_model 14 | import math 15 | 16 | import tensorflow as tf 17 | from tfutils.db_interface import verify_pb2_v2_files 18 | 19 | 20 | def _print_checkpt_vars(path): 21 | # For debugging 22 | from tensorflow.python.tools.inspect_checkpoint import ( 23 | print_tensors_in_checkpoint_file 24 | ) 25 | print_tensors_in_checkpoint_file(path, 26 | all_tensor_names=True, 27 | all_tensors=False, 28 | tensor_name='') 29 | 30 | 31 | class 
TfutilsReader(object):
    def __init__(self, dbname, colname, exp_id,
                 port, cache_dir):
        self.exp_id = exp_id
        self.conn = conn = pm.MongoClient(port=port)

        self.coll = conn[dbname][colname + '.files']
        self.collfs = gridfs.GridFS(conn[dbname], colname)
        self.fs_bucket = gridfs.GridFSBucket(conn[dbname], colname)

        self.load_files_dir = os.path.join(cache_dir, dbname, colname, exp_id)

    def query(self, query_dict, restrict_fields=None, **kwargs):
        # commonly used kwargs: sort, projection
        query_dict = query_dict.copy()
        query_dict['exp_id'] = self.exp_id
        if restrict_fields is None:
            return self.coll.find(query_dict, **kwargs)
        return self.coll.find(query_dict, restrict_fields, **kwargs)

    def load_gridfs_file(self, rec):
        '''
        Converts a GridFS file to an ordinary file and returns the
        path where the GridFS contents were copied.
        '''
        assert 'saved_filters' in rec

        if not os.path.exists(self.load_files_dir):
            os.makedirs(self.load_files_dir)
        fname = os.path.basename(rec['filename'])
        path = os.path.join(self.load_files_dir, fname)

        if rec['_saver_write_version'] == saver_pb2.SaverDef.V2:
            extracted_path = os.path.splitext(path)[0]
            if os.path.exists(extracted_path + '.index'):
                print('Using already present file at extraction path %s.'
                      % extracted_path)
                return extracted_path
        elif os.path.exists(path):
            print('Using already present file at extraction path %s.' % path)
            return path

        fs_file = open(path, 'wb+')  # 'wrb+' in the original is not a valid mode
        self.fs_bucket.download_to_stream(rec['_id'], fs_file)
        fs_file.close()

        if rec['_saver_write_version'] == saver_pb2.SaverDef.V2:
            assert fname.endswith('.tar')
            tar = tarfile.open(path)
            tar.extractall(path=self.load_files_dir)
            tar.close()
            path = os.path.splitext(path)[0]
            verify_pb2_v2_files(path, rec)
        return path
--------------------------------------------------------------------------------
/tf_model/README.md:
--------------------------------------------------------------------------------
# Instructions for training

We first show how to train a VIE-3DResNet as an example, including pretraining on Kinetics, transfer learning on ImageNet and Kinetics, and fine-tuning on UCF101 and HMDB51.
We then show how other models can be trained with minor modifications to the VIE-3DResNet commands.

For legacy reasons, we only support tensorflow < 2.0. We have tested our code with Python 3.7.
You also need to install [faiss-gpu](https://github.com/facebookresearch/faiss)==1.6.1 and pytorch.

## VIE-3DResNet
You can either start the training directly using a single GPU, or train with the help of tfutils, which provides multi-GPU support.

### Training directly

Make the required changes to the following script to set the parameters, then run it to start the training. It first runs a VIE-3DResNet-IR training as pretraining for the VIE-3DResNet and then starts the main VIE-3DResNet training:
```
sh run_training.sh
```
During training, k-nearest-neighbor validation is performed on the validation videos of Kinetics. This validation is done by taking 5 clips from each validation video, finding the 10 nearest neighbors of each clip separately in the memory bank of the training videos, combining the labels of these 10 neighbors in a weighted manner (see the Instance Discrimination paper for details) to get class probabilities for each clip, and finally averaging the class probabilities across the 5 clips, as sketched below.
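The following numpy sketch illustrates this weighted voting; the temperature `tau` and the random similarities and sizes are placeholders (see the Instance Discrimination paper for the exact weighting scheme):

```python
import numpy as np

num_classes, num_neighbors, num_clips, tau = 400, 10, 5, 0.07
rng = np.random.RandomState(0)

clip_probs = []
for _ in range(num_clips):
    sims = rng.rand(num_neighbors)                          # similarities to the 10 neighbors
    labels = rng.randint(num_classes, size=num_neighbors)   # their class labels
    weights = np.exp(sims / tau)                            # similarity-weighted votes
    probs = np.zeros(num_classes)
    for w, lbl in zip(weights, labels):
        probs[lbl] += w
    clip_probs.append(probs / probs.sum())

video_probs = np.mean(clip_probs, axis=0)                   # average over the 5 clips
prediction = int(np.argmax(video_probs))
```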

### Transfer learning to Kinetics and ImageNet; fine-tuning to UCF101 and HMDB51
For transfer learning to ImageNet, you first need to build ImageNet tfrecords following the instructions in the [LocalAggregation](https://github.com/neuroailab/LocalAggregation.git) repo. Then run the following script:
```
sh run_transfer_IN.sh
```

For transfer learning to Kinetics:
```
sh run_transfer_KN.sh
```

The reported transfer learning performance has three keys: `top1_5`, `top1_7`, and `top1_9`. They are named this way because ResNet-18 has 9 "layers": the first convolution-pooling layer and the remaining eight residual blocks. So `top1_5` means reading out from the 5th layer (CONV3 in the paper), `top1_7` from the 7th layer (CONV4 in the paper), and `top1_9` from the final layer (CONV5 in the paper).

For the finetuning experiments, the scripts provide configs for reproducing both the Table 2 and Table 3 results; by default, each script reproduces the Table 2 result. See the following scripts for how to generate the Table 3 result.
For finetuning on UCF101:
```
sh run_finetune_UCF.sh
```

For finetuning on HMDB51:
```
sh run_finetune_HMDB.sh
```

### Training using tfutils
Install `tfutils` as follows.
```
git clone https://github.com/neuroailab/tfutils.git
cd tfutils
python setup.py install --user
```

For all the previous training examples, just add ` --tfutils --port mongodb_port_number` after the command to start the training using tfutils, e.g. `sh run_training.sh --tfutils --port mongodb_port_number`.
However, you need to have a MongoDB instance running for tfutils to work.


## Other models
COMING SOON
--------------------------------------------------------------------------------
/tf_model/model/rot_model.py:
--------------------------------------------------------------------------------
from __future__ import division, print_function, absolute_import
import os, sys
import json
import numpy as np
import tensorflow as tf
import copy
import pdb
from collections import OrderedDict

from .instance_model import color_normalize
from .resnet3D_model import get_block_sizes, Model, DEFAULT_DTYPE


class ROTModel(Model):
    def __init__(self, resnet_size, data_format=None,
                 dtype=DEFAULT_DTYPE):
        """
        Args:
          resnet_size: The number of convolutional layers needed in the model.
          data_format: Either 'channels_first' or 'channels_last', specifying which
            data format to use when setting up the model.
          num_classes: The number of output classes needed from the model. This
            enables users to extend the same model to their own datasets.
          resnet_version: Integer representing which version of the ResNet network
            to use. See README for details. Valid values: [1, 2]
          dtype: The TensorFlow dtype to use for calculations.
27 | """ 28 | 29 | # For bigger models, we want to use "bottleneck" layers 30 | if resnet_size < 50: 31 | bottleneck = False 32 | final_size = 512 33 | else: 34 | bottleneck = True 35 | final_size = 2048 36 | 37 | super(ROTModel, self).__init__( 38 | resnet_size=resnet_size, 39 | bottleneck=bottleneck, 40 | num_classes=None, 41 | num_filters=64, 42 | kernel_size=7, 43 | conv_stride=2, 44 | time_kernel_size=7, 45 | first_pool_size=3, 46 | first_pool_stride=2, 47 | block_sizes=get_block_sizes(resnet_size), 48 | block_strides=[1, 2, 2, 2], 49 | final_size=final_size, 50 | data_format=data_format) 51 | 52 | def _preprocess_data(self, inputs): 53 | org_shape = inputs.get_shape().as_list() 54 | inputs = tf.reshape( 55 | inputs, 56 | [org_shape[0], 4, org_shape[1] // 4] + org_shape[2:]) 57 | inputs = tf.reshape( 58 | inputs, 59 | [org_shape[0] * 4, org_shape[1] // 4] + org_shape[2:]) 60 | if self.data_format == 'channels_first': 61 | # Convert the inputs from channels_last (NHWC) to channels_first (NCHW). 62 | # This provides a large performance boost on GPU. See 63 | # https://www.tensorflow.org/performance/performance_guide#data_formats 64 | inputs = tf.transpose(inputs, [0, 4, 1, 2, 3]) 65 | return inputs 66 | 67 | def _get_final_dense(self, inputs): 68 | inputs = tf.reshape(inputs, [-1, self.final_size]) 69 | inputs = tf.layers.dense(inputs=inputs, units=64) 70 | inputs = tf.identity(inputs, 'final_dense_1') 71 | inputs = tf.layers.dense(inputs=inputs, units=4) 72 | inputs = tf.identity(inputs, 'final_dense_2') 73 | 74 | all_logits = inputs 75 | all_labels = tf.tile( 76 | tf.expand_dims(tf.range(4, dtype=tf.int64), axis=0), 77 | [inputs.get_shape().as_list()[0] // 4, 1]) 78 | all_labels = tf.reshape(all_labels, [-1]) 79 | _, pred = tf.nn.top_k(all_logits, k=1) 80 | pred = tf.cast(tf.squeeze(pred), tf.int64) 81 | accuracy = tf.reduce_mean( 82 | tf.cast(tf.equal(pred, all_labels), tf.float32)) 83 | 84 | one_hot_labels = tf.one_hot(all_labels, 4) 85 | loss = tf.losses.softmax_cross_entropy(one_hot_labels, all_logits) 86 | return loss, accuracy 87 | 88 | 89 | def build_loss_accuracy( 90 | inputs, train, 91 | resnet_size=18, 92 | *args, **kwargs): 93 | image = color_normalize(inputs['image']) 94 | model = ROTModel( 95 | resnet_size=resnet_size) 96 | loss, accuracy = model(image, train, skip_final_dense=False) 97 | return {'loss': loss, 'accuracy': accuracy}, {} 98 | -------------------------------------------------------------------------------- /tf_model/model/preprocessing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import os, sys 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from .resnet_th_preprocessing import ( 7 | preprocessing_inst, RandomSizedCrop_from_jpeg, 8 | ApplyGray, ColorJitter, alexnet_crop_from_jpg 9 | ) 10 | 11 | # This file contains various preprocessing ops for images (typically 12 | # used for data augmentation). 
13 | 14 | def resnet_train(img_str): 15 | return preprocessing_inst(img_str, 224, 224, is_train=True) 16 | 17 | 18 | def resnet_train_112(img_str): 19 | return preprocessing_inst(img_str, 112, 112, is_train=True) 20 | 21 | 22 | def resnet_validate(img_str): 23 | return preprocessing_inst(img_str, 224, 224, is_train=False) 24 | 25 | 26 | def resnet_validate_112(img_str): 27 | return preprocessing_inst( 28 | img_str, 112, 112, 29 | is_train=False, val_short_side=128) 30 | 31 | 32 | def resnet_crop_only(img_str): 33 | return RandomSizedCrop_from_jpeg( 34 | img_str, out_height=224, out_width=224, size_minval=0.2) 35 | 36 | 37 | def resnet_crop_flip(img_str): 38 | img = RandomSizedCrop_from_jpeg( 39 | img_str, out_height=224, out_width=224, size_minval=0.2) 40 | img = tf.image.random_flip_left_right(img) 41 | return img 42 | 43 | 44 | def alexnet_crop_flip(img_str): 45 | img = alexnet_crop_from_jpg(img_str) 46 | img = tf.image.random_flip_left_right(img) 47 | return img 48 | 49 | 50 | def resnet_noflip(img_str): 51 | img = resnet_crop_only(img_str) 52 | img = ApplyGray(img, 0.2) 53 | return ColorJitter(img) 54 | 55 | 56 | def resnet_nocrop(img_str): 57 | img = resnet_validate(img_str) 58 | img = ApplyGray(img, 0.2) 59 | img = ColorJitter(img) 60 | return tf.image.random_flip_left_right(img) 61 | 62 | 63 | def resnet_bigcrop(img_str): 64 | return preprocessing_inst(img_str, 224, 224, is_train=True, 65 | size_minval=0.6) 66 | 67 | 68 | def resnet_4way_rot(img_str): 69 | img = resnet_train(img_str) 70 | angle_choice = tf.random_uniform([], maxval=4, dtype=tf.int32) 71 | angle = tf.cast(angle_choice, tf.float32) * (np.pi/2) 72 | img = tf.contrib.image.rotate(img, angle) 73 | return img 74 | 75 | 76 | def resnet_rot(img_str): 77 | img = resnet_train(img_str) 78 | angle_choice = tf.random_uniform([], maxval=1, dtype=tf.float32) 79 | angle = angle_choice * (2*np.pi) 80 | img = tf.contrib.image.rotate(img, angle) 81 | return img 82 | 83 | 84 | def _get_resize_scale(height, width, smallest_side): 85 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 86 | 87 | height = tf.to_float(height) 88 | width = tf.to_float(width) 89 | smallest_side = tf.to_float(smallest_side) 90 | 91 | scale = tf.cond( 92 | tf.greater(height, width), 93 | lambda: smallest_side / width, 94 | lambda: smallest_side / height) 95 | return scale 96 | 97 | 98 | def center_crop(img_str, out_height, out_width): 99 | shape = tf.image.extract_jpeg_shape(img_str) 100 | # the scaling factor needed to make the smaller side 256 101 | scale = _get_resize_scale(shape[0], shape[1], 256) 102 | cp_height = tf.cast(out_height / scale, tf.int32) 103 | cp_width = tf.cast(out_width / scale, tf.int32) 104 | cp_begin_x = tf.cast((shape[0] - cp_height) / 2, tf.int32) 105 | cp_begin_y = tf.cast((shape[1] - cp_width) / 2, tf.int32) 106 | bbox = tf.stack([cp_begin_x, cp_begin_y, 107 | cp_height, cp_width]) 108 | crop_image = tf.image.decode_and_crop_jpeg( 109 | img_str, bbox, channels=3) 110 | # bilinear resize; the original called an undefined `image_resize` helper 111 | image = tf.image.resize_images(crop_image, [out_height, out_width]) 112 | image.set_shape([out_height, out_width, 3]) 113 | return image 114 | 115 | EPS = 1e-5  # assumed value; EPS was not defined elsewhere in this file 116 | def rgb_to_gray(flt_image): 117 | flt_image = tf.cast(flt_image, tf.float32) 118 | gry_image = flt_image[:,:,0] * 0.299 \ 119 | + flt_image[:,:,1] * 0.587 \ 120 | + flt_image[:,:,2] * 0.114 121 | gry_image = tf.expand_dims(gry_image, axis=2) 122 | gry_image = tf.cast(gry_image + EPS, tf.uint8) 123 | gry_image = tf.cast(gry_image, tf.float32) 124 | return gry_image 125 |
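The kNN validation described in the README above can be summarized by the following sketch (illustrative only; `clip_embs`, `bank`, `bank_labels`, and the temperature `tau` are assumed names, not identifiers from this repo):
```
import numpy as np

def knn_validate(clip_embs, bank, bank_labels, num_classes=400, k=10, tau=0.07):
    # clip_embs: (5, D) L2-normalized embeddings of 5 clips from one video
    # bank: (N, D) L2-normalized memory bank of training-video embeddings
    # bank_labels: (N,) class index of each training video
    probs = np.zeros((len(clip_embs), num_classes))
    for i, emb in enumerate(clip_embs):
        sims = bank.dot(emb)                   # cosine similarities
        top = np.argsort(-sims)[:k]            # the k nearest neighbors
        weights = np.exp(sims[top] / tau)      # weighting as in Instance Discrimination
        for w, n in zip(weights, top):
            probs[i, bank_labels[n]] += w      # weighted vote per class
    probs /= probs.sum(axis=1, keepdims=True)  # per-clip class probabilities
    return probs.mean(axis=0)                  # average over the 5 clips
```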
-------------------------------------------------------------------------------- /tf_model/saved_settings/vd_trn.py: -------------------------------------------------------------------------------- 1 | from basics import basic_fix 2 | from vd_single_frame_fx import vd_basic, res18_la 3 | from vd_slow_fx import slow_bs 4 | 5 | 6 | def vd_trn_IR(): 7 | args = {} 8 | 9 | args = basic_fix(args) 10 | args = slow_bs(args) 11 | args = vd_basic(args) 12 | 13 | args['exp_id'] = 'vd_trn_IR' 14 | args['model_type'] = 'trn' 15 | args['task'] = 'IR' 16 | return args 17 | 18 | 19 | def vd_trn_f4_IR(): 20 | args = {} 21 | 22 | args = basic_fix(args) 23 | args = slow_bs(args) 24 | args = vd_basic(args) 25 | 26 | args['exp_id'] = 'vd_trn_f4_IR' 27 | args['model_type'] = 'trn' 28 | args['task'] = 'IR' 29 | args['trn_num_frames'] = 4 30 | return args 31 | 32 | 33 | def load_from_LA_final(args): 34 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_ctl' 35 | return args 36 | 37 | 38 | def vd_trn_f4_pret(): 39 | args = {} 40 | 41 | args = basic_fix(args) 42 | args = slow_bs(args) 43 | args = vd_basic(args) 44 | args = load_from_LA_final(args) 45 | args = res18_la(args) 46 | 47 | args['exp_id'] = 'vd_trn_f4_pret' 48 | args['model_type'] = 'trn' 49 | args['trn_num_frames'] = 4 50 | args['lr_boundaries'] = '1460011,1570011' 51 | return args 52 | 53 | 54 | def vd_trn_pret(): 55 | args = {} 56 | 57 | args = basic_fix(args) 58 | args = slow_bs(args) 59 | args = vd_basic(args) 60 | args = load_from_LA_final(args) 61 | args = res18_la(args) 62 | 63 | args['exp_id'] = 'vd_trn_pret' 64 | args['model_type'] = 'trn' 65 | args['lr_boundaries'] = '1460011,1570011' 66 | return args 67 | 68 | 69 | def vd_tsrn_IR(): 70 | args = {} 71 | 72 | args = basic_fix(args) 73 | args = slow_bs(args) 74 | args = vd_basic(args) 75 | 76 | args['exp_id'] = 'vd_tsrn_IR' 77 | args['model_type'] = 'tsrn' 78 | args['task'] = 'IR' 79 | return args 80 | 81 | 82 | def vd_tsrn_f4_IR(): 83 | args = {} 84 | 85 | args = basic_fix(args) 86 | args = slow_bs(args) 87 | args = vd_basic(args) 88 | 89 | args['exp_id'] = 'vd_tsrn_f4_IR' 90 | args['model_type'] = 'tsrn' 91 | args['task'] = 'IR' 92 | args['trn_num_frames'] = 4 93 | return args 94 | 95 | 96 | def load_from_tsrn_f4_IR(args): 97 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_tsrn_f4_IR' 98 | args['load_step'] = 50000 99 | return args 100 | 101 | 102 | def vd_tsrn_f4(): 103 | args = {} 104 | 105 | args = basic_fix(args) 106 | args = slow_bs(args) 107 | args = vd_basic(args) 108 | args = load_from_tsrn_f4_IR(args) 109 | args = res18_la(args) 110 | 111 | args['exp_id'] = 'vd_tsrn_f4' 112 | args['model_type'] = 'tsrn' 113 | args['trn_num_frames'] = 4 114 | return args 115 | 116 | 117 | def vd_tsrn_f4_pret(): 118 | args = {} 119 | 120 | args = basic_fix(args) 121 | args = slow_bs(args) 122 | args = vd_basic(args) 123 | args = load_from_LA_final(args) 124 | args = res18_la(args) 125 | 126 | args['exp_id'] = 'vd_tsrn_f4_pret' 127 | args['model_type'] = 'tsrn' 128 | args['trn_num_frames'] = 4 129 | args['lr_boundaries'] = '1455011,1570011' 130 | return args 131 | 132 | 133 | def vd_tsrn_slow_IR(): 134 | args = {} 135 | 136 | args = basic_fix(args) 137 | args = slow_bs(args) 138 | args = vd_basic(args) 139 | 140 | args['exp_id'] = 'vd_tsrn_slow_IR' 141 | args['model_type'] = 'tsrn_slow' 142 | args['task'] = 'IR' 143 | return args 144 | 145 | 146 | def load_from_tsrn_slow_IR(args): 147 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_tsrn_slow_IR' 148 | args['load_step'] = 50000 149 | return args 150 | 151 | 152 
| def vd_tsrn_slow(): 153 | args = {} 154 | 155 | args = basic_fix(args) 156 | args = slow_bs(args) 157 | args = vd_basic(args) 158 | args = load_from_tsrn_slow_IR(args) 159 | args = res18_la(args) 160 | 161 | args['exp_id'] = 'vd_tsrn_slow' 162 | args['model_type'] = 'tsrn_slow' 163 | return args 164 | 165 | 166 | def vd_tsrn_slow_pret(): 167 | args = {} 168 | 169 | args = basic_fix(args) 170 | args = slow_bs(args) 171 | args = vd_basic(args) 172 | args = load_from_LA_final(args) 173 | args = res18_la(args) 174 | 175 | args['exp_id'] = 'vd_tsrn_slow_pret' 176 | args['model_type'] = 'tsrn_slow' 177 | args['lr_boundaries'] = '1455011,1570011' 178 | return args 179 | -------------------------------------------------------------------------------- /tf_model/model/trn_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import pdb 7 | import itertools 8 | 9 | 10 | class Relation(object): 11 | 12 | def __init__( 13 | self, num_inputs, 14 | out_features, 15 | bottleneck_dim=512, 16 | layer_name=None): 17 | self.num_inputs = num_inputs 18 | self.out_features = out_features 19 | self.bottleneck_dim = bottleneck_dim 20 | if layer_name is not None: 21 | self.layer_name = layer_name 22 | else: 23 | self.layer_name = 'relation_{}'.format(num_inputs) 24 | 25 | def __call__(self, inputs): 26 | no_rel, bs, _, _ = inputs.get_shape().as_list() 27 | inputs = tf.reshape(inputs, [no_rel * bs, -1]) 28 | inputs = tf.layers.dense(inputs=inputs, units=self.bottleneck_dim, 29 | activation=tf.nn.relu, 30 | name=self.layer_name + '_hidden') 31 | inputs = tf.layers.dense(inputs=inputs, units=self.out_features, 32 | name=self.layer_name + '_out') 33 | inputs = tf.reshape(inputs, [no_rel, bs, -1]) 34 | return inputs 35 | 36 | 37 | class MultiScaleRelation(object): 38 | """Multi-scale relation module. 39 | This module applies MLPs to concatenated n-frame input tuples. 40 | 41 | Args: 42 | num_frame_total: total number of frame features (e.g. 16 frames). 43 | out_features: dim of output relation feature. 44 | bottleneck_dim: dim of bottleneck in each relation MLP. 45 | num_relations: max number of relation tuples sampled per scale. 46 | use_mean: if True, average (rather than sum) the relation outputs. 47 | """ 48 | 49 | def __init__(self, 50 | num_frame_total, 51 | out_features, 52 | bottleneck_dim=512, 53 | num_relations=8, 54 | use_mean=False): 55 | self.num_frame_total = num_frame_total 56 | self.out_features = out_features 57 | self.num_relations = num_relations 58 | self.bottleneck_dim = bottleneck_dim 59 | self.use_mean = use_mean 60 | 61 | self.scales = list(range(num_frame_total, 1, -1)) 62 | self.relations_scales = [] 63 | self.subsample_scales = [] 64 | 65 | for scale in self.scales: 66 | 67 | # Determine possible `scale`-input tuples, e.g. with num_frame_total=3: 68 | # [(0, 1), (0, 2), (1, 2)] = self.return_relationset(2) 69 | relations_scale = self.return_relationset(scale) 70 | self.relations_scales.append(relations_scale) 71 | 72 | # Limit the number of relation tuples sampled at this scale. 73 | self.subsample_scales.append( 74 | min(self.num_relations, len(relations_scale))) 75 | 76 | # Each Relation takes `scale` num frame features.
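        # For example, with num_frame_total=4 the scales are [4, 3, 2],
        # giving C(4,4)=1, C(4,3)=4 and C(4,2)=6 candidate tuples per
        # scale, of which at most num_relations are sampled in __call__.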
77 | self.relations = [ 78 | Relation(scale, self.out_features, self.bottleneck_dim) \ 79 | for scale in self.scales] 80 | 81 | print('Adding multi-Scale Relation Network Module') 82 | print(['{}-frame relation'.format(i) for i in self.scales]) 83 | 84 | def return_relationset(self, num_input_relation): 85 | return list(itertools.combinations( 86 | range(self.num_frame_total), num_input_relation)) 87 | 88 | def __call__(self, input): 89 | """Apply TRN module. 90 | 91 | Args: 92 | input: frame features (batch_size, num_frames, feature_dim) 93 | 94 | Returns: 95 | video embedding: (batch_size, out_features) 96 | """ 97 | output = [] 98 | for idx_scale, scale in enumerate(self.scales): 99 | curr_num_relations = self.subsample_scales[idx_scale] 100 | max_num_relations = len(self.relations_scales[idx_scale]) 101 | idx_relations = tf.random_uniform( 102 | shape=[curr_num_relations], 103 | minval=0, maxval=max_num_relations, 104 | dtype=tf.int64) 105 | 106 | input_to_mlp = [] 107 | curr_relation_ts = tf.constant(self.relations_scales[idx_scale]) 108 | for idx in range(curr_num_relations): 109 | curr_idx_rel = idx_relations[idx] 110 | curr_rel_tuple = curr_relation_ts[curr_idx_rel] 111 | input_relation = tf.gather(input, curr_rel_tuple, axis=1) 112 | input_to_mlp.append(input_relation) 113 | input_to_mlp = tf.stack(input_to_mlp, axis=0) 114 | output_of_mlp = self.relations[idx_scale](input_to_mlp) 115 | output.append(output_of_mlp) 116 | 117 | output = tf.concat(output, axis=0) 118 | if not self.use_mean: 119 | output = tf.reduce_sum(output, axis=0) 120 | else: 121 | output = tf.reduce_mean(output, axis=0) 122 | return output 123 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_single_frame_fx.py: -------------------------------------------------------------------------------- 1 | from saved_settings.basics import basic_fix, bs128 2 | from utils import DATA_LEN_KINETICS_400 3 | 4 | 5 | def vd_basic(args): 6 | args['dataset'] = 'kinetics' 7 | args['data_len'] = DATA_LEN_KINETICS_400 8 | args['image_dir'] = '/data5/chengxuz/Dataset/kinetics/comp_jpgs_extracted' 9 | args['val_image_dir'] = args['image_dir'] 10 | args['train_prep'] = 'ColorJitter' 11 | return args 12 | 13 | 14 | def UCF_basic(args): 15 | args['data_len'] = 9537 16 | args['image_dir'] = '/data5/shetw/UCF101/extracted_frames' 17 | args['val_image_dir'] = args['image_dir'] 18 | args['metafile_root'] = '/data5/shetw/UCF101/metafiles' 19 | args['dataset'] = 'UCF101' 20 | args['train_len'] = 9537 21 | args['val_len'] = 3783 22 | args['num_classes'] = 101 23 | args['train_prep'] = 'ColorJitter' 24 | return args 25 | 26 | 27 | def vd_ctl_IR(): 28 | args = {} 29 | 30 | args = basic_fix(args) 31 | args = bs128(args) 32 | args = vd_basic(args) 33 | 34 | args['exp_id'] = 'vd_ctl_IR' 35 | args['task'] = 'IR' 36 | args['lr_boundaries'] = '845011,1280011' 37 | return args 38 | 39 | 40 | def vd_ctl_p30_IR(): 41 | args = {} 42 | 43 | args = basic_fix(args) 44 | args = bs128(args) 45 | args = vd_basic(args) 46 | 47 | args['exp_id'] = 'vd_ctl_p30_IR' 48 | args['task'] = 'IR' 49 | args['part_vd'] = 0.3 50 | args['data_len'] = int(DATA_LEN_KINETICS_400 * 0.3) 51 | args['lr_boundaries'] = '865011,1080011' 52 | return args 53 | 54 | 55 | def vd_ctl_p70_IR(): 56 | args = {} 57 | 58 | args = basic_fix(args) 59 | args = bs128(args) 60 | args = vd_basic(args) 61 | 62 | args['exp_id'] = 'vd_ctl_p70_IR' 63 | args['task'] = 'IR' 64 | args['part_vd'] = 0.7 65 | args['data_len'] = 
int(DATA_LEN_KINETICS_400 * 0.7) 66 | args['lr_boundaries'] = '875011,1080011' 67 | return args 68 | 69 | 70 | def res18_la(args): 71 | args['kmeans_k'] = '8000' 72 | args['instance_k'] = 512 73 | return args 74 | 75 | 76 | def load_from_IR(args): 77 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_ctl_IR' 78 | args['load_step'] = 50000 79 | return args 80 | 81 | 82 | def vd_ctl(): 83 | args = {} 84 | 85 | args = basic_fix(args) 86 | args = bs128(args) 87 | args = vd_basic(args) 88 | args = load_from_IR(args) 89 | args = res18_la(args) 90 | 91 | args['exp_id'] = 'vd_ctl' 92 | args['lr_boundaries'] = '1020011,1220011' 93 | return args 94 | 95 | 96 | def res18_la_p30(args): 97 | args['kmeans_k'] = '2400' 98 | args['instance_k'] = 512 99 | return args 100 | 101 | 102 | def load_from_IR_p30(args): 103 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_ctl_p30_IR' 104 | args['load_step'] = 50000 105 | return args 106 | 107 | 108 | def vd_ctl_p30(): 109 | args = {} 110 | 111 | args = basic_fix(args) 112 | args = bs128(args) 113 | args = vd_basic(args) 114 | args = load_from_IR_p30(args) 115 | args = res18_la_p30(args) 116 | 117 | args['exp_id'] = 'vd_ctl_p30' 118 | args['part_vd'] = 0.3 119 | args['data_len'] = int(DATA_LEN_KINETICS_400 * 0.3) 120 | args['lr_boundaries'] = '860011,1060011' 121 | return args 122 | 123 | 124 | def res18_la_p70(args): 125 | args['kmeans_k'] = '5600' 126 | args['instance_k'] = 512 127 | return args 128 | 129 | 130 | def load_from_IR_p70(args): 131 | args['load_exp'] = 'vd_unsup_fx/dyn_clstr/vd_ctl_p70_IR' 132 | args['load_step'] = 50000 133 | return args 134 | 135 | 136 | def vd_ctl_p70(): 137 | args = {} 138 | 139 | args = basic_fix(args) 140 | args = bs128(args) 141 | args = vd_basic(args) 142 | args = load_from_IR_p70(args) 143 | args = res18_la_p70(args) 144 | 145 | args['exp_id'] = 'vd_ctl_p70' 146 | args['part_vd'] = 0.7 147 | args['data_len'] = int(DATA_LEN_KINETICS_400 * 0.7) 148 | args['lr_boundaries'] = '860011,1060011' 149 | return args 150 | 151 | 152 | def res18_la_smK(args): 153 | args['kmeans_k'] = '4000' 154 | args['instance_k'] = 512 155 | return args 156 | 157 | 158 | def vd_ctl_smK(): 159 | args = {} 160 | 161 | args = basic_fix(args) 162 | args = bs128(args) 163 | args = vd_basic(args) 164 | args = load_from_IR(args) 165 | args = res18_la_smK(args) 166 | 167 | args['exp_id'] = 'vd_ctl_smK' 168 | args['lr_boundaries'] = '1025011,1220011' 169 | return args 170 | 171 | 172 | def res18_la_bgN(args): 173 | args['kmeans_k'] = '8000' 174 | args['instance_k'] = 1024 175 | return args 176 | 177 | 178 | def vd_ctl_bgN(): 179 | args = {} 180 | 181 | args = basic_fix(args) 182 | args = bs128(args) 183 | args = vd_basic(args) 184 | args = load_from_IR(args) 185 | args = res18_la_bgN(args) 186 | 187 | args['exp_id'] = 'vd_ctl_bgN' 188 | args['lr_boundaries'] = '985011,1255011' 189 | return args 190 | -------------------------------------------------------------------------------- /tf_model/train_rot.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | import json 7 | import copy 8 | import argparse 9 | import time 10 | import functools 11 | import inspect 12 | import pdb 13 | 14 | from tfutils import base, optimizer 15 | import tfutils.defaults 16 | 17 | from model import rot_model 18 | 19 | from utils import online_keep_all 20 | import config 21 | import rot_data 22 | 23 | import train_vie 
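# Train/val video counts used by the rotation task; slightly smaller than
# the full Kinetics-400 counts in utils.py (239888/19653), presumably
# because videos that are too short for this task are filtered out.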
24 | ROT_KINETICS_VIDEOS = 239871 25 | ROT_KINETICS_VAL_VDS = 19647 26 | 27 | 28 | def get_params_from_arg(args): 29 | save_params, load_params = train_vie.get_save_load_params_from_arg(args) 30 | loss_params, learning_rate_params, optimizer_params \ 31 | = train_vie.get_loss_lr_opt_params_from_arg(args) 32 | 33 | # train_params 34 | train_data_loader = rot_data.get_train_rot_pt_loader(args) 35 | data_enumerator = [enumerate(train_data_loader)] 36 | def train_loop(sess, train_targets, num_minibatches=1, **params): 37 | assert num_minibatches==1, "Mini-batch not supported!" 38 | 39 | global_step_vars = [v for v in tf.global_variables() \ 40 | if 'global_step' in v.name] 41 | assert len(global_step_vars) == 1 42 | global_step = sess.run(global_step_vars[0]) 43 | 44 | data_en_update_fre = ROT_KINETICS_VIDEOS // args.batch_size 45 | if global_step % data_en_update_fre == 0: 46 | data_enumerator.pop() 47 | data_enumerator.append(enumerate(train_data_loader)) 48 | _, image = next(data_enumerator[0]) 49 | feed_dict = rot_data.get_feeddict(image) 50 | sess_res = sess.run(train_targets, feed_dict=feed_dict) 51 | return sess_res 52 | 53 | train_data_param = { 54 | 'func': rot_data.get_rot_placeholders, 55 | 'batch_size': args.batch_size} 56 | train_params = { 57 | 'validate_first': False, 58 | 'data_params': train_data_param, 59 | 'queue_params': None, 60 | 'thres_loss': float('Inf'), 61 | 'num_steps': float('Inf'), 62 | 'train_loop': {'func': train_loop}} 63 | train_params['targets'] = { 64 | 'func': lambda inputs, output: {'accuracy': output['accuracy']}} 65 | 66 | # validation_params 67 | val_len = 3 * ROT_KINETICS_VAL_VDS 68 | topn_val_data_param = { 69 | 'func': rot_data.get_rot_placeholders, 70 | 'batch_size': args.test_batch_size, 71 | 'name_prefix': 'VAL'} 72 | 73 | val_step_num = int(val_len / args.test_batch_size) 74 | val_data_loader = rot_data.get_val_rot_pt_loader(args) 75 | val_counter = [0] 76 | val_data_enumerator = [enumerate(val_data_loader)] 77 | def valid_loop(sess, target): 78 | val_counter[0] += 1 79 | if val_counter[0] % (ROT_KINETICS_VAL_VDS // args.test_batch_size) == 0: 80 | val_data_enumerator.pop() 81 | val_data_enumerator.append(enumerate(val_data_loader)) 82 | _, image = next(val_data_enumerator[0]) 83 | feed_dict = rot_data.get_feeddict(image, name_prefix='VAL') 84 | return sess.run(target, feed_dict=feed_dict) 85 | 86 | val_targets = { 87 | 'func': lambda inputs, output: {'accuracy': output['accuracy']}} 88 | 89 | topn_val_param = { 90 | 'data_params': topn_val_data_param, 91 | 'queue_params': None, 92 | 'targets': val_targets, 93 | 'num_steps': val_step_num, 94 | 'agg_func': lambda x: {k: np.mean(v) for k, v in x.items()}, 95 | 'online_agg_func': train_vie.online_agg, 96 | 'valid_loop': {'func': valid_loop} 97 | } 98 | validation_params = {'topn': topn_val_param} 99 | 100 | # model_params 101 | model_params = { 102 | 'func': rot_model.build_loss_accuracy, 103 | 'resnet_size': args.resnet_size} 104 | multi_gpu = len(args.gpu.split(',')) 105 | if multi_gpu > 1: 106 | model_params['num_gpus'] = multi_gpu 107 | model_params['devices'] = ['/gpu:%i' % idx for idx in range(multi_gpu)] 108 | 109 | # Put all parameters together 110 | params = { 111 | 'save_params': save_params, 112 | 'load_params': load_params, 113 | 'loss_params': loss_params, 114 | 'learning_rate_params': learning_rate_params, 115 | 'optimizer_params': optimizer_params, 116 | 'log_device_placement': False, 117 | 'skip_check': True, 118 | 'train_params': train_params, 119 | 'validation_params': 
validation_params, 120 | 'model_params': model_params, 121 | } 122 | return params 123 | 124 | 125 | def get_config(): 126 | cfg = train_vie.get_config() 127 | cfg.add('rot_real_prep', type=bool, 128 | help='Whether the train preprocessing uses a real resize') 129 | return cfg 130 | 131 | 132 | def main(): 133 | # Parse arguments 134 | cfg = get_config() 135 | args = cfg.parse_args() 136 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 137 | 138 | # Get params needed, start training 139 | params = get_params_from_arg(args) 140 | base.train_from_params(**params) 141 | 142 | 143 | if __name__ == "__main__": 144 | main() 145 | -------------------------------------------------------------------------------- /pt_loader/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | MIT_ROOT = '/data/vision/oliva/scratch/aandonia/moments/models/datasets/kinetics' 4 | MIT_ROOT_DATA = '/data/vision/oliva/scratch/datasets/kinetics/comp_jpgs_extracted' 5 | 6 | 7 | def return_kinetics(root=MIT_ROOT, root_data=MIT_ROOT_DATA): 8 | """Return the split information.""" 9 | filename_categories = os.path.join(root, 'categories.txt') 10 | filename_imglist_train = os.path.join(root, 'train_frameno_new.txt') 11 | filename_imglist_val = os.path.join(root, 'val_frameno_new.txt') 12 | prefix = '{:06d}.jpg' 13 | return filename_categories, filename_imglist_train, filename_imglist_val, root_data, prefix 14 | 15 | 16 | def return_moments(): 17 | filename_categories = '/data/vision/oliva/scratch/moments/split/categoryList_nov17.csv' 18 | prefix = '{:06d}.jpg' 19 | root_data = '/data/vision/oliva/scratch/moments/moments_nov17_frames' 20 | filename_imglist_train = '/data/vision/oliva/scratch/moments/split/rgb_trainingSet_nov17.csv' 21 | filename_imglist_val = '/data/vision/oliva/scratch/moments/split/rgb_validationSet_nov17.csv' 22 | return filename_categories, filename_imglist_train, filename_imglist_val, root_data, prefix 23 | 24 | 25 | def return_UCF101(root=MIT_ROOT, root_data=MIT_ROOT_DATA): 26 | filename_categories = os.path.join(root, 'categories.txt') 27 | filename_imglist_train = os.path.join(root, 'trainlist01_meta.txt') 28 | filename_imglist_val = os.path.join(root, 'testlist01_meta.txt') 29 | prefix = '{:06d}.jpg' 30 | return filename_categories, filename_imglist_train, filename_imglist_val, root_data, prefix 31 | 32 | def return_UCF101_2(root=MIT_ROOT, root_data=MIT_ROOT_DATA): 33 | filename_categories = os.path.join(root, 'categories.txt') 34 | filename_imglist_train = os.path.join(root, 'trainlist02_meta.txt') 35 | filename_imglist_val = os.path.join(root, 'testlist02_meta.txt') 36 | prefix = '{:06d}.jpg' 37 | return filename_categories, filename_imglist_train, filename_imglist_val, root_data, prefix 38 | 39 | def return_UCF101_3(root=MIT_ROOT, root_data=MIT_ROOT_DATA): 40 | filename_categories = os.path.join(root, 'categories.txt') 41 | filename_imglist_train = os.path.join(root, 'trainlist03_meta.txt') 42 | filename_imglist_val = os.path.join(root, 'testlist03_meta.txt') 43 | prefix = '{:06d}.jpg' 44 | return filename_categories, filename_imglist_train, filename_imglist_val, root_data, prefix 45 | 46 | 47 | def return_HMDB51(root=MIT_ROOT, root_data=MIT_ROOT_DATA): 48 | filename_categories = os.path.join(root, 'categories.txt') 49 | filename_imglist_train = os.path.join(root, 'trainlist01_meta.txt') 50 | filename_imglist_val = os.path.join(root, 'testlist01_meta.txt') 51 | prefix = '{:06d}.jpg' 52 | return filename_categories, filename_imglist_train,
filename_imglist_val, root_data, prefix 53 | 54 | def return_HMDB51_2(root=MIT_ROOT, root_data=MIT_ROOT_DATA): 55 | filename_categories = os.path.join(root, 'categories.txt') 56 | filename_imglist_train = os.path.join(root, 'trainlist02_meta.txt') 57 | filename_imglist_val = os.path.join(root, 'testlist02_meta.txt') 58 | prefix = '{:06d}.jpg' 59 | return filename_categories, filename_imglist_train, filename_imglist_val, root_data, prefix 60 | 61 | def return_HMDB51_3(root=MIT_ROOT, root_data=MIT_ROOT_DATA): 62 | filename_categories = os.path.join(root, 'categories.txt') 63 | filename_imglist_train = os.path.join(root, 'trainlist03_meta.txt') 64 | filename_imglist_val = os.path.join(root, 'testlist03_meta.txt') 65 | prefix = '{:06d}.jpg' 66 | return filename_categories, filename_imglist_train, filename_imglist_val, root_data, prefix 67 | 68 | def return_infant(root, root_data): 69 | filename_imglist_val = os.path.join(root, "infant_30min_metafile.txt") 70 | return filename_imglist_val, root_data 71 | 72 | 73 | def dataset_config(dataset, **kwargs): 74 | datasets = { 75 | 'hmdb0': {},  # empty entries are placeholders with no loader yet 76 | 'hmdb1': {}, 77 | 'hmdb2': {}, 78 | 'ucf101': {}, 79 | 'jester': {}, 80 | 'charades': {}, 81 | 'something': {}, 82 | 'somethingv2': {}, 83 | 'moments': return_moments, 84 | 'kinetics': return_kinetics, 85 | 'UCF101': return_UCF101, 86 | 'UCF101_2': return_UCF101_2, 87 | 'UCF101_3': return_UCF101_3, 88 | 'HMDB51': return_HMDB51, 89 | 'HMDB51_2': return_HMDB51_2, 90 | 'HMDB51_3': return_HMDB51_3,} 91 | 92 | if dataset == 'infant': 93 | file_imglist_val, root_data = return_infant(**kwargs) 94 | return { 95 | 'val_metafile': file_imglist_val, 96 | 'root': root_data 97 | } 98 | 99 | if dataset in datasets and datasets[dataset]: 100 | file_categories, file_imglist_train, \ 101 | file_imglist_val, root_data, \ 102 | prefix = datasets[dataset](**kwargs) 103 | else: 104 | raise ValueError('Unknown or unsupported dataset {}'.format(dataset)) 105 | 106 | with open(file_categories) as f: 107 | categories = [line.rstrip() for line in f.readlines()] 108 | 109 | return { 110 | 'categories': categories, 111 | 'train_metafile': file_imglist_train, 112 | 'val_metafile': file_imglist_val, 113 | 'root': root_data, 114 | 'prefix': prefix 115 | } 116 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_test_kinetics.py: -------------------------------------------------------------------------------- 1 | def trans_KN_bs128(args): 2 | args['batch_size'] = 128 3 | args['test_batch_size'] = 64 4 | args['test_no_frames'] = 5 5 | args['fre_filter'] = 50000 6 | args['fre_cache_filter'] = 5000 7 | args['fre_valid'] = 5000 8 | return args 9 | 10 | 11 | def test_basics(args): 12 | args['port'] = 27007 13 | args['pure_test'] = True 14 | args['cache_dir'] = '/mnt/fs4/shetw/tfutils_cache' 15 | return args 16 | 17 | 18 | def KN_basics(args): 19 | args['image_dir'] = '/data5/chengxuz/Dataset/kinetics/comp_jpgs_extracted' 20 | args['val_image_dir'] = args['image_dir'] 21 | args['metafile_root'] = '/mnt/fs3/chengxuz/kinetics/pt_meta' 22 | args['dataset'] = 'kinetics' 23 | args['train_len'] = 239888 24 | args['val_len'] = 19653 25 | args['train_prep'] = 'ColorJitter' 26 | args['num_classes'] = 400 27 | return args 28 | 29 | 30 | def vd_ctl_test(): 31 | args = {} 32 | 33 | args = test_basics(args) 34 | args = KN_basics(args) 35 | args = trans_KN_bs128(args) 36 | 37 | args['load_exp'] = "vd_trans/KN/vd_ctl_trans" 38 | args['load_port'] = 27006 39 | args["save_exp"] = "vd_trans_test/KN/vd_ctl" 40 | args["train_num_workers"] =
40 41 | # args["lr_boundaries"] = '1610099,1894998' 42 | return args 43 | 44 | 45 | def vd_slow_test(): 46 | args = {} 47 | 48 | args = test_basics(args) 49 | args = KN_basics(args) 50 | args = trans_KN_bs128(args) 51 | 52 | args['load_exp'] = "vd_trans/KN/vd_slow_trans" 53 | args['load_port'] = 27006 54 | args["save_exp"] = "vd_trans_test/KN/vd_slow" 55 | args['model_type'] = 'slow' 56 | args["train_num_workers"] = 40 57 | # args["lr_boundaries"] = '1539998,1630001' 58 | return args 59 | 60 | 61 | def vd_slowfast_a4_test(): 62 | args = {} 63 | 64 | args = test_basics(args) 65 | args = KN_basics(args) 66 | args = trans_KN_bs128(args) 67 | 68 | args['load_exp'] = "vd_trans/KN/vd_slowfast_a4_trans" 69 | args['load_port'] = 27006 70 | args["save_exp"] = "vd_trans_test/KN/vd_slowfast_a4" 71 | args['model_type'] = 'slowfast_a4' 72 | args["train_num_workers"] = 40 73 | return args 74 | 75 | 76 | def vd_tsrn_f4_pret_test(): 77 | args = {} 78 | 79 | args = test_basics(args) 80 | args = KN_basics(args) 81 | args = trans_KN_bs128(args) 82 | 83 | args['load_exp'] = "vd_trans/KN/vd_tsrn_f4_pret_trans" 84 | args['load_port'] = 27006 85 | args["save_exp"] = "vd_trans_test/KN/vd_tsrn_f4" 86 | args['model_type'] = 'tsrn' 87 | args['trn_num_frames'] = 4 88 | args['get_all_layers'] = '9-time-avg' 89 | args["train_num_workers"] = 40 90 | args['test_batch_size'] = 32 91 | return args 92 | 93 | 94 | def vd_ctl_all_test(): 95 | args = {} 96 | 97 | args = test_basics(args) 98 | args = KN_basics(args) 99 | args = trans_KN_bs128(args) 100 | 101 | args['load_exp'] = "vd_trans/KN/vd_ctl_trans_all" 102 | args['load_port'] = 27006 103 | args["save_exp"] = "vd_trans_test/KN/vd_ctl_all" 104 | args["train_num_workers"] = 40 105 | # args["lr_boundaries"] = '1610099,1894998' 106 | args["get_all_layers"] = '1,3,5,7,9' 107 | return args 108 | 109 | 110 | def vd_slow_all_test(): 111 | args = {} 112 | 113 | args = test_basics(args) 114 | args = KN_basics(args) 115 | args = trans_KN_bs128(args) 116 | 117 | #args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slow" 118 | #args["save_exp"] = "vd_trans/KN/vd_slow_trans_all" 119 | args["load_exp"] = "vd_trans/KN/vd_slow_trans_all_ct" 120 | args["save_exp"] = "vd_trans_test/KN/vd_slow_all" 121 | 122 | args['load_port'] = 27006 123 | args['model_type'] = 'slow' 124 | args["train_num_workers"] = 40 125 | #args["lr_boundaries"] = '1539998,1714998' 126 | args["get_all_layers"] = '1,3,5,7,9' 127 | return args 128 | 129 | 130 | def vd_slowfast_a4_all_test(): 131 | args = {} 132 | 133 | args = test_basics(args) 134 | args = KN_basics(args) 135 | args = trans_KN_bs128(args) 136 | 137 | args['load_exp'] = "vd_trans/KN/vd_slowfast_a4_trans_all" 138 | args['load_port'] = 27006 139 | args["save_exp"] = "vd_trans_test/KN/vd_slowfast_all" 140 | args['model_type'] = 'slowfast_a4' 141 | args["train_num_workers"] = 40 142 | args["get_all_layers"] = '1,3,5,7,9' 143 | #args["lr_boundaries"] = '1469998,1594998' 144 | return args 145 | 146 | 147 | def vd_tsrn_f4_pret_all_test(): 148 | args = {} 149 | 150 | args = test_basics(args) 151 | args = KN_basics(args) 152 | args = trans_KN_bs128(args) 153 | 154 | #args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_tsrn_f4_pret" 155 | #args["save_exp"] = "vd_trans/KN/vd_tsrn_f4_pret_trans_all" 156 | args["load_exp"] = "vd_trans/KN/vd_tsrn_f4_pret_trans_all" 157 | args["save_exp"] = "vd_trans_test/KN/vd_tsrn_all" 158 | # args["load_step"] = 2050000 159 | 160 | args['load_port'] = 27006 161 | args['model_type'] = 'tsrn' 162 | args['trn_num_frames'] = 4 163 | 
args['get_all_layers'] = '1-time-avg,3-time-avg,5-time-avg,7-time-avg,9-time-avg' 164 | args["train_num_workers"] = 40 165 | args['test_batch_size'] = 32 166 | #args['lr_boundaries'] = '1854998,2049998' 167 | # args['lr_boundaries'] = '1854998' 168 | return args -------------------------------------------------------------------------------- /tf_model/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import argparse 3 | import json 4 | import importlib 5 | 6 | 7 | def named_choices(choices): 8 | def convert(val): 9 | if val not in choices: 10 | raise Exception('%s is not a recognized ' 11 | 'choice (choices are %s)' 12 | % (val, ', '.join(choices.keys()))) 13 | return choices[val] 14 | return convert 15 | 16 | 17 | class Config(object): 18 | def __init__(self): 19 | self.parser = argparse.ArgumentParser( 20 | description="Train instance task using dataset interface") 21 | self.parser.add_argument('--config', default=None, 22 | type=str, 23 | help="Path to a JSON file containing configuration info. Any " \ 24 | "configurations loaded from this file are superseded by " \ 25 | "configurations passed from the command line.") 26 | self.parser.add_argument('--setting', default=None, 27 | type=str, 28 | help="Function name in saved_settings folder") 29 | self.fields = [] 30 | self.required_fields = [] 31 | 32 | self._reserved = ['config', 'description', 'setting'] 33 | self._default_values = {} 34 | self._types = {} 35 | 36 | def add(self, field, type, help, 37 | default=None, required=False, 38 | action='store'): 39 | def _assert(cond, mesg): 40 | if not cond: 41 | raise Exception("Error in defining flag %s: %s" % (field, mesg)) 42 | _assert(field not in self._reserved, "flag name reserved!") 43 | _assert(field not in self.fields, "already defined!") 44 | 45 | if type is bool: 46 | if default is None: 47 | default = False 48 | self.parser.add_argument( 49 | '--' + field, default=None, 50 | help=help, action='store_true') 51 | else: 52 | self.parser.add_argument( 53 | '--' + field, default=None, type=type, 54 | help=help, action=action) 55 | 56 | self.fields.append(field) 57 | self._types[field] = type 58 | 59 | if default is not None: 60 | _assert(not required, "default doesn't make sense " \ 61 | "when flag is required!") 62 | self._default_values[field] = type(default) 63 | if required: 64 | self.required_fields.append(field) 65 | 66 | def parse_config_file(self, config_str): 67 | if config_str is None: 68 | return {} 69 | 70 | parts = config_str.split(':') 71 | assert len(parts) <= 2 72 | if len(parts) < 2: 73 | parts.append(None) 74 | path, config_name = parts 75 | 76 | def strip_comments(s): 77 | # Quick-and-dirty way to strip comments. Should work for our 78 | # purposes. 
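            # Only lines whose stripped text starts with '//' are dropped,
            # e.g. a config file line like:
            #   // learning-rate schedule notes
            # Inline '//' and '/* */' comments are not handled and would
            # break json.loads below.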
79 | lines = s.split('\n') 80 | lines = filter(lambda x: not x.strip().startswith('//'), lines) 81 | return '\n'.join(lines) 82 | 83 | f = open(path) 84 | json_str = strip_comments(f.read()) 85 | json_dict = json.loads(json_str) 86 | if config_name is not None: 87 | if config_name not in json_dict: 88 | raise Exception("Could not find configuration called '%s' " 89 | "in file '%s'" % (config_name, path)) 90 | json_dict = json_dict[config_name] 91 | return json_dict 92 | 93 | def load_setting_func(self, setting_func_name): 94 | if setting_func_name is None: 95 | return {} 96 | 97 | all_paths = setting_func_name.split('.') 98 | module_name = '.'.join(['saved_settings'] + all_paths[:-1]) 99 | load_setting_module = importlib.import_module(module_name) 100 | setting_func_name = all_paths[-1] 101 | setting_func = getattr(load_setting_module, setting_func_name) 102 | func_cfg = setting_func() 103 | return func_cfg 104 | 105 | def parse_args(self, *p_args, **p_kwargs): 106 | args = self.parser.parse_args(*p_args, **p_kwargs) 107 | file_cfg = self.parse_config_file(args.config) 108 | file_cfg.update(self.load_setting_func(args.setting)) 109 | 110 | # Configuration priority: 111 | # 1. Explicit command line values 112 | # 2. Config file values 113 | # 3. Default values 114 | for field in self.fields: 115 | cmd_val = getattr(args, field) 116 | if cmd_val is not None: 117 | continue 118 | 119 | if field in file_cfg: 120 | value = self._types[field](file_cfg[field]) 121 | setattr(args, field, value) 122 | elif field in self._default_values: 123 | setattr(args, field, self._default_values[field]) 124 | 125 | curr_inner_args = getattr(args, 'inner_args', None) 126 | if curr_inner_args is not None and curr_inner_args != '[]': 127 | return args 128 | 129 | for field in self.required_fields: 130 | if getattr(args, field) is None: 131 | raise Exception("Missing required argument %s" % field) 132 | return args 133 | -------------------------------------------------------------------------------- /tf_model/train_opn.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys 3 | import numpy as np 4 | import tensorflow as tf 5 | import cPickle 6 | 7 | import json 8 | import copy 9 | import argparse 10 | import time 11 | import functools 12 | import inspect 13 | import pdb 14 | 15 | from tfutils import base, optimizer 16 | import tfutils.defaults 17 | 18 | from model import opn_model 19 | 20 | from utils import online_keep_all 21 | import config 22 | import opn_data 23 | 24 | import train_vie 25 | OPN_KINETICS_VIDEOS = 239871 26 | OPN_KINETICS_VAL_VDS = 19647 27 | 28 | 29 | def get_params_from_arg(args): 30 | save_params, load_params = train_vie.get_save_load_params_from_arg(args) 31 | loss_params, learning_rate_params, optimizer_params \ 32 | = train_vie.get_loss_lr_opt_params_from_arg(args) 33 | 34 | # train_params 35 | train_data_loader = opn_data.get_train_opn_pt_loader(args) 36 | data_enumerator = [enumerate(train_data_loader)] 37 | def train_loop(sess, train_targets, num_minibatches=1, **params): 38 | assert num_minibatches==1, "Mini-batch not supported!" 
39 | 40 | global_step_vars = [v for v in tf.global_variables() \ 41 | if 'global_step' in v.name] 42 | assert len(global_step_vars) == 1 43 | global_step = sess.run(global_step_vars[0]) 44 | 45 | data_en_update_fre = OPN_KINETICS_VIDEOS // args.batch_size 46 | if global_step % data_en_update_fre == 0: 47 | data_enumerator.pop() 48 | data_enumerator.append(enumerate(train_data_loader)) 49 | _, image = data_enumerator[0].next() 50 | feed_dict = opn_data.get_feeddict(image) 51 | sess_res = sess.run(train_targets, feed_dict=feed_dict) 52 | return sess_res 53 | 54 | train_data_param = { 55 | 'func': opn_data.get_opn_placeholders, 56 | 'batch_size': args.batch_size, 57 | 'crop_size': args.opn_crop_size} 58 | train_params = { 59 | 'validate_first': False, 60 | 'data_params': train_data_param, 61 | 'queue_params': None, 62 | 'thres_loss': float('Inf'), 63 | 'num_steps': float('Inf'), 64 | 'train_loop': {'func': train_loop}} 65 | train_params['targets'] = { 66 | 'func': lambda inputs, output: {'accuracy': output['accuracy']}} 67 | 68 | # validation_params 69 | val_len = 3 * OPN_KINETICS_VAL_VDS 70 | topn_val_data_param = { 71 | 'func': opn_data.get_opn_placeholders, 72 | 'batch_size': args.test_batch_size, 73 | 'name_prefix': 'VAL', 74 | 'crop_size': args.opn_crop_size} 75 | 76 | val_step_num = int(val_len / args.test_batch_size) 77 | val_data_loader = opn_data.get_val_opn_pt_loader(args) 78 | val_counter = [0] 79 | val_data_enumerator = [enumerate(val_data_loader)] 80 | def valid_loop(sess, target): 81 | val_counter[0] += 1 82 | if val_counter[0] % (OPN_KINETICS_VAL_VDS // args.test_batch_size) == 0: 83 | val_data_enumerator.pop() 84 | val_data_enumerator.append(enumerate(val_data_loader)) 85 | _, image = val_data_enumerator[0].next() 86 | feed_dict = opn_data.get_feeddict(image, name_prefix='VAL') 87 | return sess.run(target, feed_dict=feed_dict) 88 | 89 | val_targets = { 90 | 'func': lambda inputs, output: {'accuracy': output['accuracy']}} 91 | 92 | topn_val_param = { 93 | 'data_params': topn_val_data_param, 94 | 'queue_params': None, 95 | 'targets': val_targets, 96 | 'num_steps': val_step_num, 97 | 'agg_func': lambda x: {k: np.mean(v) for k, v in x.items()}, 98 | 'online_agg_func': train_vie.online_agg, 99 | 'valid_loop': {'func': valid_loop} 100 | } 101 | validation_params = {'topn': topn_val_param} 102 | 103 | # model_params 104 | model_params = { 105 | 'func': opn_model.build_loss_accuracy, 106 | 'resnet_size': args.resnet_size} 107 | multi_gpu = len(args.gpu.split(',')) 108 | if multi_gpu > 1: 109 | model_params['num_gpus'] = multi_gpu 110 | model_params['devices'] = ['/gpu:%i' % idx for idx in range(multi_gpu)] 111 | 112 | # Put all parameters together 113 | params = { 114 | 'save_params': save_params, 115 | 'load_params': load_params, 116 | 'loss_params': loss_params, 117 | 'learning_rate_params': learning_rate_params, 118 | 'optimizer_params': optimizer_params, 119 | 'log_device_placement': False, 120 | 'skip_check': True, 121 | 'train_params': train_params, 122 | 'validation_params': validation_params, 123 | 'model_params': model_params, 124 | } 125 | return params 126 | 127 | 128 | def get_config(): 129 | cfg = train_vie.get_config() 130 | cfg.add('opn_crop_size', type=int, default=80, 131 | help='Crop size for opn') 132 | cfg.add('opn_transform', type=str, default=None, 133 | help='Transform type for opn, None or Sep') 134 | cfg.add('opn_flow_folder', type=str, default=None, 135 | help='Not none, will use flow') 136 | return cfg 137 | 138 | 139 | def main(): 140 | # Parse arguments 
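    # Typical invocation (the setting name is illustrative):
    #   python train_opn.py --setting vd_opn.<some_setting> --gpu 0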
141 | cfg = get_config() 142 | args = cfg.parse_args() 143 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 144 | 145 | # Get params needed, start training 146 | params = get_params_from_arg(args) 147 | base.train_from_params(**params) 148 | 149 | 150 | if __name__ == "__main__": 151 | main() 152 | -------------------------------------------------------------------------------- /tf_model/model/opn_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys 3 | import json 4 | import numpy as np 5 | import tensorflow as tf 6 | import copy 7 | import pdb 8 | from collections import OrderedDict 9 | 10 | from .instance_model import color_normalize 11 | from .resnet_model import get_block_sizes, Model, DEFAULT_VERSION, DEFAULT_DTYPE 12 | 13 | ALL_ORDERS = [ 14 | (0,1,2,3), 15 | (0,2,1,3), 16 | (0,2,3,1), 17 | (0,1,3,2), 18 | (0,3,1,2), 19 | (0,3,2,1), 20 | (1,0,2,3), 21 | (1,0,3,2), 22 | (1,2,0,3), 23 | (2,0,1,3), 24 | (2,0,3,1), 25 | (2,1,0,3), 26 | ] 27 | 28 | 29 | class OPNModel(Model): 30 | def __init__(self, resnet_size, data_format=None, 31 | resnet_version=DEFAULT_VERSION, 32 | dtype=DEFAULT_DTYPE): 33 | """ 34 | Args: 35 | resnet_size: The number of convolutional layers needed in the model. 36 | data_format: Either 'channels_first' or 'channels_last', specifying which 37 | data format to use when setting up the model. 38 | (num_classes is fixed to None by this subclass and is not a 39 | constructor argument; see the parent Model class in resnet_model.) 40 | resnet_version: Integer representing which version of the ResNet network 41 | to use. See README for details. Valid values: [1, 2] 42 | dtype: The TensorFlow dtype to use for calculations. 43 | """ 44 | 45 | # For bigger models, we want to use "bottleneck" layers 46 | if resnet_size < 50: 47 | bottleneck = False 48 | final_size = 512 49 | else: 50 | bottleneck = True 51 | final_size = 2048 52 | 53 | super(OPNModel, self).__init__( 54 | resnet_size=resnet_size, 55 | bottleneck=bottleneck, 56 | num_classes=None, 57 | num_filters=64, 58 | kernel_size=7, 59 | conv_stride=2, 60 | first_pool_size=3, 61 | first_pool_stride=2, 62 | block_sizes=get_block_sizes(resnet_size), 63 | block_strides=[1, 2, 2, 2], 64 | final_size=final_size, 65 | resnet_version=resnet_version, 66 | data_format=data_format, 67 | dtype=dtype 68 | ) 69 | 70 | def _preprocess_data(self, inputs): 71 | if self.data_format == 'channels_first': 72 | # Convert the inputs from channels_last (N, T, H, W, C) to channels_first (N, T, C, H, W). 73 | # This provides a large performance boost on GPU.
See 74 | # https://www.tensorflow.org/performance/performance_guide#data_formats 75 | inputs = tf.transpose(inputs, [0, 1, 4, 2, 3]) 76 | curr_shape = inputs.get_shape().as_list() 77 | self.num_frames = curr_shape[1] 78 | inputs = tf.reshape(inputs, [-1] + curr_shape[2:]) 79 | return inputs 80 | 81 | def _build_pairwise_features(self, each_frame_out, out_dim=512): 82 | pairwise_features = {} 83 | for first_frame in range(self.num_frames): 84 | for second_frame in range(self.num_frames): 85 | if first_frame == second_frame: 86 | continue 87 | curr_pair = (first_frame, second_frame) 88 | curr_input_to_mlp = tf.concat( 89 | [each_frame_out[first_frame], each_frame_out[second_frame]], 90 | axis=-1) 91 | curr_output = tf.layers.dense( 92 | inputs=curr_input_to_mlp, units=out_dim, 93 | activation=tf.nn.relu, name='opn_pairwise_mlp') 94 | pairwise_features[curr_pair] = curr_output 95 | self.pairwise_features = pairwise_features 96 | 97 | def _build_final_mlp_for_order(self, curr_order): 98 | input_to_final_mlp = [] 99 | for first_frame in range(self.num_frames): 100 | for second_frame in range(first_frame+1, self.num_frames): 101 | input_to_final_mlp.append( 102 | self.pairwise_features[ 103 | (curr_order[first_frame], curr_order[second_frame])]) 104 | input_to_final_mlp = tf.concat(input_to_final_mlp, axis=-1) 105 | final_mlp_output = tf.layers.dense( 106 | inputs=input_to_final_mlp, units=12, 107 | name='opn_final_mlp') 108 | return final_mlp_output 109 | 110 | def _get_final_dense(self, inputs): 111 | inputs = tf.reshape(inputs, [-1, self.num_frames, self.final_size]) 112 | bs = inputs.get_shape().as_list()[0] 113 | each_frame_out = tf.unstack(inputs, axis=1) 114 | self._build_pairwise_features(each_frame_out) 115 | 116 | all_logits = [] 117 | all_labels = [] 118 | for curr_lbl, curr_order in enumerate(ALL_ORDERS): 119 | _final_mlp_output = self._build_final_mlp_for_order(curr_order) 120 | _final_mlp_output_rev = self._build_final_mlp_for_order( 121 | tuple(reversed(curr_order))) 122 | all_logits.append( 123 | tf.concat([_final_mlp_output, _final_mlp_output_rev], axis=0)) 124 | all_labels.append(tf.ones((bs * 2), dtype=tf.int64) * curr_lbl) 125 | all_logits = tf.concat(all_logits, axis=0) 126 | all_labels = tf.concat(all_labels, axis=0) 127 | 128 | _, pred = tf.nn.top_k(all_logits, k=1) 129 | pred = tf.cast(tf.squeeze(pred), tf.int64) 130 | accuracy = tf.reduce_mean( 131 | tf.cast(tf.equal(pred, all_labels), tf.float32)) 132 | 133 | one_hot_labels = tf.one_hot(all_labels, 12) 134 | loss = tf.losses.softmax_cross_entropy(one_hot_labels, all_logits) 135 | return loss, accuracy 136 | 137 | 138 | def build_loss_accuracy( 139 | inputs, train, 140 | resnet_size=18, 141 | *args, **kwargs): 142 | image = color_normalize(inputs['image']) 143 | model = OPNModel( 144 | resnet_size=resnet_size) 145 | loss, accuracy = model(image, train, skip_final_dense=False) 146 | return {'loss': loss, 'accuracy': accuracy}, {} 147 | -------------------------------------------------------------------------------- /pt_loader/opn_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | from collections import namedtuple 4 | import time 5 | import pdb 6 | 7 | import numpy as np 8 | import torch.utils.data as data 9 | from PIL import Image 10 | 11 | from pt_loader.datasets import VideoRecord 12 | 13 | 14 | class OPNVideoDataset(data.Dataset): 15 | MIN_NUM_FRAMES = 16 16 | T_MAX_CHOICES = [9, 15] 17 | 18 | def __init__(self, root, metafile, 
file_tmpl='{:06d}.jpg', transform=None): 19 | self.root = root 20 | self.metafile = metafile 21 | self.transform = transform 22 | self.file_tmpl = file_tmpl 23 | 24 | self._parse_list() 25 | 26 | def _parse_list(self): 27 | # check the frame number is >= MIN_NUM_FRAMES 28 | # usually each line is [video_id, num_frames, class_idx] 29 | with open(self.metafile) as f: 30 | lines = [x.strip().split(' ') for x in f] 31 | lines = [line for line in lines 32 | if int(line[1]) >= self.MIN_NUM_FRAMES] 33 | 34 | self.video_list = [VideoRecord(*v) for v in lines] 35 | print('Number of videos: {}'.format(len(self.video_list))) 36 | 37 | def _get_valid_video(self, index): 38 | record = self.video_list[index] 39 | # check this is a legit video folder 40 | while not os.path.exists( 41 | os.path.join(self.root, record.path, self.file_tmpl.format(1))): 42 | print( 43 | os.path.join( 44 | self.root, 45 | record.path, 46 | self.file_tmpl.format(1))) 47 | index = np.random.randint(len(self.video_list)) 48 | record = self.video_list[index] 49 | return record, index 50 | 51 | def _load_image(self, directory, idx): 52 | tmpl = os.path.join(self.root, directory, self.file_tmpl) 53 | try: 54 | return Image.open(tmpl.format(idx)).convert('RGB') 55 | except Exception: 56 | print('error loading image: {}'.format(tmpl.format(idx))) 57 | return Image.open(tmpl.format(1)).convert('RGB') 58 | 59 | def _get_indices(self, record): 60 | rec_no_frames = int(record.num_frames) 61 | t_max = np.random.choice(self.T_MAX_CHOICES) 62 | start_idx = np.random.randint(rec_no_frames - t_max) 63 | indices = [start_idx + _tmp_idx * t_max // 3 for _tmp_idx in range(4)] 64 | return np.asarray(indices) + 1 65 | 66 | def __getitem__(self, index): 67 | record, index = self._get_valid_video(index) 68 | indices = self._get_indices(record) 69 | frames = self.transform([self._load_image(record.path, int(idx)) 70 | for idx in indices]) 71 | return frames 72 | 73 | def __len__(self): 74 | return len(self.video_list) 75 | 76 | 77 | def normalize(x): 78 | x -= x.min() 79 | #m = x.max() 80 | #x /= m if m != 0 else 1 81 | if x.max() == 0: 82 | x += 1 83 | x /= x.sum() 84 | return x 85 | 86 | 87 | class MotionAwareOPNVideoDataset(OPNVideoDataset): 88 | 89 | magnitude_templ = 'magnitudes.npy' 90 | 91 | def __init__(self, root, flow_root, metafile, file_tmpl='{:06d}.jpg', 92 | transform=None): 93 | self.root = root 94 | self.flow_root = flow_root 95 | self.metafile = metafile 96 | self.transform = transform 97 | self.file_tmpl = file_tmpl 98 | 99 | self._parse_list() 100 | 101 | def _load_magnitudes(self, record): 102 | mag_path = os.path.join(self.flow_root, 103 | record.path, 104 | self.magnitude_templ) 105 | try: 106 | mag_arr = np.load(mag_path) 107 | except: 108 | print(mag_path, "Mag not there!") 109 | mag_arr = np.ones(int(record.num_frames)) 110 | 111 | if len(mag_arr) == 0: 112 | mag_arr = np.ones(int(record.num_frames)) 113 | return mag_arr 114 | 115 | def _get_indices(self, record): 116 | t_max = np.random.choice(self.T_MAX_CHOICES) 117 | magnitudes = self._load_magnitudes(record) 118 | window_weights = np.convolve(magnitudes, np.ones(t_max), mode='valid') 119 | window_weights = normalize(window_weights) 120 | 121 | rec_no_frames = int(record.num_frames) 122 | start_idx = np.random.choice( 123 | len(window_weights), 124 | p=window_weights) 125 | start_idx = min(start_idx, rec_no_frames - t_max - 1) 126 | indices = [start_idx + _tmp_idx * t_max // 3 for _tmp_idx in range(4)] 127 | return np.asarray(indices) + 1 128 | 129 | 130 | if __name__ ==
'__main__': 131 | import config 132 | import transforms 133 | import torch 134 | 135 | root = '/mnt/fs3/chengxuz/kinetics/pt_meta' 136 | root_data = '/data5/chengxuz/Dataset/kinetics/comp_jpgs_extracted' 137 | cfg = config.dataset_config('kinetics', root=root, root_data=root_data) 138 | transform = transforms.video_OPN_transform_color() 139 | dataset = OPNVideoDataset( 140 | cfg['root'], cfg['train_metafile'], transform=transform) 141 | 142 | dataloader = torch.utils.data.DataLoader( 143 | dataset, batch_size=64, shuffle=True, 144 | num_workers=10, pin_memory=False, 145 | worker_init_fn=lambda x: np.random.seed(x)) 146 | 147 | curr_time = time.time() 148 | init_time = curr_time 149 | data_enumerator = enumerate(dataloader) 150 | for i in range(100): 151 | _, input = data_enumerator.next() 152 | print(input.shape, input.dtype, np.max(input.numpy())) 153 | curr_time = time.time() 154 | print(time.time() - init_time) 155 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_super_fx.py: -------------------------------------------------------------------------------- 1 | def sup_basic(args): 2 | args['port'] = 27007 3 | args['db_name'] = 'vd_sup' 4 | args['cache_dir'] = '/mnt/fs4/shetw/tfutils_cache' 5 | args['task'] = 'SUP' 6 | return args 7 | 8 | def sup_bs128(args): 9 | args['batch_size'] = 128 10 | args['test_batch_size'] = 64 11 | args['test_no_frames'] = 5 12 | args['fre_filter'] = 10000 13 | args['fre_cache_filter'] = 5000 14 | args['fre_valid'] = 500 15 | 16 | args['init_lr'] = 0.01 17 | return args 18 | 19 | def UCF_basics(args): 20 | args = sup_basic(args) 21 | args = sup_bs128(args) 22 | 23 | args['col_name'] = 'UCF' 24 | args['data_len'] = 9537 25 | args['image_dir'] = '/data5/shetw/UCF101/extracted_frames' 26 | args['val_image_dir'] = args['image_dir'] 27 | args['metafile_root'] = '/data5/shetw/UCF101/metafiles' 28 | args['dataset'] = 'UCF101' 29 | args['train_len'] = 9537 30 | args['val_len'] = 3783 31 | args['num_classes'] = 101 32 | return args 33 | 34 | 35 | def HMDB_basics(args): 36 | args = sup_basic(args) 37 | args = sup_bs128(args) 38 | 39 | args['col_name'] = 'HMDB' 40 | args['image_dir'] = '/data5/shetw/HMDB51/extracted_frames' 41 | args['val_image_dir'] = args['image_dir'] 42 | args['metafile_root'] = '/data5/shetw/HMDB51/metafiles' 43 | args['dataset'] = 'HMDB51' 44 | args['train_len'] = 3570 45 | args['data_len'] = 3570 46 | args['val_len'] = 1530 47 | args['train_prep'] = 'ColorJitter' 48 | args['num_classes'] = 51 49 | return args 50 | 51 | #################### Single-frame model #################### 52 | def single_frame_setting(args): 53 | args['test_no_frames'] = 5 54 | args["train_num_workers"] = 10 55 | return args 56 | 57 | def vd_single_UCF_sc(): 58 | args = {} 59 | args = UCF_basics(args) 60 | args = single_frame_setting(args) 61 | 62 | args['train_prep'] = 'MultiScaleCrop_224' 63 | args['exp_id'] = 'vd_single_sc' 64 | args['lr_boundaries'] = '9000,13000' 65 | return args 66 | 67 | def vd_single_UCF_cj(): 68 | args = {} 69 | args = UCF_basics(args) 70 | args = single_frame_setting(args) 71 | 72 | args['train_prep'] = 'ColorJitter' 73 | args['exp_id'] = 'vd_single_cj' 74 | args['lr_boundaries'] = '9000,13000' 75 | return args 76 | 77 | def vd_single_HMDB_sc(): 78 | args = {} 79 | args = HMDB_basics(args) 80 | args = single_frame_setting(args) 81 | 82 | args['train_prep'] = 'MultiScaleCrop_224' 83 | args['exp_id'] = 'vd_single_sc' 84 | args['lr_boundaries'] = '7000,10000' 85 | return args 86 | 87 | def
vd_single_HMDB_cj(): 88 | args = {} 89 | args = HMDB_basics(args) 90 | args = single_frame_setting(args) 91 | 92 | args['train_prep'] = 'ColorJitter' 93 | args['exp_id'] = 'vd_single_cj' 94 | args['lr_boundaries'] = '7000,10000' 95 | return args 96 | 97 | 98 | #################### Slowfast model #################### 99 | def slowfast_setting(args): 100 | args['model_type'] = 'slowfast_a4' 101 | args["train_num_workers"] = 40 102 | return args 103 | 104 | def vd_slowfast_UCF_sc(): 105 | args = {} 106 | args = UCF_basics(args) 107 | args = slowfast_setting(args) 108 | 109 | args['test_no_frames'] = 3 110 | args['train_prep'] = 'MultiScaleCrop_224' 111 | args['exp_id'] = 'vd_slowfast_sc' 112 | #args['lr_boundaries'] = '390011,590011' 113 | return args 114 | 115 | def vd_slowfast_UCF_cj(): 116 | args = {} 117 | args = UCF_basics(args) 118 | args = slowfast_setting(args) 119 | 120 | args['test_no_frames'] = 3 121 | args['train_prep'] = 'ColorJitter' 122 | args['exp_id'] = 'vd_slowfast_cj' 123 | args['lr_boundaries'] = '10000,14000' 124 | return args 125 | 126 | def vd_slowfast_HMDB_sc(): 127 | args = {} 128 | args = HMDB_basics(args) 129 | args = slowfast_setting(args) 130 | 131 | args['test_no_frames'] = 2 132 | args['train_prep'] = 'MultiScaleCrop_224' 133 | args['exp_id'] = 'vd_slowfast_sc' 134 | args['lr_boundaries'] = '13000,18000' 135 | return args 136 | 137 | def vd_slowfast_HMDB_cj(): 138 | args = {} 139 | args = HMDB_basics(args) 140 | args = slowfast_setting(args) 141 | 142 | args['test_no_frames'] = 2 143 | args['train_prep'] = 'ColorJitter' 144 | args['exp_id'] = 'vd_slowfast_cj' 145 | args['lr_boundaries'] = '9000,13000' 146 | return args 147 | 148 | 149 | #################### 3D ResNet #################### 150 | def resnet3d_setting(args): 151 | args['model_type'] = '3dresnet' 152 | args["train_num_workers"] = 40 153 | return args 154 | 155 | def vd_resnet3d_UCF_sc(): 156 | args = {} 157 | args = UCF_basics(args) 158 | args = resnet3d_setting(args) 159 | 160 | args['test_no_frames'] = 10 161 | args['train_prep'] = 'MultiScaleCrop_112' 162 | args['exp_id'] = 'vd_resnet3d_sc' 163 | args['lr_boundaries'] = '10000,16000' 164 | return args 165 | 166 | def vd_resnet3d_UCF_cj(): 167 | args = {} 168 | args = UCF_basics(args) 169 | args = resnet3d_setting(args) 170 | 171 | args['test_no_frames'] = 10 172 | args['train_prep'] = 'ColorJitter_112' 173 | args['exp_id'] = 'vd_resnet3d_cj' 174 | #args['lr_boundaries'] = '390011,590011' 175 | return args 176 | 177 | def vd_resnet3d_HMDB_sc(): 178 | args = {} 179 | args = HMDB_basics(args) 180 | args = resnet3d_setting(args) 181 | 182 | args['test_no_frames'] = 4 183 | args['train_prep'] = 'MultiScaleCrop_112' 184 | args['exp_id'] = 'vd_resnet3d_sc' 185 | args['lr_boundaries'] = '10000,15000' 186 | return args 187 | 188 | def vd_resnet3d_HMDB_cj(): 189 | args = {} 190 | args = HMDB_basics(args) 191 | args = resnet3d_setting(args) 192 | 193 | args['test_no_frames'] = 4 194 | args['train_prep'] = 'ColorJitter_112' 195 | args['exp_id'] = 'vd_resnet3d_cj' 196 | args['lr_boundaries'] = '10000,15000' 197 | return args -------------------------------------------------------------------------------- /tf_model/load_param_dict.pkl: -------------------------------------------------------------------------------- 1 | (dp0 2 | S'resnet_model/batch_normalization_11/moving_mean' 3 | p1 4 | g1 5 | sS'resnet_model/conv2d_5/kernel' 6 | p2 7 | g2 8 | sS'resnet_model/batch_normalization_8/gamma' 9 | p3 10 | g3 11 | sS'resnet_model/batch_normalization_5/gamma' 12 | p4 
-------------------------------------------------------------------------------- /tf_model/train_transfer_KN.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | import json 7 | import copy 8 | import argparse 9 | import time 10 | import functools 11 | import inspect 12 | 13 | from model import instance_model 14 | import train_transfer as prev_trans 15 | import train_vie 16 | import data 17 | 18 | 19 | def get_config(): 20 | cfg = prev_trans.get_config() 21 | cfg.add('finetune_conv', type=bool, default=False, 22 | help='Whether to finetune conv layers or not') 23 | cfg.add('dropout', type=float, default=None, 24 | help='If not None, apply dropout at the given rate') 25 | cfg.add('optimizer', type=str, default=None, 26 | help='If not None, use the given optimizer instead of momentum') 27 | cfg.add('test_no_frames', type=int, default=5, 28 | help='Number of frames in one video during validation') 29 | cfg.add('train_prep', type=str, default=None, 30 | help='Train preprocessing') 31 | cfg.add('val_image_dir', type=str, required=True, 32 | help='Directory containing the validation dataset') 33 | cfg.add('bin_interval', type=int, default=None, 34 | help='Bin interval for binned video dataset') 35 | cfg.add('part_vd', type=float, default=None, 36 | help='Portion of videos to use during training') 37 | cfg.add('trn_num_frames', type=int, default=8, 38 | help='Number of frames in trn style') 39 | cfg.add('HMDB_sample', type=bool, default=False, 40 | help='Whether to use HMDB sampling strategy') 41 | cfg.add('final_pooling', type=int, default=None, 42 | help='The output feature map size of the final pooling layer') 43 | cfg.add('slowfast_single_pooling', type=bool, default=False, 44 | help='Whether to add reduce mean final pooling for slowfast_single model') 45 | cfg.add('rotnet', type=bool, default=False, 46 | help='Whether to finetune 3D RotNet') 47 | cfg.add('train_num_workers', type=int, default=12, 48 | help='Training worker number') 49 | cfg.add('val_num_workers', type=int, default=12, 50 | help='Validation worker number') 51 | return cfg 52 | 53 | 54 | def valid_func( 55 | inputs, output, 56 | test_no_frames): 57 | def _get_one_top(output): 58 | num_classes = output.get_shape().as_list()[-1] 59 | curr_output = tf.nn.softmax(output) 60 | curr_output = tf.reshape(curr_output, [-1, test_no_frames, num_classes]) 61 | curr_output = tf.reduce_mean(curr_output, axis=1) 62 | 63 | top1_accuracy = tf.nn.in_top_k(curr_output, inputs['label'], k=1) 64 | top5_accuracy = tf.nn.in_top_k(curr_output, inputs['label'], k=5) 65 | #return {'pred': curr_output, 'top1': top1_accuracy, 'top5': top5_accuracy} 66 | return {'top1': top1_accuracy, 'top5': top5_accuracy} 67 | 68 | if isinstance(output, dict): 69 | ret_dict = {} 70 | for key, _output in output.items(): 71 | _one_ret_dict = _get_one_top(_output) 72 | ret_dict['top1_%s' % key] = _one_ret_dict['top1'] 73 | ret_dict['top5_%s' % key] = _one_ret_dict['top5'] 74 | else: 75 | ret_dict = _get_one_top(output) 76 | return ret_dict 77 | 78 | 79 | def main(): 80 | # Parse arguments 81 | cfg = get_config() 82 | args = cfg.parse_args() 83 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 84 | 85 | params = { 86 | 'skip_check': True, 87 | 'log_device_placement': False 88 | } 89 | 90 | prev_trans.add_save_and_load_params(params, args) 91 | prev_trans.add_optimization_params(params, args) 92 |
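# Loss contract (tfutils-style): the model's forward pass computes its own
# objective and returns it under output['loss']; the loss_func below simply
# extracts that value, while reg_loss (from train_transfer, with weight_decay
# passed through agg_func_kwargs) presumably adds the L2 term on top.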
params['loss_params'] = { 94 | 'pred_targets': [], 95 | 'agg_func': prev_trans.reg_loss, 96 | 'agg_func_kwargs': {'weight_decay': args.weight_decay}, 97 | 'loss_func': lambda output, *args, **kwargs: output['loss'], 98 | } 99 | 100 | # model_params 101 | model_params = { 102 | 'func': instance_model.build_KN_transfer_output, 103 | 'finetune_conv': args.finetune_conv, 104 | 'get_all_layers': args.get_all_layers, 105 | "model_type": args.model_type, 106 | "resnet_size": args.resnet_size, 107 | 'num_classes': args.num_classes, 108 | 'dropout': args.dropout, 109 | "final_pooling": args.final_pooling, 110 | "slowfast_single_pooling": args.slowfast_single_pooling, 111 | } 112 | # Only train the readout layer 113 | if not args.finetune_conv: 114 | model_params['trainable_scopes'] = ['instance'] 115 | 116 | multi_gpu = len(args.gpu.split(',')) 117 | if multi_gpu > 1: 118 | model_params['num_gpus'] = multi_gpu 119 | model_params['devices'] = ['/gpu:%i' % idx for idx in range(multi_gpu)] 120 | params['model_params'] = model_params 121 | 122 | # train_params 123 | train_data_loader = train_vie.get_train_pt_loader_from_arg(args) 124 | data_enumerator = [enumerate(train_data_loader)] 125 | def train_loop(sess, train_targets, num_minibatches=1, **params): 126 | assert num_minibatches==1, "Mini-batch not supported!" 127 | 128 | global_step_vars = [v for v in tf.global_variables() \ 129 | if 'global_step' in v.name] 130 | assert len(global_step_vars) == 1 131 | global_step = sess.run(global_step_vars[0]) 132 | 133 | # data_en_update_fre = train_vie.NUM_KINETICS_VIDEOS // args.batch_size 134 | data_en_update_fre = args.train_len // args.batch_size 135 | if global_step % data_en_update_fre == 0: 136 | data_enumerator.pop() 137 | data_enumerator.append(enumerate(train_data_loader)) 138 | _, (image, label, index) = next(data_enumerator[0]) 139 | feed_dict = data.get_feeddict(image, label, index) 140 | sess_res = sess.run(train_targets, feed_dict=feed_dict) 141 | return sess_res 142 | 143 | train_data_param = train_vie.get_train_data_param_from_arg(args) 144 | train_params = { 145 | 'validate_first': False, 146 | 'data_params': train_data_param, 147 | 'queue_params': None, 148 | 'thres_loss': float('Inf'), 149 | 'num_steps': float('Inf'), 150 | 'train_loop': {'func': train_loop}, 151 | } 152 | train_params['targets'] = { 153 | 'func': lambda inputs, output: {'accuracy': output['accuracy']}} 154 | params['train_params'] = train_params 155 | 156 | # validation_params 157 | # val_len = 19653 158 | topn_val_data_param = train_vie.get_topn_val_data_param_from_arg(args) 159 | valid_loop, val_step_num = train_vie.get_valid_loop_from_arg(args) 160 | val_targets = { 161 | 'func': valid_func, 162 | 'test_no_frames': args.test_no_frames} 163 | topn_val_param = { 164 | 'data_params': topn_val_data_param, 165 | 'queue_params': None, 166 | 'targets': val_targets, 167 | 'num_steps': val_step_num, 168 | 'agg_func': lambda x: {k: np.mean(v) for k, v in x.items()}, 169 | 'online_agg_func': train_vie.online_agg, 170 | 'valid_loop': {'func': valid_loop}} 171 | validation_params = {'topn': topn_val_param} 172 | params['validation_params'] = validation_params 173 | 174 | prev_trans.start_training(params, args) 175 | 176 | 177 | if __name__ == "__main__": 178 | main() 179 | -------------------------------------------------------------------------------- /tf_model/model/self_loss.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 
| import numpy as np 3 | import tensorflow as tf 4 | 5 | DATA_LEN_IMAGENET_FULL = 1281167 6 | 7 | 8 | def assert_shape(t, shape): 9 | assert t.get_shape().as_list() == shape, \ 10 | "Got shape %r, expected %r" % (t.get_shape().as_list(), shape) 11 | 12 | 13 | def get_selfloss(memory_bank, **kwargs): 14 | use_clusters = kwargs.get('use_clusters') 15 | if use_clusters is not None: 16 | all_labels = np.load(use_clusters) # path to all labels 17 | assert len(all_labels.shape) == 1 18 | sample_num = kwargs.get('num_cluster_samples') 19 | print("Using %d cluster labels read from %s for self-loss" % 20 | (len(all_labels), use_clusters)) 21 | print("Sampling %d items from the cluster each time" % sample_num) 22 | return ClusterLoss(memory_bank, all_labels, sample_num=sample_num) 23 | 24 | add_topn_dot = kwargs.get('add_topn_dot') 25 | add_thres_dot = kwargs.get('add_thres_dot') 26 | nn_list_path = kwargs.get('nn_list_path') 27 | if add_topn_dot is not None and add_thres_dot is not None: 28 | raise NotImplementedError( 29 | "Add_topn_dot and add_thres_dot cannot be both on!") 30 | 31 | if add_topn_dot is not None: 32 | if nn_list_path is not None: 33 | print("Using nearest %i self-loss with fixed " 34 | "neighbors loaded from %s" % (add_topn_dot, nn_list_path)) 35 | nn = np.load(nn_list_path) 36 | nn = nn['highest_dp_indices'][:,:add_topn_dot] 37 | print("Loaded nearest neighbor indices with shape", nn.shape) 38 | return StaticNearestNeighborLoss(memory_bank, nn) 39 | 40 | print("Using nearest %i self-loss." % add_topn_dot) 41 | return NearestNLoss(memory_bank, add_topn_dot) 42 | 43 | elif add_thres_dot is not None: 44 | print("Using threshold self-loss with threshold %f." % add_thres_dot) 45 | return ThresholdNeighborLoss(memory_bank, add_thres_dot) 46 | 47 | return DefaultSelfLoss(memory_bank) 48 | 49 | 50 | class DefaultSelfLoss(object): 51 | def __init__(self, memory_bank): 52 | self.memory_bank = memory_bank 53 | 54 | def get_closeness(self, idxs, vecs): 55 | return self.memory_bank.get_dot_products(vecs, idxs) 56 | 57 | 58 | class StaticNearestNeighborLoss(object): 59 | def __init__(self, memory_bank, nearest_neighbors): 60 | self.memory_bank = memory_bank 61 | nn_shape = nearest_neighbors.shape 62 | # [data_len, num_neighbors] 63 | # Your nearest neighbor is yourself, so if num_neighbors is 1, this 64 | # should be equivalent to the default loss. 65 | assert len(nn_shape) == 2 66 | self.nn = tf.constant(nearest_neighbors) 67 | 68 | def get_closeness(self, idxs, vecs): 69 | ''' 70 | idxs: The indices whose neighbors we care about. 71 | vecs: The embedding values of those indices. 
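Returns a [batch_size] tensor holding, for each row of `vecs`, the mean
dot product with the memory-bank entries of its fixed neighbors.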
72 | ''' 73 | cur_nn = tf.gather(self.nn, idxs, axis=0) 74 | nn_dps = self.memory_bank.get_dot_products(vecs, cur_nn) 75 | return tf.reduce_mean(nn_dps, axis=-1) 76 | 77 | 78 | class NearestNLoss(object): 79 | def __init__(self, memory_bank, n): 80 | self.memory_bank = memory_bank 81 | self.n = n 82 | 83 | def get_closeness(self, idxs, vecs): 84 | batch_size = idxs.get_shape().as_list()[0] 85 | all_dps = self.memory_bank.get_all_dot_products(vecs) 86 | topn_values, _ = tf.nn.top_k(all_dps, k=self.n, sorted=False) 87 | assert_shape(topn_values, [batch_size, self.n]) 88 | return tf.reduce_mean(topn_values, axis=1) 89 | 90 | 91 | class ThresholdNeighborLoss(object): 92 | def __init__(self, memory_bank, threshold): 93 | self.memory_bank = memory_bank 94 | self.threshold = threshold 95 | 96 | def get_closeness(self, idxs, vecs): 97 | batch_size = idxs.get_shape().as_list()[0] 98 | 99 | # Currently take the top 1000 dot products and then threshold them 100 | # TODO: fix this to be more general 101 | all_dps = self.memory_bank.get_all_dot_products(vecs) 102 | topn_values, _ = tf.nn.top_k(all_dps, k=1000, sorted=False) 103 | 104 | big_mask = topn_values > self.threshold 105 | all_values_under_mask = tf.boolean_mask(topn_values, big_mask) 106 | all_indexes_for_mask = tf.where(big_mask) 107 | # We only need the batch dimension 108 | batch_indexes_for_mask = all_indexes_for_mask[:, 0] 109 | big_number = tf.unsorted_segment_sum( 110 | tf.ones_like(all_values_under_mask), 111 | batch_indexes_for_mask, 112 | batch_size, 113 | ) 114 | big_sum = tf.unsorted_segment_sum( 115 | all_values_under_mask, 116 | batch_indexes_for_mask, 117 | batch_size, 118 | ) 119 | 120 | # Add the original data dot product in case the threshold is too high 121 | data_dot_product = self.memory_bank.get_dot_products(vecs, idxs) 122 | big_sum += data_dot_product 123 | big_number += 1 124 | return tf.reshape(big_sum/big_number, [batch_size]) 125 | 126 | 127 | class ClusterLoss(object): 128 | @staticmethod 129 | def pad_clusters_to_same_size(clusters): 130 | ''' 131 | Make clusters the same size by repeating elements.
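For example, clusters [[0], [1, 2, 3]] are padded to
[[0, 0, 0], [1, 2, 3]].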
132 | ''' 133 | ret = [] 134 | max_size = max(len(c) for c in clusters) 135 | for c in clusters: 136 | c = np.array(c) 137 | tiling = np.tile(c, (max_size // len(c))) 138 | # TODO: consider setting numpy seed 139 | padding = np.random.choice( 140 | c, size=(max_size - len(tiling)), replace=False) 141 | ret.append(np.concatenate([tiling, padding])) 142 | return np.stack(ret) 143 | 144 | def __init__(self, memory_bank, cluster_labels, sample_num=None): 145 | self.memory_bank = memory_bank 146 | self.cluster_labels = cluster_labels 147 | self.n = np.max(cluster_labels) + 1 148 | print('Initializing cluster loss with %i clusters' % self.n) 149 | # number of same-cluster labels to sample, or None meaning take 150 | # all of them 151 | self.sample_num = sample_num 152 | 153 | self.clusters = [[] for _ in range(self.n)] 154 | for idx, label in enumerate(self.cluster_labels): 155 | self.clusters[label].append(idx) 156 | self.clusters = ClusterLoss.pad_clusters_to_same_size(self.clusters) 157 | _, self.max_cluster_size = self.clusters.shape 158 | print('Padding each cluster to size %i' % self.max_cluster_size) 159 | 160 | def get_closeness(self, idxs, vecs): 161 | batch_size = idxs.get_shape().as_list()[0] 162 | cluster_ids = tf.gather(self.cluster_labels, idxs) 163 | # [bs] 164 | 165 | if self.sample_num is None: 166 | # Don't sample 167 | cluster_lists = tf.gather(self.clusters, cluster_ids) 168 | assert_shape(cluster_lists, [batch_size, self.max_cluster_size]) 169 | cluster_dps = self.memory_bank.get_dot_products(vecs, cluster_lists) 170 | return tf.reduce_mean(cluster_dps, axis=1) 171 | 172 | same_clust_idxs = tf.random_uniform( 173 | shape=(batch_size, self.sample_num), 174 | minval=0, maxval=self.max_cluster_size, 175 | dtype=tf.int64 176 | ) # now indices into the cluster lists 177 | same_clust_idxs = tf.stack([ 178 | tf.stack([cluster_ids] * self.sample_num, axis=-1), 179 | same_clust_idxs 180 | ], axis=-1) 181 | assert_shape(same_clust_idxs, [batch_size, self.sample_num, 2]) 182 | # [bs, sample_num, 2], stack with labels in preparation for 183 | # gather_nd, each index term [a, b] refers to 184 | # self.clusters[a][b] 185 | same_clust_idxs = tf.gather_nd(self.clusters, same_clust_idxs) 186 | assert_shape(same_clust_idxs, [batch_size, self.sample_num]) 187 | # [bs, sample_num] 188 | 189 | dps = self.memory_bank.get_dot_products( 190 | vecs, same_clust_idxs) 191 | return tf.reduce_mean(dps, axis=1) 192 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_finetune_UCF_fx.py: -------------------------------------------------------------------------------- 1 | def finetune_UCF_bs128(args): 2 | args['batch_size'] = 128 3 | args['test_batch_size'] = 64 4 | args['test_no_frames'] = 5 5 | args['fre_filter'] = 10000 6 | args['fre_cache_filter'] = 5000 7 | args['fre_valid'] = 500 8 | return args 9 | 10 | 11 | def finetune_basics(args): 12 | args['port'] = 27007 13 | args['finetune_conv'] = True 14 | args['final_pooling'] = 1 15 | args['cache_dir'] = "/mnt/fs4/shetw/tfutils_cache" 16 | 17 | args['init_lr'] = 0.0005 18 | args['weight_decay'] = 1e-5 19 | return args 20 | 21 | 22 | def UCF_basics(args): 23 | args['image_dir'] = '/data5/shetw/UCF101/extracted_frames' 24 | args['val_image_dir'] = args['image_dir'] 25 | args['metafile_root'] = '/data5/shetw/UCF101/metafiles' 26 | args['dataset'] = 'UCF101' 27 | args['train_len'] = 9537 28 | args['val_len'] = 3783 29 | args['num_classes'] = 101 30 | return args 31 | 32 | def 
finetune_UCF_all_basics(args): 33 | args = finetune_basics(args) 34 | args = UCF_basics(args) 35 | args = finetune_UCF_bs128(args) 36 | return args 37 | 38 | 39 | #################### Single-frame model #################### 40 | def single_frame_setting(args): 41 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl" 42 | args['load_port'] = 27006 43 | args['test_no_frame'] = 5 44 | args["train_num_workers"] = 10 45 | return args 46 | 47 | # Node08-3 48 | def vd_ctl_sc(): 49 | args = {} 50 | args = finetune_UCF_all_basics(args) 51 | args = single_frame_setting(args) 52 | 53 | args['train_prep'] = 'MultiScaleCrop_224' 54 | args["save_exp"] = "vd_finetune/UCF/vd_ctl_sc_2" 55 | args["lr_boundaries"] = '1390000,1410000' 56 | return args 57 | 58 | def vd_ctl_cj(): 59 | args = {} 60 | args = finetune_UCF_all_basics(args) 61 | args = single_frame_setting(args) 62 | 63 | args["save_exp"] = "vd_finetune/UCF/vd_ctl_cj" 64 | args['train_prep'] = 'ColorJitter' 65 | args["lr_boundaries"] = '1390000,1410000' 66 | return args 67 | 68 | def vd_sup_ctl_sc(): 69 | args = {} 70 | args = finetune_UCF_all_basics(args) 71 | args = single_frame_setting(args) 72 | 73 | args['load_exp'] = "vd_sup/ctl/vd_f1_ctl" 74 | args["save_exp"] = "vd_finetune/UCF/vd_sup_ctl_sc" 75 | args['train_prep'] = 'MultiScaleCrop_224' 76 | args["lr_boundaries"] = '715000,735000' 77 | return args 78 | 79 | def vd_sup_ctl_cj(): 80 | args = {} 81 | 82 | args = finetune_UCF_all_basics(args) 83 | args = single_frame_setting(args) 84 | args['load_exp'] = "vd_sup/ctl/vd_f1_ctl" 85 | args["save_exp"] = "vd_finetune/UCF/vd_sup_ctl_cj" 86 | args['train_prep'] = 'ColorJitter' 87 | args["lr_boundaries"] = '715000,735000' 88 | return args 89 | 90 | 91 | #################### Slowfast model #################### 92 | def slowfast_setting(args): 93 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slowfast_a4" 94 | args['load_port'] = 27006 95 | args['model_type'] = 'slowfast_a4' 96 | args['test_no_frames'] = 3 97 | args["train_num_workers"] = 40 98 | return args 99 | 100 | def vd_slowfast_a4_sc(): 101 | args = {} 102 | 103 | args = finetune_UCF_all_basics(args) 104 | args = slowfast_setting(args) 105 | args["save_exp"] = "vd_finetune/UCF/vd_slowfast_sc" 106 | args['train_prep'] = 'MultiScaleCrop_224' 107 | args["lr_boundaries"] = '1360000,1370000' 108 | return args 109 | 110 | def vd_slowfast_a4_cj(): 111 | args = {} 112 | 113 | args = finetune_UCF_all_basics(args) 114 | args = slowfast_setting(args) 115 | args["save_exp"] = "vd_finetune/UCF/vd_slowfast_cj" 116 | args['train_prep'] = 'ColorJitter' 117 | args["lr_boundaries"] = '1360000,1370000' 118 | return args 119 | 120 | def vd_sup_slowfast_a4_sc(): 121 | args = {} 122 | 123 | args = finetune_UCF_all_basics(args) 124 | args = slowfast_setting(args) 125 | args['load_exp'] = "vd_sup/ctl/vd_sup_slowfast_a4" 126 | args["save_exp"] = "vd_finetune/UCF/vd_sup_slowfast_sc" 127 | args['train_prep'] = 'MultiScaleCrop_224' 128 | args["lr_boundaries"] = '345000' 129 | return args 130 | 131 | def vd_sup_slowfast_a4_cj(): 132 | args = {} 133 | 134 | args = finetune_UCF_all_basics(args) 135 | args = slowfast_setting(args) 136 | args['load_exp'] = "vd_sup/ctl/vd_sup_slowfast_a4" 137 | args["save_exp"] = "vd_finetune/UCF/vd_sup_slowfast_cj" 138 | args['train_prep'] = 'ColorJitter' 139 | args["lr_boundaries"] = '345000' 140 | return args 141 | 142 | 143 | #################### Slowfast-single model #################### 144 | def slowfast_single_setting(args): 145 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slowfast_a4" 
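# from_ckpt points at a raw TensorFlow checkpoint on disk (presumably the
# slowfast weights re-exported for the single-pathway variant); when set,
# it is used for restoring instead of the load_exp database record.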
146 | args["from_ckpt"] = '/mnt/fs3/chengxuz/vd_relat/slowfast_single_model/model' 147 | args['load_port'] = 27006 148 | args['model_type'] = 'slowfastsingle_avg' 149 | args["get_all_layers"] = '9' 150 | args['test_no_frames'] = 3 151 | args["train_num_workers"] = 40 152 | args['batch_size'] = 64 153 | args['slowfast_single_pooling'] = True 154 | 155 | return args 156 | 157 | def vd_slowfast_single_a4_sc(): 158 | args = {} 159 | 160 | args = finetune_UCF_all_basics(args) 161 | args = slowfast_single_setting(args) 162 | args["save_exp"] = "vd_finetune/UCF/vd_slowfast_single_sc_2" 163 | args['train_prep'] = 'MultiScaleCrop_224' 164 | args["lr_boundaries"] = '10000,20000' 165 | return args 166 | 167 | def vd_slowfast_single_a4_cj(): 168 | args = {} 169 | 170 | args = finetune_UCF_all_basics(args) 171 | args = slowfast_single_setting(args) 172 | args["save_exp"] = "vd_finetune/UCF/vd_slowfast_single_cj_2" 173 | args['train_prep'] = 'ColorJitter' 174 | args["lr_boundaries"] = '10000,20000' 175 | return args 176 | 177 | 178 | #################### 3D ResNet #################### 179 | def resnet3d_setting(args): 180 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_3dresnet" 181 | args['load_port'] = 27007 182 | args['model_type'] = '3dresnet' 183 | args['test_no_frames'] = 10 184 | args["train_num_workers"] = 40 185 | return args 186 | 187 | def vd_3dresnet_sc(): 188 | args = {} 189 | args = finetune_UCF_all_basics(args) 190 | args = resnet3d_setting(args) 191 | 192 | args["save_exp"] = "vd_finetune/UCF/vd_3dresnet_sc" 193 | args['train_prep'] = 'MultiScaleCrop_112' 194 | return args 195 | 196 | def vd_3dresnet_cj(): 197 | args = {} 198 | args = finetune_UCF_all_basics(args) 199 | args = resnet3d_setting(args) 200 | 201 | args["save_exp"] = "vd_finetune/UCF/vd_3dresnet_cj" 202 | args['train_prep'] = 'ColorJitter_112' 203 | return args 204 | 205 | def vd_sup_3dresnet_sc(): 206 | args = {} 207 | args = finetune_UCF_all_basics(args) 208 | args = resnet3d_setting(args) 209 | 210 | args['load_exp'] = 'vd_sup/KN/vd_sup_3dresnet' 211 | args["save_exp"] = "vd_finetune/UCF/vd_sup_3dresnet_sc" 212 | args['train_prep'] = 'MultiScaleCrop_112' 213 | return args 214 | 215 | def vd_sup_3dresnet_cj(): 216 | args = {} 217 | args = finetune_UCF_all_basics(args) 218 | args = resnet3d_setting(args) 219 | 220 | args['load_exp'] = 'vd_sup/KN/vd_sup_3dresnet' 221 | args["save_exp"] = "vd_finetune/UCF/vd_sup_3dresnet_cj" 222 | args['train_prep'] = 'ColorJitter_112' 223 | return args 224 | 225 | 226 | #################### OPN #################### 227 | def vd_opn_sc(): 228 | args = {} 229 | args = finetune_UCF_all_basics(args) 230 | 231 | args['load_exp'] = "vd_unsup_fx/opn/opn_random_sep_flow" 232 | args['load_port'] = 27006 233 | args['test_no_frames'] = 5 234 | args["train_num_workers"] = 20 235 | 236 | args["save_exp"] = "vd_finetune/UCF/vd_opn_sc" 237 | args['train_prep'] = 'MultiScaleCrop_224' 238 | #args['lr_boundaries'] = '290000+' 239 | args['lr_boundaries'] = '340000' 240 | return args 241 | 242 | 243 | 244 | 245 | def vd_3drotnet_UCF(): 246 | args = {} 247 | 248 | args = finetune_UCF_all_basics(args) 249 | 250 | args['load_exp'] = "vd_unsup_fx/rot/rot_3dresnet" 251 | args['load_port'] = 27006 252 | args["save_exp"] = "vd_finetune/UCF/vd_3drotnet_hflip" 253 | 254 | args['model_type'] = '3dresnet' 255 | args['train_prep'] = '3DRotNet_finetune' 256 | args['test_no_frames'] = 10 257 | args['final_pooling'] = 1 258 | 259 | args['finetune_conv'] = True 260 | args["train_num_workers"] = 30 261 | 262 | args['init_lr'] = 
0.008 263 | #args['lr_boundaries'] = '246000' 264 | return args 265 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_finetune_HMDB_fx.py: -------------------------------------------------------------------------------- 1 | def finetune_HMDB_bs128(args): 2 | args['batch_size'] = 128 3 | args['test_batch_size'] = 64 4 | args['test_no_frames'] = 5 5 | args['fre_filter'] = 10000 6 | args['fre_cache_filter'] = 5000 7 | args['fre_valid'] = 500 8 | return args 9 | 10 | 11 | def finetune_basics(args): 12 | args['port'] = 27007 13 | args['finetune_conv'] = True 14 | args['final_pooling'] = 1 15 | args['cache_dir'] = "/mnt/fs4/shetw/tfutils_cache" 16 | 17 | args['init_lr'] = 0.0005 18 | args['weight_decay'] = 1e-5 19 | return args 20 | 21 | def HMDB_basics(args): 22 | args['image_dir'] = '/data5/shetw/HMDB51/extracted_frames' 23 | args['val_image_dir'] = args['image_dir'] 24 | args['metafile_root'] = '/data5/shetw/HMDB51/metafiles' 25 | args['dataset'] = 'HMDB51' 26 | args['train_len'] = 3570 27 | args['val_len'] = 1530 28 | args['num_classes'] = 51 29 | return args 30 | 31 | def finetune_HMDB_all_basics(args): 32 | args = finetune_basics(args) 33 | args = HMDB_basics(args) 34 | args = finetune_HMDB_bs128(args) 35 | return args 36 | 37 | 38 | #################### Single-frame model #################### 39 | def single_frame_setting(args): 40 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl" 41 | args['load_port'] = 27006 42 | args['test_no_frame'] = 5 43 | args["train_num_workers"] = 10 44 | return args 45 | 46 | def vd_ctl_sc(): 47 | args = {} 48 | args = finetune_HMDB_all_basics(args) 49 | args = single_frame_setting(args) 50 | 51 | args['train_prep'] = 'MultiScaleCrop_224' 52 | args["save_exp"] = "vd_finetune/HMDB/vd_ctl_sc_2" 53 | args["lr_boundaries"] = '1385000,1395000' 54 | return args 55 | 56 | def vd_ctl_cj(): 57 | args = {} 58 | args = finetune_HMDB_all_basics(args) 59 | args = single_frame_setting(args) 60 | 61 | args["save_exp"] = "vd_finetune/HMDB/vd_ctl_cj" 62 | args['train_prep'] = 'ColorJitter' 63 | args["lr_boundaries"] = '1385000,1395000' 64 | return args 65 | 66 | def vd_sup_ctl_sc(): 67 | args = {} 68 | args = finetune_HMDB_all_basics(args) 69 | args = single_frame_setting(args) 70 | 71 | args['load_exp'] = "vd_sup/ctl/vd_f1_ctl" 72 | args["save_exp"] = "vd_finetune/HMDB/vd_sup_ctl_sc" 73 | args['train_prep'] = 'MultiScaleCrop_224' 74 | args["lr_boundaries"] = '715000,735000' 75 | return args 76 | 77 | def vd_sup_ctl_cj(): 78 | args = {} 79 | 80 | args = finetune_HMDB_all_basics(args) 81 | args = single_frame_setting(args) 82 | args['load_exp'] = "vd_sup/ctl/vd_f1_ctl" 83 | args["save_exp"] = "vd_finetune/HMDB/vd_sup_ctl_cj" 84 | args['train_prep'] = 'ColorJitter' 85 | args["lr_boundaries"] = '710000,720000' 86 | return args 87 | 88 | 89 | #################### Slowfast model #################### 90 | def slowfast_setting(args): 91 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slowfast_a4" 92 | args['load_port'] = 27006 93 | args['model_type'] = 'slowfast_a4' 94 | args['test_no_frames'] = 2 95 | args["train_num_workers"] = 40 96 | return args 97 | 98 | def vd_slowfast_a4_sc(): 99 | args = {} 100 | 101 | args = finetune_HMDB_all_basics(args) 102 | args = slowfast_setting(args) 103 | args["save_exp"] = "vd_finetune/HMDB/vd_slowfast_sc" 104 | args['train_prep'] = 'MultiScaleCrop_224' 105 | args["lr_boundaries"] = '1370000,1380000' 106 | return args 107 | 108 | def vd_slowfast_a4_cj(): 109 | args = {} 110 | 111 | args = 
finetune_HMDB_all_basics(args) 112 | args = slowfast_setting(args) 113 | args["save_exp"] = "vd_finetune/HMDB/vd_slowfast_cj" 114 | args['train_prep'] = 'ColorJitter' 115 | args["lr_boundaries"] = '1365000,1380000' 116 | return args 117 | 118 | def vd_sup_slowfast_a4_sc(): 119 | args = {} 120 | 121 | args = finetune_HMDB_all_basics(args) 122 | args = slowfast_setting(args) 123 | args['load_exp'] = "vd_sup/ctl/vd_sup_slowfast_a4" 124 | args["save_exp"] = "vd_finetune/HMDB/vd_sup_slowfast_sc" 125 | args['train_prep'] = 'MultiScaleCrop_224' 126 | args["lr_boundaries"] = '340000' 127 | return args 128 | 129 | def vd_sup_slowfast_a4_cj(): 130 | args = {} 131 | 132 | args = finetune_HMDB_all_basics(args) 133 | args = slowfast_setting(args) 134 | args['load_exp'] = "vd_sup/ctl/vd_sup_slowfast_a4" 135 | args["save_exp"] = "vd_finetune/HMDB/vd_sup_slowfast_cj" 136 | args['train_prep'] = 'ColorJitter' 137 | args["lr_boundaries"] = '345000' 138 | return args 139 | 140 | 141 | #################### Slowfast-single model #################### 142 | def slowfast_single_setting(args): 143 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slowfast_a4" 144 | args["from_ckpt"] = '/mnt/fs3/chengxuz/vd_relat/slowfast_single_model/model' 145 | args['load_port'] = 27006 146 | args['model_type'] = 'slowfastsingle_avg' 147 | args["get_all_layers"] = '9' 148 | args['test_no_frames'] = 3 149 | args["train_num_workers"] = 40 150 | args['batch_size'] = 64 151 | args['slowfast_single_pooling'] = True 152 | return args 153 | 154 | def vd_slowfast_single_a4_sc(): 155 | args = {} 156 | 157 | args = finetune_HMDB_all_basics(args) 158 | args = slowfast_single_setting(args) 159 | args["save_exp"] = "vd_finetune/HMDB/vd_slowfast_single_sc_2" 160 | args['train_prep'] = 'MultiScaleCrop_224' 161 | args["lr_boundaries"] = '10000,20000' 162 | return args 163 | 164 | def vd_slowfast_single_a4_cj(): 165 | args = {} 166 | 167 | args = finetune_HMDB_all_basics(args) 168 | args = slowfast_single_setting(args) 169 | args["save_exp"] = "vd_finetune/HMDB/vd_slowfast_single_cj_2" 170 | args['train_prep'] = 'ColorJitter' 171 | args["lr_boundaries"] = '10000,20000' 172 | return args 173 | 174 | 175 | #################### 3D ResNet #################### 176 | def resnet3d_setting(args): 177 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_3dresnet" 178 | args['load_port'] = 27007 179 | args['model_type'] = '3dresnet' 180 | args['test_no_frames'] = 4 181 | args["train_num_workers"] = 40 182 | return args 183 | 184 | def vd_3dresnet_sc(): 185 | args = {} 186 | args = finetune_HMDB_all_basics(args) 187 | args = resnet3d_setting(args) 188 | 189 | args["save_exp"] = "vd_finetune/HMDB/vd_3dresnet_sc" 190 | args['train_prep'] = 'MultiScaleCrop_112' 191 | return args 192 | 193 | def vd_3dresnet_cj(): 194 | args = {} 195 | args = finetune_HMDB_all_basics(args) 196 | args = resnet3d_setting(args) 197 | 198 | args["save_exp"] = "vd_finetune/HMDB/vd_3dresnet_cj" 199 | args['train_prep'] = 'ColorJitter_112' 200 | args['lr_boundaries'] = '1490000,1510000' 201 | return args 202 | 203 | def vd_sup_3dresnet_sc(): 204 | args = {} 205 | args = finetune_HMDB_all_basics(args) 206 | args = resnet3d_setting(args) 207 | 208 | args['load_exp'] = 'vd_sup/KN/vd_sup_3dresnet' 209 | args["save_exp"] = "vd_finetune/HMDB/vd_sup_3dresnet_sc" 210 | args['train_prep'] = 'MultiScaleCrop_112' 211 | args['lr_boundaries'] = '140000' 212 | return args 213 | 214 | def vd_sup_3dresnet_cj(): 215 | args = {} 216 | args = finetune_HMDB_all_basics(args) 217 | args = resnet3d_setting(args) 218 
| 219 | args['load_exp'] = 'vd_sup/KN/vd_sup_3dresnet' 220 | args["save_exp"] = "vd_finetune/HMDB/vd_sup_3dresnet_cj" 221 | args['train_prep'] = 'ColorJitter_112' 222 | args['lr_boundaries'] = '140000' 223 | return args 224 | 225 | def vd_opn_sc(): 226 | args = {} 227 | args = finetune_HMDB_all_basics(args) 228 | 229 | args['load_exp'] = "vd_unsup_fx/opn/opn_random_sep_flow" 230 | args['load_port'] = 27006 231 | args['test_no_frames'] = 5 232 | args["train_num_workers"] = 20 233 | 234 | args["save_exp"] = "vd_finetune/HMDB/vd_opn_sc" 235 | args['train_prep'] = 'MultiScaleCrop_224' 236 | #args['lr_boundaries'] = '290000+' 237 | args['lr_boundaries'] = '340000' 238 | return args 239 | 240 | 241 | 242 | 243 | def vd_3drotnet_HMDB(): 244 | args = {} 245 | 246 | args = finetune_HMDB_all_basics(args) 247 | 248 | args['load_exp'] = "vd_unsup_fx/rot/rot_3dresnet" 249 | args['load_port'] = 27006 250 | args["save_exp"] = "vd_finetune/HMDB/vd_3drotnet_hflip" 251 | 252 | args['model_type'] = '3dresnet' 253 | args['train_prep'] = '3DRotNet_finetune' 254 | args['test_no_frames'] = 10 255 | args['final_pooling'] = 1 256 | 257 | args['finetune_conv'] = True 258 | args["train_num_workers"] = 30 259 | 260 | args['init_lr'] = 0.008 261 | #args['lr_boundaries'] = '246000' 262 | return args 263 | -------------------------------------------------------------------------------- /notebook/jupyter_utils.py: -------------------------------------------------------------------------------- 1 | import pymongo as pm 2 | import gridfs 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pylab 7 | 8 | from scipy import misc 9 | import os 10 | import time 11 | 12 | import sklearn.linear_model 13 | import math 14 | from matplotlib.backends.backend_pdf import PdfPages 15 | from scipy.stats import pearsonr 16 | from scipy.stats import spearmanr 17 | import json 18 | 19 | vis_big_dict = {} 20 | 21 | 22 | def show_val( 23 | expid, dbname, colname, cache_dir, 24 | key='top1_10NN', 25 | valid_key='topn', 26 | batch_watch_start=0, 27 | batch_watch_end=None, 28 | new_figure=True, label_now=None, batch_offset=0, 29 | do_plot=True, do_conv=None): 30 | log_path = os.path.join( 31 | cache_dir, 'models', dbname, colname, expid, 'val_log.txt') 32 | with open(log_path, 'r') as fin: 33 | logs = fin.readlines() 34 | logs = filter(lambda x: x.startswith(valid_key), logs) 35 | logs = list(logs) 36 | logs = [ 37 | json.loads(log[len(valid_key) + 1:].replace("'", '"')) \ 38 | for log in logs] 39 | logs = filter(lambda x: key in x, logs) 40 | performance = [log[key] for log in logs] 41 | return plot_perf( 42 | performance, 43 | do_plot, new_figure, do_conv, 44 | batch_watch_start, batch_watch_end, label_now, batch_offset) 45 | 46 | 47 | def show_val_in_tfutils( 48 | curr_expid, 49 | conn, 50 | key='loss', 51 | dbname='combinet-test', 52 | valid_key='topn', 53 | colname='combinet', 54 | gridfs_name='combinet', 55 | big_dict=vis_big_dict, 56 | batch_watch_start=0, 57 | batch_watch_end=None, 58 | new_figure=True, 59 | label_now=None, 60 | batch_offset=0, 61 | do_conv=None, 62 | do_plot=True, 63 | special_delete=None): 64 | 65 | colname += '.files' 66 | if label_now is None: 67 | label_now = curr_expid 68 | 69 | find_res = conn[dbname][colname].find({'exp_id': curr_expid, 70 | 'validation_results': {'$exists': True}}) 71 | find_res = sorted(find_res, key = lambda x: x['step'] or -1) 72 | if len(find_res)==0: 73 | return None 74 | new_find_res = [] 75 | for curr_indx in range(len(find_res)-1): 76 | if find_res[curr_indx]['step'] 
== find_res[curr_indx+1]['step'] and find_res[curr_indx]['step'] is not None: 77 | continue 78 | new_find_res.append(find_res[curr_indx]) 79 | new_find_res.append(find_res[len(find_res)-1]) 80 | find_res = new_find_res 81 | 82 | if len(find_res)==0: 83 | return 84 | find_res = filter(lambda x: valid_key in x['validation_results'], find_res) 85 | find_res = filter(lambda x: key in x['validation_results'][valid_key], find_res) 86 | find_res = list(find_res) 87 | if special_delete: 88 | del find_res[special_delete] 89 | if key in find_res[0]['validation_results'][valid_key].keys(): 90 | list_res = find_res 91 | else: 92 | print(find_res[0]['validation_results'][valid_key].keys()) 93 | assert key in find_res[0]['validation_results'][valid_key].keys(), 'Wrong key %s!' % key 94 | performance = [r['validation_results'][valid_key][key] for r in list_res] 95 | return plot_perf( 96 | performance, 97 | do_plot, new_figure, do_conv, 98 | batch_watch_start, batch_watch_end, label_now, batch_offset) 99 | 100 | 101 | def plot_perf( 102 | performance, 103 | do_plot, new_figure, do_conv, 104 | batch_watch_start, batch_watch_end, label_now, batch_offset): 105 | 106 | x_range = range(len(performance)) 107 | if new_figure and do_plot: 108 | plt.figure(figsize=(9, 5)) 109 | x_range = np.asarray(x_range) + batch_offset 110 | performance = np.asarray(performance) 111 | 112 | if do_conv: 113 | conv_list = np.ones([do_conv]) / do_conv 114 | performance = np.convolve(performance, conv_list, mode='valid') 115 | x_range = x_range[:len(performance)] 116 | 117 | choose_indx = x_range > batch_watch_start 118 | if batch_watch_end is not None: 119 | choose_indx = choose_indx & (x_range < batch_watch_end) 120 | 121 | if do_plot: 122 | plt.plot( 123 | x_range[choose_indx], performance[choose_indx], 124 | label = label_now) 125 | plt.title('Validation Performance') 126 | plt.legend(loc='best') 127 | if do_plot: 128 | return performance 129 | else: 130 | return x_range[choose_indx], performance[choose_indx] 131 | 132 | 133 | def show_train_learnrate( 134 | curr_expid, 135 | conn, 136 | cache_dict={}, 137 | dbname='combinet-test', 138 | colname='combinet.files', 139 | start_N=50, 140 | with_dataset=None, 141 | batch_watch_start=0, 142 | batch_watch_end=None, 143 | do_conv=False, 144 | conv_len=100, 145 | new_figure=True, 146 | batch_size=8, 147 | batch_offset=0, 148 | max_step=None, 149 | label_now=None, 150 | loss_key='loss', 151 | refresh_cache=True, 152 | ): 153 | 154 | if label_now is None: 155 | label_now = curr_expid 156 | 157 | cache_key = os.path.join(dbname, colname, curr_expid) 158 | if refresh_cache or cache_key not in cache_dict: 159 | find_res = conn[dbname][colname].find( 160 | {'exp_id': curr_expid, 'train_results': {'$exists': True}}) 161 | find_res = sorted(find_res, key = lambda x: x['step']) 162 | new_find_res = [] 163 | for curr_indx in range(len(find_res)-1): 164 | if find_res[curr_indx]['step'] == find_res[curr_indx+1]['step']: 165 | continue 166 | new_find_res.append(find_res[curr_indx]) 167 | new_find_res.append(find_res[len(find_res)-1]) 168 | find_res = new_find_res 169 | cache_dict[cache_key] = find_res 170 | else: 171 | find_res = cache_dict[cache_key] 172 | 173 | if max_step: 174 | find_res = filter(lambda x: x['step'] < max_step, find_res) [... source lines lost in extraction ...] > batch_watch_start 226 | if batch_watch_end is not None: 227 | new_indx_list = (temp_x_list > batch_watch_start) & (temp_x_list < batch_watch_end) [... source lines lost in extraction ...] > batch_watch_start 235 | if batch_watch_end is not None: 236 | new_indx_list_2 = (temp_x_list_2 > batch_watch_start) & (temp_x_list_2 < batch_watch_end) [... remainder of jupyter_utils.py lost in extraction ...] -------------------------------------------------------------------------------- /tf_model/framework.py: -------------------------------------------------------------------------------- [... beginning of file lost in extraction ...] > 0: 201 | print('Saving
model...') 202 | self.saver.save( 203 | self.sess, 204 | os.path.join( 205 | self.cache_dir, 206 | 'model.ckpt'), 207 | global_step=curr_step) 208 | 209 | self.log_writer.write(message + '\n') 210 | if curr_step % self.save_params['save_metrics_freq'] == 0: 211 | self.log_writer.close() 212 | self.log_writer = open(self.log_file_path, 'a+') 213 | 214 | if curr_step % self.save_params['save_valid_freq'] == 0: 215 | for each_val_key in self.validation_params: 216 | val_result = self.run_each_validation(each_val_key) 217 | self.val_log_writer.write( 218 | '%s: %s\n' % (each_val_key, str(val_result))) 219 | print(val_result) 220 | self.val_log_writer.close() 221 | self.val_log_writer = open(self.val_log_file_path, 'a+') 222 | 223 | def build_train(self): 224 | self.build_inputs() 225 | self.outputs = self.build_network(self.inputs, True) 226 | self.build_train_op() 227 | self.build_train_targets() 228 | 229 | def build_val_inputs(self, val_key): 230 | data_params = self.validation_params[val_key]['data_params'] 231 | func = data_params.pop('func') 232 | val_inputs = func(**data_params) 233 | return val_inputs 234 | 235 | def build_val_network(self, val_key, val_inputs): 236 | with tf.name_scope('validation/' + val_key): 237 | val_outputs = self.build_network(val_inputs, False) 238 | return val_outputs 239 | 240 | def build_val_targets(self, val_key, val_inputs, val_outputs): 241 | target_params = self.validation_params[val_key]['targets'] 242 | func = target_params.pop('func') 243 | val_targets = func(val_inputs, val_outputs, **target_params) 244 | return val_targets 245 | 246 | def build_val(self): 247 | tf.get_variable_scope().reuse_variables() 248 | self.all_val_targets = {} 249 | for each_val_key in self.validation_params: 250 | val_inputs = self.build_val_inputs(each_val_key) 251 | val_outputs = self.build_val_network(each_val_key, val_inputs) 252 | val_targets = self.build_val_targets( 253 | each_val_key, val_inputs, val_outputs) 254 | self.all_val_targets[each_val_key] = val_targets 255 | 256 | def train(self): 257 | self.build_train() 258 | self.build_val() 259 | 260 | self.build_sess_and_saver() 261 | self.init_and_restore() 262 | 263 | self.run_train_loop() 264 | -------------------------------------------------------------------------------- /tf_model/model/resnet_th_preprocessing.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | import os, sys 8 | import numpy as np 9 | import pdb 10 | 11 | EPS = 1e-6 12 | 13 | 14 | def _at_least_x_are_true(a, b, x): 15 | """At least `x` of `a` and `b` `Tensors` are true.""" 16 | match = tf.equal(a, b) 17 | match = tf.cast(match, tf.int32) 18 | return tf.greater_equal(tf.reduce_sum(match), x) 19 | 20 | 21 | def image_resize( 22 | crop_image, out_height, out_width, 23 | ): 24 | resize_func = tf.image.resize_area 25 | image = tf.cast( 26 | resize_func( 27 | [crop_image], 28 | [out_height, out_width])[0], 29 | dtype=tf.uint8) 30 | return image 31 | 32 | 33 | def RandomSizedCrop_from_jpeg( 34 | image_str, 35 | out_height, 36 | out_width, 37 | size_minval=0.08, 38 | ): 39 | shape = tf.image.extract_jpeg_shape(image_str) 40 | bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 41 | crop_max_attempts = 100 42 | sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( 43 | shape, 44 | bounding_boxes=bbox, 45 | 
min_object_covered=0.1, 46 | aspect_ratio_range=(3. / 4, 4. / 3.), 47 | area_range=(size_minval, 1.0), 48 | max_attempts=crop_max_attempts, 49 | use_image_if_no_bounding_boxes=True) 50 | bbox_begin, bbox_size, bbox = sample_distorted_bounding_box 51 | random_image = tf.image.decode_and_crop_jpeg( 52 | image_str, 53 | tf.stack([bbox_begin[0], bbox_begin[1], \ 54 | bbox_size[0], bbox_size[1]]), 55 | channels=3) 56 | bad = _at_least_x_are_true(shape, tf.shape(random_image), 3) 57 | # central crop if bad 58 | min_size = tf.minimum(shape[0], shape[1]) 59 | offset_height = tf.random_uniform( 60 | shape=[], 61 | minval=0, maxval=shape[0] - min_size + 1, 62 | dtype=tf.int32 63 | ) 64 | offset_width = tf.random_uniform( 65 | shape=[], 66 | minval=0, maxval=shape[1] - min_size + 1, 67 | dtype=tf.int32 68 | ) 69 | bad_image = tf.image.decode_and_crop_jpeg( 70 | image_str, 71 | tf.stack([offset_height, offset_width, \ 72 | min_size, min_size]), 73 | channels=3) 74 | image = tf.cond( 75 | bad, 76 | lambda: bad_image, 77 | lambda: random_image, 78 | ) 79 | # if use py_func, will do resize elsewhere 80 | image = image_resize( 81 | image, 82 | out_height, out_width, 83 | ) 84 | image.set_shape([out_height, out_width, 3]) 85 | return image 86 | 87 | 88 | def RandomBrightness(image, low, high): 89 | rnd_bright = tf.random_uniform( 90 | shape=[], 91 | minval=low, maxval=high, 92 | dtype=tf.float32) 93 | #rnd_bright = tf.Print(rnd_bright, [rnd_bright], message='Brigh') 94 | flt_image = tf.cast(image, tf.float32) 95 | blend_image = flt_image * rnd_bright 96 | blend_image = tf.maximum(blend_image, 0) 97 | blend_image = tf.minimum(blend_image, 255) 98 | image_after = tf.cast(blend_image + EPS, tf.uint8) 99 | return image_after 100 | 101 | 102 | def RGBtoGray(flt_image): 103 | flt_image = tf.cast(flt_image, tf.float32) 104 | gry_image = flt_image[:,:,0] * 0.299 \ 105 | + flt_image[:,:,1] * 0.587 \ 106 | + flt_image[:,:,2] * 0.114 107 | gry_image = tf.expand_dims(gry_image, axis=2) 108 | gry_image = tf.cast(gry_image + EPS, tf.uint8) 109 | gry_image = tf.cast(gry_image, tf.float32) 110 | return gry_image 111 | 112 | 113 | def RandomSaturation(image, low, high): 114 | rnd_saturt = tf.random_uniform( 115 | shape=[], 116 | minval=low, maxval=high, 117 | dtype=tf.float32) 118 | #rnd_saturt = tf.Print(rnd_saturt, [rnd_saturt], message='Satu') 119 | flt_image = tf.cast(image, tf.float32) 120 | gry_image = RGBtoGray(flt_image) 121 | blend_image = flt_image * rnd_saturt + gry_image * (1-rnd_saturt) 122 | blend_image = tf.maximum(blend_image, 0) 123 | blend_image = tf.minimum(blend_image, 255) 124 | image_after = tf.cast(blend_image + EPS, tf.uint8) 125 | return image_after 126 | 127 | 128 | def RandomContrast(image, low, high): 129 | rnd_contr = tf.random_uniform( 130 | shape=[], 131 | minval=low, maxval=high, 132 | dtype=tf.float32) 133 | #rnd_contr = tf.Print(rnd_contr, [rnd_contr], message='Contr') 134 | flt_image = tf.cast(image, tf.float32) 135 | mean_gray = tf.cast( 136 | tf.cast( 137 | tf.reduce_mean(RGBtoGray(flt_image)) + EPS, 138 | tf.uint8), 139 | tf.float32) 140 | blend_image = flt_image * rnd_contr + mean_gray * (1-rnd_contr) 141 | blend_image = tf.maximum(blend_image, 0) 142 | blend_image = tf.minimum(blend_image, 255) 143 | image_after = tf.cast(blend_image + EPS, tf.uint8) 144 | return image_after 145 | 146 | 147 | def ColorJitter(image, seed_random=0, 148 | as_batch=False, shape_undefined=1, 149 | ): 150 | order_temp = tf.constant([0,1,2,3], dtype=tf.int32) 151 | order_rand = tf.random_shuffle(order_temp) 
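# order_rand is a random permutation of [0, 1, 2, 3]; the loop below uses
# tf.case to apply saturation (0), brightness (1), hue (2), or contrast
# (the default branch) at each position, so the four jitter ops run in a
# random order.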
152 | #order_rand = tf.Print(order_rand, [order_rand], message='Order') 153 | 154 | random_hue_func = tf.image.random_hue 155 | 156 | fn_pred_fn_pairs = lambda x, image: [ 157 | (tf.equal(x, order_temp[0]), \ 158 | lambda :RandomSaturation(image, 0.6, 1.4)), 159 | (tf.equal(x, order_temp[1]), \ 160 | lambda :RandomBrightness(image, 0.6, 1.4)), 161 | (tf.equal(x, order_temp[2]), \ 162 | lambda :random_hue_func(image, 0.4)), 163 | ] 164 | #default_fn = lambda image: tf.image.random_contrast(image, 0.6, 1.4) 165 | default_fn = lambda image: RandomContrast(image, 0.6, 1.4) 166 | 167 | def _color_jitter_one(_norm): 168 | orig_shape = tf.shape(_norm) 169 | for curr_idx in range(order_temp.get_shape().as_list()[0]): 170 | _norm = tf.case( 171 | fn_pred_fn_pairs(order_rand[curr_idx], _norm), 172 | default=lambda : default_fn(_norm)) 173 | if shape_undefined==0: 174 | _norm.set_shape(orig_shape) 175 | return _norm 176 | if as_batch: 177 | image = tf.map_fn(_color_jitter_one, image) 178 | else: 179 | image = _color_jitter_one(image) 180 | return image 181 | 182 | 183 | def ColorNormalize(image): 184 | transpose_flag = image.get_shape().as_list()[-1] != 3 185 | if transpose_flag: 186 | image = tf.transpose(image, [1,2,0]) 187 | imagenet_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) 188 | imagenet_std = np.array([0.229, 0.224, 0.225], dtype=np.float32) 189 | image = (image - imagenet_mean) / imagenet_std 190 | if transpose_flag: 191 | image = tf.transpose(image, [2,0,1]) 192 | 193 | return image 194 | 195 | 196 | def ApplyGray(norm, prob_gray, as_batch=False): 197 | def _postprocess_gray(im): 198 | do_gray = tf.random_uniform( 199 | shape=[], 200 | minval=0, 201 | maxval=1, 202 | dtype=tf.float32) 203 | def __gray(im): 204 | gray_im = tf.cast(RGBtoGray(im), tf.uint8) 205 | gray_im = tf.tile(gray_im, [1,1,3]) 206 | return gray_im 207 | return tf.cond( 208 | tf.less(do_gray, prob_gray), 209 | lambda: __gray(im), 210 | lambda: im) 211 | if as_batch: 212 | norm = tf.map_fn(_postprocess_gray, norm, dtype=norm.dtype) 213 | else: 214 | norm = _postprocess_gray(norm) 215 | return norm 216 | 217 | 218 | def get_resize_scale(height, width, smallest_side): 219 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 220 | 221 | height = tf.to_float(height) 222 | width = tf.to_float(width) 223 | smallest_side = tf.to_float(smallest_side) 224 | 225 | scale = tf.cond( 226 | tf.greater(height, width), 227 | lambda: smallest_side / width, 228 | lambda: smallest_side / height) 229 | return scale 230 | 231 | 232 | def alexnet_crop_from_jpg(image_string): 233 | """ 234 | Resize the image to make its smallest side to be 256; 235 | then randomly get a 224 crop 236 | """ 237 | crop_size = 224 238 | shape = tf.image.extract_jpeg_shape(image_string) 239 | scale = get_resize_scale(shape[0], shape[1], 256) 240 | cp_height = tf.cast(crop_size / scale, tf.int32) 241 | cp_width = tf.cast(crop_size / scale, tf.int32) 242 | 243 | # Randomly sample begin x and y 244 | # Original AlexNet preprocessing uses center 256*256 to crop 245 | min_shape = tf.minimum(shape[0], shape[1]) 246 | x_range = [ 247 | tf.cast((shape[0] - min_shape) / 2, tf.int32), 248 | shape[0] - cp_height + 1 - \ 249 | tf.cast( 250 | (shape[0] - min_shape) / 2, 251 | tf.int32), 252 | ] 253 | y_range = [ 254 | tf.cast((shape[1] - min_shape) / 2, tf.int32), 255 | shape[1] - cp_width + 1 - \ 256 | tf.cast( 257 | (shape[1] - min_shape) / 2, 258 | tf.int32), 259 | ] 260 | 261 | cp_begin_x = tf.random_uniform( 262 | shape=[], 263 | 
minval=x_range[0], maxval=x_range[1], 264 | dtype=tf.int32 265 | ) 266 | cp_begin_y = tf.random_uniform( 267 | shape=[], 268 | minval=y_range[0], maxval=y_range[1], 269 | dtype=tf.int32 270 | ) 271 | 272 | bbox = tf.stack([ 273 | cp_begin_x, cp_begin_y, \ 274 | cp_height, cp_width]) 275 | crop_image = tf.image.decode_and_crop_jpeg( 276 | image_string, 277 | bbox, 278 | channels=3) 279 | image = image_resize(crop_image, crop_size, crop_size) 280 | return image 281 | 282 | 283 | def preprocessing_inst( 284 | image_string, 285 | out_height, 286 | out_width, 287 | is_train, 288 | size_minval=0.2, 289 | val_short_side=256, 290 | ): 291 | def _val_func(image_string): 292 | shape = tf.image.extract_jpeg_shape(image_string) 293 | scale = get_resize_scale(shape[0], shape[1], val_short_side) 294 | cp_height = tf.cast(out_height / scale, tf.int32) 295 | cp_width = tf.cast(out_width / scale, tf.int32) 296 | cp_begin_x = tf.cast((shape[0] - cp_height) / 2, tf.int32) 297 | cp_begin_y = tf.cast((shape[1] - cp_width) / 2, tf.int32) 298 | bbox = tf.stack([ 299 | cp_begin_x, cp_begin_y, \ 300 | cp_height, cp_width]) 301 | crop_image = tf.image.decode_and_crop_jpeg( 302 | image_string, 303 | bbox, 304 | channels=3) 305 | image = image_resize( 306 | crop_image, 307 | out_height, out_width, 308 | ) 309 | 310 | image.set_shape([out_height, out_width, 3]) 311 | return image 312 | 313 | def _rand_crop(image_string): 314 | image = RandomSizedCrop_from_jpeg( 315 | image_string, 316 | out_height=out_height, 317 | out_width=out_width, 318 | size_minval=size_minval, 319 | ) 320 | return image 321 | 322 | if is_train: 323 | image = _rand_crop(image_string) 324 | image = ApplyGray(image, 0.2) 325 | image = ColorJitter(image) 326 | image = tf.image.random_flip_left_right(image) 327 | 328 | else: 329 | image = _val_func(image_string) 330 | 331 | return image 332 | -------------------------------------------------------------------------------- /tf_model/data.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os, sys 3 | import torch 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | sys.path.append(os.path.abspath('../')) 8 | from pt_loader import datasets, config, transforms, opn_datasets 9 | 10 | 11 | def get_feeddict(image, label, index, name_prefix='TRAIN'): 12 | image_placeholder = tf.get_default_graph().get_tensor_by_name( 13 | '%s_IMAGE_PLACEHOLDER:0' % name_prefix) 14 | label_placeholder = tf.get_default_graph().get_tensor_by_name( 15 | '%s_LABEL_PLACEHOLDER:0' % name_prefix) 16 | index_placeholder = tf.get_default_graph().get_tensor_by_name( 17 | '%s_INDEX_PLACEHOLDER:0' % name_prefix) 18 | feed_dict = { 19 | image_placeholder: image.numpy(), 20 | label_placeholder: label.numpy(), 21 | index_placeholder: index.numpy()} 22 | return feed_dict 23 | 24 | 25 | def get_cfg_transform(args): 26 | root = '/mnt/fs3/chengxuz/kinetics/pt_meta' 27 | if args.metafile_root is not None: 28 | root = args.metafile_root 29 | root_data = args.image_dir 30 | """ 31 | # TODO: May have problem later 32 | # TODO: Would be better to specify both train_dataset and test_dataset 33 | if args.only_emb: 34 | cfg = config.dataset_config('kinetics', 35 | root='/mnt/fs3/chengxuz/kinetics/pt_meta', 36 | root_data='/data5/chengxuz/Dataset/kinetics/comp_jpgs_extracted') 37 | else:""" 38 | cfg = config.dataset_config(args.dataset, root=root, root_data=root_data) 39 | 40 | if args.train_prep is None: 41 | transform = 
42 | elif args.train_prep == 'RandomSized':
43 | transform = transforms.video_transform_rdsz()
44 | elif args.train_prep == 'ColorJitter':
45 | transform = transforms.video_transform_color()
46 | elif args.train_prep == 'ColorJitter_112':
47 | transform = transforms.video_transform_color(
48 | frame_size_min=128, frame_size_max=160,
49 | crop_size=112)
50 | elif args.train_prep == 'ColorJitterRandomSized':
51 | transform = transforms.video_transform_color_rdsz()
52 | elif args.train_prep == '3DRotNet_finetune':
53 | transform = transforms.video_3DRot_finetune_transform()
54 | elif args.train_prep == 'MultiScaleCrop_112':
55 | transform = transforms.video_transform_multiscalecrop()
56 | elif args.train_prep == 'MultiScaleCrop_224':
57 | transform = transforms.video_transform_multiscalecrop(size=224)
58 | else:
59 | raise NotImplementedError('Specified preprocessing is not implemented!')
60 | return cfg, transform
61 | 
62 | 
63 | def get_train_dataloader(args, dataset):
64 | dataloader = torch.utils.data.DataLoader(
65 | dataset, batch_size=args.batch_size, shuffle=True,
66 | num_workers=args.train_num_workers, pin_memory=False,
67 | worker_init_fn=lambda x: np.random.seed(x))
68 | return dataloader
69 | 
70 | 
71 | def get_train_pt_loader(args):
72 | cfg, transform = get_cfg_transform(args)
73 | 
74 | dataset = datasets.VideoDataset(
75 | cfg['root'], cfg['train_metafile'],
76 | num_frames=1, transform=transform,
77 | frame_start='RANDOM',
78 | bin_interval=args.bin_interval,
79 | part_vd=args.part_vd,
80 | HMDB_sample=False, resnet3d_test_sample=False)
81 | return get_train_dataloader(args, dataset)
82 | 
83 | 
84 | def get_val_cfg_transform(args):
85 | root = '/mnt/fs3/chengxuz/kinetics/pt_meta'
86 | if args.metafile_root is not None:
87 | root = args.metafile_root
88 | root_data = args.val_image_dir
89 | # cfg = config.dataset_config('kinetics', root=root, root_data=root_data)
90 | cfg = config.dataset_config(args.dataset, root=root, root_data=root_data)
91 | 
92 | # match MultiScaleCrop first: it would be shadowed by the '112' check below
93 | if args.train_prep == 'MultiScaleCrop_112':
94 | transform = transforms.video_transform_multiscalecrop(scales=[1],
95 | crop_positions=['c'])
96 | elif args.train_prep == 'MultiScaleCrop_224':
97 | transform = transforms.video_transform_multiscalecrop(scales=[1],
98 | crop_positions=['c'], size=224)
99 | elif args.train_prep == '3DRotNet_finetune':
100 | transform = transforms.video_3DRot_finetune_val((136, 136))
101 | elif args.train_prep is not None and '112' in args.train_prep:
102 | transform = transforms.video_transform_val(
103 | frame_size=128, crop_size=112)
104 | else:
105 | transform = transforms.video_transform_val(dataset=args.dataset)
106 | return cfg, transform
107 | 
108 | 
109 | def get_val_dataloader(args, dataset):
110 | dataloader = torch.utils.data.DataLoader(
111 | dataset, batch_size=args.test_batch_size, shuffle=False,
112 | num_workers=args.val_num_workers, pin_memory=False)
113 | return dataloader
114 | 
115 | 
116 | def get_val_pt_loader(args):
117 | cfg, transform = get_val_cfg_transform(args)
118 | 
119 | dataset = datasets.VideoDataset(
120 | cfg['root'], cfg['val_metafile'],
121 | num_frames=args.test_no_frames, transform=transform,
122 | HMDB_sample=False, resnet3d_test_sample=False)
123 | return get_val_dataloader(args, dataset)
124 | 
125 | 
126 | def get_train_slow_pt_loader(args):
127 | cfg, transform = get_cfg_transform(args)
128 | 
129 | dataset = datasets.VideoDataset(
130 | cfg['root'], cfg['train_metafile'],
131 | num_frames=4, frame_interval=16,
132 | 
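# 'slow'-pathway sampling: 4 frames spaced 16 frames apart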
transform=transform, 133 | frame_start='RANDOM', 134 | bin_interval=args.bin_interval, 135 | HMDB_sample=args.HMDB_sample) 136 | return get_train_dataloader(args, dataset) 137 | 138 | 139 | def get_val_slow_pt_loader(args): 140 | cfg, transform = get_val_cfg_transform(args) 141 | 142 | dataset = datasets.VideoDataset( 143 | cfg['root'], cfg['val_metafile'], 144 | num_frames=4, frame_interval=16, 145 | sample_groups=args.test_no_frames, transform=transform, 146 | HMDB_sample=args.HMDB_sample) 147 | return get_val_dataloader(args, dataset) 148 | 149 | 150 | def get_train_fast_pt_loader(args): 151 | cfg, transform = get_cfg_transform(args) 152 | 153 | dataset = datasets.VideoDataset( 154 | cfg['root'], cfg['train_metafile'], 155 | num_frames=32, frame_interval=2, 156 | transform=transform, 157 | frame_start='RANDOM', 158 | bin_interval=args.bin_interval, 159 | HMDB_sample=args.HMDB_sample) 160 | return get_train_dataloader(args, dataset) 161 | 162 | 163 | def get_val_fast_pt_loader(args): 164 | cfg, transform = get_val_cfg_transform(args) 165 | 166 | dataset = datasets.VideoDataset( 167 | cfg['root'], cfg['val_metafile'], 168 | num_frames=32, frame_interval=2, 169 | sample_groups=args.test_no_frames, transform=transform, 170 | HMDB_sample=args.HMDB_sample) 171 | return get_val_dataloader(args, dataset) 172 | 173 | 174 | def get_train_fast_a4_pt_loader(args): 175 | cfg, transform = get_cfg_transform(args) 176 | 177 | dataset = datasets.VideoDataset( 178 | cfg['root'], cfg['train_metafile'], 179 | num_frames=16, frame_interval=4, 180 | transform=transform, 181 | frame_start='RANDOM', 182 | bin_interval=args.bin_interval, 183 | HMDB_sample=True, resnet3d_test_sample=False) 184 | return get_train_dataloader(args, dataset) 185 | 186 | 187 | def get_val_fast_a4_pt_loader(args): 188 | cfg, transform = get_val_cfg_transform(args) 189 | 190 | dataset = datasets.VideoDataset( 191 | cfg['root'], cfg['val_metafile'], 192 | num_frames=16, frame_interval=4, 193 | sample_groups=args.test_no_frames, transform=transform, 194 | HMDB_sample=False, resnet3d_test_sample=True) 195 | return get_val_dataloader(args, dataset) 196 | 197 | 198 | def get_train_trn_pt_loader(args): 199 | cfg, transform = get_cfg_transform(args) 200 | 201 | dataset = datasets.VideoDataset( 202 | cfg['root'], cfg['train_metafile'], 203 | trn_style=True, transform=transform, 204 | frame_start='RANDOM', 205 | trn_num_frames=args.trn_num_frames, 206 | HMDB_sample=args.HMDB_sample) 207 | return get_train_dataloader(args, dataset) 208 | 209 | 210 | def get_val_trn_pt_loader(args): 211 | cfg, transform = get_val_cfg_transform(args) 212 | 213 | dataset = datasets.VideoDataset( 214 | cfg['root'], cfg['val_metafile'], 215 | trn_style=True, sample_groups=args.test_no_frames, 216 | transform=transform, 217 | trn_num_frames=args.trn_num_frames, 218 | HMDB_sample=args.HMDB_sample) 219 | return get_val_dataloader(args, dataset) 220 | 221 | 222 | def get_train_3dresnet_pt_loader(args): 223 | cfg, transform = get_cfg_transform(args) 224 | 225 | dataset = datasets.VideoDataset( 226 | cfg['root'], cfg['train_metafile'], 227 | num_frames=16, frame_interval=1, 228 | transform=transform, 229 | frame_start='RANDOM', 230 | bin_interval=args.bin_interval, 231 | HMDB_sample=True, resnet3d_test_sample=False) 232 | if args.rotnet: 233 | FPS_FACTOR = (25 / 16) 234 | dataset = datasets.RotVideoDataset( 235 | cfg['root'], cfg['train_metafile'], 236 | num_frames=16, frame_interval=1, 237 | transform=transform, 238 | frame_start='RANDOM', 239 | 
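# assumption: the 25/16 factor compensates for a differing fps convention in the 3DRotNet pipeline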
fps_conversion_factor=FPS_FACTOR, 240 | HMDB_sample=False, resnet3d_test_sample=False) 241 | return get_train_dataloader(args, dataset) 242 | 243 | 244 | def get_val_3dresnet_pt_loader(args): 245 | cfg, transform = get_val_cfg_transform(args) 246 | 247 | dataset = datasets.VideoDataset( 248 | cfg['root'], cfg['val_metafile'], 249 | num_frames=16, frame_interval=1, 250 | sample_groups=args.test_no_frames, transform=transform, 251 | HMDB_sample=False, resnet3d_test_sample=True) 252 | if args.rotnet: 253 | FPS_FACTOR = (25 / 16) 254 | dataset = datasets.RotVideoDataset( 255 | cfg['root'], cfg['val_metafile'], 256 | num_frames=16, frame_interval=1, 257 | sample_groups=args.test_no_frames, 258 | transform=transform, 259 | fps_conversion_factor=FPS_FACTOR, 260 | HMDB_sample=False, resnet3d_test_sample=False) 261 | return get_val_dataloader(args, dataset) 262 | 263 | 264 | def get_placeholders( 265 | batch_size, num_frames=1, 266 | crop_size=224, num_channels=3, 267 | name_prefix='TRAIN', multi_frame=False, multi_group=None): 268 | image_placeholder = tf.placeholder( 269 | tf.uint8, 270 | #(batch_size, num_channels, num_frames, crop_size, crop_size), 271 | (batch_size, num_frames, crop_size, crop_size, num_channels), 272 | name='%s_IMAGE_PLACEHOLDER' % name_prefix) 273 | label_placeholder = tf.placeholder( 274 | tf.int64, 275 | (batch_size), 276 | name='%s_LABEL_PLACEHOLDER' % name_prefix) 277 | index_placeholder = tf.placeholder( 278 | tf.int64, 279 | (batch_size), 280 | name='%s_INDEX_PLACEHOLDER' % name_prefix) 281 | if not multi_frame: 282 | if num_frames == 1: 283 | image_placeholder = tf.squeeze(image_placeholder, axis=1) 284 | else: 285 | image_placeholder = tf.reshape( 286 | image_placeholder, 287 | [-1, crop_size, crop_size, num_channels]) 288 | else: 289 | if multi_group is not None: 290 | image_placeholder = tf.reshape( 291 | image_placeholder, 292 | [batch_size*multi_group, num_frames // multi_group, \ 293 | crop_size, crop_size, num_channels]) 294 | inputs = { 295 | 'image': image_placeholder, 296 | 'label': label_placeholder, 297 | 'index': index_placeholder} 298 | return inputs 299 | -------------------------------------------------------------------------------- /tf_model/saved_settings/vd_transfer_IN.py: -------------------------------------------------------------------------------- 1 | def trans_IN_bs128(args): 2 | args['batch_size'] = 128 3 | args['test_batch_size'] = 64 4 | args['fre_filter'] = 100090 5 | args['fre_cache_filter'] = 10009 6 | args['fre_valid'] = 10009 7 | return args 8 | 9 | 10 | def trans_IN_bs64(args): 11 | args['batch_size'] = 64 12 | args['test_batch_size'] = 32 13 | args['fre_filter'] = 100090 14 | args['fre_cache_filter'] = 10009 15 | args['fre_valid'] = 10009 16 | return args 17 | 18 | 19 | def trans_basics(args): 20 | args['port'] = 27006 21 | return args 22 | 23 | 24 | def IN_basics(args): 25 | args['image_dir'] = '/mnt/fs1/Dataset/TFRecord_Imagenet_standard/image_label_full_widx' 26 | return args 27 | 28 | 29 | def vd_from_scratch_trans_all(): 30 | args = {} 31 | 32 | args = trans_basics(args) 33 | args = IN_basics(args) 34 | args = trans_IN_bs128(args) 35 | 36 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl" 37 | args['load_port'] = 27006 38 | args['resume'] = True 39 | args["save_exp"] = "vd_trans/IN/vd_from_scratch_trans_all" 40 | args["get_all_layers"] = '1,3,5,7,9' 41 | args["lr_boundaries"] = '370011,524998' 42 | return args 43 | 44 | 45 | def vd_ctl_IR_trans_all(): 46 | args = {} 47 | 48 | args = trans_basics(args) 49 | args = 
IN_basics(args) 50 | args = trans_IN_bs128(args) 51 | 52 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl_IR" 53 | args['load_port'] = 27006 54 | args["save_exp"] = "vd_trans/IN/vd_ctl_IR_trans_all" 55 | args["get_all_layers"] = '1,3,5,7,9' 56 | args["lr_boundaries"] = '1840011,2192191' 57 | return args 58 | 59 | 60 | def vd_ctl_trans(): 61 | args = {} 62 | 63 | args = trans_basics(args) 64 | args = IN_basics(args) 65 | args = trans_IN_bs128(args) 66 | 67 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl" 68 | args['load_port'] = 27006 69 | args["save_exp"] = "vd_trans/IN/vd_ctl_trans" 70 | args['lr_boundaries'] = '1491301,1891691' 71 | return args 72 | 73 | 74 | def vd_ctl_trans_all(): 75 | args = {} 76 | 77 | args = trans_basics(args) 78 | args = IN_basics(args) 79 | args = trans_IN_bs128(args) 80 | 81 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl" 82 | args['load_port'] = 27006 83 | args["save_exp"] = "vd_trans/IN/vd_ctl_trans_all" 84 | args['lr_boundaries'] = '1491301,1891691' 85 | args["get_all_layers"] = '1,3,5,7,9' 86 | return args 87 | 88 | 89 | def vd_ctl_p30_trans_all(): 90 | args = {} 91 | 92 | args = trans_basics(args) 93 | args = IN_basics(args) 94 | args = trans_IN_bs128(args) 95 | 96 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl_p30" 97 | args['load_port'] = 27006 98 | args["save_exp"] = "vd_trans/IN/vd_ctl_p30_trans_all" 99 | args["get_all_layers"] = '1,3,5,7,9' 100 | args['lr_boundaries'] = '1330011,1730011' 101 | return args 102 | 103 | 104 | def vd_ctl_p70_trans_all(): 105 | args = {} 106 | 107 | args = trans_basics(args) 108 | args = IN_basics(args) 109 | args = trans_IN_bs128(args) 110 | 111 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl_p70" 112 | args['load_port'] = 27006 113 | args["save_exp"] = "vd_trans/IN/vd_ctl_p70_trans_all" 114 | args["get_all_layers"] = '1,3,5,7,9' 115 | args["lr_boundaries"] = '1370011,1600011' 116 | return args 117 | 118 | 119 | def vd_ctl_big_bin_trans_all(): 120 | args = {} 121 | 122 | args = trans_basics(args) 123 | args = IN_basics(args) 124 | args = trans_IN_bs128(args) 125 | 126 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl_big_bin" 127 | args['load_port'] = 27006 128 | args["save_exp"] = "vd_trans/IN/vd_ctl_big_bin_trans_all" 129 | args['lr_boundaries'] = '1900011,2300011' 130 | args["get_all_layers"] = '1,3,5,7,9' 131 | return args 132 | 133 | 134 | def vd_ctl_bin_trans_all(): 135 | args = {} 136 | 137 | args = trans_basics(args) 138 | args = IN_basics(args) 139 | args = trans_IN_bs128(args) 140 | 141 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl_bin" 142 | args['load_port'] = 27006 143 | args["save_exp"] = "vd_trans/IN/vd_ctl_bin_trans_all" 144 | args["get_all_layers"] = '1,3,5,7,9' 145 | args['lr_boundaries'] = '1811011,2100011' 146 | return args 147 | 148 | 149 | def vd_sup_f1_trans(): 150 | args = {} 151 | 152 | args = trans_basics(args) 153 | args = IN_basics(args) 154 | args = trans_IN_bs128(args) 155 | 156 | args['load_exp'] = "vd_sup/ctl/vd_f1_ctl" 157 | args['load_port'] = 27006 158 | args["save_exp"] = "vd_trans/IN/vd_sup_f1_trans" 159 | args["lr_boundaries"] = '790001,890001' 160 | return args 161 | 162 | 163 | def vd_sup_f1_avg_trans(): 164 | args = {} 165 | 166 | args = trans_basics(args) 167 | args = IN_basics(args) 168 | args = trans_IN_bs128(args) 169 | 170 | args['load_exp'] = "vd_sup/ctl/vd_f1_ctl" 171 | args['load_port'] = 27006 172 | args["save_exp"] = "vd_trans/IN/vd_sup_f1_avg_trans" 173 | args["get_all_layers"] = '9-avg' 174 | args["lr_boundaries"] = '790001,890001' 175 | return args 
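# The transfer settings below all follow the same recipe: shared basics
# (trans_basics, IN_basics, trans_IN_bs128), then an experiment-specific
# load_exp/save_exp pair, optional get_all_layers, and lr_boundaries.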
176 | 177 | 178 | def vd_sup_f1_trans_all(): 179 | args = {} 180 | 181 | args = trans_basics(args) 182 | args = IN_basics(args) 183 | args = trans_IN_bs128(args) 184 | 185 | args['load_exp'] = "vd_sup/ctl/vd_f1_ctl" 186 | args['load_port'] = 27006 187 | args["save_exp"] = "vd_trans/IN/vd_sup_f1_trans_all" 188 | args["get_all_layers"] = '1,3,5,7,9-avg' 189 | args["lr_boundaries"] = '790001,890001' 190 | return args 191 | 192 | 193 | def vd_ctl_opn_trans(): 194 | args = {} 195 | 196 | args = trans_basics(args) 197 | args = IN_basics(args) 198 | args = trans_IN_bs128(args) 199 | 200 | args['load_exp'] = "vd_unsup_fx/opn/opn_random" 201 | args['load_port'] = 27006 202 | args["save_exp"] = "vd_trans/IN/vd_ctl_opn_trans_mlt" 203 | args["get_all_layers"] = '1,3,5,7,9' 204 | return args 205 | 206 | 207 | def vd_ctl_opn_80_trans_all(): 208 | args = {} 209 | 210 | args = trans_basics(args) 211 | args = IN_basics(args) 212 | args = trans_IN_bs128(args) 213 | 214 | args['load_exp'] = "vd_unsup_fx/opn/opn_random_sep_flow" 215 | args['load_port'] = 27006 216 | args["save_exp"] = "vd_trans/IN/vd_ctl_opn_80_trans_all" 217 | args["get_all_layers"] = '1,3,5,7,9' 218 | args['lr_boundaries'] = '429998,670011' 219 | return args 220 | 221 | 222 | def vd_ctl_opn_224_trans_all(): 223 | args = {} 224 | 225 | args = trans_basics(args) 226 | args = IN_basics(args) 227 | args = trans_IN_bs128(args) 228 | 229 | args['load_exp'] = "vd_unsup_fx/opn/opn_random_224_sep_flow" 230 | args['load_port'] = 27006 231 | args["save_exp"] = "vd_trans/IN/vd_ctl_opn_224_trans_all" 232 | args["get_all_layers"] = '1,3,5,7,9' 233 | return args 234 | 235 | 236 | def vd_ctl_trans_alx(): 237 | args = {} 238 | 239 | args = trans_basics(args) 240 | args = IN_basics(args) 241 | args = trans_IN_bs128(args) 242 | 243 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_ctl" 244 | args['load_port'] = 27006 245 | args["save_exp"] = "vd_trans/IN/vd_ctl_trans_alx" 246 | args['lr_boundaries'] = '1491301' 247 | args["train_crop"] = 'alexnet_crop_flip' 248 | return args 249 | 250 | 251 | def vd_trn_pret_trans(): 252 | args = {} 253 | 254 | args = trans_basics(args) 255 | args = IN_basics(args) 256 | args = trans_IN_bs128(args) 257 | 258 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_trn_pret" 259 | args['load_port'] = 27006 260 | args["save_exp"] = "vd_trans/IN/vd_trn_pret_trans_fx" 261 | return args 262 | 263 | 264 | def vd_trn_f4_pret_trans(): 265 | args = {} 266 | 267 | args = trans_basics(args) 268 | args = IN_basics(args) 269 | args = trans_IN_bs128(args) 270 | 271 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_trn_f4_pret" 272 | args['load_port'] = 27006 273 | args["save_exp"] = "vd_trans/IN/vd_trn_f4_pret_trans" 274 | args["lr_boundaries"] = '2440011,2650011' 275 | return args 276 | 277 | 278 | def vd_trn_f4_pret_mlt_trans(): 279 | args = {} 280 | 281 | args = trans_basics(args) 282 | args = IN_basics(args) 283 | args = trans_IN_bs128(args) 284 | 285 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_trn_f4_pret" 286 | args['load_port'] = 27006 287 | args["save_exp"] = "vd_trans/IN/vd_trn_f4_pret_mlt_trans" 288 | args["model_type"] = "trn_f4_tile" 289 | args["get_all_layers"] = 10 290 | args['lr_boundaries'] = '2360011' 291 | return args 292 | 293 | 294 | def vd_sup_trn_f4_trans(): 295 | args = {} 296 | 297 | args = trans_basics(args) 298 | args = IN_basics(args) 299 | args = trans_IN_bs128(args) 300 | 301 | args['load_exp'] = "vd_sup/ctl/vd_sup_trn_f4_fx" 302 | args['load_port'] = 27006 303 | args["save_exp"] = "vd_trans/IN/vd_sup_trn_f4_trans" 304 | 
return args 305 | 306 | 307 | def vd_sup_trn_f4_avg_trans(): 308 | args = {} 309 | 310 | args = trans_basics(args) 311 | args = IN_basics(args) 312 | args = trans_IN_bs128(args) 313 | 314 | args['load_exp'] = "vd_sup/ctl/vd_sup_trn_f4_fx" 315 | args['load_port'] = 27006 316 | args["save_exp"] = "vd_trans/IN/vd_sup_trn_f4_avg_trans" 317 | args["get_all_layers"] = "9-avg" 318 | args["lr_boundaries"] = '680011' 319 | return args 320 | 321 | 322 | def vd_tsrn_slow_pret_trans(): 323 | args = {} 324 | 325 | args = trans_basics(args) 326 | args = IN_basics(args) 327 | args = trans_IN_bs128(args) 328 | 329 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_tsrn_slow_pret" 330 | args['load_port'] = 27006 331 | args["save_exp"] = "vd_trans/IN/vd_tsrn_slow_pret_trans" 332 | args["lr_boundaries"] = '1901601' 333 | return args 334 | 335 | 336 | def vd_tsrn_f4_pret_trans(): 337 | args = {} 338 | 339 | args = trans_basics(args) 340 | args = IN_basics(args) 341 | args = trans_IN_bs128(args) 342 | 343 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_tsrn_f4_pret" 344 | args['load_port'] = 27006 345 | args["save_exp"] = "vd_trans/IN/vd_tsrn_f4_pret_trans" 346 | args["lr_boundaries"] = '1980011,2190011' 347 | return args 348 | 349 | 350 | def vd_tsrn_f4_pret_trans_all(): 351 | args = {} 352 | 353 | args = trans_basics(args) 354 | args = IN_basics(args) 355 | args = trans_IN_bs128(args) 356 | 357 | #args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_tsrn_f4_pret" 358 | #args["save_exp"] = "vd_trans/IN/vd_tsrn_f4_pret_trans_all" 359 | args["load_exp"] = "vd_trans/IN/vd_tsrn_f4_pret_trans_all" 360 | args["save_exp"] = "vd_trans/IN/vd_tsrn_f4_pret_trans_all_ct" 361 | args["load_step"] = 2181962 362 | 363 | args['load_port'] = 27006 364 | #args["lr_boundaries"] = '1980011,2190011' 365 | args["lr_boundaries"] = '1980011,2650011' 366 | args["get_all_layers"] = '1,3,5,7,9' 367 | return args 368 | 369 | 370 | def vd_sup_tsrn_f4_pret_trans_all(): 371 | args = {} 372 | 373 | args = trans_basics(args) 374 | args = IN_basics(args) 375 | args = trans_IN_bs128(args) 376 | 377 | args['load_exp'] = "vd_sup/ctl/vd_sup_tsrn_f4" 378 | args['load_port'] = 27006 379 | args["save_exp"] = "vd_trans/IN/vd_sup_tsrn_f4_pret_trans_all" 380 | args["get_all_layers"] = '1,3,5,7,9' 381 | args["lr_boundaries"] = '880011,1660011' 382 | return args 383 | 384 | 385 | def vd_slow_trans(): 386 | args = {} 387 | 388 | args = trans_basics(args) 389 | args = IN_basics(args) 390 | args = trans_IN_bs128(args) 391 | 392 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slow" 393 | args['load_port'] = 27006 394 | args["save_exp"] = "vd_trans/IN/vd_slow_trans" 395 | args['model_type'] = 'slow' 396 | return args 397 | 398 | 399 | def vd_slow_trans_all(): 400 | args = {} 401 | 402 | args = trans_basics(args) 403 | args = IN_basics(args) 404 | args = trans_IN_bs128(args) 405 | 406 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slow" 407 | args['load_port'] = 27006 408 | args["save_exp"] = "vd_trans/IN/vd_slow_trans_all" 409 | args['model_type'] = 'slow' 410 | args["get_all_layers"] = '1,3,5,7,9' 411 | args["num_tile"] = 4 412 | args["lr_boundaries"] = '1690011,1940011' 413 | return args 414 | 415 | 416 | def vd_sup_slow_trans_all(): 417 | args = {} 418 | 419 | args = trans_basics(args) 420 | args = IN_basics(args) 421 | args = trans_IN_bs128(args) 422 | 423 | args['load_exp'] = "vd_sup/ctl/vd_sup_slow" 424 | args['load_port'] = 27006 425 | args["save_exp"] = "vd_trans/IN/vd_sup_slow_trans_all" 426 | args['model_type'] = 'slow' 427 | args["get_all_layers"] = 
'1,3,5,7,9-avg' 428 | args["num_tile"] = 4 429 | args["lr_boundaries"] = '610011,1180011' 430 | return args 431 | 432 | 433 | def vd_slowfast_a4_trans_all(): 434 | args = {} 435 | 436 | args = trans_basics(args) 437 | args = IN_basics(args) 438 | args = trans_IN_bs128(args) 439 | 440 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slowfast_a4" 441 | args['load_port'] = 27006 442 | args["save_exp"] = "vd_trans/IN/vd_slowfast_a4_trans_all_fx" 443 | args['model_type'] = 'slowfast_a4' 444 | args["get_all_layers"] = '1,3,5,7,9' 445 | args["num_tile"] = 16 446 | args["lr_boundaries"] = '1550011,1810011' 447 | return args 448 | 449 | 450 | def vd_sup_slowfast_a4_trans_all(): 451 | args = {} 452 | 453 | args = trans_basics(args) 454 | args = IN_basics(args) 455 | args = trans_IN_bs128(args) 456 | 457 | args['load_exp'] = "vd_sup/ctl/vd_sup_slowfast_a4" 458 | args['load_port'] = 27006 459 | args["save_exp"] = "vd_trans/IN/vd_sup_slowfast_a4_trans_all_fx" 460 | args['model_type'] = 'slowfast_a4' 461 | args["get_all_layers"] = '1,3,5,7,9-avg' 462 | args["num_tile"] = 16 463 | args["lr_boundaries"] = '530011,790011' 464 | return args 465 | 466 | 467 | def vd_slow_single_trans_all(): 468 | args = {} 469 | 470 | args = trans_basics(args) 471 | args = IN_basics(args) 472 | args = trans_IN_bs128(args) 473 | 474 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slow" 475 | args['load_port'] = 27006 476 | args["save_exp"] = "vd_trans/IN/vd_slow_single_trans_all" 477 | args['model_type'] = 'slowsingle' 478 | args["get_all_layers"] = '5,7,9' 479 | args["num_tile"] = 4 480 | #args["from_ckpt"] = '/mnt/fs3/chengxuz/vd_relat/slow_single_model/model' 481 | args['lr_boundaries'] = '230011,570011' 482 | return args 483 | 484 | 485 | def vd_slowfast_single_trans_all(): 486 | args = {} 487 | 488 | args = trans_basics(args) 489 | args = IN_basics(args) 490 | args = trans_IN_bs128(args) 491 | 492 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_slowfast_a4" 493 | args['load_port'] = 27006 494 | args["save_exp"] = "vd_trans/IN/vd_slowfast_single_trans_all" 495 | args['model_type'] = 'slowfastsingle' 496 | args["get_all_layers"] = '7,9' 497 | args["num_tile"] = 16 498 | #args["from_ckpt"] = '/mnt/fs3/chengxuz/vd_relat/slowfast_single_model/model' 499 | args['lr_boundaries'] = '160011,460011' 500 | return args 501 | 502 | 503 | def vd_3dresnet_trans_all(): 504 | args = {} 505 | 506 | args = trans_basics(args) 507 | args = IN_basics(args) 508 | args = trans_IN_bs128(args) 509 | 510 | args['load_exp'] = "vd_unsup_fx/dyn_clstr/vd_3dresnet" 511 | args['load_port'] = 27007 512 | args["save_exp"] = "vd_trans/IN/vd_3dresnet_trans_all" 513 | args['port'] = 27007 514 | args['model_type'] = '3dresnet' 515 | args["get_all_layers"] = '5,7,9' 516 | args["num_tile"] = 16 517 | args["train_crop"] = 'outshape_112' 518 | args['lr_boundaries'] = '1850011,2500011' 519 | return args 520 | --------------------------------------------------------------------------------
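Usage note: each function in saved_settings/vd_transfer_IN.py returns a plain dict of overrides. Below is a minimal sketch of how such a setting could be resolved and merged into run-time arguments, assuming a 'module.function' setting string and dict-over-defaults semantics; load_setting and apply_setting are illustrative names, not helpers from this repo.

import importlib


def load_setting(setting_str):
    # resolve e.g. 'vd_transfer_IN.vd_3dresnet_trans_all' to its function
    module_name, func_name = setting_str.rsplit('.', 1)
    setting_module = importlib.import_module(
        'saved_settings.%s' % module_name)
    return getattr(setting_module, func_name)()


def apply_setting(args, setting_str):
    # values from the saved setting override parsed defaults on args
    for key, value in load_setting(setting_str).items():
        setattr(args, key, value)
    return args

Under these assumptions, apply_setting(args, 'vd_transfer_IN.vd_3dresnet_trans_all') would leave args with model_type '3dresnet', port 27007, and the lr_boundaries listed above.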