├── Controllable_imgs ├── Controllable_compare.gif ├── Controllable_kitti.gif ├── Controllable_rp.gif └── Controllable_ucf_pushups.gif ├── LICENSE ├── README.md ├── datasets ├── KITTI │ ├── parse_sequence.py │ └── png2jpg.py ├── README.md ├── RobotPush │ ├── grab_train_images_to_hdf5.py │ └── read_push_data.py └── UCF-101 │ └── videos_to_jpg_seq.py ├── model ├── README.md ├── aeeval_kitti.py ├── aeeval_rp.py ├── aeeval_ucf.py ├── dataset │ ├── commons.py │ ├── data_loader_kitti_reimpl.py │ ├── data_loader_rp_reimpl.py │ ├── data_loader_ucf_reimpl.py │ └── utils │ │ └── set_dataset_path.py ├── guieval_rp.py ├── models │ ├── vgg_warper_weak_shortcut.py │ └── vgg_warper_weak_shortcut_nobn.py ├── ops │ ├── cooltanh.py │ ├── flow_warper.py │ ├── flow_warper_pad_2x.py │ ├── grad_hook.py │ ├── hardshinkloss.py │ └── laplace2d.py ├── reader │ ├── kitti_reader.py │ ├── rp_reader.py │ └── ucf_reader.py ├── train_kitti.py ├── train_rp.py ├── train_ucf.py └── utils │ ├── trajs2map.py │ └── visual.py └── offline_traj ├── for_KITTI ├── DenseTrackStab.cpp ├── DenseTrackStab.h ├── Descriptors.h ├── Initialize.h ├── Makefile ├── OpticalFlow.h ├── README.md ├── Video.cpp ├── batch_process_dataset.py ├── make │ ├── dep.py │ └── generic.mk └── view_traj.py ├── for_RobotPush ├── DenseTrackStab.cpp ├── DenseTrackStab.h ├── Descriptors.h ├── Initialize.h ├── Makefile ├── OpticalFlow.h ├── README.md ├── Video.cpp ├── batch_process_dataset.py ├── make │ ├── dep.py │ └── generic.mk └── view_traj.py └── for_UCF101 ├── DenseTrackStab.cpp ├── DenseTrackStab.h ├── Descriptors.h ├── Initialize.h ├── Makefile ├── OpticalFlow.h ├── README.md ├── Video.cpp ├── batch_process_dataset.py ├── make ├── dep.py └── generic.mk ├── testlist01.txt ├── trainlist01.txt └── view_traj.py /Controllable_imgs/Controllable_compare.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zekunhao1995/ControllableVideoGen/cae9bdf46a4eee1145b268ec74189f9f6ccbbb42/Controllable_imgs/Controllable_compare.gif -------------------------------------------------------------------------------- /Controllable_imgs/Controllable_kitti.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zekunhao1995/ControllableVideoGen/cae9bdf46a4eee1145b268ec74189f9f6ccbbb42/Controllable_imgs/Controllable_kitti.gif -------------------------------------------------------------------------------- /Controllable_imgs/Controllable_rp.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zekunhao1995/ControllableVideoGen/cae9bdf46a4eee1145b268ec74189f9f6ccbbb42/Controllable_imgs/Controllable_rp.gif -------------------------------------------------------------------------------- /Controllable_imgs/Controllable_ucf_pushups.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zekunhao1995/ControllableVideoGen/cae9bdf46a4eee1145b268ec74189f9f6ccbbb42/Controllable_imgs/Controllable_ucf_pushups.gif -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 
180 |    To apply the Apache License to your work, attach the following
181 |    boilerplate notice, with the fields enclosed by brackets "[]"
182 |    replaced with your own identifying information. (Don't include
183 |    the brackets!) The text should be enclosed in the appropriate
184 |    comment syntax for the file format. We also recommend that a
185 |    file or class name and description of purpose be included on the
186 |    same "printed page" as the copyright notice for easier
187 |    identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | Code for "Controllable Video Generation with Sparse Trajectories", CVPR'18.
3 | 
4 | 1. Pre-process the datasets using the tools provided in the **datasets** directory.
5 | 2. Generate trajectories for each dataset using the code in the **offline_traj** directory.
6 | 3. Train & evaluate with the code in the **model** directory.
7 | 
8 | **Warning: The code is provided in its original form without any cleanup.**
9 | 
10 | compare
11 | robotpush
12 | pushup
13 | kitty
14 | 
--------------------------------------------------------------------------------
/datasets/KITTI/parse_sequence.py:
--------------------------------------------------------------------------------
1 | #from os import listdir
2 | #from os.path import isfile, join
3 | import os
4 | import re
5 | import numpy as np
6 | 
7 | 'dataset/sequences/[00 to 21]/image_2/[000000 to n]'
8 | 
9 | kitti_path_prefix = '/data1/Video_Prediction/dataset/KITTI/dataset/sequences'
10 | 
11 | def get_num(x):
12 |     return int(''.join(ele for ele in x if ele.isdigit()))
13 | 
14 | frame_count_stor = []
15 | for vid_id in range(21):
16 |     vid_path_prefix = os.path.join(kitti_path_prefix, '{:02d}'.format(vid_id), 'image_2')
17 |     video_file_list = os.listdir(vid_path_prefix)
18 |     frame_count = 0
19 |     for filename in video_file_list:
20 |         frame_count = max(get_num(filename),frame_count)
21 |     print(frame_count)
22 |     frame_count_stor.append(frame_count)
23 | 
24 | # 16 / 5 split
25 | # test: 15 11 7 5 4
26 | frame_count_cumsum = np.cumsum(frame_count_stor)
27 | print(frame_count_cumsum)
28 | 
29 | 
--------------------------------------------------------------------------------
/datasets/KITTI/png2jpg.py:
--------------------------------------------------------------------------------
1 | #from os import listdir
2 | #from os.path import isfile, join
3 | import os
4 | import re
5 | import numpy as np
6 | import cv2
7 | # OpenBLAS screws up with CPU affinity
8 | # Spawned processes will inherit this
9 | os.sched_setaffinity(0,range(os.cpu_count()))
10 | 
11 | 'dataset/sequences/[00 to 21]/image_2/[000000 to n]'
12 | 
13 | kitti_path_prefix = '/data1/Video_Prediction/dataset/KITTI/dataset/sequences'
14 | 
15 | out_path_prefix = 
'/media/haozekun/512SSD_2/KITTI_bmp/dataset/sequences' 16 | 17 | cam_names = ['image_2', 'image_3'] 18 | 19 | for vid_id in range(21): 20 | for cam_name in cam_names: 21 | vid_path_prefix = os.path.join(kitti_path_prefix, '{:02d}'.format(vid_id), cam_name) 22 | out_path = os.path.join(out_path_prefix, '{:02d}'.format(vid_id), cam_name) 23 | os.makedirs(out_path) 24 | video_file_list = os.listdir(vid_path_prefix) 25 | print('{} - {}'.format(vid_id, cam_name)) 26 | for filename in video_file_list: 27 | png_full_path = os.path.join(vid_path_prefix,filename) 28 | out_full_path = os.path.join(out_path,filename.rsplit('.',1)[0]+'.bmp') 29 | frame = cv2.imread(png_full_path) 30 | frame2x = cv2.resize(frame, (845,256), interpolation=cv2.INTER_AREA) 31 | cv2.imwrite(out_full_path,frame2x) 32 | #cv2.imwrite(out_full_path,frame2x,[cv2.IMWRITE_JPEG_QUALITY, 100]) 33 | #cv2.imwrite(out_full_path,frame2x,[cv2.IMWRITE_WEBP_QUALITY, 100]) 34 | #print(out_full_path) 35 | 36 | -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | # Scripts for preprocessing datasets 2 | As a part of code for "Controllable Video Generation with Sparse Trajectories", CVPR'18. 3 | 4 | 1. **KITTI Odometry** 5 | data_odometry_color.zip 6 | http://www.cvlibs.net/datasets/kitti/eval_odometry.php 7 | Converting PNGs to other formats is recommended to reduce CPU load (script provided). 8 | 9 | 2. **Push Dataset** 10 | https://sites.google.com/site/brainrobotdata/home/push-dataset 11 | You may want to use the provided script to convert TFRecords to HDF5 format for easier use outside TF. 12 | 13 | 3. **UCF101 - Action Recognition Data Set** 14 | http://crcv.ucf.edu/data/UCF101.php 15 | It is recommended to convert videos to image sequences for better random-access performance. 
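For example, once a video has been dumped by `videos_to_jpg_seq.py` (which writes frames as `RAWFRAME_DIR/<action>/<video_file>/<frame_no>.jpg`), any frame can be decoded in isolation instead of stepping a video decoder through all preceding frames. A minimal sketch — the paths and the clip name are placeholders:

```python
import os
import cv2

RAWFRAME_DIR = '/data1/UCF101seq'  # placeholder; match videos_to_jpg_seq.py

def load_frame(action_dir, video_file, frame_no):
    # Random access: decode a single JPEG directly rather than seeking a
    # cv2.VideoCapture through the container.
    path = os.path.join(RAWFRAME_DIR, action_dir, video_file, str(frame_no) + '.jpg')
    return cv2.imread(path)  # HxWx3 BGR array, or None if the frame is missing

frame = load_frame('PushUps', 'v_PushUps_g01_c01.avi', 42)
```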
16 | 17 | -------------------------------------------------------------------------------- /datasets/RobotPush/grab_train_images_to_hdf5.py: -------------------------------------------------------------------------------- 1 | """Code for converting TFRecords to HDF5""" 2 | 3 | import os 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | import h5py 8 | import re 9 | 10 | from tensorflow.python.platform import flags 11 | from tensorflow.python.platform import gfile 12 | 13 | 14 | FLAGS = flags.FLAGS 15 | 16 | # Original image dimensions 17 | ORIGINAL_WIDTH = 640 18 | ORIGINAL_HEIGHT = 512 19 | COLOR_CHAN = 3 20 | BATCH_SIZE = 25 21 | 22 | data_dir = 'push/push_train' 23 | #dest_dir = '/media/haozekun/512SSD_2/push_jpg' 24 | hdf5_path = '/media/haozekun/512SSD_2/robot_push_jpgs.h5' 25 | 26 | f = h5py.File(hdf5_path, 'w', libver='latest') # Supports Single-Write-Multiple-Read 27 | h5_push = f.require_group("push") 28 | h5_push_train = h5_push.require_group("push_train") 29 | 30 | 31 | 32 | def decode_proto(s_example, h5_push_train_vid): 33 | a = tf.train.Example() 34 | a.ParseFromString(s_example) # a: an example 35 | b = a.ListFields()[0][1].ListFields()[0][1] 36 | prog = re.compile('move/(\d+)/image/encoded') 37 | 38 | num_imgs = 0 39 | for key in b.keys(): 40 | m = prog.match(key) 41 | if m: 42 | img_id = int(m.group(1)) 43 | v = b[key] 44 | raw_data = v.ListFields()[0][1].ListFields()[0][1][0] 45 | 46 | h5_push_train_vid_jpg = h5_push_train_vid.require_dataset('{}.jpg'.format(img_id), shape=(len(raw_data),), dtype=np.uint8) 47 | h5_push_train_vid_jpg[:] = np.fromstring(raw_data, dtype=np.uint8) 48 | num_imgs = max(num_imgs, img_id) 49 | return num_imgs+1 50 | 51 | 52 | filenames = gfile.Glob(os.path.join(data_dir, '*')) 53 | if not filenames: 54 | raise RuntimeError('No data files found.') 55 | vid_count = 0 56 | for filename in filenames: 57 | for s_example in tf.python_io.tf_record_iterator(filename): 58 | h5_push_train_vid = h5_push_train.require_group(str(vid_count)) 59 | num_imgs = decode_proto(s_example, h5_push_train_vid) 60 | h5_push_train_vid.attrs['frame_count'] = num_imgs 61 | vid_count += 1 62 | print(vid_count) 63 | 64 | h5_push_train.attrs['video_count'] = vid_count 65 | 66 | f.flush() 67 | f.close() 68 | -------------------------------------------------------------------------------- /datasets/RobotPush/read_push_data.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import cv2 4 | 5 | data_dir = 'push/push_train' 6 | 7 | f = h5py.File('robot_push_jpgs.h5', 'r') 8 | for video_id in range(f['push/push_train'].attrs['video_count']): 9 | for img_id in range(f['push/push_train/{}'.format(video_id)].attrs['frame_count']): 10 | img = cv2.imdecode(f['push/push_train/{}/{}.jpg'.format(video_id, img_id)][()], -1) 11 | print(img.shape) 12 | cv2.imshow('image',img) 13 | cv2.waitKey(100) 14 | 15 | cv2.destroyAllWindows() 16 | -------------------------------------------------------------------------------- /datasets/UCF-101/videos_to_jpg_seq.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | import numpy as np 5 | import h5py 6 | import re 7 | 8 | import cv2 9 | 10 | from multiprocessing.dummy import Pool as ThreadPool 11 | pool = ThreadPool(8) 12 | 13 | # Load UCF101 dataset 14 | DATASET_DIR = '/data2/UCF-101' 15 | RAWFRAME_DIR = '/data1/UCF101seq' 16 | 17 | 18 | 19 | def worker(action_dir): 20 | print(action_dir) 21 | video_files = 
os.listdir(os.path.join(DATASET_DIR, action_dir))
22 |     action_out_dir = os.path.join(RAWFRAME_DIR, action_dir)
23 |     os.mkdir(action_out_dir)
24 |     for video_file in video_files:
25 |         print(video_file)
26 |         video_path = os.path.join(DATASET_DIR, action_dir, video_file)
27 |         video_out_dir = os.path.join(RAWFRAME_DIR, action_dir, video_file)
28 |         os.mkdir(video_out_dir)
29 |         cap = cv2.VideoCapture(video_path)
30 |         if not cap.isOpened():
31 |             print('Video open failed!!!')
32 |         length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
33 |         for frame_no in range(length):
34 |             ret, frame = cap.read() # 320 by 240
35 |             if not ret:
36 |                 print('Frame read error!')
37 |                 break
38 |             frame_out_path = os.path.join(RAWFRAME_DIR, action_dir, video_file, str(frame_no)+'.jpg')
39 |             #cv2.imwrite(frame_out_path, frame, [cv2.IMWRITE_PNG_COMPRESSION, 4])
40 |             cv2.imwrite(frame_out_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 95])
41 |         cap.release()
42 | 
43 | 
44 | action_dir_list = os.listdir(DATASET_DIR)
45 | #for action_dir in action_dir_list:
46 | 
47 | 
48 | pool.map(worker, action_dir_list)
49 | 
--------------------------------------------------------------------------------
/model/README.md:
--------------------------------------------------------------------------------
1 | # Main code for "Controllable Video Generation with Sparse Trajectories", CVPR'18.
2 | 
3 | 1. Set up dataset paths with `./dataset/utils/set_dataset_path.py`. **READ THE FILE FOR MORE HINTS**
4 | 2. Run `train_[dataset_name].py` to train the video generation model.
5 | -- By default, the model takes 1-5 trajectories as input, which is suitable for human evaluation.
6 | -- You should increase the number of input trajectories to 10 for quantitative quality (PSNR, SSIM) evaluation. Too few trajectories bring too much ambiguity.
7 | 3. Run `aeeval_[dataset_name].py` to evaluate the model on the test sets using the PSNR and SSIM metrics (see the note on the PSNR convention below).
8 | -- Note that our model is not designed for video prediction. Results are for reference only.
9 | -- Our work aims at generating video clips in a user-controllable manner.
10 | 4. For an example of how to build a GUI for user evaluation, refer to the simplified example `guieval_rp.py`.
11 | -- Edit `./reader/*.py` to match your dataset paths.
12 | -- The first click defines the start point of a motion vector.
13 | -- The second click defines the end point of the motion vector.
14 | -- The next click adds a new vector.
15 | -- Left-click outside the canvas to clear all the vectors.
16 | -- Press the right mouse button to go to the next image.
17 | 
18 | - Requires PyTorch (Python 3) for train/test and visdom for monitoring.
19 | 
20 | **Warning: The code is provided in its original form without any cleanup. Read each program before running. Most files are self-explanatory.**
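Note on the PSNR convention: the `aeeval_*.py` scripts do not average per-frame PSNR values. They accumulate per-frame MSE (on images scaled to [0, 1]), average it over the whole test set for each of the 9 predicted timesteps, and convert to PSNR once at the end. A minimal sketch of that convention (the array contents here are illustrative):

```python
import numpy as np

# mse_a: per-sample, per-timestep MSE on [0, 1] images, shape (num_samples, 9),
# as accumulated by the aeeval scripts before reporting
mse_a = np.full((100, 9), 1e-2)  # placeholder values

# PSNR = 10*log10(MAX^2 / MSE) with MAX = 1.0; the MSE is averaged across the
# test set *before* the log, matching psnr_all in aeeval_*.py
psnr_all = -10 * np.log(np.mean(mse_a, axis=0)) / np.log(10)
print(psnr_all)  # one PSNR value (here 20 dB) per predicted timestep
```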
21 | 
--------------------------------------------------------------------------------
/model/aeeval_kitti.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | from torch.autograd import Variable
6 | from torchvision import datasets, transforms
7 | import torch.nn.functional as F
8 | 
9 | import numpy as np
10 | 
11 | from dataset.data_loader_kitti_reimpl import KITTIReader_traj
12 | from models.vgg_warper_weak_shortcut_nobn import VGG_Warper
13 | from utils.visual import colorcode, VisdomShow, pbar
14 | 
15 | from ops.flow_warper_pad_2x import FlowWarp
16 | from ops.hardshinkloss import HardshinkLoss
17 | from ops.laplace2d import Laplace2D
18 | 
19 | from skimage.measure import compare_ssim as ssim
20 | from skimage.measure import compare_psnr as psnr
21 | from skimage.measure import compare_mse as mse
22 | 
23 | args = {}
24 | args['gpus'] = [0]
25 | args['seed'] = 12345
26 | torch.backends.cudnn.benchmark = True
27 | 
28 | 
29 | # Initialize Pytorch Dataloader
30 | datareader = KITTIReader_traj(is_test=True, max_interval=10, min_ntraj=10, max_ntraj=10, is_eval=True)
31 | train_loader = torch.utils.data.DataLoader(
32 |     datareader, batch_size=4, shuffle=False, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=4, pin_memory=True, drop_last = True)
33 | 
34 | class MModel(nn.Module):
35 |     def __init__(self):
36 |         super(MModel, self).__init__()
37 |         self.warp_cnn = VGG_Warper(9)
38 |         self.flow_warper = FlowWarp()
39 |         self.mseloss = nn.MSELoss(size_average=True, reduce=True)
40 |         self.hardshrinkloss = HardshinkLoss(0., 1.)
41 | 
42 |     def forward(self, img_input, warp_input, img_gt):
43 |         warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2
44 |         warp_imgs = self.flow_warper(img_input, warp_flow, padl=83)
45 |         comp_imgs = F.hardtanh(comp_imgs,0.,1.)
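        # The warper CNN predicts three things from the trajectory-conditioned
        # input: a dense flow field, a soft occlusion mask, and a hallucinated
        # "completion" image. Below, the mask is squashed to [0, 1] with a
        # sigmoid and used to alpha-blend the flow-warped input frame with the
        # completion image: recon_img = warp_imgs*masks + comp_imgs*(1-masks).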
46 | masks = F.sigmoid(masks) 47 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks) 48 | 49 | return recon_img, warp_flow, comp_imgs, masks 50 | 51 | mmodel = MModel() 52 | mmodel.cuda() 53 | mmodel = nn.DataParallel(mmodel, device_ids=[0]) 54 | 55 | visual = VisdomShow('kitti_eval_10') 56 | 57 | def test(): 58 | print('\n\n=========================== Testing ============================') 59 | mmodel.eval() 60 | mse_stor = [] 61 | ssim_stor = [] 62 | for batch_idx, (img_input, warp_input, img_gt, vid_mask, img_input_2x) in enumerate(train_loader): 63 | img_input = Variable(img_input, volatile=True).cuda(args['gpus'][0]) 64 | img_input_2x = Variable(img_input_2x).cuda(args['gpus'][0]) 65 | warp_input = Variable(warp_input, volatile=True).cuda(args['gpus'][0]) 66 | img_gt = Variable(img_gt, volatile=True).cuda(args['gpus'][0]) 67 | vid_mask = Variable(vid_mask, volatile=True).cuda(args['gpus'][0]) 68 | 69 | 70 | # warp_input : [interval-1, 9, H, W] 71 | # print(warp_input.shape) # ([1, 9, 9, 192, 256]) 72 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input_2x, warp_input, img_gt) 73 | recon_img *= vid_mask 74 | img_gt *= vid_mask 75 | 76 | gen_seq = recon_img.data.cpu().numpy() 77 | gt_seq = img_gt.data.cpu().numpy() 78 | mses = np.zeros(gen_seq.shape[0]) 79 | ssims = np.zeros(gen_seq.shape[0]) 80 | for i in range(gen_seq.shape[0]): 81 | gen = np.transpose(gen_seq[i,:,:,:], [1,2,0]) 82 | gt = np.transpose(gt_seq[i,:,:,:], [1,2,0]) 83 | mses[i] = mse(gen,gt) 84 | ssims[i] = ssim(gt, gen, data_range=1., multichannel=True) 85 | 86 | mse_stor.append(mses.reshape([-1,9])) 87 | ssim_stor.append(ssims.reshape([-1,9])) 88 | 89 | 90 | if batch_idx%1 == 0: 91 | pbar(batch_idx, len(train_loader), 0) 92 | 93 | if batch_idx%10 == 0: 94 | mse_a = np.concatenate(mse_stor, axis=0) 95 | ssim_a = np.concatenate(ssim_stor, axis=0) 96 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10) 97 | ssim_all = np.mean(ssim_a, axis=0) 98 | 99 | print('PSNR') 100 | print(psnr_all) 101 | print('SSIM') 102 | print(ssim_all) 103 | 104 | if batch_idx%10 == 0: 105 | out_seq = torch.cat((img_input[(0,),:,:,:],recon_img), dim=0).data.cpu().numpy() 106 | for i in range(out_seq.shape[0]): 107 | out_seq[i,:,:,:] = visual.add_text(out_seq[i,:,:,:], str(i), (0,1,1)) 108 | out_gt = torch.cat((img_input[(0,),:,:,:],img_gt), dim=0).data.cpu().numpy() 109 | for i in range(out_gt.shape[0]): 110 | out_gt[i,:,:,:] = visual.add_text(out_gt[i,:,:,:], 'GT', (0,1,0)) 111 | 112 | out_seq = np.concatenate((out_seq,out_gt), axis=3) 113 | visual.show_vid(out_seq) 114 | 115 | mse_a = np.concatenate(mse_stor, axis=0) 116 | ssim_a = np.concatenate(ssim_stor, axis=0) 117 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10) 118 | ssim_all = np.mean(ssim_a, axis=0) 119 | print('\nPSNR SSIM') 120 | for i in range(psnr_all.size): 121 | print('{} {}'.format(psnr_all[i], ssim_all[i])) 122 | 123 | def restore(ckpt_file): 124 | ckpt = torch.load(ckpt_file) 125 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict']) 126 | #optimizer.load_state_dict(ckpt['optimizer']) 127 | #hist = ckpt['hist'] 128 | print('Restored from {}'.format(ckpt_file)) 129 | 130 | restore('./snapshots/kitti/ckpt_e0_b0_rev2.pth') 131 | test() 132 | 133 | 134 | -------------------------------------------------------------------------------- /model/aeeval_rp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | 
from torch.autograd import Variable 6 | from torchvision import datasets, transforms 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | from dataset.data_loader_rp_reimpl import RPReader_traj 12 | from models.vgg_warper_weak_shortcut import VGG_Warper 13 | from utils.visual import colorcode, VisdomShow, pbar 14 | 15 | from ops.flow_warper import FlowWarp 16 | from ops.hardshinkloss import HardshinkLoss 17 | from ops.laplace2d import Laplace2D 18 | 19 | from skimage.measure import compare_ssim as ssim 20 | from skimage.measure import compare_psnr as psnr 21 | from skimage.measure import compare_mse as mse 22 | 23 | args = {} 24 | args['gpus'] = [0] 25 | args['seed'] = 12345 26 | torch.backends.cudnn.benchmark = True 27 | 28 | 29 | # Initialize Pytorch Dataloader 30 | datareader = RPReader_traj(is_test=True, max_interval=10, min_ntraj=10, max_ntraj=10, is_eval=True) 31 | train_loader = torch.utils.data.DataLoader( 32 | datareader, batch_size=4, shuffle=False, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=4, pin_memory=True, drop_last = True) 33 | 34 | class MModel(nn.Module): 35 | def __init__(self): 36 | super(MModel, self).__init__() 37 | self.warp_cnn = VGG_Warper(9) 38 | self.flow_warper = FlowWarp() 39 | self.mseloss = nn.MSELoss(size_average=True, reduce=True) 40 | self.hardshrinkloss = HardshinkLoss(0., 1.) 41 | 42 | def forward(self, img_input, warp_input, img_gt): 43 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2 44 | warp_imgs = self.flow_warper(img_input, warp_flow) 45 | comp_imgs = F.hardtanh(comp_imgs,0.,1.) 46 | masks = F.sigmoid(masks) 47 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks) 48 | 49 | return recon_img, warp_flow, comp_imgs, masks 50 | 51 | mmodel = MModel() 52 | mmodel.cuda() 53 | mmodel = nn.DataParallel(mmodel, device_ids=[0]) 54 | 55 | visual = VisdomShow('rp_eval_10') 56 | 57 | def test(): 58 | print('\n\n=========================== Testing ============================') 59 | mmodel.eval() 60 | mse_stor = [] 61 | ssim_stor = [] 62 | for batch_idx, (img_input, warp_input, img_gt, vid_mask) in enumerate(train_loader): 63 | img_input = Variable(img_input, volatile=True).cuda(args['gpus'][0]) 64 | warp_input = Variable(warp_input, volatile=True).cuda(args['gpus'][0]) 65 | img_gt = Variable(img_gt, volatile=True).cuda(args['gpus'][0]) 66 | vid_mask = Variable(vid_mask, volatile=True).cuda(args['gpus'][0]) 67 | 68 | 69 | # warp_input : [interval-1, 9, H, W] 70 | # print(warp_input.shape) # ([1, 9, 9, 192, 256]) 71 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input, warp_input, img_gt) 72 | recon_img *= vid_mask 73 | img_gt *= vid_mask 74 | 75 | gen_seq = recon_img.data.cpu().numpy() 76 | gt_seq = img_gt.data.cpu().numpy() 77 | mses = np.zeros(gen_seq.shape[0]) 78 | ssims = np.zeros(gen_seq.shape[0]) 79 | for i in range(gen_seq.shape[0]): 80 | gen = np.transpose(gen_seq[i,:,:,:], [1,2,0]) 81 | gt = np.transpose(gt_seq[i,:,:,:], [1,2,0]) 82 | mses[i] = mse(gen,gt) 83 | ssims[i] = ssim(gt, gen, data_range=1., multichannel=True) 84 | 85 | mse_stor.append(mses.reshape([-1,9])) 86 | ssim_stor.append(ssims.reshape([-1,9])) 87 | 88 | 89 | if batch_idx%1 == 0: 90 | pbar(batch_idx, len(train_loader), 0) 91 | 92 | if batch_idx%10 == 0: 93 | mse_a = np.concatenate(mse_stor, axis=0) 94 | ssim_a = np.concatenate(ssim_stor, axis=0) 95 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10) 96 | ssim_all = np.mean(ssim_a, axis=0) 97 | 98 | print('PSNR') 99 | 
print(psnr_all) 100 | print('SSIM') 101 | print(ssim_all) 102 | 103 | if batch_idx%10 == 0: 104 | out_seq = torch.cat((img_input[(0,),:,:,:],recon_img), dim=0).data.cpu().numpy() 105 | for i in range(out_seq.shape[0]): 106 | out_seq[i,:,:,:] = visual.add_text(out_seq[i,:,:,:], str(i), (0,1,1)) 107 | out_gt = torch.cat((img_input[(0,),:,:,:],img_gt), dim=0).data.cpu().numpy() 108 | for i in range(out_gt.shape[0]): 109 | out_gt[i,:,:,:] = visual.add_text(out_gt[i,:,:,:], 'GT', (0,1,0)) 110 | 111 | out_seq = np.concatenate((out_seq,out_gt), axis=3) 112 | visual.show_vid(out_seq) 113 | 114 | mse_a = np.concatenate(mse_stor, axis=0) 115 | ssim_a = np.concatenate(ssim_stor, axis=0) 116 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10) 117 | ssim_all = np.mean(ssim_a, axis=0) 118 | print('\nPSNR SSIM') 119 | for i in range(psnr_all.size): 120 | print('{} {}'.format(psnr_all[i], ssim_all[i])) 121 | 122 | def restore(ckpt_file): 123 | ckpt = torch.load(ckpt_file) 124 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict']) 125 | #optimizer.load_state_dict(ckpt['optimizer']) 126 | #hist = ckpt['hist'] 127 | print('Restored from {}'.format(ckpt_file)) 128 | 129 | 130 | restore('./snapshots/rp/ckpt_e0_b198000.pth') 131 | test() 132 | 133 | 134 | -------------------------------------------------------------------------------- /model/aeeval_ucf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | from torchvision import datasets, transforms 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | from dataset.data_loader_ucf_reimpl import UCFReader_traj 12 | from models.vgg_warper_weak_shortcut import VGG_Warper 13 | from utils.visual import colorcode, VisdomShow, pbar 14 | 15 | from ops.flow_warper import FlowWarp 16 | from ops.hardshinkloss import HardshinkLoss 17 | from ops.laplace2d import Laplace2D 18 | 19 | from skimage.measure import compare_ssim as ssim 20 | from skimage.measure import compare_psnr as psnr 21 | from skimage.measure import compare_mse as mse 22 | 23 | args = {} 24 | args['gpus'] = [0] 25 | args['seed'] = 12345 26 | torch.backends.cudnn.benchmark = True 27 | 28 | 29 | # Initialize Pytorch Dataloader 30 | datareader = UCFReader_traj(is_test=True, max_interval=10, min_ntraj=10, max_ntraj=10, is_eval=True) 31 | train_loader = torch.utils.data.DataLoader( 32 | datareader, batch_size=4, shuffle=False, collate_fn=datareader.collate_fn_eval, worker_init_fn=datareader.worker_init_fn, num_workers=6, pin_memory=True, drop_last = True) 33 | 34 | class MModel(nn.Module): 35 | def __init__(self): 36 | super(MModel, self).__init__() 37 | self.warp_cnn = VGG_Warper(9) 38 | self.flow_warper = FlowWarp() 39 | self.mseloss = nn.MSELoss(size_average=True, reduce=True) 40 | self.hardshrinkloss = HardshinkLoss(0., 1.) 41 | 42 | def forward(self, img_input, warp_input, img_gt): 43 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2 44 | warp_imgs = self.flow_warper(img_input, warp_flow) 45 | comp_imgs = F.hardtanh(comp_imgs,0.,1.) 
46 | masks = F.sigmoid(masks) 47 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks) 48 | 49 | return recon_img, warp_flow, comp_imgs, masks 50 | 51 | mmodel = MModel() 52 | mmodel.cuda() 53 | mmodel = nn.DataParallel(mmodel, device_ids=[0]) 54 | 55 | visual = VisdomShow('ucf_eval_10') 56 | 57 | def test(): 58 | print('\n\n=========================== Testing ============================') 59 | mmodel.eval() 60 | mse_stor = [] 61 | ssim_stor = [] 62 | for batch_idx, (img_input, warp_input, img_gt, vid_mask) in enumerate(train_loader): 63 | img_input = Variable(img_input, volatile=True).cuda(args['gpus'][0]) 64 | warp_input = Variable(warp_input, volatile=True).cuda(args['gpus'][0]) 65 | img_gt = Variable(img_gt, volatile=True).cuda(args['gpus'][0]) 66 | vid_mask = Variable(vid_mask, volatile=True).cuda(args['gpus'][0]) 67 | 68 | 69 | # warp_input : [interval-1, 9, H, W] 70 | # print(warp_input.shape) # ([1, 9, 9, 192, 256]) 71 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input, warp_input, img_gt) 72 | recon_img *= vid_mask 73 | img_gt *= vid_mask 74 | 75 | gen_seq = recon_img.data.cpu().numpy() 76 | gt_seq = img_gt.data.cpu().numpy() 77 | mses = np.zeros(gen_seq.shape[0]) 78 | ssims = np.zeros(gen_seq.shape[0]) 79 | for i in range(gen_seq.shape[0]): 80 | gen = np.transpose(gen_seq[i,:,:,:], [1,2,0]) 81 | gt = np.transpose(gt_seq[i,:,:,:], [1,2,0]) 82 | mses[i] = mse(gen,gt) 83 | ssims[i] = ssim(gt, gen, data_range=1., multichannel=True) 84 | 85 | mse_stor.append(mses.reshape([-1,9])) 86 | ssim_stor.append(ssims.reshape([-1,9])) 87 | 88 | 89 | if batch_idx%1 == 0: 90 | pbar(batch_idx, len(train_loader), 0) 91 | 92 | if batch_idx%10 == 0: 93 | mse_a = np.concatenate(mse_stor, axis=0) 94 | ssim_a = np.concatenate(ssim_stor, axis=0) 95 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10) 96 | ssim_all = np.mean(ssim_a, axis=0) 97 | 98 | print('PSNR') 99 | print(psnr_all) 100 | print('SSIM') 101 | print(ssim_all) 102 | 103 | if batch_idx%10 == 0: 104 | out_seq = torch.cat((img_input[(0,),:,:,:],recon_img), dim=0).data.cpu().numpy() 105 | for i in range(out_seq.shape[0]): 106 | out_seq[i,:,:,:] = visual.add_text(out_seq[i,:,:,:], str(i), (0,1,1)) 107 | out_gt = torch.cat((img_input[(0,),:,:,:],img_gt), dim=0).data.cpu().numpy() 108 | for i in range(out_gt.shape[0]): 109 | out_gt[i,:,:,:] = visual.add_text(out_gt[i,:,:,:], 'GT', (0,1,0)) 110 | 111 | out_seq = np.concatenate((out_seq,out_gt), axis=3) 112 | visual.show_vid(out_seq) 113 | 114 | mse_a = np.concatenate(mse_stor, axis=0) 115 | ssim_a = np.concatenate(ssim_stor, axis=0) 116 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10) 117 | ssim_all = np.mean(ssim_a, axis=0) 118 | print('\nPSNR SSIM') 119 | for i in range(psnr_all.size): 120 | print('{} {}'.format(psnr_all[i], ssim_all[i])) 121 | 122 | def restore(ckpt_file): 123 | ckpt = torch.load(ckpt_file) 124 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict']) 125 | #optimizer.load_state_dict(ckpt['optimizer']) 126 | #hist = ckpt['hist'] 127 | print('Restored from {}'.format(ckpt_file)) 128 | 129 | restore('./snapshots/ucf/ckpt_e0_b52000_cont.pth') 130 | test() 131 | 132 | 133 | -------------------------------------------------------------------------------- /model/dataset/commons.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | def trajs2featmap(trajs, kpmap_seq): 5 | for traj_no in range(trajs.shape[0]): 6 | #cv2.circle(frame, tuple(trajs[traj_no, frame_no, :]), 2, (0.,1.,0.)) 7 | 
kp_start_x = trajs[traj_no,0,0] 8 | kp_start_y = trajs[traj_no,0,1] 9 | kp_end_x = trajs[traj_no,1,0] 10 | kp_end_y = trajs[traj_no,1,1] 11 | 12 | kp_start_x_int = int(max(min(kp_start_x, kpmap_seq.shape[2]),0)) 13 | kp_start_y_int = int(max(min(kp_start_y, kpmap_seq.shape[1]),0)) 14 | kp_dx = kp_end_x - kp_start_x 15 | kp_dy = kp_end_y - kp_start_y 16 | kpmap_seq[0,kp_start_y_int,kp_start_x_int] = 1.0 17 | kpmap_seq[1,kp_start_y_int,kp_start_x_int] = kp_dy 18 | kpmap_seq[2,kp_start_y_int,kp_start_x_int] = kp_dx 19 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5 20 | 21 | kp_end_x_int = int(max(min(kp_end_x, kpmap_seq.shape[2]),0)) 22 | kp_end_y_int = int(max(min(kp_end_y, kpmap_seq.shape[1]),0)) 23 | kp_dx2 = kp_start_x - kp_end_x 24 | kp_dy2 = kp_start_y - kp_end_y 25 | kpmap_seq[3,kp_end_y_int,kp_end_x_int] = 1.0 26 | kpmap_seq[4,kp_end_y_int,kp_end_x_int] = kp_dy2 27 | kpmap_seq[5,kp_end_y_int,kp_end_x_int] = kp_dx2 28 | return kpmap_seq 29 | 30 | 31 | def drawtrajs(trajs, frame_no, img): 32 | for traj_no in range(trajs.shape[0]): 33 | cv2.circle(img, tuple(trajs[traj_no, frame_no, :]), 2, (0.,1.,0.)) 34 | return img 35 | -------------------------------------------------------------------------------- /model/dataset/utils/set_dataset_path.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | dataset_path = {} 4 | dataset_path['robot_push_jpgs_h5_train'] = '/datasets/robot_push_h5/robot_push_jpgs.h5' 5 | dataset_path['robot_push_jpgs_h5_test'] = '/datasets/robot_push_h5/robot_push_testnovel_jpgs.h5' 6 | dataset_path['robot_push_traj_h5_train'] = '/trajectories/rp/traj_stor_train.h5' 7 | dataset_path['robot_push_traj_h5_test'] = '/trajectories/rp/traj_stor_test.h5' 8 | dataset_path['ucf101_jpgs'] = '/datasets/UCF101_seq/UCF-101' 9 | dataset_path['ucf101_traj_h5_train'] = '/trajectories/ucf/traj_stor_train.h5' 10 | dataset_path['ucf101_traj_h5_test'] = '/trajectories/ucf/traj_stor_test.h5' 11 | dataset_path['kitti_traj_h5_train'] = '/trajectories/kitti/traj_stor_train.h5' 12 | dataset_path['kitti_traj_h5_test'] = '/trajectories/kitti/traj_stor_test_dense.h5' 13 | dataset_path['kitti_png'] = '/datasets/KITTI/dataset/sequences' 14 | dataset_path['kitti_bmp'] = '/datasets/KITTI_bmp/dataset/sequences' 15 | 16 | with open('../dataset_path.json', 'w') as f: 17 | json.dump(dataset_path, f) 18 | 19 | with open('../dataset_path.json', 'r') as f: 20 | data = json.load(f) 21 | 22 | print(data) 23 | -------------------------------------------------------------------------------- /model/guieval_rp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import torch.utils.data 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from torch.autograd import Variable 7 | from torchvision import datasets, transforms 8 | import torch.nn.functional as F 9 | 10 | import numpy as np 11 | from reader.rp_reader import RPReader 12 | from models.vgg_warper_weak_shortcut import VGG_Warper 13 | from ops.flow_warper import FlowWarp 14 | import matplotlib.pyplot as plt 15 | 16 | import time 17 | import itertools 18 | import math 19 | 20 | from scipy import misc 21 | 22 | from utils.trajs2map import trajs2map 23 | from utils.visual import colorcode 24 | 25 | # Setup parameters 26 | parser = argparse.ArgumentParser(description='Nothing') 27 | parser.add_argument('--batch-size', type=int, default=32, metavar='N', 28 | help='input batch size for training (default: 64)') 29 | 
parser.add_argument('--epochs', type=int, default=50000, metavar='N', 30 | help='number of epochs to train (default: 2)') 31 | parser.add_argument('--no-cuda', action='store_true', default=False, 32 | help='enables CUDA training') 33 | parser.add_argument('--seed', type=int, default=1, metavar='S', 34 | help='random seed (default: 1)') 35 | parser.add_argument('--log-interval', type=int, default=100, metavar='N', 36 | help='how many batches to wait before logging training status') 37 | 38 | args = parser.parse_args() 39 | args.cuda = not args.no_cuda and torch.cuda.is_available() 40 | 41 | args.num_frames = 5 42 | 43 | args.gpus = [0] 44 | #torch.backends.cudnn.benchmark = True 45 | 46 | torch.manual_seed(args.seed) 47 | if args.cuda: 48 | torch.cuda.manual_seed(args.seed) 49 | 50 | reader = RPReader(num_frames=20) 51 | 52 | class MModel(nn.Module): 53 | def __init__(self): 54 | super(MModel, self).__init__() 55 | self.warp_cnn = VGG_Warper(9) 56 | self.flow_warper = FlowWarp() 57 | 58 | def forward(self, img_input, warp_input, img_gt): 59 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2 60 | warp_imgs = self.flow_warper(img_input, warp_flow) 61 | comp_imgs = F.hardtanh(comp_imgs,0.,1.) 62 | masks = F.sigmoid(masks) 63 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks) 64 | 65 | return recon_img, warp_flow, comp_imgs, masks, warp_imgs 66 | 67 | 68 | mmodel = MModel() 69 | mmodel.cuda(args.gpus[0]) 70 | 71 | def get_test_batch(): 72 | vid_seq, kpmap_seq, traj_list = reader[-1] 73 | vid_seq = torch.from_numpy(vid_seq).unsqueeze(0) 74 | kpmap_seq = torch.from_numpy(kpmap_seq).unsqueeze(0) 75 | 76 | vid_seq = Variable(vid_seq, volatile=True) 77 | kpmap_seq = Variable(kpmap_seq, volatile=True) 78 | vid_seq = vid_seq.cuda(args.gpus[0]) 79 | kpmap_seq = kpmap_seq.cuda(args.gpus[0]) 80 | return vid_seq, kpmap_seq, traj_list 81 | # traj_list: Num, Len, x/y 82 | 83 | 84 | # First click defines start point 85 | # second click defines end point 86 | # Click outside canvas to clear trajectories 87 | # Press right mouse button to go to next image 88 | def onclick(event): 89 | global sp, ep 90 | global clr, gonext 91 | global ix, iy 92 | if event.button == 3: 93 | gonext = True 94 | return 95 | ix, iy = event.xdata, event.ydata 96 | if ix is None: 97 | clr = True 98 | return 99 | print('x = %d, y = %d'%(ix, iy)) 100 | 101 | if sp is not None: 102 | if ep is not None: 103 | sp = (ix, iy) 104 | ep = None 105 | else: 106 | ep = (ix, iy) 107 | else: 108 | sp = (ix, iy) 109 | 110 | 111 | #if len(coords) == 2: 112 | # fig.canvas.mpl_disconnect(cid) 113 | 114 | #return coords 115 | 116 | 117 | def img_chooser(): 118 | global sp, ep, clr, gonext 119 | sp = None 120 | ep = None 121 | clr = False 122 | gonext = False 123 | 124 | 125 | fig = plt.figure(1) 126 | ax = fig.add_subplot(231) 127 | ax.set_title('click to build line segments') 128 | ax2 = fig.add_subplot(232) 129 | ax3 = fig.add_subplot(233) 130 | ax4 = fig.add_subplot(234) 131 | ax5 = fig.add_subplot(235) 132 | ax6 = fig.add_subplot(236) 133 | cid = fig.canvas.mpl_connect('button_press_event', onclick) 134 | 135 | mmodel.eval() 136 | counter=0 137 | while True: 138 | #plt.clf() 139 | vid_seq, kpmap_seq, traj_list = get_test_batch() 140 | fram_stor = [] 141 | img_input = vid_seq[:,0,:,:,:] 142 | trajs = [] 143 | while True: 144 | if gonext: 145 | gonext = False 146 | sp = None 147 | ep = None 148 | trajs = [] 149 | break 150 | if sp is not None and ep is not None: 151 | print('Move!') 152 | trajs.append((sp,ep)) 153 | 
if clr: 154 | print('Clr!') 155 | clr = False 156 | sp = None 157 | ep = None 158 | trajs = [] 159 | kpmap_seq = trajs2map( trajs, img_input.size(2), img_input.size(3)) 160 | warp_input = torch.cat((img_input, kpmap_seq), dim=1) 161 | recon_img, warp_flow, comp, alpha, warp_img = mmodel(img_input, warp_input, None) 162 | 163 | #img_gt = vid_seq[:,ff,:,:,:] 164 | #fram_stor.append(recon_img) 165 | 166 | fram = np.transpose(recon_img[0,:,:,:].data.cpu().numpy()+0.5, [1,2,0]) 167 | framin = np.transpose(img_input[0,:,:,:].data.cpu().numpy()+0.5, [1,2,0]) 168 | warpimga = np.transpose(warp_img[0,:,:,:].data.cpu().numpy()+0.5, [1,2,0]) 169 | #misc.imsave('./FirstImage/{}.png'.format(counter), fram) 170 | counter += 1 171 | ax.clear() 172 | ax2.clear() 173 | ax3.clear() 174 | ax4.clear() 175 | ax5.clear() 176 | ax.imshow(framin) 177 | #ax6.imshow(framin) 178 | ax6.imshow(warpimga) 179 | ax5.imshow(fram) 180 | 181 | max_flow = torch.sqrt(torch.max(warp_flow[0,0,:,:]**2 + warp_flow[0,1,:,:]**2)).data.cpu().numpy() 182 | warp_flow_c = np.clip(colorcode(warp_flow.data.cpu().numpy()[0,0,:,:]/max_flow, warp_flow.data.cpu().numpy()[0,1,:,:]/max_flow),0,1) 183 | ax2.imshow(np.transpose(warp_flow_c,[1,2,0])) 184 | ax3.imshow(np.transpose(comp[0,:,:,:].data.cpu().numpy()+0.5, [1,2,0])) 185 | ax4.imshow(alpha[0,0,:,:].data.cpu().numpy()+0.5, cmap=plt.get_cmap('Greys')) 186 | 187 | 188 | for arr in trajs: 189 | ax.arrow( arr[0][0], arr[0][1], arr[1][0]-arr[0][0], arr[1][1]-arr[0][1], fc="g", ec="g",head_width=5, head_length=5 ) 190 | fig.canvas.draw() 191 | fig.savefig('user_out/{}.png'.format(counter), bbox_inches='tight', pad_inches=0) 192 | 193 | plt.waitforbuttonpress() 194 | 195 | 196 | ckpt = torch.load('./ckpt_RP.pth') 197 | mmodel.load_state_dict(ckpt['mmodel_state_dict']) 198 | img_chooser() 199 | 200 | -------------------------------------------------------------------------------- /model/models/vgg_warper_weak_shortcut.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | from torch.autograd import Variable 6 | 7 | 8 | class VGG_enc(nn.Module): 9 | def __init__(self, input_channels=6): 10 | super(VGG_enc, self).__init__() 11 | in_channels = input_channels 12 | self.c11 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1) 13 | self.bn11 = nn.BatchNorm2d(64) 14 | self.c12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 15 | self.bn12 = nn.BatchNorm2d(64) 16 | self.p1 = nn.MaxPool2d(kernel_size=2, stride=2) 17 | 18 | self.c21 = nn.Conv2d(64, 128, kernel_size=3, padding=1) 19 | self.bn21 = nn.BatchNorm2d(128) 20 | self.c22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 21 | self.bn22 = nn.BatchNorm2d(128) 22 | self.p2 = nn.MaxPool2d(kernel_size=2, stride=2) 23 | 24 | self.c31 = nn.Conv2d(128, 256, kernel_size=3, padding=1) 25 | self.bn31 = nn.BatchNorm2d(256) 26 | self.c32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 27 | self.bn32 = nn.BatchNorm2d(256) 28 | self.c33 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 29 | self.bn33 = nn.BatchNorm2d(256) 30 | self.p3 = nn.MaxPool2d(kernel_size=2, stride=2) 31 | 32 | self.c41 = nn.Conv2d(256, 512, kernel_size=3, padding=1) 33 | self.bn41 = nn.BatchNorm2d(512) 34 | self.c42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 35 | self.bn42 = nn.BatchNorm2d(512) 36 | self.c43 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 37 | self.bn43 = nn.BatchNorm2d(512) 38 | self.p4 = nn.MaxPool2d(kernel_size=2, stride=2) 39 | 40 | self.c51 
= nn.Conv2d(512, 512, kernel_size=3, padding=1) 41 | self.bn51 = nn.BatchNorm2d(512) 42 | self.c52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 43 | self.bn52 = nn.BatchNorm2d(512) 44 | self.c53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 45 | self.bn53 = nn.BatchNorm2d(512) 46 | 47 | def forward(self, x): 48 | o11 = F.relu(self.bn11(self.c11(x)), inplace=True) 49 | o12 = F.relu(self.bn12(self.c12(o11)), inplace=True) 50 | o1p = self.p1(o12) 51 | o21 = F.relu(self.bn21(self.c21(o1p)), inplace=True) 52 | o22 = F.relu(self.bn22(self.c22(o21)), inplace=True) 53 | o2p = self.p2(o22) 54 | o31 = F.relu(self.bn31(self.c31(o2p)), inplace=True) 55 | o32 = F.relu(self.bn32(self.c32(o31)), inplace=True) 56 | o33 = F.relu(self.bn33(self.c33(o32)), inplace=True) 57 | o3p = self.p3(o33) 58 | o41 = F.relu(self.bn41(self.c41(o3p)), inplace=True) 59 | o42 = F.relu(self.bn42(self.c42(o41)), inplace=True) 60 | o43 = F.relu(self.bn43(self.c43(o42)), inplace=True) 61 | o4p = self.p4(o43) 62 | o51 = F.relu(self.bn51(self.c51(o4p)), inplace=True) 63 | o52 = F.relu(self.bn52(self.c52(o51)), inplace=True) 64 | o53 = F.relu(self.bn53(self.c53(o52)), inplace=True) 65 | return o53, o43, o33 66 | 67 | class VGG_dec(nn.Module): 68 | def __init__(self): 69 | super(VGG_dec, self).__init__() 70 | out_channels = 6 71 | self.c53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 72 | self.bn53 = nn.BatchNorm2d(512) 73 | self.c52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 74 | self.bn52 = nn.BatchNorm2d(512) 75 | self.c51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 76 | self.bn51 = nn.BatchNorm2d(512) 77 | self.u5 = nn.Upsample(scale_factor=2, mode='nearest') 78 | 79 | self.c43 = nn.Conv2d(1024, 512, kernel_size=3, padding=1) 80 | self.bn43 = nn.BatchNorm2d(512) 81 | self.c42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 82 | self.bn42 = nn.BatchNorm2d(512) 83 | self.c41 = nn.Conv2d(512, 256, kernel_size=3, padding=1) 84 | self.bn41 = nn.BatchNorm2d(256) 85 | self.u4 = nn.Upsample(scale_factor=2, mode='nearest') 86 | 87 | self.c33 = nn.Conv2d(512, 256, kernel_size=3, padding=1) 88 | self.bn33 = nn.BatchNorm2d(256) 89 | self.c32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 90 | self.bn32 = nn.BatchNorm2d(256) 91 | self.c31 = nn.Conv2d(256, 128, kernel_size=3, padding=1) 92 | self.bn31 = nn.BatchNorm2d(128) 93 | self.u3 = nn.Upsample(scale_factor=2, mode='nearest') 94 | 95 | self.c22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 96 | self.bn22 = nn.BatchNorm2d(128) 97 | self.c21 = nn.Conv2d(128, 64, kernel_size=3, padding=1) 98 | self.bn21 = nn.BatchNorm2d(64) 99 | self.u2 = nn.Upsample(scale_factor=2, mode='nearest') 100 | 101 | self.c12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 102 | self.bn12 = nn.BatchNorm2d(64) 103 | #self.c11 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 104 | #self.bn11 = nn.BatchNorm2d(64) 105 | 106 | 107 | def forward(self, i53, i43, i33): 108 | o53 = F.relu(self.bn53(self.c53(i53)), inplace=True) 109 | o52 = F.relu(self.bn52(self.c52(o53)), inplace=True) 110 | o51 = F.relu(self.bn51(self.c51(o52)), inplace=True) 111 | o5u = self.u5(o51) 112 | o5c = torch.cat((o5u, i43), 1) 113 | 114 | o43 = F.relu(self.bn43(self.c43(o5c)), inplace=True) 115 | o42 = F.relu(self.bn42(self.c42(o43)), inplace=True) 116 | o41 = F.relu(self.bn41(self.c41(o42)), inplace=True) 117 | o4u = self.u4(o41) 118 | o4c = torch.cat((o4u, i33), 1) 119 | 120 | o33 = F.relu(self.bn33(self.c33(o4c)), inplace=True) 121 | o32 = F.relu(self.bn32(self.c32(o33)), inplace=True) 122 | o31 = 
F.relu(self.bn31(self.c31(o32)), inplace=True) 123 | o3u = self.u3(o31) 124 | 125 | o22 = F.relu(self.bn22(self.c22(o3u)), inplace=True) 126 | o21 = F.relu(self.bn21(self.c21(o22)), inplace=True) 127 | o2u = self.u2(o21) 128 | 129 | o12 = F.relu(self.bn12(self.c12(o2u)), inplace=True) 130 | #o11 = F.relu(self.bn11(self.c11(o12)), inplace=True) 131 | 132 | return o12 133 | 134 | class VGG_net(nn.Module): 135 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512] 136 | def __init__(self, input_channels): 137 | super(VGG_net, self).__init__() 138 | self.enc_net = VGG_enc(input_channels) 139 | self.dec_net = VGG_dec() 140 | self.conv_warp = nn.Conv2d(self.cfg[0], 2, kernel_size=3, padding=1) 141 | self.conv_mask = nn.Conv2d(self.cfg[0], 1, kernel_size=3, padding=1) 142 | self.conv_comp = nn.Conv2d(self.cfg[0], 3, kernel_size=3, padding=1) 143 | self._initialize_weights() 144 | 145 | def _initialize_weights(self): 146 | for m in self.modules(): 147 | if isinstance(m, nn.Conv2d): 148 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 149 | m.weight.data.normal_(0, math.sqrt(2. / n)) 150 | if m.bias is not None: 151 | m.bias.data.zero_() 152 | elif isinstance(m, nn.BatchNorm2d): 153 | m.weight.data.fill_(1) 154 | m.bias.data.zero_() 155 | elif isinstance(m, nn.Linear): 156 | m.weight.data.normal_(0, 0.01) 157 | m.bias.data.zero_() 158 | 159 | # input: Nx3x3x256x320 160 | def forward(self, x): 161 | dec_feat = self.dec_net(*self.enc_net(x)) 162 | flow = self.conv_warp(dec_feat) 163 | mask = self.conv_mask(dec_feat) 164 | comp = self.conv_comp(dec_feat) 165 | return flow, mask, comp 166 | 167 | 168 | 169 | def VGG_Warper(input_channels = 6): 170 | return VGG_net(input_channels) 171 | -------------------------------------------------------------------------------- /model/models/vgg_warper_weak_shortcut_nobn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | from torch.autograd import Variable 6 | 7 | 8 | class VGG_enc(nn.Module): 9 | def __init__(self, input_channels=6): 10 | super(VGG_enc, self).__init__() 11 | in_channels = input_channels 12 | self.c11 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1) 13 | self.bn11 = nn.BatchNorm2d(64) 14 | self.c12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 15 | self.bn12 = nn.BatchNorm2d(64) 16 | self.p1 = nn.MaxPool2d(kernel_size=2, stride=2) 17 | 18 | self.c21 = nn.Conv2d(64, 128, kernel_size=3, padding=1) 19 | self.bn21 = nn.BatchNorm2d(128) 20 | self.c22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 21 | self.bn22 = nn.BatchNorm2d(128) 22 | self.p2 = nn.MaxPool2d(kernel_size=2, stride=2) 23 | 24 | self.c31 = nn.Conv2d(128, 256, kernel_size=3, padding=1) 25 | self.bn31 = nn.BatchNorm2d(256) 26 | self.c32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 27 | self.bn32 = nn.BatchNorm2d(256) 28 | self.c33 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 29 | self.bn33 = nn.BatchNorm2d(256) 30 | self.p3 = nn.MaxPool2d(kernel_size=2, stride=2) 31 | 32 | self.c41 = nn.Conv2d(256, 512, kernel_size=3, padding=1) 33 | self.bn41 = nn.BatchNorm2d(512) 34 | self.c42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 35 | self.bn42 = nn.BatchNorm2d(512) 36 | self.c43 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 37 | self.bn43 = nn.BatchNorm2d(512) 38 | self.p4 = nn.MaxPool2d(kernel_size=2, stride=2) 39 | 40 | self.c51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 41 | self.bn51 = 
nn.BatchNorm2d(512) 42 | self.c52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 43 | self.bn52 = nn.BatchNorm2d(512) 44 | self.c53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 45 | self.bn53 = nn.BatchNorm2d(512) 46 | 47 | def forward(self, x): 48 | o11 = F.relu(self.c11(x), inplace=True) 49 | o12 = F.relu(self.c12(o11), inplace=True) 50 | o1p = self.p1(o12) 51 | o21 = F.relu(self.c21(o1p), inplace=True) 52 | o22 = F.relu(self.c22(o21), inplace=True) 53 | o2p = self.p2(o22) 54 | o31 = F.relu(self.c31(o2p), inplace=True) 55 | o32 = F.relu(self.c32(o31), inplace=True) 56 | o33 = F.relu(self.c33(o32), inplace=True) 57 | o3p = self.p3(o33) 58 | o41 = F.relu(self.c41(o3p), inplace=True) 59 | o42 = F.relu(self.c42(o41), inplace=True) 60 | o43 = F.relu(self.c43(o42), inplace=True) 61 | o4p = self.p4(o43) 62 | o51 = F.relu(self.c51(o4p), inplace=True) 63 | o52 = F.relu(self.c52(o51), inplace=True) 64 | o53 = F.relu(self.c53(o52), inplace=True) 65 | return o53, o43, o33 66 | 67 | class VGG_dec(nn.Module): 68 | def __init__(self): 69 | super(VGG_dec, self).__init__() 70 | out_channels = 6 71 | self.c53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 72 | self.bn53 = nn.BatchNorm2d(512) 73 | self.c52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 74 | self.bn52 = nn.BatchNorm2d(512) 75 | self.c51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 76 | self.bn51 = nn.BatchNorm2d(512) 77 | self.u5 = nn.Upsample(scale_factor=2, mode='nearest') 78 | 79 | self.c43 = nn.Conv2d(1024, 512, kernel_size=3, padding=1) 80 | self.bn43 = nn.BatchNorm2d(512) 81 | self.c42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 82 | self.bn42 = nn.BatchNorm2d(512) 83 | self.c41 = nn.Conv2d(512, 256, kernel_size=3, padding=1) 84 | self.bn41 = nn.BatchNorm2d(256) 85 | self.u4 = nn.Upsample(scale_factor=2, mode='nearest') 86 | 87 | self.c33 = nn.Conv2d(512, 256, kernel_size=3, padding=1) 88 | self.bn33 = nn.BatchNorm2d(256) 89 | self.c32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 90 | self.bn32 = nn.BatchNorm2d(256) 91 | self.c31 = nn.Conv2d(256, 128, kernel_size=3, padding=1) 92 | self.bn31 = nn.BatchNorm2d(128) 93 | self.u3 = nn.Upsample(scale_factor=2, mode='nearest') 94 | 95 | self.c22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 96 | self.bn22 = nn.BatchNorm2d(128) 97 | self.c21 = nn.Conv2d(128, 64, kernel_size=3, padding=1) 98 | self.bn21 = nn.BatchNorm2d(64) 99 | self.u2 = nn.Upsample(scale_factor=2, mode='nearest') 100 | 101 | self.c12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 102 | self.bn12 = nn.BatchNorm2d(64) 103 | #self.c11 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 104 | #self.bn11 = nn.BatchNorm2d(64) 105 | 106 | 107 | def forward(self, i53, i43, i33): 108 | o53 = F.relu(self.c53(i53), inplace=True) 109 | o52 = F.relu(self.c52(o53), inplace=True) 110 | o51 = F.relu(self.c51(o52), inplace=True) 111 | o5u = self.u5(o51) 112 | o5c = torch.cat((o5u, i43), 1) 113 | 114 | o43 = F.relu(self.c43(o5c), inplace=True) 115 | o42 = F.relu(self.c42(o43), inplace=True) 116 | o41 = F.relu(self.c41(o42), inplace=True) 117 | o4u = self.u4(o41) 118 | o4c = torch.cat((o4u, i33), 1) 119 | 120 | o33 = F.relu(self.c33(o4c), inplace=True) 121 | o32 = F.relu(self.c32(o33), inplace=True) 122 | o31 = F.relu(self.c31(o32), inplace=True) 123 | o3u = self.u3(o31) 124 | 125 | o22 = F.relu(self.c22(o3u), inplace=True) 126 | o21 = F.relu(self.c21(o22), inplace=True) 127 | o2u = self.u2(o21) 128 | 129 | o12 = F.relu(self.c12(o2u), inplace=True) 130 | #o11 = F.relu(self.bn11(self.c11(o12)), inplace=True) 131 | 132 | return o12 
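# Structure note: VGG_enc returns three feature maps (o53, o43, o33); VGG_dec
# upsamples and re-injects the two shallower ones at matching resolutions via
# the torch.cat calls above -- a U-Net-style decoder with "weak" shortcuts from
# only two encoder stages. In this _nobn variant the bn* modules are still
# constructed (presumably to keep the parameter layout unchanged) but are never
# applied in forward().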
133 | 134 | class VGG_net(nn.Module): 135 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512] 136 | def __init__(self, input_channels): 137 | super(VGG_net, self).__init__() 138 | self.enc_net = VGG_enc(input_channels) 139 | self.dec_net = VGG_dec() 140 | self.conv_warp = nn.Conv2d(self.cfg[0], 2, kernel_size=3, padding=1) 141 | self.conv_mask = nn.Conv2d(self.cfg[0], 1, kernel_size=3, padding=1) 142 | self.conv_comp = nn.Conv2d(self.cfg[0], 3, kernel_size=3, padding=1) 143 | self._initialize_weights() 144 | 145 | def _initialize_weights(self): 146 | for m in self.modules(): 147 | if isinstance(m, nn.Conv2d): 148 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 149 | m.weight.data.normal_(0, math.sqrt(2. / n)) 150 | if m.bias is not None: 151 | m.bias.data.zero_() 152 | elif isinstance(m, nn.BatchNorm2d): 153 | m.weight.data.fill_(1) 154 | m.bias.data.zero_() 155 | elif isinstance(m, nn.Linear): 156 | m.weight.data.normal_(0, 0.01) 157 | m.bias.data.zero_() 158 | 159 | # input: Nx3x3x256x320 160 | def forward(self, x): 161 | dec_feat = self.dec_net(*self.enc_net(x)) 162 | flow = self.conv_warp(dec_feat) 163 | mask = self.conv_mask(dec_feat) 164 | comp = self.conv_comp(dec_feat) 165 | return flow, mask, comp 166 | 167 | 168 | 169 | def VGG_Warper(input_channels = 6): 170 | return VGG_net(input_channels) 171 | -------------------------------------------------------------------------------- /model/ops/cooltanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | 4 | # Inherit from Function 5 | class CoolTanH(torch.autograd.Function): 6 | # Note that both forward and backward are @staticmethods 7 | @staticmethod 8 | # bias is an optional argument 9 | def forward(ctx, input): 10 | ctx.save_for_backward(input) 11 | output = torch.clamp(input, min=0., max=1.) 12 | return output 13 | 14 | # This function has only a single output, so it gets only one gradient 15 | @staticmethod 16 | def backward(ctx, grad_output): 17 | input = ctx.saved_variables[0] 18 | # Pass the gradient through a saturated region only when a descent step 19 | # would move the input back into [0,1]: 20 | # input > 1 --> keep grad if grad >= 0, zero it otherwise 21 | # input < 0 --> keep grad if grad < 0, zero it otherwise 22 | grad_gtz = grad_output < 0. # True where the incoming gradient is negative
23 | passcond = ((input > 1.)&(grad_gtz^1)) | ((input < 0.)&grad_gtz) 24 | grad_input = grad_output*(passcond.type(torch.cuda.FloatTensor)) 25 | return grad_input 26 | 27 | #cooltanh = CoolTanH.apply 28 | -------------------------------------------------------------------------------- /model/ops/flow_warper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import torch.nn.functional as F 4 | import torch.nn as nn 5 | 6 | class FlowWarp(nn.Module): 7 | def __init__(self): 8 | super(FlowWarp, self).__init__() 9 | self.h = -1; 10 | self.w = -1; 11 | 12 | def forward(self, x, f): 13 | # First, generate absolute coordinates from the relative coordinates 14 | # f: N (rx,ry) oH oW 15 | # target: N oH oW (ax(width),ay(height)) 16 | 17 | # Generate offset map 18 | width = x.size()[3] 19 | height = x.size()[2] 20 | if width != self.w or height != self.h: 21 | width_map = torch.arange(0, width, step=1).expand([height, width]) 22 | height_map = torch.arange(0, height, step=1).unsqueeze(1).expand([height, width]) 23 | self.offset_map = Variable(torch.stack([width_map,height_map],2).cuda()) 24 | self.w = width 25 | self.h = height 26 | self.scaler = Variable(1./torch.cuda.FloatTensor([(self.w-1)/2, (self.h-1)/2])) 27 | 28 | f = f.permute(0,2,3,1) # N H W C 29 | f = f + self.offset_map # add with dimension expansion 30 | f = f * self.scaler - 1 # scale to [-1,1] 31 | 32 | return F.grid_sample(x, f, mode='bilinear') # bilinear sampling of x at the flow-displaced grid 33 | 34 | -------------------------------------------------------------------------------- /model/ops/flow_warper_pad_2x.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import torch.nn.functional as F 4 | import torch.nn as nn 5 | 6 | class FlowWarp(nn.Module): 7 | def __init__(self): 8 | super(FlowWarp, self).__init__() 9 | self.h = -1; 10 | self.w = -1; 11 | 12 | def forward(self, x, f, padl): 13 | # First, generate absolute coordinates from the relative coordinates 14 | # f: N (rx,ry) oH oW 15 | # target: N oH oW (ax(width),ay(height)) 16 | 17 | # Generate offset map 18 | width = x.size()[3] 19 | height = x.size()[2] 20 | ow = f.size()[3] 21 | oh = f.size()[2] 22 | if width != self.w or height != self.h or ow != self.ow or oh != self.oh or padl != self.padl: 23 | width_map = torch.arange(0+padl, ow+padl, step=1).expand([oh, ow]) 24 | height_map = torch.arange(0, oh, step=1).unsqueeze(1).expand([oh, ow]) 25 | self.offset_map = Variable(torch.stack([width_map,height_map],2).cuda()) 26 | self.w = width 27 | self.h = height 28 | self.oh = oh 29 | self.ow = ow 30 | self.padl = padl 31 | self.scaler = Variable(2./torch.cuda.FloatTensor([(self.w-1)/2, (self.h-1)/2])) 32 | 33 | f = f.permute(0,2,3,1) # N H W C 34 | f = f + self.offset_map # add with dimension expansion 35 | f = f * self.scaler - 1 # scale to [-1,1] 36 | 37 | return F.grid_sample(x, f, mode='bilinear') # bilinear sampling of x at the flow-displaced grid 38 | 39 | -------------------------------------------------------------------------------- /model/ops/grad_hook.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | 4 | # Inherit from Function 5 | class CoolTanH(torch.autograd.Function): 6 | # Note that both forward and backward are @staticmethods 7 | @staticmethod 8 | # bias is an optional argument 9 | def forward(ctx, input): 10 | 
ctx.save_for_backward(input) 11 | 12 | return input, ctx.saved_variables[0] 13 | 14 | # This function has only a single output, so it gets only one gradient 15 | @staticmethod 16 | def backward(ctx, grad_output): 17 | 18 | input = ctx.saved_variables[0] 19 | 20 | return grad_output 21 | 22 | #cooltanh = CoolTanH.apply 23 | -------------------------------------------------------------------------------- /model/ops/hardshinkloss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | 6 | class HardshinkLoss(nn.Module): 7 | def __init__(self, lowbound, upbound): 8 | super(HardshinkLoss, self).__init__() 9 | self.lowbound = lowbound 10 | self.upbound = upbound 11 | 12 | def forward(self, input): 13 | passcond = (input>self.upbound)|(input<self.lowbound) -------------------------------------------------------------------------------- /model/reader/kitti_reader.py: -------------------------------------------------------------------------------- 12 | def filter_trajs_kmeans(trajs, num_centroids): 13 | num_trajs = trajs.shape[0] 14 | len_trajs = trajs.shape[1] 15 | traj_vec_stor = np.empty((num_trajs, (len_trajs-1)*2), np.float32) 16 | disp_stor = np.empty((num_trajs,), np.float32) 17 | 18 | for ii in range(num_trajs): 19 | traj = trajs[ii,:,:] # n-by-2 20 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point 21 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1))) 22 | # Remove trajectories that have very low displacement 23 | good_trajs = np.flatnonzero(disp_stor>0.4) 24 | traj_vec_stor = traj_vec_stor[good_trajs,:] 25 | 26 | if traj_vec_stor.shape[0] < num_centroids: # too few points 27 | #print("kmeans: TOO FEW USABLE KEYPOINTS") 28 | return good_trajs[np.arange(0,traj_vec_stor.shape[0]-1)] # try to use all of them 29 | 30 | # k-means on vectors 31 | #num_centroids = 10 32 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100) 33 | centroids,label = kmeans(traj_vec_stor,num_centroids, iter=20) # Label[i] is the cluster no that i-th datapoint belongs to 34 | 35 | # Sample 36 | # Find the nearest vectors to centroids 37 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # 10-dim 38 | 39 | rep = good_trajs[rep] 40 | 41 | return rep # return the index of K most representative trajectories 42 | 43 | 44 | 45 | 46 | class KITTIReader(): 47 | TRAJ_H5_PATH = '/trajectories/kitti/traj_stor_test.h5' 48 | DATASET_DIR = '/datasets/KITTI/dataset/sequences' 49 | 50 | def _calc_traj_len(self, traj): # [Traj_no, num_point, (x,y)] 51 | dx = np.sum((traj[:,0:-1,:]-traj[:,1:,:])**2, axis=2) 52 | 53 | def __init__(self, num_frames=10): 54 | self._clip_stor = [] 55 | self._num_frames = num_frames 56 | self.height = 128 57 | self.width = 256 58 | 59 | traj_h5 = h5py.File(self.TRAJ_H5_PATH, 'r', libver='latest') 60 | traj_db = traj_h5["/KITTITraj/by_clip"] 61 | # Load all trajectory clips into memory 62 | print('Loading Trajectories for KITTI Dataset...') 63 | for clip_name in traj_db.keys(): 64 | clip_start = traj_db[clip_name].attrs['StartFrame'] 65 | clip_len = traj_db[clip_name].attrs['TrajLen'] 66 | clip_num_trajs = traj_db[clip_name].attrs['TrajCount'] 67 | clip_traj_data = np.array(traj_db[clip_name]) 68 | clip_video_id = traj_db[clip_name].attrs['VidNo'] 69 | 70 | new_clip = {} 71 | new_clip['vid_name'] = clip_video_id 72 | new_clip['clip_start'] = clip_start 73 | new_clip['clip_len'] = clip_len 74 | new_clip['clip_num_trajs'] = clip_num_trajs 75 | new_clip['clip_trajs'] = clip_traj_data 76 | self._clip_stor.append(new_clip) 77 | 78 | print('[KITTI Trajectory Statistics]') 79 | print('Clip count: %d' % (len(self._clip_stor))) 80 | traj_h5.close() 81 | 82 | def get_traj_input(self, trajs, start_frame, num_frames): 83 | num_trajs = trajs.shape[0] 84 | # Load annotations 85 | # Format: 2(frames), 3(T/F,dx,dy), H, W 86 | kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32) 87 | 88 | #num_appear_trajs = min(num_trajs,10) 89 | num_appear_trajs = min(num_trajs,1) 90 | #good_idx = filter_trajs_kmeans(trajs[:,start_frame:start_frame+num_frames,:], 10) 91 | 92 | appear_trajs = random.sample(range(num_trajs), num_appear_trajs) 93 | 94 | traj_list = 
trajs[appear_trajs, start_frame:start_frame+num_frames, :] 95 | for ff in range(num_frames): 96 | for traj_no in appear_trajs: 97 | kp_start_x = trajs[traj_no,start_frame,0] 98 | kp_start_y = trajs[traj_no,start_frame,1] 99 | kp_end_x = trajs[traj_no,start_frame+ff,0] 100 | kp_end_y = trajs[traj_no,start_frame+ff,1] 101 | 102 | kp_start_x_int = int(max(min(kp_start_x, self.width),0)) 103 | kp_start_y_int = int(max(min(kp_start_y, self.height),0)) 104 | kp_dx = kp_end_x - kp_start_x 105 | kp_dy = kp_end_y - kp_start_y 106 | kpmap_seq[ff, 0,kp_start_y_int,kp_start_x_int] = 1.0 107 | kpmap_seq[ff, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16. 108 | kpmap_seq[ff, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16. 109 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5 110 | 111 | kp_end_x_int = int(max(min(kp_end_x, self.width),0)) 112 | kp_end_y_int = int(max(min(kp_end_y, self.height),0)) 113 | kp_dx2 = kp_start_x - kp_end_x 114 | kp_dy2 = kp_start_y - kp_end_y 115 | kpmap_seq[ff, 3,kp_end_y_int,kp_end_x_int] = 1.0 116 | kpmap_seq[ff, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16. 117 | kpmap_seq[ff, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16. 118 | 119 | return kpmap_seq, traj_list 120 | 121 | def __getitem__(self, idx): 122 | if idx == -1: 123 | idx = random.randint(0,len(self._clip_stor)-1) 124 | 125 | annot = self._clip_stor[idx] 126 | 127 | vid_name = annot['vid_name'] 128 | frame_count = annot['clip_len'] 129 | clip_start = annot['clip_start'] 130 | 131 | num_frames = self._num_frames 132 | # random start frame 133 | start_frame = random.randint(0,frame_count-num_frames) 134 | 135 | # loading frames 136 | vid_seq = np.empty([num_frames,3,self.height,self.width], dtype=np.float32) 137 | for ff in range(num_frames): # load every frame of the clip 138 | frame_no = start_frame+clip_start+ff 139 | img_path = os.path.join(self.DATASET_DIR, '{:02d}'.format(vid_name), 'image_2', '{:06d}.png'.format(frame_no)) 140 | img_load = misc.imread(img_path) # h w c 141 | img = misc.imresize(img_load, (128,422)) 142 | if ff == 0: 143 | img_2x = misc.imresize(img_load, (256,845)) 144 | img_ori = img_2x.astype(np.float32) 145 | img = img[:,83:339,:] 146 | vid_seq[ff,:,:,:] = np.transpose(img, (2,0,1))/255.0 147 | 148 | img_ori = np.transpose(img_ori, (2,0,1))/255.0 - 0.5 149 | vid_seq = vid_seq - 0.5 # 2 C H W, [-0.5,0.5] 150 | 151 | num_trajs = annot['clip_num_trajs'] 152 | trajs = annot['clip_trajs'] 153 | kpmap_seq, traj_list = self.get_traj_input(trajs, start_frame, num_frames) 154 | 155 | 156 | print(idx, start_frame) 157 | return vid_seq, kpmap_seq, traj_list, img_ori 158 | 159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /model/reader/rp_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import numpy as np 5 | import cv2 6 | import random 7 | 8 | import h5py 9 | 10 | from scipy.cluster.vq import kmeans,kmeans2,vq 11 | 12 | def filter_trajs_kmeans(trajs, num_centroids): 13 | num_trajs = trajs.shape[0] 14 | len_trajs = trajs.shape[1] 15 | traj_vec_stor = np.empty((num_trajs, (len_trajs-1)*2), np.float32) 16 | disp_stor = np.empty((num_trajs,), np.float32) 17 | 18 | for ii in range(num_trajs): 19 | traj = trajs[ii,:,:] # n-by-2 20 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point 21 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1))) 22 | # Remove trajectories that have very low displacement 23 | good_trajs = 
np.flatnonzero(disp_stor>0.4) 24 | traj_vec_stor = traj_vec_stor[good_trajs,:] 25 | 26 | if traj_vec_stor.shape[0] < num_centroids: # too few points 27 | #print("kmeans: TOO FEW USABLE KEYPOINTS") 28 | return good_trajs[np.arange(0,traj_vec_stor.shape[0]-1)] # try to use all of them 29 | 30 | # k-means on vectors 31 | #num_centroids = 10 32 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100) 33 | centroids,label = kmeans(traj_vec_stor,num_centroids, iter=20) # Label[i] is the cluster no that i-th datapoint belongs to 34 | 35 | # Sample 36 | # Find the nearest vectors to centroids 37 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # 10-dim 38 | 39 | rep = good_trajs[rep] 40 | 41 | return rep # return the index of K most representative trajectories 42 | 43 | 44 | 45 | 46 | class RPReader(): 47 | TRAJ_H5_PATH = '/trajectories/rp/traj_stor_test.h5' 48 | JPG_H5_PATH = '/datasets/robot_push_h5/robot_push_testnovel_jpgs.h5' 49 | 50 | def _calc_traj_len(self, traj): # [Traj_no, num_point, (x,y)] 51 | dx = np.sum((traj[:,0:-1,:]-traj[:,1:,:])**2, axis=2) 52 | 53 | def __init__(self, num_frames=10): 54 | #self._clip_stor = [] 55 | self._num_frames = num_frames 56 | self.height = 192 57 | self.width = 240 58 | 59 | 60 | traj_h5 = h5py.File(self.TRAJ_H5_PATH, 'r', libver='latest') 61 | traj_db = traj_h5["/RPTraj/by_clip"] 62 | self.clip_names = list(traj_db.keys()) 63 | self.clip_num = len(self.clip_names) 64 | 65 | jpg_h5 = h5py.File(self.JPG_H5_PATH, 'r', libver='latest') 66 | jpg_h5 = jpg_h5["push/push_testnovel"] 67 | 68 | print('[Robot Push Trajectory Statistics]') 69 | print('Clip count: %d' % (self.clip_num)) 70 | 71 | self.traj_db = traj_db 72 | self.jpg_h5 = jpg_h5 73 | 74 | 75 | 76 | def get_traj_input(self, trajs, start_frame, num_frames): 77 | num_trajs = trajs.shape[0] 78 | # Load annotations 79 | # Format: 2(frames), 3(T/F,dx,dy), H, W 80 | kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32) 81 | 82 | #num_appear_trajs = min(num_trajs,10) 83 | num_appear_trajs = min(num_trajs,3) 84 | num_appear_trajs = random.randint(1,min(num_trajs,4)) 85 | #good_idx = filter_trajs_kmeans(trajs[:,start_frame:start_frame+num_frames,:], 10) 86 | 87 | appear_trajs = random.sample(range(num_trajs), num_appear_trajs) 88 | 89 | traj_list = trajs[appear_trajs, start_frame:start_frame+num_frames, :] 90 | for ff in range(num_frames): 91 | for traj_no in appear_trajs: 92 | kp_start_x = trajs[traj_no,start_frame,0] 93 | kp_start_y = trajs[traj_no,start_frame,1] 94 | kp_end_x = trajs[traj_no,start_frame+ff,0] 95 | kp_end_y = trajs[traj_no,start_frame+ff,1] 96 | 97 | kp_start_x_int = int(max(min(kp_start_x, self.width),0)) 98 | kp_start_y_int = int(max(min(kp_start_y, self.height),0)) 99 | kp_dx = kp_end_x - kp_start_x 100 | kp_dy = kp_end_y - kp_start_y 101 | kpmap_seq[ff, 0,kp_start_y_int,kp_start_x_int] = 1.0 102 | kpmap_seq[ff, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16. 103 | kpmap_seq[ff, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16. 104 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5 105 | 106 | kp_end_x_int = int(max(min(kp_end_x, self.width),0)) 107 | kp_end_y_int = int(max(min(kp_end_y, self.height),0)) 108 | kp_dx2 = kp_start_x - kp_end_x 109 | kp_dy2 = kp_start_y - kp_end_y 110 | kpmap_seq[ff, 3,kp_end_y_int,kp_end_x_int] = 1.0 111 | kpmap_seq[ff, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16. 112 | kpmap_seq[ff, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16. 
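# The 6 channels written above form two sparse control maps for each frame ff:
#   ch 0..2: at the start-frame keypoint pixel -> (presence flag, dy/16, dx/16) toward frame ff
#   ch 3..5: at the frame-ff keypoint pixel    -> (presence flag, dy/16, dx/16) back to the start
# Dividing the displacements by 16 keeps the map values in a small numeric range.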
113 | 114 | return kpmap_seq, traj_list 115 | 116 | def __getitem__(self, idx): 117 | traj_db = self.traj_db 118 | jpg_h5 = self.jpg_h5 119 | 120 | if idx == -1: 121 | idx = random.randint(0,self.clip_num-1) 122 | 123 | annot = traj_db[self.clip_names[idx]] 124 | 125 | vid_id = annot.attrs['VidId'] 126 | annot_traj_len = annot.attrs['TrajLen'] 127 | annot_clip_start = annot.attrs['StartFrame'] 128 | num_trajs = annot.attrs['TrajCount'] 129 | trajs = annot[()] 130 | 131 | num_frames = self._num_frames 132 | # random start frame 133 | annot_start_frame = random.randint(0,annot_traj_len-num_frames) 134 | 135 | # loading frames 136 | vid_seq = np.empty([num_frames,3,self.height,self.width], dtype=np.float32) 137 | for ff in range(num_frames): # only load two frames 138 | frame_no = annot_start_frame+annot_clip_start+ff 139 | img_data = cv2.imdecode(jpg_h5['{}/{}.jpg'.format(vid_id, frame_no)][()], -1) 140 | img_data = cv2.resize(img_data, (240,192)) 141 | img = img_data[:,:,(2,1,0)] # h w c 142 | 143 | vid_seq[ff,:,:,:] = np.transpose(img, (2,0,1))/255.0 144 | vid_seq = vid_seq - 0.5 # 2 C H W, [-0.5,0.5] 145 | 146 | kpmap_seq, traj_list = self.get_traj_input(trajs, annot_start_frame, num_frames) 147 | 148 | print(idx, annot_start_frame) 149 | return vid_seq, kpmap_seq, traj_list 150 | 151 | 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /model/reader/ucf_reader.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from scipy import misc # for imread 4 | from utils.find_border import find_border 5 | import h5py 6 | 7 | import math 8 | import os 9 | 10 | from scipy.cluster.vq import kmeans,kmeans2,vq 11 | 12 | def filter_trajs_kmeans(trajs, num_centroids): 13 | num_trajs = trajs.shape[0] 14 | len_trajs = trajs.shape[1] 15 | traj_vec_stor = np.empty((num_trajs, (len_trajs-1)*2), np.float32) 16 | disp_stor = np.empty((num_trajs,), np.float32) 17 | 18 | for ii in range(num_trajs): 19 | traj = trajs[ii,:,:] # n-by-2 20 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # substract start point 21 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1))) 22 | # Remove trajectories that have very low displacement 23 | good_trajs = np.flatnonzero(disp_stor>0.4) 24 | traj_vec_stor = traj_vec_stor[good_trajs,:] 25 | 26 | if traj_vec_stor.shape[0] < num_centroids: # too few points 27 | #print("kmeans: TOO FEW USABLE KEYPOINTS") 28 | return good_trajs[np.arange(0,traj_vec_stor.shape[0]-1)] # try to use all of them 29 | 30 | # k-means on vectors 31 | #num_centroids = 10 32 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100) 33 | centroids,label = kmeans(traj_vec_stor,num_centroids, iter=20) # Label[i] is the cluster no that i-th datapoint belongs to 34 | 35 | # Sample 36 | # Find the nearest vectors to centroids 37 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # 10-dim 38 | 39 | rep = good_trajs[rep] 40 | 41 | return rep # return the index of K most representative trajectories 42 | 43 | 44 | 45 | class UCFReader(): 46 | TRAJ_H5_PATH = '/trajectories/ucf/traj_stor_test.h5' 47 | DATASET_DIR = '/datasets/UCF101/UCF-101' 48 | JPG_DIR = '/datasets/UCF101_seq/UCF-101' 49 | 50 | def __init__(self, num_frames=10): 51 | self._num_frames = num_frames 52 | self.height = 192 53 | self.width = 256 54 | 55 | traj_h5 = h5py.File(self.TRAJ_H5_PATH, 'r', libver='latest') 56 | traj_db = traj_h5["/UCFTraj/by_clip"] 
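# NOTE: traj_h5 must stay open (hence the commented-out close() below):
# traj_db is a live h5py group whose datasets are read lazily in __getitem__,
# and closing the file here would invalidate it.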
57 | #traj_h5.close() 58 | self.clip_names = list(traj_db.keys()) 59 | self.clip_num = len(self.clip_names) 60 | self.traj_db = traj_db 61 | print('[UCF Trajectory Statistics]') 62 | print('Clip count: %d' % (self.clip_num)) 63 | 64 | def get_traj_input(self, trajs, start_frame, num_frames): 65 | num_trajs = trajs.shape[0] 66 | # Load annotations 67 | # Format: 2(frames), 3(T/F,dx,dy), H, W 68 | kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32) 69 | 70 | #num_appear_trajs = min(num_trajs,10) 71 | num_appear_trajs = min(num_trajs,1) 72 | good_idx = filter_trajs_kmeans(trajs[:,start_frame:start_frame+num_frames,:], 10) 73 | 74 | appear_trajs = random.sample(range(num_trajs), num_appear_trajs) 75 | 76 | traj_list = trajs[appear_trajs, start_frame:start_frame+num_frames, :] 77 | for ff in range(num_frames): 78 | for traj_no in appear_trajs: 79 | kp_start_x = trajs[traj_no,start_frame,0] 80 | kp_start_y = trajs[traj_no,start_frame,1] 81 | kp_end_x = trajs[traj_no,start_frame+ff,0] 82 | kp_end_y = trajs[traj_no,start_frame+ff,1] 83 | 84 | kp_start_x_int = int(max(min(kp_start_x, self.width),0)) 85 | kp_start_y_int = int(max(min(kp_start_y, self.height),0)) 86 | kp_dx = kp_end_x - kp_start_x 87 | kp_dy = kp_end_y - kp_start_y 88 | kpmap_seq[ff, 0,kp_start_y_int,kp_start_x_int] = 1.0 89 | kpmap_seq[ff, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16. 90 | kpmap_seq[ff, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16. 91 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5 92 | 93 | kp_end_x_int = int(max(min(kp_end_x, self.width),0)) 94 | kp_end_y_int = int(max(min(kp_end_y, self.height),0)) 95 | kp_dx2 = kp_start_x - kp_end_x 96 | kp_dy2 = kp_start_y - kp_end_y 97 | kpmap_seq[ff, 3,kp_end_y_int,kp_end_x_int] = 1.0 98 | kpmap_seq[ff, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16. 99 | kpmap_seq[ff, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16. 
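# Worked example: a keypoint at (x=40, y=30) in the start frame that has moved
# to (44, 38) by frame ff yields kpmap_seq[ff,0,30,40]=1.0,
# kpmap_seq[ff,1,30,40]=8/16=0.5 (dy) and kpmap_seq[ff,2,30,40]=4/16=0.25 (dx),
# plus the reverse entries (-0.5, -0.25) at pixel (y=38, x=44) in channels 3..5.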
100 | 101 | return kpmap_seq, traj_list 102 | 103 | def __getitem__(self, idx): 104 | traj_db = self.traj_db 105 | 106 | if idx == -1: 107 | while True: 108 | idx = random.randint(0,self.clip_num-1) 109 | #if traj_db[self.clip_names[idx]].attrs['VidPath'].find("PushUps") != -1: 110 | break 111 | 112 | 113 | annot = traj_db[self.clip_names[idx]] 114 | vid_path = annot.attrs['VidPath'] 115 | #vid_path = vid_path.replace('/datasets/UCF101/UCF-101', self.JPG_DIR) # 116 | annot_traj_len = annot.attrs['TrajLen'] 117 | annot_clip_start = annot.attrs['StartFrame'] 118 | num_trajs = annot.attrs['TrajCount'] 119 | trajs = annot[()] 120 | 121 | num_frames = self._num_frames 122 | annot_start_frame = random.randint(0,annot_traj_len-num_frames) 123 | 124 | # preallocate np array 125 | vid_seq = np.empty([num_frames,3,self.height,self.width], dtype=np.float32) 126 | for ff in range(num_frames): 127 | frame_no = annot_start_frame+annot_clip_start+ff 128 | try: 129 | frame = misc.imread(vid_path+'/'+str(frame_no)+'.jpg') 130 | except: 131 | print('Bad image found.') 132 | frame = np.zeros([self.width, self.height, 3], dtype=np.uint8) 133 | img = misc.imresize(frame, (self.height,self.width)) 134 | vid_seq[ff,:,:,:] = np.transpose(img, (2,0,1))/255.0 135 | 136 | vid_mask = find_border(vid_seq[0,:,:,:], threshold=10/255) 137 | vid_seq = vid_seq - 0.5 # 2 C H W, [-0.5,0.5] 138 | vid_seq = vid_seq * vid_mask 139 | 140 | kpmap_seq, traj_list = self.get_traj_input(trajs, annot_start_frame, num_frames) 141 | 142 | print(idx, annot_start_frame) 143 | return vid_seq, kpmap_seq, traj_list 144 | -------------------------------------------------------------------------------- /model/train_kitti.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | from torchvision import datasets, transforms 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | from dataset.data_loader_kitti_reimpl import KITTIReader_traj 12 | from models.vgg_warper_weak_shortcut_nobn import VGG_Warper 13 | from utils.visual import colorcode, VisdomShow, pbar 14 | 15 | from ops.flow_warper_pad_2x import FlowWarp 16 | from ops.hardshinkloss import HardshinkLoss 17 | from ops.laplace2d import Laplace2D 18 | 19 | 20 | args = {} 21 | args['gpus'] = [0] 22 | args['seed'] = 12345 23 | args['batch_size'] = 32 24 | torch.backends.cudnn.benchmark = True 25 | 26 | # Initialize Pytorch Dataloader 27 | datareader = KITTIReader_traj(is_test=False, max_interval=10, min_ntraj=1, max_ntraj=5) # change to min_ntraj=10, max_ntraj=10 for autoencoding (video prediction) evaluation 28 | train_loader = torch.utils.data.DataLoader( 29 | datareader, batch_size=args['batch_size'], shuffle=True, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=4, pin_memory=True) 30 | 31 | 32 | class MModel(nn.Module): 33 | def __init__(self): 34 | super(MModel, self).__init__() 35 | self.warp_cnn = VGG_Warper(9) 36 | self.flow_warper = FlowWarp() 37 | self.mseloss = nn.MSELoss(size_average=True, reduce=True) 38 | self.hardshrinkloss = HardshinkLoss(0., 1.) 39 | 40 | def forward(self, img_input, warp_input, img_gt): 41 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2 42 | warp_imgs = self.flow_warper(img_input, warp_flow, padl=83) 43 | comp_imgs = F.hardtanh(comp_imgs,0.,1.) 
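# The three network heads are blended just below as
# recon = mask*warp + (1-mask)*comp: the sigmoid mask chooses, per pixel,
# between the flow-warped input frame and the hallucinated comp image
# (which fills disocclusions that warping alone cannot explain).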
44 | masks = F.sigmoid(masks) 45 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks) 46 | 47 | return recon_img, warp_flow, comp_imgs, masks 48 | 49 | # Charbonnier penalty function 50 | # Φ(x) = (x^2 + \epsilon^2)^{1/2} 51 | class CPF(nn.Module): 52 | def __init__(self): 53 | super(CPF, self).__init__() 54 | def forward(self, x, mean=True): 55 | eps = 0.0001 56 | eps2 = eps**2 57 | if mean: 58 | loss = torch.mean(torch.sqrt(x**2+eps2)) 59 | else: 60 | loss = torch.sum(torch.sqrt(x**2+eps2)) 61 | 62 | return loss 63 | 64 | mmodel = MModel() 65 | mmodel.cuda() 66 | mmodel = nn.DataParallel(mmodel, device_ids=[0,1]) 67 | 68 | #reconstruction_function = nn.BCELoss() 69 | #reconstruction_function = nn.L1Loss() 70 | mseloss = nn.MSELoss() 71 | #mseloss.size_average = True 72 | cpfloss = CPF() 73 | hardshrinkloss = HardshinkLoss(0., 1.) 74 | #sl1loss = nn.SmoothL1Loss(size_average=False) 75 | optimizer = optim.Adam(mmodel.parameters(), lr=1e-3, weight_decay=0) 76 | 77 | visual = VisdomShow('kitti_train_humaneval') 78 | 79 | def train(epoch): 80 | print('\n\n=========================== Epoch {} ============================'.format(epoch)) 81 | mmodel.train() 82 | for batch_idx, (img_input, warp_input, img_gt, vid_mask, img_input_2x) in enumerate(train_loader): 83 | img_input = Variable(img_input).cuda(args['gpus'][0]) 84 | img_input_2x = Variable(img_input_2x).cuda(args['gpus'][0]) 85 | warp_input = Variable(warp_input).cuda(args['gpus'][0]) 86 | img_gt = Variable(img_gt).cuda(args['gpus'][0]) 87 | vid_mask = Variable(vid_mask).cuda(args['gpus'][0]) 88 | 89 | optimizer.zero_grad() 90 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input_2x, warp_input, img_gt) 91 | 92 | loss_comp_pen = hardshrinkloss(comp_imgs) 93 | loss_recon = cpfloss((recon_img-img_gt)*vid_mask) 94 | #loss_recon = mseloss(recon_img*vid_mask,img_gt*vid_mask) 95 | loss_mask_pen = torch.mean((masks-1.)**2) 96 | 97 | loss = loss_recon + 0.1*loss_comp_pen + 0.01*loss_mask_pen 98 | loss.backward() 99 | optimizer.step() 100 | 101 | hist['loss'].append(loss_recon.data.cpu().numpy()[0]) 102 | hist['comp_pen'].append(loss_comp_pen.data.cpu().numpy()[0]) 103 | 104 | if batch_idx%10 == 0: 105 | pbar(batch_idx, len(train_loader), epoch) 106 | 107 | if batch_idx%200 == 0: 108 | img_out = visual.add_text(recon_img[0,:,:,:].data.cpu().numpy(), 'Out', (0,0,1)) 109 | img_in = visual.add_text(img_input[0,:,:,:].data.cpu().numpy(), 'In', (0,1,0)) 110 | img_gt = visual.add_text(img_gt[0,:,:,:].data.cpu().numpy(), 'GT', (1,0,0)) 111 | comp_out = visual.add_text(comp_imgs[0,:,:,:].data.cpu().numpy(), 'Comp', (0,1,1)) 112 | mask_bw = masks[0,:,:,:].data.cpu().numpy() 113 | mask_out = visual.add_text(np.concatenate((mask_bw,mask_bw,mask_bw),0), 'Mask', (1,0,0)) 114 | warp_out = visual.add_text(colorcode(warp_flow[0,:,:,:].data.cpu().numpy()), 'Flow', (0,0,0)) 115 | 116 | visual.show_img(comp_out) 117 | visual.show_img(mask_out) 118 | visual.show_img(warp_out) 119 | vid = np.stack((img_in, img_out, img_gt, img_in, img_out, img_gt, img_gt), axis=0) 120 | visual.show_vid(vid) 121 | if batch_idx%2000 == 0: 122 | ckpt = { 123 | 'mmodel_state_dict': mmodel.module.state_dict(), 124 | 'optimizer': optimizer.state_dict(), 125 | 'hist': hist 126 | } 127 | torch.save(ckpt, './snapshots/kitti/ckpt_e{}_b{}.pth'.format(epoch, batch_idx)) 128 | 129 | def restore(ckpt_file): 130 | ckpt = torch.load(ckpt_file) 131 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict']) 132 | optimizer.load_state_dict(ckpt['optimizer']) 133 | #hist = 
ckpt['hist'] 134 | print('Restored from {}'.format(ckpt_file)) 135 | 136 | hist = {} 137 | hist['loss'] = [] 138 | hist['comp_pen'] = [] 139 | 140 | #restore('./snapshots2/ckpt_e1_b44000.pth') 141 | for epoch in range(0, 20): 142 | #test(epoch) 143 | train(epoch) 144 | 145 | 146 | -------------------------------------------------------------------------------- /model/train_rp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | from torchvision import datasets, transforms 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | from dataset.data_loader_rp_reimpl import RPReader_traj 12 | from models.vgg_warper_weak_shortcut import VGG_Warper 13 | from utils.visual import colorcode, VisdomShow, pbar 14 | 15 | from ops.flow_warper import FlowWarp 16 | from ops.hardshinkloss import HardshinkLoss 17 | from ops.laplace2d import Laplace2D 18 | 19 | 20 | args = {} 21 | args['gpus'] = [0] 22 | args['seed'] = 12345 23 | args['batch_size'] = 32 24 | torch.backends.cudnn.benchmark = True 25 | 26 | # Initialize Pytorch Dataloader 27 | datareader = RPReader_traj(is_test=False, max_interval=10, min_ntraj=1, max_ntraj=5) # change to min_ntraj=10, max_ntraj=10 for autoencoding (video prediction) evaluation 28 | train_loader = torch.utils.data.DataLoader( 29 | datareader, batch_size=args['batch_size'], shuffle=True, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=4, pin_memory=True) 30 | 31 | 32 | class MModel(nn.Module): 33 | def __init__(self): 34 | super(MModel, self).__init__() 35 | self.warp_cnn = VGG_Warper(9) 36 | self.flow_warper = FlowWarp() 37 | self.mseloss = nn.MSELoss(size_average=True, reduce=True) 38 | self.hardshrinkloss = HardshinkLoss(0., 1.) 39 | 40 | def forward(self, img_input, warp_input, img_gt): 41 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2 42 | warp_imgs = self.flow_warper(img_input, warp_flow) 43 | comp_imgs = F.hardtanh(comp_imgs,0.,1.) 44 | masks = F.sigmoid(masks) 45 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks) 46 | 47 | return recon_img, warp_flow, comp_imgs, masks 48 | 49 | # Charbonnier penalty function 50 | # Φ(x) = (x^2 + \epsilon^2)^{1/2} 51 | class CPF(nn.Module): 52 | def __init__(self): 53 | super(CPF, self).__init__() 54 | def forward(self, x, mean=True): 55 | eps = 0.0001 56 | eps2 = eps**2 57 | if mean: 58 | loss = torch.mean(torch.sqrt(x**2+eps2)) 59 | else: 60 | loss = torch.sum(torch.sqrt(x**2+eps2)) 61 | 62 | return loss 63 | 64 | mmodel = MModel() 65 | mmodel.cuda() 66 | mmodel = nn.DataParallel(mmodel, device_ids=[0,1]) 67 | 68 | #reconstruction_function = nn.BCELoss() 69 | #reconstruction_function = nn.L1Loss() 70 | mseloss = nn.MSELoss() 71 | #mseloss.size_average = True 72 | cpfloss = CPF() 73 | hardshrinkloss = HardshinkLoss(0., 1.) 
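# Note: CPF above implements the Charbonnier penalty sqrt(x^2 + eps^2), a
# smooth L1: it grows like |x| for |x| >> eps yet keeps a well-defined gradient
# at x = 0, making it a robust per-pixel reconstruction loss (eps = 1e-4 here).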
74 | #sl1loss = nn.SmoothL1Loss(size_average=False) 75 | optimizer = optim.Adam(mmodel.parameters(), lr=1e-3, weight_decay=0) 76 | 77 | visual = VisdomShow('rp_train_humaneval') 78 | 79 | def train(epoch): 80 | print('\n\n=========================== Epoch {} ============================'.format(epoch)) 81 | mmodel.train() 82 | for batch_idx, (img_input, warp_input, img_gt, vid_mask) in enumerate(train_loader): 83 | img_input = Variable(img_input).cuda(args['gpus'][0]) 84 | warp_input = Variable(warp_input).cuda(args['gpus'][0]) 85 | img_gt = Variable(img_gt).cuda(args['gpus'][0]) 86 | vid_mask = Variable(vid_mask).cuda(args['gpus'][0]) 87 | 88 | optimizer.zero_grad() 89 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input, warp_input, img_gt) 90 | 91 | loss_comp_pen = hardshrinkloss(comp_imgs) 92 | loss_recon = cpfloss((recon_img-img_gt)*vid_mask) 93 | #loss_recon = mseloss(recon_img*vid_mask,img_gt*vid_mask) 94 | loss_mask_pen = torch.mean((masks-1.)**2) 95 | 96 | loss = loss_recon + 0.1*loss_comp_pen + 0.01*loss_mask_pen 97 | loss.backward() 98 | optimizer.step() 99 | 100 | hist['loss'].append(loss_recon.data.cpu().numpy()[0]) 101 | hist['comp_pen'].append(loss_comp_pen.data.cpu().numpy()[0]) 102 | 103 | if batch_idx%10 == 0: 104 | pbar(batch_idx, len(train_loader), epoch) 105 | 106 | if batch_idx%200 == 0: 107 | img_out = visual.add_text(recon_img[0,:,:,:].data.cpu().numpy(), 'Out', (0,0,1)) 108 | img_in = visual.add_text(img_input[0,:,:,:].data.cpu().numpy(), 'In', (0,1,0)) 109 | img_gt = visual.add_text(img_gt[0,:,:,:].data.cpu().numpy(), 'GT', (1,0,0)) 110 | comp_out = visual.add_text(comp_imgs[0,:,:,:].data.cpu().numpy(), 'Comp', (0,1,1)) 111 | mask_bw = masks[0,:,:,:].data.cpu().numpy() 112 | mask_out = visual.add_text(np.concatenate((mask_bw,mask_bw,mask_bw),0), 'Mask', (1,0,0)) 113 | warp_out = visual.add_text(colorcode(warp_flow[0,:,:,:].data.cpu().numpy()), 'Flow', (0,0,0)) 114 | 115 | visual.show_img(comp_out) 116 | visual.show_img(mask_out) 117 | visual.show_img(warp_out) 118 | vid = np.stack((img_in, img_out, img_gt, img_in, img_out, img_gt, img_gt), axis=0) 119 | visual.show_vid(vid) 120 | if batch_idx%2000 == 0: 121 | ckpt = { 122 | 'mmodel_state_dict': mmodel.module.state_dict(), 123 | 'optimizer': optimizer.state_dict(), 124 | 'hist': hist 125 | } 126 | torch.save(ckpt, './snapshots/rp/ckpt_e{}_b{}.pth'.format(epoch, batch_idx)) 127 | 128 | def restore(ckpt_file): 129 | ckpt = torch.load(ckpt_file) 130 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict']) 131 | optimizer.load_state_dict(ckpt['optimizer']) 132 | #hist = ckpt['hist'] 133 | print('Restored from {}'.format(ckpt_file)) 134 | 135 | hist = {} 136 | hist['loss'] = [] 137 | hist['comp_pen'] = [] 138 | 139 | #restore('./snapshots2/ckpt_e1_b44000.pth') 140 | for epoch in range(0, 20): 141 | #test(epoch) 142 | train(epoch) 143 | 144 | 145 | -------------------------------------------------------------------------------- /model/train_ucf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | from torchvision import datasets, transforms 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | from dataset.data_loader_ucf_reimpl import UCFReader_traj 12 | from models.vgg_warper_weak_shortcut import VGG_Warper 13 | from utils.visual import colorcode, VisdomShow, pbar 14 | 15 | from ops.flow_warper import FlowWarp 16 | 
from ops.hardshinkloss import HardshinkLoss 17 | from ops.laplace2d import Laplace2D 18 | 19 | 20 | args = {} 21 | args['gpus'] = [0] 22 | args['seed'] = 12345 23 | args['batch_size'] = 32 24 | torch.backends.cudnn.benchmark = True 25 | 26 | # Initialize Pytorch Dataloader 27 | datareader = UCFReader_traj(is_test=False, max_interval=10) # add min_ntraj=10, max_ntraj=10 for autoencoding (video prediction) evaluation 28 | train_loader = torch.utils.data.DataLoader( 29 | datareader, batch_size=args['batch_size'], shuffle=True, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=6, pin_memory=True) 30 | 31 | 32 | class MModel(nn.Module): 33 | def __init__(self): 34 | super(MModel, self).__init__() 35 | self.warp_cnn = VGG_Warper(9) 36 | self.flow_warper = FlowWarp() 37 | self.mseloss = nn.MSELoss(size_average=True, reduce=True) 38 | self.hardshrinkloss = HardshinkLoss(0., 1.) 39 | 40 | def forward(self, img_input, warp_input, img_gt): 41 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2 42 | warp_imgs = self.flow_warper(img_input, warp_flow) 43 | comp_imgs = F.hardtanh(comp_imgs,0.,1.) 44 | masks = F.sigmoid(masks) 45 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks) 46 | 47 | return recon_img, warp_flow, comp_imgs, masks 48 | 49 | # Charbonnier penalty function 50 | # Φ(x) = (x^2 + \epsilon^2)^{1/2} 51 | class CPF(nn.Module): 52 | def __init__(self): 53 | super(CPF, self).__init__() 54 | def forward(self, x, mean=True): 55 | eps = 0.0001 56 | eps2 = eps**2 57 | if mean: 58 | loss = torch.mean(torch.sqrt(x**2+eps2)) 59 | else: 60 | loss = torch.sum(torch.sqrt(x**2+eps2)) 61 | 62 | return loss 63 | 64 | mmodel = MModel() 65 | mmodel.cuda() 66 | mmodel = nn.DataParallel(mmodel, device_ids=[0,1]) 67 | 68 | #reconstruction_function = nn.BCELoss() 69 | #reconstruction_function = nn.L1Loss() 70 | mseloss = nn.MSELoss() 71 | #mseloss.size_average = True 72 | cpfloss = CPF() 73 | hardshrinkloss = HardshinkLoss(0., 1.) 
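# Note: HardshinkLoss(0., 1.) penalizes comp_imgs values that fall outside
# [0, 1]; together with the hardtanh clamp in MModel.forward it discourages the
# composition head from drifting out of the valid intensity range.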
74 | #sl1loss = nn.SmoothL1Loss(size_average=False) 75 | optimizer = optim.Adam(mmodel.parameters(), lr=1e-3, weight_decay=0) 76 | 77 | visual = VisdomShow('ucf_train_humaneval') 78 | 79 | def train(epoch): 80 | print('\n\n=========================== Epoch {} ============================'.format(epoch)) 81 | mmodel.train() 82 | for batch_idx, (img_input, warp_input, img_gt, vid_mask) in enumerate(train_loader): 83 | img_input = Variable(img_input).cuda(args['gpus'][0]) 84 | warp_input = Variable(warp_input).cuda(args['gpus'][0]) 85 | img_gt = Variable(img_gt).cuda(args['gpus'][0]) 86 | vid_mask = Variable(vid_mask).cuda(args['gpus'][0]) 87 | 88 | optimizer.zero_grad() 89 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input, warp_input, img_gt) 90 | 91 | loss_comp_pen = hardshrinkloss(comp_imgs) 92 | loss_recon = cpfloss((recon_img-img_gt)*vid_mask) 93 | #loss_recon = mseloss(recon_img*vid_mask,img_gt*vid_mask) 94 | loss_mask_pen = torch.mean((masks-1.)**2) 95 | 96 | loss = loss_recon + 0.1*loss_comp_pen + 0.01*loss_mask_pen 97 | loss.backward() 98 | optimizer.step() 99 | 100 | hist['loss'].append(loss_recon.data.cpu().numpy()[0]) 101 | hist['comp_pen'].append(loss_comp_pen.data.cpu().numpy()[0]) 102 | 103 | if batch_idx%10 == 0: 104 | pbar(batch_idx, len(train_loader), epoch) 105 | 106 | if batch_idx%200 == 0: 107 | img_out = visual.add_text(recon_img[0,:,:,:].data.cpu().numpy(), 'Out', (0,0,1)) 108 | img_in = visual.add_text(img_input[0,:,:,:].data.cpu().numpy(), 'In', (0,1,0)) 109 | img_gt = visual.add_text(img_gt[0,:,:,:].data.cpu().numpy(), 'GT', (1,0,0)) 110 | comp_out = visual.add_text(comp_imgs[0,:,:,:].data.cpu().numpy(), 'Comp', (0,1,1)) 111 | mask_bw = masks[0,:,:,:].data.cpu().numpy() 112 | mask_out = visual.add_text(np.concatenate((mask_bw,mask_bw,mask_bw),0), 'Mask', (1,0,0)) 113 | warp_out = visual.add_text(colorcode(warp_flow[0,:,:,:].data.cpu().numpy()), 'Flow', (0,0,0)) 114 | 115 | visual.show_img(comp_out) 116 | visual.show_img(mask_out) 117 | visual.show_img(warp_out) 118 | vid = np.stack((img_in, img_out, img_gt, img_in, img_out, img_gt, img_gt), axis=0) 119 | visual.show_vid(vid) 120 | if batch_idx%2000 == 0: 121 | ckpt = { 122 | 'mmodel_state_dict': mmodel.module.state_dict(), 123 | 'optimizer': optimizer.state_dict(), 124 | 'hist': hist 125 | } 126 | torch.save(ckpt, './snapshots/ucf/ckpt_e{}_b{}.pth'.format(epoch, batch_idx)) 127 | 128 | def restore(ckpt_file): 129 | ckpt = torch.load(ckpt_file) 130 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict']) 131 | optimizer.load_state_dict(ckpt['optimizer']) 132 | #hist = ckpt['hist'] 133 | print('Restored from {}'.format(ckpt_file)) 134 | 135 | hist = {} 136 | hist['loss'] = [] 137 | hist['comp_pen'] = [] 138 | 139 | restore('./snapshots/ckpt_e2_b58000.pth') 140 | for epoch in range(0, 20): 141 | #test(epoch) 142 | train(epoch) 143 | 144 | 145 | -------------------------------------------------------------------------------- /model/utils/trajs2map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Variable 4 | 5 | def trajs2map(trajs, height, width): # traj: [N, S/E, X/Y] 6 | #kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32) 7 | 8 | #height = kpmap_seq.size(2) 9 | #width = kpmap_seq.size(3) 10 | kpmap_seq = Variable(torch.zeros(1,6,height,width).cuda()) 11 | for traj_no in range(len(trajs)): 12 | kp_start_x = trajs[traj_no][0][0] 13 | kp_start_y = trajs[traj_no][0][1] 14 | 
kp_end_x = trajs[traj_no][1][0] 15 | kp_end_y = trajs[traj_no][1][1] 16 | 17 | kp_start_x_int = int(max(min(kp_start_x, width),0)) 18 | kp_start_y_int = int(max(min(kp_start_y, height),0)) 19 | kp_dx = kp_end_x - kp_start_x 20 | kp_dy = kp_end_y - kp_start_y 21 | kpmap_seq[0, 0,kp_start_y_int,kp_start_x_int] = 1.0 22 | kpmap_seq[0, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16. 23 | kpmap_seq[0, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16. 24 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5 25 | 26 | kp_end_x_int = int(max(min(kp_end_x, width),0)) 27 | kp_end_y_int = int(max(min(kp_end_y, height),0)) 28 | kp_dx2 = kp_start_x - kp_end_x 29 | kp_dy2 = kp_start_y - kp_end_y 30 | kpmap_seq[0, 3,kp_end_y_int,kp_end_x_int] = 1.0 31 | kpmap_seq[0, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16. 32 | kpmap_seq[0, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16. 33 | 34 | return kpmap_seq 35 | 36 | 37 | def trajs2map2(trajs, height, width): # traj: [N, S/E, X/Y] 38 | #kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32) 39 | 40 | #height = kpmap_seq.size(2) 41 | #width = kpmap_seq.size(3) 42 | kpmap_seq = Variable(torch.zeros(1,6,height,width).cuda()) 43 | for traj_no in range(trajs.shape[0]): 44 | kp_start_x = trajs[traj_no,0,0] 45 | kp_start_y = trajs[traj_no,0,1] 46 | kp_end_x = trajs[traj_no,1,0] 47 | kp_end_y = trajs[traj_no,1,1] 48 | 49 | kp_start_x_int = int(max(min(kp_start_x, width),0)) 50 | kp_start_y_int = int(max(min(kp_start_y, height),0)) 51 | kp_dx = kp_end_x - kp_start_x 52 | kp_dy = kp_end_y - kp_start_y 53 | kpmap_seq[0, 0,kp_start_y_int,kp_start_x_int] = 1.0 54 | kpmap_seq[0, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16. 55 | kpmap_seq[0, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16. 56 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5 57 | 58 | kp_end_x_int = int(max(min(kp_end_x, width),0)) 59 | kp_end_y_int = int(max(min(kp_end_y, height),0)) 60 | kp_dx2 = kp_start_x - kp_end_x 61 | kp_dy2 = kp_start_y - kp_end_y 62 | kpmap_seq[0, 3,kp_end_y_int,kp_end_x_int] = 1.0 63 | kpmap_seq[0, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16. 64 | kpmap_seq[0, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16. 65 | 66 | return kpmap_seq 67 | -------------------------------------------------------------------------------- /model/utils/visual.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from visdom import Visdom 3 | import cv2 4 | import os 5 | # OpenBLAS screws up with CPU affinity 6 | os.sched_setaffinity(0,range(os.cpu_count())) 7 | 8 | 9 | class VisdomShow(): 10 | def __init__(self, env_name): 11 | self.vis = Visdom(env=env_name) 12 | print('Visdom display initialized') 13 | 14 | def show_img(self, img): 15 | #img = img[(2,1,0),:,:] 16 | self.vis.image(np.clip(img,0,1)) 17 | #self.vis.image(np.clip(img.data.cpu().numpy(),0,1)) 18 | 19 | def show_vid(self, vid): 20 | vid = (np.clip(vid,0.,1.)*255.).astype(np.uint8) 21 | vid = np.transpose(vid[:,(2,1,0),:,:], (0,2,3,1)) 22 | self.vis.video(vid, opts={'fps': 2}) 23 | 24 | def add_text(self, img, text, color=(0,255,0)): 25 | img = np.transpose(img[(2,1,0),:,:], (1,2,0)).copy() 26 | cv2.putText(img, text, (2,24), cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 1, cv2.LINE_AA) 27 | img = np.transpose(img, (2,0,1))[(2,1,0),:,:] 28 | return img 29 | 30 | 31 | def colorcode(flow_in): # N 1 H W, H S V=1 32 | #hsv = np.zeros((512, 512, 3)) 33 | #hsv[..., 0] = np.linspace(0, 1, 512) 34 | #hsv[..., 1] = 1. 
35 | #hsv[..., 2] = np.linspace(0, 1, 512)[:, np.newaxis] 36 | #rgb = hsv_to_rgb(hsv) 37 | flow_x = flow_in[0,:,:] / 5 38 | flow_y = flow_in[1,:,:] / 5 39 | shape = flow_x.shape 40 | H = np.arctan2(flow_x, flow_y) / (2.*np.pi) # [0,1) 41 | H = np.ravel(H) 42 | S = np.sqrt(flow_x**2+flow_y**2) # [0, len] 43 | S = np.ravel(S) 44 | 45 | i = np.int_(H*6.) 46 | f = H*6.-i 47 | 48 | q = f 49 | t = 1.-f 50 | i = np.ravel(i) 51 | f = np.ravel(f) 52 | i%=6 53 | t = np.ravel(t) 54 | q = np.ravel(q) 55 | v = 1 56 | clist = (1-S*np.vstack([np.zeros_like(f),np.ones_like(f),q,t]))*v 57 | 58 | #0:v 1:p 2:q 3:t 59 | order = np.array([[0,3,1],[2,0,1],[1,0,3],[1,2,0],[3,1,0],[0,1,2]]) 60 | rgb = clist[order[i], np.arange(np.prod(shape))[:,None]] 61 | 62 | rgb = np.transpose(rgb.reshape(shape+(3,)),[2,0,1]) 63 | return rgb 64 | 65 | 66 | import sys 67 | def pbar(count, total, status=''): 68 | bar_len = 50 69 | filled_len = int(round(bar_len * count / float(total))) 70 | 71 | percents = round(100.0 * count / float(total), 1) 72 | bar = '=' * filled_len + '-' * (bar_len - filled_len) 73 | 74 | sys.stdout.write('[%s] %s/%s epoch %s\r' % (bar, count, total, status)) 75 | sys.stdout.flush() 76 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/DenseTrackStab.h: -------------------------------------------------------------------------------- 1 | #ifndef DENSETRACKSTAB_H_ 2 | #define DENSETRACKSTAB_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "opencv2/calib3d/calib3d.hpp" 20 | #include "opencv2/highgui/highgui.hpp" 21 | #include "opencv2/imgproc/imgproc.hpp" 22 | #include "opencv2/xfeatures2d.hpp" 23 | #include "opencv2/core/core.hpp" 24 | //#include "opencv2/nonfree/nonfree.hpp" 25 | 26 | using namespace cv; 27 | 28 | typedef struct 29 | { 30 | int traj_length; 31 | int num_trajs; 32 | float* out_trajs; 33 | } Ret; 34 | 35 | extern "C" void free_mem(); 36 | extern "C" void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret* ret); 37 | 38 | int start_frame = 0; 39 | int end_frame = INT_MAX; 40 | int scale_num = 8; 41 | const float scale_stride = sqrt(2); 42 | char* bb_file = NULL; 43 | 44 | // parameters for descriptors 45 | int patch_size = 32; 46 | int nxy_cell = 2; 47 | int nt_cell = 3; 48 | float epsilon = 0.05; 49 | const float min_flow = 0.4; 50 | 51 | // parameters for tracking 52 | double quality = 0.001; 53 | int min_distance = 5; 54 | int init_gap = 1; 55 | int track_length = 15; 56 | 57 | // parameters for rejecting trajectory 58 | const float min_var = sqrt(3); 59 | const float max_var = 50; 60 | const float max_dis = 20; 61 | 62 | typedef struct { 63 | int x; // top left corner 64 | int y; 65 | int width; 66 | int height; 67 | }RectInfo; 68 | 69 | typedef struct { 70 | int width; // resolution of the video 71 | int height; 72 | int length; // number of frames 73 | }SeqInfo; 74 | 75 | typedef struct { 76 | int length; // length of the trajectory 77 | int gap; // initialization gap for feature re-sampling 78 | }TrackInfo; 79 | 80 | typedef struct { 81 | int nBins; // number of bins for vector quantization 82 | bool isHof; 83 | int nxCells; // number of cells in x direction 84 | int nyCells; 85 | int ntCells; 86 | int dim; // dimension of the descriptor 87 | int height; // size of the block for computing the descriptor 88 | int width; 89 | }DescInfo; 90 | 91 | 
// integral histogram for the descriptors 92 | typedef struct { 93 | int height; 94 | int width; 95 | int nBins; 96 | float* desc; 97 | }DescMat; 98 | 99 | class Track 100 | { 101 | public: 102 | std::vector<Point2f> point; 103 | std::vector<float> disp; 104 | std::vector<float> hog; 105 | std::vector<float> hof; 106 | std::vector<float> mbhX; 107 | std::vector<float> mbhY; 108 | int index; 109 | 110 | Track(const Point2f& point_, const TrackInfo& trackInfo, const DescInfo& hogInfo, 111 | const DescInfo& hofInfo, const DescInfo& mbhInfo) 112 | : point(trackInfo.length+1), disp(trackInfo.length), hog(hogInfo.dim*trackInfo.length), 113 | hof(hofInfo.dim*trackInfo.length), mbhX(mbhInfo.dim*trackInfo.length), mbhY(mbhInfo.dim*trackInfo.length) 114 | { 115 | index = 0; 116 | point[0] = point_; 117 | } 118 | 119 | void addPoint(const Point2f& point_) 120 | { 121 | index++; 122 | point[index] = point_; 123 | } 124 | }; 125 | 126 | class BoundBox 127 | { 128 | public: 129 | Point2f TopLeft; 130 | Point2f BottomRight; 131 | float confidence; 132 | 133 | BoundBox(float a1, float a2, float a3, float a4, float a5) 134 | { 135 | TopLeft.x = a1; 136 | TopLeft.y = a2; 137 | BottomRight.x = a3; 138 | BottomRight.y = a4; 139 | confidence = a5; 140 | } 141 | }; 142 | 143 | class Frame 144 | { 145 | public: 146 | int frameID; 147 | std::vector<BoundBox> BBs; 148 | 149 | Frame(const int& frame_) 150 | { 151 | frameID = frame_; 152 | BBs.clear(); 153 | } 154 | }; 155 | 156 | #endif /*DENSETRACKSTAB_H_*/ 157 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/Initialize.h: -------------------------------------------------------------------------------- 1 | #ifndef INITIALIZE_H_ 2 | #define INITIALIZE_H_ 3 | 4 | #include "DenseTrackStab.h" 5 | 6 | using namespace cv; 7 | 8 | void InitTrackInfo(TrackInfo* trackInfo, int track_length, int init_gap) 9 | { 10 | trackInfo->length = track_length; 11 | trackInfo->gap = init_gap; 12 | } 13 | 14 | DescMat* InitDescMat(int height, int width, int nBins) 15 | { 16 | DescMat* descMat = (DescMat*)malloc(sizeof(DescMat)); 17 | descMat->height = height; 18 | descMat->width = width; 19 | descMat->nBins = nBins; 20 | 21 | long size = height*width*nBins; 22 | descMat->desc = (float*)malloc(size*sizeof(float)); 23 | memset(descMat->desc, 0, size*sizeof(float)); 24 | return descMat; 25 | } 26 | 27 | void ReleDescMat(DescMat* descMat) 28 | { 29 | free(descMat->desc); 30 | free(descMat); 31 | } 32 | 33 | void InitDescInfo(DescInfo* descInfo, int nBins, bool isHof, int size, int nxy_cell, int nt_cell) 34 | { 35 | descInfo->nBins = nBins; 36 | descInfo->isHof = isHof; 37 | descInfo->nxCells = nxy_cell; 38 | descInfo->nyCells = nxy_cell; 39 | descInfo->ntCells = nt_cell; 40 | descInfo->dim = nBins*nxy_cell*nxy_cell; 41 | descInfo->height = size; 42 | descInfo->width = size; 43 | } 44 | 45 | void InitSeqInfo(SeqInfo* seqInfo, char* video) 46 | { 47 | VideoCapture capture; 48 | capture.open(video); 49 | 50 | if(!capture.isOpened()) 51 | fprintf(stderr, "Could not initialize capturing..\n"); 52 | 53 | // get the number of frames in the video 54 | int frame_num = 0; 55 | while(true) { 56 | Mat frame; 57 | capture >> frame; 58 | 59 | if(frame.empty()) 60 | break; 61 | 62 | if(frame_num == 0) { 63 | seqInfo->width = frame.cols; 64 | seqInfo->height = frame.rows; 65 | } 66 | 67 | frame_num++; 68 | } 69 | seqInfo->length = frame_num; 70 | } 71 | 72 | void usage() 73 | { 74 | fprintf(stderr, "Extract improved trajectories from a video\n\n"); 75 | fprintf(stderr, "Usage: DenseTrackStab video_file 
[options]\n"); 76 | fprintf(stderr, "Options:\n"); 77 | fprintf(stderr, " -h Display this message and exit\n"); 78 | fprintf(stderr, " -S [start frame] The start frame to compute feature (default: S=0 frame)\n"); 79 | fprintf(stderr, " -E [end frame] The end frame for feature computing (default: E=last frame)\n"); 80 | fprintf(stderr, " -L [trajectory length] The length of the trajectory (default: L=15 frames)\n"); 81 | fprintf(stderr, " -W [sampling stride] The stride for dense sampling feature points (default: W=5 pixels)\n"); 82 | fprintf(stderr, " -N [neighborhood size] The neighborhood size for computing the descriptor (default: N=32 pixels)\n"); 83 | fprintf(stderr, " -s [spatial cells] The number of cells in the nxy axis (default: nxy=2 cells)\n"); 84 | fprintf(stderr, " -t [temporal cells] The number of cells in the nt axis (default: nt=3 cells)\n"); 85 | fprintf(stderr, " -A [scale number] The number of maximal spatial scales (default: 8 scales)\n"); 86 | fprintf(stderr, " -I [initial gap] The gap for re-sampling feature points (default: 1 frame)\n"); 87 | fprintf(stderr, " -H [human bounding box] The human bounding box file to remove outlier matches (default: None)\n"); 88 | } 89 | 90 | bool arg_parse(int argc, char** argv) 91 | { 92 | int c; 93 | bool flag = false; 94 | char* executable = basename(argv[0]); 95 | while((c = getopt (argc, argv, "hS:E:L:W:N:s:t:A:I:H:")) != -1) 96 | switch(c) { 97 | case 'S': 98 | start_frame = atoi(optarg); 99 | flag = true; 100 | break; 101 | case 'E': 102 | end_frame = atoi(optarg); 103 | flag = true; 104 | break; 105 | case 'L': 106 | track_length = atoi(optarg); 107 | break; 108 | case 'W': 109 | min_distance = atoi(optarg); 110 | break; 111 | case 'N': 112 | patch_size = atoi(optarg); 113 | break; 114 | case 's': 115 | nxy_cell = atoi(optarg); 116 | break; 117 | case 't': 118 | nt_cell = atoi(optarg); 119 | break; 120 | case 'A': 121 | scale_num = atoi(optarg); 122 | break; 123 | case 'I': 124 | init_gap = atoi(optarg); 125 | break; 126 | case 'H': 127 | bb_file = optarg; 128 | break; 129 | case 'h': 130 | usage(); 131 | exit(0); 132 | break; 133 | 134 | default: 135 | fprintf(stderr, "error parsing arguments at -%c\n Try '%s -h' for help.", c, executable ); 136 | abort(); 137 | } 138 | return flag; 139 | } 140 | 141 | #endif /*INITIALIZE_H_*/ 142 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/Makefile: -------------------------------------------------------------------------------- 1 | # set the binaries that have to be built 2 | TARGETS := DenseTrackStab Video 3 | 4 | # set the build configuration set 5 | BUILD := release 6 | #BUILD := debug 7 | 8 | # set bin and build dirs 9 | BUILDDIR := .build_$(BUILD) 10 | BINDIR := $(BUILD) 11 | 12 | # libraries 13 | LDLIBS = $(addprefix -l, $(LIBS) $(LIBS_$(notdir $*))) 14 | LIBS := \ 15 | opencv_core opencv_highgui opencv_video opencv_imgproc opencv_calib3d opencv_features2d opencv_xfeatures2d opencv_videoio \ 16 | avformat avdevice avutil avcodec swscale 17 | 18 | # set some flags and compiler/linker specific commands 19 | CXXFLAGS = -pipe -D __STDC_CONSTANT_MACROS -D STD=std -Wall -fvisibility=hidden $(CXXFLAGS_$(BUILD)) -I. 
-I/opt/include 20 | CXXFLAGS_debug := -ggdb 21 | CXXFLAGS_release := -O3 -DNDEBUG -ggdb 22 | #LDFLAGS = -L/opt/lib -pipe -Wall -shared $(LDFLAGS_$(BUILD)) 23 | LDFLAGS = -L/opt/lib -pipe -Wall -shared -fPIC -fvisibility=hidden $(LDFLAGS_$(BUILD)) 24 | LDFLAGS_debug := -ggdb 25 | LDFLAGS_release := -O3 -ggdb 26 | 27 | include make/generic.mk 28 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/README.md: -------------------------------------------------------------------------------- 1 | # NOTES ON USAGE 2 | For generating trajectories from video (Tuned for KITTI dataset). 3 | As a part of code for "Controllable Video Generation with Sparse Trajectories", CVPR'18. 4 | - **batch_process_dataset.py**: Generate trajectories. To set up search for comments containing `[EDIT ME!]`. Train/test split hard-coded inside. 5 | - **view_traj.py**: Visualize generated trajectories. Detailed instructions inside the file. 6 | - **\*.cpp** & **\*.h**: Code for *Dense Trajectories* algorithm. Slightly modified. 7 | 8 | **Warning: The code is provided in its original form without any cleanup.** 9 | 10 | # NOTES ON MODIFICATIONS 11 | Code originated from: 12 | http://lear.inrialpes.fr/people/wang/dense_trajectories 13 | ``` 14 | @inproceedings{wang:2011:inria-00583818:1, 15 | AUTHOR = {Heng Wang and Alexander Kl{\"a}ser and Cordelia Schmid and Cheng-Lin Liu}, 16 | TITLE = {{Action Recognition by Dense Trajectories}}, 17 | BOOKTITLE = {IEEE Conference on Computer Vision \& Pattern Recognition}, 18 | YEAR = {2011}, 19 | MONTH = Jun, 20 | PAGES = {3169-3176}, 21 | ADDRESS = {Colorado Springs, United States}, 22 | URL = {http://hal.inria.fr/inria-00583818/en} 23 | } 24 | ``` 25 | - Modified to support more modern version of OpenCV 26 | - Need OpenCV >= 3.0 with "Contrib" add-in for SURF and SIFT feature extraction. 27 | - Converted stand-alone excutable to dynamic library for Python CFFI calling 28 | 29 | 30 | # Followings are the original README for Dense Trajectories 31 | 32 | 33 | ### Compiling ### 34 | 35 | In order to compile the improved trajectories code, you need to have the following libraries installed in your system: 36 | * OpenCV library (tested with OpenCV-2.4.2) 37 | * ffmpeg library (tested with ffmpeg-0.11.1) 38 | 39 | Currently, the libraries are the latest versions. In case they will be out of date, you can also find them on our website: http://lear.inrialpes.fr/people/wang/improved_trajectories 40 | 41 | If these libraries are installed correctly, simply type 'make' to compile the code. The executable will be in the directory './release/'. 42 | 43 | ### test video decoding ### 44 | 45 | The most complicated part of compiling is to install opencv and ffmpeg. To make sure your video is decoded properly, we have a simple code (named 'Video.cpp') for visualization: 46 | 47 | ./release/Video your_video.avi 48 | 49 | If your video plays smoothly, congratulations! You are just one step before getting the features. 50 | 51 | If there is a bug and the video can't be decoded, you need first fix your bug. You can find plenty of instructions about how to install opencv and ffmpeg on the web. 
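Fork note: this repository's build links everything with `-shared` (see the Makefile above), converting the stand-alone executable into a dynamic library for Python CFFI calling, so the artifacts in `./release/` are loaded from Python rather than run directly. A minimal load check (a sketch mirroring the CFFI call in `batch_process_dataset.py`):

    python3 -c "from cffi import FFI; FFI().dlopen('./release/DenseTrackStab')"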
52 | 53 | ### compute features on a test video ### 54 | 55 | Once you are able to decode the video, computing our features is simple: 56 | 57 | ./release/DenseTrackStab ./test_sequences/person01_boxing_d1_uncomp.avi | gzip > out.features.gz 58 | 59 | Now you want to compare your file out.features.gz with the file that we have computed to verify that everything is working correctly. To do so, type: 60 | 61 | vimdiff out.features.gz ./test_sequences/person01_boxing_d1.gz 62 | 63 | Note that due to different versions of codecs, your features may be slightly different with ours. But the major part should be the same. 64 | 65 | Due to the randomness of RANSAC, you may get different features for some videos. But for the example "person01_boxing_d1_uncomp.avi", I don't observe any randomness. 66 | 67 | There are more explanations about our features on the website, and also a list of FAQ. 68 | 69 | ### History ### 70 | 71 | * October 2013: improved_trajectory_release.tar.gz 72 | The code is an extension of dense_trajectory_release_v1.2.tar.gz 73 | 74 | ### Bugs and extensions ### 75 | 76 | If you find bugs, etc., feel free to drop me a line. Also if you developed some extension to the program, let me know and I can include it in the code. You can find my contact data on my webpage, as well. 77 | 78 | http://lear.inrialpes.fr/people/wang/ 79 | 80 | ### LICENSE CONDITIONS ### 81 | 82 | Copyright (C) 2011 Heng Wang 83 | 84 | This program is free software; you can redistribute it and/or 85 | modify it under the terms of the GNU General Public License 86 | as published by the Free Software Foundation; either version 2 87 | of the License, or (at your option) any later version. 88 | 89 | This program is distributed in the hope that it will be useful, 90 | but WITHOUT ANY WARRANTY; without even the implied warranty of 91 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 92 | GNU General Public License for more details. 93 | 94 | You should have received a copy of the GNU General Public License 95 | along with this program; if not, write to the Free Software 96 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
97 | 98 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/Video.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | IplImage* image = 0; 17 | IplImage* prev_image = 0; 18 | CvCapture* capture = 0; 19 | 20 | int show = 1; 21 | 22 | int main( int argc, char** argv ) 23 | { 24 | int frameNum = 0; 25 | 26 | char* video = argv[1]; 27 | capture = cvCreateFileCapture(video); 28 | 29 | if( !capture ) { 30 | printf( "Could not initialize capturing..\n" ); 31 | return -1; 32 | } 33 | 34 | if( show == 1 ) 35 | cvNamedWindow( "Video", 0 ); 36 | 37 | while( true ) { 38 | IplImage* frame = 0; 39 | int i, j, c; 40 | 41 | // get a new frame 42 | frame = cvQueryFrame( capture ); 43 | if( !frame ) 44 | break; 45 | 46 | if( !image ) { 47 | image = cvCreateImage( cvSize(frame->width,frame->height), 8, 3 ); 48 | image->origin = frame->origin; 49 | } 50 | 51 | cvCopy( frame, image, 0 ); 52 | 53 | if( show == 1 ) { 54 | cvShowImage( "Video", image); 55 | c = cvWaitKey(3); 56 | if((char)c == 27) break; 57 | } 58 | 59 | std::cerr << "The " << frameNum << "-th frame" << std::endl; 60 | frameNum++; 61 | } 62 | 63 | if( show == 1 ) 64 | cvDestroyWindow("Video"); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/batch_process_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from cffi import FFI 4 | import cv2 5 | 6 | from scipy.cluster.vq import kmeans,kmeans2,vq 7 | 8 | # For trajectory storage 9 | import h5py 10 | import uuid 11 | 12 | # OpenBLAS(used by OpenCV) changes CPU affinity 13 | os.sched_setaffinity(0,range(os.cpu_count())) 14 | def setaff(): 15 | os.sched_setaffinity(0,range(os.cpu_count())) 16 | 17 | 18 | # for Multi-threading 19 | from multiprocessing.dummy import Pool as ThreadPool 20 | pool = ThreadPool(5, setaff) 21 | 22 | 23 | 24 | # ======================================================================= 25 | def filter_trajs_displacement(trajs): 26 | num_trajs = len(trajs) 27 | disp_stor = np.empty((num_trajs,), np.float32) 28 | for ii in range(num_trajs): 29 | disp_stor[ii] = np.sum(np.sqrt(np.sum((trajs[1:,:]-trajs[0:-1,:])**2,1))) 30 | # Remove trajectories that have very low displacement 31 | good_trajs = np.flatnonzero(disp_stor>-1) 32 | 33 | return good_trajs 34 | 35 | 36 | # ======================================================================= 37 | def filter_trajs_kmeans(trajs, dec_frames, num_centroids): 38 | num_trajs = len(trajs) 39 | traj_vec_stor = np.empty((num_trajs, (dec_frames-1)*2), np.float32) 40 | disp_stor = np.empty((num_trajs,), np.float32) 41 | 42 | for ii in range(num_trajs): 43 | traj = trajs[ii,0:dec_frames,:] # n-by-2 44 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # substract start point 45 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1))) 46 | # Remove trajectories that have very low displacement 47 | good_trajs = np.flatnonzero(disp_stor>0.4) 48 | traj_vec_stor = traj_vec_stor[good_trajs,:] 49 | 50 | if traj_vec_stor.shape[0] < num_centroids: # too few points 51 | print("kmeans: TOO FEW USABLE KEYPOINTS") 52 | return good_trajs[np.arange(0,traj_vec_stor.shape[0]-1)] # try to use all of 
them 53 | 54 | # k-means on vectors 55 | #num_centroids = 10 56 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100) 57 | centroids,_ = kmeans(traj_vec_stor,num_centroids, iter=100) 58 | 59 | # Find the nearest vectors to centroids 60 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # 10-dim 61 | 62 | rep = good_trajs[rep] 63 | 64 | return rep # return the index of K most representative trajectories 65 | 66 | # ========================================================================== 67 | 68 | # This time we don't do clustering 69 | # Setting parameters 70 | CLIP_LENGTH = 10 71 | 72 | 73 | # Load video... 74 | #for vid_idx in range(NUM_VIDEOS): 75 | def worker(idx): 76 | print("Processing %d/%d" % (idx, len(job_stor))) 77 | 78 | vid_id, frame_count, cam_name, start_frame = job_stor[idx] 79 | 80 | for ff in range(CLIP_LENGTH): 81 | img_path = os.path.join(kitti_path_prefix, '{:02d}'.format(vid_id), cam_name, '{:06d}.png'.format(ff+start_frame)) 82 | img_data = cv2.imread(img_path) # h w c 83 | img_data = cv2.resize(img_data, (422,128)) 84 | img_data = img_data[:,83:339,:] 85 | 86 | #img_data = cv2.resize(img_data, dsize=None, fx=0.25, fy=0.25) 87 | if ff == 0: 88 | height = img_data.shape[0] 89 | width = img_data.shape[1] 90 | vid_seq = np.empty([CLIP_LENGTH,height,width,3], dtype=np.uint8) 91 | vid_seq[ff,:,:,:] = img_data 92 | 93 | # Calculate trajectories 94 | vid_seq_cptr = ffi.cast("char *", vid_seq.ctypes.data) 95 | traj_ret = ffi.new("Ret[]", 1) 96 | # note that a lot more parameters can be modified in DenseTrackStab.cpp. 97 | libtest.main_like(vid_seq_cptr, img_data.shape[1], img_data.shape[0], CLIP_LENGTH, traj_ret) 98 | #print(traj_ret[0].traj_length) 99 | #print(traj_ret[0].num_trajs) 100 | #print(traj_ret[0].out_trajs[0]) 101 | trajs = np.frombuffer(ffi.buffer(traj_ret[0].out_trajs, traj_ret[0].traj_length*traj_ret[0].num_trajs*2*4), dtype=np.float32) 102 | trajs = np.resize(trajs,[traj_ret[0].num_trajs,traj_ret[0].traj_length,2]) 103 | #print(trajs.shape) 104 | libtest.free_mem() 105 | 106 | #filtered_trajs = filter_trajs_kmeans(trajs, DEC_FRAMES, TRAJS_PER_VIDEO) 107 | filtered_trajs = filter_trajs_displacement(trajs) 108 | 109 | if len(filtered_trajs) == 0: 110 | print('No Trajectory detected!!!') 111 | else: 112 | # Write result to HDF5 113 | # %06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount) 114 | h5_kt_bc_traj = h5_kt_bc.require_dataset('%06d_%04d_%04d_%s' % (start_frame+1, CLIP_LENGTH, filtered_trajs.size, uuid.uuid1()), shape=(filtered_trajs.size, CLIP_LENGTH, 2), dtype='float32') 115 | h5_kt_bc_traj[:,:,:] = trajs[filtered_trajs[:],:,:] 116 | h5_kt_bc_traj.attrs['VidNo'] = vid_id 117 | h5_kt_bc_traj.attrs['StartFrame'] = start_frame 118 | h5_kt_bc_traj.attrs['TrajLen'] = CLIP_LENGTH 119 | h5_kt_bc_traj.attrs['TrajCount'] = filtered_trajs.size 120 | h5_kt_bc_traj.attrs['CamName'] = cam_name 121 | h5_kt_bc_traj.attrs['VidResH'] = height 122 | h5_kt_bc_traj.attrs['VidResW'] = width 123 | f.flush() 124 | 125 | if __name__ == "__main__": 126 | # ======================================================================== 127 | # Load KITTI dataset 128 | kitti_path_prefix = '/data1/Video_Prediction/dataset/KITTI/dataset/sequences' # [EDIT ME!] 
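    # Expected on-disk layout, inferred from the path construction in worker()
    # above and the sequence loop below (an assumption about your local copy):
    #   <kitti_path_prefix>/<vid:02d>/<image_2|image_3>/<frame:06d>.png
    # with sequence directories 00..20, i.e. a KITTI odometry-style "sequences" tree.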
129 | def get_num(x): 130 | return int(''.join(ele for ele in x if ele.isdigit())) 131 | frame_count_stor = [] 132 | for vid_id in range(21): 133 | vid_path_prefix = os.path.join(kitti_path_prefix, '{:02d}'.format(vid_id), 'image_2') 134 | video_file_list = os.listdir(vid_path_prefix) 135 | frame_count = 0 136 | for filename in video_file_list: 137 | frame_count = max(get_num(filename),frame_count) 138 | print('Video {}, {} frames'.format(vid_id, frame_count)) 139 | frame_count_stor.append(frame_count+1) # file name starts from 0 140 | # 16 / 5 split 141 | test_split = [15, 11, 7, 5, 4] 142 | train_split = list(set(range(21)) - set(test_split)) 143 | frame_count_stor_train = [frame_count_stor[x] for x in train_split] 144 | frame_count_stor_test = [frame_count_stor[x] for x in test_split] 145 | ##frame_count_stor_train_cumsum = np.cumsum(frame_count_stor_train) 146 | ##train_vid_probs = frame_count_stor_train_cumsum/frame_count_stor_train_cumsum[-1] 147 | ##print(train_vid_probs) 148 | 149 | ## Dense sampling procedure 150 | print('Dense sampling videos......') 151 | job_stor = [] 152 | for vid_id in train_split: # [EDIT ME!] you might want test_split 153 | frame_count = frame_count_stor[vid_id] 154 | for offset in range(0, frame_count - CLIP_LENGTH + 1, 1): 155 | job_stor.append((vid_id, frame_count, 'image_2', offset)) 156 | job_stor.append((vid_id, frame_count, 'image_3', offset)) 157 | 158 | print(len(job_stor)) 159 | 160 | 161 | # Load C extension...... 162 | ffi = FFI() 163 | ffi.cdef(''' 164 | typedef struct 165 | { 166 | int traj_length; 167 | int num_trajs; 168 | float* out_trajs; 169 | } Ret; 170 | 171 | void free_mem(); 172 | void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret * ret); 173 | ''') 174 | libtest = ffi.dlopen("./release/DenseTrackStab") 175 | 176 | # Load HDF5 database...... 177 | f = h5py.File("traj_stor_train.h5", 'a', libver='latest') # Supports Single-Write-Multiple-Read # [EDIT ME!] this is the name of the produced file containing trajectories 178 | h5_kt = f.require_group("/KITTITraj") 179 | #h5_kt_bv = h5_pa.require_group("by_video") # /KITTITraj/by_video/%04d(videoNo)/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount) 180 | h5_kt_bc = h5_kt.require_group("by_clip") # /KITTITraj/by_clip/%02d_%04d_%04d_uuid1(video, startframe, len) 181 | f.swmr_mode = True 182 | 183 | pool.map(worker, range(len(job_stor))) 184 | 185 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/make/dep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (C) 2009 Alexander Kl"aser 4 | # 5 | # This piece is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU General Public License 7 | # as published by the Free Software Foundation; either version 2 8 | # of the License, or (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
18 | # 19 | # This software has been downloaded from: 20 | # http://lear.inrialpes.fr/people/klaeser/software 21 | # 22 | 23 | import sys 24 | import os 25 | import string 26 | import os.path 27 | import re 28 | 29 | HELP_USAGE = """ 30 | Usage: dep.py ... 31 | """ 32 | 33 | regSuffix = re.compile(r"\.[^.]*$") 34 | regSrc = re.compile(r"^.*\.(c|cc|cpp)$") 35 | regDep = re.compile(r"^.*\.d$") 36 | regDepSplit = re.compile(r"\s*\\*\s*") 37 | 38 | suffixes = ['.cpp', '.c', '.cc'] 39 | includeDirs = [] 40 | 41 | 42 | def parseDepFile(fileName): 43 | # read in the dependency file 44 | depFile = open(fileName, 'r') 45 | depStr = depFile.read() 46 | 47 | # discard everything up to the colon 48 | colonPos = depStr.find(":") 49 | assert colonPos > 0, "the dependency file '" + fileName + "' does not have the correct format" 50 | depStr = depStr[colonPos + 1:] 51 | 52 | # collect all included files 53 | return regDepSplit.split(depStr) 54 | 55 | 56 | def findSourceFile(headerFile): 57 | # get the basename without extension 58 | headerFile = regSuffix.sub('', headerFile) 59 | if not headerFile: 60 | return None 61 | 62 | # iterate over known suffixes 63 | for suffix in suffixes: 64 | srcFile = headerFile + suffix 65 | 66 | # check whether a source file corresponding to the header exists 67 | if os.path.exists(srcFile): 68 | return srcFile 69 | 70 | # we add to the file path directory by directory and check whether it 71 | # exists in one of the include directories 72 | i = headerFile.find('/') + 1 73 | if i != 1: 74 | i = 0 75 | while True: 76 | # check whether a source file exists in one of the given include dirs 77 | for dir in includeDirs: 78 | # check all suffixes for source files 79 | for suffix in suffixes: 80 | srcFile = os.path.join(dir, headerFile[i:] + suffix) 81 | #srcFile = os.path.abspath(srcFile) 82 | if os.path.exists(srcFile): 83 | return srcFile 84 | 85 | # find next position of '/' 86 | i = headerFile.find('/', i) + 1 87 | if i <= 0: 88 | break 89 | 90 | return None 91 | 92 | 93 | def main(argv): 94 | global includeDirs 95 | 96 | # check command line parameters 97 | if len(sys.argv) < 5: 98 | print HELP_USAGE 99 | return 100 | 101 | args = sys.argv 102 | args.pop(0) 103 | ruleTarget = args.pop(0) 104 | linkFile = args.pop(0) 105 | buildDir = args.pop(0) 106 | rootDepFile = args.pop(0) 107 | includeDirs = args 108 | 109 | 110 | # scan all dependency files for files we need to link to 111 | # do this recursively starting at the root dependency file 112 | linkFiles = set() 113 | incFiles = set() 114 | depFileStack = set([rootDepFile]) 115 | depFilesDone = set() 116 | while depFileStack: 117 | # get the next dependency file to process from the stack 118 | depFile = depFileStack.pop() 119 | if depFile in depFilesDone: 120 | continue 121 | depFilesDone.add(depFile) 122 | 123 | # iterate over all source files in the dependency file 124 | for nextFile in parseDepFile(depFile): 125 | newDepFile = "" 126 | 127 | # if we have a source file, we need to link against it 128 | if regSrc.match(nextFile): 129 | linkFiles.add(nextFile) 130 | newDepFile = buildDir + "/" + regSuffix.sub(".d", nextFile) 131 | 132 | # check whether a .cpp/.c/.cc file exist 133 | srcFile = findSourceFile(nextFile) 134 | if srcFile != None: 135 | linkFiles.add(srcFile) 136 | newDepFile = buildDir + "/" + regSuffix.sub(".d", srcFile) 137 | 138 | # if the corresponding .d file exists as parameter, add it to the stack 139 | if newDepFile and os.path.exists(newDepFile): 140 | depFileStack.add(newDepFile) 141 | 142 | # 143 | # 
generate all necessary rules 144 | # 145 | 146 | # all includes of dependency files 147 | for i in linkFiles: 148 | i = regSuffix.sub(".d", i) 149 | print "-include " + buildDir + "/" + i 150 | print 151 | 152 | # dependencies for link file 153 | print linkFile + ": \\" 154 | for i in linkFiles: 155 | i = regSuffix.sub(".d", i) 156 | print "\t" + buildDir + "/" + i + " \\" 157 | print 158 | 159 | # print out all files we need to link against 160 | print ruleTarget + ": " + linkFile + " \\" 161 | for i in linkFiles: 162 | i = regSuffix.sub(".o", i) 163 | print "\t" + buildDir + "/" + i + " \\" 164 | print 165 | 166 | 167 | if __name__ == "__main__": 168 | main( sys.argv ) 169 | 170 | 171 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/make/generic.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2009 Alexander Kl"aser 3 | # 4 | # This piece is free software; you can redistribute it and/or 5 | # modify it under the terms of the GNU General Public License 6 | # as published by the Free Software Foundation; either version 2 7 | # of the License, or (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program; if not, write to the Free Software 16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | # 18 | # This software has been downloaded from: 19 | # http://lear.inrialpes.fr/people/klaeser/software 20 | # 21 | # 22 | # Variables that need to be set in the Makefile that includes this file: 23 | # TARGETS all files that are exectuables without there .cpp extension 24 | # BUILDDIR temporary dir where things are compiled to (optional, by default ".build") 25 | # BINDIR dir where executables are linked to (optional, by default "bin") 26 | # SRCDIRS list of directories in which source files are located 27 | # this variable needs to be set if you do not have your source and 28 | # include files located in the same directory! 
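# For illustration, a minimal including Makefile (this mirrors the ../Makefile
# of this project, trimmed to the required variables):
#
#   TARGETS := DenseTrackStab Video
#   BUILDDIR := .build_release
#   BINDIR := release
#   LIBS := opencv_core opencv_highgui
#   LDLIBS = $(addprefix -l, $(LIBS))
#   CXXFLAGS = -pipe -Wall -I.
#   include make/generic.mk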
29 | # 30 | # Variables used for compiling/linking: 31 | # CXXFLAGS flags for compiling 32 | # LDFLAGS flags used for linking 33 | # LDLIBS list of libraries to be linked 34 | # CXX compiler linker (should be g++ by default) 35 | # 36 | 37 | # set paths for the dependency tool and gcc 38 | DEP = make/dep.py 39 | 40 | # set some standard directories in case they have not been set 41 | BUILDDIR ?= .build 42 | BINDIR ?= bin 43 | 44 | # all include files 45 | INCLUDES := $(addprefix $(BUILDDIR)/,$(TARGETS:=.l)) 46 | 47 | 48 | # 49 | # some general rules 50 | # 51 | 52 | .PHONY: all clean 53 | .PRECIOUS: $(BUILDDIR)/%.d 54 | 55 | all: $(BINDIR) $(addprefix $(BINDIR)/,$(notdir $(TARGETS))) 56 | @echo "=== done ===" 57 | 58 | $(INCLUDES): $(BUILDDIR) 59 | 60 | clean: 61 | @echo "=== cleaning up ===" 62 | @rm -rf $(BUILDDIR) 63 | 64 | $(BUILDDIR) $(BINDIR): 65 | @echo "=== creating directory: $@ ===" 66 | @mkdir -p $@ 67 | 68 | 69 | # 70 | # rules for creating dependency files 71 | # 72 | 73 | # dependencies of .cpp files on other files 74 | $(BUILDDIR)/%.d: %.cpp 75 | @echo "=== creating dependency file: $@ ===" 76 | @test -e $(dir $@) || mkdir -p $(dir $@) 77 | g++ $(CXXFLAGS) -MM -MT $(BUILDDIR)/$*.o -MT $(BUILDDIR)/$*.d -MF $@ $< 78 | 79 | # dependencies for the linking 80 | %.so.l %.l: %.d 81 | @echo "=== creating dependency file: $@ ===" 82 | @test -e $(dir $@) || mkdir -p $(dir $@) 83 | $(DEP) "$(BINDIR)/$(@F:.l=)" $*.l $(BUILDDIR) $< $(SRCDIRS) > $@ 84 | 85 | 86 | # 87 | # rules for compiling and linking 88 | # (link dependencies are defined in .l files) 89 | # 90 | 91 | # compiling 92 | $(BUILDDIR)/%.o: %.cpp 93 | @echo "=== compiling: $@ ===" 94 | @test -e $(dir $@) || mkdir -p $(dir $@) 95 | $(CXX) -fPIC $(CXXFLAGS) -c -o $@ $< 96 | 97 | # linking for shared libraries 98 | $(BINDIR)/%.so: 99 | @echo "=== linking: $@ ===" 100 | @rm -f $@ 101 | $(CXX) -shared $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS) 102 | 103 | # linking 104 | $(BINDIR)/%: 105 | @echo "=== linking: $@ ===" 106 | @rm -f $@ 107 | $(CXX) $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS) 108 | 109 | %: %.o 110 | %.h: ; 111 | %.hpp: ; 112 | %.c: ; 113 | %.cpp: ; 114 | 115 | 116 | # 117 | # include dependency files 118 | # 119 | 120 | ifneq ($(MAKECMDGOALS),clean) 121 | -include $(INCLUDES) 122 | endif 123 | -------------------------------------------------------------------------------- /offline_traj/for_KITTI/view_traj.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | 6 | # For trajectory storage 7 | import h5py 8 | 9 | # Setting parameters 10 | DATASET_DIR = '../../../dataset/Penn_Action' 11 | 12 | f = h5py.File("traj_stor.h5", 'r', libver='latest') 13 | # /PennActionTraj/by_video/%04d(videoNo)/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount) 14 | db = f["/PennActionTraj/by_video"] 15 | 16 | fig = plt.figure() 17 | 18 | for vid_name in db.keys(): 19 | for clip_name in db[vid_name].keys(): 20 | clip_start = db[vid_name][clip_name].attrs['StartFrame'] 21 | clip_len = db[vid_name][clip_name].attrs['TrajLen'] 22 | clip_num_trajs = db[vid_name][clip_name].attrs['TrajCount'] 23 | clip_traj_data = db[vid_name][clip_name] 24 | for ff in range(clip_len): 25 | plt.clf() 26 | img_path = os.path.join(DATASET_DIR, 'frames', vid_name, '%06d.jpg' % (ff+clip_start)) 27 | img_data = cv2.imread(img_path)[:,:,(2,1,0)] # h w c 28 | plt.imshow(img_data) 29 | for kk in range(clip_num_trajs): 30 | traj = 
clip_traj_data[kk,:,:] 31 | plt.scatter(traj[ff,0]*2, traj[ff,1]*2) 32 | fig.canvas.draw() 33 | plt.pause(0.001) 34 | #plt.waitforbuttonpress() 35 | #plt.show() 36 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/DenseTrackStab.h: -------------------------------------------------------------------------------- 1 | #ifndef DENSETRACKSTAB_H_ 2 | #define DENSETRACKSTAB_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "opencv2/calib3d/calib3d.hpp" 20 | #include "opencv2/highgui/highgui.hpp" 21 | #include "opencv2/imgproc/imgproc.hpp" 22 | #include "opencv2/xfeatures2d.hpp" 23 | #include "opencv2/core/core.hpp" 24 | //#include "opencv2/nonfree/nonfree.hpp" 25 | 26 | using namespace cv; 27 | 28 | typedef struct 29 | { 30 | int traj_length; 31 | int num_trajs; 32 | float* out_trajs; 33 | } Ret; 34 | 35 | extern "C" void free_mem(); 36 | extern "C" void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret* ret); 37 | 38 | int start_frame = 0; 39 | int end_frame = INT_MAX; 40 | int scale_num = 8; 41 | const float scale_stride = sqrt(2); 42 | char* bb_file = NULL; 43 | 44 | // parameters for descriptors 45 | int patch_size = 32; 46 | int nxy_cell = 2; 47 | int nt_cell = 3; 48 | float epsilon = 0.05; 49 | const float min_flow = 0.4; 50 | 51 | // parameters for tracking 52 | double quality = 0.001; 53 | int min_distance = 5; 54 | int init_gap = 1; 55 | int track_length = 15; 56 | 57 | // parameters for rejecting trajectory 58 | const float min_var = sqrt(3); 59 | const float max_var = 50; 60 | const float max_dis = 20; 61 | 62 | typedef struct { 63 | int x; // top left corner 64 | int y; 65 | int width; 66 | int height; 67 | }RectInfo; 68 | 69 | typedef struct { 70 | int width; // resolution of the video 71 | int height; 72 | int length; // number of frames 73 | }SeqInfo; 74 | 75 | typedef struct { 76 | int length; // length of the trajectory 77 | int gap; // initialization gap for feature re-sampling 78 | }TrackInfo; 79 | 80 | typedef struct { 81 | int nBins; // number of bins for vector quantization 82 | bool isHof; 83 | int nxCells; // number of cells in x direction 84 | int nyCells; 85 | int ntCells; 86 | int dim; // dimension of the descriptor 87 | int height; // size of the block for computing the descriptor 88 | int width; 89 | }DescInfo; 90 | 91 | // integral histogram for the descriptors 92 | typedef struct { 93 | int height; 94 | int width; 95 | int nBins; 96 | float* desc; 97 | }DescMat; 98 | 99 | class Track 100 | { 101 | public: 102 | std::vector point; 103 | std::vector disp; 104 | std::vector hog; 105 | std::vector hof; 106 | std::vector mbhX; 107 | std::vector mbhY; 108 | int index; 109 | 110 | Track(const Point2f& point_, const TrackInfo& trackInfo, const DescInfo& hogInfo, 111 | const DescInfo& hofInfo, const DescInfo& mbhInfo) 112 | : point(trackInfo.length+1), disp(trackInfo.length), hog(hogInfo.dim*trackInfo.length), 113 | hof(hofInfo.dim*trackInfo.length), mbhX(mbhInfo.dim*trackInfo.length), mbhY(mbhInfo.dim*trackInfo.length) 114 | { 115 | index = 0; 116 | point[0] = point_; 117 | } 118 | 119 | void addPoint(const Point2f& point_) 120 | { 121 | index++; 122 | point[index] = point_; 123 | } 124 | }; 125 | 126 | class BoundBox 127 | { 128 | public: 129 | Point2f TopLeft; 130 | Point2f BottomRight; 131 | float confidence; 132 | 133 | 
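	// Constructor arguments: top-left corner (a1, a2), bottom-right
	// corner (a3, a4), detection confidence (a5).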
BoundBox(float a1, float a2, float a3, float a4, float a5) 134 | { 135 | TopLeft.x = a1; 136 | TopLeft.y = a2; 137 | BottomRight.x = a3; 138 | BottomRight.y = a4; 139 | confidence = a5; 140 | } 141 | }; 142 | 143 | class Frame 144 | { 145 | public: 146 | int frameID; 147 | std::vector BBs; 148 | 149 | Frame(const int& frame_) 150 | { 151 | frameID = frame_; 152 | BBs.clear(); 153 | } 154 | }; 155 | 156 | #endif /*DENSETRACKSTAB_H_*/ 157 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/Initialize.h: -------------------------------------------------------------------------------- 1 | #ifndef INITIALIZE_H_ 2 | #define INITIALIZE_H_ 3 | 4 | #include "DenseTrackStab.h" 5 | 6 | using namespace cv; 7 | 8 | void InitTrackInfo(TrackInfo* trackInfo, int track_length, int init_gap) 9 | { 10 | trackInfo->length = track_length; 11 | trackInfo->gap = init_gap; 12 | } 13 | 14 | DescMat* InitDescMat(int height, int width, int nBins) 15 | { 16 | DescMat* descMat = (DescMat*)malloc(sizeof(DescMat)); 17 | descMat->height = height; 18 | descMat->width = width; 19 | descMat->nBins = nBins; 20 | 21 | long size = height*width*nBins; 22 | descMat->desc = (float*)malloc(size*sizeof(float)); 23 | memset(descMat->desc, 0, size*sizeof(float)); 24 | return descMat; 25 | } 26 | 27 | void ReleDescMat(DescMat* descMat) 28 | { 29 | free(descMat->desc); 30 | free(descMat); 31 | } 32 | 33 | void InitDescInfo(DescInfo* descInfo, int nBins, bool isHof, int size, int nxy_cell, int nt_cell) 34 | { 35 | descInfo->nBins = nBins; 36 | descInfo->isHof = isHof; 37 | descInfo->nxCells = nxy_cell; 38 | descInfo->nyCells = nxy_cell; 39 | descInfo->ntCells = nt_cell; 40 | descInfo->dim = nBins*nxy_cell*nxy_cell; 41 | descInfo->height = size; 42 | descInfo->width = size; 43 | } 44 | 45 | void InitSeqInfo(SeqInfo* seqInfo, char* video) 46 | { 47 | VideoCapture capture; 48 | capture.open(video); 49 | 50 | if(!capture.isOpened()) 51 | fprintf(stderr, "Could not initialize capturing..\n"); 52 | 53 | // get the number of frames in the video 54 | int frame_num = 0; 55 | while(true) { 56 | Mat frame; 57 | capture >> frame; 58 | 59 | if(frame.empty()) 60 | break; 61 | 62 | if(frame_num == 0) { 63 | seqInfo->width = frame.cols; 64 | seqInfo->height = frame.rows; 65 | } 66 | 67 | frame_num++; 68 | } 69 | seqInfo->length = frame_num; 70 | } 71 | 72 | void usage() 73 | { 74 | fprintf(stderr, "Extract improved trajectories from a video\n\n"); 75 | fprintf(stderr, "Usage: DenseTrackStab video_file [options]\n"); 76 | fprintf(stderr, "Options:\n"); 77 | fprintf(stderr, " -h Display this message and exit\n"); 78 | fprintf(stderr, " -S [start frame] The start frame to compute feature (default: S=0 frame)\n"); 79 | fprintf(stderr, " -E [end frame] The end frame for feature computing (default: E=last frame)\n"); 80 | fprintf(stderr, " -L [trajectory length] The length of the trajectory (default: L=15 frames)\n"); 81 | fprintf(stderr, " -W [sampling stride] The stride for dense sampling feature points (default: W=5 pixels)\n"); 82 | fprintf(stderr, " -N [neighborhood size] The neighborhood size for computing the descriptor (default: N=32 pixels)\n"); 83 | fprintf(stderr, " -s [spatial cells] The number of cells in the nxy axis (default: nxy=2 cells)\n"); 84 | fprintf(stderr, " -t [temporal cells] The number of cells in the nt axis (default: nt=3 cells)\n"); 85 | fprintf(stderr, " -A [scale number] The number of maximal spatial scales (default: 8 scales)\n"); 86 | fprintf(stderr, " -I [initial 
gap] The gap for re-sampling feature points (default: 1 frame)\n"); 87 | fprintf(stderr, " -H [human bounding box] The human bounding box file to remove outlier matches (default: None)\n"); 88 | } 89 | 90 | bool arg_parse(int argc, char** argv) 91 | { 92 | int c; 93 | bool flag = false; 94 | char* executable = basename(argv[0]); 95 | while((c = getopt (argc, argv, "hS:E:L:W:N:s:t:A:I:H:")) != -1) 96 | switch(c) { 97 | case 'S': 98 | start_frame = atoi(optarg); 99 | flag = true; 100 | break; 101 | case 'E': 102 | end_frame = atoi(optarg); 103 | flag = true; 104 | break; 105 | case 'L': 106 | track_length = atoi(optarg); 107 | break; 108 | case 'W': 109 | min_distance = atoi(optarg); 110 | break; 111 | case 'N': 112 | patch_size = atoi(optarg); 113 | break; 114 | case 's': 115 | nxy_cell = atoi(optarg); 116 | break; 117 | case 't': 118 | nt_cell = atoi(optarg); 119 | break; 120 | case 'A': 121 | scale_num = atoi(optarg); 122 | break; 123 | case 'I': 124 | init_gap = atoi(optarg); 125 | break; 126 | case 'H': 127 | bb_file = optarg; 128 | break; 129 | case 'h': 130 | usage(); 131 | exit(0); 132 | break; 133 | 134 | default: 135 | fprintf(stderr, "error parsing arguments at -%c\n Try '%s -h' for help.", c, executable ); 136 | abort(); 137 | } 138 | return flag; 139 | } 140 | 141 | #endif /*INITIALIZE_H_*/ 142 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/Makefile: -------------------------------------------------------------------------------- 1 | # set the binaries that have to be built 2 | TARGETS := DenseTrackStab Video 3 | 4 | # set the build configuration set 5 | BUILD := release 6 | #BUILD := debug 7 | 8 | # set bin and build dirs 9 | BUILDDIR := .build_$(BUILD) 10 | BINDIR := $(BUILD) 11 | 12 | # libraries 13 | LDLIBS = $(addprefix -l, $(LIBS) $(LIBS_$(notdir $*))) 14 | LIBS := \ 15 | opencv_core opencv_highgui opencv_video opencv_imgproc opencv_calib3d opencv_features2d opencv_xfeatures2d opencv_videoio \ 16 | avformat avdevice avutil avcodec swscale 17 | 18 | # set some flags and compiler/linker specific commands 19 | CXXFLAGS = -pipe -D __STDC_CONSTANT_MACROS -D STD=std -Wall -fvisibility=hidden $(CXXFLAGS_$(BUILD)) -I. -I/opt/include 20 | CXXFLAGS_debug := -ggdb 21 | CXXFLAGS_release := -O3 -DNDEBUG -ggdb 22 | #LDFLAGS = -L/opt/lib -pipe -Wall -shared $(LDFLAGS_$(BUILD)) 23 | LDFLAGS = -L/opt/lib -pipe -Wall -shared -fPIC -fvisibility=hidden $(LDFLAGS_$(BUILD)) 24 | LDFLAGS_debug := -ggdb 25 | LDFLAGS_release := -O3 -ggdb 26 | 27 | include make/generic.mk 28 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/README.md: -------------------------------------------------------------------------------- 1 | # NOTES ON USAGE 2 | For generating trajectories from video (Tuned for Robot Push dataset). 3 | As a part of code for "Controllable Video Generation with Sparse Trajectories", CVPR'18. 4 | - **batch_process_dataset.py**: Generate trajectories. To set up search for comments containing `[EDIT ME!]`. Train/test split hard-coded inside. 5 | - **view_traj.py**: Visualize generated trajectories. Detailed instructions inside the file. 6 | - **\*.cpp** & **\*.h**: Code for *Dense Trajectories* algorithm. Slightly modified. 
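A typical end-to-end run (a sketch; adapt the `[EDIT ME!]` paths first, and note that the file names below are the defaults hard-coded in the scripts):

    make                              # builds ./release/DenseTrackStab as a shared library
    python3 batch_process_dataset.py  # samples clips and writes traj_stor_train.h5
    python3 view_traj.py              # replays clips with the stored trajectories overlaid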
7 | 8 | **Warning: The code is provided in its original form without any cleanup.** 9 | 10 | # NOTES ON MODIFICATIONS 11 | Code originated from: 12 | http://lear.inrialpes.fr/people/wang/dense_trajectories 13 | ``` 14 | @inproceedings{wang:2011:inria-00583818:1, 15 | AUTHOR = {Heng Wang and Alexander Kl{\"a}ser and Cordelia Schmid and Cheng-Lin Liu}, 16 | TITLE = {{Action Recognition by Dense Trajectories}}, 17 | BOOKTITLE = {IEEE Conference on Computer Vision \& Pattern Recognition}, 18 | YEAR = {2011}, 19 | MONTH = Jun, 20 | PAGES = {3169-3176}, 21 | ADDRESS = {Colorado Springs, United States}, 22 | URL = {http://hal.inria.fr/inria-00583818/en} 23 | } 24 | ``` 25 | - Modified to support more modern version of OpenCV 26 | - Need OpenCV >= 3.0 with "Contrib" add-in for SURF and SIFT feature extraction. 27 | - Converted stand-alone excutable to dynamic library for Python CFFI calling 28 | 29 | 30 | # Followings are the original README for Dense Trajectories 31 | 32 | 33 | ### Compiling ### 34 | 35 | In order to compile the improved trajectories code, you need to have the following libraries installed in your system: 36 | * OpenCV library (tested with OpenCV-2.4.2) 37 | * ffmpeg library (tested with ffmpeg-0.11.1) 38 | 39 | Currently, the libraries are the latest versions. In case they will be out of date, you can also find them on our website: http://lear.inrialpes.fr/people/wang/improved_trajectories 40 | 41 | If these libraries are installed correctly, simply type 'make' to compile the code. The executable will be in the directory './release/'. 42 | 43 | ### test video decoding ### 44 | 45 | The most complicated part of compiling is to install opencv and ffmpeg. To make sure your video is decoded properly, we have a simple code (named 'Video.cpp') for visualization: 46 | 47 | ./release/Video your_video.avi 48 | 49 | If your video plays smoothly, congratulations! You are just one step before getting the features. 50 | 51 | If there is a bug and the video can't be decoded, you need first fix your bug. You can find plenty of instructions about how to install opencv and ffmpeg on the web. 52 | 53 | ### compute features on a test video ### 54 | 55 | Once you are able to decode the video, computing our features is simple: 56 | 57 | ./release/DenseTrackStab ./test_sequences/person01_boxing_d1_uncomp.avi | gzip > out.features.gz 58 | 59 | Now you want to compare your file out.features.gz with the file that we have computed to verify that everything is working correctly. To do so, type: 60 | 61 | vimdiff out.features.gz ./test_sequences/person01_boxing_d1.gz 62 | 63 | Note that due to different versions of codecs, your features may be slightly different with ours. But the major part should be the same. 64 | 65 | Due to the randomness of RANSAC, you may get different features for some videos. But for the example "person01_boxing_d1_uncomp.avi", I don't observe any randomness. 66 | 67 | There are more explanations about our features on the website, and also a list of FAQ. 68 | 69 | ### History ### 70 | 71 | * October 2013: improved_trajectory_release.tar.gz 72 | The code is an extension of dense_trajectory_release_v1.2.tar.gz 73 | 74 | ### Bugs and extensions ### 75 | 76 | If you find bugs, etc., feel free to drop me a line. Also if you developed some extension to the program, let me know and I can include it in the code. You can find my contact data on my webpage, as well. 
77 | 78 | http://lear.inrialpes.fr/people/wang/ 79 | 80 | ### LICENSE CONDITIONS ### 81 | 82 | Copyright (C) 2011 Heng Wang 83 | 84 | This program is free software; you can redistribute it and/or 85 | modify it under the terms of the GNU General Public License 86 | as published by the Free Software Foundation; either version 2 87 | of the License, or (at your option) any later version. 88 | 89 | This program is distributed in the hope that it will be useful, 90 | but WITHOUT ANY WARRANTY; without even the implied warranty of 91 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 92 | GNU General Public License for more details. 93 | 94 | You should have received a copy of the GNU General Public License 95 | along with this program; if not, write to the Free Software 96 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 97 | 98 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/Video.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | IplImage* image = 0; 17 | IplImage* prev_image = 0; 18 | CvCapture* capture = 0; 19 | 20 | int show = 1; 21 | 22 | int main( int argc, char** argv ) 23 | { 24 | int frameNum = 0; 25 | 26 | char* video = argv[1]; 27 | capture = cvCreateFileCapture(video); 28 | 29 | if( !capture ) { 30 | printf( "Could not initialize capturing..\n" ); 31 | return -1; 32 | } 33 | 34 | if( show == 1 ) 35 | cvNamedWindow( "Video", 0 ); 36 | 37 | while( true ) { 38 | IplImage* frame = 0; 39 | int i, j, c; 40 | 41 | // get a new frame 42 | frame = cvQueryFrame( capture ); 43 | if( !frame ) 44 | break; 45 | 46 | if( !image ) { 47 | image = cvCreateImage( cvSize(frame->width,frame->height), 8, 3 ); 48 | image->origin = frame->origin; 49 | } 50 | 51 | cvCopy( frame, image, 0 ); 52 | 53 | if( show == 1 ) { 54 | cvShowImage( "Video", image); 55 | c = cvWaitKey(3); 56 | if((char)c == 27) break; 57 | } 58 | 59 | std::cerr << "The " << frameNum << "-th frame" << std::endl; 60 | frameNum++; 61 | } 62 | 63 | if( show == 1 ) 64 | cvDestroyWindow("Video"); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/batch_process_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from cffi import FFI 4 | import cv2 5 | import matplotlib.pyplot as plt 6 | 7 | from scipy.cluster.vq import kmeans,kmeans2,vq 8 | 9 | # For trajectory storage 10 | import h5py 11 | import uuid 12 | 13 | # For loading dataset MATLAB metadata 14 | import scipy.io as sio 15 | 16 | import random 17 | 18 | # for Multi-threading 19 | from multiprocessing.dummy import Pool as ThreadPool 20 | pool = ThreadPool(10) 21 | 22 | 23 | 24 | # ======================================================================= 25 | def filter_trajs_displacement(trajs): 26 | #print(trajs.shape) 27 | num_trajs = len(trajs) 28 | disp_stor = np.empty((num_trajs,), np.float32) 29 | for ii in range(num_trajs): 30 | disp_stor[ii] = np.sum(np.sqrt(np.sum((trajs[ii,1:,:]-trajs[ii,0:-1,:])**2,1))) 31 | # Remove trajectories that have very low displacement 32 | good_trajs = np.flatnonzero(disp_stor>3) 33 | 34 | return good_trajs 35 | 36 | 37 | # 
======================================================================= 38 | def filter_trajs_kmeans(trajs, dec_frames, num_centroids): 39 | num_trajs = len(trajs) 40 | traj_vec_stor = np.empty((num_trajs, (dec_frames-1)*2), np.float32) 41 | disp_stor = np.empty((num_trajs,), np.float32) 42 | 43 | for ii in range(num_trajs): 44 | traj = trajs[ii,0:dec_frames,:] # n-by-2 45 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # substract start point 46 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1))) 47 | # Remove trajectories that have very low displacement 48 | good_trajs = np.flatnonzero(disp_stor>0.4) 49 | traj_vec_stor = traj_vec_stor[good_trajs,:] 50 | 51 | if traj_vec_stor.shape[0] < num_centroids: # too few points 52 | print("kmeans: TOO FEW USABLE KEYPOINTS") 53 | return good_trajs[np.arange(0,traj_vec_stor.shape[0]-1)] # try to use all of them 54 | 55 | # k-means on vectors 56 | #num_centroids = 10 57 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100) 58 | centroids,_ = kmeans(traj_vec_stor,num_centroids, iter=100) 59 | 60 | # Find the nearest vectors to centroids 61 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # 10-dim 62 | 63 | rep = good_trajs[rep] 64 | 65 | return rep # return the index of K most representative trajectories 66 | 67 | # ========================================================================== 68 | 69 | # This time we don't do clustering 70 | # Setting parameters 71 | SAMPLES = 5000 72 | CLIP_LENGTH = 20 73 | ORIGINAL_WIDTH = 640 74 | ORIGINAL_HEIGHT = 512 75 | 76 | random.seed() 77 | 78 | # Load video... 79 | #for vid_idx in range(NUM_VIDEOS): 80 | def worker(idx): 81 | print("Processing %d/%d" % (idx, len(job_stor))) 82 | vid_id, start_frame = job_stor[idx] 83 | 84 | for fram_no in range(CLIP_LENGTH): 85 | img_id = fram_no + start_frame 86 | #print('push/push_train/{}/{}.jpg'.format(vid_id, img_id)) 87 | img = cv2.imdecode(h5f['push/push_train/{}/{}.jpg'.format(vid_id, img_id)][()], -1) 88 | img = cv2.resize(img, (240,192)) 89 | if fram_no == 0: 90 | height = img.shape[0] 91 | width = img.shape[1] 92 | vid_seq = np.empty([CLIP_LENGTH,height,width,3], dtype=np.uint8) 93 | vid_seq[fram_no,:,:,:] = img 94 | 95 | # Calculate trajectories 96 | vid_seq_cptr = ffi.cast("char *", vid_seq.ctypes.data) 97 | traj_ret = ffi.new("Ret[]", 1) 98 | # note that a lot more parameters can be modified in DenseTrackStab.cpp. 
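    # Key defaults compiled into the shared library (from DenseTrackStab.h):
    #   track_length = 15   trajectory length in frames
    #   min_distance = 5    dense-sampling stride in pixels
    #   patch_size   = 32   descriptor neighborhood in pixels
    #   scale_num    = 8    number of spatial scales
    #   init_gap     = 1    gap between feature re-sampling passes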
99 | libtest.main_like(vid_seq_cptr, width, height, CLIP_LENGTH, traj_ret) 100 | #print(traj_ret[0].traj_length) 101 | #print(traj_ret[0].num_trajs) 102 | #print(traj_ret[0].out_trajs[0]) 103 | trajs = np.frombuffer(ffi.buffer(traj_ret[0].out_trajs, traj_ret[0].traj_length*traj_ret[0].num_trajs*2*4), dtype=np.float32) 104 | trajs = np.resize(trajs,[traj_ret[0].num_trajs,traj_ret[0].traj_length,2]) 105 | #print(trajs.shape) 106 | libtest.free_mem() 107 | 108 | #filtered_trajs = filter_trajs_kmeans(trajs, DEC_FRAMES, TRAJS_PER_VIDEO) 109 | filtered_trajs = filter_trajs_displacement(trajs) 110 | 111 | if len(filtered_trajs) == 0: 112 | print('No Trajectory detected!!!') 113 | else: 114 | # Write result to HDF5 115 | # %06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount) 116 | h5_rp_bc_traj = h5_rp_bc.require_dataset('%06d_%04d_%04d_%04d_%s' % (vid_id, start_frame, CLIP_LENGTH, filtered_trajs.size, uuid.uuid1()), shape=(filtered_trajs.size, CLIP_LENGTH, 2), dtype='float32') 117 | h5_rp_bc_traj[:,:,:] = trajs[filtered_trajs[:],:,:] 118 | h5_rp_bc_traj.attrs['VidId'] = vid_id 119 | h5_rp_bc_traj.attrs['StartFrame'] = start_frame 120 | h5_rp_bc_traj.attrs['TrajLen'] = CLIP_LENGTH 121 | h5_rp_bc_traj.attrs['TrajCount'] = filtered_trajs.size 122 | h5_rp_bc_traj.attrs['VidResH'] = height 123 | h5_rp_bc_traj.attrs['VidResW'] = width 124 | f.flush() 125 | 126 | if __name__ == "__main__": 127 | # ======================================================================== 128 | H5_PATH = '/media/haozekun/512SSD_2/robot_push_h5/robot_push_jpgs.h5' # [EDIT ME!] 129 | DATASET_PATH = 'push/push_train/' 130 | h5f = h5py.File(H5_PATH, 'r', libver='latest') 131 | video_count = h5f['push/push_train'].attrs['video_count'] # [EDIT ME!] push_test 132 | 133 | # Generating sample list... 134 | #video_list = random.sample(xrange(video_count), SAMPLES) 135 | print('Generating sample list...') 136 | job_stor = [] 137 | for vid_id in range(video_count): 138 | frame_count = h5f['push/push_train/{}'.format(vid_id)].attrs['frame_count'] # [EDIT ME!] push_test 139 | if frame_count < CLIP_LENGTH: 140 | continue 141 | start_frame = random.randint(0,frame_count-CLIP_LENGTH) 142 | job_stor.append((vid_id,start_frame)) 143 | print('{} samples generated...'.format(len(job_stor))) 144 | 145 | # Load C extension...... 146 | ffi = FFI() 147 | ffi.cdef(''' 148 | typedef struct 149 | { 150 | int traj_length; 151 | int num_trajs; 152 | float* out_trajs; 153 | } Ret; 154 | 155 | void free_mem(); 156 | void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret * ret); 157 | ''') 158 | libtest = ffi.dlopen("./release/DenseTrackStab") 159 | 160 | # Load HDF5 database...... 161 | f = h5py.File("traj_stor_train.h5", 'a', libver='latest') # Supports Single-Write-Multiple-Read # [EDIT ME!] 
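    # libver='latest' is what allows f.swmr_mode = True below: HDF5
    # single-writer/multiple-reader mode, so readers (e.g. view_traj.py)
    # can open the file while the worker pool is still appending to it.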
162 | h5_rp = f.require_group("RPTraj") 163 | h5_rp_bc = h5_rp.require_group("by_clip") # /KITTITraj/by_clip/%02d_%04d_%04d_uuid1(video, startframe, len) 164 | f.swmr_mode = True 165 | 166 | pool.map(worker, range(len(job_stor))) # sample 5000 clips each time 167 | #for ii in range(len(job_stor)): 168 | # worker(ii) 169 | 170 | print('Done!!!!') 171 | 172 | """ 173 | # Now we plot the trajectory out 174 | vid_h = height 175 | vid_w = width 176 | plt.figure() 177 | plt.ylim(vid_h, 0) 178 | plt.xlim(0, vid_w) 179 | for ii in range(trajs.shape[0]): 180 | plt.plot(trajs[ii,:,0], trajs[ii,:,1]) 181 | 182 | plt.figure() 183 | plt.imshow(vid_seq[0,:,:,:]) 184 | plt.ylim(vid_h, 0) 185 | plt.xlim(0, vid_w) 186 | for topk in range(12): # plot top-12 trajectories 187 | traj = trajs[filtered_trajs[topk],:,:] 188 | #plt.plot(traj[0:4,0], traj[0:4,1]) 189 | plt.plot(traj[:,0], traj[:,1]) 190 | plt.show() 191 | """ 192 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/make/dep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (C) 2009 Alexander Kl"aser 4 | # 5 | # This piece is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU General Public License 7 | # as published by the Free Software Foundation; either version 2 8 | # of the License, or (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 | # 19 | # This software has been downloaded from: 20 | # http://lear.inrialpes.fr/people/klaeser/software 21 | # 22 | 23 | import sys 24 | import os 25 | import string 26 | import os.path 27 | import re 28 | 29 | HELP_USAGE = """ 30 | Usage: dep.py ... 
31 | """ 32 | 33 | regSuffix = re.compile(r"\.[^.]*$") 34 | regSrc = re.compile(r"^.*\.(c|cc|cpp)$") 35 | regDep = re.compile(r"^.*\.d$") 36 | regDepSplit = re.compile(r"\s*\\*\s*") 37 | 38 | suffixes = ['.cpp', '.c', '.cc'] 39 | includeDirs = [] 40 | 41 | 42 | def parseDepFile(fileName): 43 | # read in the dependency file 44 | depFile = open(fileName, 'r') 45 | depStr = depFile.read() 46 | 47 | # discard everything up to the colon 48 | colonPos = depStr.find(":") 49 | assert colonPos > 0, "the dependency file '" + fileName + "' does not have the correct format" 50 | depStr = depStr[colonPos + 1:] 51 | 52 | # collect all included files 53 | return regDepSplit.split(depStr) 54 | 55 | 56 | def findSourceFile(headerFile): 57 | # get the basename without extension 58 | headerFile = regSuffix.sub('', headerFile) 59 | if not headerFile: 60 | return None 61 | 62 | # iterate over known suffixes 63 | for suffix in suffixes: 64 | srcFile = headerFile + suffix 65 | 66 | # check whether a source file corresponding to the header exists 67 | if os.path.exists(srcFile): 68 | return srcFile 69 | 70 | # we add to the file path directory by directory and check whether it 71 | # exists in one of the include directories 72 | i = headerFile.find('/') + 1 73 | if i != 1: 74 | i = 0 75 | while True: 76 | # check whether a source file exists in one of the given include dirs 77 | for dir in includeDirs: 78 | # check all suffixes for source files 79 | for suffix in suffixes: 80 | srcFile = os.path.join(dir, headerFile[i:] + suffix) 81 | #srcFile = os.path.abspath(srcFile) 82 | if os.path.exists(srcFile): 83 | return srcFile 84 | 85 | # find next position of '/' 86 | i = headerFile.find('/', i) + 1 87 | if i <= 0: 88 | break 89 | 90 | return None 91 | 92 | 93 | def main(argv): 94 | global includeDirs 95 | 96 | # check command line parameters 97 | if len(sys.argv) < 5: 98 | print HELP_USAGE 99 | return 100 | 101 | args = sys.argv 102 | args.pop(0) 103 | ruleTarget = args.pop(0) 104 | linkFile = args.pop(0) 105 | buildDir = args.pop(0) 106 | rootDepFile = args.pop(0) 107 | includeDirs = args 108 | 109 | 110 | # scan all dependency files for files we need to link to 111 | # do this recursively starting at the root dependency file 112 | linkFiles = set() 113 | incFiles = set() 114 | depFileStack = set([rootDepFile]) 115 | depFilesDone = set() 116 | while depFileStack: 117 | # get the next dependency file to process from the stack 118 | depFile = depFileStack.pop() 119 | if depFile in depFilesDone: 120 | continue 121 | depFilesDone.add(depFile) 122 | 123 | # iterate over all source files in the dependency file 124 | for nextFile in parseDepFile(depFile): 125 | newDepFile = "" 126 | 127 | # if we have a source file, we need to link against it 128 | if regSrc.match(nextFile): 129 | linkFiles.add(nextFile) 130 | newDepFile = buildDir + "/" + regSuffix.sub(".d", nextFile) 131 | 132 | # check whether a .cpp/.c/.cc file exist 133 | srcFile = findSourceFile(nextFile) 134 | if srcFile != None: 135 | linkFiles.add(srcFile) 136 | newDepFile = buildDir + "/" + regSuffix.sub(".d", srcFile) 137 | 138 | # if the corresponding .d file exists as parameter, add it to the stack 139 | if newDepFile and os.path.exists(newDepFile): 140 | depFileStack.add(newDepFile) 141 | 142 | # 143 | # generate all necessary rules 144 | # 145 | 146 | # all includes of dependency files 147 | for i in linkFiles: 148 | i = regSuffix.sub(".d", i) 149 | print "-include " + buildDir + "/" + i 150 | print 151 | 152 | # dependencies for link file 153 | print 
linkFile + ": \\" 154 | for i in linkFiles: 155 | i = regSuffix.sub(".d", i) 156 | print "\t" + buildDir + "/" + i + " \\" 157 | print 158 | 159 | # print out all files we need to link against 160 | print ruleTarget + ": " + linkFile + " \\" 161 | for i in linkFiles: 162 | i = regSuffix.sub(".o", i) 163 | print "\t" + buildDir + "/" + i + " \\" 164 | print 165 | 166 | 167 | if __name__ == "__main__": 168 | main( sys.argv ) 169 | 170 | 171 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/make/generic.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2009 Alexander Kl"aser 3 | # 4 | # This piece is free software; you can redistribute it and/or 5 | # modify it under the terms of the GNU General Public License 6 | # as published by the Free Software Foundation; either version 2 7 | # of the License, or (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program; if not, write to the Free Software 16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | # 18 | # This software has been downloaded from: 19 | # http://lear.inrialpes.fr/people/klaeser/software 20 | # 21 | # 22 | # Variables that need to be set in the Makefile that includes this file: 23 | # TARGETS all files that are exectuables without there .cpp extension 24 | # BUILDDIR temporary dir where things are compiled to (optional, by default ".build") 25 | # BINDIR dir where executables are linked to (optional, by default "bin") 26 | # SRCDIRS list of directories in which source files are located 27 | # this variable needs to be set if you do not have your source and 28 | # include files located in the same directory! 
29 | # 30 | # Variables used for compiling/linking: 31 | # CXXFLAGS flags for compiling 32 | # LDFLAGS flags used for linking 33 | # LDLIBS list of libraries to be linked 34 | # CXX compiler linker (should be g++ by default) 35 | # 36 | 37 | # set paths for the dependency tool and gcc 38 | DEP = make/dep.py 39 | 40 | # set some standard directories in case they have not been set 41 | BUILDDIR ?= .build 42 | BINDIR ?= bin 43 | 44 | # all include files 45 | INCLUDES := $(addprefix $(BUILDDIR)/,$(TARGETS:=.l)) 46 | 47 | 48 | # 49 | # some general rules 50 | # 51 | 52 | .PHONY: all clean 53 | .PRECIOUS: $(BUILDDIR)/%.d 54 | 55 | all: $(BINDIR) $(addprefix $(BINDIR)/,$(notdir $(TARGETS))) 56 | @echo "=== done ===" 57 | 58 | $(INCLUDES): $(BUILDDIR) 59 | 60 | clean: 61 | @echo "=== cleaning up ===" 62 | @rm -rf $(BUILDDIR) 63 | 64 | $(BUILDDIR) $(BINDIR): 65 | @echo "=== creating directory: $@ ===" 66 | @mkdir -p $@ 67 | 68 | 69 | # 70 | # rules for creating dependency files 71 | # 72 | 73 | # dependencies of .cpp files on other files 74 | $(BUILDDIR)/%.d: %.cpp 75 | @echo "=== creating dependency file: $@ ===" 76 | @test -e $(dir $@) || mkdir -p $(dir $@) 77 | g++ $(CXXFLAGS) -MM -MT $(BUILDDIR)/$*.o -MT $(BUILDDIR)/$*.d -MF $@ $< 78 | 79 | # dependencies for the linking 80 | %.so.l %.l: %.d 81 | @echo "=== creating dependency file: $@ ===" 82 | @test -e $(dir $@) || mkdir -p $(dir $@) 83 | $(DEP) "$(BINDIR)/$(@F:.l=)" $*.l $(BUILDDIR) $< $(SRCDIRS) > $@ 84 | 85 | 86 | # 87 | # rules for compiling and linking 88 | # (link dependencies are defined in .l files) 89 | # 90 | 91 | # compiling 92 | $(BUILDDIR)/%.o: %.cpp 93 | @echo "=== compiling: $@ ===" 94 | @test -e $(dir $@) || mkdir -p $(dir $@) 95 | $(CXX) -fPIC $(CXXFLAGS) -c -o $@ $< 96 | 97 | # linking for shared libraries 98 | $(BINDIR)/%.so: 99 | @echo "=== linking: $@ ===" 100 | @rm -f $@ 101 | $(CXX) -shared $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS) 102 | 103 | # linking 104 | $(BINDIR)/%: 105 | @echo "=== linking: $@ ===" 106 | @rm -f $@ 107 | $(CXX) $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS) 108 | 109 | %: %.o 110 | %.h: ; 111 | %.hpp: ; 112 | %.c: ; 113 | %.cpp: ; 114 | 115 | 116 | # 117 | # include dependency files 118 | # 119 | 120 | ifneq ($(MAKECMDGOALS),clean) 121 | -include $(INCLUDES) 122 | endif 123 | -------------------------------------------------------------------------------- /offline_traj/for_RobotPush/view_traj.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | 6 | # For trajectory storage 7 | import h5py 8 | 9 | # Setting parameters 10 | TRAJ_H5_PATH = 'traj_stor_train.h5' 11 | JPG_H5_PATH = '/media/haozekun/512SSD_2/robot_push_h5/robot_push_jpgs.h5' 12 | 13 | f_traj = h5py.File(TRAJ_H5_PATH, 'r', libver='latest') 14 | db_traj = f_traj["/RPTraj/by_clip"] 15 | 16 | f_jpg = h5py.File(JPG_H5_PATH, 'r', libver='latest') 17 | #f_jpg = f_jpg['push/push_train/'] 18 | 19 | fig = plt.figure() 20 | 21 | for clip_name in db_traj.keys(): 22 | video_id = db_traj[clip_name].attrs['VidId'] 23 | clip_start = db_traj[clip_name].attrs['StartFrame'] 24 | clip_len = db_traj[clip_name].attrs['TrajLen'] 25 | clip_num_trajs = db_traj[clip_name].attrs['TrajCount'] 26 | clip_traj_data = db_traj[clip_name] 27 | 28 | for ff in range(clip_len): 29 | plt.clf() 30 | img_id = ff + clip_start 31 | img_data = cv2.imdecode(f_jpg['push/push_train/{}/{}.jpg'.format(video_id, img_id)][()], -1) 32 | img_data = 
cv2.resize(img_data, (240,192)) 33 | img_data = img_data[:,:,(2,1,0)] # h w c 34 | 35 | plt.imshow(img_data) 36 | for kk in range(clip_num_trajs): 37 | traj = clip_traj_data[kk,:,:] 38 | plt.scatter(traj[ff,0], traj[ff,1]) 39 | fig.canvas.draw() 40 | plt.pause(0.001) 41 | #plt.waitforbuttonpress() 42 | #plt.show() 43 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/DenseTrackStab.h: -------------------------------------------------------------------------------- 1 | #ifndef DENSETRACKSTAB_H_ 2 | #define DENSETRACKSTAB_H_ 3 | 4 | #include <time.h> 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | #include <string.h> 8 | #include <math.h> 9 | #include <float.h> 10 | #include <limits.h> 11 | #include <ctype.h> 12 | #include <unistd.h> 13 | #include <algorithm> 14 | #include <vector> 15 | #include <list> 16 | #include <string> 17 | #include <iostream> 18 | 19 | #include "opencv2/calib3d/calib3d.hpp" 20 | #include "opencv2/highgui/highgui.hpp" 21 | #include "opencv2/imgproc/imgproc.hpp" 22 | #include "opencv2/xfeatures2d.hpp" 23 | #include "opencv2/core/core.hpp" 24 | //#include "opencv2/nonfree/nonfree.hpp" 25 | 26 | using namespace cv; 27 | 28 | typedef struct 29 | { 30 | int traj_length; 31 | int num_trajs; 32 | float* out_trajs; 33 | } Ret; 34 | 35 | extern "C" void free_mem(); 36 | extern "C" void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret* ret); 37 | 38 | int start_frame = 0; 39 | int end_frame = INT_MAX; 40 | int scale_num = 8; 41 | const float scale_stride = sqrt(2); 42 | char* bb_file = NULL; 43 | 44 | // parameters for descriptors 45 | int patch_size = 32; 46 | int nxy_cell = 2; 47 | int nt_cell = 3; 48 | float epsilon = 0.05; 49 | const float min_flow = 0.4; 50 | 51 | // parameters for tracking 52 | double quality = 0.001; 53 | int min_distance = 5; 54 | int init_gap = 1; 55 | int track_length = 15; 56 | 57 | // parameters for rejecting trajectory 58 | const float min_var = sqrt(3); 59 | const float max_var = 50; 60 | const float max_dis = 20; 61 | 62 | typedef struct { 63 | int x; // top left corner 64 | int y; 65 | int width; 66 | int height; 67 | }RectInfo; 68 | 69 | typedef struct { 70 | int width; // resolution of the video 71 | int height; 72 | int length; // number of frames 73 | }SeqInfo; 74 | 75 | typedef struct { 76 | int length; // length of the trajectory 77 | int gap; // initialization gap for feature re-sampling 78 | }TrackInfo; 79 | 80 | typedef struct { 81 | int nBins; // number of bins for vector quantization 82 | bool isHof; 83 | int nxCells; // number of cells in x direction 84 | int nyCells; 85 | int ntCells; 86 | int dim; // dimension of the descriptor 87 | int height; // size of the block for computing the descriptor 88 | int width; 89 | }DescInfo; 90 | 91 | // integral histogram for the descriptors 92 | typedef struct { 93 | int height; 94 | int width; 95 | int nBins; 96 | float* desc; 97 | }DescMat; 98 | 99 | class Track 100 | { 101 | public: 102 | std::vector<Point2f> point; 103 | std::vector<Point2f> disp; 104 | std::vector<float> hog; 105 | std::vector<float> hof; 106 | std::vector<float> mbhX; 107 | std::vector<float> mbhY; 108 | int index; 109 | 110 | Track(const Point2f& point_, const TrackInfo& trackInfo, const DescInfo& hogInfo, 111 | const DescInfo& hofInfo, const DescInfo& mbhInfo) 112 | : point(trackInfo.length+1), disp(trackInfo.length), hog(hogInfo.dim*trackInfo.length), 113 | hof(hofInfo.dim*trackInfo.length), mbhX(mbhInfo.dim*trackInfo.length), mbhY(mbhInfo.dim*trackInfo.length) 114 | { 115 | index = 0; 116 | point[0] = point_; 117 | } 118 | 119 | void addPoint(const Point2f& point_) 120 | { 121 | index++; 122 | point[index] = point_; 123
| } 124 | }; 125 | 126 | class BoundBox 127 | { 128 | public: 129 | Point2f TopLeft; 130 | Point2f BottomRight; 131 | float confidence; 132 | 133 | BoundBox(float a1, float a2, float a3, float a4, float a5) 134 | { 135 | TopLeft.x = a1; 136 | TopLeft.y = a2; 137 | BottomRight.x = a3; 138 | BottomRight.y = a4; 139 | confidence = a5; 140 | } 141 | }; 142 | 143 | class Frame 144 | { 145 | public: 146 | int frameID; 147 | std::vector<BoundBox> BBs; 148 | 149 | Frame(const int& frame_) 150 | { 151 | frameID = frame_; 152 | BBs.clear(); 153 | } 154 | }; 155 | 156 | #endif /*DENSETRACKSTAB_H_*/ 157 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/Initialize.h: -------------------------------------------------------------------------------- 1 | #ifndef INITIALIZE_H_ 2 | #define INITIALIZE_H_ 3 | 4 | #include "DenseTrackStab.h" 5 | 6 | using namespace cv; 7 | 8 | void InitTrackInfo(TrackInfo* trackInfo, int track_length, int init_gap) 9 | { 10 | trackInfo->length = track_length; 11 | trackInfo->gap = init_gap; 12 | } 13 | 14 | DescMat* InitDescMat(int height, int width, int nBins) 15 | { 16 | DescMat* descMat = (DescMat*)malloc(sizeof(DescMat)); 17 | descMat->height = height; 18 | descMat->width = width; 19 | descMat->nBins = nBins; 20 | 21 | long size = height*width*nBins; 22 | descMat->desc = (float*)malloc(size*sizeof(float)); 23 | memset(descMat->desc, 0, size*sizeof(float)); 24 | return descMat; 25 | } 26 | 27 | void ReleDescMat(DescMat* descMat) 28 | { 29 | free(descMat->desc); 30 | free(descMat); 31 | } 32 | 33 | void InitDescInfo(DescInfo* descInfo, int nBins, bool isHof, int size, int nxy_cell, int nt_cell) 34 | { 35 | descInfo->nBins = nBins; 36 | descInfo->isHof = isHof; 37 | descInfo->nxCells = nxy_cell; 38 | descInfo->nyCells = nxy_cell; 39 | descInfo->ntCells = nt_cell; 40 | descInfo->dim = nBins*nxy_cell*nxy_cell; 41 | descInfo->height = size; 42 | descInfo->width = size; 43 | } 44 | 45 | void InitSeqInfo(SeqInfo* seqInfo, char* video) 46 | { 47 | VideoCapture capture; 48 | capture.open(video); 49 | 50 | if(!capture.isOpened()) 51 | fprintf(stderr, "Could not initialize capturing..\n"); 52 | 53 | // get the number of frames in the video 54 | int frame_num = 0; 55 | while(true) { 56 | Mat frame; 57 | capture >> frame; 58 | 59 | if(frame.empty()) 60 | break; 61 | 62 | if(frame_num == 0) { 63 | seqInfo->width = frame.cols; 64 | seqInfo->height = frame.rows; 65 | } 66 | 67 | frame_num++; 68 | } 69 | seqInfo->length = frame_num; 70 | } 71 | 72 | void usage() 73 | { 74 | fprintf(stderr, "Extract improved trajectories from a video\n\n"); 75 | fprintf(stderr, "Usage: DenseTrackStab video_file [options]\n"); 76 | fprintf(stderr, "Options:\n"); 77 | fprintf(stderr, " -h Display this message and exit\n"); 78 | fprintf(stderr, " -S [start frame] The start frame to compute feature (default: S=0 frame)\n"); 79 | fprintf(stderr, " -E [end frame] The end frame for feature computing (default: E=last frame)\n"); 80 | fprintf(stderr, " -L [trajectory length] The length of the trajectory (default: L=15 frames)\n"); 81 | fprintf(stderr, " -W [sampling stride] The stride for dense sampling feature points (default: W=5 pixels)\n"); 82 | fprintf(stderr, " -N [neighborhood size] The neighborhood size for computing the descriptor (default: N=32 pixels)\n"); 83 | fprintf(stderr, " -s [spatial cells] The number of cells in the nxy axis (default: nxy=2 cells)\n"); 84 | fprintf(stderr, " -t [temporal cells] The number of cells in the nt axis (default: nt=3
cells)\n"); 85 | fprintf(stderr, " -A [scale number] The number of maximal spatial scales (default: 8 scales)\n"); 86 | fprintf(stderr, " -I [initial gap] The gap for re-sampling feature points (default: 1 frame)\n"); 87 | fprintf(stderr, " -H [human bounding box] The human bounding box file to remove outlier matches (default: None)\n"); 88 | } 89 | 90 | bool arg_parse(int argc, char** argv) 91 | { 92 | int c; 93 | bool flag = false; 94 | char* executable = basename(argv[0]); 95 | while((c = getopt (argc, argv, "hS:E:L:W:N:s:t:A:I:H:")) != -1) 96 | switch(c) { 97 | case 'S': 98 | start_frame = atoi(optarg); 99 | flag = true; 100 | break; 101 | case 'E': 102 | end_frame = atoi(optarg); 103 | flag = true; 104 | break; 105 | case 'L': 106 | track_length = atoi(optarg); 107 | break; 108 | case 'W': 109 | min_distance = atoi(optarg); 110 | break; 111 | case 'N': 112 | patch_size = atoi(optarg); 113 | break; 114 | case 's': 115 | nxy_cell = atoi(optarg); 116 | break; 117 | case 't': 118 | nt_cell = atoi(optarg); 119 | break; 120 | case 'A': 121 | scale_num = atoi(optarg); 122 | break; 123 | case 'I': 124 | init_gap = atoi(optarg); 125 | break; 126 | case 'H': 127 | bb_file = optarg; 128 | break; 129 | case 'h': 130 | usage(); 131 | exit(0); 132 | break; 133 | 134 | default: 135 | fprintf(stderr, "error parsing arguments at -%c\n Try '%s -h' for help.", c, executable ); 136 | abort(); 137 | } 138 | return flag; 139 | } 140 | 141 | #endif /*INITIALIZE_H_*/ 142 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/Makefile: -------------------------------------------------------------------------------- 1 | # set the binaries that have to be built 2 | TARGETS := DenseTrackStab Video 3 | 4 | # set the build configuration set 5 | BUILD := release 6 | #BUILD := debug 7 | 8 | # set bin and build dirs 9 | BUILDDIR := .build_$(BUILD) 10 | BINDIR := $(BUILD) 11 | 12 | # libraries 13 | LDLIBS = $(addprefix -l, $(LIBS) $(LIBS_$(notdir $*))) 14 | LIBS := \ 15 | opencv_core opencv_highgui opencv_video opencv_imgproc opencv_calib3d opencv_features2d opencv_xfeatures2d opencv_videoio \ 16 | avformat avdevice avutil avcodec swscale 17 | 18 | # set some flags and compiler/linker specific commands 19 | CXXFLAGS = -pipe -D __STDC_CONSTANT_MACROS -D STD=std -Wall -fvisibility=hidden $(CXXFLAGS_$(BUILD)) -I. -I/opt/include 20 | CXXFLAGS_debug := -ggdb 21 | CXXFLAGS_release := -O3 -DNDEBUG -ggdb 22 | #LDFLAGS = -L/opt/lib -pipe -Wall -shared $(LDFLAGS_$(BUILD)) 23 | LDFLAGS = -L/opt/lib -pipe -Wall -shared -fPIC -fvisibility=hidden $(LDFLAGS_$(BUILD)) 24 | LDFLAGS_debug := -ggdb 25 | LDFLAGS_release := -O3 -ggdb 26 | 27 | include make/generic.mk 28 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/README.md: -------------------------------------------------------------------------------- 1 | # NOTES ON USAGE 2 | For generating trajectories from video (Tuned for UCF-101 dataset). 3 | As a part of code for "Controllable Video Generation with Sparse Trajectories", CVPR'18. 4 | - **testlist01.txt** & **trainlist01.txt**: Train & test split we used. 5 | - **batch_process_dataset.py**: Generate trajectories. To set up search for comments containing `[EDIT ME!]`. 6 | - **view_traj.py**: Visualize generated trajectories. Detailed instructions inside the file. 7 | - **\*.cpp** & **\*.h**: Code for *Dense Trajectories* algorithm. Slightly modified. 
8 | 9 | **Warning: The code is provided in its original form without any cleanup.** 10 | 11 | # NOTES ON MODIFICATIONS 12 | Code originated from: 13 | http://lear.inrialpes.fr/people/wang/dense_trajectories 14 | ``` 15 | @inproceedings{wang:2011:inria-00583818:1, 16 | AUTHOR = {Heng Wang and Alexander Kl{\"a}ser and Cordelia Schmid and Cheng-Lin Liu}, 17 | TITLE = {{Action Recognition by Dense Trajectories}}, 18 | BOOKTITLE = {IEEE Conference on Computer Vision \& Pattern Recognition}, 19 | YEAR = {2011}, 20 | MONTH = Jun, 21 | PAGES = {3169-3176}, 22 | ADDRESS = {Colorado Springs, United States}, 23 | URL = {http://hal.inria.fr/inria-00583818/en} 24 | } 25 | ``` 26 | - Modified to support more modern versions of OpenCV 27 | - Needs OpenCV >= 3.0 with the "contrib" modules for SURF and SIFT feature extraction. 28 | - Converted the stand-alone executable into a dynamic library so it can be called from Python via CFFI 29 | 30 | 31 | # The following is the original README for Dense Trajectories 32 | 33 | 34 | ### Compiling ### 35 | 36 | In order to compile the improved trajectories code, you need to have the following libraries installed in your system: 37 | * OpenCV library (tested with OpenCV-2.4.2) 38 | * ffmpeg library (tested with ffmpeg-0.11.1) 39 | 40 | Currently, the libraries are the latest versions. In case they become out of date, you can also find them on our website: http://lear.inrialpes.fr/people/wang/improved_trajectories 41 | 42 | If these libraries are installed correctly, simply type 'make' to compile the code. The executable will be in the directory './release/'. 43 | 44 | ### test video decoding ### 45 | 46 | The most complicated part of compiling is to install opencv and ffmpeg. To make sure your video is decoded properly, we have a simple program (named 'Video.cpp') for visualization: 47 | 48 | ./release/Video your_video.avi 49 | 50 | If your video plays smoothly, congratulations! You are just one step away from getting the features. 51 | 52 | If there is a bug and the video can't be decoded, you need to fix that first. You can find plenty of instructions about how to install opencv and ffmpeg on the web. 53 | 54 | ### compute features on a test video ### 55 | 56 | Once you are able to decode the video, computing our features is simple: 57 | 58 | ./release/DenseTrackStab ./test_sequences/person01_boxing_d1_uncomp.avi | gzip > out.features.gz 59 | 60 | Now you want to compare your file out.features.gz with the file that we have computed to verify that everything is working correctly. To do so, type: 61 | 62 | vimdiff out.features.gz ./test_sequences/person01_boxing_d1.gz 63 | 64 | Note that due to different versions of codecs, your features may be slightly different from ours. But the major part should be the same. 65 | 66 | Due to the randomness of RANSAC, you may get different features for some videos. But for the example "person01_boxing_d1_uncomp.avi", I don't observe any randomness. 67 | 68 | There are more explanations about our features on the website, and also a list of FAQ. 69 | 70 | ### History ### 71 | 72 | * October 2013: improved_trajectory_release.tar.gz 73 | The code is an extension of dense_trajectory_release_v1.2.tar.gz 74 | 75 | ### Bugs and extensions ### 76 | 77 | If you find bugs, etc., feel free to drop me a line. Also if you developed some extension to the program, let me know and I can include it in the code. You can find my contact data on my webpage, as well.
78 | 79 | http://lear.inrialpes.fr/people/wang/ 80 | 81 | ### LICENSE CONDITIONS ### 82 | 83 | Copyright (C) 2011 Heng Wang 84 | 85 | This program is free software; you can redistribute it and/or 86 | modify it under the terms of the GNU General Public License 87 | as published by the Free Software Foundation; either version 2 88 | of the License, or (at your option) any later version. 89 | 90 | This program is distributed in the hope that it will be useful, 91 | but WITHOUT ANY WARRANTY; without even the implied warranty of 92 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 93 | GNU General Public License for more details. 94 | 95 | You should have received a copy of the GNU General Public License 96 | along with this program; if not, write to the Free Software 97 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 98 | 99 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/Video.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include <math.h> 5 | 6 | #include <ctype.h> 7 | #include <unistd.h> 8 | #include <iostream> 9 | #include <fstream> 10 | #include <vector> 11 | #include <opencv2/core/core_c.h> 12 | #include <opencv2/highgui/highgui_c.h> 13 | #include <opencv2/videoio/videoio_c.h> 14 | #include <opencv2/imgproc/imgproc_c.h> 15 | 16 | IplImage* image = 0; 17 | IplImage* prev_image = 0; 18 | CvCapture* capture = 0; 19 | 20 | int show = 1; 21 | 22 | int main( int argc, char** argv ) 23 | { 24 | int frameNum = 0; 25 | 26 | char* video = argv[1]; 27 | capture = cvCreateFileCapture(video); 28 | 29 | if( !capture ) { 30 | printf( "Could not initialize capturing..\n" ); 31 | return -1; 32 | } 33 | 34 | if( show == 1 ) 35 | cvNamedWindow( "Video", 0 ); 36 | 37 | while( true ) { 38 | IplImage* frame = 0; 39 | int i, j, c; 40 | 41 | // get a new frame 42 | frame = cvQueryFrame( capture ); 43 | if( !frame ) 44 | break; 45 | 46 | if( !image ) { 47 | image = cvCreateImage( cvSize(frame->width,frame->height), 8, 3 ); 48 | image->origin = frame->origin; 49 | } 50 | 51 | cvCopy( frame, image, 0 ); 52 | 53 | if( show == 1 ) { 54 | cvShowImage( "Video", image); 55 | c = cvWaitKey(3); 56 | if((char)c == 27) break; 57 | } 58 | 59 | std::cerr << "The " << frameNum << "-th frame" << std::endl; 60 | frameNum++; 61 | } 62 | 63 | if( show == 1 ) 64 | cvDestroyWindow("Video"); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/batch_process_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from cffi import FFI 4 | import cv2 5 | 6 | from scipy.cluster.vq import kmeans,kmeans2,vq 7 | 8 | # For trajectory storage 9 | import h5py 10 | import uuid 11 | 12 | import re 13 | 14 | # OpenBLAS affects CPU affinity 15 | os.sched_setaffinity(0,range(os.cpu_count())) 16 | def setaff(): 17 | os.sched_setaffinity(0,range(os.cpu_count())) 18 | 19 | # for Multi-threading 20 | from multiprocessing.dummy import Pool as ThreadPool 21 | pool = ThreadPool(5, setaff) 22 | 23 | 24 | # ======================================================================= 25 | def filter_trajs_displacement(trajs): 26 | #print(trajs.shape) 27 | num_trajs = len(trajs) 28 | disp_stor = np.empty((num_trajs,), np.float32) 29 | for ii in range(num_trajs): 30 | disp_stor[ii] = np.sum(np.sqrt(np.sum((trajs[ii,1:,:]-trajs[ii,0:-1,:])**2,1))) 31 | # Remove trajectories that have very low displacement 32 | good_trajs = np.flatnonzero(disp_stor>5) 33 | 34 | return good_trajs 35 | 36 |
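# Usage sketch (illustrative only; names and shapes follow the tracker output
# further below): given `trajs` of shape (num_trajs, traj_len, 2), the filter
# returns the indices of trajectories whose cumulative path length exceeds 5 px:
#   keep = filter_trajs_displacement(trajs)  # e.g. trajs.shape == (500, 11, 2)
#   trajs = trajs[keep]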
37 | # ======================================================================= 38 | def filter_trajs_kmeans(trajs, dec_frames, num_centroids): 39 | num_trajs = len(trajs) 40 | traj_vec_stor = np.empty((num_trajs, (dec_frames-1)*2), np.float32) 41 | disp_stor = np.empty((num_trajs,), np.float32) 42 | 43 | for ii in range(num_trajs): 44 | traj = trajs[ii,0:dec_frames,:] # n-by-2 45 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point 46 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1))) 47 | # Remove trajectories that have very low displacement 48 | good_trajs = np.flatnonzero(disp_stor>0.4) 49 | traj_vec_stor = traj_vec_stor[good_trajs,:] 50 | 51 | if traj_vec_stor.shape[0] < num_centroids: # too few points 52 | print("kmeans: TOO FEW USABLE KEYPOINTS") 53 | return good_trajs # try to use all of them 54 | 55 | # k-means on vectors 56 | #num_centroids = 10 57 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100) 58 | centroids,_ = kmeans(traj_vec_stor,num_centroids, iter=100) 59 | 60 | # Find the nearest vectors to centroids 61 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # 10-dim 62 | 63 | rep = good_trajs[rep] 64 | 65 | return rep # return the index of K most representative trajectories 66 | 67 | # ========================================================================== 68 | 69 | CLIP_LENGTH = 11 70 | 71 | # Load video... 72 | #for vid_idx in range(NUM_VIDEOS): 73 | def worker(idx): 74 | print("Processing %d/%d" % (idx, len(job_stor))) 75 | video_path, length, offset = job_stor[idx] 76 | 77 | #start_frame = random.randint(0,length-CLIP_LENGTH+1-1) 78 | start_frame = offset 79 | for fram_no in range(CLIP_LENGTH): 80 | frame = cv2.imread(video_path+'/'+str(start_frame+fram_no)+'.jpg') 81 | img = cv2.resize(frame, (256,192), interpolation=cv2.INTER_AREA) 82 | if fram_no == 0: 83 | height = img.shape[0] 84 | width = img.shape[1] 85 | vid_seq = np.empty([CLIP_LENGTH,height,width,3], dtype=np.uint8) 86 | vid_seq[fram_no,:,:,:] = img[:,:,:] 87 | 88 | # Calculate trajectories 89 | vid_seq_cptr = ffi.cast("char *", vid_seq.ctypes.data) 90 | traj_ret = ffi.new("Ret[]", 1) 91 | # note that a lot more parameters are hard-coded in DenseTrackStab.cpp due to laziness.
92 | libtest.main_like(vid_seq_cptr, width, height, CLIP_LENGTH, traj_ret) 93 | #print(traj_ret[0].traj_length) 94 | #print(traj_ret[0].num_trajs) 95 | #print(traj_ret[0].out_trajs[0]) 96 | trajs = np.frombuffer(ffi.buffer(traj_ret[0].out_trajs, traj_ret[0].traj_length*traj_ret[0].num_trajs*2*4), dtype=np.float32) 97 | trajs = np.resize(trajs,[traj_ret[0].num_trajs,traj_ret[0].traj_length,2]) 98 | #print(trajs.shape) 99 | libtest.free_mem() 100 | 101 | #filtered_trajs = filter_trajs_kmeans(trajs, 15, 10) 102 | filtered_trajs = filter_trajs_displacement(trajs) 103 | 104 | if len(filtered_trajs) == 0: 105 | print('No Trajectory detected!!!') 106 | else: 107 | # Write result to HDF5 108 | # %06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount) 109 | h5_ucf_bc_traj = h5_ucf_bc.require_dataset('%06d_%04d_%04d_%s' % (start_frame+1, CLIP_LENGTH, filtered_trajs.size, uuid.uuid1()), shape=(filtered_trajs.size, CLIP_LENGTH, 2), dtype='float32') 110 | h5_ucf_bc_traj[:,:,:] = trajs[filtered_trajs[:],:,:] 111 | h5_ucf_bc_traj.attrs['VidPath'] = video_path 112 | h5_ucf_bc_traj.attrs['StartFrame'] = start_frame 113 | h5_ucf_bc_traj.attrs['TrajLen'] = CLIP_LENGTH 114 | h5_ucf_bc_traj.attrs['TrajCount'] = filtered_trajs.size 115 | h5_ucf_bc_traj.attrs['VidResH'] = height 116 | h5_ucf_bc_traj.attrs['VidResW'] = width 117 | f.flush() 118 | 119 | if __name__ == "__main__": 120 | # ======================================================================== 121 | # Load UCF101 dataset 122 | DATASET_DIR = '/media/haozekun/512SSD_2/UCF101_seq/UCF-101' # [EDIT ME!] 123 | 124 | # Load split file: 125 | f = open('trainlist01.txt','r') # Sample: ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi # [EDIT ME!] 126 | job_stor = [] 127 | for line in f: 128 | vid_name = line.split()[0] 129 | video_path = os.path.join(DATASET_DIR, vid_name) 130 | img_list = os.listdir(video_path) 131 | frame_count = 0 132 | for filename in img_list: 133 | frame_count = max(frame_count, int(filename.split('.')[0])) 134 | frame_count += 1 135 | for offset in range(0, frame_count - CLIP_LENGTH + 1, 8): # Stride = 8 136 | job_stor.append((video_path, frame_count, offset)) 137 | f.close() 138 | 139 | print('Job count: {:d}'.format(len(job_stor))) # 13320, or 9537 140 | 141 | # Load C extension...... 142 | ffi = FFI() 143 | ffi.cdef(''' 144 | typedef struct 145 | { 146 | int traj_length; 147 | int num_trajs; 148 | float* out_trajs; 149 | } Ret; 150 | 151 | void free_mem(); 152 | void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret * ret); 153 | ''') 154 | libtest = ffi.dlopen("./release/DenseTrackStab") 155 | 156 | # Load HDF5 database...... 157 | f = h5py.File("traj_stor_train.h5", 'a', libver='latest') # Supports Single-Write-Multiple-Read # [EDIT ME!]
158 | h5_ucf = f.require_group("UCFTraj") 159 | #h5_kt_bv = h5_pa.require_group("by_video") # /KITTITraj/by_video/%04d(videoNo)/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount) 160 | h5_ucf_bc = h5_ucf.require_group("by_clip") # /UCFTraj/by_clip/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount) 161 | f.swmr_mode = True 162 | 163 | pool.map(worker, range(len(job_stor))) 164 | 165 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/make/dep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (C) 2009 Alexander Kl"aser 4 | # 5 | # This piece is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU General Public License 7 | # as published by the Free Software Foundation; either version 2 8 | # of the License, or (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 | # 19 | # This software has been downloaded from: 20 | # http://lear.inrialpes.fr/people/klaeser/software 21 | # 22 | 23 | import sys 24 | import os 25 | import string 26 | import os.path 27 | import re 28 | 29 | HELP_USAGE = """ 30 | Usage: dep.py <rule target> <link file> <build dir> <root dep file> <include dir> ... 31 | """ 32 | 33 | regSuffix = re.compile(r"\.[^.]*$") 34 | regSrc = re.compile(r"^.*\.(c|cc|cpp)$") 35 | regDep = re.compile(r"^.*\.d$") 36 | regDepSplit = re.compile(r"\s*\\*\s*") 37 | 38 | suffixes = ['.cpp', '.c', '.cc'] 39 | includeDirs = [] 40 | 41 | 42 | def parseDepFile(fileName): 43 | # read in the dependency file 44 | depFile = open(fileName, 'r') 45 | depStr = depFile.read() 46 | 47 | # discard everything up to the colon 48 | colonPos = depStr.find(":") 49 | assert colonPos > 0, "the dependency file '" + fileName + "' does not have the correct format" 50 | depStr = depStr[colonPos + 1:] 51 | 52 | # collect all included files 53 | return regDepSplit.split(depStr) 54 | 55 | 56 | def findSourceFile(headerFile): 57 | # get the basename without extension 58 | headerFile = regSuffix.sub('', headerFile) 59 | if not headerFile: 60 | return None 61 | 62 | # iterate over known suffixes 63 | for suffix in suffixes: 64 | srcFile = headerFile + suffix 65 | 66 | # check whether a source file corresponding to the header exists 67 | if os.path.exists(srcFile): 68 | return srcFile 69 | 70 | # we add to the file path directory by directory and check whether it 71 | # exists in one of the include directories 72 | i = headerFile.find('/') + 1 73 | if i != 1: 74 | i = 0 75 | while True: 76 | # check whether a source file exists in one of the given include dirs 77 | for dir in includeDirs: 78 | # check all suffixes for source files 79 | for suffix in suffixes: 80 | srcFile = os.path.join(dir, headerFile[i:] + suffix) 81 | #srcFile = os.path.abspath(srcFile) 82 | if os.path.exists(srcFile): 83 | return srcFile 84 | 85 | # find next position of '/' 86 | i = headerFile.find('/', i) + 1 87 | if i <= 0: 88 | break 89 | 90 | return None 91 | 92 | 93 | def main(argv): 94 | global includeDirs 95 | 96 | # check command line parameters 97 | if len(sys.argv)
< 5: 98 | print HELP_USAGE 99 | return 100 | 101 | args = sys.argv 102 | args.pop(0) 103 | ruleTarget = args.pop(0) 104 | linkFile = args.pop(0) 105 | buildDir = args.pop(0) 106 | rootDepFile = args.pop(0) 107 | includeDirs = args 108 | 109 | 110 | # scan all dependency files for files we need to link to 111 | # do this recursively starting at the root dependency file 112 | linkFiles = set() 113 | incFiles = set() 114 | depFileStack = set([rootDepFile]) 115 | depFilesDone = set() 116 | while depFileStack: 117 | # get the next dependency file to process from the stack 118 | depFile = depFileStack.pop() 119 | if depFile in depFilesDone: 120 | continue 121 | depFilesDone.add(depFile) 122 | 123 | # iterate over all source files in the dependency file 124 | for nextFile in parseDepFile(depFile): 125 | newDepFile = "" 126 | 127 | # if we have a source file, we need to link against it 128 | if regSrc.match(nextFile): 129 | linkFiles.add(nextFile) 130 | newDepFile = buildDir + "/" + regSuffix.sub(".d", nextFile) 131 | 132 | # check whether a .cpp/.c/.cc file exists 133 | srcFile = findSourceFile(nextFile) 134 | if srcFile != None: 135 | linkFiles.add(srcFile) 136 | newDepFile = buildDir + "/" + regSuffix.sub(".d", srcFile) 137 | 138 | # if the corresponding .d file exists, add it to the stack 139 | if newDepFile and os.path.exists(newDepFile): 140 | depFileStack.add(newDepFile) 141 | 142 | # 143 | # generate all necessary rules 144 | # 145 | 146 | # all includes of dependency files 147 | for i in linkFiles: 148 | i = regSuffix.sub(".d", i) 149 | print "-include " + buildDir + "/" + i 150 | print 151 | 152 | # dependencies for link file 153 | print linkFile + ": \\" 154 | for i in linkFiles: 155 | i = regSuffix.sub(".d", i) 156 | print "\t" + buildDir + "/" + i + " \\" 157 | print 158 | 159 | # print out all files we need to link against 160 | print ruleTarget + ": " + linkFile + " \\" 161 | for i in linkFiles: 162 | i = regSuffix.sub(".o", i) 163 | print "\t" + buildDir + "/" + i + " \\" 164 | print 165 | 166 | 167 | if __name__ == "__main__": 168 | main( sys.argv ) 169 | 170 | 171 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/make/generic.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2009 Alexander Kl"aser 3 | # 4 | # This piece is free software; you can redistribute it and/or 5 | # modify it under the terms of the GNU General Public License 6 | # as published by the Free Software Foundation; either version 2 7 | # of the License, or (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program; if not, write to the Free Software 16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 | # 18 | # This software has been downloaded from: 19 | # http://lear.inrialpes.fr/people/klaeser/software 20 | # 21 | # 22 | # Variables that need to be set in the Makefile that includes this file: 23 | # TARGETS all files that are executables without their .cpp extension 24 | # BUILDDIR temporary dir where things are compiled to (optional, by default ".build") 25 | # BINDIR dir where executables are linked to (optional, by default "bin") 26 | # SRCDIRS list of directories in which source files are located 27 | # this variable needs to be set if you do not have your source and 28 | # include files located in the same directory! 29 | # 30 | # Variables used for compiling/linking: 31 | # CXXFLAGS flags for compiling 32 | # LDFLAGS flags used for linking 33 | # LDLIBS list of libraries to be linked 34 | # CXX compiler linker (should be g++ by default) 35 | # 36 | 37 | # set paths for the dependency tool and gcc 38 | DEP = make/dep.py 39 | 40 | # set some standard directories in case they have not been set 41 | BUILDDIR ?= .build 42 | BINDIR ?= bin 43 | 44 | # all include files 45 | INCLUDES := $(addprefix $(BUILDDIR)/,$(TARGETS:=.l)) 46 | 47 | 48 | # 49 | # some general rules 50 | # 51 | 52 | .PHONY: all clean 53 | .PRECIOUS: $(BUILDDIR)/%.d 54 | 55 | all: $(BINDIR) $(addprefix $(BINDIR)/,$(notdir $(TARGETS))) 56 | @echo "=== done ===" 57 | 58 | $(INCLUDES): $(BUILDDIR) 59 | 60 | clean: 61 | @echo "=== cleaning up ===" 62 | @rm -rf $(BUILDDIR) 63 | 64 | $(BUILDDIR) $(BINDIR): 65 | @echo "=== creating directory: $@ ===" 66 | @mkdir -p $@ 67 | 68 | 69 | # 70 | # rules for creating dependency files 71 | # 72 | 73 | # dependencies of .cpp files on other files 74 | $(BUILDDIR)/%.d: %.cpp 75 | @echo "=== creating dependency file: $@ ===" 76 | @test -e $(dir $@) || mkdir -p $(dir $@) 77 | g++ $(CXXFLAGS) -MM -MT $(BUILDDIR)/$*.o -MT $(BUILDDIR)/$*.d -MF $@ $< 78 | 79 | # dependencies for the linking 80 | %.so.l %.l: %.d 81 | @echo "=== creating dependency file: $@ ===" 82 | @test -e $(dir $@) || mkdir -p $(dir $@) 83 | $(DEP) "$(BINDIR)/$(@F:.l=)" $*.l $(BUILDDIR) $< $(SRCDIRS) > $@ 84 | 85 | 86 | # 87 | # rules for compiling and linking 88 | # (link dependencies are defined in .l files) 89 | # 90 | 91 | # compiling 92 | $(BUILDDIR)/%.o: %.cpp 93 | @echo "=== compiling: $@ ===" 94 | @test -e $(dir $@) || mkdir -p $(dir $@) 95 | $(CXX) -fPIC $(CXXFLAGS) -c -o $@ $< 96 | 97 | # linking for shared libraries 98 | $(BINDIR)/%.so: 99 | @echo "=== linking: $@ ===" 100 | @rm -f $@ 101 | $(CXX) -shared $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS) 102 | 103 | # linking 104 | $(BINDIR)/%: 105 | @echo "=== linking: $@ ===" 106 | @rm -f $@ 107 | $(CXX) $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS) 108 | 109 | %: %.o 110 | %.h: ; 111 | %.hpp: ; 112 | %.c: ; 113 | %.cpp: ; 114 | 115 | 116 | # 117 | # include dependency files 118 | # 119 | 120 | ifneq ($(MAKECMDGOALS),clean) 121 | -include $(INCLUDES) 122 | endif 123 | -------------------------------------------------------------------------------- /offline_traj/for_UCF101/view_traj.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | 6 | # For trajectory storage 7 | import h5py 8 | 9 | # Setting parameters 10 | TRAJ_H5_PATH = './traj_stor_train.h5' 11 | DATASET_DIR = '/media/haozekun/512SSD_2/UCF101_seq/UCF-101' 12 | 13 | f = h5py.File(TRAJ_H5_PATH, 'r', libver='latest') 14 | #
/UCFTraj/by_clip/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount) 15 | db = f["/UCFTraj/by_clip"] 16 | 17 | fig = plt.figure() 18 | 19 | for clip_name in db.keys(): 20 | video_path = db[clip_name].attrs['VidPath'] 21 | print(video_path) 22 | clip_start = db[clip_name].attrs['StartFrame'] 23 | clip_len = db[clip_name].attrs['TrajLen'] 24 | clip_num_trajs = db[clip_name].attrs['TrajCount'] 25 | clip_traj_data = db[clip_name] 26 | #cap = cv2.VideoCapture(video_path) 27 | #if not cap.isOpened(): 28 | # print('Video open failed!!!') 29 | #cap.set(cv2.CAP_PROP_POS_FRAMES ,clip_start) 30 | 31 | for ff in range(clip_len): 32 | #for ff in [0]: 33 | plt.clf() 34 | #ret, frame = cap.read() # 320 by 240 35 | #if not ret: 36 | # print('Frame read error!') 37 | frame = cv2.imread(video_path+'/'+str(clip_start+ff)+'.jpg') 38 | img_data = cv2.resize(frame, (256,192)) 39 | 40 | img_data = img_data[:,:,(2,1,0)] # h w c 41 | plt.imshow(img_data) 42 | for kk in range(clip_num_trajs): 43 | traj = clip_traj_data[kk,:,:] 44 | plt.scatter(traj[ff,0], traj[ff,1]) 45 | print('Count: {}'.format(kk)) 46 | fig.canvas.draw() 47 | plt.pause(0.001) 48 | #plt.waitforbuttonpress() 49 | #plt.show() 50 | #cap.release() 51 | --------------------------------------------------------------------------------
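For quick programmatic access (e.g. from a data loader), the trajectory store written by batch_process_dataset.py can be read back with a few lines of h5py. A minimal sketch, with the group layout and attribute names taken from the writer code above:

```python
import h5py

# Open the store produced by batch_process_dataset.py (default path used above)
with h5py.File('traj_stor_train.h5', 'r', libver='latest') as f:
    db = f['/UCFTraj/by_clip']
    for clip_name, clip in db.items():
        trajs = clip[()]  # shape (TrajCount, TrajLen, 2): (x, y) per frame
        print(clip_name, clip.attrs['VidPath'], clip.attrs['StartFrame'], trajs.shape)
        break  # inspect only the first clip
```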