├── Controllable_imgs
│   ├── Controllable_compare.gif
│   ├── Controllable_kitti.gif
│   ├── Controllable_rp.gif
│   └── Controllable_ucf_pushups.gif
├── LICENSE
├── README.md
├── datasets
│   ├── KITTI
│   │   ├── parse_sequence.py
│   │   └── png2jpg.py
│   ├── README.md
│   ├── RobotPush
│   │   ├── grab_train_images_to_hdf5.py
│   │   └── read_push_data.py
│   └── UCF-101
│       └── videos_to_jpg_seq.py
├── model
│   ├── README.md
│   ├── aeeval_kitti.py
│   ├── aeeval_rp.py
│   ├── aeeval_ucf.py
│   ├── dataset
│   │   ├── commons.py
│   │   ├── data_loader_kitti_reimpl.py
│   │   ├── data_loader_rp_reimpl.py
│   │   ├── data_loader_ucf_reimpl.py
│   │   └── utils
│   │       └── set_dataset_path.py
│   ├── guieval_rp.py
│   ├── models
│   │   ├── vgg_warper_weak_shortcut.py
│   │   └── vgg_warper_weak_shortcut_nobn.py
│   ├── ops
│   │   ├── cooltanh.py
│   │   ├── flow_warper.py
│   │   ├── flow_warper_pad_2x.py
│   │   ├── grad_hook.py
│   │   ├── hardshinkloss.py
│   │   └── laplace2d.py
│   ├── reader
│   │   ├── kitti_reader.py
│   │   ├── rp_reader.py
│   │   └── ucf_reader.py
│   ├── train_kitti.py
│   ├── train_rp.py
│   ├── train_ucf.py
│   └── utils
│       ├── trajs2map.py
│       └── visual.py
└── offline_traj
    ├── for_KITTI
    │   ├── DenseTrackStab.cpp
    │   ├── DenseTrackStab.h
    │   ├── Descriptors.h
    │   ├── Initialize.h
    │   ├── Makefile
    │   ├── OpticalFlow.h
    │   ├── README.md
    │   ├── Video.cpp
    │   ├── batch_process_dataset.py
    │   ├── make
    │   │   ├── dep.py
    │   │   └── generic.mk
    │   └── view_traj.py
    ├── for_RobotPush
    │   ├── DenseTrackStab.cpp
    │   ├── DenseTrackStab.h
    │   ├── Descriptors.h
    │   ├── Initialize.h
    │   ├── Makefile
    │   ├── OpticalFlow.h
    │   ├── README.md
    │   ├── Video.cpp
    │   ├── batch_process_dataset.py
    │   ├── make
    │   │   ├── dep.py
    │   │   └── generic.mk
    │   └── view_traj.py
    └── for_UCF101
        ├── DenseTrackStab.cpp
        ├── DenseTrackStab.h
        ├── Descriptors.h
        ├── Initialize.h
        ├── Makefile
        ├── OpticalFlow.h
        ├── README.md
        ├── Video.cpp
        ├── batch_process_dataset.py
        ├── make
        │   ├── dep.py
        │   └── generic.mk
        ├── testlist01.txt
        ├── trainlist01.txt
        └── view_traj.py
/Controllable_imgs/Controllable_compare.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zekunhao1995/ControllableVideoGen/cae9bdf46a4eee1145b268ec74189f9f6ccbbb42/Controllable_imgs/Controllable_compare.gif
--------------------------------------------------------------------------------
/Controllable_imgs/Controllable_kitti.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zekunhao1995/ControllableVideoGen/cae9bdf46a4eee1145b268ec74189f9f6ccbbb42/Controllable_imgs/Controllable_kitti.gif
--------------------------------------------------------------------------------
/Controllable_imgs/Controllable_rp.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zekunhao1995/ControllableVideoGen/cae9bdf46a4eee1145b268ec74189f9f6ccbbb42/Controllable_imgs/Controllable_rp.gif
--------------------------------------------------------------------------------
/Controllable_imgs/Controllable_ucf_pushups.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zekunhao1995/ControllableVideoGen/cae9bdf46a4eee1145b268ec74189f9f6ccbbb42/Controllable_imgs/Controllable_ucf_pushups.gif
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | Code for "Controllable Video Generation with Sparse Trajectories", CVPR'18.
3 |
4 | 1. Pre-process the datasets using the tools provided in the **datasets** directory.
5 | 2. Generate trajectories for each dataset using the code in the **offline_traj** directory.
6 | 3. Train & evaluate with the code in the **model** directory.
7 |
8 | **Warning: The code is provided in its original form without any cleanup.**
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/datasets/KITTI/parse_sequence.py:
--------------------------------------------------------------------------------
1 | #from os import listdir
2 | #from os.path import isfile, join
3 | import os
4 | import re
5 | import numpy as np
6 |
7 | # Directory layout: dataset/sequences/[00 to 21]/image_2/[000000 to n]
8 |
9 | kitti_path_prefix = '/data1/Video_Prediction/dataset/KITTI/dataset/sequences'
10 |
11 | def get_num(x):
12 | return int(''.join(ele for ele in x if ele.isdigit()))
13 |
14 | frame_count_stor = []
15 | for vid_id in range(21):
16 | vid_path_prefix = os.path.join(kitti_path_prefix, '{:02d}'.format(vid_id), 'image_2')
17 | video_file_list = os.listdir(vid_path_prefix)
18 | frame_count = 0
19 | for filename in video_file_list:
20 | frame_count = max(get_num(filename),frame_count)
21 | print(frame_count)
22 | frame_count_stor.append(frame_count)
23 |
24 | # 16 / 5 split
25 | # test: 15 11 7 5 4
26 | frame_count_cumsum = np.cumsum(frame_count_stor)
27 | print(frame_count_cumsum)
28 |
29 |
30 |
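31 | # This script only reports statistics: the largest frame index in each sequence
32 | # and their cumulative sum, used to pick the 16/5 train/test split noted above.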
--------------------------------------------------------------------------------
/datasets/KITTI/png2jpg.py:
--------------------------------------------------------------------------------
1 | #from os import listdir
2 | #from os.path import isfile, join
3 | import os
4 | import re
5 | import numpy as np
6 | import cv2
7 | # OpenBLAS screws up with CPU affinity
8 | # Spawned process will inherit this
9 | os.sched_setaffinity(0,range(os.cpu_count()))
10 |
11 | # Directory layout: dataset/sequences/[00 to 21]/image_2/[000000 to n]
12 |
13 | kitti_path_prefix = '/data1/Video_Prediction/dataset/KITTI/dataset/sequences'
14 |
15 | out_path_prefix = '/media/haozekun/512SSD_2/KITTI_bmp/dataset/sequences'
16 |
17 | cam_names = ['image_2', 'image_3']
18 |
19 | for vid_id in range(21):
20 | for cam_name in cam_names:
21 | vid_path_prefix = os.path.join(kitti_path_prefix, '{:02d}'.format(vid_id), cam_name)
22 | out_path = os.path.join(out_path_prefix, '{:02d}'.format(vid_id), cam_name)
23 |         os.makedirs(out_path, exist_ok=True)  # tolerate re-runs
24 | video_file_list = os.listdir(vid_path_prefix)
25 | print('{} - {}'.format(vid_id, cam_name))
26 | for filename in video_file_list:
27 | png_full_path = os.path.join(vid_path_prefix,filename)
28 | out_full_path = os.path.join(out_path,filename.rsplit('.',1)[0]+'.bmp')
29 | frame = cv2.imread(png_full_path)
30 | frame2x = cv2.resize(frame, (845,256), interpolation=cv2.INTER_AREA)
31 | cv2.imwrite(out_full_path,frame2x)
32 | #cv2.imwrite(out_full_path,frame2x,[cv2.IMWRITE_JPEG_QUALITY, 100])
33 | #cv2.imwrite(out_full_path,frame2x,[cv2.IMWRITE_WEBP_QUALITY, 100])
34 | #print(out_full_path)
35 |
36 |
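37 | # Note: despite the script name, frames are resized to 845x256 and written as
38 | # BMP; the JPEG/WebP variants are left commented out above.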
--------------------------------------------------------------------------------
/datasets/README.md:
--------------------------------------------------------------------------------
1 | # Scripts for preprocessing datasets
2 | As a part of code for "Controllable Video Generation with Sparse Trajectories", CVPR'18.
3 |
4 | 1. **KITTI Odometry**
5 | data_odometry_color.zip
6 | http://www.cvlibs.net/datasets/kitti/eval_odometry.php
7 | Converting PNGs to other formats is recommended to reduce CPU load (script provided).
8 |
9 | 2. **Push Dataset**
10 | https://sites.google.com/site/brainrobotdata/home/push-dataset
11 | You may want to use the provided script to convert TFRecords to HDF5 format for easier use outside TF.
12 |
13 | 3. **UCF101 - Action Recognition Data Set**
14 | http://crcv.ucf.edu/data/UCF101.php
15 | It is recommended to convert videos to image sequences for better random-access performance.
16 |
17 |
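18 | For example, to sanity-check the HDF5 produced for the Push Dataset (a minimal sketch mirroring `read_push_data.py`; adjust the path to wherever you wrote the file):
19 | 
20 | ```python
21 | import h5py
22 | import cv2
23 | 
24 | with h5py.File('robot_push_jpgs.h5', 'r') as f:
25 |     train = f['push/push_train']
26 |     print(train.attrs['video_count'], 'videos')
27 |     # frames are stored as raw JPEG bytes; decode one with OpenCV
28 |     img = cv2.imdecode(f['push/push_train/0/0.jpg'][()], -1)
29 |     print(img.shape)
30 | ```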
--------------------------------------------------------------------------------
/datasets/RobotPush/grab_train_images_to_hdf5.py:
--------------------------------------------------------------------------------
1 | """Code for converting TFRecords to HDF5"""
2 |
3 | import os
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | import h5py
8 | import re
9 |
10 | from tensorflow.python.platform import flags
11 | from tensorflow.python.platform import gfile
12 |
13 |
14 | FLAGS = flags.FLAGS
15 |
16 | # Original image dimensions
17 | ORIGINAL_WIDTH = 640
18 | ORIGINAL_HEIGHT = 512
19 | COLOR_CHAN = 3
20 | BATCH_SIZE = 25
21 |
22 | data_dir = 'push/push_train'
23 | #dest_dir = '/media/haozekun/512SSD_2/push_jpg'
24 | hdf5_path = '/media/haozekun/512SSD_2/robot_push_jpgs.h5'
25 |
26 | f = h5py.File(hdf5_path, 'w', libver='latest') # Supports Single-Write-Multiple-Read
27 | h5_push = f.require_group("push")
28 | h5_push_train = h5_push.require_group("push_train")
29 |
30 |
31 |
32 | def decode_proto(s_example, h5_push_train_vid):
33 | a = tf.train.Example()
34 | a.ParseFromString(s_example) # a: an example
35 | b = a.ListFields()[0][1].ListFields()[0][1]
36 |     prog = re.compile(r'move/(\d+)/image/encoded')
37 |
38 | num_imgs = 0
39 | for key in b.keys():
40 | m = prog.match(key)
41 | if m:
42 | img_id = int(m.group(1))
43 | v = b[key]
44 | raw_data = v.ListFields()[0][1].ListFields()[0][1][0]
45 |
46 | h5_push_train_vid_jpg = h5_push_train_vid.require_dataset('{}.jpg'.format(img_id), shape=(len(raw_data),), dtype=np.uint8)
47 |             h5_push_train_vid_jpg[:] = np.frombuffer(raw_data, dtype=np.uint8)
48 | num_imgs = max(num_imgs, img_id)
49 | return num_imgs+1
50 |
51 |
52 | filenames = gfile.Glob(os.path.join(data_dir, '*'))
53 | if not filenames:
54 | raise RuntimeError('No data files found.')
55 | vid_count = 0
56 | for filename in filenames:
57 | for s_example in tf.python_io.tf_record_iterator(filename):
58 | h5_push_train_vid = h5_push_train.require_group(str(vid_count))
59 | num_imgs = decode_proto(s_example, h5_push_train_vid)
60 | h5_push_train_vid.attrs['frame_count'] = num_imgs
61 | vid_count += 1
62 | print(vid_count)
63 |
64 | h5_push_train.attrs['video_count'] = vid_count
65 |
66 | f.flush()
67 | f.close()
68 |
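69 | # Resulting layout: push/push_train/<video_id>/<frame_id>.jpg stores the raw
70 | # JPEG bytes of one frame; each video group carries a 'frame_count' attribute
71 | # and push_train carries 'video_count' (see read_push_data.py for reading).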
--------------------------------------------------------------------------------
/datasets/RobotPush/read_push_data.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import numpy as np
3 | import cv2
4 |
5 | data_dir = 'push/push_train'
6 |
7 | f = h5py.File('robot_push_jpgs.h5', 'r')
8 | for video_id in range(f['push/push_train'].attrs['video_count']):
9 | for img_id in range(f['push/push_train/{}'.format(video_id)].attrs['frame_count']):
10 | img = cv2.imdecode(f['push/push_train/{}/{}.jpg'.format(video_id, img_id)][()], -1)
11 | print(img.shape)
12 | cv2.imshow('image',img)
13 | cv2.waitKey(100)
14 |
15 | cv2.destroyAllWindows()
16 |
--------------------------------------------------------------------------------
/datasets/UCF-101/videos_to_jpg_seq.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
4 | import numpy as np
5 | import h5py
6 | import re
7 |
8 | import cv2
9 |
10 | from multiprocessing.dummy import Pool as ThreadPool
11 | pool = ThreadPool(8)
12 |
13 | # Load UCF101 dataset
14 | DATASET_DIR = '/data2/UCF-101'
15 | RAWFRAME_DIR = '/data1/UCF101seq'
16 |
17 |
18 |
19 | def worker(action_dir):
20 | print(action_dir)
21 | video_files = os.listdir(os.path.join(DATASET_DIR, action_dir))
22 | action_out_dir = os.path.join(RAWFRAME_DIR, action_dir)
23 | os.mkdir(action_out_dir)
24 | for video_file in video_files:
25 | print(video_file)
26 | video_path = os.path.join(DATASET_DIR, action_dir, video_file)
27 | video_out_dir = os.path.join(RAWFRAME_DIR, action_dir, video_file)
28 | os.mkdir(video_out_dir)
29 | cap = cv2.VideoCapture(video_path)
30 |         if not cap.isOpened():
31 |             print('Video open failed!!!'); continue  # skip unreadable videos
32 |         length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
33 | for frame_no in range(length):
34 | ret, frame = cap.read() # 320 by 240
35 | if not ret:
36 | print('Frame read error!')
37 | break
38 | frame_out_path = os.path.join(RAWFRAME_DIR, action_dir, video_file, str(frame_no)+'.jpg')
39 | #cv2.imwrite(frame_out_path, frame, [cv2.IMWRITE_PNG_COMPRESSION, 4])
40 | cv2.imwrite(frame_out_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 95])
41 | cap.release()
42 |
43 |
44 | action_dir_list = os.listdir(DATASET_DIR)
45 | #for action_dir in action_dir_list:
46 |
47 |
48 | pool.map(worker, action_dir_list)
49 |
--------------------------------------------------------------------------------
/model/README.md:
--------------------------------------------------------------------------------
1 | # Main code for "Controllable Video Generation with Sparse Trajectories", CVPR'18.
2 |
3 | 1. Set up dataset paths with `./dataset/utils/set_dataset_path.py`. **READ THE FILE FOR MORE HINTS**
4 | 2. Run `train_[dataset_name].py` to train the video generation model.
5 | -- By default, the model takes 1-5 trajectories as input, which is suitable for human evaluation.
6 | -- Increase the number of input trajectories to 10 for quantitative (PSNR, SSIM) evaluation; too few trajectories make the motion too ambiguous.
7 | 3. Run `aeeval_[dataset_name].py` to evaluate the model on the test sets with the PSNR and SSIM metrics.
8 | -- Note that our model is not designed for video prediction. Results are for reference only.
9 | -- Our work aims at generating video clips in a user-controllable manner.
10 | 4. For an example of how to build a GUI for user evaluation, see the simplified `guieval_rp.py`.
11 | -- Edit `./reader/*.py` to match your dataset paths
12 | -- The first click defines the start point of a motion vector
13 | -- The second click defines the end point of the motion vector
14 | -- Each subsequent pair of clicks adds a new vector
15 | -- Left-click outside the canvas to clear all vectors
16 | -- Press the right mouse button to go to the next image
17 |
18 | - Requires PyTorch 0.3 (the code uses the pre-0.4 `Variable`/`volatile` API) for training/testing, and visdom for monitoring.
19 |
20 | **Warning: The code is provided in its original form without any cleanup. Read each program before running. Most files are self-explanatory.**
21 |
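22 | All train/eval scripts share the same warp-and-composite generator. A minimal sketch of its forward pass (module names as in `aeeval_*.py`; images assumed in [0, 1]):
23 | 
24 | ```python
25 | import torch
26 | import torch.nn.functional as F
27 | 
28 | def generate(warp_cnn, flow_warper, img_input, warp_input):
29 |     # the CNN predicts a dense flow field, a blending mask and a hallucinated image
30 |     warp_flow, masks, comp_imgs = warp_cnn(warp_input)
31 |     warp_imgs = flow_warper(img_input, warp_flow)  # warp the input frame along the flow
32 |     comp_imgs = F.hardtanh(comp_imgs, 0., 1.)      # clamp hallucinated pixels to [0, 1]
33 |     masks = F.sigmoid(masks)
34 |     # blend: warped pixels where the mask trusts the warp, hallucinated pixels elsewhere
35 |     return torch.mul(warp_imgs, masks) + torch.mul(comp_imgs, 1 - masks)
36 | ```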
--------------------------------------------------------------------------------
/model/aeeval_kitti.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | from torch.autograd import Variable
6 | from torchvision import datasets, transforms
7 | import torch.nn.functional as F
8 |
9 | import numpy as np
10 |
11 | from dataset.data_loader_kitti_reimpl import KITTIReader_traj
12 | from models.vgg_warper_weak_shortcut_nobn import VGG_Warper
13 | from utils.visual import colorcode, VisdomShow, pbar
14 |
15 | from ops.flow_warper_pad_2x import FlowWarp
16 | from ops.hardshinkloss import HardshinkLoss
17 | from ops.laplace2d import Laplace2D
18 |
19 | from skimage.measure import compare_ssim as ssim
20 | from skimage.measure import compare_psnr as psnr
21 | from skimage.measure import compare_mse as mse
22 |
23 | args = {}
24 | args['gpus'] = [0]
25 | args['seed'] = 12345
26 | torch.backends.cudnn.benchmark = True
27 |
28 |
29 | # Initialize Pytorch Dataloader
30 | datareader = KITTIReader_traj(is_test=True, max_interval=10, min_ntraj=10, max_ntraj=10, is_eval=True)
31 | train_loader = torch.utils.data.DataLoader(
32 | datareader, batch_size=4, shuffle=False, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=4, pin_memory=True, drop_last = True)
33 |
34 | class MModel(nn.Module):
35 | def __init__(self):
36 | super(MModel, self).__init__()
37 | self.warp_cnn = VGG_Warper(9)
38 | self.flow_warper = FlowWarp()
39 | self.mseloss = nn.MSELoss(size_average=True, reduce=True)
40 | self.hardshrinkloss = HardshinkLoss(0., 1.)
41 |
42 | def forward(self, img_input, warp_input, img_gt):
43 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2
44 | warp_imgs = self.flow_warper(img_input, warp_flow, padl=83)
45 | comp_imgs = F.hardtanh(comp_imgs,0.,1.)
46 | masks = F.sigmoid(masks)
47 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks)
48 |
49 | return recon_img, warp_flow, comp_imgs, masks
50 |
51 | mmodel = MModel()
52 | mmodel.cuda()
53 | mmodel = nn.DataParallel(mmodel, device_ids=[0])
54 |
55 | visual = VisdomShow('kitti_eval_10')
56 |
57 | def test():
58 | print('\n\n=========================== Testing ============================')
59 | mmodel.eval()
60 | mse_stor = []
61 | ssim_stor = []
62 | for batch_idx, (img_input, warp_input, img_gt, vid_mask, img_input_2x) in enumerate(train_loader):
63 | img_input = Variable(img_input, volatile=True).cuda(args['gpus'][0])
64 | img_input_2x = Variable(img_input_2x).cuda(args['gpus'][0])
65 | warp_input = Variable(warp_input, volatile=True).cuda(args['gpus'][0])
66 | img_gt = Variable(img_gt, volatile=True).cuda(args['gpus'][0])
67 | vid_mask = Variable(vid_mask, volatile=True).cuda(args['gpus'][0])
68 |
69 |
70 | # warp_input : [interval-1, 9, H, W]
71 | # print(warp_input.shape) # ([1, 9, 9, 192, 256])
72 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input_2x, warp_input, img_gt)
73 | recon_img *= vid_mask
74 | img_gt *= vid_mask
75 |
76 | gen_seq = recon_img.data.cpu().numpy()
77 | gt_seq = img_gt.data.cpu().numpy()
78 | mses = np.zeros(gen_seq.shape[0])
79 | ssims = np.zeros(gen_seq.shape[0])
80 | for i in range(gen_seq.shape[0]):
81 | gen = np.transpose(gen_seq[i,:,:,:], [1,2,0])
82 | gt = np.transpose(gt_seq[i,:,:,:], [1,2,0])
83 | mses[i] = mse(gen,gt)
84 | ssims[i] = ssim(gt, gen, data_range=1., multichannel=True)
85 |
86 | mse_stor.append(mses.reshape([-1,9]))
87 | ssim_stor.append(ssims.reshape([-1,9]))
88 |
89 |
90 | if batch_idx%1 == 0:
91 | pbar(batch_idx, len(train_loader), 0)
92 |
93 | if batch_idx%10 == 0:
94 | mse_a = np.concatenate(mse_stor, axis=0)
95 | ssim_a = np.concatenate(ssim_stor, axis=0)
96 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10)
97 | ssim_all = np.mean(ssim_a, axis=0)
98 |
99 | print('PSNR')
100 | print(psnr_all)
101 | print('SSIM')
102 | print(ssim_all)
103 |
104 | if batch_idx%10 == 0:
105 | out_seq = torch.cat((img_input[(0,),:,:,:],recon_img), dim=0).data.cpu().numpy()
106 | for i in range(out_seq.shape[0]):
107 | out_seq[i,:,:,:] = visual.add_text(out_seq[i,:,:,:], str(i), (0,1,1))
108 | out_gt = torch.cat((img_input[(0,),:,:,:],img_gt), dim=0).data.cpu().numpy()
109 | for i in range(out_gt.shape[0]):
110 | out_gt[i,:,:,:] = visual.add_text(out_gt[i,:,:,:], 'GT', (0,1,0))
111 |
112 | out_seq = np.concatenate((out_seq,out_gt), axis=3)
113 | visual.show_vid(out_seq)
114 |
115 | mse_a = np.concatenate(mse_stor, axis=0)
116 | ssim_a = np.concatenate(ssim_stor, axis=0)
117 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10)
118 | ssim_all = np.mean(ssim_a, axis=0)
119 | print('\nPSNR SSIM')
120 | for i in range(psnr_all.size):
121 | print('{} {}'.format(psnr_all[i], ssim_all[i]))
122 |
123 | def restore(ckpt_file):
124 | ckpt = torch.load(ckpt_file)
125 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict'])
126 | #optimizer.load_state_dict(ckpt['optimizer'])
127 | #hist = ckpt['hist']
128 | print('Restored from {}'.format(ckpt_file))
129 |
130 | restore('./snapshots/kitti/ckpt_e0_b0_rev2.pth')
131 | test()
132 |
133 |
134 |
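135 | # Note: psnr_all = -10*log(mean MSE)/log(10) is PSNR in dB, i.e. -10*log10(MSE),
136 | # the standard formula for images in [0, 1]. aeeval_rp.py and aeeval_ucf.py
137 | # follow the same recipe.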
--------------------------------------------------------------------------------
/model/aeeval_rp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | from torch.autograd import Variable
6 | from torchvision import datasets, transforms
7 | import torch.nn.functional as F
8 |
9 | import numpy as np
10 |
11 | from dataset.data_loader_rp_reimpl import RPReader_traj
12 | from models.vgg_warper_weak_shortcut import VGG_Warper
13 | from utils.visual import colorcode, VisdomShow, pbar
14 |
15 | from ops.flow_warper import FlowWarp
16 | from ops.hardshinkloss import HardshinkLoss
17 | from ops.laplace2d import Laplace2D
18 |
19 | from skimage.measure import compare_ssim as ssim
20 | from skimage.measure import compare_psnr as psnr
21 | from skimage.measure import compare_mse as mse
22 |
23 | args = {}
24 | args['gpus'] = [0]
25 | args['seed'] = 12345
26 | torch.backends.cudnn.benchmark = True
27 |
28 |
29 | # Initialize Pytorch Dataloader
30 | datareader = RPReader_traj(is_test=True, max_interval=10, min_ntraj=10, max_ntraj=10, is_eval=True)
31 | train_loader = torch.utils.data.DataLoader(
32 | datareader, batch_size=4, shuffle=False, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=4, pin_memory=True, drop_last = True)
33 |
34 | class MModel(nn.Module):
35 | def __init__(self):
36 | super(MModel, self).__init__()
37 | self.warp_cnn = VGG_Warper(9)
38 | self.flow_warper = FlowWarp()
39 | self.mseloss = nn.MSELoss(size_average=True, reduce=True)
40 | self.hardshrinkloss = HardshinkLoss(0., 1.)
41 |
42 | def forward(self, img_input, warp_input, img_gt):
43 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2
44 | warp_imgs = self.flow_warper(img_input, warp_flow)
45 | comp_imgs = F.hardtanh(comp_imgs,0.,1.)
46 | masks = F.sigmoid(masks)
47 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks)
48 |
49 | return recon_img, warp_flow, comp_imgs, masks
50 |
51 | mmodel = MModel()
52 | mmodel.cuda()
53 | mmodel = nn.DataParallel(mmodel, device_ids=[0])
54 |
55 | visual = VisdomShow('rp_eval_10')
56 |
57 | def test():
58 | print('\n\n=========================== Testing ============================')
59 | mmodel.eval()
60 | mse_stor = []
61 | ssim_stor = []
62 | for batch_idx, (img_input, warp_input, img_gt, vid_mask) in enumerate(train_loader):
63 | img_input = Variable(img_input, volatile=True).cuda(args['gpus'][0])
64 | warp_input = Variable(warp_input, volatile=True).cuda(args['gpus'][0])
65 | img_gt = Variable(img_gt, volatile=True).cuda(args['gpus'][0])
66 | vid_mask = Variable(vid_mask, volatile=True).cuda(args['gpus'][0])
67 |
68 |
69 | # warp_input : [interval-1, 9, H, W]
70 | # print(warp_input.shape) # ([1, 9, 9, 192, 256])
71 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input, warp_input, img_gt)
72 | recon_img *= vid_mask
73 | img_gt *= vid_mask
74 |
75 | gen_seq = recon_img.data.cpu().numpy()
76 | gt_seq = img_gt.data.cpu().numpy()
77 | mses = np.zeros(gen_seq.shape[0])
78 | ssims = np.zeros(gen_seq.shape[0])
79 | for i in range(gen_seq.shape[0]):
80 | gen = np.transpose(gen_seq[i,:,:,:], [1,2,0])
81 | gt = np.transpose(gt_seq[i,:,:,:], [1,2,0])
82 | mses[i] = mse(gen,gt)
83 | ssims[i] = ssim(gt, gen, data_range=1., multichannel=True)
84 |
85 | mse_stor.append(mses.reshape([-1,9]))
86 | ssim_stor.append(ssims.reshape([-1,9]))
87 |
88 |
89 | if batch_idx%1 == 0:
90 | pbar(batch_idx, len(train_loader), 0)
91 |
92 | if batch_idx%10 == 0:
93 | mse_a = np.concatenate(mse_stor, axis=0)
94 | ssim_a = np.concatenate(ssim_stor, axis=0)
95 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10)
96 | ssim_all = np.mean(ssim_a, axis=0)
97 |
98 | print('PSNR')
99 | print(psnr_all)
100 | print('SSIM')
101 | print(ssim_all)
102 |
103 | if batch_idx%10 == 0:
104 | out_seq = torch.cat((img_input[(0,),:,:,:],recon_img), dim=0).data.cpu().numpy()
105 | for i in range(out_seq.shape[0]):
106 | out_seq[i,:,:,:] = visual.add_text(out_seq[i,:,:,:], str(i), (0,1,1))
107 | out_gt = torch.cat((img_input[(0,),:,:,:],img_gt), dim=0).data.cpu().numpy()
108 | for i in range(out_gt.shape[0]):
109 | out_gt[i,:,:,:] = visual.add_text(out_gt[i,:,:,:], 'GT', (0,1,0))
110 |
111 | out_seq = np.concatenate((out_seq,out_gt), axis=3)
112 | visual.show_vid(out_seq)
113 |
114 | mse_a = np.concatenate(mse_stor, axis=0)
115 | ssim_a = np.concatenate(ssim_stor, axis=0)
116 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10)
117 | ssim_all = np.mean(ssim_a, axis=0)
118 | print('\nPSNR SSIM')
119 | for i in range(psnr_all.size):
120 | print('{} {}'.format(psnr_all[i], ssim_all[i]))
121 |
122 | def restore(ckpt_file):
123 | ckpt = torch.load(ckpt_file)
124 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict'])
125 | #optimizer.load_state_dict(ckpt['optimizer'])
126 | #hist = ckpt['hist']
127 | print('Restored from {}'.format(ckpt_file))
128 |
129 |
130 | restore('./snapshots/rp/ckpt_e0_b198000.pth')
131 | test()
132 |
133 |
134 |
--------------------------------------------------------------------------------
/model/aeeval_ucf.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | from torch.autograd import Variable
6 | from torchvision import datasets, transforms
7 | import torch.nn.functional as F
8 |
9 | import numpy as np
10 |
11 | from dataset.data_loader_ucf_reimpl import UCFReader_traj
12 | from models.vgg_warper_weak_shortcut import VGG_Warper
13 | from utils.visual import colorcode, VisdomShow, pbar
14 |
15 | from ops.flow_warper import FlowWarp
16 | from ops.hardshinkloss import HardshinkLoss
17 | from ops.laplace2d import Laplace2D
18 |
19 | from skimage.measure import compare_ssim as ssim
20 | from skimage.measure import compare_psnr as psnr
21 | from skimage.measure import compare_mse as mse
22 |
23 | args = {}
24 | args['gpus'] = [0]
25 | args['seed'] = 12345
26 | torch.backends.cudnn.benchmark = True
27 |
28 |
29 | # Initialize Pytorch Dataloader
30 | datareader = UCFReader_traj(is_test=True, max_interval=10, min_ntraj=10, max_ntraj=10, is_eval=True)
31 | train_loader = torch.utils.data.DataLoader(
32 | datareader, batch_size=4, shuffle=False, collate_fn=datareader.collate_fn_eval, worker_init_fn=datareader.worker_init_fn, num_workers=6, pin_memory=True, drop_last = True)
33 |
34 | class MModel(nn.Module):
35 | def __init__(self):
36 | super(MModel, self).__init__()
37 | self.warp_cnn = VGG_Warper(9)
38 | self.flow_warper = FlowWarp()
39 | self.mseloss = nn.MSELoss(size_average=True, reduce=True)
40 | self.hardshrinkloss = HardshinkLoss(0., 1.)
41 |
42 | def forward(self, img_input, warp_input, img_gt):
43 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2
44 | warp_imgs = self.flow_warper(img_input, warp_flow)
45 | comp_imgs = F.hardtanh(comp_imgs,0.,1.)
46 | masks = F.sigmoid(masks)
47 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks)
48 |
49 | return recon_img, warp_flow, comp_imgs, masks
50 |
51 | mmodel = MModel()
52 | mmodel.cuda()
53 | mmodel = nn.DataParallel(mmodel, device_ids=[0])
54 |
55 | visual = VisdomShow('ucf_eval_10')
56 |
57 | def test():
58 | print('\n\n=========================== Testing ============================')
59 | mmodel.eval()
60 | mse_stor = []
61 | ssim_stor = []
62 | for batch_idx, (img_input, warp_input, img_gt, vid_mask) in enumerate(train_loader):
63 | img_input = Variable(img_input, volatile=True).cuda(args['gpus'][0])
64 | warp_input = Variable(warp_input, volatile=True).cuda(args['gpus'][0])
65 | img_gt = Variable(img_gt, volatile=True).cuda(args['gpus'][0])
66 | vid_mask = Variable(vid_mask, volatile=True).cuda(args['gpus'][0])
67 |
68 |
69 | # warp_input : [interval-1, 9, H, W]
70 | # print(warp_input.shape) # ([1, 9, 9, 192, 256])
71 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input, warp_input, img_gt)
72 | recon_img *= vid_mask
73 | img_gt *= vid_mask
74 |
75 | gen_seq = recon_img.data.cpu().numpy()
76 | gt_seq = img_gt.data.cpu().numpy()
77 | mses = np.zeros(gen_seq.shape[0])
78 | ssims = np.zeros(gen_seq.shape[0])
79 | for i in range(gen_seq.shape[0]):
80 | gen = np.transpose(gen_seq[i,:,:,:], [1,2,0])
81 | gt = np.transpose(gt_seq[i,:,:,:], [1,2,0])
82 | mses[i] = mse(gen,gt)
83 | ssims[i] = ssim(gt, gen, data_range=1., multichannel=True)
84 |
85 | mse_stor.append(mses.reshape([-1,9]))
86 | ssim_stor.append(ssims.reshape([-1,9]))
87 |
88 |
89 | if batch_idx%1 == 0:
90 | pbar(batch_idx, len(train_loader), 0)
91 |
92 | if batch_idx%10 == 0:
93 | mse_a = np.concatenate(mse_stor, axis=0)
94 | ssim_a = np.concatenate(ssim_stor, axis=0)
95 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10)
96 | ssim_all = np.mean(ssim_a, axis=0)
97 |
98 | print('PSNR')
99 | print(psnr_all)
100 | print('SSIM')
101 | print(ssim_all)
102 |
103 | if batch_idx%10 == 0:
104 | out_seq = torch.cat((img_input[(0,),:,:,:],recon_img), dim=0).data.cpu().numpy()
105 | for i in range(out_seq.shape[0]):
106 | out_seq[i,:,:,:] = visual.add_text(out_seq[i,:,:,:], str(i), (0,1,1))
107 | out_gt = torch.cat((img_input[(0,),:,:,:],img_gt), dim=0).data.cpu().numpy()
108 | for i in range(out_gt.shape[0]):
109 | out_gt[i,:,:,:] = visual.add_text(out_gt[i,:,:,:], 'GT', (0,1,0))
110 |
111 | out_seq = np.concatenate((out_seq,out_gt), axis=3)
112 | visual.show_vid(out_seq)
113 |
114 | mse_a = np.concatenate(mse_stor, axis=0)
115 | ssim_a = np.concatenate(ssim_stor, axis=0)
116 | psnr_all = -10*np.log(np.mean(mse_a, axis=0))/np.log(10)
117 | ssim_all = np.mean(ssim_a, axis=0)
118 | print('\nPSNR SSIM')
119 | for i in range(psnr_all.size):
120 | print('{} {}'.format(psnr_all[i], ssim_all[i]))
121 |
122 | def restore(ckpt_file):
123 | ckpt = torch.load(ckpt_file)
124 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict'])
125 | #optimizer.load_state_dict(ckpt['optimizer'])
126 | #hist = ckpt['hist']
127 | print('Restored from {}'.format(ckpt_file))
128 |
129 | restore('./snapshots/ucf/ckpt_e0_b52000_cont.pth')
130 | test()
131 |
132 |
133 |
--------------------------------------------------------------------------------
/model/dataset/commons.py:
--------------------------------------------------------------------------------
1 | import cv2
2 |
3 |
4 | def trajs2featmap(trajs, kpmap_seq):
5 | for traj_no in range(trajs.shape[0]):
6 | #cv2.circle(frame, tuple(trajs[traj_no, frame_no, :]), 2, (0.,1.,0.))
7 | kp_start_x = trajs[traj_no,0,0]
8 | kp_start_y = trajs[traj_no,0,1]
9 | kp_end_x = trajs[traj_no,1,0]
10 | kp_end_y = trajs[traj_no,1,1]
11 |
12 |         kp_start_x_int = int(max(min(kp_start_x, kpmap_seq.shape[2]-1),0))
13 |         kp_start_y_int = int(max(min(kp_start_y, kpmap_seq.shape[1]-1),0))
14 | kp_dx = kp_end_x - kp_start_x
15 | kp_dy = kp_end_y - kp_start_y
16 | kpmap_seq[0,kp_start_y_int,kp_start_x_int] = 1.0
17 | kpmap_seq[1,kp_start_y_int,kp_start_x_int] = kp_dy
18 | kpmap_seq[2,kp_start_y_int,kp_start_x_int] = kp_dx
19 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5
20 |
21 |         kp_end_x_int = int(max(min(kp_end_x, kpmap_seq.shape[2]-1),0))
22 |         kp_end_y_int = int(max(min(kp_end_y, kpmap_seq.shape[1]-1),0))
23 | kp_dx2 = kp_start_x - kp_end_x
24 | kp_dy2 = kp_start_y - kp_end_y
25 | kpmap_seq[3,kp_end_y_int,kp_end_x_int] = 1.0
26 | kpmap_seq[4,kp_end_y_int,kp_end_x_int] = kp_dy2
27 | kpmap_seq[5,kp_end_y_int,kp_end_x_int] = kp_dx2
28 | return kpmap_seq
29 |
30 |
31 | def drawtrajs(trajs, frame_no, img):
32 | for traj_no in range(trajs.shape[0]):
33 | cv2.circle(img, tuple(trajs[traj_no, frame_no, :]), 2, (0.,1.,0.))
34 | return img
35 |
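36 | # Usage note (shapes inferred from the indexing above): trajs is
37 | # [num_trajs, 2, 2], holding (x, y) for the start (index 0) and end (index 1)
38 | # of each trajectory; kpmap_seq is a [6, H, W] float map. Channels 0-2 mark
39 | # start points with (presence, dy, dx); channels 3-5 mark end points with the
40 | # reversed displacement.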
--------------------------------------------------------------------------------
/model/dataset/utils/set_dataset_path.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | dataset_path = {}
4 | dataset_path['robot_push_jpgs_h5_train'] = '/datasets/robot_push_h5/robot_push_jpgs.h5'
5 | dataset_path['robot_push_jpgs_h5_test'] = '/datasets/robot_push_h5/robot_push_testnovel_jpgs.h5'
6 | dataset_path['robot_push_traj_h5_train'] = '/trajectories/rp/traj_stor_train.h5'
7 | dataset_path['robot_push_traj_h5_test'] = '/trajectories/rp/traj_stor_test.h5'
8 | dataset_path['ucf101_jpgs'] = '/datasets/UCF101_seq/UCF-101'
9 | dataset_path['ucf101_traj_h5_train'] = '/trajectories/ucf/traj_stor_train.h5'
10 | dataset_path['ucf101_traj_h5_test'] = '/trajectories/ucf/traj_stor_test.h5'
11 | dataset_path['kitti_traj_h5_train'] = '/trajectories/kitti/traj_stor_train.h5'
12 | dataset_path['kitti_traj_h5_test'] = '/trajectories/kitti/traj_stor_test_dense.h5'
13 | dataset_path['kitti_png'] = '/datasets/KITTI/dataset/sequences'
14 | dataset_path['kitti_bmp'] = '/datasets/KITTI_bmp/dataset/sequences'
15 |
16 | with open('../dataset_path.json', 'w') as f:
17 | json.dump(dataset_path, f)
18 |
19 | with open('../dataset_path.json', 'r') as f:
20 | data = json.load(f)
21 |
22 | print(data)
23 |
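24 | # Note: this writes '../dataset_path.json', so run it from inside
25 | # model/dataset/utils/ to place the JSON at model/dataset/dataset_path.json.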
--------------------------------------------------------------------------------
/model/guieval_rp.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch
3 | import torch.utils.data
4 | import torch.nn as nn
5 | import torch.optim as optim
6 | from torch.autograd import Variable
7 | from torchvision import datasets, transforms
8 | import torch.nn.functional as F
9 |
10 | import numpy as np
11 | from reader.rp_reader import RPReader
12 | from models.vgg_warper_weak_shortcut import VGG_Warper
13 | from ops.flow_warper import FlowWarp
14 | import matplotlib.pyplot as plt
15 |
16 | import time
17 | import itertools
18 | import math
19 |
20 | from scipy import misc
21 |
22 | from utils.trajs2map import trajs2map
23 | from utils.visual import colorcode
24 |
25 | # Setup parameters
26 | parser = argparse.ArgumentParser(description='Nothing')
27 | parser.add_argument('--batch-size', type=int, default=32, metavar='N',
28 |                     help='input batch size for training (default: 32)')
29 | parser.add_argument('--epochs', type=int, default=50000, metavar='N',
30 |                     help='number of epochs to train (default: 50000)')
31 | parser.add_argument('--no-cuda', action='store_true', default=False,
32 |                     help='disables CUDA training')
33 | parser.add_argument('--seed', type=int, default=1, metavar='S',
34 | help='random seed (default: 1)')
35 | parser.add_argument('--log-interval', type=int, default=100, metavar='N',
36 | help='how many batches to wait before logging training status')
37 |
38 | args = parser.parse_args()
39 | args.cuda = not args.no_cuda and torch.cuda.is_available()
40 |
41 | args.num_frames = 5
42 |
43 | args.gpus = [0]
44 | #torch.backends.cudnn.benchmark = True
45 |
46 | torch.manual_seed(args.seed)
47 | if args.cuda:
48 | torch.cuda.manual_seed(args.seed)
49 |
50 | reader = RPReader(num_frames=20)
51 |
52 | class MModel(nn.Module):
53 | def __init__(self):
54 | super(MModel, self).__init__()
55 | self.warp_cnn = VGG_Warper(9)
56 | self.flow_warper = FlowWarp()
57 |
58 | def forward(self, img_input, warp_input, img_gt):
59 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2
60 | warp_imgs = self.flow_warper(img_input, warp_flow)
61 | comp_imgs = F.hardtanh(comp_imgs,0.,1.)
62 | masks = F.sigmoid(masks)
63 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks)
64 |
65 | return recon_img, warp_flow, comp_imgs, masks, warp_imgs
66 |
67 |
68 | mmodel = MModel()
69 | mmodel.cuda(args.gpus[0])
70 |
71 | def get_test_batch():
72 | vid_seq, kpmap_seq, traj_list = reader[-1]
73 | vid_seq = torch.from_numpy(vid_seq).unsqueeze(0)
74 | kpmap_seq = torch.from_numpy(kpmap_seq).unsqueeze(0)
75 |
76 | vid_seq = Variable(vid_seq, volatile=True)
77 | kpmap_seq = Variable(kpmap_seq, volatile=True)
78 | vid_seq = vid_seq.cuda(args.gpus[0])
79 | kpmap_seq = kpmap_seq.cuda(args.gpus[0])
80 | return vid_seq, kpmap_seq, traj_list
81 | # traj_list: Num, Len, x/y
82 |
83 |
84 | # First click defines start point
85 | # second click defines end point
86 | # Click outside canvas to clear trajectories
87 | # Press right mouse button to go to next image
88 | def onclick(event):
89 | global sp, ep
90 | global clr, gonext
91 | global ix, iy
92 | if event.button == 3:
93 | gonext = True
94 | return
95 | ix, iy = event.xdata, event.ydata
96 | if ix is None:
97 | clr = True
98 | return
99 | print('x = %d, y = %d'%(ix, iy))
100 |
101 | if sp is not None:
102 | if ep is not None:
103 | sp = (ix, iy)
104 | ep = None
105 | else:
106 | ep = (ix, iy)
107 | else:
108 | sp = (ix, iy)
109 |
110 |
111 | #if len(coords) == 2:
112 | # fig.canvas.mpl_disconnect(cid)
113 |
114 | #return coords
115 |
116 |
117 | def img_chooser():
118 | global sp, ep, clr, gonext
119 | sp = None
120 | ep = None
121 | clr = False
122 | gonext = False
123 |
124 |
125 | fig = plt.figure(1)
126 | ax = fig.add_subplot(231)
127 | ax.set_title('click to build line segments')
128 | ax2 = fig.add_subplot(232)
129 | ax3 = fig.add_subplot(233)
130 | ax4 = fig.add_subplot(234)
131 | ax5 = fig.add_subplot(235)
132 | ax6 = fig.add_subplot(236)
133 | cid = fig.canvas.mpl_connect('button_press_event', onclick)
134 |
135 | mmodel.eval()
136 | counter=0
137 | while True:
138 | #plt.clf()
139 | vid_seq, kpmap_seq, traj_list = get_test_batch()
140 | fram_stor = []
141 | img_input = vid_seq[:,0,:,:,:]
142 | trajs = []
143 | while True:
144 | if gonext:
145 | gonext = False
146 | sp = None
147 | ep = None
148 | trajs = []
149 | break
150 | if sp is not None and ep is not None:
151 | print('Move!')
152 | trajs.append((sp,ep))
153 | if clr:
154 | print('Clr!')
155 | clr = False
156 | sp = None
157 | ep = None
158 | trajs = []
159 | kpmap_seq = trajs2map( trajs, img_input.size(2), img_input.size(3))
160 | warp_input = torch.cat((img_input, kpmap_seq), dim=1)
161 | recon_img, warp_flow, comp, alpha, warp_img = mmodel(img_input, warp_input, None)
162 |
163 | #img_gt = vid_seq[:,ff,:,:,:]
164 | #fram_stor.append(recon_img)
165 |
166 | fram = np.transpose(recon_img[0,:,:,:].data.cpu().numpy()+0.5, [1,2,0])
167 | framin = np.transpose(img_input[0,:,:,:].data.cpu().numpy()+0.5, [1,2,0])
168 | warpimga = np.transpose(warp_img[0,:,:,:].data.cpu().numpy()+0.5, [1,2,0])
169 | #misc.imsave('./FirstImage/{}.png'.format(counter), fram)
170 | counter += 1
171 | ax.clear()
172 | ax2.clear()
173 | ax3.clear()
174 | ax4.clear()
175 | ax5.clear()
176 | ax.imshow(framin)
177 | #ax6.imshow(framin)
178 | ax6.imshow(warpimga)
179 | ax5.imshow(fram)
180 |
181 | max_flow = torch.sqrt(torch.max(warp_flow[0,0,:,:]**2 + warp_flow[0,1,:,:]**2)).data.cpu().numpy()
182 | warp_flow_c = np.clip(colorcode(warp_flow.data.cpu().numpy()[0,0,:,:]/max_flow, warp_flow.data.cpu().numpy()[0,1,:,:]/max_flow),0,1)
183 | ax2.imshow(np.transpose(warp_flow_c,[1,2,0]))
184 | ax3.imshow(np.transpose(comp[0,:,:,:].data.cpu().numpy()+0.5, [1,2,0]))
185 | ax4.imshow(alpha[0,0,:,:].data.cpu().numpy()+0.5, cmap=plt.get_cmap('Greys'))
186 |
187 |
188 | for arr in trajs:
189 | ax.arrow( arr[0][0], arr[0][1], arr[1][0]-arr[0][0], arr[1][1]-arr[0][1], fc="g", ec="g",head_width=5, head_length=5 )
190 | fig.canvas.draw()
191 | fig.savefig('user_out/{}.png'.format(counter), bbox_inches='tight', pad_inches=0)
192 |
193 | plt.waitforbuttonpress()
194 |
195 |
196 | ckpt = torch.load('./ckpt_RP.pth')
197 | mmodel.load_state_dict(ckpt['mmodel_state_dict'])
198 | img_chooser()
199 |
200 |
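201 | # Panel layout: ax = input frame (with the drawn arrows), ax2 = color-coded
202 | # flow, ax3 = hallucinated (composite) image, ax4 = blending mask, ax5 = final
203 | # reconstruction, ax6 = flow-warped input.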
--------------------------------------------------------------------------------
/model/models/vgg_warper_weak_shortcut.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import math
5 | from torch.autograd import Variable
6 |
7 |
8 | class VGG_enc(nn.Module):
9 | def __init__(self, input_channels=6):
10 | super(VGG_enc, self).__init__()
11 | in_channels = input_channels
12 | self.c11 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
13 | self.bn11 = nn.BatchNorm2d(64)
14 | self.c12 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
15 | self.bn12 = nn.BatchNorm2d(64)
16 | self.p1 = nn.MaxPool2d(kernel_size=2, stride=2)
17 |
18 | self.c21 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
19 | self.bn21 = nn.BatchNorm2d(128)
20 | self.c22 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
21 | self.bn22 = nn.BatchNorm2d(128)
22 | self.p2 = nn.MaxPool2d(kernel_size=2, stride=2)
23 |
24 | self.c31 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
25 | self.bn31 = nn.BatchNorm2d(256)
26 | self.c32 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
27 | self.bn32 = nn.BatchNorm2d(256)
28 | self.c33 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
29 | self.bn33 = nn.BatchNorm2d(256)
30 | self.p3 = nn.MaxPool2d(kernel_size=2, stride=2)
31 |
32 | self.c41 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
33 | self.bn41 = nn.BatchNorm2d(512)
34 | self.c42 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
35 | self.bn42 = nn.BatchNorm2d(512)
36 | self.c43 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
37 | self.bn43 = nn.BatchNorm2d(512)
38 | self.p4 = nn.MaxPool2d(kernel_size=2, stride=2)
39 |
40 | self.c51 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
41 | self.bn51 = nn.BatchNorm2d(512)
42 | self.c52 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
43 | self.bn52 = nn.BatchNorm2d(512)
44 | self.c53 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
45 | self.bn53 = nn.BatchNorm2d(512)
46 |
47 | def forward(self, x):
48 | o11 = F.relu(self.bn11(self.c11(x)), inplace=True)
49 | o12 = F.relu(self.bn12(self.c12(o11)), inplace=True)
50 | o1p = self.p1(o12)
51 | o21 = F.relu(self.bn21(self.c21(o1p)), inplace=True)
52 | o22 = F.relu(self.bn22(self.c22(o21)), inplace=True)
53 | o2p = self.p2(o22)
54 | o31 = F.relu(self.bn31(self.c31(o2p)), inplace=True)
55 | o32 = F.relu(self.bn32(self.c32(o31)), inplace=True)
56 | o33 = F.relu(self.bn33(self.c33(o32)), inplace=True)
57 | o3p = self.p3(o33)
58 | o41 = F.relu(self.bn41(self.c41(o3p)), inplace=True)
59 | o42 = F.relu(self.bn42(self.c42(o41)), inplace=True)
60 | o43 = F.relu(self.bn43(self.c43(o42)), inplace=True)
61 | o4p = self.p4(o43)
62 | o51 = F.relu(self.bn51(self.c51(o4p)), inplace=True)
63 | o52 = F.relu(self.bn52(self.c52(o51)), inplace=True)
64 | o53 = F.relu(self.bn53(self.c53(o52)), inplace=True)
65 | return o53, o43, o33
66 |
67 | class VGG_dec(nn.Module):
68 | def __init__(self):
69 | super(VGG_dec, self).__init__()
70 | out_channels = 6
71 | self.c53 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
72 | self.bn53 = nn.BatchNorm2d(512)
73 | self.c52 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
74 | self.bn52 = nn.BatchNorm2d(512)
75 | self.c51 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
76 | self.bn51 = nn.BatchNorm2d(512)
77 | self.u5 = nn.Upsample(scale_factor=2, mode='nearest')
78 |
79 | self.c43 = nn.Conv2d(1024, 512, kernel_size=3, padding=1)
80 | self.bn43 = nn.BatchNorm2d(512)
81 | self.c42 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
82 | self.bn42 = nn.BatchNorm2d(512)
83 | self.c41 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
84 | self.bn41 = nn.BatchNorm2d(256)
85 | self.u4 = nn.Upsample(scale_factor=2, mode='nearest')
86 |
87 | self.c33 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
88 | self.bn33 = nn.BatchNorm2d(256)
89 | self.c32 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
90 | self.bn32 = nn.BatchNorm2d(256)
91 | self.c31 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
92 | self.bn31 = nn.BatchNorm2d(128)
93 | self.u3 = nn.Upsample(scale_factor=2, mode='nearest')
94 |
95 | self.c22 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
96 | self.bn22 = nn.BatchNorm2d(128)
97 | self.c21 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
98 | self.bn21 = nn.BatchNorm2d(64)
99 | self.u2 = nn.Upsample(scale_factor=2, mode='nearest')
100 |
101 | self.c12 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
102 | self.bn12 = nn.BatchNorm2d(64)
103 | #self.c11 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
104 | #self.bn11 = nn.BatchNorm2d(64)
105 |
106 |
107 | def forward(self, i53, i43, i33):
108 | o53 = F.relu(self.bn53(self.c53(i53)), inplace=True)
109 | o52 = F.relu(self.bn52(self.c52(o53)), inplace=True)
110 | o51 = F.relu(self.bn51(self.c51(o52)), inplace=True)
111 | o5u = self.u5(o51)
112 | o5c = torch.cat((o5u, i43), 1)
113 |
114 | o43 = F.relu(self.bn43(self.c43(o5c)), inplace=True)
115 | o42 = F.relu(self.bn42(self.c42(o43)), inplace=True)
116 | o41 = F.relu(self.bn41(self.c41(o42)), inplace=True)
117 | o4u = self.u4(o41)
118 | o4c = torch.cat((o4u, i33), 1)
119 |
120 | o33 = F.relu(self.bn33(self.c33(o4c)), inplace=True)
121 | o32 = F.relu(self.bn32(self.c32(o33)), inplace=True)
122 | o31 = F.relu(self.bn31(self.c31(o32)), inplace=True)
123 | o3u = self.u3(o31)
124 |
125 | o22 = F.relu(self.bn22(self.c22(o3u)), inplace=True)
126 | o21 = F.relu(self.bn21(self.c21(o22)), inplace=True)
127 | o2u = self.u2(o21)
128 |
129 | o12 = F.relu(self.bn12(self.c12(o2u)), inplace=True)
130 | #o11 = F.relu(self.bn11(self.c11(o12)), inplace=True)
131 |
132 | return o12
133 |
134 | class VGG_net(nn.Module):
135 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512]
136 | def __init__(self, input_channels):
137 | super(VGG_net, self).__init__()
138 | self.enc_net = VGG_enc(input_channels)
139 | self.dec_net = VGG_dec()
140 | self.conv_warp = nn.Conv2d(self.cfg[0], 2, kernel_size=3, padding=1)
141 | self.conv_mask = nn.Conv2d(self.cfg[0], 1, kernel_size=3, padding=1)
142 | self.conv_comp = nn.Conv2d(self.cfg[0], 3, kernel_size=3, padding=1)
143 | self._initialize_weights()
144 |
145 | def _initialize_weights(self):
146 | for m in self.modules():
147 | if isinstance(m, nn.Conv2d):
148 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
149 | m.weight.data.normal_(0, math.sqrt(2. / n))
150 | if m.bias is not None:
151 | m.bias.data.zero_()
152 | elif isinstance(m, nn.BatchNorm2d):
153 | m.weight.data.fill_(1)
154 | m.bias.data.zero_()
155 | elif isinstance(m, nn.Linear):
156 | m.weight.data.normal_(0, 0.01)
157 | m.bias.data.zero_()
158 |
159 | # input: Nx3x3x256x320
160 | def forward(self, x):
161 | dec_feat = self.dec_net(*self.enc_net(x))
162 | flow = self.conv_warp(dec_feat)
163 | mask = self.conv_mask(dec_feat)
164 | comp = self.conv_comp(dec_feat)
165 | return flow, mask, comp
166 |
167 |
168 |
169 | def VGG_Warper(input_channels = 6):
170 | return VGG_net(input_channels)
171 |
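172 | # Architecture summary: a VGG-style encoder downsamples 16x and returns
173 | # features at three scales (o53, o43, o33); the decoder upsamples back, with
174 | # "weak" shortcuts (skip concatenations at the two deepest scales only), and
175 | # three 3x3 heads predict a 2-channel flow field, a 1-channel blending mask
176 | # and a 3-channel hallucinated image.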
--------------------------------------------------------------------------------
/model/models/vgg_warper_weak_shortcut_nobn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import math
5 | from torch.autograd import Variable
6 |
7 |
8 | class VGG_enc(nn.Module):
9 | def __init__(self, input_channels=6):
10 | super(VGG_enc, self).__init__()
11 | in_channels = input_channels
12 | self.c11 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
13 | self.bn11 = nn.BatchNorm2d(64)
14 | self.c12 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
15 | self.bn12 = nn.BatchNorm2d(64)
16 | self.p1 = nn.MaxPool2d(kernel_size=2, stride=2)
17 |
18 | self.c21 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
19 | self.bn21 = nn.BatchNorm2d(128)
20 | self.c22 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
21 | self.bn22 = nn.BatchNorm2d(128)
22 | self.p2 = nn.MaxPool2d(kernel_size=2, stride=2)
23 |
24 | self.c31 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
25 | self.bn31 = nn.BatchNorm2d(256)
26 | self.c32 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
27 | self.bn32 = nn.BatchNorm2d(256)
28 | self.c33 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
29 | self.bn33 = nn.BatchNorm2d(256)
30 | self.p3 = nn.MaxPool2d(kernel_size=2, stride=2)
31 |
32 | self.c41 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
33 | self.bn41 = nn.BatchNorm2d(512)
34 | self.c42 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
35 | self.bn42 = nn.BatchNorm2d(512)
36 | self.c43 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
37 | self.bn43 = nn.BatchNorm2d(512)
38 | self.p4 = nn.MaxPool2d(kernel_size=2, stride=2)
39 |
40 | self.c51 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
41 | self.bn51 = nn.BatchNorm2d(512)
42 | self.c52 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
43 | self.bn52 = nn.BatchNorm2d(512)
44 | self.c53 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
45 | self.bn53 = nn.BatchNorm2d(512)
46 |
47 | def forward(self, x):
48 | o11 = F.relu(self.c11(x), inplace=True)
49 | o12 = F.relu(self.c12(o11), inplace=True)
50 | o1p = self.p1(o12)
51 | o21 = F.relu(self.c21(o1p), inplace=True)
52 | o22 = F.relu(self.c22(o21), inplace=True)
53 | o2p = self.p2(o22)
54 | o31 = F.relu(self.c31(o2p), inplace=True)
55 | o32 = F.relu(self.c32(o31), inplace=True)
56 | o33 = F.relu(self.c33(o32), inplace=True)
57 | o3p = self.p3(o33)
58 | o41 = F.relu(self.c41(o3p), inplace=True)
59 | o42 = F.relu(self.c42(o41), inplace=True)
60 | o43 = F.relu(self.c43(o42), inplace=True)
61 | o4p = self.p4(o43)
62 | o51 = F.relu(self.c51(o4p), inplace=True)
63 | o52 = F.relu(self.c52(o51), inplace=True)
64 | o53 = F.relu(self.c53(o52), inplace=True)
65 | return o53, o43, o33
66 |
67 | class VGG_dec(nn.Module):
68 | def __init__(self):
69 | super(VGG_dec, self).__init__()
70 | out_channels = 6
71 | self.c53 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
72 | self.bn53 = nn.BatchNorm2d(512)
73 | self.c52 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
74 | self.bn52 = nn.BatchNorm2d(512)
75 | self.c51 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
76 | self.bn51 = nn.BatchNorm2d(512)
77 | self.u5 = nn.Upsample(scale_factor=2, mode='nearest')
78 |
79 | self.c43 = nn.Conv2d(1024, 512, kernel_size=3, padding=1)
80 | self.bn43 = nn.BatchNorm2d(512)
81 | self.c42 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
82 | self.bn42 = nn.BatchNorm2d(512)
83 | self.c41 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
84 | self.bn41 = nn.BatchNorm2d(256)
85 | self.u4 = nn.Upsample(scale_factor=2, mode='nearest')
86 |
87 | self.c33 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
88 | self.bn33 = nn.BatchNorm2d(256)
89 | self.c32 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
90 | self.bn32 = nn.BatchNorm2d(256)
91 | self.c31 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
92 | self.bn31 = nn.BatchNorm2d(128)
93 | self.u3 = nn.Upsample(scale_factor=2, mode='nearest')
94 |
95 | self.c22 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
96 | self.bn22 = nn.BatchNorm2d(128)
97 | self.c21 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
98 | self.bn21 = nn.BatchNorm2d(64)
99 | self.u2 = nn.Upsample(scale_factor=2, mode='nearest')
100 |
101 | self.c12 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
102 | self.bn12 = nn.BatchNorm2d(64)
103 | #self.c11 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
104 | #self.bn11 = nn.BatchNorm2d(64)
105 |
106 |
107 | def forward(self, i53, i43, i33):
108 | o53 = F.relu(self.c53(i53), inplace=True)
109 | o52 = F.relu(self.c52(o53), inplace=True)
110 | o51 = F.relu(self.c51(o52), inplace=True)
111 | o5u = self.u5(o51)
112 | o5c = torch.cat((o5u, i43), 1)
113 |
114 | o43 = F.relu(self.c43(o5c), inplace=True)
115 | o42 = F.relu(self.c42(o43), inplace=True)
116 | o41 = F.relu(self.c41(o42), inplace=True)
117 | o4u = self.u4(o41)
118 | o4c = torch.cat((o4u, i33), 1)
119 |
120 | o33 = F.relu(self.c33(o4c), inplace=True)
121 | o32 = F.relu(self.c32(o33), inplace=True)
122 | o31 = F.relu(self.c31(o32), inplace=True)
123 | o3u = self.u3(o31)
124 |
125 | o22 = F.relu(self.c22(o3u), inplace=True)
126 | o21 = F.relu(self.c21(o22), inplace=True)
127 | o2u = self.u2(o21)
128 |
129 | o12 = F.relu(self.c12(o2u), inplace=True)
130 | #o11 = F.relu(self.bn11(self.c11(o12)), inplace=True)
131 |
132 | return o12
133 |
134 | class VGG_net(nn.Module):
135 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512]
136 | def __init__(self, input_channels):
137 | super(VGG_net, self).__init__()
138 | self.enc_net = VGG_enc(input_channels)
139 | self.dec_net = VGG_dec()
140 | self.conv_warp = nn.Conv2d(self.cfg[0], 2, kernel_size=3, padding=1)
141 | self.conv_mask = nn.Conv2d(self.cfg[0], 1, kernel_size=3, padding=1)
142 | self.conv_comp = nn.Conv2d(self.cfg[0], 3, kernel_size=3, padding=1)
143 | self._initialize_weights()
144 |
145 | def _initialize_weights(self):
146 | for m in self.modules():
147 | if isinstance(m, nn.Conv2d):
148 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
149 | m.weight.data.normal_(0, math.sqrt(2. / n))
150 | if m.bias is not None:
151 | m.bias.data.zero_()
152 | elif isinstance(m, nn.BatchNorm2d):
153 | m.weight.data.fill_(1)
154 | m.bias.data.zero_()
155 | elif isinstance(m, nn.Linear):
156 | m.weight.data.normal_(0, 0.01)
157 | m.bias.data.zero_()
158 |
159 | # input: N x input_channels x 256 x 320 (e.g. 9 = 3 RGB channels + 6 keypoint-map channels)
160 | def forward(self, x):
161 | dec_feat = self.dec_net(*self.enc_net(x))
162 | flow = self.conv_warp(dec_feat)
163 | mask = self.conv_mask(dec_feat)
164 | comp = self.conv_comp(dec_feat)
165 | return flow, mask, comp
166 |
167 |
168 |
169 | def VGG_Warper(input_channels = 6):
170 | return VGG_net(input_channels)
171 |
--------------------------------------------------------------------------------
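Both warper variants above share the same three-head design: a VGG-style encoder, a decoder with skip connections from the two intermediate encoder features (o43, o33), and three 3x3 convolution heads that read the 64-channel decoder output as a 2-channel flow field, a 1-channel blending mask, and a 3-channel composited image. In the `_nobn` variant the BatchNorm modules are still constructed but skipped in `forward`, presumably to keep the module layout compatible with the BN version. A minimal usage sketch (CPU, old-style `Variable` API as used throughout this repo):

    import torch
    from torch.autograd import Variable
    from models.vgg_warper_weak_shortcut_nobn import VGG_Warper

    net = VGG_Warper(input_channels=9)         # e.g. image (3ch) + keypoint maps (6ch)
    x = Variable(torch.rand(1, 9, 256, 320))
    flow, mask, comp = net(x)                  # 1x2x256x320, 1x1x256x320, 1x3x256x320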
/model/ops/cooltanh.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 |
4 | # Inherit from Function
5 | class CoolTanH(torch.autograd.Function):
6 | # Note that both forward and backward are @staticmethods
7 | @staticmethod
8 | def forward(ctx, input):
9 | ctx.save_for_backward(input)
10 | output = torch.clamp(input, min=0., max=1.)
11 | return output
12 |
13 | # This function has only a single output, so it gets only one gradient
14 | @staticmethod
15 | def backward(ctx, grad_output):
16 | input = ctx.saved_variables[0]
17 | # Pass a gradient only when a descent step would move an out-of-range input back toward [0,1]:
18 | # input > 1 & grad > 0 --> keep grad (step decreases input)
19 | # input > 1 & grad < 0 --> zero grad
20 | # input < 0 & grad < 0 --> keep grad (step increases input)
21 | # input < 0 & grad > 0 --> zero grad
22 | grad_ltz = grad_output < 0. # True where the gradient is negative
23 | passcond = ((input > 1.)&(grad_ltz^1)) | ((input < 0.)&grad_ltz)
24 | grad_input = grad_output*(passcond.type(torch.cuda.FloatTensor))
25 | return grad_input
26 |
27 | #cooltanh = CoolTanH.apply
28 |
--------------------------------------------------------------------------------
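In effect `CoolTanH` is a clamp to [0, 1] whose backward pass only lets a gradient through when a descent step would pull an out-of-range input back into the range; in-range inputs receive no gradient at all. A minimal sketch, assuming a CUDA device (the backward hardcodes `torch.cuda.FloatTensor`):

    import torch
    from torch.autograd import Variable
    from ops.cooltanh import CoolTanH

    x = Variable(torch.cuda.FloatTensor([-0.5, 0.5, 1.5]), requires_grad=True)
    y = CoolTanH.apply(x)                         # forward: [0.0, 0.5, 1.0]
    y.backward(torch.cuda.FloatTensor([1., 1., 1.]))
    print(x.grad)                                 # [0., 0., 1.]: only x[2] is >1 with a grad pushing it down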
/model/ops/flow_warper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | import torch.nn.functional as F
4 | import torch.nn as nn
5 |
6 | class FlowWarp(nn.Module):
7 | def __init__(self):
8 | super(FlowWarp, self).__init__()
9 | self.h = -1;
10 | self.w = -1;
11 |
12 | def forward(self, x, f):
13 | # First, generate absolute coordinate from relative coordinates
14 | # f: N (rx,ry) oH oW
15 | # target: N oH oW (ax(width),ay(height))
16 |
17 | # Generate offset map
18 | width = x.size()[3]
19 | height = x.size()[2]
20 | if width != self.w or height != self.h:
21 | width_map = torch.arange(0, width, step=1).expand([height, width])
22 | height_map = torch.arange(0, height, step=1).unsqueeze(1).expand([height, width])
23 | self.offset_map = Variable(torch.stack([width_map,height_map],2).cuda())
24 | self.w = width
25 | self.h = height
26 | self.scaler = Variable(1./torch.cuda.FloatTensor([(self.w-1)/2, (self.h-1)/2]))
27 |
28 | f = f.permute(0,2,3,1) # N H W C
29 | f = f + self.offset_map # add with dimension expansion
30 | f = f * self.scaler - 1 # scale to [-1,1]
31 |
32 | return F.grid_sample(x, f, mode='bilinear') # bilinear sampling at the absolute grid locations
33 |
34 |
--------------------------------------------------------------------------------
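`FlowWarp` turns a per-pixel relative flow (dx, dy) into absolute `grid_sample` coordinates by adding a cached pixel-coordinate map and normalizing to [-1, 1]; the map and scaler are regenerated only when the input size changes. A minimal usage sketch, assuming a CUDA device (the module allocates its offset map on the GPU):

    import torch
    from torch.autograd import Variable
    from ops.flow_warper import FlowWarp

    warper = FlowWarp()
    img  = Variable(torch.rand(1, 3, 8, 8).cuda())
    flow = Variable(torch.zeros(1, 2, 8, 8).cuda())   # relative (dx, dy) per output pixel
    out = warper(img, flow)                           # zero flow reproduces img exactly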
/model/ops/flow_warper_pad_2x.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | import torch.nn.functional as F
4 | import torch.nn as nn
5 |
6 | class FlowWarp(nn.Module):
7 | def __init__(self):
8 | super(FlowWarp, self).__init__()
9 | self.h = -1; self.w = -1
10 | self.ow = -1; self.oh = -1; self.padl = None # avoid AttributeError on the first forward pass
11 |
12 | def forward(self, x, f, padl):
13 | # First, generate absolute coordinate from relative coordinates
14 | # f: N (rx,ry) oH oW
15 | # target: N oH oW (ax(width),ay(height))
16 |
17 | # Generate offset map
18 | width = x.size()[3]
19 | height = x.size()[2]
20 | ow = f.size()[3]
21 | oh = f.size()[2]
22 | if width != self.w or height != self.h or ow != self.ow or oh != self.oh or padl != self.padl:
23 | width_map = torch.arange(0+padl, ow+padl, step=1).expand([oh, ow])
24 | height_map = torch.arange(0, oh, step=1).unsqueeze(1).expand([oh, ow])
25 | self.offset_map = Variable(torch.stack([width_map,height_map],2).cuda())
26 | self.w = width
27 | self.h = height
28 | self.oh = oh
29 | self.ow = ow
30 | self.padl = padl
31 | self.scaler = Variable(2./torch.cuda.FloatTensor([(self.w-1)/2, (self.h-1)/2]))
32 |
33 | f = f.permute(0,2,3,1) # N H W C
34 | f = f + self.offset_map # add with dimension expansion
35 | f = f * self.scaler - 1 # scale to [-1,1]
36 |
37 | return F.grid_sample(x, f, mode='bilinear') # bilinear sampling at the absolute grid locations
38 |
39 |
--------------------------------------------------------------------------------
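The `_pad_2x` variant samples from an input at twice the flow map's resolution, with the flow grid describing a horizontal crop that starts `padl` pixels into the source (in 1x coordinates); the `2./...` scaler accounts for the doubled pixel grid. This matches how `train_kitti.py` uses it: a 2x KITTI frame of 256x845 is warped into the 128x256 training crop that begins at column 83. A minimal sketch, assuming a CUDA device:

    import torch
    from torch.autograd import Variable
    from ops.flow_warper_pad_2x import FlowWarp

    warper = FlowWarp()
    img_2x = Variable(torch.rand(1, 3, 256, 845).cuda())   # 2x-resolution frame
    flow   = Variable(torch.zeros(1, 2, 128, 256).cuda())  # flow over the 1x crop
    out = warper(img_2x, flow, padl=83)                    # 1x3x128x256, sampled at 2x detail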
/model/ops/grad_hook.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 |
4 | # Identity pass-through that exposes a point for inspecting gradients
5 | class GradHook(torch.autograd.Function):
6 | # Note that both forward and backward are @staticmethods
7 | @staticmethod
8 | def forward(ctx, input):
9 | ctx.save_for_backward(input)
10 | return input
11 |
12 | # This function has only a single output, so it gets only one gradient
13 | @staticmethod
14 | def backward(ctx, grad_output):
15 | input = ctx.saved_variables[0]
16 | # inspect or modify grad_output here as needed
17 | return grad_output
18 |
19 | #grad_hook = GradHook.apply
20 |
--------------------------------------------------------------------------------
/model/ops/hardshinkloss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 |
5 |
6 | class HardshinkLoss(nn.Module):
7 | def __init__(self, lowbound, upbound):
8 | super(HardshinkLoss, self).__init__()
9 | self.lowbound = lowbound
10 | self.upbound = upbound
11 |
12 | def forward(self, input):
13 | passcond = (input>self.upbound)|(input<self.lowbound)
14 | # penalize only values outside [lowbound, upbound]
15 | # (reconstructed; the original tail of this file was lost to markup stripping)
16 | loss = torch.mean(input**2 * passcond.type_as(input))
17 | return loss
18 |
--------------------------------------------------------------------------------
/model/reader/kitti_reader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import math
4 | import numpy as np
5 | import random
6 | from scipy import misc # for imread/imresize
7 | import h5py
8 |
9 | from scipy.cluster.vq import kmeans,kmeans2,vq
10 |
11 |
12 | def filter_trajs_kmeans(trajs, num_centroids):
13 | num_trajs = trajs.shape[0]
14 | len_trajs = trajs.shape[1]
15 | traj_vec_stor = np.empty((num_trajs, (len_trajs-1)*2), np.float32)
16 | disp_stor = np.empty((num_trajs,), np.float32)
17 |
18 | for ii in range(num_trajs):
19 | traj = trajs[ii,:,:] # n-by-2
20 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point
21 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1)))
22 | # Remove trajectories that have very low displacement
23 | good_trajs = np.flatnonzero(disp_stor>0.4)
24 | traj_vec_stor = traj_vec_stor[good_trajs,:]
25 |
26 | if traj_vec_stor.shape[0] < num_centroids: # too few points
27 | #print("kmeans: TOO FEW USABLE KEYPOINTS")
28 | return good_trajs # try to use all of them
29 |
30 | # k-means on vectors
31 | #num_centroids = 10
32 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100)
33 | centroids,distortion = kmeans(traj_vec_stor,num_centroids, iter=20) # scipy's kmeans returns (codebook, mean distortion), not per-point labels
34 |
35 | # Sample
36 | # Find the nearest vector to each centroid
37 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # one trajectory index per centroid
38 |
39 | rep = good_trajs[rep]
40 |
41 | return rep # return the index of K most representative trajectories
42 |
43 |
44 |
45 |
46 | class KITTIReader():
47 | TRAJ_H5_PATH = '/trajectories/kitti/traj_stor_test.h5'
48 | DATASET_DIR = '/datasets/KITTI/dataset/sequences'
49 |
50 | def _calc_traj_len(self, traj): # [Traj_no, num_point, (x,y)]
51 | dx = np.sum((traj[:,0:-1,:]-traj[:,1:,:])**2, axis=2)
52 |
53 | def __init__(self, num_frames=10):
54 | self._clip_stor = []
55 | self._num_frames = num_frames
56 | self.height = 128
57 | self.width = 256
58 |
59 | traj_h5 = h5py.File(self.TRAJ_H5_PATH, 'r', libver='latest')
60 | traj_db = traj_h5["/KITTITraj/by_clip"]
61 | # Load all .mat files to memory
62 | print('Loading trajectories for KITTI dataset...')
63 | for clip_name in traj_db.keys():
64 | clip_start = traj_db[clip_name].attrs['StartFrame']
65 | clip_len = traj_db[clip_name].attrs['TrajLen']
66 | clip_num_trajs = traj_db[clip_name].attrs['TrajCount']
67 | clip_traj_data = np.array(traj_db[clip_name])
68 | clip_video_id = traj_db[clip_name].attrs['VidNo']
69 |
70 | new_clip = {}
71 | new_clip['vid_name'] = clip_video_id
72 | new_clip['clip_start'] = clip_start
73 | new_clip['clip_len'] = clip_len
74 | new_clip['clip_num_trajs'] = clip_num_trajs
75 | new_clip['clip_trajs'] = clip_traj_data
76 | self._clip_stor.append(new_clip)
77 |
78 | print('[KITTI Trajectory Statistics]')
79 | print('Clip count: %d' % (len(self._clip_stor)))
80 | traj_h5.close()
81 |
82 | def get_traj_input(self, trajs, start_frame, num_frames):
83 | num_trajs = trajs.shape[0]
84 | # Load annotations
85 | # Format: 2(frames), 3(T/F,dx,dy), H, W
86 | kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32)
87 |
88 | #num_appear_trajs = min(num_trajs,10)
89 | num_appear_trajs = min(num_trajs,1)
90 | #good_idx = filter_trajs_kmeans(trajs[:,start_frame:start_frame+num_frames,:], 10)
91 |
92 | appear_trajs = random.sample(range(num_trajs), num_appear_trajs)
93 |
94 | traj_list = trajs[appear_trajs, start_frame:start_frame+num_frames, :]
95 | for ff in range(num_frames):
96 | for traj_no in appear_trajs:
97 | kp_start_x = trajs[traj_no,start_frame,0]
98 | kp_start_y = trajs[traj_no,start_frame,1]
99 | kp_end_x = trajs[traj_no,start_frame+ff,0]
100 | kp_end_y = trajs[traj_no,start_frame+ff,1]
101 |
102 | kp_start_x_int = int(max(min(kp_start_x, self.width),0))
103 | kp_start_y_int = int(max(min(kp_start_y, self.height),0))
104 | kp_dx = kp_end_x - kp_start_x
105 | kp_dy = kp_end_y - kp_start_y
106 | kpmap_seq[ff, 0,kp_start_y_int,kp_start_x_int] = 1.0
107 | kpmap_seq[ff, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16.
108 | kpmap_seq[ff, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16.
109 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5
110 |
111 | kp_end_x_int = int(max(min(kp_end_x, self.width),0))
112 | kp_end_y_int = int(max(min(kp_end_y, self.height),0))
113 | kp_dx2 = kp_start_x - kp_end_x
114 | kp_dy2 = kp_start_y - kp_end_y
115 | kpmap_seq[ff, 3,kp_end_y_int,kp_end_x_int] = 1.0
116 | kpmap_seq[ff, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16.
117 | kpmap_seq[ff, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16.
118 |
119 | return kpmap_seq, traj_list
120 |
121 | def __getitem__(self, idx):
122 | if idx == -1:
123 | idx = random.randint(0,len(self._clip_stor)-1) # randint is inclusive on both ends
124 |
125 | annot = self._clip_stor[idx]
126 |
127 | vid_name = annot['vid_name']
128 | frame_count = annot['clip_len']
129 | clip_start = annot['clip_start']
130 |
131 | num_frames = self._num_frames
132 | # random start frame
133 | start_frame = random.randint(0,frame_count-num_frames)
134 |
135 | # loading frames
136 | vid_seq = np.empty([num_frames,3,self.height,self.width], dtype=np.float32)
137 | for ff in range(num_frames): # load num_frames consecutive frames
138 | frame_no = start_frame+clip_start+ff
139 | img_path = os.path.join(self.DATASET_DIR, '{:02d}'.format(vid_name), 'image_2', '{:06d}.png'.format(frame_no))
140 | img_load = misc.imread(img_path) # h w c
141 | img = misc.imresize(img_load, (128,422))
142 | if ff == 0:
143 | img_2x = misc.imresize(img_load, (256,845))
144 | img_ori = img_2x.astype(np.float32)
145 | img = img[:,83:339,:]
146 | vid_seq[ff,:,:,:] = np.transpose(img, (2,0,1))/255.0
147 |
148 | img_ori = np.transpose(img_ori, (2,0,1))/255.0 - 0.5
149 | vid_seq = vid_seq - 0.5 # num_frames C H W, [-0.5,0.5]
150 |
151 | num_trajs = annot['clip_num_trajs']
152 | trajs = annot['clip_trajs']
153 | kpmap_seq, traj_list = self.get_traj_input(trajs, start_frame, num_frames)
154 |
155 |
156 | print(idx, start_frame)
157 | return vid_seq, kpmap_seq, traj_list, img_ori
158 |
159 |
160 |
161 |
162 |
163 |
--------------------------------------------------------------------------------
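The 6-channel keypoint map built by `get_traj_input` encodes every sampled trajectory twice: a marker at the start point with the scaled (dy, dx) offset to the end point, and a marker at the end point with the reverse offset. A minimal sketch of the encoding for one trajectory (coordinates are made up for illustration):

    import numpy as np

    kpmap = np.zeros((6, 128, 256), np.float32)      # channels, H, W
    sx, sy, ex, ey = 40., 30., 52., 38.              # hypothetical start/end point
    kpmap[0, int(sy), int(sx)] = 1.0                 # start marker
    kpmap[1, int(sy), int(sx)] = (ey - sy) / 16.     # dy to the end point, scaled
    kpmap[2, int(sy), int(sx)] = (ex - sx) / 16.     # dx to the end point, scaled
    kpmap[3, int(ey), int(ex)] = 1.0                 # end marker
    kpmap[4, int(ey), int(ex)] = (sy - ey) / 16.     # reverse dy
    kpmap[5, int(ey), int(ex)] = (sx - ex) / 16.     # reverse dx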
/model/reader/rp_reader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import math
4 | import numpy as np
5 | import cv2
6 | import random
7 |
8 | import h5py
9 |
10 | from scipy.cluster.vq import kmeans,kmeans2,vq
11 |
12 | def filter_trajs_kmeans(trajs, num_centroids):
13 | num_trajs = trajs.shape[0]
14 | len_trajs = trajs.shape[1]
15 | traj_vec_stor = np.empty((num_trajs, (len_trajs-1)*2), np.float32)
16 | disp_stor = np.empty((num_trajs,), np.float32)
17 |
18 | for ii in range(num_trajs):
19 | traj = trajs[ii,:,:] # n-by-2
20 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point
21 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1)))
22 | # Remove trajectories that have very low displacement
23 | good_trajs = np.flatnonzero(disp_stor>0.4)
24 | traj_vec_stor = traj_vec_stor[good_trajs,:]
25 |
26 | if traj_vec_stor.shape[0] < num_centroids: # too few points
27 | #print("kmeans: TOO FEW USABLE KEYPOINTS")
28 | return good_trajs # try to use all of them
29 |
30 | # k-means on vectors
31 | #num_centroids = 10
32 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100)
33 | centroids,distortion = kmeans(traj_vec_stor,num_centroids, iter=20) # scipy's kmeans returns (codebook, mean distortion), not per-point labels
34 |
35 | # Sample
36 | # Find the nearest vector to each centroid
37 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # one trajectory index per centroid
38 |
39 | rep = good_trajs[rep]
40 |
41 | return rep # return the index of K most representative trajectories
42 |
43 |
44 |
45 |
46 | class RPReader():
47 | TRAJ_H5_PATH = '/trajectories/rp/traj_stor_test.h5'
48 | JPG_H5_PATH = '/datasets/robot_push_h5/robot_push_testnovel_jpgs.h5'
49 |
50 | def _calc_traj_len(self, traj): # [Traj_no, num_point, (x,y)]
51 | dx = np.sum((traj[:,0:-1,:]-traj[:,1:,:])**2, axis=2)
52 |
53 | def __init__(self, num_frames=10):
54 | #self._clip_stor = []
55 | self._num_frames = num_frames
56 | self.height = 192
57 | self.width = 240
58 |
59 |
60 | traj_h5 = h5py.File(self.TRAJ_H5_PATH, 'r', libver='latest')
61 | traj_db = traj_h5["/RPTraj/by_clip"]
62 | self.clip_names = list(traj_db.keys())
63 | self.clip_num = len(self.clip_names)
64 |
65 | jpg_h5 = h5py.File(self.JPG_H5_PATH, 'r', libver='latest')
66 | jpg_h5 = jpg_h5["push/push_testnovel"]
67 |
68 | print('[Robot Push Trajectory Statistics]')
69 | print('Clip count: %d' % (self.clip_num))
70 |
71 | self.traj_db = traj_db
72 | self.jpg_h5 = jpg_h5
73 |
74 |
75 |
76 | def get_traj_input(self, trajs, start_frame, num_frames):
77 | num_trajs = trajs.shape[0]
78 | # Load annotations
79 | # Format: 2(frames), 3(T/F,dx,dy), H, W
80 | kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32)
81 |
82 | #num_appear_trajs = min(num_trajs,10)
83 | #num_appear_trajs = min(num_trajs,3)
84 | num_appear_trajs = random.randint(1,min(num_trajs,4))
85 | #good_idx = filter_trajs_kmeans(trajs[:,start_frame:start_frame+num_frames,:], 10)
86 |
87 | appear_trajs = random.sample(range(num_trajs), num_appear_trajs)
88 |
89 | traj_list = trajs[appear_trajs, start_frame:start_frame+num_frames, :]
90 | for ff in range(num_frames):
91 | for traj_no in appear_trajs:
92 | kp_start_x = trajs[traj_no,start_frame,0]
93 | kp_start_y = trajs[traj_no,start_frame,1]
94 | kp_end_x = trajs[traj_no,start_frame+ff,0]
95 | kp_end_y = trajs[traj_no,start_frame+ff,1]
96 |
97 | kp_start_x_int = int(max(min(kp_start_x, self.width),0))
98 | kp_start_y_int = int(max(min(kp_start_y, self.height),0))
99 | kp_dx = kp_end_x - kp_start_x
100 | kp_dy = kp_end_y - kp_start_y
101 | kpmap_seq[ff, 0,kp_start_y_int,kp_start_x_int] = 1.0
102 | kpmap_seq[ff, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16.
103 | kpmap_seq[ff, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16.
104 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5
105 |
106 | kp_end_x_int = int(max(min(kp_end_x, self.width),0))
107 | kp_end_y_int = int(max(min(kp_end_y, self.height),0))
108 | kp_dx2 = kp_start_x - kp_end_x
109 | kp_dy2 = kp_start_y - kp_end_y
110 | kpmap_seq[ff, 3,kp_end_y_int,kp_end_x_int] = 1.0
111 | kpmap_seq[ff, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16.
112 | kpmap_seq[ff, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16.
113 |
114 | return kpmap_seq, traj_list
115 |
116 | def __getitem__(self, idx):
117 | traj_db = self.traj_db
118 | jpg_h5 = self.jpg_h5
119 |
120 | if idx == -1:
121 | idx = random.randint(0,self.clip_num-1)
122 |
123 | annot = traj_db[self.clip_names[idx]]
124 |
125 | vid_id = annot.attrs['VidId']
126 | annot_traj_len = annot.attrs['TrajLen']
127 | annot_clip_start = annot.attrs['StartFrame']
128 | num_trajs = annot.attrs['TrajCount']
129 | trajs = annot[()]
130 |
131 | num_frames = self._num_frames
132 | # random start frame
133 | annot_start_frame = random.randint(0,annot_traj_len-num_frames)
134 |
135 | # loading frames
136 | vid_seq = np.empty([num_frames,3,self.height,self.width], dtype=np.float32)
137 | for ff in range(num_frames): # load num_frames consecutive frames
138 | frame_no = annot_start_frame+annot_clip_start+ff
139 | img_data = cv2.imdecode(jpg_h5['{}/{}.jpg'.format(vid_id, frame_no)][()], -1)
140 | img_data = cv2.resize(img_data, (240,192))
141 | img = img_data[:,:,(2,1,0)] # h w c
142 |
143 | vid_seq[ff,:,:,:] = np.transpose(img, (2,0,1))/255.0
144 | vid_seq = vid_seq - 0.5 # num_frames C H W, [-0.5,0.5]
145 |
146 | kpmap_seq, traj_list = self.get_traj_input(trajs, annot_start_frame, num_frames)
147 |
148 | print(idx, annot_start_frame)
149 | return vid_seq, kpmap_seq, traj_list
150 |
151 |
152 |
153 |
154 |
155 |
--------------------------------------------------------------------------------
/model/reader/ucf_reader.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | from scipy import misc # for imread
4 | from utils.find_border import find_border
5 | import h5py
6 |
7 | import math
8 | import os
9 |
10 | from scipy.cluster.vq import kmeans,kmeans2,vq
11 |
12 | def filter_trajs_kmeans(trajs, num_centroids):
13 | num_trajs = trajs.shape[0]
14 | len_trajs = trajs.shape[1]
15 | traj_vec_stor = np.empty((num_trajs, (len_trajs-1)*2), np.float32)
16 | disp_stor = np.empty((num_trajs,), np.float32)
17 |
18 | for ii in range(num_trajs):
19 | traj = trajs[ii,:,:] # n-by-2
20 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point
21 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1)))
22 | # Remove trajectories that have very low displacement
23 | good_trajs = np.flatnonzero(disp_stor>0.4)
24 | traj_vec_stor = traj_vec_stor[good_trajs,:]
25 |
26 | if traj_vec_stor.shape[0] < num_centroids: # too few points
27 | #print("kmeans: TOO FEW USABLE KEYPOINTS")
28 | return good_trajs # try to use all of them
29 |
30 | # k-means on vectors
31 | #num_centroids = 10
32 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100)
33 | centroids,distortion = kmeans(traj_vec_stor,num_centroids, iter=20) # scipy's kmeans returns (codebook, mean distortion), not per-point labels
34 |
35 | # Sample
36 | # Find the nearest vector to each centroid
37 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # one trajectory index per centroid
38 |
39 | rep = good_trajs[rep]
40 |
41 | return rep # return the index of K most representative trajectories
42 |
43 |
44 |
45 | class UCFReader():
46 | TRAJ_H5_PATH = '/trajectories/ucf/traj_stor_test.h5'
47 | DATASET_DIR = '/datasets/UCF101/UCF-101'
48 | JPG_DIR = '/datasets/UCF101_seq/UCF-101'
49 |
50 | def __init__(self, num_frames=10):
51 | self._num_frames = num_frames
52 | self.height = 192
53 | self.width = 256
54 |
55 | traj_h5 = h5py.File(self.TRAJ_H5_PATH, 'r', libver='latest')
56 | traj_db = traj_h5["/UCFTraj/by_clip"]
57 | #traj_h5.close()
58 | self.clip_names = list(traj_db.keys())
59 | self.clip_num = len(self.clip_names)
60 | self.traj_db = traj_db
61 | print('[UCF Trajectory Statistics]')
62 | print('Clip count: %d' % (self.clip_num))
63 |
64 | def get_traj_input(self, trajs, start_frame, num_frames):
65 | num_trajs = trajs.shape[0]
66 | # Load annotations
67 | # Format: 2(frames), 3(T/F,dx,dy), H, W
68 | kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32)
69 |
70 | #num_appear_trajs = min(num_trajs,10)
71 | num_appear_trajs = min(num_trajs,1)
72 | #good_idx = filter_trajs_kmeans(trajs[:,start_frame:start_frame+num_frames,:], 10) # unused; disabled to match the other readers
73 |
74 | appear_trajs = random.sample(range(num_trajs), num_appear_trajs)
75 |
76 | traj_list = trajs[appear_trajs, start_frame:start_frame+num_frames, :]
77 | for ff in range(num_frames):
78 | for traj_no in appear_trajs:
79 | kp_start_x = trajs[traj_no,start_frame,0]
80 | kp_start_y = trajs[traj_no,start_frame,1]
81 | kp_end_x = trajs[traj_no,start_frame+ff,0]
82 | kp_end_y = trajs[traj_no,start_frame+ff,1]
83 |
84 | kp_start_x_int = int(max(min(kp_start_x, self.width),0))
85 | kp_start_y_int = int(max(min(kp_start_y, self.height),0))
86 | kp_dx = kp_end_x - kp_start_x
87 | kp_dy = kp_end_y - kp_start_y
88 | kpmap_seq[ff, 0,kp_start_y_int,kp_start_x_int] = 1.0
89 | kpmap_seq[ff, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16.
90 | kpmap_seq[ff, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16.
91 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5
92 |
93 | kp_end_x_int = int(max(min(kp_end_x, self.width),0))
94 | kp_end_y_int = int(max(min(kp_end_y, self.height),0))
95 | kp_dx2 = kp_start_x - kp_end_x
96 | kp_dy2 = kp_start_y - kp_end_y
97 | kpmap_seq[ff, 3,kp_end_y_int,kp_end_x_int] = 1.0
98 | kpmap_seq[ff, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16.
99 | kpmap_seq[ff, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16.
100 |
101 | return kpmap_seq, traj_list
102 |
103 | def __getitem__(self, idx):
104 | traj_db = self.traj_db
105 |
106 | if idx == -1:
107 | while True:
108 | idx = random.randint(0,self.clip_num-1)
109 | #if traj_db[self.clip_names[idx]].attrs['VidPath'].find("PushUps") != -1:
110 | break
111 |
112 |
113 | annot = traj_db[self.clip_names[idx]]
114 | vid_path = annot.attrs['VidPath']
115 | #vid_path = vid_path.replace('/datasets/UCF101/UCF-101', self.JPG_DIR) #
116 | annot_traj_len = annot.attrs['TrajLen']
117 | annot_clip_start = annot.attrs['StartFrame']
118 | num_trajs = annot.attrs['TrajCount']
119 | trajs = annot[()]
120 |
121 | num_frames = self._num_frames
122 | annot_start_frame = random.randint(0,annot_traj_len-num_frames)
123 |
124 | # preallocate np array
125 | vid_seq = np.empty([num_frames,3,self.height,self.width], dtype=np.float32)
126 | for ff in range(num_frames):
127 | frame_no = annot_start_frame+annot_clip_start+ff
128 | try:
129 | frame = misc.imread(vid_path+'/'+str(frame_no)+'.jpg')
130 | except Exception:
131 | print('Bad image found.')
132 | frame = np.zeros([self.height, self.width, 3], dtype=np.uint8) # H, W, C order
133 | img = misc.imresize(frame, (self.height,self.width))
134 | vid_seq[ff,:,:,:] = np.transpose(img, (2,0,1))/255.0
135 |
136 | vid_mask = find_border(vid_seq[0,:,:,:], threshold=10/255)
137 | vid_seq = vid_seq - 0.5 # num_frames C H W, [-0.5,0.5]
138 | vid_seq = vid_seq * vid_mask
139 |
140 | kpmap_seq, traj_list = self.get_traj_input(trajs, annot_start_frame, num_frames)
141 |
142 | print(idx, annot_start_frame)
143 | return vid_seq, kpmap_seq, traj_list
144 |
--------------------------------------------------------------------------------
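`filter_trajs_kmeans`, duplicated verbatim in all three readers, drops near-static trajectories, clusters the remaining displacement vectors, and returns the index of the trajectory closest to each centroid. A minimal sketch with synthetic data:

    import numpy as np

    trajs = (np.random.rand(50, 8, 2) * 100).astype(np.float32)  # [num_trajs, frames, (x, y)]
    rep_idx = filter_trajs_kmeans(trajs, 10)   # up to 10 representative trajectory indices
    rep = trajs[rep_idx]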
/model/train_kitti.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | from torch.autograd import Variable
6 | from torchvision import datasets, transforms
7 | import torch.nn.functional as F
8 |
9 | import numpy as np
10 |
11 | from dataset.data_loader_kitti_reimpl import KITTIReader_traj
12 | from models.vgg_warper_weak_shortcut_nobn import VGG_Warper
13 | from utils.visual import colorcode, VisdomShow, pbar
14 |
15 | from ops.flow_warper_pad_2x import FlowWarp
16 | from ops.hardshinkloss import HardshinkLoss
17 | from ops.laplace2d import Laplace2D
18 |
19 |
20 | args = {}
21 | args['gpus'] = [0]
22 | args['seed'] = 12345
23 | args['batch_size'] = 32
24 | torch.backends.cudnn.benchmark = True
25 |
26 | # Initialize PyTorch DataLoader
27 | datareader = KITTIReader_traj(is_test=False, max_interval=10, min_ntraj=1, max_ntraj=5) # change to min_ntraj=10, max_ntraj=10 for autoencoding (video prediction) evaluation
28 | train_loader = torch.utils.data.DataLoader(
29 | datareader, batch_size=args['batch_size'], shuffle=True, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=4, pin_memory=True)
30 |
31 |
32 | class MModel(nn.Module):
33 | def __init__(self):
34 | super(MModel, self).__init__()
35 | self.warp_cnn = VGG_Warper(9)
36 | self.flow_warper = FlowWarp()
37 | self.mseloss = nn.MSELoss(size_average=True, reduce=True)
38 | self.hardshrinkloss = HardshinkLoss(0., 1.)
39 |
40 | def forward(self, img_input, warp_input, img_gt):
41 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2
42 | warp_imgs = self.flow_warper(img_input, warp_flow, padl=83)
43 | comp_imgs = F.hardtanh(comp_imgs,0.,1.)
44 | masks = F.sigmoid(masks)
45 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks)
46 |
47 | return recon_img, warp_flow, comp_imgs, masks
48 |
49 | # Charbonnier penalty function
50 | # Φ(x) = (x^2 + \epsilon^2)^{1/2}
51 | class CPF(nn.Module):
52 | def __init__(self):
53 | super(CPF, self).__init__()
54 | def forward(self, x, mean=True):
55 | eps = 0.0001
56 | eps2 = eps**2
57 | if mean:
58 | loss = torch.mean(torch.sqrt(x**2+eps2))
59 | else:
60 | loss = torch.sum(torch.sqrt(x**2+eps2))
61 |
62 | return loss
63 |
64 | mmodel = MModel()
65 | mmodel.cuda()
66 | mmodel = nn.DataParallel(mmodel, device_ids=[0,1])
67 |
68 | #reconstruction_function = nn.BCELoss()
69 | #reconstruction_function = nn.L1Loss()
70 | mseloss = nn.MSELoss()
71 | #mseloss.size_average = True
72 | cpfloss = CPF()
73 | hardshrinkloss = HardshinkLoss(0., 1.)
74 | #sl1loss = nn.SmoothL1Loss(size_average=False)
75 | optimizer = optim.Adam(mmodel.parameters(), lr=1e-3, weight_decay=0)
76 |
77 | visual = VisdomShow('kitti_train_humaneval')
78 |
79 | def train(epoch):
80 | print('\n\n=========================== Epoch {} ============================'.format(epoch))
81 | mmodel.train()
82 | for batch_idx, (img_input, warp_input, img_gt, vid_mask, img_input_2x) in enumerate(train_loader):
83 | img_input = Variable(img_input).cuda(args['gpus'][0])
84 | img_input_2x = Variable(img_input_2x).cuda(args['gpus'][0])
85 | warp_input = Variable(warp_input).cuda(args['gpus'][0])
86 | img_gt = Variable(img_gt).cuda(args['gpus'][0])
87 | vid_mask = Variable(vid_mask).cuda(args['gpus'][0])
88 |
89 | optimizer.zero_grad()
90 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input_2x, warp_input, img_gt)
91 |
92 | loss_comp_pen = hardshrinkloss(comp_imgs)
93 | loss_recon = cpfloss((recon_img-img_gt)*vid_mask)
94 | #loss_recon = mseloss(recon_img*vid_mask,img_gt*vid_mask)
95 | loss_mask_pen = torch.mean((masks-1.)**2)
96 |
97 | loss = loss_recon + 0.1*loss_comp_pen + 0.01*loss_mask_pen
98 | loss.backward()
99 | optimizer.step()
100 |
101 | hist['loss'].append(loss_recon.data.cpu().numpy()[0])
102 | hist['comp_pen'].append(loss_comp_pen.data.cpu().numpy()[0])
103 |
104 | if batch_idx%10 == 0:
105 | pbar(batch_idx, len(train_loader), epoch)
106 |
107 | if batch_idx%200 == 0:
108 | img_out = visual.add_text(recon_img[0,:,:,:].data.cpu().numpy(), 'Out', (0,0,1))
109 | img_in = visual.add_text(img_input[0,:,:,:].data.cpu().numpy(), 'In', (0,1,0))
110 | img_gt = visual.add_text(img_gt[0,:,:,:].data.cpu().numpy(), 'GT', (1,0,0))
111 | comp_out = visual.add_text(comp_imgs[0,:,:,:].data.cpu().numpy(), 'Comp', (0,1,1))
112 | mask_bw = masks[0,:,:,:].data.cpu().numpy()
113 | mask_out = visual.add_text(np.concatenate((mask_bw,mask_bw,mask_bw),0), 'Mask', (1,0,0))
114 | warp_out = visual.add_text(colorcode(warp_flow[0,:,:,:].data.cpu().numpy()), 'Flow', (0,0,0))
115 |
116 | visual.show_img(comp_out)
117 | visual.show_img(mask_out)
118 | visual.show_img(warp_out)
119 | vid = np.stack((img_in, img_out, img_gt, img_in, img_out, img_gt, img_gt), axis=0)
120 | visual.show_vid(vid)
121 | if batch_idx%2000 == 0:
122 | ckpt = {
123 | 'mmodel_state_dict': mmodel.module.state_dict(),
124 | 'optimizer': optimizer.state_dict(),
125 | 'hist': hist
126 | }
127 | torch.save(ckpt, './snapshots/kitti/ckpt_e{}_b{}.pth'.format(epoch, batch_idx))
128 |
129 | def restore(ckpt_file):
130 | ckpt = torch.load(ckpt_file)
131 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict'])
132 | optimizer.load_state_dict(ckpt['optimizer'])
133 | #hist = ckpt['hist']
134 | print('Restored from {}'.format(ckpt_file))
135 |
136 | hist = {}
137 | hist['loss'] = []
138 | hist['comp_pen'] = []
139 |
140 | #restore('./snapshots2/ckpt_e1_b44000.pth')
141 | for epoch in range(0, 20):
142 | #test(epoch)
143 | train(epoch)
144 |
145 |
146 |
--------------------------------------------------------------------------------
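The Charbonnier penalty Φ(x) = sqrt(x² + ε²) used as `loss_recon` behaves like |x| away from zero while staying differentiable at x = 0, which makes it less sensitive to outliers than a squared error. A quick numeric sketch:

    import torch

    x = torch.linspace(-1, 1, 5)      # [-1.0, -0.5, 0.0, 0.5, 1.0]
    eps2 = 0.0001 ** 2
    phi = torch.sqrt(x ** 2 + eps2)   # ~[1.0, 0.5, 0.0001, 0.5, 1.0]: a smoothed |x|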
/model/train_rp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | from torch.autograd import Variable
6 | from torchvision import datasets, transforms
7 | import torch.nn.functional as F
8 |
9 | import numpy as np
10 |
11 | from dataset.data_loader_rp_reimpl import RPReader_traj
12 | from models.vgg_warper_weak_shortcut import VGG_Warper
13 | from utils.visual import colorcode, VisdomShow, pbar
14 |
15 | from ops.flow_warper import FlowWarp
16 | from ops.hardshinkloss import HardshinkLoss
17 | from ops.laplace2d import Laplace2D
18 |
19 |
20 | args = {}
21 | args['gpus'] = [0]
22 | args['seed'] = 12345
23 | args['batch_size'] = 32
24 | torch.backends.cudnn.benchmark = True
25 |
26 | # Initialize PyTorch DataLoader
27 | datareader = RPReader_traj(is_test=False, max_interval=10, min_ntraj=1, max_ntraj=5) # change to min_ntraj=10, max_ntraj=10 for autoencoding (video prediction) evaluation
28 | train_loader = torch.utils.data.DataLoader(
29 | datareader, batch_size=args['batch_size'], shuffle=True, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=4, pin_memory=True)
30 |
31 |
32 | class MModel(nn.Module):
33 | def __init__(self):
34 | super(MModel, self).__init__()
35 | self.warp_cnn = VGG_Warper(9)
36 | self.flow_warper = FlowWarp()
37 | self.mseloss = nn.MSELoss(size_average=True, reduce=True)
38 | self.hardshrinkloss = HardshinkLoss(0., 1.)
39 |
40 | def forward(self, img_input, warp_input, img_gt):
41 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2
42 | warp_imgs = self.flow_warper(img_input, warp_flow)
43 | comp_imgs = F.hardtanh(comp_imgs,0.,1.)
44 | masks = F.sigmoid(masks)
45 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks)
46 |
47 | return recon_img, warp_flow, comp_imgs, masks
48 |
49 | # Charbonnier penalty function
50 | # Φ(x) = (x^2 + \epsilon^2)^{1/2}
51 | class CPF(nn.Module):
52 | def __init__(self):
53 | super(CPF, self).__init__()
54 | def forward(self, x, mean=True):
55 | eps = 0.0001
56 | eps2 = eps**2
57 | if mean:
58 | loss = torch.mean(torch.sqrt(x**2+eps2))
59 | else:
60 | loss = torch.sum(torch.sqrt(x**2+eps2))
61 |
62 | return loss
63 |
64 | mmodel = MModel()
65 | mmodel.cuda()
66 | mmodel = nn.DataParallel(mmodel, device_ids=[0,1])
67 |
68 | #reconstruction_function = nn.BCELoss()
69 | #reconstruction_function = nn.L1Loss()
70 | mseloss = nn.MSELoss()
71 | #mseloss.size_average = True
72 | cpfloss = CPF()
73 | hardshrinkloss = HardshinkLoss(0., 1.)
74 | #sl1loss = nn.SmoothL1Loss(size_average=False)
75 | optimizer = optim.Adam(mmodel.parameters(), lr=1e-3, weight_decay=0)
76 |
77 | visual = VisdomShow('rp_train_humaneval')
78 |
79 | def train(epoch):
80 | print('\n\n=========================== Epoch {} ============================'.format(epoch))
81 | mmodel.train()
82 | for batch_idx, (img_input, warp_input, img_gt, vid_mask) in enumerate(train_loader):
83 | img_input = Variable(img_input).cuda(args['gpus'][0])
84 | warp_input = Variable(warp_input).cuda(args['gpus'][0])
85 | img_gt = Variable(img_gt).cuda(args['gpus'][0])
86 | vid_mask = Variable(vid_mask).cuda(args['gpus'][0])
87 |
88 | optimizer.zero_grad()
89 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input, warp_input, img_gt)
90 |
91 | loss_comp_pen = hardshrinkloss(comp_imgs)
92 | loss_recon = cpfloss((recon_img-img_gt)*vid_mask)
93 | #loss_recon = mseloss(recon_img*vid_mask,img_gt*vid_mask)
94 | loss_mask_pen = torch.mean((masks-1.)**2)
95 |
96 | loss = loss_recon + 0.1*loss_comp_pen + 0.01*loss_mask_pen
97 | loss.backward()
98 | optimizer.step()
99 |
100 | hist['loss'].append(loss_recon.data.cpu().numpy()[0])
101 | hist['comp_pen'].append(loss_comp_pen.data.cpu().numpy()[0])
102 |
103 | if batch_idx%10 == 0:
104 | pbar(batch_idx, len(train_loader), epoch)
105 |
106 | if batch_idx%200 == 0:
107 | img_out = visual.add_text(recon_img[0,:,:,:].data.cpu().numpy(), 'Out', (0,0,1))
108 | img_in = visual.add_text(img_input[0,:,:,:].data.cpu().numpy(), 'In', (0,1,0))
109 | img_gt = visual.add_text(img_gt[0,:,:,:].data.cpu().numpy(), 'GT', (1,0,0))
110 | comp_out = visual.add_text(comp_imgs[0,:,:,:].data.cpu().numpy(), 'Comp', (0,1,1))
111 | mask_bw = masks[0,:,:,:].data.cpu().numpy()
112 | mask_out = visual.add_text(np.concatenate((mask_bw,mask_bw,mask_bw),0), 'Mask', (1,0,0))
113 | warp_out = visual.add_text(colorcode(warp_flow[0,:,:,:].data.cpu().numpy()), 'Flow', (0,0,0))
114 |
115 | visual.show_img(comp_out)
116 | visual.show_img(mask_out)
117 | visual.show_img(warp_out)
118 | vid = np.stack((img_in, img_out, img_gt, img_in, img_out, img_gt, img_gt), axis=0)
119 | visual.show_vid(vid)
120 | if batch_idx%2000 == 0:
121 | ckpt = {
122 | 'mmodel_state_dict': mmodel.module.state_dict(),
123 | 'optimizer': optimizer.state_dict(),
124 | 'hist': hist
125 | }
126 | torch.save(ckpt, './snapshots/rp/ckpt_e{}_b{}.pth'.format(epoch, batch_idx))
127 |
128 | def restore(ckpt_file):
129 | ckpt = torch.load(ckpt_file)
130 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict'])
131 | optimizer.load_state_dict(ckpt['optimizer'])
132 | #hist = ckpt['hist']
133 | print('Restored from {}'.format(ckpt_file))
134 |
135 | hist = {}
136 | hist['loss'] = []
137 | hist['comp_pen'] = []
138 |
139 | #restore('./snapshots2/ckpt_e1_b44000.pth')
140 | for epoch in range(0, 20):
141 | #test(epoch)
142 | train(epoch)
143 |
144 |
145 |
--------------------------------------------------------------------------------
/model/train_ucf.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | from torch.autograd import Variable
6 | from torchvision import datasets, transforms
7 | import torch.nn.functional as F
8 |
9 | import numpy as np
10 |
11 | from dataset.data_loader_ucf_reimpl import UCFReader_traj
12 | from models.vgg_warper_weak_shortcut import VGG_Warper
13 | from utils.visual import colorcode, VisdomShow, pbar
14 |
15 | from ops.flow_warper import FlowWarp
16 | from ops.hardshinkloss import HardshinkLoss
17 | from ops.laplace2d import Laplace2D
18 |
19 |
20 | args = {}
21 | args['gpus'] = [0]
22 | args['seed'] = 12345
23 | args['batch_size'] = 32
24 | torch.backends.cudnn.benchmark = True
25 |
26 | # Initialize PyTorch DataLoader
27 | datareader = UCFReader_traj(is_test=False, max_interval=10) # add min_ntraj=10, max_ntraj=10 for autoencoding (video prediction) evaluation
28 | train_loader = torch.utils.data.DataLoader(
29 | datareader, batch_size=args['batch_size'], shuffle=True, collate_fn=datareader.collate_fn, worker_init_fn=datareader.worker_init_fn, num_workers=6, pin_memory=True)
30 |
31 |
32 | class MModel(nn.Module):
33 | def __init__(self):
34 | super(MModel, self).__init__()
35 | self.warp_cnn = VGG_Warper(9)
36 | self.flow_warper = FlowWarp()
37 | self.mseloss = nn.MSELoss(size_average=True, reduce=True)
38 | self.hardshrinkloss = HardshinkLoss(0., 1.)
39 |
40 | def forward(self, img_input, warp_input, img_gt):
41 | warp_flow, masks, comp_imgs = self.warp_cnn(warp_input) # W*H*2
42 | warp_imgs = self.flow_warper(img_input, warp_flow)
43 | comp_imgs = F.hardtanh(comp_imgs,0.,1.)
44 | masks = F.sigmoid(masks)
45 | recon_img = torch.mul(warp_imgs, masks)+torch.mul(comp_imgs,1-masks)
46 |
47 | return recon_img, warp_flow, comp_imgs, masks
48 |
49 | # Charbonnier penalty function
50 | # Φ(x) = (x^2 + \epsilon^2)^{1/2}
51 | class CPF(nn.Module):
52 | def __init__(self):
53 | super(CPF, self).__init__()
54 | def forward(self, x, mean=True):
55 | eps = 0.0001
56 | eps2 = eps**2
57 | if mean:
58 | loss = torch.mean(torch.sqrt(x**2+eps2))
59 | else:
60 | loss = torch.sum(torch.sqrt(x**2+eps2))
61 |
62 | return loss
63 |
64 | mmodel = MModel()
65 | mmodel.cuda()
66 | mmodel = nn.DataParallel(mmodel, device_ids=[0,1])
67 |
68 | #reconstruction_function = nn.BCELoss()
69 | #reconstruction_function = nn.L1Loss()
70 | mseloss = nn.MSELoss()
71 | #mseloss.size_average = True
72 | cpfloss = CPF()
73 | hardshrinkloss = HardshinkLoss(0., 1.)
74 | #sl1loss = nn.SmoothL1Loss(size_average=False)
75 | optimizer = optim.Adam(mmodel.parameters(), lr=1e-3, weight_decay=0)
76 |
77 | visual = VisdomShow('ucf_train_humaneval')
78 |
79 | def train(epoch):
80 | print('\n\n=========================== Epoch {} ============================'.format(epoch))
81 | mmodel.train()
82 | for batch_idx, (img_input, warp_input, img_gt, vid_mask) in enumerate(train_loader):
83 | img_input = Variable(img_input).cuda(args['gpus'][0])
84 | warp_input = Variable(warp_input).cuda(args['gpus'][0])
85 | img_gt = Variable(img_gt).cuda(args['gpus'][0])
86 | vid_mask = Variable(vid_mask).cuda(args['gpus'][0])
87 |
88 | optimizer.zero_grad()
89 | recon_img, warp_flow, comp_imgs, masks = mmodel(img_input, warp_input, img_gt)
90 |
91 | loss_comp_pen = hardshrinkloss(comp_imgs)
92 | loss_recon = cpfloss((recon_img-img_gt)*vid_mask)
93 | #loss_recon = mseloss(recon_img*vid_mask,img_gt*vid_mask)
94 | loss_mask_pen = torch.mean((masks-1.)**2)
95 |
96 | loss = loss_recon + 0.1*loss_comp_pen + 0.01*loss_mask_pen
97 | loss.backward()
98 | optimizer.step()
99 |
100 | hist['loss'].append(loss_recon.data.cpu().numpy()[0])
101 | hist['comp_pen'].append(loss_comp_pen.data.cpu().numpy()[0])
102 |
103 | if batch_idx%10 == 0:
104 | pbar(batch_idx, len(train_loader), epoch)
105 |
106 | if batch_idx%200 == 0:
107 | img_out = visual.add_text(recon_img[0,:,:,:].data.cpu().numpy(), 'Out', (0,0,1))
108 | img_in = visual.add_text(img_input[0,:,:,:].data.cpu().numpy(), 'In', (0,1,0))
109 | img_gt = visual.add_text(img_gt[0,:,:,:].data.cpu().numpy(), 'GT', (1,0,0))
110 | comp_out = visual.add_text(comp_imgs[0,:,:,:].data.cpu().numpy(), 'Comp', (0,1,1))
111 | mask_bw = masks[0,:,:,:].data.cpu().numpy()
112 | mask_out = visual.add_text(np.concatenate((mask_bw,mask_bw,mask_bw),0), 'Mask', (1,0,0))
113 | warp_out = visual.add_text(colorcode(warp_flow[0,:,:,:].data.cpu().numpy()), 'Flow', (0,0,0))
114 |
115 | visual.show_img(comp_out)
116 | visual.show_img(mask_out)
117 | visual.show_img(warp_out)
118 | vid = np.stack((img_in, img_out, img_gt, img_in, img_out, img_gt, img_gt), axis=0)
119 | visual.show_vid(vid)
120 | if batch_idx%2000 == 0:
121 | ckpt = {
122 | 'mmodel_state_dict': mmodel.module.state_dict(),
123 | 'optimizer': optimizer.state_dict(),
124 | 'hist': hist
125 | }
126 | torch.save(ckpt, './snapshots/ucf/ckpt_e{}_b{}.pth'.format(epoch, batch_idx))
127 |
128 | def restore(ckpt_file):
129 | ckpt = torch.load(ckpt_file)
130 | mmodel.module.load_state_dict(ckpt['mmodel_state_dict'])
131 | optimizer.load_state_dict(ckpt['optimizer'])
132 | #hist = ckpt['hist']
133 | print('Restored from {}'.format(ckpt_file))
134 |
135 | hist = {}
136 | hist['loss'] = []
137 | hist['comp_pen'] = []
138 |
139 | restore('./snapshots/ckpt_e2_b58000.pth')
140 | for epoch in range(0, 20):
141 | #test(epoch)
142 | train(epoch)
143 |
144 |
145 |
--------------------------------------------------------------------------------
/model/utils/trajs2map.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.autograd import Variable
4 |
5 | def trajs2map(trajs, height, width): # traj: [N, S/E, X/Y]
6 | #kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32)
7 |
8 | #height = kpmap_seq.size(2)
9 | #width = kpmap_seq.size(3)
10 | kpmap_seq = Variable(torch.zeros(1,6,height,width).cuda())
11 | for traj_no in range(len(trajs)):
12 | kp_start_x = trajs[traj_no][0][0]
13 | kp_start_y = trajs[traj_no][0][1]
14 | kp_end_x = trajs[traj_no][1][0]
15 | kp_end_y = trajs[traj_no][1][1]
16 |
17 | kp_start_x_int = int(max(min(kp_start_x, width),0))
18 | kp_start_y_int = int(max(min(kp_start_y, height),0))
19 | kp_dx = kp_end_x - kp_start_x
20 | kp_dy = kp_end_y - kp_start_y
21 | kpmap_seq[0, 0,kp_start_y_int,kp_start_x_int] = 1.0
22 | kpmap_seq[0, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16.
23 | kpmap_seq[0, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16.
24 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5
25 |
26 | kp_end_x_int = int(max(min(kp_end_x, width),0))
27 | kp_end_y_int = int(max(min(kp_end_y, height),0))
28 | kp_dx2 = kp_start_x - kp_end_x
29 | kp_dy2 = kp_start_y - kp_end_y
30 | kpmap_seq[0, 3,kp_end_y_int,kp_end_x_int] = 1.0
31 | kpmap_seq[0, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16.
32 | kpmap_seq[0, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16.
33 |
34 | return kpmap_seq
35 |
36 |
37 | def trajs2map2(trajs, height, width): # traj: [N, S/E, X/Y]
38 | #kpmap_seq = np.zeros([num_frames, 6,self.height,self.width], dtype=np.float32)
39 |
40 | #height = kpmap_seq.size(2)
41 | #width = kpmap_seq.size(3)
42 | kpmap_seq = Variable(torch.zeros(1,6,height,width).cuda())
43 | for traj_no in range(trajs.shape[0]):
44 | kp_start_x = trajs[traj_no,0,0]
45 | kp_start_y = trajs[traj_no,0,1]
46 | kp_end_x = trajs[traj_no,1,0]
47 | kp_end_y = trajs[traj_no,1,1]
48 |
49 | kp_start_x_int = int(max(min(kp_start_x, width),0))
50 | kp_start_y_int = int(max(min(kp_start_y, height),0))
51 | kp_dx = kp_end_x - kp_start_x
52 | kp_dy = kp_end_y - kp_start_y
53 | kpmap_seq[0, 0,kp_start_y_int,kp_start_x_int] = 1.0
54 | kpmap_seq[0, 1,kp_start_y_int,kp_start_x_int] = kp_dy/16.
55 | kpmap_seq[0, 2,kp_start_y_int,kp_start_x_int] = kp_dx/16.
56 | #vid_seq[0,1,kp_start_y,kp_start_x] = 0.5
57 |
58 | kp_end_x_int = int(max(min(kp_end_x, width),0))
59 | kp_end_y_int = int(max(min(kp_end_y, height),0))
60 | kp_dx2 = kp_start_x - kp_end_x
61 | kp_dy2 = kp_start_y - kp_end_y
62 | kpmap_seq[0, 3,kp_end_y_int,kp_end_x_int] = 1.0
63 | kpmap_seq[0, 4,kp_end_y_int,kp_end_x_int] = kp_dy2/16.
64 | kpmap_seq[0, 5,kp_end_y_int,kp_end_x_int] = kp_dx2/16.
65 |
66 | return kpmap_seq
67 |
--------------------------------------------------------------------------------
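`trajs2map` builds the same 6-channel sparse control map as the readers' `get_traj_input`, but from interactively supplied (start, end) point pairs (presumably for the GUI evaluation); `trajs2map2` is the twin that indexes an ndarray instead of nested tuples. A minimal usage sketch, assuming a CUDA device:

    trajs = [((40., 30.), (52., 38.)),    # (start_xy, end_xy) for each trajectory
             ((100., 60.), (90., 60.))]
    kpmap = trajs2map(trajs, height=192, width=256)   # Variable of shape 1x6x192x256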
/model/utils/visual.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from visdom import Visdom
3 | import cv2
4 | import os
5 | # OpenBLAS screws up with CPU affinity
6 | os.sched_setaffinity(0,range(os.cpu_count()))
7 |
8 |
9 | class VisdomShow():
10 | def __init__(self, env_name):
11 | self.vis = Visdom(env=env_name)
12 | print('Visdom display initialized')
13 |
14 | def show_img(self, img):
15 | #img = img[(2,1,0),:,:]
16 | self.vis.image(np.clip(img,0,1))
17 | #self.vis.image(np.clip(img.data.cpu().numpy(),0,1))
18 |
19 | def show_vid(self, vid):
20 | vid = (np.clip(vid,0.,1.)*255.).astype(np.uint8)
21 | vid = np.transpose(vid[:,(2,1,0),:,:], (0,2,3,1))
22 | self.vis.video(vid, opts={'fps': 2})
23 |
24 | def add_text(self, img, text, color=(0,255,0)):
25 | img = np.transpose(img[(2,1,0),:,:], (1,2,0)).copy()
26 | cv2.putText(img, text, (2,24), cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 1, cv2.LINE_AA)
27 | img = np.transpose(img, (2,0,1))[(2,1,0),:,:]
28 | return img
29 |
30 |
31 | def colorcode(flow_in): # N 1 H W, H S V=1
32 | #hsv = np.zeros((512, 512, 3))
33 | #hsv[..., 0] = np.linspace(0, 1, 512)
34 | #hsv[..., 1] = 1.
35 | #hsv[..., 2] = np.linspace(0, 1, 512)[:, np.newaxis]
36 | #rgb = hsv_to_rgb(hsv)
37 | flow_x = flow_in[0,:,:] / 5
38 | flow_y = flow_in[1,:,:] / 5
39 | shape = flow_x.shape
40 | H = np.arctan2(flow_x, flow_y) / (2.*np.pi) # in [-0.5, 0.5]; negative hues are wrapped by i %= 6 below
41 | H = np.ravel(H)
42 | S = np.sqrt(flow_x**2+flow_y**2) # [0, len]
43 | S = np.ravel(S)
44 |
45 | i = np.int_(H*6.)
46 | f = H*6.-i
47 |
48 | q = f
49 | t = 1.-f
50 | i = np.ravel(i)
51 | f = np.ravel(f)
52 | i%=6
53 | t = np.ravel(t)
54 | q = np.ravel(q)
55 | v = 1
56 | clist = (1-S*np.vstack([np.zeros_like(f),np.ones_like(f),q,t]))*v
57 |
58 | #0:v 1:p 2:q 3:t
59 | order = np.array([[0,3,1],[2,0,1],[1,0,3],[1,2,0],[3,1,0],[0,1,2]])
60 | rgb = clist[order[i], np.arange(np.prod(shape))[:,None]]
61 |
62 | rgb = np.transpose(rgb.reshape(shape+(3,)),[2,0,1])
63 | return rgb
64 |
65 |
66 | import sys
67 | def pbar(count, total, status=''):
68 | bar_len = 50
69 | filled_len = int(round(bar_len * count / float(total)))
70 |
71 | percents = round(100.0 * count / float(total), 1)
72 | bar = '=' * filled_len + '-' * (bar_len - filled_len)
73 |
74 | sys.stdout.write('[%s] %s/%s epoch %s\r' % (bar, count, total, status))
75 | sys.stdout.flush()
76 |
--------------------------------------------------------------------------------
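`colorcode` is a hand-rolled HSV-to-RGB mapping for flow visualization: flow direction becomes hue and magnitude (scaled down by 5) becomes saturation. A minimal sketch:

    import numpy as np

    flow = np.random.randn(2, 64, 64).astype(np.float32)   # (dx, dy) per pixel
    rgb = colorcode(flow)                                   # 3x64x64 floats, roughly in [0, 1]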
/offline_traj/for_KITTI/DenseTrackStab.h:
--------------------------------------------------------------------------------
1 | #ifndef DENSETRACKSTAB_H_
2 | #define DENSETRACKSTAB_H_
3 |
4 | #include <stdio.h> // NOTE: angle-bracket header names were stripped by markup; restored to the usual iDT set
5 | #include <stdlib.h>
6 | #include <string.h>
7 | #include <math.h>
8 | #include <float.h>
9 | #include <limits.h>
10 | #include <time.h>
11 | #include <errno.h>
12 | #include <unistd.h>
13 | #include <vector>
14 | #include <list>
15 | #include <string>
16 | #include <iostream>
17 | #include <fstream>
18 |
19 | #include "opencv2/calib3d/calib3d.hpp"
20 | #include "opencv2/highgui/highgui.hpp"
21 | #include "opencv2/imgproc/imgproc.hpp"
22 | #include "opencv2/xfeatures2d.hpp"
23 | #include "opencv2/core/core.hpp"
24 | //#include "opencv2/nonfree/nonfree.hpp"
25 |
26 | using namespace cv;
27 |
28 | typedef struct
29 | {
30 | int traj_length;
31 | int num_trajs;
32 | float* out_trajs;
33 | } Ret;
34 |
35 | extern "C" void free_mem();
36 | extern "C" void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret* ret);
37 |
38 | int start_frame = 0;
39 | int end_frame = INT_MAX;
40 | int scale_num = 8;
41 | const float scale_stride = sqrt(2);
42 | char* bb_file = NULL;
43 |
44 | // parameters for descriptors
45 | int patch_size = 32;
46 | int nxy_cell = 2;
47 | int nt_cell = 3;
48 | float epsilon = 0.05;
49 | const float min_flow = 0.4;
50 |
51 | // parameters for tracking
52 | double quality = 0.001;
53 | int min_distance = 5;
54 | int init_gap = 1;
55 | int track_length = 15;
56 |
57 | // parameters for rejecting trajectory
58 | const float min_var = sqrt(3);
59 | const float max_var = 50;
60 | const float max_dis = 20;
61 |
62 | typedef struct {
63 | int x; // top left corner
64 | int y;
65 | int width;
66 | int height;
67 | }RectInfo;
68 |
69 | typedef struct {
70 | int width; // resolution of the video
71 | int height;
72 | int length; // number of frames
73 | }SeqInfo;
74 |
75 | typedef struct {
76 | int length; // length of the trajectory
77 | int gap; // initialization gap for feature re-sampling
78 | }TrackInfo;
79 |
80 | typedef struct {
81 | int nBins; // number of bins for vector quantization
82 | bool isHof;
83 | int nxCells; // number of cells in x direction
84 | int nyCells;
85 | int ntCells;
86 | int dim; // dimension of the descriptor
87 | int height; // size of the block for computing the descriptor
88 | int width;
89 | }DescInfo;
90 |
91 | // integral histogram for the descriptors
92 | typedef struct {
93 | int height;
94 | int width;
95 | int nBins;
96 | float* desc;
97 | }DescMat;
98 |
99 | class Track
100 | {
101 | public:
102 | std::vector<Point2f> point;
103 | std::vector<float> disp;
104 | std::vector<float> hog;
105 | std::vector<float> hof;
106 | std::vector<float> mbhX;
107 | std::vector<float> mbhY;
108 | int index;
109 |
110 | Track(const Point2f& point_, const TrackInfo& trackInfo, const DescInfo& hogInfo,
111 | const DescInfo& hofInfo, const DescInfo& mbhInfo)
112 | : point(trackInfo.length+1), disp(trackInfo.length), hog(hogInfo.dim*trackInfo.length),
113 | hof(hofInfo.dim*trackInfo.length), mbhX(mbhInfo.dim*trackInfo.length), mbhY(mbhInfo.dim*trackInfo.length)
114 | {
115 | index = 0;
116 | point[0] = point_;
117 | }
118 |
119 | void addPoint(const Point2f& point_)
120 | {
121 | index++;
122 | point[index] = point_;
123 | }
124 | };
125 |
126 | class BoundBox
127 | {
128 | public:
129 | Point2f TopLeft;
130 | Point2f BottomRight;
131 | float confidence;
132 |
133 | BoundBox(float a1, float a2, float a3, float a4, float a5)
134 | {
135 | TopLeft.x = a1;
136 | TopLeft.y = a2;
137 | BottomRight.x = a3;
138 | BottomRight.y = a4;
139 | confidence = a5;
140 | }
141 | };
142 |
143 | class Frame
144 | {
145 | public:
146 | int frameID;
147 | std::vector<BoundBox> BBs;
148 |
149 | Frame(const int& frame_)
150 | {
151 | frameID = frame_;
152 | BBs.clear();
153 | }
154 | };
155 |
156 | #endif /*DENSETRACKSTAB_H_*/
157 |
--------------------------------------------------------------------------------
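`main_like` and `free_mem` are exported with C linkage and the Makefile links a shared object, which suggests the tracker is driven from Python (e.g. by `batch_process_dataset.py`) rather than run as a standalone binary. A hedged ctypes sketch of such a call; the library path, the raw-frame layout, and the (num_trajs, traj_length, 2) output shape are assumptions, not facts from this header:

    import ctypes
    import numpy as np

    class Ret(ctypes.Structure):
        _fields_ = [('traj_length', ctypes.c_int),
                    ('num_trajs', ctypes.c_int),
                    ('out_trajs', ctypes.POINTER(ctypes.c_float))]

    lib = ctypes.CDLL('./release/DenseTrackStab.so')    # hypothetical build output path
    frames = np.zeros((10, 128, 256, 3), np.uint8)      # hypothetical raw frame buffer
    ret = Ret()
    lib.main_like(frames.ctypes.data_as(ctypes.c_char_p),
                  256, 128, 10, ctypes.byref(ret))
    trajs = np.ctypeslib.as_array(ret.out_trajs,
                                  shape=(ret.num_trajs, ret.traj_length, 2)).copy()
    lib.free_mem()                                      # the library owns out_trajs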
/offline_traj/for_KITTI/Initialize.h:
--------------------------------------------------------------------------------
1 | #ifndef INITIALIZE_H_
2 | #define INITIALIZE_H_
3 |
4 | #include "DenseTrackStab.h"
5 |
6 | using namespace cv;
7 |
8 | void InitTrackInfo(TrackInfo* trackInfo, int track_length, int init_gap)
9 | {
10 | trackInfo->length = track_length;
11 | trackInfo->gap = init_gap;
12 | }
13 |
14 | DescMat* InitDescMat(int height, int width, int nBins)
15 | {
16 | DescMat* descMat = (DescMat*)malloc(sizeof(DescMat));
17 | descMat->height = height;
18 | descMat->width = width;
19 | descMat->nBins = nBins;
20 |
21 | long size = height*width*nBins;
22 | descMat->desc = (float*)malloc(size*sizeof(float));
23 | memset(descMat->desc, 0, size*sizeof(float));
24 | return descMat;
25 | }
26 |
27 | void ReleDescMat(DescMat* descMat)
28 | {
29 | free(descMat->desc);
30 | free(descMat);
31 | }
32 |
33 | void InitDescInfo(DescInfo* descInfo, int nBins, bool isHof, int size, int nxy_cell, int nt_cell)
34 | {
35 | descInfo->nBins = nBins;
36 | descInfo->isHof = isHof;
37 | descInfo->nxCells = nxy_cell;
38 | descInfo->nyCells = nxy_cell;
39 | descInfo->ntCells = nt_cell;
40 | descInfo->dim = nBins*nxy_cell*nxy_cell;
41 | descInfo->height = size;
42 | descInfo->width = size;
43 | }
44 |
45 | void InitSeqInfo(SeqInfo* seqInfo, char* video)
46 | {
47 | VideoCapture capture;
48 | capture.open(video);
49 |
50 | if(!capture.isOpened())
51 | fprintf(stderr, "Could not initialize capturing..\n");
52 |
53 | // get the number of frames in the video
54 | int frame_num = 0;
55 | while(true) {
56 | Mat frame;
57 | capture >> frame;
58 |
59 | if(frame.empty())
60 | break;
61 |
62 | if(frame_num == 0) {
63 | seqInfo->width = frame.cols;
64 | seqInfo->height = frame.rows;
65 | }
66 |
67 | frame_num++;
68 | }
69 | seqInfo->length = frame_num;
70 | }
71 |
72 | void usage()
73 | {
74 | fprintf(stderr, "Extract improved trajectories from a video\n\n");
75 | fprintf(stderr, "Usage: DenseTrackStab video_file [options]\n");
76 | fprintf(stderr, "Options:\n");
77 | fprintf(stderr, " -h Display this message and exit\n");
78 | fprintf(stderr, " -S [start frame] The start frame to compute feature (default: S=0 frame)\n");
79 | fprintf(stderr, " -E [end frame] The end frame for feature computing (default: E=last frame)\n");
80 | fprintf(stderr, " -L [trajectory length] The length of the trajectory (default: L=15 frames)\n");
81 | fprintf(stderr, " -W [sampling stride] The stride for dense sampling feature points (default: W=5 pixels)\n");
82 | fprintf(stderr, " -N [neighborhood size] The neighborhood size for computing the descriptor (default: N=32 pixels)\n");
83 | fprintf(stderr, " -s [spatial cells] The number of cells in the nxy axis (default: nxy=2 cells)\n");
84 | fprintf(stderr, " -t [temporal cells] The number of cells in the nt axis (default: nt=3 cells)\n");
85 | fprintf(stderr, " -A [scale number] The number of maximal spatial scales (default: 8 scales)\n");
86 | fprintf(stderr, " -I [initial gap] The gap for re-sampling feature points (default: 1 frame)\n");
87 | fprintf(stderr, " -H [human bounding box] The human bounding box file to remove outlier matches (default: None)\n");
88 | }
89 |
90 | bool arg_parse(int argc, char** argv)
91 | {
92 | int c;
93 | bool flag = false;
94 | char* executable = basename(argv[0]);
95 | while((c = getopt (argc, argv, "hS:E:L:W:N:s:t:A:I:H:")) != -1)
96 | switch(c) {
97 | case 'S':
98 | start_frame = atoi(optarg);
99 | flag = true;
100 | break;
101 | case 'E':
102 | end_frame = atoi(optarg);
103 | flag = true;
104 | break;
105 | case 'L':
106 | track_length = atoi(optarg);
107 | break;
108 | case 'W':
109 | min_distance = atoi(optarg);
110 | break;
111 | case 'N':
112 | patch_size = atoi(optarg);
113 | break;
114 | case 's':
115 | nxy_cell = atoi(optarg);
116 | break;
117 | case 't':
118 | nt_cell = atoi(optarg);
119 | break;
120 | case 'A':
121 | scale_num = atoi(optarg);
122 | break;
123 | case 'I':
124 | init_gap = atoi(optarg);
125 | break;
126 | case 'H':
127 | bb_file = optarg;
128 | break;
129 | case 'h':
130 | usage();
131 | exit(0);
132 | break;
133 |
134 | default:
135 | fprintf(stderr, "error parsing arguments at -%c\n Try '%s -h' for help.", c, executable );
136 | abort();
137 | }
138 | return flag;
139 | }
140 |
141 | #endif /*INITIALIZE_H_*/
142 |
--------------------------------------------------------------------------------
/offline_traj/for_KITTI/Makefile:
--------------------------------------------------------------------------------
1 | # set the binaries that have to be built
2 | TARGETS := DenseTrackStab Video
3 |
4 | # set the build configuration set
5 | BUILD := release
6 | #BUILD := debug
7 |
8 | # set bin and build dirs
9 | BUILDDIR := .build_$(BUILD)
10 | BINDIR := $(BUILD)
11 |
12 | # libraries
13 | LDLIBS = $(addprefix -l, $(LIBS) $(LIBS_$(notdir $*)))
14 | LIBS := \
15 | opencv_core opencv_highgui opencv_video opencv_imgproc opencv_calib3d opencv_features2d opencv_xfeatures2d opencv_videoio \
16 | avformat avdevice avutil avcodec swscale
17 |
18 | # set some flags and compiler/linker specific commands
19 | CXXFLAGS = -pipe -D __STDC_CONSTANT_MACROS -D STD=std -Wall -fvisibility=hidden $(CXXFLAGS_$(BUILD)) -I. -I/opt/include
20 | CXXFLAGS_debug := -ggdb
21 | CXXFLAGS_release := -O3 -DNDEBUG -ggdb
22 | #LDFLAGS = -L/opt/lib -pipe -Wall -shared $(LDFLAGS_$(BUILD))
23 | LDFLAGS = -L/opt/lib -pipe -Wall -shared -fPIC -fvisibility=hidden $(LDFLAGS_$(BUILD))
24 | LDFLAGS_debug := -ggdb
25 | LDFLAGS_release := -O3 -ggdb
26 |
27 | include make/generic.mk
28 |
--------------------------------------------------------------------------------
/offline_traj/for_KITTI/README.md:
--------------------------------------------------------------------------------
1 | # NOTES ON USAGE
2 | For generating trajectories from video (Tuned for KITTI dataset).
3 | As a part of code for "Controllable Video Generation with Sparse Trajectories", CVPR'18.
4 | - **batch_process_dataset.py**: Generate trajectories. To set up, search for comments containing `[EDIT ME!]`. Train/test split is hard-coded inside.
5 | - **view_traj.py**: Visualize generated trajectories. Detailed instructions inside the file.
6 | - **\*.cpp** & **\*.h**: Code for *Dense Trajectories* algorithm. Slightly modified.
7 |
8 | **Warning: The code is provided in its original form without any cleanup.**
9 |
10 | # NOTES ON MODIFICATIONS
11 | Code originated from:
12 | http://lear.inrialpes.fr/people/wang/dense_trajectories
13 | ```
14 | @inproceedings{wang:2011:inria-00583818:1,
15 | AUTHOR = {Heng Wang and Alexander Kl{\"a}ser and Cordelia Schmid and Cheng-Lin Liu},
16 | TITLE = {{Action Recognition by Dense Trajectories}},
17 | BOOKTITLE = {IEEE Conference on Computer Vision \& Pattern Recognition},
18 | YEAR = {2011},
19 | MONTH = Jun,
20 | PAGES = {3169-3176},
21 | ADDRESS = {Colorado Springs, United States},
22 | URL = {http://hal.inria.fr/inria-00583818/en}
23 | }
24 | ```
25 | - Modified to support more modern versions of OpenCV
26 | - Needs OpenCV >= 3.0 with the "contrib" add-on for SURF and SIFT feature extraction.
27 | - Converted the stand-alone executable into a dynamic library for calling from Python via CFFI (see the sketch below)
28 |
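A minimal sketch of how the compiled library is driven from Python (mirroring `batch_process_dataset.py`; the `Ret` struct and the `main_like`/`free_mem` signatures come from `DenseTrackStab.h`):

```python
import numpy as np
from cffi import FFI

ffi = FFI()
ffi.cdef("""
typedef struct { int traj_length; int num_trajs; float* out_trajs; } Ret;
void free_mem();
void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret* ret);
""")
lib = ffi.dlopen("./release/DenseTrackStab")

# Input is a C-contiguous (T, H, W, 3) uint8 BGR clip; an all-zero clip is
# used here only as a placeholder.
frames = np.zeros((10, 128, 256, 3), dtype=np.uint8)
ret = ffi.new("Ret[]", 1)
lib.main_like(ffi.cast("char *", frames.ctypes.data),
              frames.shape[2], frames.shape[1], frames.shape[0], ret)

# out_trajs is num_trajs * traj_length * 2 float32 values owned by the
# library, so copy them out before releasing the buffer.
n, t = ret[0].num_trajs, ret[0].traj_length
trajs = np.frombuffer(ffi.buffer(ret[0].out_trajs, n * t * 2 * 4),
                      dtype=np.float32).reshape(n, t, 2).copy()
lib.free_mem()
```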
29 |
30 | # The following is the original README for Dense Trajectories
31 |
32 |
33 | ### Compiling ###
34 |
35 | In order to compile the improved trajectories code, you need to have the following libraries installed in your system:
36 | * OpenCV library (tested with OpenCV-2.4.2)
37 | * ffmpeg library (tested with ffmpeg-0.11.1)
38 |
39 | These library versions were the latest at the time of writing. In case they become outdated, you can also find them on our website: http://lear.inrialpes.fr/people/wang/improved_trajectories
40 |
41 | If these libraries are installed correctly, simply type 'make' to compile the code. The executable will be in the directory './release/'.
42 |
43 | ### test video decoding ###
44 |
45 | The most complicated part of compiling is to install opencv and ffmpeg. To make sure your video is decoded properly, we have a simple code (named 'Video.cpp') for visualization:
46 |
47 | ./release/Video your_video.avi
48 |
49 | If your video plays smoothly, congratulations! You are just one step before getting the features.
50 |
51 | If there is a bug and the video can't be decoded, you first need to fix that bug. You can find plenty of instructions on how to install opencv and ffmpeg on the web.
52 |
53 | ### compute features on a test video ###
54 |
55 | Once you are able to decode the video, computing our features is simple:
56 |
57 | ./release/DenseTrackStab ./test_sequences/person01_boxing_d1_uncomp.avi | gzip > out.features.gz
58 |
59 | Now you want to compare your file out.features.gz with the file that we have computed to verify that everything is working correctly. To do so, type:
60 |
61 | vimdiff out.features.gz ./test_sequences/person01_boxing_d1.gz
62 |
63 | Note that due to different versions of codecs, your features may be slightly different from ours. But the major part should be the same.
64 |
65 | Due to the randomness of RANSAC, you may get different features for some videos. But for the example "person01_boxing_d1_uncomp.avi", I don't observe any randomness.
66 |
67 | There are more explanations about our features on the website, and also a list of FAQ.
68 |
69 | ### History ###
70 |
71 | * October 2013: improved_trajectory_release.tar.gz
72 | The code is an extension of dense_trajectory_release_v1.2.tar.gz
73 |
74 | ### Bugs and extensions ###
75 |
76 | If you find bugs, etc., feel free to drop me a line. Also if you developed some extension to the program, let me know and I can include it in the code. You can find my contact data on my webpage, as well.
77 |
78 | http://lear.inrialpes.fr/people/wang/
79 |
80 | ### LICENSE CONDITIONS ###
81 |
82 | Copyright (C) 2011 Heng Wang
83 |
84 | This program is free software; you can redistribute it and/or
85 | modify it under the terms of the GNU General Public License
86 | as published by the Free Software Foundation; either version 2
87 | of the License, or (at your option) any later version.
88 |
89 | This program is distributed in the hope that it will be useful,
90 | but WITHOUT ANY WARRANTY; without even the implied warranty of
91 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
92 | GNU General Public License for more details.
93 |
94 | You should have received a copy of the GNU General Public License
95 | along with this program; if not, write to the Free Software
96 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
97 |
98 |
--------------------------------------------------------------------------------
/offline_traj/for_KITTI/Video.cpp:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 | #include <iostream>
5 |
6 | #include <opencv2/core/core.hpp>
7 | #include <opencv2/core/core_c.h>
8 | #include <opencv2/highgui/highgui.hpp>
9 | #include <opencv2/highgui/highgui_c.h>
10 | #include <opencv2/imgproc/imgproc.hpp>
11 | #include <opencv2/imgproc/imgproc_c.h>
12 | #include <opencv2/videoio/videoio_c.h>
13 | #include <opencv2/imgproc/types_c.h>
14 | #include <opencv2/core/types_c.h>
15 |
16 | IplImage* image = 0;
17 | IplImage* prev_image = 0;
18 | CvCapture* capture = 0;
19 |
20 | int show = 1;
21 |
22 | int main( int argc, char** argv )
23 | {
24 | int frameNum = 0;
25 |
26 | char* video = argv[1];
27 | capture = cvCreateFileCapture(video);
28 |
29 | if( !capture ) {
30 | printf( "Could not initialize capturing..\n" );
31 | return -1;
32 | }
33 |
34 | if( show == 1 )
35 | cvNamedWindow( "Video", 0 );
36 |
37 | while( true ) {
38 | IplImage* frame = 0;
39 | int i, j, c;
40 |
41 | // get a new frame
42 | frame = cvQueryFrame( capture );
43 | if( !frame )
44 | break;
45 |
46 | if( !image ) {
47 | image = cvCreateImage( cvSize(frame->width,frame->height), 8, 3 );
48 | image->origin = frame->origin;
49 | }
50 |
51 | cvCopy( frame, image, 0 );
52 |
53 | if( show == 1 ) {
54 | cvShowImage( "Video", image);
55 | c = cvWaitKey(3);
56 | if((char)c == 27) break;
57 | }
58 |
59 | std::cerr << "The " << frameNum << "-th frame" << std::endl;
60 | frameNum++;
61 | }
62 |
63 | if( show == 1 )
64 | cvDestroyWindow("Video");
65 |
66 | return 0;
67 | }
68 |
--------------------------------------------------------------------------------
/offline_traj/for_KITTI/batch_process_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from cffi import FFI
4 | import cv2
5 |
6 | from scipy.cluster.vq import kmeans,kmeans2,vq
7 |
8 | # For trajectory storage
9 | import h5py
10 | import uuid
11 |
12 | # OpenBLAS (used by OpenCV) changes CPU affinity, so reset it for every worker
13 | os.sched_setaffinity(0,range(os.cpu_count()))
14 | def setaff():
15 | os.sched_setaffinity(0,range(os.cpu_count()))
16 |
17 |
18 | # for Multi-threading
19 | from multiprocessing.dummy import Pool as ThreadPool
20 | pool = ThreadPool(5, setaff)
21 |
22 |
23 |
24 | # =======================================================================
25 | def filter_trajs_displacement(trajs):
26 | num_trajs = len(trajs)
27 | disp_stor = np.empty((num_trajs,), np.float32)
28 | for ii in range(num_trajs):
29 | disp_stor[ii] = np.sum(np.sqrt(np.sum((trajs[ii,1:,:]-trajs[ii,0:-1,:])**2,1)))
30 | # Remove trajectories that have very low displacement
31 | good_trajs = np.flatnonzero(disp_stor>-1) # note: a threshold of -1 keeps every trajectory; raise it to actually filter
32 |
33 | return good_trajs
34 |
35 |
36 | # =======================================================================
37 | def filter_trajs_kmeans(trajs, dec_frames, num_centroids):
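# Pick num_centroids representative trajectories: drop nearly static tracks,
# run k-means on the start-point-subtracted motion vectors of the first
# dec_frames frames, then return the trajectory nearest to each centroid.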
38 | num_trajs = len(trajs)
39 | traj_vec_stor = np.empty((num_trajs, (dec_frames-1)*2), np.float32)
40 | disp_stor = np.empty((num_trajs,), np.float32)
41 |
42 | for ii in range(num_trajs):
43 | traj = trajs[ii,0:dec_frames,:] # n-by-2
44 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point
45 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1)))
46 | # Remove trajectories that have very low displacement
47 | good_trajs = np.flatnonzero(disp_stor>0.4)
48 | traj_vec_stor = traj_vec_stor[good_trajs,:]
49 |
50 | if traj_vec_stor.shape[0] < num_centroids: # too few points
51 | print("kmeans: TOO FEW USABLE KEYPOINTS")
52 | return good_trajs[np.arange(0,traj_vec_stor.shape[0])] # use all of them (arange upper bound is exclusive)
53 |
54 | # k-means on vectors
55 | #num_centroids = 10
56 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100)
57 | centroids,_ = kmeans(traj_vec_stor,num_centroids, iter=100)
58 |
59 | # Find the nearest vectors to centroids
60 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # 10-dim
61 |
62 | rep = good_trajs[rep]
63 |
64 | return rep # return the index of K most representative trajectories
65 |
66 | # ==========================================================================
67 |
68 | # This time we don't do clustering
69 | # Setting parameters
70 | CLIP_LENGTH = 10
71 |
72 |
73 | # Load video...
74 | #for vid_idx in range(NUM_VIDEOS):
75 | def worker(idx):
76 | print("Processing %d/%d" % (idx, len(job_stor)))
77 |
78 | vid_id, frame_count, cam_name, start_frame = job_stor[idx]
79 |
80 | for ff in range(CLIP_LENGTH):
81 | img_path = os.path.join(kitti_path_prefix, '{:02d}'.format(vid_id), cam_name, '{:06d}.png'.format(ff+start_frame))
82 | img_data = cv2.imread(img_path) # h w c
83 | img_data = cv2.resize(img_data, (422,128))
84 | img_data = img_data[:,83:339,:]
85 |
86 | #img_data = cv2.resize(img_data, dsize=None, fx=0.25, fy=0.25)
87 | if ff == 0:
88 | height = img_data.shape[0]
89 | width = img_data.shape[1]
90 | vid_seq = np.empty([CLIP_LENGTH,height,width,3], dtype=np.uint8)
91 | vid_seq[ff,:,:,:] = img_data
92 |
93 | # Calculate trajectories
94 | vid_seq_cptr = ffi.cast("char *", vid_seq.ctypes.data)
95 | traj_ret = ffi.new("Ret[]", 1)
96 | # note that a lot more parameters can be modified in DenseTrackStab.cpp.
97 | libtest.main_like(vid_seq_cptr, img_data.shape[1], img_data.shape[0], CLIP_LENGTH, traj_ret)
98 | #print(traj_ret[0].traj_length)
99 | #print(traj_ret[0].num_trajs)
100 | #print(traj_ret[0].out_trajs[0])
101 | trajs = np.frombuffer(ffi.buffer(traj_ret[0].out_trajs, traj_ret[0].traj_length*traj_ret[0].num_trajs*2*4), dtype=np.float32) # 2 coords per point, 4 bytes per float32
102 | trajs = np.resize(trajs,[traj_ret[0].num_trajs,traj_ret[0].traj_length,2])
103 | #print(trajs.shape)
104 | libtest.free_mem()
105 |
106 | #filtered_trajs = filter_trajs_kmeans(trajs, DEC_FRAMES, TRAJS_PER_VIDEO)
107 | filtered_trajs = filter_trajs_displacement(trajs)
108 |
109 | if len(filtered_trajs) == 0:
110 | print('No Trajectory detected!!!')
111 | else:
112 | # Write result to HDF5
113 | # %06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount)
114 | h5_kt_bc_traj = h5_kt_bc.require_dataset('%06d_%04d_%04d_%s' % (start_frame+1, CLIP_LENGTH, filtered_trajs.size, uuid.uuid1()), shape=(filtered_trajs.size, CLIP_LENGTH, 2), dtype='float32')
115 | h5_kt_bc_traj[:,:,:] = trajs[filtered_trajs[:],:,:]
116 | h5_kt_bc_traj.attrs['VidNo'] = vid_id
117 | h5_kt_bc_traj.attrs['StartFrame'] = start_frame
118 | h5_kt_bc_traj.attrs['TrajLen'] = CLIP_LENGTH
119 | h5_kt_bc_traj.attrs['TrajCount'] = filtered_trajs.size
120 | h5_kt_bc_traj.attrs['CamName'] = cam_name
121 | h5_kt_bc_traj.attrs['VidResH'] = height
122 | h5_kt_bc_traj.attrs['VidResW'] = width
123 | f.flush()
124 |
125 | if __name__ == "__main__":
126 | # ========================================================================
127 | # Load KITTI dataset
128 | kitti_path_prefix = '/data1/Video_Prediction/dataset/KITTI/dataset/sequences' # [EDIT ME!]
129 | def get_num(x):
130 | return int(''.join(ele for ele in x if ele.isdigit()))
131 | frame_count_stor = []
132 | for vid_id in range(21):
133 | vid_path_prefix = os.path.join(kitti_path_prefix, '{:02d}'.format(vid_id), 'image_2')
134 | video_file_list = os.listdir(vid_path_prefix)
135 | frame_count = 0
136 | for filename in video_file_list:
137 | frame_count = max(get_num(filename),frame_count)
138 | print('Video {}, {} frames'.format(vid_id, frame_count))
139 | frame_count_stor.append(frame_count+1) # file name starts from 0
140 | # 16 / 5 split
141 | test_split = [15, 11, 7, 5, 4]
142 | train_split = list(set(range(21)) - set(test_split))
143 | frame_count_stor_train = [frame_count_stor[x] for x in train_split]
144 | frame_count_stor_test = [frame_count_stor[x] for x in test_split]
145 | ##frame_count_stor_train_cumsum = np.cumsum(frame_count_stor_train)
146 | ##train_vid_probs = frame_count_stor_train_cumsum/frame_count_stor_train_cumsum[-1]
147 | ##print(train_vid_probs)
148 |
149 | ## Dense sampling procedure
150 | print('Dense sampling videos......')
151 | job_stor = []
152 | for vid_id in train_split: # [EDIT ME!] you might want test_split
153 | frame_count = frame_count_stor[vid_id]
154 | for offset in range(0, frame_count - CLIP_LENGTH + 1, 1):
155 | job_stor.append((vid_id, frame_count, 'image_2', offset))
156 | job_stor.append((vid_id, frame_count, 'image_3', offset))
157 |
158 | print(len(job_stor))
159 |
160 |
161 | # Load C extension......
162 | ffi = FFI()
163 | ffi.cdef('''
164 | typedef struct
165 | {
166 | int traj_length;
167 | int num_trajs;
168 | float* out_trajs;
169 | } Ret;
170 |
171 | void free_mem();
172 | void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret * ret);
173 | ''')
174 | libtest = ffi.dlopen("./release/DenseTrackStab")
175 |
176 | # Load HDF5 database......
177 | f = h5py.File("traj_stor_train.h5", 'a', libver='latest') # Supports Single-Write-Multiple-Read # [EDIT ME!] this is the name of the produced file containing trajectories
178 | h5_kt = f.require_group("/KITTITraj")
179 | #h5_kt_bv = h5_pa.require_group("by_video") # /KITTITraj/by_video/%04d(videoNo)/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount)
180 | h5_kt_bc = h5_kt.require_group("by_clip") # /KITTITraj/by_clip/%02d_%04d_%04d_uuid1(video, startframe, len)
181 | f.swmr_mode = True
182 |
183 | pool.map(worker, range(len(job_stor)))
184 |
185 |
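# Hedged sketch (not run by this script): reading back the trajectories
# written above; group layout and attribute names follow worker().
"""
import h5py
f = h5py.File("traj_stor_train.h5", 'r', libver='latest', swmr=True)
for name, ds in f['/KITTITraj/by_clip'].items():
    trajs = ds[()]  # (TrajCount, TrajLen, 2) float32, (x, y) pixel coords
    print(name, ds.attrs['VidNo'], ds.attrs['StartFrame'], ds.attrs['CamName'], trajs.shape)
"""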
--------------------------------------------------------------------------------
/offline_traj/for_KITTI/make/dep.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2009 Alexander Kl"aser
4 | #
5 | # This piece is free software; you can redistribute it and/or
6 | # modify it under the terms of the GNU General Public License
7 | # as published by the Free Software Foundation; either version 2
8 | # of the License, or (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 | #
19 | # This software has been downloaded from:
20 | # http://lear.inrialpes.fr/people/klaeser/software
21 | #
22 |
23 | import sys
24 | import os
25 | import string
26 | import os.path
27 | import re
28 |
29 | HELP_USAGE = """
30 | Usage: dep.py <rule-target> <link-file> <build-dir> <root-dep-file> [include-dir ...]
31 | """
32 |
33 | regSuffix = re.compile(r"\.[^.]*$")
34 | regSrc = re.compile(r"^.*\.(c|cc|cpp)$")
35 | regDep = re.compile(r"^.*\.d$")
36 | regDepSplit = re.compile(r"\s*\\*\s*")
37 |
38 | suffixes = ['.cpp', '.c', '.cc']
39 | includeDirs = []
40 |
41 |
42 | def parseDepFile(fileName):
43 | # read in the dependency file
44 | depFile = open(fileName, 'r')
45 | depStr = depFile.read()
46 |
47 | # discard everything up to the colon
48 | colonPos = depStr.find(":")
49 | assert colonPos > 0, "the dependency file '" + fileName + "' does not have the correct format"
50 | depStr = depStr[colonPos + 1:]
51 |
52 | # collect all included files
53 | return regDepSplit.split(depStr)
54 |
55 |
56 | def findSourceFile(headerFile):
57 | # get the basename without extension
58 | headerFile = regSuffix.sub('', headerFile)
59 | if not headerFile:
60 | return None
61 |
62 | # iterate over known suffixes
63 | for suffix in suffixes:
64 | srcFile = headerFile + suffix
65 |
66 | # check whether a source file corresponding to the header exists
67 | if os.path.exists(srcFile):
68 | return srcFile
69 |
70 | # we add to the file path directory by directory and check whether it
71 | # exists in one of the include directories
72 | i = headerFile.find('/') + 1
73 | if i != 1:
74 | i = 0
75 | while True:
76 | # check whether a source file exists in one of the given include dirs
77 | for dir in includeDirs:
78 | # check all suffixes for source files
79 | for suffix in suffixes:
80 | srcFile = os.path.join(dir, headerFile[i:] + suffix)
81 | #srcFile = os.path.abspath(srcFile)
82 | if os.path.exists(srcFile):
83 | return srcFile
84 |
85 | # find next position of '/'
86 | i = headerFile.find('/', i) + 1
87 | if i <= 0:
88 | break
89 |
90 | return None
91 |
92 |
93 | def main(argv):
94 | global includeDirs
95 |
96 | # check command line parameters
97 | if len(sys.argv) < 5:
98 | print HELP_USAGE
99 | return
100 |
101 | args = sys.argv
102 | args.pop(0)
103 | ruleTarget = args.pop(0)
104 | linkFile = args.pop(0)
105 | buildDir = args.pop(0)
106 | rootDepFile = args.pop(0)
107 | includeDirs = args
108 |
109 |
110 | # scan all dependency files for files we need to link to
111 | # do this recursively starting at the root dependency file
112 | linkFiles = set()
113 | incFiles = set()
114 | depFileStack = set([rootDepFile])
115 | depFilesDone = set()
116 | while depFileStack:
117 | # get the next dependency file to process from the stack
118 | depFile = depFileStack.pop()
119 | if depFile in depFilesDone:
120 | continue
121 | depFilesDone.add(depFile)
122 |
123 | # iterate over all source files in the dependency file
124 | for nextFile in parseDepFile(depFile):
125 | newDepFile = ""
126 |
127 | # if we have a source file, we need to link against it
128 | if regSrc.match(nextFile):
129 | linkFiles.add(nextFile)
130 | newDepFile = buildDir + "/" + regSuffix.sub(".d", nextFile)
131 |
132 | # check whether a .cpp/.c/.cc file exist
133 | srcFile = findSourceFile(nextFile)
134 | if srcFile != None:
135 | linkFiles.add(srcFile)
136 | newDepFile = buildDir + "/" + regSuffix.sub(".d", srcFile)
137 |
138 | # if the corresponding .d file exists as parameter, add it to the stack
139 | if newDepFile and os.path.exists(newDepFile):
140 | depFileStack.add(newDepFile)
141 |
142 | #
143 | # generate all necessary rules
144 | #
145 |
146 | # all includes of dependency files
147 | for i in linkFiles:
148 | i = regSuffix.sub(".d", i)
149 | print "-include " + buildDir + "/" + i
150 | print
151 |
152 | # dependencies for link file
153 | print linkFile + ": \\"
154 | for i in linkFiles:
155 | i = regSuffix.sub(".d", i)
156 | print "\t" + buildDir + "/" + i + " \\"
157 | print
158 |
159 | # print out all files we need to link against
160 | print ruleTarget + ": " + linkFile + " \\"
161 | for i in linkFiles:
162 | i = regSuffix.sub(".o", i)
163 | print "\t" + buildDir + "/" + i + " \\"
164 | print
165 |
166 |
167 | if __name__ == "__main__":
168 | main( sys.argv )
169 |
170 |
171 |
--------------------------------------------------------------------------------
/offline_traj/for_KITTI/make/generic.mk:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (C) 2009 Alexander Kl"aser
3 | #
4 | # This piece is free software; you can redistribute it and/or
5 | # modify it under the terms of the GNU General Public License
6 | # as published by the Free Software Foundation; either version 2
7 | # of the License, or (at your option) any later version.
8 | #
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program; if not, write to the Free Software
16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 | #
18 | # This software has been downloaded from:
19 | # http://lear.inrialpes.fr/people/klaeser/software
20 | #
21 | #
22 | # Variables that need to be set in the Makefile that includes this file:
23 | # TARGETS all files that are executables, without their .cpp extension
24 | # BUILDDIR temporary dir where things are compiled to (optional, by default ".build")
25 | # BINDIR dir where executables are linked to (optional, by default "bin")
26 | # SRCDIRS list of directories in which source files are located
27 | # this variable needs to be set if you do not have your source and
28 | # include files located in the same directory!
29 | #
30 | # Variables used for compiling/linking:
31 | # CXXFLAGS flags for compiling
32 | # LDFLAGS flags used for linking
33 | # LDLIBS list of libraries to be linked
34 | # CXX compiler/linker (g++ by default)
35 | #
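# Illustrative example (mirroring the Makefile one directory up):
#   TARGETS := DenseTrackStab Video
#   include make/generic.mk
# SRCDIRS only needs to be set when sources and headers live in different
# directories, as noted above.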
36 |
37 | # set paths for the dependency tool and gcc
38 | DEP = make/dep.py
39 |
40 | # set some standard directories in case they have not been set
41 | BUILDDIR ?= .build
42 | BINDIR ?= bin
43 |
44 | # all include files
45 | INCLUDES := $(addprefix $(BUILDDIR)/,$(TARGETS:=.l))
46 |
47 |
48 | #
49 | # some general rules
50 | #
51 |
52 | .PHONY: all clean
53 | .PRECIOUS: $(BUILDDIR)/%.d
54 |
55 | all: $(BINDIR) $(addprefix $(BINDIR)/,$(notdir $(TARGETS)))
56 | @echo "=== done ==="
57 |
58 | $(INCLUDES): $(BUILDDIR)
59 |
60 | clean:
61 | @echo "=== cleaning up ==="
62 | @rm -rf $(BUILDDIR)
63 |
64 | $(BUILDDIR) $(BINDIR):
65 | @echo "=== creating directory: $@ ==="
66 | @mkdir -p $@
67 |
68 |
69 | #
70 | # rules for creating dependency files
71 | #
72 |
73 | # dependencies of .cpp files on other files
74 | $(BUILDDIR)/%.d: %.cpp
75 | @echo "=== creating dependency file: $@ ==="
76 | @test -e $(dir $@) || mkdir -p $(dir $@)
77 | g++ $(CXXFLAGS) -MM -MT $(BUILDDIR)/$*.o -MT $(BUILDDIR)/$*.d -MF $@ $<
78 |
79 | # dependencies for the linking
80 | %.so.l %.l: %.d
81 | @echo "=== creating dependency file: $@ ==="
82 | @test -e $(dir $@) || mkdir -p $(dir $@)
83 | $(DEP) "$(BINDIR)/$(@F:.l=)" $*.l $(BUILDDIR) $< $(SRCDIRS) > $@
84 |
85 |
86 | #
87 | # rules for compiling and linking
88 | # (link dependencies are defined in .l files)
89 | #
90 |
91 | # compiling
92 | $(BUILDDIR)/%.o: %.cpp
93 | @echo "=== compiling: $@ ==="
94 | @test -e $(dir $@) || mkdir -p $(dir $@)
95 | $(CXX) -fPIC $(CXXFLAGS) -c -o $@ $<
96 |
97 | # linking for shared libraries
98 | $(BINDIR)/%.so:
99 | @echo "=== linking: $@ ==="
100 | @rm -f $@
101 | $(CXX) -shared $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS)
102 |
103 | # linking
104 | $(BINDIR)/%:
105 | @echo "=== linking: $@ ==="
106 | @rm -f $@
107 | $(CXX) $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS)
108 |
109 | %: %.o
110 | %.h: ;
111 | %.hpp: ;
112 | %.c: ;
113 | %.cpp: ;
114 |
115 |
116 | #
117 | # include dependency files
118 | #
119 |
120 | ifneq ($(MAKECMDGOALS),clean)
121 | -include $(INCLUDES)
122 | endif
123 |
--------------------------------------------------------------------------------
/offline_traj/for_KITTI/view_traj.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import cv2
4 | import matplotlib.pyplot as plt
5 |
6 | # For trajectory storage
7 | import h5py
8 |
9 | # Setting parameters
10 | DATASET_DIR = '../../../dataset/Penn_Action' # [EDIT ME!] left over from the Penn Action variant; point this and the HDF5 group below at the trajectories you generated
11 |
12 | f = h5py.File("traj_stor.h5", 'r', libver='latest')
13 | # /PennActionTraj/by_video/%04d(videoNo)/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount)
14 | db = f["/PennActionTraj/by_video"]
15 |
16 | fig = plt.figure()
17 |
18 | for vid_name in db.keys():
19 | for clip_name in db[vid_name].keys():
20 | clip_start = db[vid_name][clip_name].attrs['StartFrame']
21 | clip_len = db[vid_name][clip_name].attrs['TrajLen']
22 | clip_num_trajs = db[vid_name][clip_name].attrs['TrajCount']
23 | clip_traj_data = db[vid_name][clip_name]
24 | for ff in range(clip_len):
25 | plt.clf()
26 | img_path = os.path.join(DATASET_DIR, 'frames', vid_name, '%06d.jpg' % (ff+clip_start))
27 | img_data = cv2.imread(img_path)[:,:,(2,1,0)] # h w c
28 | plt.imshow(img_data)
29 | for kk in range(clip_num_trajs):
30 | traj = clip_traj_data[kk,:,:]
31 | plt.scatter(traj[ff,0]*2, traj[ff,1]*2)
32 | fig.canvas.draw()
33 | plt.pause(0.001)
34 | #plt.waitforbuttonpress()
35 | #plt.show()
36 |
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/DenseTrackStab.h:
--------------------------------------------------------------------------------
1 | #ifndef DENSETRACKSTAB_H_
2 | #define DENSETRACKSTAB_H_
3 |
4 | #include <time.h>
5 | #include <stdio.h>
6 | #include <stdlib.h>
7 | #include <string.h>
8 | #include <math.h>
9 | #include <limits.h>
10 | #include <float.h>
11 | #include <unistd.h>
12 | #include <libgen.h>
13 | #include <iostream>
14 | #include <fstream>
15 | #include <vector>
16 | #include <list>
17 | #include <string>
18 |
19 | #include "opencv2/calib3d/calib3d.hpp"
20 | #include "opencv2/highgui/highgui.hpp"
21 | #include "opencv2/imgproc/imgproc.hpp"
22 | #include "opencv2/xfeatures2d.hpp"
23 | #include "opencv2/core/core.hpp"
24 | //#include "opencv2/nonfree/nonfree.hpp"
25 |
26 | using namespace cv;
27 |
28 | typedef struct
29 | {
30 | int traj_length;
31 | int num_trajs;
32 | float* out_trajs;
33 | } Ret;
34 |
35 | extern "C" void free_mem();
36 | extern "C" void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret* ret);
37 |
38 | int start_frame = 0;
39 | int end_frame = INT_MAX;
40 | int scale_num = 8;
41 | const float scale_stride = sqrt(2);
42 | char* bb_file = NULL;
43 |
44 | // parameters for descriptors
45 | int patch_size = 32;
46 | int nxy_cell = 2;
47 | int nt_cell = 3;
48 | float epsilon = 0.05;
49 | const float min_flow = 0.4;
50 |
51 | // parameters for tracking
52 | double quality = 0.001;
53 | int min_distance = 5;
54 | int init_gap = 1;
55 | int track_length = 15;
56 |
57 | // parameters for rejecting trajectory
58 | const float min_var = sqrt(3);
59 | const float max_var = 50;
60 | const float max_dis = 20;
61 |
62 | typedef struct {
63 | int x; // top left corner
64 | int y;
65 | int width;
66 | int height;
67 | }RectInfo;
68 |
69 | typedef struct {
70 | int width; // resolution of the video
71 | int height;
72 | int length; // number of frames
73 | }SeqInfo;
74 |
75 | typedef struct {
76 | int length; // length of the trajectory
77 | int gap; // initialization gap for feature re-sampling
78 | }TrackInfo;
79 |
80 | typedef struct {
81 | int nBins; // number of bins for vector quantization
82 | bool isHof;
83 | int nxCells; // number of cells in x direction
84 | int nyCells;
85 | int ntCells;
86 | int dim; // dimension of the descriptor
87 | int height; // size of the block for computing the descriptor
88 | int width;
89 | }DescInfo;
90 |
91 | // integral histogram for the descriptors
92 | typedef struct {
93 | int height;
94 | int width;
95 | int nBins;
96 | float* desc;
97 | }DescMat;
98 |
99 | class Track
100 | {
101 | public:
102 | std::vector<Point2f> point;
103 | std::vector<Point2f> disp;
104 | std::vector<float> hog;
105 | std::vector<float> hof;
106 | std::vector<float> mbhX;
107 | std::vector<float> mbhY;
108 | int index;
109 |
110 | Track(const Point2f& point_, const TrackInfo& trackInfo, const DescInfo& hogInfo,
111 | const DescInfo& hofInfo, const DescInfo& mbhInfo)
112 | : point(trackInfo.length+1), disp(trackInfo.length), hog(hogInfo.dim*trackInfo.length),
113 | hof(hofInfo.dim*trackInfo.length), mbhX(mbhInfo.dim*trackInfo.length), mbhY(mbhInfo.dim*trackInfo.length)
114 | {
115 | index = 0;
116 | point[0] = point_;
117 | }
118 |
119 | void addPoint(const Point2f& point_)
120 | {
121 | index++;
122 | point[index] = point_;
123 | }
124 | };
125 |
126 | class BoundBox
127 | {
128 | public:
129 | Point2f TopLeft;
130 | Point2f BottomRight;
131 | float confidence;
132 |
133 | BoundBox(float a1, float a2, float a3, float a4, float a5)
134 | {
135 | TopLeft.x = a1;
136 | TopLeft.y = a2;
137 | BottomRight.x = a3;
138 | BottomRight.y = a4;
139 | confidence = a5;
140 | }
141 | };
142 |
143 | class Frame
144 | {
145 | public:
146 | int frameID;
147 | std::vector<BoundBox> BBs;
148 |
149 | Frame(const int& frame_)
150 | {
151 | frameID = frame_;
152 | BBs.clear();
153 | }
154 | };
155 |
156 | #endif /*DENSETRACKSTAB_H_*/
157 |
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/Initialize.h:
--------------------------------------------------------------------------------
1 | #ifndef INITIALIZE_H_
2 | #define INITIALIZE_H_
3 |
4 | #include "DenseTrackStab.h"
5 |
6 | using namespace cv;
7 |
8 | void InitTrackInfo(TrackInfo* trackInfo, int track_length, int init_gap)
9 | {
10 | trackInfo->length = track_length;
11 | trackInfo->gap = init_gap;
12 | }
13 |
14 | DescMat* InitDescMat(int height, int width, int nBins)
15 | {
16 | DescMat* descMat = (DescMat*)malloc(sizeof(DescMat));
17 | descMat->height = height;
18 | descMat->width = width;
19 | descMat->nBins = nBins;
20 |
21 | long size = height*width*nBins;
22 | descMat->desc = (float*)malloc(size*sizeof(float));
23 | memset(descMat->desc, 0, size*sizeof(float));
24 | return descMat;
25 | }
26 |
27 | void ReleDescMat(DescMat* descMat)
28 | {
29 | free(descMat->desc);
30 | free(descMat);
31 | }
32 |
33 | void InitDescInfo(DescInfo* descInfo, int nBins, bool isHof, int size, int nxy_cell, int nt_cell)
34 | {
35 | descInfo->nBins = nBins;
36 | descInfo->isHof = isHof;
37 | descInfo->nxCells = nxy_cell;
38 | descInfo->nyCells = nxy_cell;
39 | descInfo->ntCells = nt_cell;
40 | descInfo->dim = nBins*nxy_cell*nxy_cell;
41 | descInfo->height = size;
42 | descInfo->width = size;
43 | }
44 |
45 | void InitSeqInfo(SeqInfo* seqInfo, char* video)
46 | {
47 | VideoCapture capture;
48 | capture.open(video);
49 |
50 | if(!capture.isOpened())
51 | fprintf(stderr, "Could not initialize capturing..\n");
52 |
53 | // get the number of frames in the video
54 | int frame_num = 0;
55 | while(true) {
56 | Mat frame;
57 | capture >> frame;
58 |
59 | if(frame.empty())
60 | break;
61 |
62 | if(frame_num == 0) {
63 | seqInfo->width = frame.cols;
64 | seqInfo->height = frame.rows;
65 | }
66 |
67 | frame_num++;
68 | }
69 | seqInfo->length = frame_num;
70 | }
71 |
72 | void usage()
73 | {
74 | fprintf(stderr, "Extract improved trajectories from a video\n\n");
75 | fprintf(stderr, "Usage: DenseTrackStab video_file [options]\n");
76 | fprintf(stderr, "Options:\n");
77 | fprintf(stderr, " -h Display this message and exit\n");
78 | fprintf(stderr, " -S [start frame] The start frame to compute feature (default: S=0 frame)\n");
79 | fprintf(stderr, " -E [end frame] The end frame for feature computing (default: E=last frame)\n");
80 | fprintf(stderr, " -L [trajectory length] The length of the trajectory (default: L=15 frames)\n");
81 | fprintf(stderr, " -W [sampling stride] The stride for dense sampling feature points (default: W=5 pixels)\n");
82 | fprintf(stderr, " -N [neighborhood size] The neighborhood size for computing the descriptor (default: N=32 pixels)\n");
83 | fprintf(stderr, " -s [spatial cells] The number of cells in the nxy axis (default: nxy=2 cells)\n");
84 | fprintf(stderr, " -t [temporal cells] The number of cells in the nt axis (default: nt=3 cells)\n");
85 | fprintf(stderr, " -A [scale number] The number of maximal spatial scales (default: 8 scales)\n");
86 | fprintf(stderr, " -I [initial gap] The gap for re-sampling feature points (default: 1 frame)\n");
87 | fprintf(stderr, " -H [human bounding box] The human bounding box file to remove outlier matches (default: None)\n");
88 | }
89 |
90 | bool arg_parse(int argc, char** argv)
91 | {
92 | int c;
93 | bool flag = false;
94 | char* executable = basename(argv[0]);
95 | while((c = getopt (argc, argv, "hS:E:L:W:N:s:t:A:I:H:")) != -1)
96 | switch(c) {
97 | case 'S':
98 | start_frame = atoi(optarg);
99 | flag = true;
100 | break;
101 | case 'E':
102 | end_frame = atoi(optarg);
103 | flag = true;
104 | break;
105 | case 'L':
106 | track_length = atoi(optarg);
107 | break;
108 | case 'W':
109 | min_distance = atoi(optarg);
110 | break;
111 | case 'N':
112 | patch_size = atoi(optarg);
113 | break;
114 | case 's':
115 | nxy_cell = atoi(optarg);
116 | break;
117 | case 't':
118 | nt_cell = atoi(optarg);
119 | break;
120 | case 'A':
121 | scale_num = atoi(optarg);
122 | break;
123 | case 'I':
124 | init_gap = atoi(optarg);
125 | break;
126 | case 'H':
127 | bb_file = optarg;
128 | break;
129 | case 'h':
130 | usage();
131 | exit(0);
132 | break;
133 |
134 | default:
135 | fprintf(stderr, "error parsing arguments at -%c\n Try '%s -h' for help.", c, executable );
136 | abort();
137 | }
138 | return flag;
139 | }
140 |
141 | #endif /*INITIALIZE_H_*/
142 |
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/Makefile:
--------------------------------------------------------------------------------
1 | # set the binaries that have to be built
2 | TARGETS := DenseTrackStab Video
3 |
4 | # set the build configuration set
5 | BUILD := release
6 | #BUILD := debug
7 |
8 | # set bin and build dirs
9 | BUILDDIR := .build_$(BUILD)
10 | BINDIR := $(BUILD)
11 |
12 | # libraries
13 | LDLIBS = $(addprefix -l, $(LIBS) $(LIBS_$(notdir $*)))
14 | LIBS := \
15 | opencv_core opencv_highgui opencv_video opencv_imgproc opencv_calib3d opencv_features2d opencv_xfeatures2d opencv_videoio \
16 | avformat avdevice avutil avcodec swscale
17 |
18 | # set some flags and compiler/linker specific commands
19 | CXXFLAGS = -pipe -D __STDC_CONSTANT_MACROS -D STD=std -Wall -fvisibility=hidden $(CXXFLAGS_$(BUILD)) -I. -I/opt/include
20 | CXXFLAGS_debug := -ggdb
21 | CXXFLAGS_release := -O3 -DNDEBUG -ggdb
22 | #LDFLAGS = -L/opt/lib -pipe -Wall -shared $(LDFLAGS_$(BUILD))
23 | LDFLAGS = -L/opt/lib -pipe -Wall -shared -fPIC -fvisibility=hidden $(LDFLAGS_$(BUILD))
24 | LDFLAGS_debug := -ggdb
25 | LDFLAGS_release := -O3 -ggdb
26 |
27 | include make/generic.mk
28 |
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/README.md:
--------------------------------------------------------------------------------
1 | # NOTES ON USAGE
2 | For generating trajectories from video (Tuned for Robot Push dataset).
3 | As a part of code for "Controllable Video Generation with Sparse Trajectories", CVPR'18.
4 | - **batch_process_dataset.py**: Generate trajectories. To set up, search for comments containing `[EDIT ME!]`. Train/test split is hard-coded inside. A note on its input format follows below.
5 | - **view_traj.py**: Visualize generated trajectories. Detailed instructions inside the file.
6 | - **\*.cpp** & **\*.h**: Code for *Dense Trajectories* algorithm. Slightly modified.
7 |
8 | **Warning: The code is provided in its original form without any cleanup.**
9 |
10 | # NOTES ON MODIFICATIONS
11 | Code originated from:
12 | http://lear.inrialpes.fr/people/wang/dense_trajectories
13 | ```
14 | @inproceedings{wang:2011:inria-00583818:1,
15 | AUTHOR = {Heng Wang and Alexander Kl{\"a}ser and Cordelia Schmid and Cheng-Lin Liu},
16 | TITLE = {{Action Recognition by Dense Trajectories}},
17 | BOOKTITLE = {IEEE Conference on Computer Vision \& Pattern Recognition},
18 | YEAR = {2011},
19 | MONTH = Jun,
20 | PAGES = {3169-3176},
21 | ADDRESS = {Colorado Springs, United States},
22 | URL = {http://hal.inria.fr/inria-00583818/en}
23 | }
24 | ```
25 | - Modified to support more modern versions of OpenCV
26 | - Needs OpenCV >= 3.0 with the "contrib" add-on for SURF and SIFT feature extraction.
27 | - Converted the stand-alone executable into a dynamic library for calling from Python via CFFI
28 |
29 |
30 | # The following is the original README for Dense Trajectories
31 |
32 |
33 | ### Compiling ###
34 |
35 | In order to compile the improved trajectories code, you need to have the following libraries installed in your system:
36 | * OpenCV library (tested with OpenCV-2.4.2)
37 | * ffmpeg library (tested with ffmpeg-0.11.1)
38 |
39 | These library versions were the latest at the time of writing. In case they become outdated, you can also find them on our website: http://lear.inrialpes.fr/people/wang/improved_trajectories
40 |
41 | If these libraries are installed correctly, simply type 'make' to compile the code. The executable will be in the directory './release/'.
42 |
43 | ### test video decoding ###
44 |
45 | The most complicated part of compiling is to install opencv and ffmpeg. To make sure your video is decoded properly, we have a simple code (named 'Video.cpp') for visualization:
46 |
47 | ./release/Video your_video.avi
48 |
49 | If your video plays smoothly, congratulations! You are just one step before getting the features.
50 |
51 | If there is a bug and the video can't be decoded, you first need to fix that bug. You can find plenty of instructions on how to install opencv and ffmpeg on the web.
52 |
53 | ### compute features on a test video ###
54 |
55 | Once you are able to decode the video, computing our features is simple:
56 |
57 | ./release/DenseTrackStab ./test_sequences/person01_boxing_d1_uncomp.avi | gzip > out.features.gz
58 |
59 | Now you want to compare your file out.features.gz with the file that we have computed to verify that everything is working correctly. To do so, type:
60 |
61 | vimdiff out.features.gz ./test_sequences/person01_boxing_d1.gz
62 |
63 | Note that due to different versions of codecs, your features may be slightly different from ours. But the major part should be the same.
64 |
65 | Due to the randomness of RANSAC, you may get different features for some videos. But for the example "person01_boxing_d1_uncomp.avi", I don't observe any randomness.
66 |
67 | There are more explanations about our features on the website, and also a list of FAQ.
68 |
69 | ### History ###
70 |
71 | * October 2013: improved_trajectory_release.tar.gz
72 | The code is an extension of dense_trajectory_release_v1.2.tar.gz
73 |
74 | ### Bugs and extensions ###
75 |
76 | If you find bugs, etc., feel free to drop me a line. Also if you developed some extension to the program, let me know and I can include it in the code. You can find my contact data on my webpage, as well.
77 |
78 | http://lear.inrialpes.fr/people/wang/
79 |
80 | ### LICENSE CONDITIONS ###
81 |
82 | Copyright (C) 2011 Heng Wang
83 |
84 | This program is free software; you can redistribute it and/or
85 | modify it under the terms of the GNU General Public License
86 | as published by the Free Software Foundation; either version 2
87 | of the License, or (at your option) any later version.
88 |
89 | This program is distributed in the hope that it will be useful,
90 | but WITHOUT ANY WARRANTY; without even the implied warranty of
91 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
92 | GNU General Public License for more details.
93 |
94 | You should have received a copy of the GNU General Public License
95 | along with this program; if not, write to the Free Software
96 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
97 |
98 |
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/Video.cpp:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 | #include <iostream>
5 |
6 | #include <opencv2/core/core.hpp>
7 | #include <opencv2/core/core_c.h>
8 | #include <opencv2/highgui/highgui.hpp>
9 | #include <opencv2/highgui/highgui_c.h>
10 | #include <opencv2/imgproc/imgproc.hpp>
11 | #include <opencv2/imgproc/imgproc_c.h>
12 | #include <opencv2/videoio/videoio_c.h>
13 | #include <opencv2/imgproc/types_c.h>
14 | #include <opencv2/core/types_c.h>
15 |
16 | IplImage* image = 0;
17 | IplImage* prev_image = 0;
18 | CvCapture* capture = 0;
19 |
20 | int show = 1;
21 |
22 | int main( int argc, char** argv )
23 | {
24 | int frameNum = 0;
25 |
26 | char* video = argv[1];
27 | capture = cvCreateFileCapture(video);
28 |
29 | if( !capture ) {
30 | printf( "Could not initialize capturing..\n" );
31 | return -1;
32 | }
33 |
34 | if( show == 1 )
35 | cvNamedWindow( "Video", 0 );
36 |
37 | while( true ) {
38 | IplImage* frame = 0;
39 | int i, j, c;
40 |
41 | // get a new frame
42 | frame = cvQueryFrame( capture );
43 | if( !frame )
44 | break;
45 |
46 | if( !image ) {
47 | image = cvCreateImage( cvSize(frame->width,frame->height), 8, 3 );
48 | image->origin = frame->origin;
49 | }
50 |
51 | cvCopy( frame, image, 0 );
52 |
53 | if( show == 1 ) {
54 | cvShowImage( "Video", image);
55 | c = cvWaitKey(3);
56 | if((char)c == 27) break;
57 | }
58 |
59 | std::cerr << "The " << frameNum << "-th frame" << std::endl;
60 | frameNum++;
61 | }
62 |
63 | if( show == 1 )
64 | cvDestroyWindow("Video");
65 |
66 | return 0;
67 | }
68 |
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/batch_process_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from cffi import FFI
4 | import cv2
5 | import matplotlib.pyplot as plt
6 |
7 | from scipy.cluster.vq import kmeans,kmeans2,vq
8 |
9 | # For trajectory storage
10 | import h5py
11 | import uuid
12 |
13 | # For loading dataset MATLAB metadata
14 | import scipy.io as sio
15 |
16 | import random
17 |
18 | # for Multi-threading
19 | from multiprocessing.dummy import Pool as ThreadPool
20 | pool = ThreadPool(10)
21 |
22 |
23 |
24 | # =======================================================================
25 | def filter_trajs_displacement(trajs):
26 | #print(trajs.shape)
27 | num_trajs = len(trajs)
28 | disp_stor = np.empty((num_trajs,), np.float32)
29 | for ii in range(num_trajs):
30 | disp_stor[ii] = np.sum(np.sqrt(np.sum((trajs[ii,1:,:]-trajs[ii,0:-1,:])**2,1)))
31 | # Remove trajectories that have very low displacement
32 | good_trajs = np.flatnonzero(disp_stor>3)
33 |
34 | return good_trajs
35 |
36 |
37 | # =======================================================================
38 | def filter_trajs_kmeans(trajs, dec_frames, num_centroids):
39 | num_trajs = len(trajs)
40 | traj_vec_stor = np.empty((num_trajs, (dec_frames-1)*2), np.float32)
41 | disp_stor = np.empty((num_trajs,), np.float32)
42 |
43 | for ii in range(num_trajs):
44 | traj = trajs[ii,0:dec_frames,:] # n-by-2
45 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point
46 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1)))
47 | # Remove trajectories that have very low displacement
48 | good_trajs = np.flatnonzero(disp_stor>0.4)
49 | traj_vec_stor = traj_vec_stor[good_trajs,:]
50 |
51 | if traj_vec_stor.shape[0] < num_centroids: # too few points
52 | print("kmeans: TOO FEW USABLE KEYPOINTS")
53 | return good_trajs[np.arange(0,traj_vec_stor.shape[0])] # use all of them (arange upper bound is exclusive)
54 |
55 | # k-means on vectors
56 | #num_centroids = 10
57 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100)
58 | centroids,_ = kmeans(traj_vec_stor,num_centroids, iter=100)
59 |
60 | # Find the nearest vectors to centroids
61 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # 10-dim
62 |
63 | rep = good_trajs[rep]
64 |
65 | return rep # return the index of K most representative trajectories
66 |
67 | # ==========================================================================
68 |
69 | # This time we don't do clustering
70 | # Setting parameters
71 | SAMPLES = 5000
72 | CLIP_LENGTH = 20
73 | ORIGINAL_WIDTH = 640
74 | ORIGINAL_HEIGHT = 512
75 |
76 | random.seed()
77 |
78 | # Load video...
79 | #for vid_idx in range(NUM_VIDEOS):
80 | def worker(idx):
81 | print("Processing %d/%d" % (idx, len(job_stor)))
82 | vid_id, start_frame = job_stor[idx]
83 |
84 | for fram_no in range(CLIP_LENGTH):
85 | img_id = fram_no + start_frame
86 | #print('push/push_train/{}/{}.jpg'.format(vid_id, img_id))
87 | img = cv2.imdecode(h5f['push/push_train/{}/{}.jpg'.format(vid_id, img_id)][()], -1)
88 | img = cv2.resize(img, (240,192))
89 | if fram_no == 0:
90 | height = img.shape[0]
91 | width = img.shape[1]
92 | vid_seq = np.empty([CLIP_LENGTH,height,width,3], dtype=np.uint8)
93 | vid_seq[fram_no,:,:,:] = img
94 |
95 | # Calculate trajectories
96 | vid_seq_cptr = ffi.cast("char *", vid_seq.ctypes.data)
97 | traj_ret = ffi.new("Ret[]", 1)
98 | # note that a lot more parameters can be modified in DenseTrackStab.cpp.
99 | libtest.main_like(vid_seq_cptr, width, height, CLIP_LENGTH, traj_ret)
100 | #print(traj_ret[0].traj_length)
101 | #print(traj_ret[0].num_trajs)
102 | #print(traj_ret[0].out_trajs[0])
103 | trajs = np.frombuffer(ffi.buffer(traj_ret[0].out_trajs, traj_ret[0].traj_length*traj_ret[0].num_trajs*2*4), dtype=np.float32) # 2 coords per point, 4 bytes per float32
104 | trajs = np.resize(trajs,[traj_ret[0].num_trajs,traj_ret[0].traj_length,2])
105 | #print(trajs.shape)
106 | libtest.free_mem()
107 |
108 | #filtered_trajs = filter_trajs_kmeans(trajs, DEC_FRAMES, TRAJS_PER_VIDEO)
109 | filtered_trajs = filter_trajs_displacement(trajs)
110 |
111 | if len(filtered_trajs) == 0:
112 | print('No Trajectory detected!!!')
113 | else:
114 | # Write result to HDF5
115 | # %06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount)
116 | h5_rp_bc_traj = h5_rp_bc.require_dataset('%06d_%04d_%04d_%04d_%s' % (vid_id, start_frame, CLIP_LENGTH, filtered_trajs.size, uuid.uuid1()), shape=(filtered_trajs.size, CLIP_LENGTH, 2), dtype='float32')
117 | h5_rp_bc_traj[:,:,:] = trajs[filtered_trajs[:],:,:]
118 | h5_rp_bc_traj.attrs['VidId'] = vid_id
119 | h5_rp_bc_traj.attrs['StartFrame'] = start_frame
120 | h5_rp_bc_traj.attrs['TrajLen'] = CLIP_LENGTH
121 | h5_rp_bc_traj.attrs['TrajCount'] = filtered_trajs.size
122 | h5_rp_bc_traj.attrs['VidResH'] = height
123 | h5_rp_bc_traj.attrs['VidResW'] = width
124 | f.flush()
125 |
126 | if __name__ == "__main__":
127 | # ========================================================================
128 | H5_PATH = '/media/haozekun/512SSD_2/robot_push_h5/robot_push_jpgs.h5' # [EDIT ME!]
129 | DATASET_PATH = 'push/push_train/'
130 | h5f = h5py.File(H5_PATH, 'r', libver='latest')
131 | video_count = h5f['push/push_train'].attrs['video_count'] # [EDIT ME!] push_test
132 |
133 | # Generating sample list...
134 | #video_list = random.sample(xrange(video_count), SAMPLES)
135 | print('Generating sample list...')
136 | job_stor = []
137 | for vid_id in range(video_count):
138 | frame_count = h5f['push/push_train/{}'.format(vid_id)].attrs['frame_count'] # [EDIT ME!] push_test
139 | if frame_count < CLIP_LENGTH:
140 | continue
141 | start_frame = random.randint(0,frame_count-CLIP_LENGTH)
142 | job_stor.append((vid_id,start_frame))
143 | print('{} samples generated...'.format(len(job_stor)))
144 |
145 | # Load C extension......
146 | ffi = FFI()
147 | ffi.cdef('''
148 | typedef struct
149 | {
150 | int traj_length;
151 | int num_trajs;
152 | float* out_trajs;
153 | } Ret;
154 |
155 | void free_mem();
156 | void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret * ret);
157 | ''')
158 | libtest = ffi.dlopen("./release/DenseTrackStab")
159 |
160 | # Load HDF5 database......
161 | f = h5py.File("traj_stor_train.h5", 'a', libver='latest') # Supports Single-Write-Multiple-Read # [EDIT ME!]
162 | h5_rp = f.require_group("RPTraj")
163 | h5_rp_bc = h5_rp.require_group("by_clip") # /KITTITraj/by_clip/%02d_%04d_%04d_uuid1(video, startframe, len)
164 | f.swmr_mode = True
165 |
166 | pool.map(worker, range(len(job_stor))) # sample 5000 clips each time
167 | #for ii in range(len(job_stor)):
168 | # worker(ii)
169 |
170 | print('Done!!!!')
171 |
172 | """
173 | # Now we plot the trajectory out
174 | vid_h = height
175 | vid_w = width
176 | plt.figure()
177 | plt.ylim(vid_h, 0)
178 | plt.xlim(0, vid_w)
179 | for ii in range(trajs.shape[0]):
180 | plt.plot(trajs[ii,:,0], trajs[ii,:,1])
181 |
182 | plt.figure()
183 | plt.imshow(vid_seq[0,:,:,:])
184 | plt.ylim(vid_h, 0)
185 | plt.xlim(0, vid_w)
186 | for topk in range(12): # plot top-12 trajectories
187 | traj = trajs[filtered_trajs[topk],:,:]
188 | #plt.plot(traj[0:4,0], traj[0:4,1])
189 | plt.plot(traj[:,0], traj[:,1])
190 | plt.show()
191 | """
192 |
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/make/dep.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2009 Alexander Kl"aser
4 | #
5 | # This piece is free software; you can redistribute it and/or
6 | # modify it under the terms of the GNU General Public License
7 | # as published by the Free Software Foundation; either version 2
8 | # of the License, or (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 | #
19 | # This software has been downloaded from:
20 | # http://lear.inrialpes.fr/people/klaeser/software
21 | #
22 |
23 | import sys
24 | import os
25 | import string
26 | import os.path
27 | import re
28 |
29 | HELP_USAGE = """
30 | Usage: dep.py <rule-target> <link-file> <build-dir> <root-dep-file> [include-dir ...]
31 | """
32 |
33 | regSuffix = re.compile(r"\.[^.]*$")
34 | regSrc = re.compile(r"^.*\.(c|cc|cpp)$")
35 | regDep = re.compile(r"^.*\.d$")
36 | regDepSplit = re.compile(r"\s*\\*\s*")
37 |
38 | suffixes = ['.cpp', '.c', '.cc']
39 | includeDirs = []
40 |
41 |
42 | def parseDepFile(fileName):
43 | # read in the dependency file
44 | depFile = open(fileName, 'r')
45 | depStr = depFile.read()
46 |
47 | # discard everything up to the colon
48 | colonPos = depStr.find(":")
49 | assert colonPos > 0, "the dependency file '" + fileName + "' does not have the correct format"
50 | depStr = depStr[colonPos + 1:]
51 |
52 | # collect all included files
53 | return regDepSplit.split(depStr)
54 |
55 |
56 | def findSourceFile(headerFile):
57 | # get the basename without extension
58 | headerFile = regSuffix.sub('', headerFile)
59 | if not headerFile:
60 | return None
61 |
62 | # iterate over known suffixes
63 | for suffix in suffixes:
64 | srcFile = headerFile + suffix
65 |
66 | # check whether a source file corresponding to the header exists
67 | if os.path.exists(srcFile):
68 | return srcFile
69 |
70 | # we add to the file path directory by directory and check whether it
71 | # exists in one of the include directories
72 | i = headerFile.find('/') + 1
73 | if i != 1:
74 | i = 0
75 | while True:
76 | # check whether a source file exists in one of the given include dirs
77 | for dir in includeDirs:
78 | # check all suffixes for source files
79 | for suffix in suffixes:
80 | srcFile = os.path.join(dir, headerFile[i:] + suffix)
81 | #srcFile = os.path.abspath(srcFile)
82 | if os.path.exists(srcFile):
83 | return srcFile
84 |
85 | # find next position of '/'
86 | i = headerFile.find('/', i) + 1
87 | if i <= 0:
88 | break
89 |
90 | return None
91 |
92 |
93 | def main(argv):
94 | global includeDirs
95 |
96 | # check command line parameters
97 | if len(sys.argv) < 5:
98 | print HELP_USAGE
99 | return
100 |
101 | args = sys.argv
102 | args.pop(0)
103 | ruleTarget = args.pop(0)
104 | linkFile = args.pop(0)
105 | buildDir = args.pop(0)
106 | rootDepFile = args.pop(0)
107 | includeDirs = args
108 |
109 |
110 | # scan all dependency files for files we need to link to
111 | # do this recursively starting at the root dependency file
112 | linkFiles = set()
113 | incFiles = set()
114 | depFileStack = set([rootDepFile])
115 | depFilesDone = set()
116 | while depFileStack:
117 | # get the next dependency file to process from the stack
118 | depFile = depFileStack.pop()
119 | if depFile in depFilesDone:
120 | continue
121 | depFilesDone.add(depFile)
122 |
123 | # iterate over all source files in the dependency file
124 | for nextFile in parseDepFile(depFile):
125 | newDepFile = ""
126 |
127 | # if we have a source file, we need to link against it
128 | if regSrc.match(nextFile):
129 | linkFiles.add(nextFile)
130 | newDepFile = buildDir + "/" + regSuffix.sub(".d", nextFile)
131 |
132 | # check whether a .cpp/.c/.cc file exist
133 | srcFile = findSourceFile(nextFile)
134 | if srcFile != None:
135 | linkFiles.add(srcFile)
136 | newDepFile = buildDir + "/" + regSuffix.sub(".d", srcFile)
137 |
138 | # if the corresponding .d file exists as parameter, add it to the stack
139 | if newDepFile and os.path.exists(newDepFile):
140 | depFileStack.add(newDepFile)
141 |
142 | #
143 | # generate all necessary rules
144 | #
145 |
146 | # all includes of dependency files
147 | for i in linkFiles:
148 | i = regSuffix.sub(".d", i)
149 | print("-include " + buildDir + "/" + i)
150 | print("")
151 |
152 | # dependencies for link file
153 | print(linkFile + ": \\")
154 | for i in linkFiles:
155 | i = regSuffix.sub(".d", i)
156 | print("\t" + buildDir + "/" + i + " \\")
157 | print("")
158 |
159 | # print out all files we need to link against
160 | print(ruleTarget + ": " + linkFile + " \\")
161 | for i in linkFiles:
162 | i = regSuffix.sub(".o", i)
163 | print("\t" + buildDir + "/" + i + " \\")
164 | print("")
165 |
166 |
167 | if __name__ == "__main__":
168 | main( sys.argv )
169 |
170 |
171 |
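172 | # Example invocation (a sketch of how make/generic.mk calls this script for a
173 | # target named DenseTrackStab, assuming BUILDDIR=.build_release and BINDIR=release):
174 | #
175 | #   make/dep.py release/DenseTrackStab .build_release/DenseTrackStab.l \
176 | #       .build_release .build_release/DenseTrackStab.d > .build_release/DenseTrackStab.l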
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/make/generic.mk:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (C) 2009 Alexander Kl"aser
3 | #
4 | # This piece is free software; you can redistribute it and/or
5 | # modify it under the terms of the GNU General Public License
6 | # as published by the Free Software Foundation; either version 2
7 | # of the License, or (at your option) any later version.
8 | #
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program; if not, write to the Free Software
16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 | #
18 | # This software has been downloaded from:
19 | # http://lear.inrialpes.fr/people/klaeser/software
20 | #
21 | #
22 | # Variables that need to be set in the Makefile that includes this file:
23 | # TARGETS all files that are executables, without their .cpp extension
24 | # BUILDDIR temporary dir where things are compiled to (optional, by default ".build")
25 | # BINDIR dir where executables are linked to (optional, by default "bin")
26 | # SRCDIRS list of directories in which source files are located
27 | # this variable needs to be set if you do not have your source and
28 | # include files located in the same directory!
29 | #
30 | # Variables used for compiling/linking:
31 | # CXXFLAGS flags for compiling
32 | # LDFLAGS flags used for linking
33 | # LDLIBS list of libraries to be linked
34 | # CXX compiler/linker (g++ by default)
35 | #
36 |
37 | # set paths for the dependency tool and gcc
38 | DEP = make/dep.py
39 |
40 | # set some standard directories in case they have not been set
41 | BUILDDIR ?= .build
42 | BINDIR ?= bin
43 |
44 | # all include files
45 | INCLUDES := $(addprefix $(BUILDDIR)/,$(TARGETS:=.l))
46 |
47 |
48 | #
49 | # some general rules
50 | #
51 |
52 | .PHONY: all clean
53 | .PRECIOUS: $(BUILDDIR)/%.d
54 |
55 | all: $(BINDIR) $(addprefix $(BINDIR)/,$(notdir $(TARGETS)))
56 | @echo "=== done ==="
57 |
58 | $(INCLUDES): $(BUILDDIR)
59 |
60 | clean:
61 | @echo "=== cleaning up ==="
62 | @rm -rf $(BUILDDIR)
63 |
64 | $(BUILDDIR) $(BINDIR):
65 | @echo "=== creating directory: $@ ==="
66 | @mkdir -p $@
67 |
68 |
69 | #
70 | # rules for creating dependency files
71 | #
72 |
73 | # dependencies of .cpp files on other files
74 | $(BUILDDIR)/%.d: %.cpp
75 | @echo "=== creating dependency file: $@ ==="
76 | @test -e $(dir $@) || mkdir -p $(dir $@)
77 | g++ $(CXXFLAGS) -MM -MT $(BUILDDIR)/$*.o -MT $(BUILDDIR)/$*.d -MF $@ $<
78 |
79 | # dependencies for the linking
80 | %.so.l %.l: %.d
81 | @echo "=== creating dependency file: $@ ==="
82 | @test -e $(dir $@) || mkdir -p $(dir $@)
83 | $(DEP) "$(BINDIR)/$(@F:.l=)" $*.l $(BUILDDIR) $< $(SRCDIRS) > $@
84 |
85 |
86 | #
87 | # rules for compiling and linking
88 | # (link dependencies are defined in .l files)
89 | #
90 |
91 | # compiling
92 | $(BUILDDIR)/%.o: %.cpp
93 | @echo "=== compiling: $@ ==="
94 | @test -e $(dir $@) || mkdir -p $(dir $@)
95 | $(CXX) -fPIC $(CXXFLAGS) -c -o $@ $<
96 |
97 | # linking for shared libraries
98 | $(BINDIR)/%.so:
99 | @echo "=== linking: $@ ==="
100 | @rm -f $@
101 | $(CXX) -shared $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS)
102 |
103 | # linking
104 | $(BINDIR)/%:
105 | @echo "=== linking: $@ ==="
106 | @rm -f $@
107 | $(CXX) $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS)
108 |
109 | %: %.o
110 | %.h: ;
111 | %.hpp: ;
112 | %.c: ;
113 | %.cpp: ;
114 |
115 |
116 | #
117 | # include dependency files
118 | #
119 |
120 | ifneq ($(MAKECMDGOALS),clean)
121 | -include $(INCLUDES)
122 | endif
123 |
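124 | # Example of a Makefile that includes this file (a minimal sketch, mirroring
125 | # the DenseTrackStab Makefiles in this repository; only TARGETS is mandatory):
126 | #
127 | #   TARGETS := DenseTrackStab Video
128 | #   BUILDDIR := .build_release
129 | #   BINDIR := release
130 | #   include make/generic.mk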
--------------------------------------------------------------------------------
/offline_traj/for_RobotPush/view_traj.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import cv2
4 | import matplotlib.pyplot as plt
5 |
6 | # For trajectory storage
7 | import h5py
8 |
9 | # Setting parameters
10 | TRAJ_H5_PATH = 'traj_stor_train.h5'
11 | JPG_H5_PATH = '/media/haozekun/512SSD_2/robot_push_h5/robot_push_jpgs.h5'
12 |
13 | f_traj = h5py.File(TRAJ_H5_PATH, 'r', libver='latest')
14 | db_traj = f_traj["/RPTraj/by_clip"]
15 |
16 | f_jpg = h5py.File(JPG_H5_PATH, 'r', libver='latest')
17 | #f_jpg = f_jpg['push/push_train/']
18 |
19 | fig = plt.figure()
20 |
21 | for clip_name in db_traj.keys():
22 | video_id = db_traj[clip_name].attrs['VidId']
23 | clip_start = db_traj[clip_name].attrs['StartFrame']
24 | clip_len = db_traj[clip_name].attrs['TrajLen']
25 | clip_num_trajs = db_traj[clip_name].attrs['TrajCount']
26 | clip_traj_data = db_traj[clip_name]
27 |
28 | for ff in range(clip_len):
29 | plt.clf()
30 | img_id = ff + clip_start
31 | img_data = cv2.imdecode(f_jpg['push/push_train/{}/{}.jpg'.format(video_id, img_id)][()], -1)
32 | img_data = cv2.resize(img_data, (240,192))
33 | img_data = img_data[:,:,(2,1,0)] # h w c
34 |
35 | plt.imshow(img_data)
36 | for kk in range(clip_num_trajs):
37 | traj = clip_traj_data[kk,:,:]
38 | plt.scatter(traj[ff,0], traj[ff,1])
39 | fig.canvas.draw()
40 | plt.pause(0.001)
41 | #plt.waitforbuttonpress()
42 | #plt.show()
43 |
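44 | # Expected HDF5 layout (inferred from the reads above; written by the
45 | # batch_process_dataset.py script in this directory):
46 | #   /RPTraj/by_clip/<clip_name>: float32 dataset of shape (TrajCount, TrajLen, 2)
47 | #   holding (x, y) positions per trajectory per frame, with attrs
48 | #   VidId, StartFrame, TrajLen and TrajCount.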
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/DenseTrackStab.h:
--------------------------------------------------------------------------------
1 | #ifndef DENSETRACKSTAB_H_
2 | #define DENSETRACKSTAB_H_
3 |
4 | #include <cstdio>
5 | #include <cstdlib>
6 | #include <cstring>
7 | #include <cmath>
8 | #include <climits>
9 | #include <vector>
10 | #include <list>
11 | #include <string>
12 | #include <iostream>
13 | #include <fstream>
14 | #include <algorithm>
15 | #include <numeric>
16 | #include <unistd.h>
17 | #include <libgen.h>
18 |
19 | #include "opencv2/calib3d/calib3d.hpp"
20 | #include "opencv2/highgui/highgui.hpp"
21 | #include "opencv2/imgproc/imgproc.hpp"
22 | #include "opencv2/xfeatures2d.hpp"
23 | #include "opencv2/core/core.hpp"
24 | //#include "opencv2/nonfree/nonfree.hpp"
25 |
26 | using namespace cv;
27 |
28 | typedef struct
29 | {
30 | int traj_length;
31 | int num_trajs;
32 | float* out_trajs;
33 | } Ret;
34 |
35 | extern "C" void free_mem();
36 | extern "C" void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret* ret);
37 |
38 | int start_frame = 0;
39 | int end_frame = INT_MAX;
40 | int scale_num = 8;
41 | const float scale_stride = sqrt(2);
42 | char* bb_file = NULL;
43 |
44 | // parameters for descriptors
45 | int patch_size = 32;
46 | int nxy_cell = 2;
47 | int nt_cell = 3;
48 | float epsilon = 0.05;
49 | const float min_flow = 0.4;
50 |
51 | // parameters for tracking
52 | double quality = 0.001;
53 | int min_distance = 5;
54 | int init_gap = 1;
55 | int track_length = 15;
56 |
57 | // parameters for rejecting trajectory
58 | const float min_var = sqrt(3);
59 | const float max_var = 50;
60 | const float max_dis = 20;
61 |
62 | typedef struct {
63 | int x; // top left corner
64 | int y;
65 | int width;
66 | int height;
67 | }RectInfo;
68 |
69 | typedef struct {
70 | int width; // resolution of the video
71 | int height;
72 | int length; // number of frames
73 | }SeqInfo;
74 |
75 | typedef struct {
76 | int length; // length of the trajectory
77 | int gap; // initialization gap for feature re-sampling
78 | }TrackInfo;
79 |
80 | typedef struct {
81 | int nBins; // number of bins for vector quantization
82 | bool isHof;
83 | int nxCells; // number of cells in x direction
84 | int nyCells;
85 | int ntCells;
86 | int dim; // dimension of the descriptor
87 | int height; // size of the block for computing the descriptor
88 | int width;
89 | }DescInfo;
90 |
91 | // integral histogram for the descriptors
92 | typedef struct {
93 | int height;
94 | int width;
95 | int nBins;
96 | float* desc;
97 | }DescMat;
98 |
99 | class Track
100 | {
101 | public:
102 | std::vector<Point2f> point;
103 | std::vector<Point2f> disp;
104 | std::vector<float> hog;
105 | std::vector<float> hof;
106 | std::vector<float> mbhX;
107 | std::vector<float> mbhY;
108 | int index;
109 |
110 | Track(const Point2f& point_, const TrackInfo& trackInfo, const DescInfo& hogInfo,
111 | const DescInfo& hofInfo, const DescInfo& mbhInfo)
112 | : point(trackInfo.length+1), disp(trackInfo.length), hog(hogInfo.dim*trackInfo.length),
113 | hof(hofInfo.dim*trackInfo.length), mbhX(mbhInfo.dim*trackInfo.length), mbhY(mbhInfo.dim*trackInfo.length)
114 | {
115 | index = 0;
116 | point[0] = point_;
117 | }
118 |
119 | void addPoint(const Point2f& point_)
120 | {
121 | index++;
122 | point[index] = point_;
123 | }
124 | };
125 |
126 | class BoundBox
127 | {
128 | public:
129 | Point2f TopLeft;
130 | Point2f BottomRight;
131 | float confidence;
132 |
133 | BoundBox(float a1, float a2, float a3, float a4, float a5)
134 | {
135 | TopLeft.x = a1;
136 | TopLeft.y = a2;
137 | BottomRight.x = a3;
138 | BottomRight.y = a4;
139 | confidence = a5;
140 | }
141 | };
142 |
143 | class Frame
144 | {
145 | public:
146 | int frameID;
147 | std::vector<BoundBox> BBs;
148 |
149 | Frame(const int& frame_)
150 | {
151 | frameID = frame_;
152 | BBs.clear();
153 | }
154 | };
155 |
156 | #endif /*DENSETRACKSTAB_H_*/
157 |
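158 | // Notes on the extern "C" entry points above (as driven by
159 | // batch_process_dataset.py): in_video points to in_frames packed BGR frames
160 | // (in_height x in_width x 3, uint8, row-major); main_like fills ret with
161 | // num_trajs trajectories of traj_length (x, y) float pairs in out_trajs,
162 | // and free_mem() releases that buffer.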
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/Initialize.h:
--------------------------------------------------------------------------------
1 | #ifndef INITIALIZE_H_
2 | #define INITIALIZE_H_
3 |
4 | #include "DenseTrackStab.h"
5 |
6 | using namespace cv;
7 |
8 | void InitTrackInfo(TrackInfo* trackInfo, int track_length, int init_gap)
9 | {
10 | trackInfo->length = track_length;
11 | trackInfo->gap = init_gap;
12 | }
13 |
14 | DescMat* InitDescMat(int height, int width, int nBins)
15 | {
16 | DescMat* descMat = (DescMat*)malloc(sizeof(DescMat));
17 | descMat->height = height;
18 | descMat->width = width;
19 | descMat->nBins = nBins;
20 |
21 | long size = height*width*nBins;
22 | descMat->desc = (float*)malloc(size*sizeof(float));
23 | memset(descMat->desc, 0, size*sizeof(float));
24 | return descMat;
25 | }
26 |
27 | void ReleDescMat(DescMat* descMat)
28 | {
29 | free(descMat->desc);
30 | free(descMat);
31 | }
32 |
33 | void InitDescInfo(DescInfo* descInfo, int nBins, bool isHof, int size, int nxy_cell, int nt_cell)
34 | {
35 | descInfo->nBins = nBins;
36 | descInfo->isHof = isHof;
37 | descInfo->nxCells = nxy_cell;
38 | descInfo->nyCells = nxy_cell;
39 | descInfo->ntCells = nt_cell;
40 | descInfo->dim = nBins*nxy_cell*nxy_cell;
41 | descInfo->height = size;
42 | descInfo->width = size;
43 | }
44 |
45 | void InitSeqInfo(SeqInfo* seqInfo, char* video)
46 | {
47 | VideoCapture capture;
48 | capture.open(video);
49 |
50 | if(!capture.isOpened())
51 | fprintf(stderr, "Could not initialize capturing..\n");
52 |
53 | // get the number of frames in the video
54 | int frame_num = 0;
55 | while(true) {
56 | Mat frame;
57 | capture >> frame;
58 |
59 | if(frame.empty())
60 | break;
61 |
62 | if(frame_num == 0) {
63 | seqInfo->width = frame.cols;
64 | seqInfo->height = frame.rows;
65 | }
66 |
67 | frame_num++;
68 | }
69 | seqInfo->length = frame_num;
70 | }
71 |
72 | void usage()
73 | {
74 | fprintf(stderr, "Extract improved trajectories from a video\n\n");
75 | fprintf(stderr, "Usage: DenseTrackStab video_file [options]\n");
76 | fprintf(stderr, "Options:\n");
77 | fprintf(stderr, " -h Display this message and exit\n");
78 | fprintf(stderr, " -S [start frame] The start frame to compute feature (default: S=0 frame)\n");
79 | fprintf(stderr, " -E [end frame] The end frame for feature computing (default: E=last frame)\n");
80 | fprintf(stderr, " -L [trajectory length] The length of the trajectory (default: L=15 frames)\n");
81 | fprintf(stderr, " -W [sampling stride] The stride for dense sampling feature points (default: W=5 pixels)\n");
82 | fprintf(stderr, " -N [neighborhood size] The neighborhood size for computing the descriptor (default: N=32 pixels)\n");
83 | fprintf(stderr, " -s [spatial cells] The number of cells in the nxy axis (default: nxy=2 cells)\n");
84 | fprintf(stderr, " -t [temporal cells] The number of cells in the nt axis (default: nt=3 cells)\n");
85 | fprintf(stderr, " -A [scale number] The number of maximal spatial scales (default: 8 scales)\n");
86 | fprintf(stderr, " -I [initial gap] The gap for re-sampling feature points (default: 1 frame)\n");
87 | fprintf(stderr, " -H [human bounding box] The human bounding box file to remove outlier matches (default: None)\n");
88 | }
89 |
90 | bool arg_parse(int argc, char** argv)
91 | {
92 | int c;
93 | bool flag = false;
94 | char* executable = basename(argv[0]);
95 | while((c = getopt (argc, argv, "hS:E:L:W:N:s:t:A:I:H:")) != -1)
96 | switch(c) {
97 | case 'S':
98 | start_frame = atoi(optarg);
99 | flag = true;
100 | break;
101 | case 'E':
102 | end_frame = atoi(optarg);
103 | flag = true;
104 | break;
105 | case 'L':
106 | track_length = atoi(optarg);
107 | break;
108 | case 'W':
109 | min_distance = atoi(optarg);
110 | break;
111 | case 'N':
112 | patch_size = atoi(optarg);
113 | break;
114 | case 's':
115 | nxy_cell = atoi(optarg);
116 | break;
117 | case 't':
118 | nt_cell = atoi(optarg);
119 | break;
120 | case 'A':
121 | scale_num = atoi(optarg);
122 | break;
123 | case 'I':
124 | init_gap = atoi(optarg);
125 | break;
126 | case 'H':
127 | bb_file = optarg;
128 | break;
129 | case 'h':
130 | usage();
131 | exit(0);
132 | break;
133 |
134 | default:
135 | fprintf(stderr, "error parsing arguments at -%c\n Try '%s -h' for help.", c, executable );
136 | abort();
137 | }
138 | return flag;
139 | }
140 |
141 | #endif /*INITIALIZE_H_*/
142 |
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/Makefile:
--------------------------------------------------------------------------------
1 | # set the binaries that have to be built
2 | TARGETS := DenseTrackStab Video
3 |
4 | # set the build configuration set
5 | BUILD := release
6 | #BUILD := debug
7 |
8 | # set bin and build dirs
9 | BUILDDIR := .build_$(BUILD)
10 | BINDIR := $(BUILD)
11 |
12 | # libraries
13 | LDLIBS = $(addprefix -l, $(LIBS) $(LIBS_$(notdir $*)))
14 | LIBS := \
15 | opencv_core opencv_highgui opencv_video opencv_imgproc opencv_calib3d opencv_features2d opencv_xfeatures2d opencv_videoio \
16 | avformat avdevice avutil avcodec swscale
17 |
18 | # set some flags and compiler/linker specific commands
19 | CXXFLAGS = -pipe -D __STDC_CONSTANT_MACROS -D STD=std -Wall -fvisibility=hidden $(CXXFLAGS_$(BUILD)) -I. -I/opt/include
20 | CXXFLAGS_debug := -ggdb
21 | CXXFLAGS_release := -O3 -DNDEBUG -ggdb
22 | #LDFLAGS = -L/opt/lib -pipe -Wall -shared $(LDFLAGS_$(BUILD))
23 | LDFLAGS = -L/opt/lib -pipe -Wall -shared -fPIC -fvisibility=hidden $(LDFLAGS_$(BUILD))
24 | LDFLAGS_debug := -ggdb
25 | LDFLAGS_release := -O3 -ggdb
26 |
27 | include make/generic.mk
28 |
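29 | # Typing 'make' builds the targets into ./release/ (BINDIR above). Because
30 | # LDFLAGS contains -shared, the DenseTrackStab binary is a dynamic library,
31 | # which batch_process_dataset.py opens via ffi.dlopen("./release/DenseTrackStab").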
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/README.md:
--------------------------------------------------------------------------------
1 | # NOTES ON USAGE
2 | For generating trajectories from videos (tuned for the UCF-101 dataset).
3 | Part of the code for "Controllable Video Generation with Sparse Trajectories", CVPR'18.
4 | - **testlist01.txt** & **trainlist01.txt**: Train & test split we used.
5 | - **batch_process_dataset.py**: Generate trajectories. To set it up, search for comments containing `[EDIT ME!]`.
6 | - **view_traj.py**: Visualize generated trajectories. Detailed instructions inside the file.
7 | - **\*.cpp** & **\*.h**: Code for *Dense Trajectories* algorithm. Slightly modified.
8 |
9 | **Warning: The code is provided in its original form without any cleanup.**
10 |
11 | # NOTES ON MODIFICATIONS
12 | Code originated from:
13 | http://lear.inrialpes.fr/people/wang/dense_trajectories
14 | ```
15 | @inproceedings{wang:2011:inria-00583818:1,
16 | AUTHOR = {Heng Wang and Alexander Kl{\"a}ser and Cordelia Schmid and Cheng-Lin Liu},
17 | TITLE = {{Action Recognition by Dense Trajectories}},
18 | BOOKTITLE = {IEEE Conference on Computer Vision \& Pattern Recognition},
19 | YEAR = {2011},
20 | MONTH = Jun,
21 | PAGES = {3169-3176},
22 | ADDRESS = {Colorado Springs, United States},
23 | URL = {http://hal.inria.fr/inria-00583818/en}
24 | }
25 | ```
26 | - Modified to support more modern versions of OpenCV
27 | - Requires OpenCV >= 3.0 with the "contrib" modules for SURF and SIFT feature extraction.
28 | - Converted the stand-alone executable into a dynamic library that is called from Python via CFFI (see the sketch below).
29 |
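30 | A minimal sketch of how the compiled library is driven from Python via CFFI (mirroring `batch_process_dataset.py` in this directory; the `Ret` struct and function signatures come from `DenseTrackStab.h`, and the clip dimensions are just example values):
31 | ```python
32 | import numpy as np
33 | from cffi import FFI
34 |
35 | ffi = FFI()
36 | # Declarations copied from DenseTrackStab.h
37 | ffi.cdef('''
38 | typedef struct
39 | {
40 |     int traj_length;
41 |     int num_trajs;
42 |     float* out_trajs;
43 | } Ret;
44 | void free_mem();
45 | void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret* ret);
46 | ''')
47 | lib = ffi.dlopen('./release/DenseTrackStab')
48 |
49 | # A clip of decoded BGR frames, shape (frames, height, width, 3), dtype uint8
50 | frames, height, width = 11, 192, 256
51 | vid_seq = np.zeros((frames, height, width, 3), dtype=np.uint8)
52 | ret = ffi.new('Ret[]', 1)
53 | lib.main_like(ffi.cast('char *', vid_seq.ctypes.data), width, height, frames, ret)
54 |
55 | # Copy the trajectories out as a (num_trajs, traj_length, 2) float32 array of (x, y)
56 | n, t = ret[0].num_trajs, ret[0].traj_length
57 | trajs = np.frombuffer(ffi.buffer(ret[0].out_trajs, n * t * 2 * 4),
58 |                       dtype=np.float32).reshape(n, t, 2).copy()
59 | lib.free_mem()
60 | ```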
30 |
31 | # The following is the original README for Dense Trajectories
32 |
33 |
34 | ### Compiling ###
35 |
36 | In order to compile the improved trajectories code, you need to have the following libraries installed in your system:
37 | * OpenCV library (tested with OpenCV-2.4.2)
38 | * ffmpeg library (tested with ffmpeg-0.11.1)
39 |
40 | Currently, the libraries are the latest versions. In case they become outdated, you can also find them on our website: http://lear.inrialpes.fr/people/wang/improved_trajectories
41 |
42 | If these libraries are installed correctly, simply type 'make' to compile the code. The executable will be in the directory './release/'.
43 |
44 | ### test video decoding ###
45 |
46 | The most complicated part of compiling is to install opencv and ffmpeg. To make sure your video is decoded properly, we have a simple code (named 'Video.cpp') for visualization:
47 |
48 | ./release/Video your_video.avi
49 |
50 | If your video plays smoothly, congratulations! You are just one step before getting the features.
51 |
52 | If there is a bug and the video can't be decoded, you first need to fix that. You can find plenty of instructions on how to install opencv and ffmpeg on the web.
53 |
54 | ### compute features on a test video ###
55 |
56 | Once you are able to decode the video, computing our features is simple:
57 |
58 | ./release/DenseTrackStab ./test_sequences/person01_boxing_d1_uncomp.avi | gzip > out.features.gz
59 |
60 | Now you want to compare your file out.features.gz with the file that we have computed to verify that everything is working correctly. To do so, type:
61 |
62 | vimdiff out.features.gz ./test_sequences/person01_boxing_d1.gz
63 |
64 | Note that due to different versions of codecs, your features may be slightly different from ours. But the major part should be the same.
65 |
66 | Due to the randomness of RANSAC, you may get different features for some videos. But for the example "person01_boxing_d1_uncomp.avi", I don't observe any randomness.
67 |
68 | There are more explanations about our features on the website, and also a list of FAQ.
69 |
70 | ### History ###
71 |
72 | * October 2013: improved_trajectory_release.tar.gz
73 | The code is an extension of dense_trajectory_release_v1.2.tar.gz
74 |
75 | ### Bugs and extensions ###
76 |
77 | If you find bugs, etc., feel free to drop me a line. Also if you developed some extension to the program, let me know and I can include it in the code. You can find my contact data on my webpage, as well.
78 |
79 | http://lear.inrialpes.fr/people/wang/
80 |
81 | ### LICENSE CONDITIONS ###
82 |
83 | Copyright (C) 2011 Heng Wang
84 |
85 | This program is free software; you can redistribute it and/or
86 | modify it under the terms of the GNU General Public License
87 | as published by the Free Software Foundation; either version 2
88 | of the License, or (at your option) any later version.
89 |
90 | This program is distributed in the hope that it will be useful,
91 | but WITHOUT ANY WARRANTY; without even the implied warranty of
92 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
93 | GNU General Public License for more details.
94 |
95 | You should have received a copy of the GNU General Public License
96 | along with this program; if not, write to the Free Software
97 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
98 |
99 |
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/Video.cpp:
--------------------------------------------------------------------------------
1 | #include <cstdio>
2 | #include <cstdlib>
3 | #include <cstring>
4 | #include <iostream>
5 |
6 | #include <opencv2/core/core.hpp>
7 | #include <opencv2/core/core_c.h>
8 | #include <opencv2/imgproc/imgproc.hpp>
9 | #include <opencv2/imgproc/imgproc_c.h>
10 | #include <opencv2/imgcodecs/imgcodecs.hpp>
11 | #include <opencv2/highgui/highgui.hpp>
12 | #include <opencv2/highgui/highgui_c.h>
13 | #include <opencv2/videoio/videoio.hpp>
14 | #include <opencv2/videoio/videoio_c.h>
15 |
16 | IplImage* image = 0;
17 | IplImage* prev_image = 0;
18 | CvCapture* capture = 0;
19 |
20 | int show = 1;
21 |
22 | int main( int argc, char** argv )
23 | {
24 | int frameNum = 0;
25 |
26 | char* video = argv[1];
27 | capture = cvCreateFileCapture(video);
28 |
29 | if( !capture ) {
30 | printf( "Could not initialize capturing..\n" );
31 | return -1;
32 | }
33 |
34 | if( show == 1 )
35 | cvNamedWindow( "Video", 0 );
36 |
37 | while( true ) {
38 | IplImage* frame = 0;
39 | int i, j, c;
40 |
41 | // get a new frame
42 | frame = cvQueryFrame( capture );
43 | if( !frame )
44 | break;
45 |
46 | if( !image ) {
47 | image = cvCreateImage( cvSize(frame->width,frame->height), 8, 3 );
48 | image->origin = frame->origin;
49 | }
50 |
51 | cvCopy( frame, image, 0 );
52 |
53 | if( show == 1 ) {
54 | cvShowImage( "Video", image);
55 | c = cvWaitKey(3);
56 | if((char)c == 27) break;
57 | }
58 |
59 | std::cerr << "The " << frameNum << "-th frame" << std::endl;
60 | frameNum++;
61 | }
62 |
63 | if( show == 1 )
64 | cvDestroyWindow("Video");
65 |
66 | return 0;
67 | }
68 |
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/batch_process_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from cffi import FFI
4 | import cv2
5 |
6 | from scipy.cluster.vq import kmeans,kmeans2,vq
7 |
8 | # For trajectory storage
9 | import h5py
10 | import uuid
11 |
12 | import re
13 |
14 | # OpenBLAS affects CPU affinity
15 | os.sched_setaffinity(0,range(os.cpu_count()))
16 | def setaff():
17 | os.sched_setaffinity(0,range(os.cpu_count()))
18 |
19 | # for Multi-threading
20 | from multiprocessing.dummy import Pool as ThreadPool
21 | pool = ThreadPool(5, setaff)
22 |
23 |
24 | # =======================================================================
25 | def filter_trajs_displacement(trajs):
26 | #print(trajs.shape)
27 | num_trajs = len(trajs)
28 | disp_stor = np.empty((num_trajs,), np.float32)
29 | for ii in range(num_trajs):
30 | disp_stor[ii] = np.sum(np.sqrt(np.sum((trajs[ii,1:,:]-trajs[ii,0:-1,:])**2,1)))
31 | # Remove trajectories that have very low displacement
32 | good_trajs = np.flatnonzero(disp_stor>5)
33 |
34 | return good_trajs
35 |
36 |
37 | # =======================================================================
38 | def filter_trajs_kmeans(trajs, dec_frames, num_centroids):
39 | num_trajs = len(trajs)
40 | traj_vec_stor = np.empty((num_trajs, (dec_frames-1)*2), np.float32)
41 | disp_stor = np.empty((num_trajs,), np.float32)
42 |
43 | for ii in range(num_trajs):
44 | traj = trajs[ii,0:dec_frames,:] # n-by-2
45 | traj_vec_stor[ii,:] = (traj[1:,:] - traj[0,:]).flatten() # subtract start point
46 | disp_stor[ii] = np.sum(np.sqrt(np.sum((traj[1:,:]-traj[0:-1,:])**2,1)))
47 | # Remove trajectories that have very low displacement
48 | good_trajs = np.flatnonzero(disp_stor>0.4)
49 | traj_vec_stor = traj_vec_stor[good_trajs,:]
50 |
51 | if traj_vec_stor.shape[0] < num_centroids: # too few points
52 | print("kmeans: TOO FEW USABLE KEYPOINTS")
53 | return good_trajs # fall back to using all of the usable trajectories
54 |
55 | # k-means on vectors
56 | #num_centroids = 10
57 | #centroids,_ = kmeans(traj_vec_stor,k_or_guess=num_centroids, iter=100)
58 | centroids,_ = kmeans(traj_vec_stor,num_centroids, iter=100)
59 |
60 | # Find the nearest vectors to centroids
61 | rep = np.argmin(np.sum((traj_vec_stor[:,np.newaxis,:]-centroids[:,:])**2,2),0) # index of the nearest trajectory for each centroid
62 |
63 | rep = good_trajs[rep]
64 |
65 | return rep # return the index of K most representative trajectories
66 |
67 | # ==========================================================================
68 |
69 | CLIP_LENGTH = 11
70 |
71 | # Load video...
72 | #for vid_idx in range(NUM_VIDEOS):
73 | def worker(idx):
74 | print("Processing %d/%d" % (idx, len(job_stor)))
75 | video_path, length, offset = job_stor[idx]
76 |
77 | #start_frame = random.randint(0,length-CLIP_LENGTH+1-1)
78 | start_frame = offset
79 | for fram_no in range(CLIP_LENGTH):
80 | frame = cv2.imread(video_path+'/'+str(start_frame+fram_no)+'.jpg')
81 | img = cv2.resize(frame, (256,192), interpolation=cv2.INTER_AREA)
82 | if fram_no == 0:
83 | height = img.shape[0]
84 | width = img.shape[1]
85 | vid_seq = np.empty([CLIP_LENGTH,height,width,3], dtype=np.uint8)
86 | vid_seq[fram_no,:,:,:] = img[:,:,:]
87 |
88 | # Calculate trajectories
89 | vid_seq_cptr = ffi.cast("char *", vid_seq.ctypes.data)
90 | traj_ret = ffi.new("Ret[]", 1)
91 | # note that a lot more parameters are hard-coded in DenseTrackStab.cpp due to laziness.
92 | libtest.main_like(vid_seq_cptr, width, height, CLIP_LENGTH, traj_ret)
93 | #print(traj_ret[0].traj_length)
94 | #print(traj_ret[0].num_trajs)
95 | #print(traj_ret[0].out_trajs[0])
96 | trajs = np.frombuffer(ffi.buffer(traj_ret[0].out_trajs, traj_ret[0].traj_length*traj_ret[0].num_trajs*2*4), dtype=np.float32)
97 | trajs = np.resize(trajs,[traj_ret[0].num_trajs,traj_ret[0].traj_length,2])
98 | #print(trajs.shape)
99 | libtest.free_mem()
100 |
101 | #filtered_trajs = filter_trajs_kmeans(trajs, 15, 10)
102 | filtered_trajs = filter_trajs_displacement(trajs)
103 |
104 | if len(filtered_trajs) == 0:
105 | print('No Trajectory detected!!!')
106 | else:
107 | # Write result to HDF5
108 | # %06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount)
109 | h5_ucf_bc_traj = h5_ucf_bc.require_dataset('%06d_%04d_%04d_%s' % (start_frame+1, CLIP_LENGTH, filtered_trajs.size, uuid.uuid1()), shape=(filtered_trajs.size, CLIP_LENGTH, 2), dtype='float32')
110 | h5_ucf_bc_traj[:,:,:] = trajs[filtered_trajs[:],:,:]
111 | h5_ucf_bc_traj.attrs['VidPath'] = video_path
112 | h5_ucf_bc_traj.attrs['StartFrame'] = start_frame
113 | h5_ucf_bc_traj.attrs['TrajLen'] = CLIP_LENGTH
114 | h5_ucf_bc_traj.attrs['TrajCount'] = filtered_trajs.size
115 | h5_ucf_bc_traj.attrs['VidResH'] = height
116 | h5_ucf_bc_traj.attrs['VidResW'] = width
117 | f.flush()
118 |
119 | if __name__ == "__main__":
120 | # ========================================================================
121 | # Load UCF101 dataset
122 | DATASET_DIR = '/media/haozekun/512SSD_2/UCF101_seq/UCF-101' # [EDIT ME!]
123 |
124 | # Load split file:
125 | f = open('trainlist01.txt','r') # Sample: ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi # [EDIT ME!]
126 | job_stor = []
127 | for line in f:
128 | vid_name = line.split()[0]
129 | video_path = os.path.join(DATASET_DIR, vid_name)
130 | img_list = os.listdir(video_path)
131 | frame_count = 0
132 | for filename in img_list:
133 | frame_count = max(frame_count, int(filename.split('.')[0]))
134 | frame_count += 1
135 | for offset in range(0, frame_count - CLIP_LENGTH + 1, 8): # Stride = 8
136 | job_stor.append((video_path, frame_count, offset))
137 | f.close()
138 |
139 | print('Job count: {:d}'.format(len(job_stor))) # 13320, or 9537
140 |
141 | # Load C extension......
142 | ffi = FFI()
143 | ffi.cdef('''
144 | typedef struct
145 | {
146 | int traj_length;
147 | int num_trajs;
148 | float* out_trajs;
149 | } Ret;
150 |
151 | void free_mem();
152 | void main_like(char* in_video, int in_width, int in_height, int in_frames, Ret * ret);
153 | ''')
154 | libtest = ffi.dlopen("./release/DenseTrackStab")
155 |
156 | # Load HDF5 database......
157 | f = h5py.File("traj_stor_train.h5", 'a', libver='latest') # Supports Single-Write-Multiple-Read # [EDIT ME!]
158 | h5_ucf = f.require_group("UCFTraj")
159 | #h5_ucf_bv = h5_ucf.require_group("by_video") # per-video layout (unused)
160 | h5_ucf_bc = h5_ucf.require_group("by_clip") # /UCFTraj/by_clip/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount)
161 | f.swmr_mode = True
162 |
163 | pool.map(worker, range(len(job_stor)))
164 |
165 |
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/make/dep.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2009 Alexander Kl"aser
4 | #
5 | # This piece is free software; you can redistribute it and/or
6 | # modify it under the terms of the GNU General Public License
7 | # as published by the Free Software Foundation; either version 2
8 | # of the License, or (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 | #
19 | # This software has been downloaded from:
20 | # http://lear.inrialpes.fr/people/klaeser/software
21 | #
22 |
23 | import sys
24 | import os
25 | import string
26 | import os.path
27 | import re
28 |
29 | HELP_USAGE = """
30 | Usage: dep.py <rule target> <link file> <build dir> <root dep file> [include dir ...]
31 | """
32 |
33 | regSuffix = re.compile(r"\.[^.]*$")
34 | regSrc = re.compile(r"^.*\.(c|cc|cpp)$")
35 | regDep = re.compile(r"^.*\.d$")
36 | regDepSplit = re.compile(r"\s*\\*\s*")
37 |
38 | suffixes = ['.cpp', '.c', '.cc']
39 | includeDirs = []
40 |
41 |
42 | def parseDepFile(fileName):
43 | # read in the dependency file
44 | depFile = open(fileName, 'r')
45 | depStr = depFile.read()
46 |
47 | # discard everything up to the colon
48 | colonPos = depStr.find(":")
49 | assert colonPos > 0, "the dependency file '" + fileName + "' does not have the correct format"
50 | depStr = depStr[colonPos + 1:]
51 |
52 | # collect all included files
53 | return regDepSplit.split(depStr)
54 |
55 |
56 | def findSourceFile(headerFile):
57 | # get the basename without extension
58 | headerFile = regSuffix.sub('', headerFile)
59 | if not headerFile:
60 | return None
61 |
62 | # iterate over known suffixes
63 | for suffix in suffixes:
64 | srcFile = headerFile + suffix
65 |
66 | # check whether a source file corresponding to the header exists
67 | if os.path.exists(srcFile):
68 | return srcFile
69 |
70 | # we add to the file path directory by directory and check whether it
71 | # exists in one of the include directories
72 | i = headerFile.find('/') + 1
73 | if i != 1:
74 | i = 0
75 | while True:
76 | # check whether a source file exists in one of the given include dirs
77 | for dir in includeDirs:
78 | # check all suffixes for source files
79 | for suffix in suffixes:
80 | srcFile = os.path.join(dir, headerFile[i:] + suffix)
81 | #srcFile = os.path.abspath(srcFile)
82 | if os.path.exists(srcFile):
83 | return srcFile
84 |
85 | # find next position of '/'
86 | i = headerFile.find('/', i) + 1
87 | if i <= 0:
88 | break
89 |
90 | return None
91 |
92 |
93 | def main(argv):
94 | global includeDirs
95 |
96 | # check command line parameters
97 | if len(sys.argv) < 5:
98 | print(HELP_USAGE)
99 | return
100 |
101 | args = sys.argv
102 | args.pop(0)
103 | ruleTarget = args.pop(0)
104 | linkFile = args.pop(0)
105 | buildDir = args.pop(0)
106 | rootDepFile = args.pop(0)
107 | includeDirs = args
108 |
109 |
110 | # scan all dependency files for files we need to link to
111 | # do this recursively starting at the root dependency file
112 | linkFiles = set()
113 | incFiles = set()
114 | depFileStack = set([rootDepFile])
115 | depFilesDone = set()
116 | while depFileStack:
117 | # get the next dependency file to process from the stack
118 | depFile = depFileStack.pop()
119 | if depFile in depFilesDone:
120 | continue
121 | depFilesDone.add(depFile)
122 |
123 | # iterate over all source files in the dependency file
124 | for nextFile in parseDepFile(depFile):
125 | newDepFile = ""
126 |
127 | # if we have a source file, we need to link against it
128 | if regSrc.match(nextFile):
129 | linkFiles.add(nextFile)
130 | newDepFile = buildDir + "/" + regSuffix.sub(".d", nextFile)
131 |
132 | # check whether a corresponding .cpp/.c/.cc source file exists
133 | srcFile = findSourceFile(nextFile)
134 | if srcFile is not None:
135 | linkFiles.add(srcFile)
136 | newDepFile = buildDir + "/" + regSuffix.sub(".d", srcFile)
137 |
138 | # if the corresponding .d file exists as parameter, add it to the stack
139 | if newDepFile and os.path.exists(newDepFile):
140 | depFileStack.add(newDepFile)
141 |
142 | #
143 | # generate all necessary rules
144 | #
145 |
146 | # all includes of dependency files
147 | for i in linkFiles:
148 | i = regSuffix.sub(".d", i)
149 | print("-include " + buildDir + "/" + i)
150 | print("")
151 |
152 | # dependencies for link file
153 | print(linkFile + ": \\")
154 | for i in linkFiles:
155 | i = regSuffix.sub(".d", i)
156 | print("\t" + buildDir + "/" + i + " \\")
157 | print("")
158 |
159 | # print out all files we need to link against
160 | print(ruleTarget + ": " + linkFile + " \\")
161 | for i in linkFiles:
162 | i = regSuffix.sub(".o", i)
163 | print("\t" + buildDir + "/" + i + " \\")
164 | print("")
165 |
166 |
167 | if __name__ == "__main__":
168 | main( sys.argv )
169 |
170 |
171 |
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/make/generic.mk:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (C) 2009 Alexander Kl"aser
3 | #
4 | # This piece is free software; you can redistribute it and/or
5 | # modify it under the terms of the GNU General Public License
6 | # as published by the Free Software Foundation; either version 2
7 | # of the License, or (at your option) any later version.
8 | #
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program; if not, write to the Free Software
16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 | #
18 | # This software has been downloaded from:
19 | # http://lear.inrialpes.fr/people/klaeser/software
20 | #
21 | #
22 | # Variables that need to be set in the Makefile that includes this file:
23 | # TARGETS all files that are executables, without their .cpp extension
24 | # BUILDDIR temporary dir where things are compiled to (optional, by default ".build")
25 | # BINDIR dir where executables are linked to (optional, by default "bin")
26 | # SRCDIRS list of directories in which source files are located
27 | # this variable needs to be set if you do not have your source and
28 | # include files located in the same directory!
29 | #
30 | # Variables used for compiling/linking:
31 | # CXXFLAGS flags for compiling
32 | # LDFLAGS flags used for linking
33 | # LDLIBS list of libraries to be linked
34 | # CXX compiler/linker (g++ by default)
35 | #
36 |
37 | # set paths for the dependency tool and gcc
38 | DEP = make/dep.py
39 |
40 | # set some standard directories in case they have not been set
41 | BUILDDIR ?= .build
42 | BINDIR ?= bin
43 |
44 | # all include files
45 | INCLUDES := $(addprefix $(BUILDDIR)/,$(TARGETS:=.l))
46 |
47 |
48 | #
49 | # some general rules
50 | #
51 |
52 | .PHONY: all clean
53 | .PRECIOUS: $(BUILDDIR)/%.d
54 |
55 | all: $(BINDIR) $(addprefix $(BINDIR)/,$(notdir $(TARGETS)))
56 | @echo "=== done ==="
57 |
58 | $(INCLUDES): $(BUILDDIR)
59 |
60 | clean:
61 | @echo "=== cleaning up ==="
62 | @rm -rf $(BUILDDIR)
63 |
64 | $(BUILDDIR) $(BINDIR):
65 | @echo "=== creating directory: $@ ==="
66 | @mkdir -p $@
67 |
68 |
69 | #
70 | # rules for creating dependency files
71 | #
72 |
73 | # dependencies of .cpp files on other files
74 | $(BUILDDIR)/%.d: %.cpp
75 | @echo "=== creating dependency file: $@ ==="
76 | @test -e $(dir $@) || mkdir -p $(dir $@)
77 | g++ $(CXXFLAGS) -MM -MT $(BUILDDIR)/$*.o -MT $(BUILDDIR)/$*.d -MF $@ $<
78 |
79 | # dependencies for the linking
80 | %.so.l %.l: %.d
81 | @echo "=== creating dependency file: $@ ==="
82 | @test -e $(dir $@) || mkdir -p $(dir $@)
83 | $(DEP) "$(BINDIR)/$(@F:.l=)" $*.l $(BUILDDIR) $< $(SRCDIRS) > $@
84 |
85 |
86 | #
87 | # rules for compiling and linking
88 | # (link dependencies are defined in .l files)
89 | #
90 |
91 | # compiling
92 | $(BUILDDIR)/%.o: %.cpp
93 | @echo "=== compiling: $@ ==="
94 | @test -e $(dir $@) || mkdir -p $(dir $@)
95 | $(CXX) -fPIC $(CXXFLAGS) -c -o $@ $<
96 |
97 | # linking for shared libraries
98 | $(BINDIR)/%.so:
99 | @echo "=== linking: $@ ==="
100 | @rm -f $@
101 | $(CXX) -shared $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS)
102 |
103 | # linking
104 | $(BINDIR)/%:
105 | @echo "=== linking: $@ ==="
106 | @rm -f $@
107 | $(CXX) $(LDFLAGS) -o $@ $(filter %.o, $^) $(LDLIBS)
108 |
109 | %: %.o
110 | %.h: ;
111 | %.hpp: ;
112 | %.c: ;
113 | %.cpp: ;
114 |
115 |
116 | #
117 | # include dependency files
118 | #
119 |
120 | ifneq ($(MAKECMDGOALS),clean)
121 | -include $(INCLUDES)
122 | endif
123 |
--------------------------------------------------------------------------------
/offline_traj/for_UCF101/view_traj.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import cv2
4 | import matplotlib.pyplot as plt
5 |
6 | # For trajectory storage
7 | import h5py
8 |
9 | # Setting parameters
10 | TRAJ_H5_PATH = './traj_stor_train.h5'
11 | DATASET_DIR = '/media/haozekun/512SSD_2/UCF101_seq/UCF-101'
12 |
13 | f = h5py.File(TRAJ_H5_PATH, 'r', libver='latest')
14 | # /UCFTraj/by_clip/%06d_%04d_%04d_uuid1(startFrame, trajLen, trajCount)
15 | db = f["/UCFTraj/by_clip"]
16 |
17 | fig = plt.figure()
18 |
19 | for clip_name in db.keys():
20 | video_path = db[clip_name].attrs['VidPath']
21 | print(video_path)
22 | clip_start = db[clip_name].attrs['StartFrame']
23 | clip_len = db[clip_name].attrs['TrajLen']
24 | clip_num_trajs = db[clip_name].attrs['TrajCount']
25 | clip_traj_data = db[clip_name]
26 | #cap = cv2.VideoCapture(video_path)
27 | #if not cap.isOpened():
28 | # print('Video open failed!!!')
29 | #cap.set(cv2.CAP_PROP_POS_FRAMES ,clip_start)
30 |
31 | for ff in range(clip_len):
32 | #for ff in [0]:
33 | plt.clf()
34 | #ret, frame = cap.read() # 320 by 240
35 | #if not ret:
36 | # print('Frame read error!')
37 | frame = cv2.imread(video_path+'/'+str(clip_start+ff)+'.jpg')
38 | img_data = cv2.resize(frame, (256,192))
39 |
40 | img_data = img_data[:,:,(2,1,0)] # h w c
41 | plt.imshow(img_data)
42 | for kk in range(clip_num_trajs):
43 | traj = clip_traj_data[kk,:,:]
44 | plt.scatter(traj[ff,0], traj[ff,1])
45 | print('Count: {}'.format(kk))
46 | fig.canvas.draw()
47 | plt.pause(0.001)
48 | #plt.waitforbuttonpress()
49 | #plt.show()
50 | #cap.release()
51 |
--------------------------------------------------------------------------------