├── .gitignore
├── 3rd party
│   ├── YOLO_network.py
│   └── sort_yolo.py
├── LICENSE
├── MOLO
│   ├── MOLO_network_test.py
│   └── MOLO_network_train.py
├── README.md
├── ROLO_demo_heat.py
├── ROLO_demo_test.py
├── ROLO_evaluation.py
├── experiments
│   ├── testing
│   │   ├── ROLO_network_test_all.py
│   │   └── ROLO_network_test_single.py
│   └── training
│       ├── ROLO_step1_train_30_exp2.py
│       ├── ROLO_step3_train_30_exp2.py
│       ├── ROLO_step6_train_20_exp1.py
│       ├── ROLO_step6_train_30_exp2.py
│       ├── ROLO_step6_train_30_exp3.py
│       └── ROLO_step9_train_30_exp2.py
├── heatmap
│   ├── ROLO_heatmap_test.py
│   └── ROLO_heatmap_train.py
├── update
│   ├── src
│   │   ├── rnn.py
│   │   ├── rnn_cell.py
│   │   ├── testing.py
│   │   └── training.py
│   ├── unit_test
│   │   ├── test_all.py
│   │   ├── test_utils_convert_coord.py
│   │   ├── test_utils_dataset.py
│   │   ├── test_utils_io_coord.py
│   │   ├── test_utils_io_file.py
│   │   ├── test_utils_io_folder.py
│   │   ├── test_utils_io_folder.pyc
│   │   ├── test_utils_io_list.py
│   │   └── test_utils_natural_sort.py
│   └── utils
│       ├── utils_cal_iou.py
│       ├── utils_convert_coord.py
│       ├── utils_convert_heatmap.py
│       ├── utils_dataset.py
│       ├── utils_draw_coord.py
│       ├── utils_draw_heatmap.py
│       ├── utils_io_coord.py
│       ├── utils_io_file.py
│       ├── utils_io_folder.py
│       ├── utils_io_heatmap.py
│       ├── utils_io_list.py
│       └── utils_natural_sort.py
└── utils
    ├── MOLO_utils.py
    └── ROLO_utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2016 Guanghan Ning
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/MOLO/MOLO_network_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) <2016> . All Rights Reserved.
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | '''
16 | Script File: MOLO_network_test.py
17 |
18 | Description:
19 | 	MOLO is short for Multi-target ROLO, aimed at simultaneous detection and tracking of multiple targets
20 | Paper: http://arxiv.org/abs/1607.05781
21 | Author: Guanghan Ning
22 | Webpage: http://guanghan.info/
23 | '''
24 |
25 | # Imports
26 | import ROLO_utils as utils
27 |
28 | import tensorflow as tf
29 | from tensorflow.models.rnn import rnn, rnn_cell
30 | import cv2
31 |
32 | import numpy as np
33 | import os.path
34 | import time
35 | import random
36 |
37 |
38 | class ROLO_TF:
39 |     disp_console = False
40 |     restore_weights = False
41 |
42 |     # YOLO parameters
43 |     fromfile = None
44 |     tofile_img = 'test/output.jpg'
45 |     tofile_txt = 'test/output.txt'
46 |     imshow = True
47 |     filewrite_img = False
48 |     filewrite_txt = False
49 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
50 |     alpha = 0.1
51 |     threshold = 0.2
52 |     iou_threshold = 0.5
53 |     num_class = 20
54 |     num_box = 2
55 |     grid_size = 7
56 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
57 |     w_img, h_img = [352, 240]
58 |
59 |     # ROLO Network Parameters
60 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/MOLO/model_MOT.ckpt'
61 |     lstm_depth = 3
62 |     num_steps = 3  # number of frames as an input sequence
63 |     num_feat = 4096
64 |     num_predict = 6  # final output of LSTM: 6 location parameters
65 |     num_gt = 4
66 |     num_input = num_feat + num_predict  # data input per frame: 4096 + 6 = 4102
67 |
68 |     # ROLO Training Parameters
69 |     #learning_rate = 0.00001 #training
70 |     learning_rate = 0.00001 #testing
71 |
72 |     training_iters = 210 #100000
73 |     batch_size = 1 #128
74 |     display_step = 1
75 |
76 |     # tf Graph input
77 |     x = tf.placeholder("float32", [None, num_steps, num_input])
78 |     istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input
79 |     y = tf.placeholder("float32", [None, num_gt])
80 |
81 |     # Define weights
82 |     weights = {
83 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
84 |     }
85 |     biases = {
86 |         'out': tf.Variable(tf.random_normal([num_predict]))
87 |     }
88 |
89 |
90 |     def __init__(self, argvs = []):
91 |         print("ROLO init")
92 |         self.ROLO(argvs)
93 |
94 |
95 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
96 |         with tf.device('/gpu:0'):
97 |             # input shape: (batch_size, n_steps, n_input)
98 |             _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
99 |             # Reshape to prepare input to hidden activation
100 |             _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
101 |             # Split data because rnn cell needs a list of inputs for the RNN inner loop
102 |             _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
103 |             #print("_X: ", _X)
104 |
105 |             cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
106 |             state = _istate
107 |             for step in range(self.num_steps):
108 |                 outputs, state = tf.nn.rnn(cell, [_X[step]], state)
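                # Note (added for clarity): tf.nn.rnn runs a single unrolled RNN step here;
                # the first iteration creates the LSTM's variables, and enabling variable
                # reuse on the next line makes every later iteration share those same
                # weights, which is the standard TF 0.x weight-sharing idiom.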
109 |                 tf.get_variable_scope().reuse_variables()
110 |
111 |         #print("output: ", outputs)
112 |         #print("state: ", state)
113 |         return outputs
114 |
115 |
116 |     # Experiment with dropout
117 |     def dropout_features(self, feature, prob):
118 |         if prob == 0: return feature
119 |         else:
120 |             num_drop = int(prob * 4096)
121 |             drop_index = random.sample(xrange(4096), num_drop)
122 |             for i in range(len(drop_index)):
123 |                 index = drop_index[i]
124 |                 feature[index] = 0
125 |             return feature
126 |
127 |
128 |     '''---------------------------------------------------------------------------------------'''
129 |     def build_networks(self):
130 |         if self.disp_console : print "Building MOLO graph..."
131 |
132 |         # Build rolo layers
133 |         self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases)
134 |         self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious")
135 |         self.sess = tf.Session()
136 |         self.sess.run(tf.initialize_all_variables())
137 |         self.saver = tf.train.Saver()
138 |         #self.saver.restore(self.sess, self.rolo_weights_file)
139 |         if self.disp_console : print "Loading complete!" + '\n'
140 |
141 |
142 |     def merge_dets(self, dets_yolo, dets_rcnn):
143 |         # NOTE: unfinished helper -- test_7() below calls utils.merge_dets() instead;
144 |         # the body references names (dets, dets_last, dets_next, box_num, batch_xs_raw)
145 |         # that are never defined in this scope.
146 |         for person in range(len(dets)):
147 |             box_num += 1
148 |             #print('id, person = ', id, person)
149 |             person_id = dets[person][0]-1  # person_id starts from 1, but index starts from 0, so minus 1
150 |
151 |             # Merge the features with dets in batch_xs
152 |             loc_last = dets_last[dets_last[:,0]==person_id, 1:5]
153 |             loc_prst = dets[dets[:,0]==person_id, 1:5]
154 |             loc_next = dets_next[dets_next[:,0]==person_id, 1:5]
155 |             if len(loc_last) == 0 or len(loc_next)==0:
156 |                 continue
157 |             loc_last = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_last[0][:]])
158 |             loc_prst = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_prst[0][:]])
159 |             loc_next = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_next[0][:]])
160 |             batch_xs_raw[0][4097:4101] = loc_last[0][:]
161 |             batch_xs_raw[1][4097:4101] = loc_prst[0][:]
162 |             batch_xs_raw[2][4097:4101] = loc_next[0][:]
163 |
164 |             # Reshape data to get 3 seq of 4102 elements
165 |             batch_xs = np.reshape(batch_xs_raw, [self.batch_size, self.num_steps, self.num_input])
166 |         return
167 |
168 |
169 |     def test_7(self):
170 |         print("Testing MOLO...")
171 |         self.build_networks()
172 |
173 |         ''' TUNE THIS'''
174 |         offset = 37
175 |         num_videos = 7
176 |         epoches = 7
177 |
178 |         # Use rolo_input for LSTM training
179 |         pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases)
180 |         self.pred_location = pred[0][:, 4097:4101]
181 |         self.correct_prediction = tf.square(self.pred_location - self.y)
182 |         self.accuracy = tf.reduce_mean(self.correct_prediction) * 100
183 |         self.learning_rate = 0.00001
184 |         self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy)  # Adam Optimizer
185 |
186 |         # Initializing the variables
187 |         init = tf.initialize_all_variables()
188 |
189 |         # Launch the graph
190 |         with tf.Session() as sess:
191 |             if (self.restore_weights == True):
192 |                 sess.run(init)
193 |                 self.saver.restore(sess, self.rolo_weights_file)
194 |                 print "Loading complete!" + '\n'
195 |             else:
196 |                 sess.run(init)
197 |
198 |             for epoch in range(1, epoches):
199 |                 i = epoch % num_videos + offset
200 |                 [self.w_img, self.h_img, sequence_name, self.training_iters, self.testing_iters]= utils.choose_video_sequence(i)
201 |
202 |                 x_path = os.path.join('benchmark/MOT/MOT2016/test/', sequence_name, 'yolo_out/')
203 |                 seq_dets = np.loadtxt('3rd_party/sort-master/output/%s.txt'%(sequence_name),delimiter=',')  #load detections
204 |                 #y_path = os.path.join('benchmark/MOT/MOT2016/test/', sequence_name, 'gt/gt.txt')
205 |                 out_file = open('output/MOLO/%s.txt'%(sequence_name),'w')
206 |
207 |                 id = 1
208 |                 # Keep testing until reaching the max iterations
209 |                 while id < self.testing_iters- self.num_steps:
210 |                     # Load locs and feat from yolo output
211 |                     batch_xs_raw = self.rolo_utils.load_yolo_output_test_MOLO(x_path, self.batch_size, self.num_steps, id-1)  # 3 features: (id-1, id, id+1), start from 0.
212 |
213 |                     # Load dets from faster r-cnn
214 |                     dets_last = seq_dets[ (seq_dets[:,0]== id)&(seq_dets[:,6]==1) , 1:6]  # dets starts from 1
215 |                     dets = seq_dets[ (seq_dets[:,0]== (id+1))&(seq_dets[:,6]==1) , 1:6]
216 |                     dets_next = seq_dets[ (seq_dets[:,0]== (id+2))&(seq_dets[:,6]==1) , 1:6]
217 |
218 |                     # Need to load batch_xs in a different way, get the feature as well as the yolo locations
219 |                     # Need a function to leverage the yolo detections and faster r-cnn detections
220 |                     # assign the updated detection to dets\dets_last\dets_next
221 |
222 |                     final_dets, person_ids = utils.merge_dets(batch_xs_raw, [dets_last, dets, dets_next])  # Take in the two sources of locations
223 |
224 |                     for person in range(len(final_dets)):
225 |                         person_id = person_ids[person]
226 |
227 |                         # Reshape data to get 3 seq of 4102 elements
228 |                         batch_xs = np.reshape(final_dets[person], [self.batch_size, self.num_steps, self.num_input])
229 |
230 |                         # Output prediction to txt file
231 |                         pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.istate: np.zeros((self.batch_size, 2*self.num_input))})
232 |
233 |                         d = utils.locations_normal(self.w_img, self.h_img, pred_location[0])  # d = [x_mid, y_mid, w, h] in pixels
234 |                         out_file.write('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1\n'%(id+1, person_id, d[0]-d[2]/2.0, d[1]- d[3]/2.0, d[2], d[3]))
235 |                     id += 1
236 |                 out_file.close()
237 |         return
238 |
239 |
240 |     def ROLO(self, argvs):
241 |         self.rolo_utils= utils.ROLO_utils()
242 |         self.rolo_utils.loadCfg()
243 |         self.params = self.rolo_utils.params
244 |
245 |         arguments = self.rolo_utils.argv_parser(argvs)
246 |
247 |         if self.rolo_utils.flag_train is True:
248 |             self.training(utils.x_path, utils.y_path)
249 |         elif self.rolo_utils.flag_track is True:
250 |             self.build_networks()
251 |             self.track_from_file(utils.file_in_path)
252 |         elif self.rolo_utils.flag_detect is True:
253 |             self.build_networks()
254 |             self.detect_from_file(utils.file_in_path)
255 |         else:
256 |             self.test_7()
257 |
258 | '''----------------------------------------main-----------------------------------------------------'''
259 | def main(argvs):
260 |     ROLO_TF(argvs)
261 |
262 | if __name__=='__main__':
263 |     main(' ')
264 |
265 |
--------------------------------------------------------------------------------
/MOLO/MOLO_network_train.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) <2016> . All Rights Reserved.
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | '''
16 | Script File: MOLO_network_train.py
17 |
18 | Description:
19 | 	MOLO is short for Multi-target ROLO, aimed at simultaneous detection and tracking of multiple targets
20 | Paper: http://arxiv.org/abs/1607.05781
21 | Author: Guanghan Ning
22 | Webpage: http://guanghan.info/
23 | '''
24 |
25 | # Imports
26 | import ROLO_utils as utils
27 |
28 | import tensorflow as tf
29 | from tensorflow.models.rnn import rnn, rnn_cell
30 | import cv2
31 |
32 | import numpy as np
33 | import os.path
34 | import time
35 | import random
36 |
37 |
38 | class ROLO_TF:
39 |     disp_console = False
40 |     restore_weights = True
41 |
42 |     # YOLO parameters
43 |     fromfile = None
44 |     tofile_img = 'test/output.jpg'
45 |     tofile_txt = 'test/output.txt'
46 |     imshow = True
47 |     filewrite_img = False
48 |     filewrite_txt = False
49 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
50 |     alpha = 0.1
51 |     threshold = 0.2
52 |     iou_threshold = 0.5
53 |     num_class = 20
54 |     num_box = 2
55 |     grid_size = 7
56 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
57 |     w_img, h_img = [352, 240]
58 |
59 |     # ROLO Network Parameters
60 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/MOLO/model_MOT.ckpt'
61 |     lstm_depth = 3
62 |     num_steps = 3  # number of frames as an input sequence
63 |     num_feat = 4096
64 |     num_predict = 6  # final output of LSTM: 6 location parameters
65 |     num_gt = 4
66 |     num_input = num_feat + num_predict  # data input per frame: 4096 + 6 = 4102
67 |
68 |     # ROLO Training Parameters
69 |     #learning_rate = 0.00001 #training
70 |     learning_rate = 0.00001 #testing
71 |
72 |     training_iters = 210 #100000
73 |     batch_size = 1 #128
74 |     display_step = 1
75 |
76 |     # tf Graph input
77 |     x = tf.placeholder("float32", [None, num_steps, num_input])
78 |     istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input
79 |     y = tf.placeholder("float32", [None, num_gt])
80 |
81 |     # Define weights
82 |     weights = {
83 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
84 |     }
85 |     biases = {
86 |         'out': tf.Variable(tf.random_normal([num_predict]))
87 |     }
88 |
89 |
90 |     def __init__(self, argvs = []):
91 |         print("ROLO init")
92 |         self.ROLO(argvs)
93 |
94 |
95 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
96 |         with tf.device('/gpu:0'):
97 |             # input shape: (batch_size, n_steps, n_input)
98 |             _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
99 |             # Reshape to prepare input to hidden activation
100 |             _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
101 |             # Split data because rnn cell needs a list of inputs for the RNN inner loop
102 |             _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
103 |             #print("_X: ", _X)
104 |
105 |             cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
106 |             state = _istate
107 |             for step in range(self.num_steps):
108 |                 outputs, state = tf.nn.rnn(cell, [_X[step]], state)
109 |                 tf.get_variable_scope().reuse_variables()
110 |
111 |         #print("output: ", outputs)
112 |         #print("state: ", state)
113 |         return outputs
114 |
115 |
116 |     # Experiment with dropout
117 |     def dropout_features(self, feature, prob):
118 |         if prob == 0: return feature
119 |         else:
120 |             num_drop = int(prob * 4096)
121 |             drop_index = random.sample(xrange(4096), num_drop)
122 |             for i in range(len(drop_index)):
123 |                 index = drop_index[i]
124 |                 feature[index] = 0
125 |             return feature
126 |
127 |
128 |     # Experiment with input box noise (translate, scale)
129 |     def det_add_noise(self, det):
130 |         translate_rate = random.uniform(0.98, 1.02)
131 |         scale_rate = random.uniform(0.8, 1.2)
132 |
133 |         det[0] *= translate_rate
134 |         det[1] *= translate_rate
135 |         det[2] *= scale_rate
136 |         det[3] *= scale_rate
137 |
138 |         return det
139 |
140 |
141 |     '''---------------------------------------------------------------------------------------'''
142 |     def build_networks(self):
143 |         if self.disp_console : print "Building MOLO graph..."
144 |
145 |         # Build rolo layers
146 |         self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases)
147 |         self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious")
148 |         self.sess = tf.Session()
149 |         self.sess.run(tf.initialize_all_variables())
150 |         self.saver = tf.train.Saver()
151 |         #self.saver.restore(self.sess, self.rolo_weights_file)
152 |         if self.disp_console : print "Loading complete!" + '\n'
153 |
154 |
155 |     def train_7(self):
156 |         print("TRAINING MOLO...")
157 |         log_file = open("output/training-7-log.txt", "a")  # open in append mode
158 |         self.build_networks()
159 |
160 |         ''' TUNE THIS'''
161 |         offset = 30
162 |         num_videos = 7
163 |         epoches = 7 * 300
164 |
165 |
166 |         # Use rolo_input for LSTM training
167 |         pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases)
168 |         self.pred_location = pred[0][:, 4097:4101]
169 |         self.correct_prediction = tf.square(self.pred_location - self.y)
170 |         self.accuracy = tf.reduce_mean(self.correct_prediction) * 100
171 |         self.learning_rate = 0.00001
172 |         self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy)  # Adam Optimizer
173 |
174 |         # Initializing the variables
175 |         init = tf.initialize_all_variables()
176 |
177 |         # Launch the graph
178 |         with tf.Session() as sess:
179 |             if (self.restore_weights == True):
180 |                 sess.run(init)
181 |                 self.saver.restore(sess, self.rolo_weights_file)
182 |                 print "Loading complete!" + '\n'
183 |             else:
184 |                 sess.run(init)
185 |
186 |             avg_loss = 0
187 |             stay_epoch = 2
188 |             for epoch in range(2, epoches):
189 |                 #if(avg_loss > 0.1):
190 |                 #    epoch= stay_epoch
191 |                 #print(epoch)
192 |                 i = epoch % num_videos + offset
193 |                 [self.w_img, self.h_img, sequence_name, self.training_iters, self.testing_iters]= utils.choose_video_sequence(i)
194 |
195 |                 x_path = os.path.join('benchmark/MOT/MOT2016/train/', sequence_name, 'yolo_out/')
196 |                 seq_dets = np.loadtxt('3rd_party/sort-master/output/%s.txt'%(sequence_name),delimiter=',')  #load detections
197 |
198 |                 y_path = os.path.join('benchmark/MOT/MOT2016/train/', sequence_name, 'gt/gt.txt')
199 |
200 |                 #out_file = open('output/MOLO/%s.txt'%(sequence_name),'w')
201 |
202 |                 #self.output_path = os.path.join('benchmark/MOT/MOT2016/train/', sequence_name, 'molo_out_train/')
203 |                 #utils.createFolder(self.output_path)
204 |                 total_loss = 0
205 |                 id = 1
206 |                 box_num= 0
207 |
208 |                 # Keep training until reaching the max iterations
209 |                 while id < self.testing_iters- self.num_steps:  # + 1
210 |                     # Load training data & ground truth
211 |                     batch_xs_raw = self.rolo_utils.load_yolo_feat_test_MOLO(x_path, self.batch_size, self.num_steps, id-1)  # 3 features: (id-1, id, id+1), start from 0.
212 |                     dets_last = seq_dets[ (seq_dets[:,0]== (id))&(seq_dets[:,6]==1) , 1:6]  # dets starts from 1
213 |                     dets = seq_dets[ (seq_dets[:,0]== (id+1))&(seq_dets[:,6]==1) , 1:6]
214 |                     dets_next = seq_dets[ (seq_dets[:,0]== (id+2))&(seq_dets[:,6]==1) , 1:6]
215 |
216 |                     for person in range(len(dets)):
217 |                         box_num += 1
218 |
219 |                         batch_ys = [dets[person][1:5]]
220 |                         batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys)
221 |
222 |                         # Merge the features with dets in batch_xs
223 |                         person_id = dets[person][0]  # person IDs from SORT start at 1 and are matched by value below, so no -1 offset is applied (an earlier -1 here was a bug)
224 |                         loc_last = dets_last[dets_last[:,0]==person_id, 1:5]
225 |                         loc_prst = dets[dets[:,0]==person_id, 1:5]
226 |                         loc_next = dets_next[dets_next[:,0]==person_id, 1:5]
227 |                         #print('loca_last', loc_last[0][:])
228 |                         #print('batch_xs', batch_xs_raw[0])
229 |                         if len(loc_last) == 0 or len(loc_next)==0:
230 |                             continue
231 |                         loc_last = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_last[0][:]])
232 |                         loc_prst = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_prst[0][:]])
233 |                         loc_next = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_next[0][:]])
234 |
235 |                         # Add noise to batch_xs
236 |                         #loc_last[0]= self.det_add_noise( loc_last[0])
237 |                         #loc_prst[0]= self.det_add_noise( loc_prst[0])
238 |                         #loc_next[0]= self.det_add_noise( loc_next[0])
239 |
240 |                         #print('loca_last', loc_last[0][:])
241 |                         batch_xs_raw[0][4097:4101] = loc_last[0][:]
242 |                         batch_xs_raw[1][4097:4101] = loc_prst[0][:]
243 |                         batch_xs_raw[2][4097:4101] = loc_next[0][:]
244 |
245 |                         # Reshape data to get 3 seq of 4102 elements
246 |                         batch_xs = np.reshape(batch_xs_raw, [self.batch_size, self.num_steps, self.num_input])
247 |                         batch_ys = np.reshape(batch_ys, [self.batch_size, 4])
248 |                         #print("Batch_ys: ", batch_ys)
249 |
250 |                         # Output prediction to txt file
251 |                         pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))})
252 |                         d = utils.locations_normal(self.w_img, self.h_img, pred_location[0])  # d = [x_mid, y_mid, w, h] in pixels
253 |                         #print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(id+1, person_id, d[0]-d[2]/2.0, d[1]- d[3]/2.0, d[2], d[3]), out_file)
254 |                         #print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(id+1, person_id, d[0]-d[2]/2.0, d[1]- d[3]/2.0, d[2], d[3]))
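                        # Note (added for clarity): the commented-out writers above emit one
                        # MOT Challenge record per box,
                        #   frame, id, bb_left, bb_top, bb_width, bb_height, conf, x, y, z
                        # where the trailing x, y, z fields are fixed to -1 (unused in 2D
                        # tracking) and (d[0]-d[2]/2, d[1]-d[3]/2) converts the predicted
                        # box center to its top-left corner.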
255 |
256 |                         if self.disp_console: print("ROLO Pred: ", pred_location)
257 |                         if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img)
258 |
259 |                         sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))})
260 |
261 |                         # Calculate batch loss
262 |                         loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))})
263 |                         if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss)  #+ "{:.5f}".format(self.accuracy)
264 |                         total_loss += loss
265 |                     id += 1
266 |
267 |                 #out_file.close()
268 |
269 |                 #print "Optimization Finished!"
270 |                 avg_loss = total_loss/box_num
271 |                 print "Avg loss: " + sequence_name + ": " + str(avg_loss)
272 |
273 |                 log_file.write(str("{:.3f}".format(avg_loss)) + ' ')
274 |                 if epoch % num_videos == 0:
275 |                     log_file.write('\n')
276 |                     save_path = self.saver.save(sess, self.rolo_weights_file)
277 |                     print("Model saved in file: %s" % save_path)
278 |
279 |         log_file.close()
280 |         return
281 |
282 |
283 |     def ROLO(self, argvs):
284 |
285 |         self.rolo_utils= utils.ROLO_utils()
286 |         self.rolo_utils.loadCfg()
287 |         self.params = self.rolo_utils.params
288 |
289 |         arguments = self.rolo_utils.argv_parser(argvs)
290 |
291 |         if self.rolo_utils.flag_train is True:
292 |             self.training(utils.x_path, utils.y_path)
293 |         elif self.rolo_utils.flag_track is True:
294 |             self.build_networks()
295 |             self.track_from_file(utils.file_in_path)
296 |         elif self.rolo_utils.flag_detect is True:
297 |             self.build_networks()
298 |             self.detect_from_file(utils.file_in_path)
299 |         else:
300 |             self.train_7()
301 |
302 | '''----------------------------------------main-----------------------------------------------------'''
303 | def main(argvs):
304 |     ROLO_TF(argvs)
305 |
306 | if __name__=='__main__':
307 |     main(' ')
308 |
309 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ROLO
2 | =======
3 | --------
4 |
5 | Project Page: [http://guanghan.info/projects/ROLO/](http://guanghan.info/projects/ROLO/)
6 |
7 | ## Overview
8 |
9 | ROLO is short for Recurrent YOLO [[1]], aimed at simultaneous object detection and tracking.
10 |
11 | With the regression capability of LSTMs both spatially and temporally, ROLO is able to interpret a series of high-level visual features directly into coordinates of tracked objects. By concatenating high-level visual features with YOLO detection results, ROLO is spatially supervised into specific targets.
12 |
13 | The regression is twofold: (1) the regression within one unit, i.e.,
14 | between the visual features and the concatenated region representations. The LSTM is capable of inferring region locations from the visual features when they are concatenated into one unit. (2) The regression over the units of a sequence, i.e., between concatenated features over a sequence of frames.
15 |
16 | The supervision is helpful in two aspects:
17 | (1) When the LSTM interprets the high-level visual features, the preliminary location inference helps
18 | to regress the features into the location of specific visual elements/cues. The spatially supervised regression acts as an online appearance model. (2) Temporally, the LSTM learns over the sequence units to restrict the location prediction to a spatial range.
19 |
20 | ROLO is currently an offline approach, and is expected to gain a performance boost with proper online model updating. It is still a single-object tracker, and data association techniques are not yet explored for the simultaneous tracking of multiple targets.
21 |
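For intuition, the minimal sketch below shows how one unit of the input sequence is assembled in this codebase: a 4096-d visual feature vector is concatenated with the 6 values of a YOLO detection, giving the 4102-d vector that the LSTM consumes per frame (`num_input = num_feat + num_predict` in the network scripts). The `[class, x, y, w, h, confidence]` layout of the 6 detection slots is an assumption, inferred from the `[:, 4097:4101]` slice the code reads the predicted box from.

```python
import numpy as np

NUM_FEAT = 4096   # YOLO fc-layer feature length
NUM_PRED = 6      # detection slots; assumed [class, x, y, w, h, confidence]
NUM_INPUT = NUM_FEAT + NUM_PRED  # 4102 values per frame

def make_unit(feat, det):
    """Concatenate one frame's visual features with its detection."""
    unit = np.zeros(NUM_INPUT, dtype=np.float32)
    unit[:NUM_FEAT] = feat
    unit[NUM_FEAT:] = det
    return unit

# num_steps consecutive units form one LSTM input sequence; the predicted
# box is later read back from indices 4097:4101 (the x, y, w, h slots).
```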
22 | ----
23 | ## Prerequisites
24 | - Python 2.7 or Python 3.3+
25 | - TensorFlow
26 | - SciPy
27 |
28 | ----
29 | ## Getting Started
30 |
31 | ### 1. Download Data and Pre-trained Models
32 |
33 | As a generic object detector, YOLO can be trained to recognize arbitrary objects. Nevertheless, as the performance of ROLO depends on the YOLO part, we choose the default YOLO small model in order to provide a fair comparison. We believe it would be unfair to credit the tracking module if we trained a customized YOLO model. The model is pre-trained on the ImageNet dataset and fine-tuned on the VOC dataset, and is capable of detecting objects of only 20 classes. We therefore picked 30 out of the 100 videos from the benchmark [OTB100](http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html), whose tracking targets belong to these classes. This subset is referred to as OTB30.
34 |
35 | **DATA**
36 |
37 | - [DATA and Results for Demo](http://guanghan.info/projects/ROLO/DATA/DATA.zip)
38 |
39 | **Models**
40 |
41 | - [Model for demo](http://guanghan.info/projects/ROLO/demo/model_demo.ckpt)
42 |
43 | - [Model for experiment 1](http://guanghan.info/projects/ROLO/experiment_1/model_step6_exp1.ckpt)
44 |
45 | - Model for experiment 2: [step=1](http://guanghan.info/projects/ROLO/experiment_2/model_step1_exp2.ckpt), [step=3](http://guanghan.info/projects/ROLO/experiment_2/model_step3_exp2.ckpt), [step=6](http://guanghan.info/projects/ROLO/experiment_2/model_step6_exp2.ckpt), [step=9](http://guanghan.info/projects/ROLO/experiment_2/model_step9_exp2.ckpt)
46 |
47 | - [Model for experiment 3](http://guanghan.info/projects/ROLO/experiment_3/model_step3_exp3.ckpt)
48 |
49 | **Evaluation**
50 |
51 | - [Evaluation Results (including other trackers)](http://guanghan.info/projects/ROLO/output/evaluation.rar)
52 |
53 | ### 2. Run Demo
54 |
55 | Reproduce the results with the pre-trained model:
56 |
57 |     python ./experiments/testing/ROLO_network_test_all.py
58 |
59 | Or download the results at [Results](http://).
60 |
61 | Run the video demo:
62 |
63 |     python ./ROLO_demo_test.py
64 |
65 |
66 | ### 3. Training and Testing
67 |
68 | As deep learning applications mature, it will be more efficient to have multi-functional networks consisting of orthogonal modules. Feature representation, in this case, is better trained separately to provide shared features. Pre-training of the visual features on ImageNet is skipped, as already discussed in YOLO. We focus on training the LSTM module.
69 |
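The training objective itself is simple: all of the training scripts build `tf.reduce_mean(tf.square(pred_location - y)) * 100` and minimize it with Adam, i.e., a mean squared error between the LSTM's predicted box and the ground-truth box, both in `[x_mid, y_mid, w, h]` form normalized to `[0, 1]`. A minimal framework-free restatement of that loss:

```python
import numpy as np

def rolo_loss(pred_box, gt_box):
    """MSE between predicted and ground-truth [x_mid, y_mid, w, h] boxes,
    normalized to [0, 1]; the x100 scaling mirrors the training scripts."""
    pred_box, gt_box = np.asarray(pred_box), np.asarray(gt_box)
    return np.mean((pred_box - gt_box) ** 2) * 100
```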
70 |
71 | **Experiment 1**:
72 |
73 | The limitation of offline tracking is that the offline models need to be trained with large amounts of data, which are hard to find in publicly available object tracking benchmarks. Even considering the whole 100 videos of OTB100 [[2]], the amount is still smaller than that of image recognition tasks by orders of magnitude. Trackers are therefore prone to over-fitting.
74 |
75 | In order to test the generalization ability of ROLO, we conduct experiment 1.
76 | Training on 22 videos and testing on the remaining 8 videos of OTB30, the model is able to outperform all the traditional trackers from the benchmark [[2]].
77 |
78 |
79 | We also test on 3 additional videos that were not selected for OTB30, as their ground truth is the face rather than the human body. Since faces are not included in the default YOLO model, YOLO detects the human body instead and ROLO is supervised to track the human body.
80 | Demo videos are available here:
81 | [Video 1](https://www.youtube.com/watch?v=7dDsvVEt4ak),
82 | [Video 2](https://www.youtube.com/watch?v=w7Bxf4guddg),
83 | [Video 3](https://www.youtube.com/watch?v=qElDUVmYSpY).
84 |
85 |
86 |
87 | To reproduce experiment 1:
88 |
89 | - Training:
90 |
91 | ```
92 | python ./experiments/training/ROLO_step6_train_20_exp1.py
93 | ```
94 |
95 | - Testing:
96 |
97 | ```
98 | python ./experiments/testing/ROLO_network_test_all.py
99 | ```
100 |
101 | **Experiment 2**:
102 |
103 | If the model inevitably has to be trained with limited data, one way to remedy this is to train it on similar dynamics. (The same strategy is used by trackers that employ online model updating.) We train a 2nd LSTM model with the first 1/3 of the frames of OTB30 and test on the remaining frames. Results show that performance improves. We find that, once trained on auxiliary frames with similar dynamics, ROLO performs better on testing sequences. This attribute makes ROLO especially useful in surveillance environments, where models can be trained offline with pre-captured data.
104 |
105 | To reproduce experiment 2:
106 |
107 | - Training:
108 |
109 | ```
110 | python ./experiments/training/ROLO_step6_train_30_exp2.py
111 | ```
112 | - Testing:
113 | ```
114 | python ./experiments/testing/ROLO_network_test_all.py
115 | ```
116 |
117 |
118 | **Experiment 3**:
119 |
120 | Considering this attribute observed in experiment 2, we experiment with increasing the number of training frames.
121 | Training on all frames while using only 1/3 of the ground truths gives an additional boost to performance.
122 |
123 | To reproduce experiment 3:
124 |
125 | - Training:
126 |
127 | ```
128 | python ./experiments/training/ROLO_step6_train_30_exp3.py
129 | ```
130 | - Testing:
131 | ```
132 | python ./experiments/testing/ROLO_network_test_all.py
133 | ```
134 |
135 | **Limitations**
136 |
137 | Note that experiments 2 and 3 use 1/3 of the frames for training; these frames should be excluded during evaluation. Note also that using different frames from the same video sequences for training and testing can still be problematic. An online updating scheme for ROLO will be very useful in the future.
138 |
139 | We will update experiments using customized YOLO models, in order to be able to detect arbitrary objects and therefore test on the whole OTB100 dataset; we will then also be able to train and test on different datasets to perform cross-validation.
140 |
141 | **Parameter Sensitivity**
142 |
143 | Repeat experiment 2 with different step sizes, [1, 3, 6, 9], using the corresponding training scripts (a small driver that runs all four follows the command blocks below):
144 |
145 |
146 | ```
147 | python ./experiments/training/ROLO_step1_train_30_exp2.py
148 | ```
149 |
150 | ```
151 | python ./experiments/training/ROLO_step3_train_30_exp2.py
152 | ```
153 |
154 | ```
155 | python ./experiments/training/ROLO_step6_train_30_exp2.py
156 | ```
157 |
158 | ```
159 | python ./experiments/training/ROLO_step9_train_30_exp2.py
160 | ```
161 |
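For convenience, the sweep can be driven from a single script (a sketch; it only shells out to the four training scripts listed above):

```python
import subprocess

for step in [1, 3, 6, 9]:
    script = "./experiments/training/ROLO_step%d_train_30_exp2.py" % step
    subprocess.check_call(["python", script])
```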
162 | ![](http://guanghan.info/projects/ROLO/fps_over_steps.png)
163 | ![](http://guanghan.info/projects/ROLO/IOU_over_steps.png)
164 |
165 | ### 4. Visualization with Heatmap
166 |
167 | - Demo:
168 | ```
169 | python ./ROLO_demo_heat.py
170 | ```
171 | - Training:
172 | ```
173 | python ./heatmap/ROLO_heatmap_train.py
174 | ```
175 | - Testing:
176 | ```
177 | python ./heatmap/ROLO_heatmap_test.py
178 | ```
179 |
180 | ![](http://guanghan.info/projects/ROLO/heatmap_small1.png)
181 | ![](http://guanghan.info/projects/ROLO/heatmap_small2.png)
182 | - Blue: YOLO detection
183 | - Red: Ground Truth
184 |
185 | ### 5. Performance Evaluation
186 |
187 |     python ./ROLO_evaluation.py
188 |
189 |
190 | ### 6. Results
191 |
192 | More qualitative results can be found on the project page. For quantitative results, please refer to the arXiv paper.
193 |
194 | ![](http://guanghan.info/projects/ROLO/occlusion.jpeg)
195 | ![](http://guanghan.info/projects/ROLO/occlusion2.jpeg)
196 |
197 | - Blue: YOLO detection
198 | - Green: ROLO Tracking
199 | - Red: Ground Truth
200 |
201 |
202 | ---
203 | ## License
204 |
205 | ROLO is released under the Apache License Version 2.0 (refer to the LICENSE file for details).
206 |
207 | ---
208 | ## Citation
209 | The details are published as a technical report on arXiv. If you use the code and models, please cite the following paper:
210 | [arXiv:1607.05781](http://arxiv.org/abs/1607.05781).
211 |
212 |     @article{ning2016spatially,
213 |       title={Spatially Supervised Recurrent Convolutional Neural Networks for Visual Object Tracking},
214 |       author={Ning, Guanghan and Zhang, Zhi and Huang, Chen and He, Zhihai and Ren, Xiaobo and Wang, Haohong},
215 |       journal={arXiv preprint arXiv:1607.05781},
216 |       year={2016}
217 |     }
218 |
219 |
220 | ---
221 | ## Reference
222 | [[1]] Redmon, Joseph, et al. "You only look once: Unified, real-time object detection." CVPR (2016).
223 |
224 | [1]: http://arxiv.org/pdf/1506.02640.pdf "YOLO"
225 |
226 | [[2]] Wu, Yi, Jongwoo Lim, and Ming-Hsuan Yang. "Object tracking benchmark." IEEE Transactions on Pattern Analysis and Machine Intelligence 37.9 (2015): 1834-1848.
227 |
228 | [2]: http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=7001050&url=http%3A%2F%2Fieeexplore.ieee.org%2Fxpls%2Fabs_all.jsp%3Farnumber%3D7001050 "OTB100"
229 |
--------------------------------------------------------------------------------
/ROLO_demo_heat.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) <2016> . All Rights Reserved.
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | '''
16 | Script File: ROLO_demo_heat.py
17 |
18 | Description:
19 |
20 | 	ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 |
26 |
27 | import cv2
28 | import os
29 | import numpy as np
30 | import sys
31 | import ROLO_utils as utils
32 | '''----------------------------------------main-----------------------------------------------------'''
33 | def main(argv):
34 |     ''' PARAMETERS '''
35 |     num_steps= 6
36 |     test = 91
37 |
38 |     [wid, ht, sequence_name, dummy_1, dummy_2] = utils.choose_video_sequence(test)
39 |
40 |     img_fold_path = os.path.join('benchmark/DATA', sequence_name, 'img/')
41 |     gt_file_path= os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt')
42 |     yolo_out_path= os.path.join('benchmark/DATA', sequence_name, 'yolo_out/')
43 |     rolo_heat_path= os.path.join('benchmark/DATA', sequence_name, 'rolo_heat_test/')
44 |     rolo_out_path= os.path.join('benchmark/DATA', sequence_name, 'rolo_out_test/')
45 |
46 |     paths_imgs = utils.load_folder( img_fold_path)
47 |     paths_rolo= utils.load_folder( rolo_out_path)
48 |     lines = utils.load_dataset_gt( gt_file_path)
49 |
50 |     # Define the codec and create VideoWriter object
51 |     fourcc= cv2.cv.CV_FOURCC(*'DIVX')
52 |     video_name = sequence_name + '_test.avi'
53 |     video_path = os.path.join('output/videos/', video_name)
54 |     video = cv2.VideoWriter(video_path, fourcc, 20, (wid, ht))
55 |
56 |     total= 0
57 |     rolo_avgloss= 0
58 |     for i in range(len(paths_rolo)- num_steps):
59 |         id= i + 1
60 |         test_id= id + num_steps - 2  #* num_steps + 1
61 |
62 |         path = paths_imgs[test_id]
63 |         img = utils.file_to_img(path)
64 |
65 |         if(img is None): break
66 |
67 |         yolo_location= utils.find_yolo_location(yolo_out_path, test_id)
68 |         yolo_location= utils.locations_normal( wid, ht, yolo_location)
69 |
70 |         heatmap_vec= utils.find_rolo_location(rolo_heat_path, test_id)
71 |         heatmap = utils.heatmap_vec_to_heatmap(None, heatmap_vec)
72 |         utils.draw_heatmap(None, heatmap)
73 |
74 |         rolo_location= utils.find_rolo_location( rolo_out_path, test_id)
75 |         rolo_location = utils.locations_normal( wid, ht, rolo_location)
76 |         print(rolo_location)
77 |
78 |         gt_location = utils.find_gt_location( lines, test_id - 1)
79 |         #gt_location= locations_from_0_to_1(None, 480, 640, gt_location)
80 |         #gt_location = locations_normal(None, 480, 640, gt_location)
81 |         print('gt: ' + str(test_id))
82 |         print(gt_location)
83 |
84 |         frame = utils.debug_2_locations( img, gt_location, yolo_location)
85 |         video.write(frame)
86 |
87 |         utils.createFolder(os.path.join('output/frames/',sequence_name))
88 |         frame_name= os.path.join('output/frames/',sequence_name,str(test_id)+'.jpg')
89 |         print(frame_name)
90 |         cv2.imwrite(frame_name, frame)
91 |         #cv2.imshow('frame',frame)
92 |         #cv2.waitKey(100)
93 |
94 |         rolo_loss = utils.cal_rolo_IOU(rolo_location, gt_location)
95 |         rolo_avgloss += rolo_loss
96 |         total += 1
97 |
98 |     rolo_avgloss /= total
99 |     print("rolo_avg_iou = ", rolo_avgloss)
100 |     video.release()
101 |     cv2.destroyAllWindows()
102 |
103 |
104 |
105 | if __name__=='__main__':
106 |     main(sys.argv)
107 |
--------------------------------------------------------------------------------
/ROLO_demo_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) <2016> . All Rights Reserved.
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | '''
16 | Script File: ROLO_demo_test.py
17 |
18 | Description:
19 |
20 | 	ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 |
26 | import cv2
27 | import os
28 | import numpy as np
29 | import sys
30 | import ROLO_utils as utils
31 | '''----------------------------------------main-----------------------------------------------------'''
32 | def main(argv):
33 |     ''' PARAMETERS '''
34 |     num_steps= 6
35 |     test = 11
36 |
37 |     [wid, ht, sequence_name, dummy_1, dummy_2] = utils.choose_video_sequence(test)
38 |
39 |     img_fold_path = os.path.join('benchmark/DATA', sequence_name, 'img/')
40 |     gt_file_path= os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt')
41 |     yolo_out_path= os.path.join('benchmark/DATA', sequence_name, 'yolo_out/')
42 |     rolo_out_path= os.path.join('benchmark/DATA', sequence_name, 'rolo_out_test/')
43 |
44 |     paths_imgs = utils.load_folder( img_fold_path)
45 |     paths_rolo= utils.load_folder( rolo_out_path)
46 |     lines = utils.load_dataset_gt( gt_file_path)
47 |
48 |     # Define the codec and create VideoWriter object
49 |     fourcc= cv2.cv.CV_FOURCC(*'DIVX')
50 |     video_name = sequence_name + '_test.avi'
51 |     video_path = os.path.join('output/videos/', video_name)
52 |     video = cv2.VideoWriter(video_path, fourcc, 20, (wid, ht))
53 |
54 |     total= 0
55 |     rolo_avgloss= 0
56 |     yolo_avgloss= 0
57 |     for i in range(len(paths_rolo)- num_steps):
58 |         id= i + 1
59 |         test_id= id + num_steps - 2  #* num_steps + 1
60 |
61 |         path = paths_imgs[test_id]
62 |         img = utils.file_to_img(path)
63 |
64 |         if(img is None): break
65 |
66 |         yolo_location= utils.find_yolo_location(yolo_out_path, test_id)
67 |         yolo_location= utils.locations_normal( wid, ht, yolo_location)
68 |         print(yolo_location)
69 |
70 |         rolo_location= utils.find_rolo_location( rolo_out_path, test_id)
71 |         rolo_location = utils.locations_normal( wid, ht, rolo_location)
72 |         print(rolo_location)
73 |
74 |         gt_location = utils.find_gt_location( lines, test_id - 1)
75 |         #gt_location= locations_from_0_to_1(None, 480, 640, gt_location)
76 |         #gt_location = locations_normal(None, 480, 640, gt_location)
77 |         print('gt: ' + str(test_id))
78 |         print(gt_location)
79 |
80 |         frame = utils.debug_3_locations( img, gt_location, yolo_location, rolo_location)
81 |         video.write(frame)
82 |
83 |         utils.createFolder(os.path.join('output/frames/',sequence_name))
84 |         frame_name= os.path.join('output/frames/',sequence_name,str(test_id)+'.jpg')
85 |         print(frame_name)
86 |         cv2.imwrite(frame_name, frame)
87 |         #cv2.imshow('frame',frame)
88 |         #cv2.waitKey(100)
89 |
90 |         rolo_loss = utils.cal_rolo_IOU(rolo_location, gt_location)
91 |         rolo_avgloss += rolo_loss
92 |         yolo_loss= utils.cal_yolo_IOU(yolo_location, gt_location)
93 |         yolo_avgloss += yolo_loss
94 |         total += 1
95 |
96 |     rolo_avgloss /= total
97 |     yolo_avgloss /= total
| print("yolo_avg_iou = ", yolo_avgloss) 99 | print("rolo_avg_iou = ", rolo_avgloss) 100 | video.release() 101 | cv2.destroyAllWindows() 102 | 103 | 104 | 105 | if __name__=='__main__': 106 | main(sys.argv) 107 | -------------------------------------------------------------------------------- /experiments/testing/ROLO_network_test_all.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ''' 16 | Script File: ROLO_network_test_all.py 17 | 18 | Description: 19 | 20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking 21 | Paper: http://arxiv.org/abs/1607.05781 22 | Author: Guanghan Ning 23 | Webpage: http://guanghan.info/ 24 | ''' 25 | 26 | # Imports 27 | import ROLO_utils as utils 28 | 29 | import tensorflow as tf 30 | from tensorflow.models.rnn import rnn, rnn_cell 31 | import cv2 32 | 33 | import numpy as np 34 | import os.path 35 | import time 36 | import random 37 | 38 | 39 | class ROLO_TF: 40 | disp_console = True 41 | restore_weights = True#False 42 | 43 | # YOLO parameters 44 | fromfile = None 45 | tofile_img = 'test/output.jpg' 46 | tofile_txt = 'test/output.txt' 47 | imshow = True 48 | filewrite_img = False 49 | filewrite_txt = False 50 | disp_console = True 51 | yolo_weights_file = 'weights/YOLO_small.ckpt' 52 | alpha = 0.1 53 | threshold = 0.2 54 | iou_threshold = 0.5 55 | num_class = 20 56 | num_box = 2 57 | grid_size = 7 58 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"] 59 | w_img, h_img = [352, 240] 60 | 61 | # ROLO Network Parameters 62 | rolo_weights_file = 'null' 63 | # rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/model_dropout_30.ckpt' 64 | lstm_depth = 3 65 | num_steps = 3 # number of frames as an input sequence 66 | num_feat = 4096 67 | num_predict = 6 # final output of LSTM 6 loc parameters 68 | num_gt = 4 69 | num_input = num_feat + num_predict # data input: 4096+6= 5002 70 | 71 | # ROLO Parameters 72 | batch_size = 1 73 | display_step = 1 74 | 75 | # tf Graph input 76 | x = tf.placeholder("float32", [None, num_steps, num_input]) 77 | istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input 78 | y = tf.placeholder("float32", [None, num_gt]) 79 | 80 | # Define weights 81 | weights = { 82 | 'out': tf.Variable(tf.random_normal([num_input, num_predict])) 83 | } 84 | biases = { 85 | 'out': tf.Variable(tf.random_normal([num_predict])) 86 | } 87 | 88 | 89 | def __init__(self,argvs = []): 90 | print("ROLO init") 91 | self.ROLO(argvs) 92 | 93 | 94 | def LSTM_single(self, name, _X, _istate, _weights, _biases): 95 | with tf.device('/gpu:0'): 96 | # input shape: (batch_size, n_steps, n_input) 97 | _X = tf.transpose(_X, [1, 0, 2]) # permute num_steps and batch_size 98 | # Reshape to prepare 
input to hidden activation 99 | _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input]) # (num_steps*batch_size, num_input) 100 | # Split data because rnn cell needs a list of inputs for the RNN inner loop 101 | _X = tf.split(0, self.num_steps, _X) # n_steps * (batch_size, num_input) 102 | 103 | cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input) 104 | state = _istate 105 | for step in range(self.num_steps): 106 | outputs, state = tf.nn.rnn(cell, [_X[step]], state) 107 | tf.get_variable_scope().reuse_variables() 108 | return outputs 109 | 110 | 111 | # Experiment with dropout 112 | def dropout_features(self, feature, prob): 113 | num_drop = int(prob * 4096) 114 | drop_index = random.sample(xrange(4096), num_drop) 115 | for i in range(len(drop_index)): 116 | index = drop_index[i] 117 | feature[index] = 0 118 | return feature 119 | '''---------------------------------------------------------------------------------------''' 120 | def build_networks(self): 121 | if self.disp_console : print "Building ROLO graph..." 122 | # Build rolo layers 123 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 124 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 125 | self.sess = tf.Session() 126 | self.sess.run(tf.initialize_all_variables()) 127 | self.saver = tf.train.Saver() 128 | #self.saver.restore(self.sess, self.rolo_weights_file) 129 | if self.disp_console : print "Loading complete!" + '\n' 130 | 131 | 132 | def testing(self, x_path, y_path): 133 | total_loss = 0 134 | # Use rolo_input for LSTM training 135 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 136 | #print("pred: ", pred) 137 | self.pred_location = pred[0][:, 4097:4101] 138 | #print("pred_location: ", self.pred_location) 139 | #print("self.y: ", self.y) 140 | self.correct_prediction = tf.square(self.pred_location - self.y) 141 | #print("self.correct_prediction: ", self.correct_prediction) 142 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 143 | #print("self.accuracy: ", self.accuracy) 144 | #optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 145 | 146 | # Initializing the variables 147 | init = tf.initialize_all_variables() 148 | # Launch the graph 149 | with tf.Session() as sess: 150 | 151 | if (self.restore_weights == True): 152 | sess.run(init) 153 | self.saver.restore(sess, self.rolo_weights_file) 154 | print "Loading complete!" 
+ '\n' 155 | else: 156 | sess.run(init) 157 | 158 | id = 0 #don't change this 159 | total_time = 0.0 160 | #id= 1 161 | 162 | # Keep training until reach max iterations 163 | while id < self.testing_iters - self.num_steps: 164 | # Load training data & ground truth 165 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 166 | 167 | # Apply dropout to batch_xs 168 | #for item in range(len(batch_xs)): 169 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0.4) 170 | 171 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 172 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 173 | 174 | # Reshape data to get 3 seq of 5002 elements 175 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 176 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 177 | #print("Batch_ys: ", batch_ys) 178 | 179 | start_time = time.time() 180 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 181 | cycle_time = time.time() - start_time 182 | total_time += cycle_time 183 | 184 | #print("ROLO Pred: ", pred_location) 185 | #print("len(pred) = ", len(pred_location)) 186 | #print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 187 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 188 | 189 | # Save pred_location to file 190 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 191 | 192 | #sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 193 | 194 | if id % self.display_step == 0: 195 | # Calculate batch loss 196 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 197 | #print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 198 | total_loss += loss 199 | id += 1 200 | #print(id) 201 | 202 | #print "Testing Finished!" 203 | avg_loss = total_loss/id 204 | print "Avg loss: " + str(avg_loss) 205 | print "Time Spent on Tracking: " + str(total_time) 206 | print "fps: " + str(id/total_time) 207 | #save_path = self.saver.save(sess, self.rolo_weights_file) 208 | #print("Model saved in file: %s" % save_path) 209 | 210 | return None 211 | 212 | 213 | def ROLO(self, argvs): 214 | 215 | self.rolo_utils= utils.ROLO_utils() 216 | self.rolo_utils.loadCfg() 217 | self.params = self.rolo_utils.params 218 | 219 | arguments = self.rolo_utils.argv_parser(argvs) 220 | 221 | if self.rolo_utils.flag_train is True: 222 | self.training(utils.x_path, utils.y_path) 223 | elif self.rolo_utils.flag_track is True: 224 | self.build_networks() 225 | self.track_from_file(utils.file_in_path) 226 | elif self.rolo_utils.flag_detect is True: 227 | self.build_networks() 228 | self.detect_from_file(utils.file_in_path) 229 | else: 230 | print "Default: running ROLO test." 
231 | self.build_networks() 232 | 233 | evaluate_st = 0 234 | evaluate_ed = 29 235 | 236 | for test in range(evaluate_st, evaluate_ed + 1): 237 | 238 | [self.w_img, self.h_img, sequence_name, dummy_1, self.testing_iters] = utils.choose_video_sequence(test) 239 | 240 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 241 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 242 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_test/') 243 | utils.createFolder(self.output_path) 244 | 245 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_nodrop_30_2.ckpt' #no dropout 246 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_online.ckpt' 247 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/MOLO/model_MOT.ckpt' 248 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/MOLO/model_MOT_0.2.ckpt' 249 | 250 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp0.ckpt' 251 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step3_exp1.ckpt' 252 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp2.ckpt' 253 | 254 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step3_exp2.ckpt' 255 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step9_exp2.ckpt' 256 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step1_exp2.ckpt' 257 | 258 | self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step3_exp1_old.ckpt' 259 | 260 | self.num_steps = 3 # number of frames as an input sequence 261 | print("TESTING ROLO on video sequence: ", sequence_name) 262 | self.testing(x_path, y_path) 263 | 264 | 265 | '''----------------------------------------main-----------------------------------------------------''' 266 | def main(argvs): 267 | ROLO_TF(argvs) 268 | 269 | 270 | if __name__=='__main__': 271 | main(' ') 272 | 273 | -------------------------------------------------------------------------------- /experiments/testing/ROLO_network_test_single.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | '''
16 | Script File: ROLO_network_test_single.py
17 | 
18 | Description:
19 | 
20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 | 
26 | # Imports
27 | import ROLO_utils as utils
28 | 
29 | import tensorflow as tf
30 | from tensorflow.models.rnn import rnn, rnn_cell
31 | import cv2
32 | 
33 | import numpy as np
34 | import os.path
35 | import time
36 | import random
37 | 
38 | 
39 | class ROLO_TF:
40 |     disp_console = True
41 |     restore_weights = True  # False
42 | 
43 |     # YOLO parameters
44 |     fromfile = None
45 |     tofile_img = 'test/output.jpg'
46 |     tofile_txt = 'test/output.txt'
47 |     imshow = True
48 |     filewrite_img = False
49 |     filewrite_txt = False
50 |     disp_console = True  # note: also set at the top of the class
51 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
52 |     alpha = 0.1
53 |     threshold = 0.2
54 |     iou_threshold = 0.5
55 |     num_class = 20
56 |     num_box = 2
57 |     grid_size = 7
58 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
59 |     w_img, h_img = [352, 240]
60 | 
61 |     # ROLO Network Parameters
62 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/model_dropout_20.ckpt'
63 |     # rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/model_dropout_30.ckpt'
64 |     lstm_depth = 3
65 |     num_steps = 3  # number of frames as an input sequence
66 |     num_feat = 4096
67 |     num_predict = 6  # final output of LSTM: 6 location parameters
68 |     num_gt = 4
69 |     num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
70 | 
71 |     # ROLO Parameters
72 |     batch_size = 1
73 |     display_step = 1
74 | 
75 |     # tf Graph input
76 |     x = tf.placeholder("float32", [None, num_steps, num_input])
77 |     istate = tf.placeholder("float32", [None, 2*num_input])  # state & cell => 2x num_input
78 |     y = tf.placeholder("float32", [None, num_gt])
79 | 
80 |     # Define weights
81 |     weights = {
82 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
83 |     }
84 |     biases = {
85 |         'out': tf.Variable(tf.random_normal([num_predict]))
86 |     }
87 | 
88 | 
89 |     def __init__(self, argvs=[]):
90 |         print("ROLO init")
91 |         self.ROLO(argvs)
92 | 
93 | 
94 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
95 | 
96 |         # input shape: (batch_size, n_steps, n_input)
97 |         _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
98 |         # Reshape to prepare input to hidden activation
99 |         _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
100 |         # Split data because rnn cell needs a list of inputs for the RNN inner loop
101 |         _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
102 |         #print("_X: ", _X)
103 |         cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
104 |         state = _istate
105 |         for step in range(self.num_steps):
106 |             outputs, state = tf.nn.rnn(cell, [_X[step]], state)
107 |             tf.get_variable_scope().reuse_variables()
108 |         #print("output: ", outputs)
109 |         #print("state: ", state)
110 |         return outputs
111 | 
112 | 
113 |     # Experiment with dropout
114 |     def dropout_features(self, feature, prob):
115 |         num_drop = int(prob * 4096)
116 |         drop_index = random.sample(xrange(4096), num_drop)
117 |         for i in range(len(drop_index)):
118 |             index = drop_index[i]
119 |             feature[index] = 0
120 |         return feature
121 |
'''---------------------------------------------------------------------------------------''' 122 | def build_networks(self): 123 | if self.disp_console : print "Building ROLO graph..." 124 | 125 | # Build rolo layers 126 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 127 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 128 | self.sess = tf.Session() 129 | self.sess.run(tf.initialize_all_variables()) 130 | self.saver = tf.train.Saver() 131 | #self.saver.restore(self.sess, self.rolo_weights_file) 132 | if self.disp_console : print "Loading complete!" + '\n' 133 | 134 | 135 | def testing(self, x_path, y_path): 136 | total_loss = 0 137 | 138 | print("TESTING ROLO...") 139 | # Use rolo_input for LSTM training 140 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 141 | print("pred: ", pred) 142 | self.pred_location = pred[0][:, 4097:4101] 143 | print("pred_location: ", self.pred_location) 144 | print("self.y: ", self.y) 145 | 146 | self.correct_prediction = tf.square(self.pred_location - self.y) 147 | print("self.correct_prediction: ", self.correct_prediction) 148 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 149 | print("self.accuracy: ", self.accuracy) 150 | #optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 151 | 152 | # Initializing the variables 153 | init = tf.initialize_all_variables() 154 | 155 | # Launch the graph 156 | with tf.Session() as sess: 157 | 158 | if (self.restore_weights == True): 159 | sess.run(init) 160 | self.saver.restore(sess, self.rolo_weights_file) 161 | print "Loading complete!" + '\n' 162 | else: 163 | sess.run(init) 164 | 165 | id = 0 #don't change this 166 | 167 | # Keep training until reach max iterations 168 | while id < self.testing_iters - self.num_steps: 169 | # Load training data & ground truth 170 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 171 | 172 | # Apply dropout to batch_xs 173 | #for item in range(len(batch_xs)): 174 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0.4) 175 | 176 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 177 | print("Batch_ys_initial: ", batch_ys) 178 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 179 | 180 | 181 | # Reshape data to get 3 seq of 5002 elements 182 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 183 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 184 | print("Batch_ys: ", batch_ys) 185 | 186 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 187 | print("ROLO Pred: ", pred_location) 188 | #print("len(pred) = ", len(pred_location)) 189 | print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 190 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 191 | 192 | # Save pred_location to file 193 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 194 | 195 | #sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 196 | 197 | if id % self.display_step == 0: 198 | # Calculate 
batch loss 199 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 200 | print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 201 | total_loss += loss 202 | id += 1 203 | print(id) 204 | 205 | print "Testing Finished!" 206 | avg_loss = total_loss/id 207 | print "Avg loss: " + str(avg_loss) 208 | #save_path = self.saver.save(sess, self.rolo_weights_file) 209 | #print("Model saved in file: %s" % save_path) 210 | 211 | return None 212 | 213 | def ROLO(self, argvs): 214 | 215 | self.rolo_utils= utils.ROLO_utils() 216 | self.rolo_utils.loadCfg() 217 | self.params = self.rolo_utils.params 218 | 219 | arguments = self.rolo_utils.argv_parser(argvs) 220 | 221 | if self.rolo_utils.flag_train is True: 222 | self.training(utils.x_path, utils.y_path) 223 | elif self.rolo_utils.flag_track is True: 224 | self.build_networks() 225 | self.track_from_file(utils.file_in_path) 226 | elif self.rolo_utils.flag_detect is True: 227 | self.build_networks() 228 | self.detect_from_file(utils.file_in_path) 229 | else: 230 | print "Default: running ROLO test." 231 | self.build_networks() 232 | 233 | test= 8 234 | [self.w_img, self.h_img, sequence_name, dummy_1, self.testing_iters] = utils.choose_video_sequence(test) 235 | 236 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 237 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 238 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_test/') 239 | utils.createFolder(self.output_path) 240 | 241 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_dropout_20.ckpt' 242 | # self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_dropout_30.ckpt' 243 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_dropout_30_2.ckpt' 244 | self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_30_2_nd_newfit.ckpt' 245 | self.testing(x_path, y_path) 246 | 247 | '''----------------------------------------main-----------------------------------------------------''' 248 | def main(argvs): 249 | ROLO_TF(argvs) 250 | 251 | if __name__=='__main__': 252 | main(' ') 253 | 254 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step1_train_30_exp2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | '''
16 | Script File: ROLO_step1_train_30_exp2.py
17 | 
18 | Description:
19 | 
20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 | 
26 | # Imports
27 | import ROLO_utils as utils
28 | 
29 | import tensorflow as tf
30 | from tensorflow.models.rnn import rnn, rnn_cell
31 | import cv2
32 | 
33 | import numpy as np
34 | import os.path
35 | import time
36 | import random
37 | 
38 | 
39 | class ROLO_TF:
40 |     disp_console = False
41 |     restore_weights = True  # False
42 | 
43 |     # YOLO parameters
44 |     fromfile = None
45 |     tofile_img = 'test/output.jpg'
46 |     tofile_txt = 'test/output.txt'
47 |     imshow = True
48 |     filewrite_img = False
49 |     filewrite_txt = False
50 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
51 |     alpha = 0.1
52 |     threshold = 0.2
53 |     iou_threshold = 0.5
54 |     num_class = 20
55 |     num_box = 2
56 |     grid_size = 7
57 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
58 |     w_img, h_img = [352, 240]
59 | 
60 |     # ROLO Network Parameters
61 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step1_exp2.ckpt'
62 |     lstm_depth = 3
63 |     num_steps = 1  # number of frames as an input sequence
64 |     num_feat = 4096
65 |     num_predict = 6  # final output of LSTM: 6 location parameters
66 |     num_gt = 4
67 |     num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
68 | 
69 |     # ROLO Training Parameters
70 |     # learning_rate = 0.00001  # training
71 |     learning_rate = 0.00001  # same value serves training and testing
72 | 
73 |     training_iters = 210  # 100000
74 |     batch_size = 1  # 128
75 |     display_step = 1
76 | 
77 |     # tf Graph input
78 |     x = tf.placeholder("float32", [None, num_steps, num_input])
79 |     istate = tf.placeholder("float32", [None, 2*num_input])  # state & cell => 2x num_input
80 |     y = tf.placeholder("float32", [None, num_gt])
81 | 
82 |     # Define weights
83 |     weights = {
84 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
85 |     }
86 |     biases = {
87 |         'out': tf.Variable(tf.random_normal([num_predict]))
88 |     }
89 | 
90 | 
91 |     def __init__(self, argvs=[]):
92 |         print("ROLO init")
93 |         self.ROLO(argvs)
94 | 
95 | 
96 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
97 | 
98 |         # input shape: (batch_size, n_steps, n_input)
99 |         _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
100 |         # Reshape to prepare input to hidden activation
101 |         _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
102 |         # Split data because rnn cell needs a list of inputs for the RNN inner loop
103 |         _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
104 |         #print("_X: ", _X)
105 | 
106 |         cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
107 |         state = _istate
108 |         for step in range(self.num_steps):
109 |             outputs, state = tf.nn.rnn(cell, [_X[step]], state)
110 |             tf.get_variable_scope().reuse_variables()
111 | 
112 |         #print("output: ", outputs)
113 |         #print("state: ", state)
114 |         return outputs
115 | 
116 | 
117 |     # Experiment with dropout
118 |     def dropout_features(self, feature, prob):
119 |         if prob == 0: return feature
120 |         else:
121 |             num_drop = int(prob * 4096)
122 |             drop_index = random.sample(xrange(4096), num_drop)
123 |             for i in range(len(drop_index)):
124 |                 index = drop_index[i]
125 |                 feature[index] = 0
126 |
return feature 127 | 128 | # Experiment with input box noise (translate, scale) 129 | def det_add_noise(self, det): 130 | translate_rate = random.uniform(0.98, 1.02) 131 | scale_rate = random.uniform(0.8, 1.2) 132 | 133 | det[0] *= translate_rate 134 | det[1] *= translate_rate 135 | det[2] *= scale_rate 136 | det[3]*= scale_rate 137 | 138 | return det 139 | 140 | '''---------------------------------------------------------------------------------------''' 141 | def build_networks(self): 142 | if self.disp_console : print "Building ROLO graph..." 143 | 144 | # Build rolo layers 145 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 146 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 147 | self.sess = tf.Session() 148 | self.sess.run(tf.initialize_all_variables()) 149 | self.saver = tf.train.Saver() 150 | #self.saver.restore(self.sess, self.rolo_weights_file) 151 | if self.disp_console : print "Loading complete!" + '\n' 152 | 153 | 154 | def training(self, x_path, y_path): 155 | total_loss = 0 156 | 157 | if self.disp_console: print("TRAINING ROLO...") 158 | # Use rolo_input for LSTM training 159 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 160 | if self.disp_console: print("pred: ", pred) 161 | self.pred_location = pred[0][:, 4097:4101] 162 | if self.disp_console: print("pred_location: ", self.pred_location) 163 | if self.disp_console: print("self.y: ", self.y) 164 | 165 | self.correct_prediction = tf.square(self.pred_location - self.y) 166 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 167 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 168 | if self.disp_console: print("self.accuracy: ", self.accuracy) 169 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 170 | 171 | # Initializing the variables 172 | init = tf.initialize_all_variables() 173 | 174 | # Launch the graph 175 | with tf.Session() as sess: 176 | 177 | if (self.restore_weights == True): 178 | sess.run(init) 179 | self.saver.restore(sess, self.rolo_weights_file) 180 | print "Loading complete!" 
+ '\n' 181 | else: 182 | sess.run(init) 183 | 184 | id = 0 185 | 186 | # Keep training until reach max iterations 187 | while id * self.batch_size < self.training_iters: 188 | # Load training data & ground truth 189 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 190 | print('len(batch_xs)= ', len(batch_xs)) 191 | # for item in range(len(batch_xs)): 192 | 193 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, id) 194 | batch_ys = self.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 195 | 196 | # Reshape data to get 3 seq of 5002 elements 197 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 198 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 199 | if self.disp_console: print("Batch_ys: ", batch_ys) 200 | 201 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 202 | if self.disp_console: print("ROLO Pred: ", pred_location) 203 | #print("len(pred) = ", len(pred_location)) 204 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 205 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 206 | 207 | # Save pred_location to file 208 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 209 | 210 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 211 | if id % self.display_step == 0: 212 | # Calculate batch loss 213 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 214 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 215 | total_loss += loss 216 | id += 1 217 | if self.disp_console: print(id) 218 | 219 | # show 3 kinds of locations, compare! 220 | 221 | print "Optimization Finished!" 222 | avg_loss = total_loss/id 223 | print "Avg loss: " + str(avg_loss) 224 | save_path = self.saver.save(sess, self.rolo_weights_file) 225 | print("Model saved in file: %s" % save_path) 226 | 227 | return avg_loss 228 | 229 | 230 | def train_30_2(self): 231 | print("TRAINING ROLO...") 232 | log_file = open("output/trainging-step1-exp2.txt", "a") #open in append mode 233 | self.build_networks() 234 | 235 | ''' TUNE THIS''' 236 | num_videos = 30 237 | epoches = 30 * 200 238 | 239 | # Use rolo_input for LSTM training 240 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 241 | self.pred_location = pred[0][:, 4097:4101] 242 | self.correct_prediction = tf.square(self.pred_location - self.y) 243 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 244 | self.learning_rate = 0.00001 245 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 246 | 247 | # Initializing the variables 248 | init = tf.initialize_all_variables() 249 | 250 | # Launch the graph 251 | with tf.Session() as sess: 252 | if (self.restore_weights == True): 253 | sess.run(init) 254 | self.saver.restore(sess, self.rolo_weights_file) 255 | print "Loading complete!" 
+ '\n' 256 | else: 257 | sess.run(init) 258 | 259 | for epoch in range(epoches): 260 | i = epoch % num_videos 261 | [self.w_img, self.h_img, sequence_name, self.training_iters, dummy]= utils.choose_video_sequence(i) 262 | 263 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 264 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 265 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 266 | utils.createFolder(self.output_path) 267 | total_loss = 0 268 | id = 1 269 | 270 | # Keep training until reach max iterations 271 | while id < self.training_iters- self.num_steps: 272 | # Load training data & ground truth 273 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 274 | 275 | # Apply dropout to batch_xs 276 | #for item in range(len(batch_xs)): 277 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0) 278 | 279 | #print(id) 280 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 281 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 282 | 283 | # Reshape data to get 3 seq of 5002 elements 284 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 285 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 286 | if self.disp_console: print("Batch_ys: ", batch_ys) 287 | 288 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 289 | if self.disp_console: print("ROLO Pred: ", pred_location) 290 | #print("len(pred) = ", len(pred_location)) 291 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 292 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 293 | 294 | # Save pred_location to file 295 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 296 | 297 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 298 | if id % self.display_step == 0: 299 | # Calculate batch loss 300 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 301 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 302 | total_loss += loss 303 | id += 1 304 | if self.disp_console: print(id) 305 | 306 | #print "Optimization Finished!" 
307 | avg_loss = total_loss/id 308 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 309 | 310 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 311 | if i+1==num_videos: 312 | log_file.write('\n') 313 | save_path = self.saver.save(sess, self.rolo_weights_file) 314 | print("Model saved in file: %s" % save_path) 315 | 316 | log_file.close() 317 | return 318 | 319 | 320 | def ROLO(self, argvs): 321 | 322 | self.rolo_utils= utils.ROLO_utils() 323 | self.rolo_utils.loadCfg() 324 | self.params = self.rolo_utils.params 325 | 326 | arguments = self.rolo_utils.argv_parser(argvs) 327 | 328 | if self.rolo_utils.flag_train is True: 329 | self.training(utils.x_path, utils.y_path) 330 | elif self.rolo_utils.flag_track is True: 331 | self.build_networks() 332 | self.track_from_file(utils.file_in_path) 333 | elif self.rolo_utils.flag_detect is True: 334 | self.build_networks() 335 | self.detect_from_file(utils.file_in_path) 336 | else: 337 | self.train_30_2() 338 | 339 | '''----------------------------------------main-----------------------------------------------------''' 340 | def main(argvs): 341 | ROLO_TF(argvs) 342 | 343 | if __name__=='__main__': 344 | main(' ') 345 | 346 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step3_train_30_exp2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
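# LSTM_single() below drives tf.nn.rnn one frame at a time, re-entering the
# variable scope between steps. Under the TF 0.x API this repo targets, a
# sketch of the equivalent single call over the whole sequence (same cell,
# same concatenated cell+hidden state) would be:
#
#   cell = tf.nn.rnn_cell.LSTMCell(num_input, num_input)
#   outputs, final_state = tf.nn.rnn(cell, _X, initial_state=_istate)
#   # outputs: list of num_steps tensors, one per frame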
14 | 
15 | '''
16 | Script File: ROLO_step3_train_30_exp2.py
17 | 
18 | Description:
19 | 
20 | ROLO is short for Recurrent YOLO, invented by Guanghan Ning for object detection, tracking and predicting
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 | 
26 | # Imports
27 | import ROLO_utils as utils
28 | 
29 | import tensorflow as tf
30 | from tensorflow.models.rnn import rnn, rnn_cell
31 | import cv2
32 | 
33 | import numpy as np
34 | import os.path
35 | import time
36 | import random
37 | 
38 | 
39 | class ROLO_TF:
40 |     disp_console = False
41 |     restore_weights = True  # False
42 | 
43 |     # YOLO parameters
44 |     fromfile = None
45 |     tofile_img = 'test/output.jpg'
46 |     tofile_txt = 'test/output.txt'
47 |     imshow = True
48 |     filewrite_img = False
49 |     filewrite_txt = False
50 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
51 |     alpha = 0.1
52 |     threshold = 0.2
53 |     iou_threshold = 0.5
54 |     num_class = 20
55 |     num_box = 2
56 |     grid_size = 7
57 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
58 |     w_img, h_img = [352, 240]
59 | 
60 |     # ROLO Network Parameters
61 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step3_exp2.ckpt'
62 |     lstm_depth = 3
63 |     num_steps = 3  # number of frames as an input sequence
64 |     num_feat = 4096
65 |     num_predict = 6  # final output of LSTM: 6 location parameters
66 |     num_gt = 4
67 |     num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
68 | 
69 |     # ROLO Training Parameters
70 |     # learning_rate = 0.00001  # training
71 |     learning_rate = 0.00001  # same value serves training and testing
72 | 
73 |     training_iters = 210  # 100000
74 |     batch_size = 1  # 128
75 |     display_step = 1
76 | 
77 |     # tf Graph input
78 |     x = tf.placeholder("float32", [None, num_steps, num_input])
79 |     istate = tf.placeholder("float32", [None, 2*num_input])  # state & cell => 2x num_input
80 |     y = tf.placeholder("float32", [None, num_gt])
81 | 
82 |     # Define weights
83 |     weights = {
84 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
85 |     }
86 |     biases = {
87 |         'out': tf.Variable(tf.random_normal([num_predict]))
88 |     }
89 | 
90 | 
91 |     def __init__(self, argvs=[]):
92 |         print("ROLO init")
93 |         self.ROLO(argvs)
94 | 
95 | 
96 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
97 | 
98 |         # input shape: (batch_size, n_steps, n_input)
99 |         _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
100 |         # Reshape to prepare input to hidden activation
101 |         _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
102 |         # Split data because rnn cell needs a list of inputs for the RNN inner loop
103 |         _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
104 |         #print("_X: ", _X)
105 | 
106 |         cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
107 |         state = _istate
108 |         for step in range(self.num_steps):
109 |             outputs, state = tf.nn.rnn(cell, [_X[step]], state)
110 |             tf.get_variable_scope().reuse_variables()
111 | 
112 |         #print("output: ", outputs)
113 |         #print("state: ", state)
114 |         return outputs
115 | 
116 | 
117 |     # Experiment with dropout
118 |     def dropout_features(self, feature, prob):
119 |         if prob == 0: return feature
120 |         else:
121 |             num_drop = int(prob * 4096)
122 |             drop_index = random.sample(xrange(4096), num_drop)
123 |             for i in range(len(drop_index)):
124 |                 index = drop_index[i]
125 |                 feature[index] = 0
126 |             return
feature 127 | 128 | 129 | '''---------------------------------------------------------------------------------------''' 130 | def build_networks(self): 131 | if self.disp_console : print "Building ROLO graph..." 132 | 133 | # Build rolo layers 134 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 135 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 136 | self.sess = tf.Session() 137 | self.sess.run(tf.initialize_all_variables()) 138 | self.saver = tf.train.Saver() 139 | #self.saver.restore(self.sess, self.rolo_weights_file) 140 | if self.disp_console : print "Loading complete!" + '\n' 141 | 142 | 143 | def training(self, x_path, y_path): 144 | total_loss = 0 145 | 146 | if self.disp_console: print("TRAINING ROLO...") 147 | # Use rolo_input for LSTM training 148 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 149 | if self.disp_console: print("pred: ", pred) 150 | self.pred_location = pred[0][:, 4097:4101] 151 | if self.disp_console: print("pred_location: ", self.pred_location) 152 | if self.disp_console: print("self.y: ", self.y) 153 | 154 | self.correct_prediction = tf.square(self.pred_location - self.y) 155 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 156 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 157 | if self.disp_console: print("self.accuracy: ", self.accuracy) 158 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 159 | 160 | # Initializing the variables 161 | init = tf.initialize_all_variables() 162 | 163 | # Launch the graph 164 | with tf.Session() as sess: 165 | 166 | if (self.restore_weights == True): 167 | sess.run(init) 168 | self.saver.restore(sess, self.rolo_weights_file) 169 | print "Loading complete!" 
+ '\n' 170 | else: 171 | sess.run(init) 172 | 173 | id = 0 174 | 175 | # Keep training until reach max iterations 176 | while id * self.batch_size < self.training_iters: 177 | # Load training data & ground truth 178 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 179 | print('len(batch_xs)= ', len(batch_xs)) 180 | # for item in range(len(batch_xs)): 181 | 182 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, id) 183 | batch_ys = self.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 184 | 185 | # Reshape data to get 3 seq of 5002 elements 186 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 187 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 188 | if self.disp_console: print("Batch_ys: ", batch_ys) 189 | 190 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 191 | if self.disp_console: print("ROLO Pred: ", pred_location) 192 | #print("len(pred) = ", len(pred_location)) 193 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 194 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 195 | 196 | # Save pred_location to file 197 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 198 | 199 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 200 | if id % self.display_step == 0: 201 | # Calculate batch loss 202 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 203 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 204 | total_loss += loss 205 | id += 1 206 | if self.disp_console: print(id) 207 | 208 | # show 3 kinds of locations, compare! 209 | 210 | print "Optimization Finished!" 211 | avg_loss = total_loss/id 212 | print "Avg loss: " + str(avg_loss) 213 | save_path = self.saver.save(sess, self.rolo_weights_file) 214 | print("Model saved in file: %s" % save_path) 215 | 216 | return avg_loss 217 | 218 | 219 | def train_30_2(self): 220 | print("TRAINING ROLO...") 221 | log_file = open("output/trainging-30-2-log.txt", "a") #open in append mode 222 | self.build_networks() 223 | 224 | ''' TUNE THIS''' 225 | num_videos = 30 226 | epoches = 30 * 300 227 | 228 | # Use rolo_input for LSTM training 229 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 230 | self.pred_location = pred[0][:, 4097:4101] 231 | self.correct_prediction = tf.square(self.pred_location - self.y) 232 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 233 | self.learning_rate = 0.00001 234 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 235 | 236 | # Initializing the variables 237 | init = tf.initialize_all_variables() 238 | 239 | # Launch the graph 240 | with tf.Session() as sess: 241 | if (self.restore_weights == True): 242 | sess.run(init) 243 | self.saver.restore(sess, self.rolo_weights_file) 244 | print "Loading complete!" 
+ '\n' 245 | else: 246 | sess.run(init) 247 | 248 | for epoch in range(epoches): 249 | i = epoch % num_videos 250 | [self.w_img, self.h_img, sequence_name, self.training_iters, dummy]= utils.choose_video_sequence(i) 251 | 252 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 253 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 254 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 255 | utils.createFolder(self.output_path) 256 | total_loss = 0 257 | id = 0 258 | 259 | # Keep training until reach max iterations 260 | while id < self.training_iters- self.num_steps: 261 | # Load training data & ground truth 262 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 263 | 264 | # Apply dropout to batch_xs 265 | #for item in range(len(batch_xs)): 266 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0) 267 | 268 | #print(id) 269 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 270 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 271 | 272 | # Reshape data to get 3 seq of 5002 elements 273 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 274 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 275 | if self.disp_console: print("Batch_ys: ", batch_ys) 276 | 277 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 278 | if self.disp_console: print("ROLO Pred: ", pred_location) 279 | #print("len(pred) = ", len(pred_location)) 280 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 281 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 282 | 283 | # Save pred_location to file 284 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 285 | 286 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 287 | if id % self.display_step == 0: 288 | # Calculate batch loss 289 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 290 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 291 | total_loss += loss 292 | id += 1 293 | if self.disp_console: print(id) 294 | 295 | #print "Optimization Finished!" 
296 | avg_loss = total_loss/id 297 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 298 | 299 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 300 | if i+1==num_videos: 301 | log_file.write('\n') 302 | save_path = self.saver.save(sess, self.rolo_weights_file) 303 | print("Model saved in file: %s" % save_path) 304 | 305 | log_file.close() 306 | return 307 | 308 | 309 | def ROLO(self, argvs): 310 | 311 | self.rolo_utils= utils.ROLO_utils() 312 | self.rolo_utils.loadCfg() 313 | self.params = self.rolo_utils.params 314 | 315 | arguments = self.rolo_utils.argv_parser(argvs) 316 | 317 | if self.rolo_utils.flag_train is True: 318 | self.training(utils.x_path, utils.y_path) 319 | elif self.rolo_utils.flag_track is True: 320 | self.build_networks() 321 | self.track_from_file(utils.file_in_path) 322 | elif self.rolo_utils.flag_detect is True: 323 | self.build_networks() 324 | self.detect_from_file(utils.file_in_path) 325 | else: 326 | self.train_30_2() 327 | 328 | '''----------------------------------------main-----------------------------------------------------''' 329 | def main(argvs): 330 | ROLO_TF(argvs) 331 | 332 | if __name__=='__main__': 333 | main(' ') 334 | 335 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step6_train_20_exp1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | '''
16 | Script File: ROLO_step6_train_20_exp1.py
17 | 
18 | Description:
19 | 
20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 | 
26 | # Imports
27 | import ROLO_utils as utils
28 | 
29 | import tensorflow as tf
30 | from tensorflow.models.rnn import rnn, rnn_cell
31 | import cv2
32 | 
33 | import numpy as np
34 | import os.path
35 | import time
36 | import random
37 | 
38 | 
39 | class ROLO_TF:
40 |     disp_console = False
41 |     restore_weights = True  # False
42 | 
43 |     # YOLO parameters
44 |     fromfile = None
45 |     tofile_img = 'test/output.jpg'
46 |     tofile_txt = 'test/output.txt'
47 |     imshow = True
48 |     filewrite_img = False
49 |     filewrite_txt = False
50 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
51 |     alpha = 0.1
52 |     threshold = 0.2
53 |     iou_threshold = 0.5
54 |     num_class = 20
55 |     num_box = 2
56 |     grid_size = 7
57 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
58 |     w_img, h_img = [352, 240]
59 | 
60 |     # ROLO Network Parameters
61 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp1.ckpt'
62 |     lstm_depth = 3
63 |     num_steps = 6  # number of frames as an input sequence
64 |     num_feat = 4096
65 |     num_predict = 6  # final output of LSTM: 6 location parameters
66 |     num_gt = 4
67 |     num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
68 | 
69 |     # ROLO Training Parameters
70 |     # learning_rate = 0.00001  # training
71 |     learning_rate = 0.00001  # same value serves training and testing
72 | 
73 |     training_iters = 210  # 100000
74 |     batch_size = 1  # 128
75 |     display_step = 1
76 | 
77 |     # tf Graph input
78 |     x = tf.placeholder("float32", [None, num_steps, num_input])
79 |     istate = tf.placeholder("float32", [None, 2*num_input])  # state & cell => 2x num_input
80 |     y = tf.placeholder("float32", [None, num_gt])
81 | 
82 |     # Define weights
83 |     weights = {
84 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
85 |     }
86 |     biases = {
87 |         'out': tf.Variable(tf.random_normal([num_predict]))
88 |     }
89 | 
90 | 
91 |     def __init__(self, argvs=[]):
92 |         print("ROLO init")
93 |         self.ROLO(argvs)
94 | 
95 | 
96 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
97 | 
98 |         # input shape: (batch_size, n_steps, n_input)
99 |         _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
100 |         # Reshape to prepare input to hidden activation
101 |         _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
102 |         # Split data because rnn cell needs a list of inputs for the RNN inner loop
103 |         _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
104 |         #print("_X: ", _X)
105 | 
106 |         cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
107 |         state = _istate
108 |         for step in range(self.num_steps):
109 |             outputs, state = tf.nn.rnn(cell, [_X[step]], state)
110 |             tf.get_variable_scope().reuse_variables()
111 | 
112 |         #print("output: ", outputs)
113 |         #print("state: ", state)
114 |         return outputs
115 | 
116 | 
117 |     # Experiment with dropout
118 |     def dropout_features(self, feature, prob):
119 |         num_drop = int(prob * 4096)
120 |         drop_index = random.sample(xrange(4096), num_drop)
121 |         for i in range(len(drop_index)):
122 |             index = drop_index[i]
123 |             feature[index] = 0
124 |         return feature
125 | 
126 | 
127 |
'''---------------------------------------------------------------------------------------''' 128 | def build_networks(self): 129 | if self.disp_console : print "Building ROLO graph..." 130 | 131 | # Build rolo layers 132 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 133 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 134 | self.sess = tf.Session() 135 | self.sess.run(tf.initialize_all_variables()) 136 | self.saver = tf.train.Saver() 137 | #self.saver.restore(self.sess, self.rolo_weights_file) 138 | if self.disp_console : print "Loading complete!" + '\n' 139 | 140 | 141 | def training(self, x_path, y_path): 142 | total_loss = 0 143 | 144 | if self.disp_console: print("TRAINING ROLO...") 145 | # Use rolo_input for LSTM training 146 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 147 | if self.disp_console: print("pred: ", pred) 148 | self.pred_location = pred[0][:, 4097:4101] 149 | if self.disp_console: print("pred_location: ", self.pred_location) 150 | if self.disp_console: print("self.y: ", self.y) 151 | 152 | self.correct_prediction = tf.square(self.pred_location - self.y) 153 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 154 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 155 | if self.disp_console: print("self.accuracy: ", self.accuracy) 156 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 157 | 158 | # Initializing the variables 159 | init = tf.initialize_all_variables() 160 | 161 | # Launch the graph 162 | with tf.Session() as sess: 163 | 164 | if (self.restore_weights == True): 165 | sess.run(init) 166 | self.saver.restore(sess, self.rolo_weights_file) 167 | print "Loading complete!" 
+ '\n' 168 | else: 169 | sess.run(init) 170 | 171 | id = 0 172 | 173 | # Keep training until reach max iterations 174 | while id * self.batch_size < self.training_iters: 175 | # Load training data & ground truth 176 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 177 | print('len(batch_xs)= ', len(batch_xs)) 178 | # for item in range(len(batch_xs)): 179 | 180 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, id) 181 | batch_ys = self.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 182 | 183 | # Reshape data to get 3 seq of 5002 elements 184 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 185 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 186 | if self.disp_console: print("Batch_ys: ", batch_ys) 187 | 188 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 189 | if self.disp_console: print("ROLO Pred: ", pred_location) 190 | #print("len(pred) = ", len(pred_location)) 191 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 192 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 193 | 194 | # Save pred_location to file 195 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 196 | 197 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 198 | if id % self.display_step == 0: 199 | # Calculate batch loss 200 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 201 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 202 | total_loss += loss 203 | id += 1 204 | if self.disp_console: print(id) 205 | 206 | # show 3 kinds of locations, compare! 207 | 208 | print "Optimization Finished!" 209 | avg_loss = total_loss/id 210 | print "Avg loss: " + str(avg_loss) 211 | save_path = self.saver.save(sess, self.rolo_weights_file) 212 | print("Model saved in file: %s" % save_path) 213 | 214 | return avg_loss 215 | 216 | 217 | def train_20(self): 218 | print("TRAINING ROLO...") 219 | log_file = open("output/trainging-20-log.txt", "a") #open in append mode 220 | self.build_networks() 221 | 222 | ''' TUNE THIS''' 223 | num_videos = 20 224 | epoches = 20 * 100 225 | 226 | # Use rolo_input for LSTM training 227 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 228 | self.pred_location = pred[0][:, 4097:4101] 229 | self.correct_prediction = tf.square(self.pred_location - self.y) 230 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 231 | self.learning_rate = 0.00001 232 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 233 | 234 | # Initializing the variables 235 | init = tf.initialize_all_variables() 236 | 237 | # Launch the graph 238 | with tf.Session() as sess: 239 | if (self.restore_weights == True): 240 | sess.run(init) 241 | self.saver.restore(sess, self.rolo_weights_file) 242 | print "Loading complete!" 
+ '\n' 243 | else: 244 | sess.run(init) 245 | 246 | for epoch in range(epoches): 247 | i = epoch % num_videos 248 | [self.w_img, self.h_img, sequence_name, dummy, self.training_iters]= utils.choose_video_sequence(i) 249 | 250 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 251 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 252 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 253 | utils.createFolder(self.output_path) 254 | total_loss = 0 255 | id = 0 256 | 257 | # Keep training until reach max iterations 258 | while id < self.training_iters- self.num_steps: 259 | # Load training data & ground truth 260 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 261 | 262 | # Apply dropout to batch_xs 263 | #for item in range(len(batch_xs)): 264 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0.4) 265 | 266 | #print(id) 267 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 268 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 269 | 270 | # Reshape data to get 3 seq of 5002 elements 271 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 272 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 273 | if self.disp_console: print("Batch_ys: ", batch_ys) 274 | 275 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 276 | if self.disp_console: print("ROLO Pred: ", pred_location) 277 | #print("len(pred) = ", len(pred_location)) 278 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 279 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 280 | 281 | # Save pred_location to file 282 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 283 | 284 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 285 | if id % self.display_step == 0: 286 | # Calculate batch loss 287 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 288 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 289 | total_loss += loss 290 | id += 1 291 | if self.disp_console: print(id) 292 | 293 | #print "Optimization Finished!" 
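            # saver.save() below rewrites the same checkpoint file after every
            # epoch, so earlier snapshots are overwritten. A sketch for keeping
            # per-epoch snapshots with the same Saver API:
            #
            #   save_path = self.saver.save(sess, self.rolo_weights_file,
            #                               global_step=epoch)  # appends -<epoch>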
294 | avg_loss = total_loss/id 295 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 296 | 297 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 298 | if i+1==num_videos: 299 | log_file.write('\n') 300 | save_path = self.saver.save(sess, self.rolo_weights_file) 301 | print("Model saved in file: %s" % save_path) 302 | 303 | log_file.close() 304 | return 305 | 306 | 307 | def ROLO(self, argvs): 308 | 309 | self.rolo_utils= utils.ROLO_utils() 310 | self.rolo_utils.loadCfg() 311 | self.params = self.rolo_utils.params 312 | 313 | arguments = self.rolo_utils.argv_parser(argvs) 314 | 315 | if self.rolo_utils.flag_train is True: 316 | self.training(utils.x_path, utils.y_path) 317 | elif self.rolo_utils.flag_track is True: 318 | self.build_networks() 319 | self.track_from_file(utils.file_in_path) 320 | elif self.rolo_utils.flag_detect is True: 321 | self.build_networks() 322 | self.detect_from_file(utils.file_in_path) 323 | else: 324 | self.train_20() 325 | 326 | '''----------------------------------------main-----------------------------------------------------''' 327 | def main(argvs): 328 | ROLO_TF(argvs) 329 | 330 | if __name__=='__main__': 331 | main(' ') 332 | 333 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step6_train_30_exp2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | ''' 16 | Script File: ROLO_step6_train_30_exp2.py 17 | 18 | Description: 19 | 20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking 21 | Paper: http://arxiv.org/abs/1607.05781 22 | Author: Guanghan Ning 23 | Webpage: http://guanghan.info/ 24 | ''' 25 | 26 | # Imports 27 | import ROLO_utils as utils 28 | 29 | import tensorflow as tf 30 | from tensorflow.models.rnn import rnn, rnn_cell 31 | import cv2 32 | 33 | import numpy as np 34 | import os.path 35 | import time 36 | import random 37 | 38 | 39 | class ROLO_TF: 40 | disp_console = False 41 | restore_weights = True  #False 42 | 43 | # YOLO parameters 44 | fromfile = None 45 | tofile_img = 'test/output.jpg' 46 | tofile_txt = 'test/output.txt' 47 | imshow = True 48 | filewrite_img = False 49 | filewrite_txt = False 50 | yolo_weights_file = 'weights/YOLO_small.ckpt' 51 | alpha = 0.1 52 | threshold = 0.2 53 | iou_threshold = 0.5 54 | num_class = 20 55 | num_box = 2 56 | grid_size = 7 57 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"] 58 | w_img, h_img = [352, 240] 59 | 60 | # ROLO Network Parameters 61 | rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp2.ckpt' 62 | lstm_depth = 3 63 | num_steps = 6 # number of frames as an input sequence 64 | num_feat = 4096 65 | num_predict = 6 # final output of LSTM: 6 location parameters 66 | num_gt = 4 67 | num_input = num_feat + num_predict # data input: 4096 + 6 = 4102 68 | 69 | # ROLO Training Parameters 70 | learning_rate = 0.00001 71 | 72 | training_iters = 210  #100000 73 | batch_size = 1 #128 74 | display_step = 1 75 | 76 | # tf Graph input 77 | x = tf.placeholder("float32", [None, num_steps, num_input]) 78 | istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input 79 | y = tf.placeholder("float32", [None, num_gt]) 80 | 81 | # Define weights 82 | weights = { 83 | 'out': tf.Variable(tf.random_normal([num_input, num_predict])) 84 | } 85 | biases = { 86 | 'out': tf.Variable(tf.random_normal([num_predict])) 87 | } 88 | 89 | 90 | def __init__(self,argvs = []): 91 | print("ROLO init") 92 | self.ROLO(argvs) 93 | 94 | 95 | def LSTM_single(self, name, _X, _istate, _weights, _biases): 96 | 97 | # input shape: (batch_size, n_steps, n_input) 98 | _X = tf.transpose(_X, [1, 0, 2]) # permute num_steps and batch_size 99 | # Reshape to prepare input to hidden activation 100 | _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input]) # (num_steps*batch_size, num_input) 101 | # Split data because rnn cell needs a list of inputs for the RNN inner loop 102 | _X = tf.split(0, self.num_steps, _X) # n_steps * (batch_size, num_input) 103 | #print("_X: ", _X) 104 | 105 | cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input) 106 | state = _istate 107 | for step in range(self.num_steps): 108 | outputs, state = tf.nn.rnn(cell, [_X[step]], state) 109 | tf.get_variable_scope().reuse_variables() 110 | return outputs 111 | 112 | 113 | # Experiment with dropout 114 | def dropout_features(self, feature, prob): 115 | if prob == 0: return feature 116 | else: 117 | num_drop = int(prob * 4096) 118 | drop_index = random.sample(xrange(4096), num_drop) 119 | for i in range(len(drop_index)): 120 | index = drop_index[i] 121 | feature[index] = 0 122 | return feature 123 | 124 | # Experiment with input box noise (translate, scale) 125 | def det_add_noise(self, det): 126 |
translate_rate = random.uniform(0.98, 1.02) 127 | scale_rate = random.uniform(0.8, 1.2) 128 | 129 | det[0] *= translate_rate 130 | det[1] *= translate_rate 131 | det[2] *= scale_rate 132 | det[3]*= scale_rate 133 | 134 | return det 135 | 136 | '''---------------------------------------------------------------------------------------''' 137 | def build_networks(self): 138 | if self.disp_console : print "Building ROLO graph..." 139 | 140 | # Build rolo layers 141 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 142 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 143 | self.sess = tf.Session() 144 | self.sess.run(tf.initialize_all_variables()) 145 | self.saver = tf.train.Saver() 146 | #self.saver.restore(self.sess, self.rolo_weights_file) 147 | if self.disp_console : print "Loading complete!" + '\n' 148 | 149 | 150 | def training(self, x_path, y_path): 151 | total_loss = 0 152 | 153 | if self.disp_console: print("TRAINING ROLO...") 154 | # Use rolo_input for LSTM training 155 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 156 | if self.disp_console: print("pred: ", pred) 157 | self.pred_location = pred[0][:, 4097:4101] 158 | if self.disp_console: print("pred_location: ", self.pred_location) 159 | if self.disp_console: print("self.y: ", self.y) 160 | 161 | self.correct_prediction = tf.square(self.pred_location - self.y) 162 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 163 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 164 | if self.disp_console: print("self.accuracy: ", self.accuracy) 165 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 166 | 167 | # Initializing the variables 168 | init = tf.initialize_all_variables() 169 | 170 | # Launch the graph 171 | with tf.Session() as sess: 172 | 173 | if (self.restore_weights == True): 174 | sess.run(init) 175 | self.saver.restore(sess, self.rolo_weights_file) 176 | print "Loading complete!" 
+ '\n' 177 | else: 178 | sess.run(init) 179 | 180 | id = 0 181 | 182 | # Keep training until reach max iterations 183 | while id * self.batch_size < self.training_iters: 184 | # Load training data & ground truth 185 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 186 | print('len(batch_xs)= ', len(batch_xs)) 187 | # for item in range(len(batch_xs)): 188 | 189 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, id) 190 | batch_ys = self.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 191 | 192 | # Reshape data to get 3 seq of 5002 elements 193 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 194 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 195 | if self.disp_console: print("Batch_ys: ", batch_ys) 196 | 197 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 198 | if self.disp_console: print("ROLO Pred: ", pred_location) 199 | #print("len(pred) = ", len(pred_location)) 200 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 201 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 202 | 203 | # Save pred_location to file 204 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 205 | 206 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 207 | if id % self.display_step == 0: 208 | # Calculate batch loss 209 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 210 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 211 | total_loss += loss 212 | id += 1 213 | if self.disp_console: print(id) 214 | 215 | # show 3 kinds of locations, compare! 216 | 217 | print "Optimization Finished!" 218 | avg_loss = total_loss/id 219 | print "Avg loss: " + str(avg_loss) 220 | save_path = self.saver.save(sess, self.rolo_weights_file) 221 | print("Model saved in file: %s" % save_path) 222 | 223 | return avg_loss 224 | 225 | 226 | def train_30_2(self): 227 | print("TRAINING ROLO...") 228 | log_file = open("output/trainging-step6-30-2-log.txt", "a") #open in append mode 229 | self.build_networks() 230 | 231 | ''' TUNE THIS''' 232 | num_videos = 30 233 | epoches = 30 * 300 234 | 235 | # Use rolo_input for LSTM training 236 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 237 | self.pred_location = pred[0][:, 4097:4101] 238 | self.correct_prediction = tf.square(self.pred_location - self.y) 239 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 240 | self.learning_rate = 0.00001 241 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 242 | 243 | # Initializing the variables 244 | init = tf.initialize_all_variables() 245 | 246 | # Launch the graph 247 | with tf.Session() as sess: 248 | if (self.restore_weights == True): 249 | sess.run(init) 250 | self.saver.restore(sess, self.rolo_weights_file) 251 | print "Loading complete!" 
+ '\n' 252 | else: 253 | sess.run(init) 254 | 255 | for epoch in range(epoches): 256 | i = epoch % num_videos 257 | [self.w_img, self.h_img, sequence_name, self.training_iters, dummy]= utils.choose_video_sequence(i) 258 | 259 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 260 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 261 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 262 | utils.createFolder(self.output_path) 263 | total_loss = 0 264 | id = 0 265 | 266 | # Keep training until reach max iterations 267 | while id < self.training_iters- self.num_steps: 268 | # Load training data & ground truth 269 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 270 | 271 | # Apply dropout to batch_xs 272 | #for item in range(len(batch_xs)): 273 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0) 274 | 275 | #print(id) 276 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 277 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 278 | 279 | # Reshape data to get 3 seq of 5002 elements 280 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 281 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 282 | if self.disp_console: print("Batch_ys: ", batch_ys) 283 | 284 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 285 | if self.disp_console: print("ROLO Pred: ", pred_location) 286 | #print("len(pred) = ", len(pred_location)) 287 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 288 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 289 | 290 | # Save pred_location to file 291 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 292 | 293 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 294 | if id % self.display_step == 0: 295 | # Calculate batch loss 296 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 297 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 298 | total_loss += loss 299 | id += 1 300 | if self.disp_console: print(id) 301 | 302 | #print "Optimization Finished!" 
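                # Note: total_loss is only accumulated on iterations where
                # id % display_step == 0, so dividing by id below assumes
                # display_step == 1, which is its value in the class parameters above.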
303 | avg_loss = total_loss/id 304 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 305 | 306 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 307 | if i+1==num_videos: 308 | log_file.write('\n') 309 | save_path = self.saver.save(sess, self.rolo_weights_file) 310 | print("Model saved in file: %s" % save_path) 311 | 312 | log_file.close() 313 | return 314 | 315 | 316 | def ROLO(self, argvs): 317 | 318 | self.rolo_utils= utils.ROLO_utils() 319 | self.rolo_utils.loadCfg() 320 | self.params = self.rolo_utils.params 321 | 322 | arguments = self.rolo_utils.argv_parser(argvs) 323 | 324 | if self.rolo_utils.flag_train is True: 325 | self.training(utils.x_path, utils.y_path) 326 | elif self.rolo_utils.flag_track is True: 327 | self.build_networks() 328 | self.track_from_file(utils.file_in_path) 329 | elif self.rolo_utils.flag_detect is True: 330 | self.build_networks() 331 | self.detect_from_file(utils.file_in_path) 332 | else: 333 | self.train_30_2() 334 | 335 | '''----------------------------------------main-----------------------------------------------------''' 336 | def main(argvs): 337 | ROLO_TF(argvs) 338 | 339 | if __name__=='__main__': 340 | main(' ') 341 | 342 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step6_train_30_exp3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | ''' 16 | Script File: ROLO_step6_train_30_exp3.py 17 | 18 | Description: 19 | 20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking 21 | Paper: http://arxiv.org/abs/1607.05781 22 | Author: Guanghan Ning 23 | Webpage: http://guanghan.info/ 24 | ''' 25 | 26 | # Imports 27 | import ROLO_utils as utils 28 | 29 | import tensorflow as tf 30 | from tensorflow.models.rnn import rnn, rnn_cell 31 | import cv2 32 | 33 | import numpy as np 34 | import os.path 35 | import time 36 | import random 37 | 38 | 39 | class ROLO_TF: 40 | disp_console = False 41 | restore_weights = False 42 | 43 | # YOLO parameters 44 | fromfile = None 45 | tofile_img = 'test/output.jpg' 46 | tofile_txt = 'test/output.txt' 47 | imshow = True 48 | filewrite_img = False 49 | filewrite_txt = False 50 | yolo_weights_file = 'weights/YOLO_small.ckpt' 51 | alpha = 0.1 52 | threshold = 0.2 53 | iou_threshold = 0.5 54 | num_class = 20 55 | num_box = 2 56 | grid_size = 7 57 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"] 58 | w_img, h_img = [352, 240] 59 | 60 | # ROLO Network Parameters 61 | rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp3.ckpt' 62 | lstm_depth = 3 63 | num_steps = 6 # number of frames as an input sequence 64 | num_feat = 4096 65 | num_predict = 6 # final output of LSTM: 6 location parameters 66 | num_gt = 4 67 | num_input = num_feat + num_predict # data input: 4096 + 6 = 4102 68 | 69 | # ROLO Training Parameters 70 | learning_rate = 0.00001 71 | 72 | training_iters = 210 73 | batch_size = 1 #128 74 | display_step = 1 75 | 76 | # tf Graph input 77 | x = tf.placeholder("float32", [None, num_steps, num_input]) 78 | istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input 79 | y = tf.placeholder("float32", [None, num_gt]) 80 | 81 | # Define weights 82 | weights = { 83 | 'out': tf.Variable(tf.random_normal([num_input, num_predict])) 84 | } 85 | biases = { 86 | 'out': tf.Variable(tf.random_normal([num_predict])) 87 | } 88 | 89 | 90 | def __init__(self,argvs = []): 91 | print("ROLO init") 92 | self.ROLO(argvs) 93 | 94 | 95 | def createFolder(self, path): 96 | if not os.path.exists(path): 97 | os.makedirs(path) 98 | 99 | 100 | def LSTM_single(self, name, _X, _istate, _weights, _biases): 101 | 102 | # input shape: (batch_size, n_steps, n_input) 103 | _X = tf.transpose(_X, [1, 0, 2]) # permute num_steps and batch_size 104 | # Reshape to prepare input to hidden activation 105 | _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input]) # (num_steps*batch_size, num_input) 106 | # Split data because rnn cell needs a list of inputs for the RNN inner loop 107 | _X = tf.split(0, self.num_steps, _X) # n_steps * (batch_size, num_input) 108 | #print("_X: ", _X) 109 | 110 | cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input) 111 | state = _istate 112 | for step in range(self.num_steps): 113 | outputs, state = tf.nn.rnn(cell, [_X[step]], state) 114 | tf.get_variable_scope().reuse_variables() 115 | return outputs 116 | 117 | 118 | # Experiment with dropout 119 | def dropout_features(self, feature, prob): 120 | num_drop = int(prob * 4096) 121 | drop_index = random.sample(xrange(4096), num_drop) 122 | for i in range(len(drop_index)): 123 | index = drop_index[i] 124 | feature[index] = 0 125 | return feature 126 | 127 | 128 |
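    # A minimal vectorized sketch of the same feature dropout, assuming `feature`
    # is a 1-D numpy array with at least 4096 entries; shown for illustration and
    # not called anywhere in this script:
    def dropout_features_vectorized(self, feature, prob):
        num_drop = int(prob * 4096)                                   # how many of the 4096 feature dims to zero
        drop_index = np.random.choice(4096, num_drop, replace=False)  # sample distinct indices
        feature[drop_index] = 0                                       # zero them in place via fancy indexing
        return feature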
'''---------------------------------------------------------------------------------------''' 129 | def build_networks(self): 130 | if self.disp_console : print "Building ROLO graph..." 131 | 132 | # Build rolo layers 133 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 134 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 135 | self.sess = tf.Session() 136 | self.sess.run(tf.initialize_all_variables()) 137 | self.saver = tf.train.Saver() 138 | #self.saver.restore(self.sess, self.rolo_weights_file) 139 | if self.disp_console : print "Loading complete!" + '\n' 140 | 141 | 142 | def training(self, x_path, y_path): 143 | total_loss = 0 144 | 145 | if self.disp_console: print("TRAINING ROLO...") 146 | # Use rolo_input for LSTM training 147 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 148 | if self.disp_console: print("pred: ", pred) 149 | self.pred_location = pred[0][:, 4097:4101] 150 | if self.disp_console: print("pred_location: ", self.pred_location) 151 | if self.disp_console: print("self.y: ", self.y) 152 | 153 | self.correct_prediction = tf.square(self.pred_location - self.y) 154 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 155 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 156 | if self.disp_console: print("self.accuracy: ", self.accuracy) 157 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 158 | 159 | # Initializing the variables 160 | init = tf.initialize_all_variables() 161 | 162 | # Launch the graph 163 | with tf.Session() as sess: 164 | 165 | if (self.restore_weights == True): 166 | sess.run(init) 167 | self.saver.restore(sess, self.rolo_weights_file) 168 | print "Loading complete!" 
+ '\n' 169 | else: 170 | sess.run(init) 171 | 172 | step = 0 173 | 174 | # Keep training until reach max iterations 175 | while step * self.batch_size < self.training_iters: 176 | # Load training data & ground truth 177 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, step) # [num_of_examples, num_input] (depth == 1) 178 | print('len(batch_xs)= ', len(batch_xs)) 179 | # for item in range(len(batch_xs)): 180 | 181 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, step) 182 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 183 | 184 | # Reshape data to get 3 seq of 5002 elements 185 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 186 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 187 | if self.disp_console: print("Batch_ys: ", batch_ys) 188 | 189 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 190 | if self.disp_console: print("ROLO Pred: ", pred_location) 191 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 192 | 193 | # Save pred_location to file 194 | utils.save_rolo_output(self.output_path, pred_location, step, self.num_steps, self.batch_size) 195 | 196 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 197 | if step % self.display_step == 0: 198 | # Calculate batch loss 199 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 200 | if self.disp_console: print "Iter " + str(step*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 201 | total_loss += loss 202 | step += 1 203 | if self.disp_console: print(step) 204 | # show 3 kinds of locations, compare! 205 | print "Optimization Finished!" 206 | avg_loss = total_loss/step 207 | print "Avg loss: " + str(avg_loss) 208 | save_path = self.saver.save(sess, self.rolo_weights_file) 209 | print("Model saved in file: %s" % save_path) 210 | return avg_loss 211 | 212 | 213 | def train_30(self): 214 | print("TRAINING ROLO...") 215 | log_file = open("output/trainging-30-log.txt", "a") #open in append mode 216 | self.build_networks() 217 | 218 | ''' TUNE THIS''' 219 | num_videos = 30 220 | epoches = 30 * 200 221 | 222 | # Use rolo_input for LSTM training 223 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 224 | self.pred_location = pred[0][:, 4097:4101] 225 | self.correct_prediction = tf.square(self.pred_location - self.y) 226 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 227 | self.learning_rate = 0.00001 228 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 229 | 230 | # Initializing the variables 231 | init = tf.initialize_all_variables() 232 | 233 | # Launch the graph 234 | with tf.Session() as sess: 235 | if (self.restore_weights == True): 236 | sess.run(init) 237 | self.saver.restore(sess, self.rolo_weights_file) 238 | print "Loading complete!" 
+ '\n' 239 | else: 240 | sess.run(init) 241 | 242 | for epoch in range(epoches): 243 | i = epoch % num_videos 244 | [self.w_img, self.h_img, sequence_name, self.training_iters, dummy]= utils.choose_video_sequence(i) 245 | 246 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 247 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 248 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 249 | self.createFolder(self.output_path) 250 | total_loss = 0 251 | step = 0 252 | 253 | # Keep training until reach max iterations 254 | num_iters= self.training_iters * 3 / self.num_steps 255 | print num_iters 256 | while step * self.batch_size < num_iters: 257 | # Load training data & ground truth 258 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, step) # [num_of_examples, num_input] (depth == 1) 259 | 260 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, step) 261 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 262 | 263 | # Reshape data to get 3 seq of 5002 elements 264 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 265 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 266 | if self.disp_console: print("Batch_ys: ", batch_ys) 267 | 268 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 269 | if self.disp_console: print("ROLO Pred: ", pred_location) 270 | 271 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 272 | # Save pred_location to file 273 | utils.save_rolo_output(self.output_path, pred_location, step, self.num_steps, self.batch_size) 274 | 275 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 276 | if step % self.display_step == 0: 277 | # Calculate batch loss 278 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 279 | if self.disp_console: print "Iter " + str(step*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 280 | total_loss += loss 281 | step += 1 282 | if self.disp_console: print(step) 283 | # show 3 kinds of locations, compare! 284 | print "Optimization Finished!" 
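            # 'step' now counts the windows processed for this sequence (bounded by
            # num_iters = training_iters * 3 / num_steps, an integer division under
            # Python 2); with display_step == 1, avg_loss below is the mean
            # minibatch loss per window.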
285 | avg_loss = total_loss/step 286 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 287 | 288 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 289 | if i+1==num_videos: 290 | log_file.write('\n') 291 | save_path = self.saver.save(sess, self.rolo_weights_file) 292 | print("Model saved in file: %s" % save_path) 293 | log_file.close() 294 | return 295 | 296 | 297 | def ROLO(self, argvs): 298 | self.rolo_utils= utils.ROLO_utils() 299 | self.rolo_utils.loadCfg() 300 | self.params = self.rolo_utils.params 301 | arguments = self.rolo_utils.argv_parser(argvs) 302 | if self.rolo_utils.flag_train is True: 303 | self.training(utils.x_path, utils.y_path) 304 | elif self.rolo_utils.flag_track is True: 305 | self.build_networks() 306 | self.track_from_file(utils.file_in_path) 307 | elif self.rolo_utils.flag_detect is True: 308 | self.build_networks() 309 | self.detect_from_file(utils.file_in_path) 310 | else: 311 | self.train_30() 312 | 313 | 314 | '''----------------------------------------main-----------------------------------------------------''' 315 | def main(argvs): 316 | ROLO_TF(argvs) 317 | 318 | if __name__=='__main__': 319 | main(' ') 320 | 321 | -------------------------------------------------------------------------------- /update/src/testing.py: -------------------------------------------------------------------------------- 1 | from utils_dataset import * 2 | from utils_draw_coord import debug_decimal_coord 3 | from utils_io_folder import * 4 | from utils_io_coord import * 5 | 6 | def get_batch_by_repeat(ndarray, batchsize): 7 | batch_ndarray = [] 8 | for id in range(batchsize): 9 | batch_ndarray.append(ndarray) 10 | return batch_ndarray 11 | 12 | 13 | def test(self, sess, loss, batch_pred_coords): 14 | print("\n\n\n--------------------------------------------TESTING OTB-50---------------------------------------------------------\n") 15 | num_videos = 50 16 | loss_dataset_total = 0 17 | OTB_folder_path = "/home/ngh/dev/ROLO-dev/benchmark/DATA/" 18 | 19 | for video_id in range(num_videos): 20 | if video_id in [1, 5, 16, 20, 21, 22, 23, 28, 30, 32, 36, 42, 43, 46]: continue 21 | 22 | [img_wid, img_ht, sequence_name, st_frame, self.training_iters] = choose_video_sequence_from_OTB50(video_id) 23 | print('testing sequence: ', sequence_name) 24 | 25 | x_path = os.path.join(OTB_folder_path, sequence_name, 'yolo_out/') 26 | y_path = os.path.join(OTB_folder_path, sequence_name, 'groundtruth_rect.txt') 27 | self.output_path = os.path.join(OTB_folder_path, sequence_name, 'rolo_loc_test/') 28 | create_folder(self.output_path) 29 | 30 | img_folder_path = os.path.join(OTB_folder_path, sequence_name, 'img/') 31 | img_paths = get_immediate_childfile_paths(img_folder_path) 32 | 33 | loss_seq_total = frame_id = 0 34 | offset_id = self.nsteps 35 | 36 | init_state_zeros = np.zeros((self.batchsize, 2*self.len_vec)) 37 | 38 | while frame_id < self.training_iters- self.nsteps: 39 | 40 | ''' The index start from zero, while the frame usually starts from one ''' 41 | st_id = st_frame - 1 42 | if frame_id < st_id: 43 | frame_id += 1 44 | continue 45 | 46 | ''' Load input data & ground truth ''' 47 | xs = load_vecs_of_stepsize_in_numpy_folder(x_path, 48 | frame_id - st_id, 49 | self.nsteps) 50 | ys = load_gt_decimal_coords_from_file(y_path, 51 | frame_id - st_id + offset_id, 52 | img_wid, 53 | img_ht) 54 | 55 | batch_xs = get_batch_by_repeat(xs, self.batchsize) 56 | batch_ys = get_batch_by_repeat(ys, self.batchsize) 57 | 58 | batch_xs = np.reshape(batch_xs, [self.batchsize, self.nsteps, 
self.len_vec]) 59 | batch_ys = np.reshape(batch_ys, [self.batchsize, 4]) 60 | 61 | ''' Save pred_location to file ''' 62 | #utils.save_rolo_output(self.output_path, pred_loc, id, self.nsteps, self.batchsize) 63 | 64 | init_state = init_state_zeros 65 | #init_state = sess.run(self.final_state, 66 | # feed_dict={self.x: batch_xs, 67 | # self.y: batch_ys, 68 | # self.istate: init_state_zeros}) 69 | batch_loss = sess.run(loss, 70 | feed_dict={self.x: batch_xs, 71 | self.y: batch_ys, 72 | self.istate: init_state}) 73 | loss_seq_total += batch_loss 74 | 75 | if self.display_validate is True: 76 | coord_decimal_gt = sess.run(self.y, 77 | feed_dict = {self.x: batch_xs, 78 | self.y: batch_ys, 79 | self.istate: init_state}) 80 | coord_decimal_pred = sess.run(batch_pred_coords, 81 | feed_dict = {self.x: batch_xs, 82 | self.y: batch_ys, 83 | self.istate: init_state} 84 | )[0] 85 | 86 | img = cv2.imread(img_paths[frame_id]) 87 | debug_decimal_coord(img, coord_decimal_pred) 88 | 89 | frame_id += 1 90 | 91 | loss_seq_avg = loss_seq_total / frame_id 92 | print "Avg loss for " + sequence_name + ": " + str(loss_seq_avg) 93 | loss_dataset_total += loss_seq_avg 94 | 95 | print('Total loss of Dataset: %f \n', loss_dataset_total) 96 | print("-----------------------------------------TESTING OTB-50 END---------------------------------------------------------\n\n\n") 97 | return loss_dataset_total 98 | -------------------------------------------------------------------------------- /update/src/training.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | import time, random 4 | 5 | from utils_io_coord import * 6 | from utils_io_list import * 7 | from utils_dataset import * 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | import matplotlib.pyplot as plt 12 | import rnn, rnn_cell, cv2 13 | 14 | from testing import test 15 | 16 | class ROLO_TF: 17 | # Buttons 18 | validate = True 19 | validate_step = 1000 20 | display_validate = True 21 | save_step = 1000 22 | display_step = 1 23 | restore_weights = True 24 | display_coords = False 25 | display_regu = False 26 | 27 | # Magic numbers 28 | learning_rate = 0.0001 29 | lamda = 1.0 30 | 31 | # Path 32 | list_pairs_numpy_file_path = '/home/ngh/dev/ROLO-TRACK/training_list/list_0.npy' 33 | dataset_annotation_folder_path = '/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000' 34 | numpy_folder_name = 'VID_loc_gt' # Alternatives: 'VID_loc_gt' and 'VID_loc' 35 | rolo_weights_file = '../rolo_weights.ckpt' 36 | rolo_current_save = '../rolo_weights_temp.ckpt' 37 | 38 | # Vector 39 | len_feat = 4096 40 | len_predict = 6 41 | len_coord = 4 42 | len_vec = 4102 43 | 44 | # Batch 45 | nsteps = 3 46 | batchsize = 16 47 | n_iters = 180000 48 | batch_offset = 0 49 | 50 | # Data 51 | x = tf.placeholder("float32", [None, nsteps, len_vec]) 52 | y = tf.placeholder("float32", [None, len_coord]) 53 | istate = tf.placeholder("float32", [None, 2*len_vec]) 54 | list_batch_pairs = [] 55 | 56 | # Initializing 57 | def __init__(self, argvs = []): 58 | print("ROLO Initializing") 59 | self.ROLO() 60 | 61 | 62 | # Routines: Data 63 | def load_training_list(self): 64 | self.list_batch_pairs = load_list_batch_pairs_from_numpy_file(self.list_pairs_numpy_file_path, 65 | self.batchsize) 66 | 67 | 68 | def load_batch(self, b_id): 69 | max_id = len(self.list_batch_pairs) 70 | if b_id <= max_id: 71 | batch_pairs = self.list_batch_pairs[b_id] 72 | 
batch_frame_ids = [int(batch_pair[1]) for batch_pair in batch_pairs] 73 | 74 | batch_subfolder_names = [batch_pair[0] for batch_pair in batch_pairs] 75 | batch_numpy_folder_paths = [os.path.join(self.dataset_annotation_folder_path, 76 | subfolder_name, 77 | self.numpy_folder_name) 78 | for subfolder_name in batch_subfolder_names] 79 | 80 | attempted_batch_yolovecs = batchload_yolovecs_from_numpy_folders(batch_numpy_folder_paths, 81 | batch_frame_ids, 82 | self.batchsize, 83 | self.nsteps) 84 | if b_id > max_id or attempted_batch_yolovecs == -1: 85 | self.update_dataset_annotation_folder_path() 86 | self.batch_offset = self.iter_id 87 | self.load_training_list() 88 | attempted_batch_yolovecs = False 89 | batch_subfolder_names = [] 90 | batch_frame_ids = [] 91 | return [attempted_batch_yolovecs, batch_subfolder_names, batch_frame_ids] 92 | 93 | 94 | def update_dataset_annotation_folder_path(self): 95 | try: 96 | list_folder_path = list(self.dataset_annotation_folder_path) 97 | list_file_path = list(self.list_pairs_numpy_file_path) 98 | 99 | last_int = int(self.dataset_annotation_folder_path[-1]) 100 | new_int = (last_int + 1)%4 101 | list_folder_path[-1] = str(new_int) 102 | list_file_path[-5] = str(new_int) 103 | 104 | self.dataset_annotation_folder_path = ''.join(list_folder_path) 105 | self.list_pairs_numpy_file_path = ''.join(list_file_path) 106 | print(self.dataset_annotation_folder_path) 107 | print(self.list_pairs_numpy_file_path) 108 | print("Finished 1/4 of all data. Annotation folder updated") 109 | except ValueError: 110 | print("Error updating dataset annotation folder") 111 | 112 | 113 | # Routines: Network 114 | def LSTM(self, name, _X, _istate): 115 | ''' shape: (batchsize, nsteps, len_vec) ''' 116 | _X = tf.transpose(_X, [1, 0, 2]) 117 | ''' shape: (nsteps, batchsize, len_vec) ''' 118 | _X = tf.reshape(_X, [self.nsteps * self.batchsize, self.len_vec]) 119 | ''' shape: n_steps * (batchsize, len_vec) ''' 120 | _X = tf.split(0, self.nsteps, _X) 121 | 122 | lstm_cell = tf.nn.rnn_cell.LSTMCell(self.len_vec, self.len_vec, state_is_tuple = False) 123 | state = _istate 124 | for step in range(self.nsteps): 125 | pred, state = rnn.rnn(lstm_cell, [_X[step]], state, dtype=tf.float32) 126 | tf.get_variable_scope().reuse_variables() 127 | if step == 0: output_state = state 128 | 129 | batch_pred_feats = pred[0][:, 0:4096] 130 | batch_pred_coords = pred[0][:, 4097:4101] 131 | return batch_pred_feats, batch_pred_coords, output_state 132 | 133 | 134 | # Routines: Train & Test 135 | def train(self): 136 | ''' Network ''' 137 | batch_pred_feats, batch_pred_coords, self.final_state = self.LSTM('lstm', self.x, self.istate) 138 | 139 | ''' Loss: L2 ''' 140 | loss = tf.reduce_mean(tf.square(self.y - batch_pred_coords)) * 100 141 | 142 | ''' regularization term: L2 ''' 143 | regularization_term = tf.reduce_mean(tf.square(self.x[:, self.nsteps-1, 0:4096] - batch_pred_feats)) * 100 144 | 145 | ''' Optimizer ''' 146 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss + self.lamda * regularization_term) # Adam Optimizer 147 | 148 | ''' Summary for tensorboard analysis ''' 149 | dataset_loss = -1 150 | dataset_loss_best = 100 151 | test_writer = tf.train.SummaryWriter('summary/test') 152 | tf.scalar_summary('dataset_loss', dataset_loss) 153 | summary_op = tf.merge_all_summaries() 154 | 155 | ''' Initializing the variables ''' 156 | init = tf.initialize_all_variables() 157 | self.saver = tf.train.Saver() 158 | batch_states = np.zeros((self.batchsize, 2*self.len_vec)) 159 
| 160 | ''' Launch the graph ''' 161 | with tf.Session() as sess: 162 | if self.restore_weights == True and os.path.isfile(self.rolo_current_save): 163 | sess.run(init) 164 | self.saver.restore(sess, self.rolo_current_save) 165 | print("Weight loaded, finetuning") 166 | else: 167 | sess.run(init) 168 | print("Training from scratch") 169 | 170 | self.load_training_list() 171 | 172 | for self.iter_id in range(self.n_iters): 173 | ''' Load training data & ground truth ''' 174 | batch_id = self.iter_id - self.batch_offset 175 | [batch_vecs, batch_seq_names, batch_frame_ids] = self.load_batch(batch_id) 176 | if batch_vecs is False: continue 177 | 178 | batch_xs = batch_vecs 179 | batch_ys = batchload_gt_decimal_coords_from_VID(self.dataset_annotation_folder_path, 180 | batch_seq_names, 181 | batch_frame_ids, 182 | offset = self.nsteps - 1) 183 | if batch_ys is False: continue 184 | 185 | ''' Reshape data ''' 186 | batch_xs = np.reshape(batch_xs, [self.batchsize, self.nsteps, self.len_vec]) 187 | batch_ys = np.reshape(batch_ys, [self.batchsize, 4]) 188 | 189 | ''' Update weights by back-propagation ''' 190 | sess.run(optimizer, feed_dict={self.x: batch_xs, 191 | self.y: batch_ys, 192 | self.istate: batch_states}) 193 | 194 | if self.iter_id % self.display_step == 0: 195 | ''' Calculate batch loss ''' 196 | batch_loss = sess.run(loss, 197 | feed_dict={self.x: batch_xs, 198 | self.y: batch_ys, 199 | self.istate: batch_states}) 200 | print("Batch loss for iteration %d: %.3f" % (self.iter_id, batch_loss)) 201 | 202 | if self.display_regu is True: 203 | ''' Caculate regularization term''' 204 | batch_regularization = sess.run(regularization_term, 205 | feed_dict={self.x: batch_xs, 206 | self.y: batch_ys, 207 | self.istate: batch_states}) 208 | print("Batch regu for iteration %d: %.3f" % (self.iter_id, batch_regularization)) 209 | 210 | if self.display_coords is True: 211 | ''' Caculate predicted coordinates ''' 212 | coords_predict = sess.run(batch_pred_coords, 213 | feed_dict={self.x: batch_xs, 214 | self.y: batch_ys, 215 | self.istate: batch_states}) 216 | print("predicted coords:" + str(coords_predict[0])) 217 | print("ground truth coords:" + str(batch_ys[0])) 218 | 219 | ''' Save model ''' 220 | if self.iter_id % self.save_step == 1: 221 | self.saver.save(sess, self.rolo_current_save) 222 | print("\n Model saved in file: %s" % self.rolo_current_save) 223 | 224 | ''' Validation ''' 225 | if self.validate == True and self.iter_id % self.validate_step == 0: 226 | dataset_loss = test(self, sess, loss, batch_pred_coords) 227 | 228 | ''' Early-stop regularization ''' 229 | if dataset_loss <= dataset_loss_best: 230 | dataset_loss_best = dataset_loss 231 | self.saver.save(sess, self.rolo_weights_file) 232 | print("\n Better Model saved in file: %s" % self.rolo_weights_file) 233 | 234 | ''' Write summary for tensorboard ''' 235 | summary = sess.run(summary_op, feed_dict={self.x: batch_xs, 236 | self.y: batch_ys, 237 | self.istate: batch_states}) 238 | test_writer.add_summary(summary, self.iter_id) 239 | return 240 | 241 | 242 | def ROLO(self): 243 | print("Initializing ROLO") 244 | self.train() 245 | print("Training Completed") 246 | 247 | '''----------------------------------------main-----------------------------------------------------''' 248 | def main(argvs): 249 | ROLO_TF(argvs) 250 | 251 | if __name__ == "__main__": 252 | main(' ') 253 | -------------------------------------------------------------------------------- /update/unit_test/test_all.py: 
-------------------------------------------------------------------------------- 1 | import sys, os, shutil 2 | sys.path.append(os.path.abspath("../utils/")) 3 | 4 | def test_script(script_name): 5 | cmd = os.path.join(os.getcwd(), script_name) 6 | os.system('{} {}'.format('python', cmd)) 7 | 8 | def clean(): 9 | shutil.rmtree('../temp_folder') 10 | 11 | def main(): 12 | scripts = ['test_utils_natural_sort.py', 13 | 'test_utils_io_file.py', 14 | 'test_utils_io_folder.py', 15 | 'test_utils_io_coord.py', 16 | 'test_utils_io_list.py', 17 | 'test_utils_dataset.py', 18 | 'test_utils_convert_coord.py'] 19 | 20 | for script in scripts: 21 | test_script(script) 22 | 23 | clean() 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_convert_coord.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_io_file import * 4 | from test_utils_io_folder import create_dummy_files_in_folder 5 | from utils_convert_coord import * 6 | import numpy as np 7 | 8 | def test_coord_decimal_to_regular(): 9 | [img_wid, img_ht] = [640, 480] 10 | coord_decimal = [0.44312766, 0.64272517, 0.15378259, 0.27607924] 11 | 12 | coord_regular_converted = coord_decimal_to_regular(coord_decimal, img_wid, img_ht) 13 | coord_decimal_converted = coord_regular_to_decimal(coord_regular_converted, img_wid, img_ht) 14 | 15 | print("\t decimal coords : " + str(coord_decimal)) 16 | print("\t decimal coords after conversion: " + str(coord_decimal_converted)) 17 | 18 | loss = sum(abs(np.array(coord_decimal_converted) - np.array(coord_decimal))) 19 | 20 | if loss <= 0.004: 21 | return True 22 | else: 23 | print("loss for decimal coords is: " + str(loss)) 24 | return False 25 | 26 | 27 | def test_coord_regular_to_decimal(): 28 | [img_wid, img_ht] = [640, 480] 29 | coord_regular = [234, 242, 98, 132] 30 | 31 | coord_decimal_converted = coord_regular_to_decimal(coord_regular, img_wid, img_ht) 32 | coord_regular_converted = coord_decimal_to_regular(coord_decimal_converted, img_wid, img_ht) 33 | 34 | print("\t regular coords : " + str(coord_regular)) 35 | print("\t regular coords after conversion: " + str(coord_regular_converted)) 36 | 37 | loss = sum(abs(np.array(coord_regular_converted) - np.array(coord_regular))) 38 | if loss <= 4: 39 | return True 40 | else: 41 | print("loss for regular coordinates is: " + str(loss)) 42 | return False 43 | 44 | 45 | def main(): 46 | print("Testing: utils_convert_coord") 47 | 48 | passed = test_coord_decimal_to_regular() 49 | if passed is False: 50 | print("\t test_coord_decimal_to_regular failed") 51 | 52 | passed = test_coord_regular_to_decimal() 53 | if passed is False: 54 | print("\t test_coord_regular_to_decimal failed") 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_dataset.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | 4 | from utils_io_coord import * 5 | from utils_io_list import * 6 | from utils_dataset import batchload_gt_decimal_coords_from_VID 7 | from utils_io_folder import create_folder 8 | 9 | def test_batchload_gt_decimal_coords_from_VID(): 10 | VID_annotation_path = 
'/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000' 11 | pairs_list_numpy_file_path = '/home/ngh/dev/ROLO-TRACK/training_list/list.npy' 12 | batchsize = 8 13 | nsteps = 3 14 | offset = nsteps - 1 15 | 16 | list_batch_pairs = load_list_batch_pairs_from_numpy_file(pairs_list_numpy_file_path, batchsize) 17 | 18 | for batch_pairs in list_batch_pairs[0:2]: 19 | batch_subfolder_names = [batch_pair[0] for batch_pair in batch_pairs] 20 | batch_frame_ids = [int(batch_pair[1]) for batch_pair in batch_pairs] 21 | 22 | batch_gt_decimal_coords = batchload_gt_decimal_coords_from_VID(VID_annotation_path, batch_subfolder_names, batch_frame_ids, offset) 23 | 24 | if batch_gt_decimal_coords is False: 25 | return False 26 | else: 27 | return True 28 | 29 | 30 | def main(): 31 | print("Testing: utils_io_dataset") 32 | 33 | finished = test_batchload_gt_decimal_coords_from_VID() 34 | if finished is not True: 35 | print("test_batchload_gt_decimal_coords failed") 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_coord.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | 4 | from utils_io_coord import * 5 | from utils_io_list import * 6 | from utils_io_folder import create_folder 7 | 8 | def test_batchload_yolovecs_from_numpy_folders(): 9 | pairs_list_numpy_file_path = '/home/ngh/dev/ROLO-TRACK/training_list/list.npy' 10 | dataset_annotation_folder_path = '/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000' 11 | batchsize = 8 12 | nsteps = 3 13 | 14 | list_batch_pairs = load_list_batch_pairs_from_numpy_file(pairs_list_numpy_file_path, batchsize) 15 | for batch_pairs in list_batch_pairs[0:10]: 16 | batch_frame_ids = [batch_pair[1] for batch_pair in batch_pairs] 17 | batch_subfolder_names = [batch_pair[0] for batch_pair in batch_pairs] 18 | batch_numpy_folder_paths = [os.path.join(dataset_annotation_folder_path, subfolder_name, 'VID_loc_gt') for subfolder_name in batch_subfolder_names] 19 | 20 | attempted_yolo_batch = batchload_yolovecs_from_numpy_folders(batch_numpy_folder_paths, batch_frame_ids, batchsize, nsteps) 21 | if attempted_yolo_batch is not False: 22 | output_batch_vecs = attempted_yolo_batch 23 | return True 24 | 25 | 26 | def test_save_vec_as_numpy(): 27 | output_folder_path = '../temp_folder' 28 | create_folder(output_folder_path) 29 | 30 | pairs_list_numpy_file_path = '/home/ngh/dev/ROLO-TRACK/training_list/list.npy' 31 | dataset_annotation_folder_path = '/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000' 32 | batchsize = 8 33 | nsteps = 3 34 | 35 | list_batch_pairs = load_list_batch_pairs_from_numpy_file(pairs_list_numpy_file_path, batchsize) 36 | for batch_pairs in list_batch_pairs[0:10]: 37 | batch_frame_ids = [int(batch_pair[1]) for batch_pair in batch_pairs] 38 | batch_subfolder_names = [batch_pair[0] for batch_pair in batch_pairs] 39 | batch_numpy_folder_paths = [os.path.join(dataset_annotation_folder_path, subfolder_name, 'VID_loc_gt') for subfolder_name in batch_subfolder_names] 40 | 41 | attempted_yolo_batch = batchload_yolovecs_from_numpy_folders(batch_numpy_folder_paths, batch_frame_ids, batchsize, nsteps) 42 | if attempted_yolo_batch is not False: 43 | batch_output_vecs = attempted_yolo_batch 44 | for id in range(batchsize): 45 | frame_id = 
batch_frame_ids[id] 46 | output_vec = batch_output_vecs[id][nsteps-1] 47 | save_vec_as_numpy_by_frame_id(output_folder_path, frame_id, output_vec) 48 | return True 49 | 50 | 51 | def main(): 52 | print("Testing: utils_io_coord") 53 | 54 | finished = test_batchload_yolovecs_from_numpy_folders() 55 | if finished is not True: 56 | print("test_batchload_yolovecs_from_numpy_folder failed") 57 | 58 | finished = test_save_vec_as_numpy() 59 | if finished is not True: 60 | print("test_batchsave_vecs_as_numpy failed") 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_file.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_io_file import * 4 | from test_utils_io_folder import create_dummy_files_in_folder 5 | 6 | def test_validate_file_format(): 7 | temp_folder = '../temp_folder' 8 | create_dummy_files_in_folder(temp_folder, file_format = 'txt') 9 | create_dummy_files_in_folder(temp_folder, file_format = 'png') 10 | txt_file_path = os.path.join(temp_folder, '1.txt') 11 | png_file_path = os.path.join(temp_folder, '1.png') 12 | allowed_format = ['txt', 'jpg'] 13 | 14 | expecting_true = validate_file_format(txt_file_path, allowed_format) 15 | expecting_false = validate_file_format(png_file_path, allowed_format) 16 | 17 | if expecting_true is True and expecting_false is False: 18 | return True 19 | else: 20 | return False 21 | 22 | 23 | def main(): 24 | print("Testing: utils_io_file") 25 | 26 | passed = test_validate_file_format() 27 | if passed is False: 28 | print("\t test_validate_file_format failed") 29 | 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_folder.py: -------------------------------------------------------------------------------- 1 | import sys, os, io, shutil 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_io_folder import * 4 | 5 | def test_create_folder(): 6 | folder_path = "../temp_folder/" 7 | 8 | if os.path.exists(folder_path): 9 | shutil.rmtree(folder_path) 10 | create_folder(folder_path) 11 | 12 | if os.path.exists(folder_path): 13 | return True 14 | else: 15 | return False 16 | 17 | 18 | def test_get_immediate_subfolder_paths(): 19 | folder_path = '../temp_folder/' 20 | subfolder_paths = ['../temp_folder/subfolder_1/', '../temp_folder/subfolder_2'] 21 | 22 | create_folder(folder_path) 23 | create_folder(subfolder_paths[0]) 24 | create_folder(subfolder_paths[1]) 25 | 26 | subfolder_paths_derived = get_immediate_subfolder_paths(folder_path) 27 | 28 | if set(subfolder_paths_derived).isdisjoint(subfolder_paths): 29 | return False 30 | else: 31 | return True 32 | 33 | 34 | def test_get_immediate_subfolder_names(): 35 | folder_path = '../temp_folder/' 36 | subfolder_paths = ['../temp_folder/subfolder_1/', '../temp_folder/subfolder_2'] 37 | subfolder_names = ['subfolder_1', 'subfolder_2'] 38 | 39 | create_folder(folder_path) 40 | create_folder(subfolder_paths[0]) 41 | create_folder(subfolder_paths[1]) 42 | 43 | subfolder_names_derived = get_immediate_subfolder_names(folder_path) 44 | 45 | if set(subfolder_names_derived).isdisjoint(subfolder_names): 46 | return False 47 | else: 48 | return True 49 | 50 | 51 | def test_get_immediate_childfile_paths(): 52 | temp_folder = '../temp_folder' 53 | 
create_dummy_files_in_folder(temp_folder) 53 | childfile_paths = [ os.path.join(temp_folder, (str(ct)+ '.txt')) for ct in range(10)] 54 | 55 | childfile_paths_derived = get_immediate_childfile_paths(temp_folder) 56 | shutil.rmtree(temp_folder) 57 | 58 | if set(childfile_paths_derived).isdisjoint(childfile_paths): 59 | return False 60 | else: 61 | return True 62 | 63 | 64 | def test_get_immediate_childfile_names(): 65 | temp_folder = '../temp_folder' 66 | create_dummy_files_in_folder(temp_folder) 67 | childfile_names = [(str(ct)+ '.txt') for ct in range(10)] 68 | 69 | childfile_names_derived = get_immediate_childfile_names(temp_folder) 70 | shutil.rmtree(temp_folder) 71 | 72 | if set(childfile_names_derived).isdisjoint(childfile_names): 73 | return False 74 | else: 75 | return True 76 | 77 | 78 | def create_dummy_files_in_folder(temp_folder, file_format = 'txt'): 79 | create_folder(temp_folder) 80 | for ct in range(10): 81 | file_name = str(ct) + '.' + file_format 82 | file_path = os.path.join(temp_folder, file_name) 83 | with io.FileIO(file_path, "w") as file: 84 | file.write("Hello!") 85 | 86 | 87 | def main(): 88 | print("Testing: utils_io_folder") 89 | 90 | passed = test_create_folder() 91 | if passed is False: 92 | print("\t create_folder failed") 93 | 94 | passed = test_get_immediate_subfolder_names() 95 | if passed is False: 96 | print("\t get_immediate_subfolder_names failed") 97 | 98 | passed = test_get_immediate_subfolder_paths() 99 | if passed is False: 100 | print("\t get_immediate_subfolder_paths failed") 101 | 102 | passed = test_get_immediate_childfile_names() 103 | if passed is False: 104 | print("\t get_immediate_childfile_names failed") 105 | 106 | passed = test_get_immediate_childfile_paths() 107 | if passed is False: 108 | print("\t get_immediate_childfile_paths failed") 109 | 110 | 111 | if __name__ == '__main__': 112 | main() 113 | 114 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_folder.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/ROLO/6612007e35edb73dac734e7a4dac2cd4c1dca6c1/update/unit_test/test_utils_io_folder.pyc -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_list.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_io_list import * 4 | from test_utils_io_folder import * 5 | 6 | def test_generate_pairs_for_each_folder(): 7 | images_folder_path= "folder/path/example" 8 | num_of_frames = 2 9 | 10 | pairs = generate_pairs_for_each_folder(images_folder_path, num_of_frames) 11 | 12 | expected_pair = [("example", 0), ("example", 1)] 13 | if expected_pair == pairs: 14 | return True 15 | else: 16 | return False 17 | 18 | 19 | def test_generate_num_of_frames_list(): 20 | folders_paths_list = ['../temp_folder_1', '../temp_folder_2'] 21 | for folder_path in folders_paths_list: 22 | create_folder(folder_path) 23 | create_dummy_files_in_folder(folder_path) 24 | 25 | num_of_frames_list = generate_num_of_frames_list(folders_paths_list) 26 | 27 | for folder_path in folders_paths_list: 28 | shutil.rmtree(folder_path) 29 | 30 | expected_list = [10, 10] 31 | if expected_list == num_of_frames_list: 32 | return True 33 | else: 34 | return False 35 | 36 | 37 | def test_generate_pairs_with_two_lists(): 38 | folders_paths_list = ['../temp_folder_1',
'../temp_folder_2'] 39 | num_of_frames_list = [1, 2] 40 | 41 | pairs_list = generate_pairs_with_two_lists(folders_paths_list, num_of_frames_list) 42 | 43 | expected_list = [('temp_folder_1', 0), ('temp_folder_2', 0), ('temp_folder_2', 1)] 44 | if expected_list == pairs_list: 45 | return True 46 | else: 47 | return False 48 | 49 | 50 | def test_generate_pairs_list_for_training(): 51 | dataset_folder_path = '/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0000/' 52 | output_folder_path = '/home/ngh/dev/ROLO-TRACK/training_list/' 53 | create_folder(output_folder_path) 54 | 55 | txt_file_path = os.path.join(output_folder_path, 'list_0.txt') 56 | numpy_file_path = os.path.join(output_folder_path, 'list_0') 57 | 58 | finished = generate_pairs_list_for_training(dataset_folder_path, numpy_file_path, txt_file_path) 59 | 60 | if finished is True: 61 | return True 62 | else: 63 | return False 64 | 65 | 66 | def main(): 67 | print("Testing: utils_io_list") 68 | 69 | passed = test_generate_num_of_frames_list() 70 | if passed is False: 71 | print("test_generate_num_of_frames_list failed") 72 | 73 | passed = test_generate_pairs_for_each_folder() 74 | if passed is False: 75 | print("test_generate_pairs_for_each_folder failed") 76 | 77 | passed = test_generate_pairs_with_two_lists() 78 | if passed is False: 79 | print("test_generate_pairs_with_two_lists failed") 80 | 81 | passed = test_generate_pairs_list_for_training() 82 | if passed is False: 83 | print("test_generate_pairs_list_for_training failed") 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_natural_sort.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_natural_sort import * 4 | 5 | def test_natural_sort(): 6 | test_string_list_desired = ['A00001', 'A00002', 'A00010', 'A00011', 'B00001', 'B00002', 'B00010', 'B00011'] 7 | test_string_list = ['B00002', 'A00010', 'A00011', 'B00010', 'A00001', 'B00011', 'A00002', 'B00001'] 8 | 9 | natural_sort(test_string_list) 10 | 11 | if test_string_list == test_string_list_desired: 12 | return True 13 | else: 14 | return False 15 | 16 | 17 | def main(): 18 | print("Testing: utils_natural_sort") 19 | 20 | passed = test_natural_sort() 21 | if passed is False: 22 | print("\t natural_sort failed") 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /update/utils/utils_cal_iou.py: -------------------------------------------------------------------------------- 1 | 2 | def compute_iou_with_regular_coord(box1, box2): 3 | # Prevent NaN in benchmark results 4 | validate_box(box1) 5 | validate_box(box2) 6 | 7 | # change float to int, in order to prevent overflow 8 | box1 = map(int, box1) 9 | box2 = map(int, box2) 10 | 11 | tb = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2]) 12 | lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3]) 13 | if tb <= 0 or lr <= 0 : 14 | intersection = 0 15 | else : intersection = tb*lr 16 | return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection) 17 | 18 | 19 | def compute_iou_with_decimal_coord(box1, box2, w, h): 20 | box1 = coord_decimal_to_regular(w,h,box1) 21 | box2 = coord_decimal_to_regular(w,h,box2) 22 | return 
--------------------------------------------------------------------------------
/update/utils/utils_convert_coord.py:
--------------------------------------------------------------------------------
1 | # There are 3 kinds of representation of coordinates
2 | # 1. Coord_decimal: (x0, y0, w, h), each a float in [0, 1], a ratio to the image width and height, respectively.
3 | #    (x0, y0) is the middle point of the bounding box.
4 | #    Used by YOLO output, ROLO input and output.
5 | # 2. Coord_regular: (X1, Y1, W, H), pixel values in int.
6 | #    (X1, Y1) is the top-left point of the bounding box.
7 | #    The ground truth boxes read from files are usually of this format.
8 | # 3. Detection in Vector: [4096-d feature_vector] + (class, x0, y0, w, h, prob).
9 | #    The same as Coord_decimal, except that the detection carries more information.
10 | #-----------------------------------------------------------------------------------------------
11 | 
12 | def coord_regular_to_decimal(coord_regular, img_wid, img_ht):
13 |     img_wid *= 1.0
14 |     img_ht *= 1.0
15 |     coord_decimal = list(coord_regular)
16 | 
17 |     # convert the top-left point (x, y) to the mid point (x, y)
18 |     coord_decimal[0] += coord_regular[2] / 2.0
19 |     coord_decimal[1] += coord_regular[3] / 2.0
20 | 
21 |     # convert to [0, 1]
22 |     coord_decimal[0] /= img_wid
23 |     coord_decimal[1] /= img_ht
24 |     coord_decimal[2] /= img_wid
25 |     coord_decimal[3] /= img_ht
26 | 
27 |     return coord_decimal
28 | 
29 | 
30 | def coord_decimal_to_regular(coord_decimal, img_wid, img_ht):
31 |     w_box = int(coord_decimal[2] * img_wid)
32 |     h_box = int(coord_decimal[3] * img_ht)
33 |     x_topleft = int( img_wid * (coord_decimal[0] - coord_decimal[2]/2.0) )
34 |     y_topleft = int( img_ht * (coord_decimal[1] - coord_decimal[3]/2.0) )
35 | 
36 |     coord_regular = [x_topleft, y_topleft, w_box, h_box]
37 | 
38 |     return coord_regular
39 | 
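40 | 
41 | 
42 | # Round-trip sanity check (added for illustration): on a 640x480 image, the
43 | # regular box (100, 60, 40, 120) maps to decimal (0.1875, 0.25, 0.0625, 0.25)
44 | # and converts back without loss.
45 | if __name__ == '__main__':
46 |     coord_decimal = coord_regular_to_decimal([100, 60, 40, 120], 640, 480)
47 |     print(coord_decimal)                                      # [0.1875, 0.25, 0.0625, 0.25]
48 |     print(coord_decimal_to_regular(coord_decimal, 640, 480))  # [100, 60, 40, 120]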
--------------------------------------------------------------------------------
/update/utils/utils_convert_heatmap.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def coordinates_to_heatmap_vec(coord):
4 |     heatmap_vec = np.zeros(1024)
5 |     [x1, y1, x2, y2] = coord
6 |     for y in range(y1, y2+1):
7 |         for x in range(x1, x2+1):
8 |             index = y*32 + x
9 |             heatmap_vec[index] = 1.0  # alternatively: random.uniform(0.8, 1)
10 |     return heatmap_vec
11 | 
12 | 
13 | def heatmap_vec_to_heatmap(heatmap_vec):
14 |     size = 32
15 |     heatmap = np.zeros((size, size))
16 |     for y in range(0, size):
17 |         for x in range(0, size):
18 |             index = y*size + x
19 |             heatmap[y][x] = heatmap_vec[index]
20 |     return heatmap
21 | 
--------------------------------------------------------------------------------
/update/utils/utils_dataset.py:
--------------------------------------------------------------------------------
1 | # Licensed under the Apache License, Version 2.0 (the "License");
2 | # you may not use this file except in compliance with the License.
3 | # You may obtain a copy of the License at
4 | 
5 | #     http://www.apache.org/licenses/LICENSE-2.0
6 | 
7 | # Unless required by applicable law or agreed to in writing, software
8 | # distributed under the License is distributed on an "AS IS" BASIS,
9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | '''
13 |    Script File: utils_dataset.py
14 |    [Input] A network model, a file
15 |    [Output] A file with Detection or Tracking results
16 |    Description:
17 |        ROLO is short for Recurrent YOLO, aimed at object detection, tracking and prediction
18 |    Paper: http://arxiv.org/abs/1607.05781
19 |    Author: Guanghan Ning
20 |    Webpage: http://guanghan.info/
21 | '''
22 | 
23 | 
24 | import cv2
25 | import os, sys, time, math, re
26 | import numpy as np
27 | import tensorflow as tf
28 | import matplotlib.pyplot as plt
29 | from utils_io_folder import get_immediate_subfolder_names
30 | from utils_io_coord import load_lines_from_txt_file, load_regular_coord_by_line
31 | from utils_convert_coord import coord_regular_to_decimal
32 | 
33 | def batchload_gt_decimal_coords_from_VID(VID_annotation_path, batch_seq_names, batch_frame_ids, offset = 3):
34 |     batch_decimal_coords = []
35 |     batch_seq_paths = [os.path.join(VID_annotation_path, seq_name)
36 |                        for seq_name in batch_seq_names]
37 | 
38 |     for id, seq_path in enumerate(batch_seq_paths):
39 |         frame_id = batch_frame_ids[id]
40 |         line_id = frame_id + offset  # Prediction of future frame
41 | 
42 |         info_file_path = find_sequence_info_file_from_VID(seq_path)
43 |         [img_wid, img_ht] = load_sequence_info(info_file_path)
44 | 
45 |         gt_file_path = find_sequence_gt_file_from_VID(seq_path)
46 |         decimal_coord = load_gt_decimal_coords_from_file(gt_file_path, line_id, img_wid, img_ht)
47 |         batch_decimal_coords.append(decimal_coord)
48 | 
49 |     return batch_decimal_coords
50 | 
51 | 
52 | def load_gt_decimal_coords_from_file(gt_file_path, line_id, img_wid, img_ht):
53 |     lines = load_lines_from_txt_file(gt_file_path)
54 |     regular_coord = load_regular_coord_by_line(lines, line_id)
55 |     if regular_coord is False: return False
56 | 
57 |     decimal_coord = coord_regular_to_decimal(regular_coord, img_wid, img_ht)
58 |     return decimal_coord
59 | 
60 | 
61 | def find_sequence_info_file_from_VID(seq_path):
62 |     info_file_path = os.path.join(seq_path, "sequence_info.txt")
63 |     return info_file_path
64 | 
65 | 
66 | def find_sequence_gt_file_from_VID(seq_path):
67 |     gt_file_path = os.path.join(seq_path, "groundtruth_rect.txt")
68 |     return gt_file_path
69 | 
70 | 
71 | def load_sequence_info(info_file_path):
72 |     with open(info_file_path, "r") as text_file:
73 |         lines = text_file.read().split(' ')
74 |     [img_wid, img_ht, sequence_name, training_iters] = [int(lines[0]), int(lines[1]), lines[2], int(lines[3])]
75 |     return [img_wid, img_ht]
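76 | 
77 | 
78 | # Format example (added; the values shown are hypothetical): sequence_info.txt
79 | # is expected to hold a single space-separated line such as
80 | #     1280 720 ILSVRC2015_train_00000000 464
81 | # which load_sequence_info parses as [img_wid, img_ht, sequence_name, training_iters].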
82 | 
83 | 
84 | def choose_video_sequence_from_VID_by_id(folder, i):
85 |     if i < 1000:
86 |         mfolder = folder + '/ILSVRC2015_VID_train_0000'
87 |         j = i
88 |     elif i < 2000:
89 |         mfolder = folder + '/ILSVRC2015_VID_train_0001'
90 |         j = i % 1000
91 |     elif i < 3000:
92 |         mfolder = folder + '/ILSVRC2015_VID_train_0002'
93 |         j = i % 2000
94 |     else:
95 |         mfolder = folder + '/ILSVRC2015_VID_train_0003'
96 |         j = i % 3000
97 |     subfolders = get_immediate_subfolder_names(mfolder)
98 |     subfolder_sequence_info_file = os.path.join(mfolder, subfolders[j], 'sequence_info.txt')
99 |     with open(subfolder_sequence_info_file, "r") as text_file:
100 |         lines = text_file.read().split(' ')
101 |     [img_wid, img_ht, sequence_name, training_iters] = [int(lines[0]), int(lines[1]), lines[2], int(lines[3])]
102 |     return [img_wid, img_ht, sequence_name, training_iters]
103 | 
104 | 
105 | # For OTB-50, one entry per sequence:
106 | # (w_img, h_img, sequence_name, testing_iters, start_frame)
107 | OTB50_SEQUENCES = [
108 |     (576, 432, 'Basketball', 725, 1),
109 |     (640, 360, 'Biker', 142, 1),
110 |     (720, 400, 'Bird1', 408, 1),
111 |     (640, 480, 'BlurBody', 334, 1),
112 |     (640, 480, 'BlurCar2', 585, 1),
113 |     (640, 480, 'BlurFace', 493, 1),
114 |     (640, 480, 'BlurOwl', 631, 1),
115 |     (640, 360, 'Bolt', 350, 1),
116 |     (640, 480, 'Box', 1161, 1),
117 |     (320, 240, 'Car1', 1020, 1),
118 |     (360, 240, 'Car4', 659, 1),
119 |     (320, 240, 'CarDark', 393, 1),
120 |     (640, 272, 'CarScale', 252, 1),
121 |     (320, 240, 'ClifBar', 472, 1),
122 |     (320, 240, 'Couple', 140, 1),
123 |     (600, 480, 'Crowds', 347, 1),
124 |     (320, 240, 'David', 770, 300),
125 |     (704, 400, 'Deer', 71, 1),
126 |     (400, 224, 'Diving', 214, 1),
127 |     (640, 360, 'DragonBaby', 113, 1),
128 |     (720, 480, 'Dudek', 1145, 1),
129 |     (624, 352, 'Football', 74, 1),
130 |     (360, 240, 'Freeman4', 283, 1),
131 |     (128, 96, 'Girl', 500, 1),
132 |     (480, 640, 'Human3', 1698, 1),
133 |     (640, 480, 'Human4', 667, 1),
134 |     (480, 640, 'Human6', 792, 1),
135 |     (320, 240, 'Human9', 302, 1),
136 |     (720, 304, 'Ironman', 166, 1),
137 |     (416, 234, 'Jump', 122, 1),
138 |     (352, 288, 'Jumping', 313, 1),
139 |     (640, 480, 'Liquor', 1741, 1),
140 |     (800, 336, 'Matrix', 100, 1),
141 |     (640, 360, 'MotorRolling', 164, 1),
142 |     (312, 233, 'Panda', 1000, 1),
143 |     (352, 240, 'RedTeam', 1918, 1),
144 |     (624, 352, 'Shaking', 365, 1),
145 |     (624, 352, 'Singer2', 366, 1),
146 |     (640, 360, 'Skating1', 400, 1),
147 |     (640, 352, 'Skating2-1', 473, 1),
148 |     (640, 352, 'Skating2-2', 473, 1),
149 |     (640, 360, 'Skiing', 81, 1),
150 |     (640, 360, 'Soccer', 392, 1),
151 |     (480, 360, 'Surfer', 376, 1),
152 |     (320, 240, 'Sylvester', 1345, 1),
153 |     (640, 480, 'Tiger2', 365, 1),
154 |     (320, 240, 'Trellis', 569, 1),
155 |     (768, 576, 'Walking', 412, 1),
156 |     (384, 288, 'Walking2', 500, 1),
157 |     (352, 288, 'Woman', 597, 1),
158 | ]
159 | 
160 | 
161 | def choose_video_sequence_from_OTB50(test):
162 |     [w_img, h_img, sequence_name, testing_iters, start_frame] = OTB50_SEQUENCES[test]
163 |     # For VOT-2015, read the list.txt and get the corresponding sequences.
164 |     return [w_img, h_img, sequence_name, start_frame, testing_iters]
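165 | 
166 | 
167 | # Example (added): entry 16 ('David') is the only OTB-50 sequence whose
168 | # annotations start at frame 300 rather than frame 1.
169 | if __name__ == '__main__':
170 |     print(choose_video_sequence_from_OTB50(16))  # [320, 240, 'David', 300, 770]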
--------------------------------------------------------------------------------
/update/utils/utils_draw_coord.py:
--------------------------------------------------------------------------------
1 | from utils_convert_coord import coord_regular_to_decimal, coord_decimal_to_regular
2 | import cv2
3 | 
4 | def debug_decimal_coord(img, coord_decimal, prob = None, class_id = None):
5 |     img_ht, img_wid, nchannels = img.shape
6 | 
7 |     coord_regular = coord_decimal_to_regular(coord_decimal, img_wid, img_ht)
8 | 
9 |     debug_regular_coord(img, coord_regular, prob, class_id)
10 | 
11 | 
12 | def debug_regular_coord(img, coord_regular, prob = None, class_id = None):
13 |     img_cp = img.copy()
14 |     [x_topleft, y_topleft, w_box, h_box] = coord_regular
15 | 
16 |     cv2.rectangle(img_cp,
17 |                   (x_topleft, y_topleft),
18 |                   (x_topleft + w_box, y_topleft + h_box),
19 |                   (0,255,0), 2)
20 | 
21 |     if prob is not None and class_id is not None:
22 |         assert(isinstance(prob, (float)))
23 |         assert(isinstance(class_id, (int, long)))
24 |         cv2.rectangle(img_cp,
25 |                       (x_topleft, y_topleft - 20),
26 |                       (x_topleft + w_box, y_topleft),
27 |                       (125,125,125), -1)
28 |         cv2.putText(img_cp,
29 |                     str(class_id) + ' : %.2f' % prob,
30 |                     (x_topleft + 5, y_topleft - 7),
31 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)
32 | 
33 |     cv2.imshow('debug_detection', img_cp)
34 |     cv2.waitKey(1)
35 | 
36 | 
37 | def debug_3_locations(img, gt_location, yolo_location, rolo_location):
38 |     img_cp = img.copy()
39 |     for i in range(3):  # b-g-r channels
40 |         if i == 0: location = gt_location; color = (0, 0, 255)      # red for gt
41 |         elif i == 1: location = yolo_location; color = (255, 0, 0)  # blue for yolo
42 |         elif i == 2: location = rolo_location; color = (0, 255, 0)  # green for rolo
43 |         x = int(location[0])
44 |         y = int(location[1])
45 |         w = int(location[2])
46 |         h = int(location[3])
47 |         # yolo/rolo boxes are centre-based; the ground truth box is top-left based
48 |         if i == 1 or i == 2: cv2.rectangle(img_cp, (x-w//2, y-h//2), (x+w//2, y+h//2), color, 2)
49 |         elif i == 0: cv2.rectangle(img_cp, (x, y), (x+w, y+h), color, 2)
50 |     cv2.imshow('3 locations', img_cp)
51 |     cv2.waitKey(100)
52 |     return img_cp
53 | 
--------------------------------------------------------------------------------
/update/utils/utils_draw_heatmap.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | 
3 | def draw_heatmap(heatmap):
4 |     fig = plt.figure(1, figsize=(10,10))
5 |     ax2 = fig.add_subplot(222)
6 |     ax2.imshow(heatmap, origin='lower', aspect='auto')
7 |     ax2.set_title("heatmap")
8 |     plt.show()
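9 | 
10 | 
11 | # Example (added): render a dummy 32x32 heatmap built with the helpers from
12 | # utils_convert_heatmap in this package.
13 | if __name__ == '__main__':
14 |     from utils_convert_heatmap import coordinates_to_heatmap_vec, heatmap_vec_to_heatmap
15 |     vec = coordinates_to_heatmap_vec([8, 8, 24, 24])
16 |     draw_heatmap(heatmap_vec_to_heatmap(vec))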
--------------------------------------------------------------------------------
/update/utils/utils_io_coord.py:
--------------------------------------------------------------------------------
1 | 
2 | # There are 3 kinds of representation of coordinates
3 | # 1. Coord_decimal: (x0, y0, w, h), each a float in [0, 1], a ratio to the image width and height, respectively.
4 | #    (x0, y0) is the middle point of the bounding box.
5 | #    Used by YOLO output, ROLO input and output.
6 | # 2. Coord_regular: (X1, Y1, W, H), pixel values in int.
7 | #    (X1, Y1) is the top-left point of the bounding box.
8 | #    The ground truth boxes read from files are usually of this format.
9 | # 3. Detection in Vector: [4096-d feature_vector] + (class, x0, y0, w, h, prob).
10 | #    The same as Coord_decimal, except that the detection carries more information.
11 | #-----------------------------------------------------------------------------------------------
12 | from utils_io_folder import get_immediate_childfile_paths
13 | from utils_cal_iou import compute_iou_with_decimal_coord
14 | import numpy as np
15 | import math
16 | import os
17 | 
18 | ''' 1. I/O with numpy '''
19 | 
20 | ''' 1.1 Save '''
21 | def save_vec_as_numpy_by_frame_id(output_folder_path, frame_id, output_vec):
22 |     filename = str(frame_id)
23 |     save_vec_as_numpy_by_name(output_folder_path, filename, output_vec)
24 | 
25 | 
26 | def save_vec_as_numpy_by_name(output_folder_path, filename, output_vec):
27 |     filename_without_ext = os.path.splitext(filename)[0]
28 |     output_file_path = os.path.join(output_folder_path, filename_without_ext)
29 |     np.save(output_file_path, output_vec)
30 | 
31 | 
32 | ''' 1.2 Load '''
33 | def batchload_yolovecs_from_numpy_folders(batch_folders_paths, batch_frame_ids, batchsize, nsteps):
34 |     batch_vecs = batchload_vecs_from_numpy_folders(batch_folders_paths, batch_frame_ids, batchsize, nsteps)
35 | 
36 |     if batch_vecs is not False and batch_vecs != -1:
37 |         for vec in batch_vecs:
38 |             # zero out the class id and prob fields of the first step's vector
39 |             vec[0][4096] = 0
40 |             vec[0][4101] = 0
41 |     return batch_vecs
42 | 
43 | 
44 | def batchload_vecs_from_numpy_folders(batch_folders_paths, batch_frame_ids, batchsize, nsteps):
45 |     try:
46 |         assert(len(batch_folders_paths) == batchsize)
47 |     except AssertionError:
48 |         print("\t Not enough pairs to form a minibatch, skip")
49 |         return -1
50 | 
51 |     batch_vecs = []
52 |     for ct, folder_path in enumerate(batch_folders_paths):
53 |         frame_id = int(batch_frame_ids[ct])
54 |         nsteps_vecs = load_vecs_of_stepsize_in_numpy_folder(folder_path, frame_id, nsteps)
55 |         batch_vecs.append(nsteps_vecs)
56 | 
57 |     try:
58 |         # shape check only: verify the batch reshapes to [batchsize * nsteps, 4102]
59 |         test_vecs = np.reshape(batch_vecs, [batchsize * nsteps, 4102])
60 |         return batch_vecs
61 |     except ValueError:
62 |         print("\t Not enough frames in video (it's ok), skipped this minibatch")
63 |         return False
64 | 
65 | 
66 | def load_vecs_of_stepsize_in_numpy_folder(folder_path, frame_id, nsteps):
67 |     file_paths = get_file_paths_of_stepsize_in_numpy_folder(folder_path, frame_id, nsteps)
68 |     nsteps_vecs = []
69 |     for file_path in file_paths:
70 |         vec_from_file = load_vec_from_numpy_file(file_path)
71 |         nsteps_vecs.append(vec_from_file)
72 |     return nsteps_vecs
73 | 
74 | 
75 | def get_file_paths_of_stepsize_in_numpy_folder(folder_path, frame_id, nsteps):
76 |     file_paths = get_immediate_childfile_paths(folder_path)
77 |     [st, ed] = get_range_of_stepsize_by_frame_id(nsteps, frame_id)
78 |     file_paths_batch = file_paths[st:ed]
79 |     return file_paths_batch
80 | 
81 | 
82 | def load_vec_from_numpy_file(file_path):
83 |     vec_from_file = np.load(file_path)
84 |     vec_from_file = np.reshape(vec_from_file, 4102)
85 |     return vec_from_file
86 | 
87 | 
88 | def batchload_decimal_coords_from_vecs(batch_vecs):
89 |     batch_coords = [vec[4097:4101] for vec in batch_vecs]
90 |     return batch_coords
91 | 
92 | 
93 | def load_decimal_coord_from_vec(vec_from_file):
94 |     coord_decimal = vec_from_file[4097:4101]
95 |     return coord_decimal
96 | 
97 | 
98 | def get_range_of_stepsize_by_frame_id(nsteps, frame_id, offset = 0):
99 |     [st, ed] = [frame_id, frame_id + nsteps]
100 |     st_ed_range = [st + offset, ed + offset]
101 |     return st_ed_range
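102 | 
103 | 
104 | # Added illustration (this helper is not in the original module): how one
105 | # 4102-d detection vector decomposes, matching the slices used above.
106 | def split_detection_vec(vec_from_file):
107 |     feature_vec = vec_from_file[0:4096]       # 4096-d YOLO feature vector
108 |     class_id = vec_from_file[4096]            # zeroed out before training
109 |     coord_decimal = vec_from_file[4097:4101]  # (x0, y0, w, h)
110 |     prob = vec_from_file[4101]                # detection confidence, also zeroed
111 |     return feature_vec, class_id, coord_decimal, prob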
112 | 
113 | 
114 | ''' 2. I/O with text file '''
115 | 
116 | def load_lines_from_txt_file(txt_file_path):
117 |     with open(txt_file_path, "r") as txtfile:
118 |         lines = txtfile.read().split('\n')
119 |     return lines
120 | 
121 | 
122 | def load_regular_coord_by_line(lines, line_id):
123 |     line = lines[line_id]
124 |     elems = line.split('\t')
125 |     if len(elems) < 4:
126 |         elems = line.split(',')
127 |         if len(elems) < 4:
128 |             elems = line.split(' ')
129 | 
130 |     try:
131 |         [X1, Y1, W, H] = elems[0:4]
132 |         coord_regular = [int(X1), int(Y1), int(W), int(H)]
133 |         return coord_regular
134 |     except ValueError:
135 |         print("Not enough ground truth in text file.")
136 |         return False
137 | 
138 | 
139 | def find_best_decimal_coord(multiple_coords_decimal, gt_coord_decimal, img_wid, img_ht):
140 |     # img_wid and img_ht are needed to call compute_iou_with_decimal_coord
141 |     # with its full signature
142 |     max_iou = 0
143 |     index = 0
144 |     for id, coord_decimal in enumerate(multiple_coords_decimal):
145 |         iou = compute_iou_with_decimal_coord(coord_decimal, gt_coord_decimal, img_wid, img_ht)
146 |         if iou >= max_iou:
147 |             max_iou = iou
148 |             index = id
149 |     return multiple_coords_decimal[index]
150 | 
151 | 
152 | def validate_coord(box):
153 |     for i in range(len(box)):
154 |         if math.isnan(box[i]): box[i] = 0
155 | 
--------------------------------------------------------------------------------
/update/utils/utils_io_file.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | 
4 | def validate_file_format(file_in_path, allowed_format):
5 |     if os.path.isfile(file_in_path) and os.path.splitext(file_in_path)[1][1:] in allowed_format:
6 |         return True
7 |     else:
8 |         return False
9 | 
10 | 
11 | class Error(Exception):
12 |     """Base class for other exceptions"""
13 |     pass
14 | 
15 | 
16 | class FormatIncorrectError(Error):
17 |     """Raised when the file is of incorrect format"""
18 |     pass
19 | 
20 | 
21 | def is_image(file_in_path):
22 |     if validate_file_format(file_in_path, ['jpg', 'JPEG', 'png', 'JPG']):
23 |         return True
24 |     else:
25 |         return False
26 | 
27 | 
28 | def is_video(file_in_path):
29 |     if validate_file_format(file_in_path, ['avi', 'mkv', 'mp4']):
30 |         return True
31 |     else:
32 |         return False
33 | 
34 | 
35 | def file_to_img(filepath):
36 |     # cv2.imread does not raise on failure; it returns None
37 |     img = cv2.imread(filepath)
38 |     if img is None:
39 |         print('cannot open image file: ' + filepath)
40 |     return img
41 | 
42 | 
43 | def file_to_video(filepath):
44 |     video = cv2.VideoCapture(filepath)
45 |     if not video.isOpened():
46 |         print('cannot open video file: ' + filepath)
47 |     return video
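48 | 
49 | 
50 | # Example (added): the format check also requires the file to exist on disk,
51 | # so a missing path returns False even with an allowed extension.
52 | if __name__ == '__main__':
53 |     print(validate_file_format('missing.jpg', ['jpg']))  # False
54 |     print(is_video('missing.mp4'))                       # False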
--------------------------------------------------------------------------------
/update/utils/utils_io_folder.py:
--------------------------------------------------------------------------------
1 | import os
2 | from utils_natural_sort import natural_sort
3 | 
4 | def get_immediate_subfolder_paths(folder_path):
5 |     subfolder_names = get_immediate_subfolder_names(folder_path)
6 |     subfolder_paths = [os.path.join(folder_path, subfolder_name) for subfolder_name in subfolder_names]
7 |     return subfolder_paths
8 | 
9 | 
10 | def get_immediate_subfolder_names(folder_path):
11 |     subfolder_names = [folder_name for folder_name in os.listdir(folder_path)
12 |                        if os.path.isdir(os.path.join(folder_path, folder_name))]
13 |     natural_sort(subfolder_names)
14 |     return subfolder_names
15 | 
16 | 
17 | def get_immediate_childfile_paths(folder_path):
18 |     files_names = get_immediate_childfile_names(folder_path)
19 |     files_full_paths = [os.path.join(folder_path, file_name) for file_name in files_names]
20 |     return files_full_paths
21 | 
22 | 
23 | def get_immediate_childfile_names(folder_path):
24 |     files_names = [file_name for file_name in next(os.walk(folder_path))[2]]
25 |     natural_sort(files_names)
26 |     return files_names
27 | 
28 | 
29 | def get_folder_name_from_path(folder_path):
30 |     path, folder_name = os.path.split(folder_path)
31 |     return folder_name
32 | 
33 | 
34 | def create_folder(folder_path):
35 |     if not os.path.exists(folder_path):
36 |         os.makedirs(folder_path)
37 | 
--------------------------------------------------------------------------------
/update/utils/utils_io_heatmap.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from utils_io_folder import *
3 | from utils_convert_heatmap import coordinates_to_heatmap_vec
4 | 
5 | # NOTE: load_dataset_gt, find_gt_location, locations_from_0_to_1 and
6 | # loc_to_coordinates are expected from the legacy utils/ROLO_utils.py;
7 | # they have no counterparts in update/utils yet.
8 | def load_unready_heatmap(tensorflow_x_path, batchsize, nsteps, id, w_img, h_img):
9 |     lines = load_dataset_gt(tensorflow_x_path)
10 |     [st, ed] = [id, id + batchsize * nsteps]
11 | 
12 |     heatmap_vec_batch = []
13 |     for id in range(st, ed):
14 |         location = find_gt_location(lines, id)
15 |         location = locations_from_0_to_1(w_img, h_img, location)
16 |         coords = loc_to_coordinates(location)
17 |         heatmap_vec = [coordinates_to_heatmap_vec(coords)]
18 |         heatmap_vec_batch.append(heatmap_vec)
19 |     return heatmap_vec_batch
20 | 
21 | 
22 | def load_ready_heatmap(folder_path, params, id):
23 |     batchsize = params['batchsize']
24 |     nsteps = params['nsteps']
25 |     vec_len = params['vec_len']
26 | 
27 |     heatmap_files_paths = get_immediate_childfile_paths(folder_path)
28 |     [st, ed] = [id, id + batchsize * nsteps]
29 |     paths_batch = heatmap_files_paths[st:ed]
30 | 
31 |     heatmap_vec_batch = []
32 |     for path in paths_batch:
33 |         heatmap_vec = np.load(path)
34 |         heatmap_vec = np.reshape(heatmap_vec, vec_len)
35 |         heatmap_vec_batch.append(heatmap_vec)
36 |     heatmap_vec_batch = np.reshape(heatmap_vec_batch, [batchsize*nsteps, vec_len])
37 |     return heatmap_vec_batch
38 | 
--------------------------------------------------------------------------------
/update/utils/utils_io_list.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from random import shuffle
3 | from utils_io_folder import get_immediate_subfolder_paths, get_immediate_childfile_names, get_folder_name_from_path
4 | 
5 | ''' 1.
generate the list of pairs ''' 6 | 7 | def generate_pairs_list_for_training(dataset_folder_path, numpy_file_path, txt_file_path = None): 8 | folders_paths_list = generate_folders_paths_list(dataset_folder_path) 9 | num_of_frames_list = generate_num_of_frames_list(folders_paths_list) 10 | 11 | pairs_list = generate_pairs_with_two_lists(folders_paths_list, num_of_frames_list) 12 | shuffled_pairs_list= shuffle_list(pairs_list) 13 | 14 | generate_numpy_file_with_shuffled_list_of_pairs(numpy_file_path, shuffled_pairs_list) 15 | if txt_file_path is not None: 16 | generate_txt_file_with_shuffled_list_of_pairs(txt_file_path, shuffled_pairs_list) 17 | 18 | return True 19 | 20 | 21 | def generate_folders_paths_list(dataset_folder_path): 22 | folders_paths_list = get_immediate_subfolder_paths(dataset_folder_path) 23 | return folders_paths_list 24 | 25 | 26 | def generate_num_of_frames_list(folders_paths_list): 27 | num_of_frames_list = [len(get_immediate_childfile_names(folder_path)) 28 | for folder_path in folders_paths_list] 29 | return num_of_frames_list 30 | 31 | 32 | def generate_pairs_with_two_lists(folders_paths_list, num_of_frames_list): 33 | pairs_list = [] 34 | assert(len(folders_paths_list) == len(num_of_frames_list)) 35 | 36 | for folder_id, images_folder_path in enumerate(folders_paths_list): 37 | num_of_frames = num_of_frames_list[folder_id] 38 | pairs = generate_pairs_for_each_folder(images_folder_path, num_of_frames) 39 | 40 | for pair in pairs: 41 | pairs_list.append(pair) 42 | return pairs_list 43 | 44 | 45 | def generate_pairs_for_each_folder(images_folder_path, num_of_frames): 46 | pairs =[(get_folder_name_from_path(images_folder_path), ct) 47 | for ct in range(num_of_frames)] 48 | return pairs 49 | 50 | 51 | def generate_txt_file_with_shuffled_list_of_pairs(txt_file_path, shuffled_pairs_list): 52 | try: 53 | with open(txt_file_path, "w") as txt_file: 54 | for pairs in shuffled_pairs_list: 55 | line = str(pairs) + '\n' 56 | txt_file.write(line) 57 | except IOError: 58 | print('unable to open text file') 59 | 60 | 61 | def generate_numpy_file_with_shuffled_list_of_pairs(numpy_file_path, shuffled_pairs_list): 62 | np.save(numpy_file_path, shuffled_pairs_list) 63 | 64 | 65 | def shuffle_list(pairs_list): 66 | shuffle(pairs_list) 67 | return pairs_list 68 | 69 | 70 | ''' 2. 
Load the list of pairs '''
71 | def load_list_batch_pairs_from_numpy_file(pairs_list_numpy_file_path, batchsize):
72 |     shuffled_pairs_list = load_pairs_list_from_numpy_file(pairs_list_numpy_file_path)
73 |     list_batch_pairs = convert_pairs_to_list_batch_pairs(shuffled_pairs_list, batchsize)
74 |     return list_batch_pairs
75 | 
76 | 
77 | def load_pairs_list_from_numpy_file(pairs_list_numpy_file_path):
78 |     shuffled_pairs_list = np.load(pairs_list_numpy_file_path)
79 |     return shuffled_pairs_list
80 | 
81 | 
82 | def convert_pairs_to_list_batch_pairs(pairs_list, batchsize):
83 |     list_batch_pairs = []
84 |     for batch_id in range(0, len(pairs_list), batchsize):
85 |         st = batch_id
86 |         ed = st + batchsize
87 |         batch_pairs = pairs_list[st:ed]
88 |         list_batch_pairs.append(batch_pairs)
89 |     return list_batch_pairs
90 | 
--------------------------------------------------------------------------------
/update/utils/utils_natural_sort.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | def natural_sort(given_list):
4 |     """ Sort the given list in the way that humans expect. """
5 |     given_list.sort(key=alphanum_key)
6 | 
7 | 
8 | def alphanum_key(s):
9 |     """ Turn a string into a list of string and number chunks.
10 |         "z23a" -> ["z", 23, "a"] """
11 |     return [ tryint(c) for c in re.split('([0-9]+)', s) ]
12 | 
13 | 
14 | def tryint(s):
15 |     try:
16 |         return int(s)
17 |     except ValueError:
18 |         return s
19 | 
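20 | 
21 | # Example (added): numeric chunks compare numerically, so '2.txt' sorts
22 | # before '10.txt', unlike a plain lexicographic sort.
23 | if __name__ == '__main__':
24 |     names = ['10.txt', '2.txt', '1.txt']
25 |     natural_sort(names)
26 |     print(names)  # ['1.txt', '2.txt', '10.txt']
--------------------------------------------------------------------------------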