├── .gitignore
├── 3rd party
│   ├── YOLO_network.py
│   └── sort_yolo.py
├── LICENSE
├── MOLO
│   ├── MOLO_network_test.py
│   └── MOLO_network_train.py
├── README.md
├── ROLO_demo_heat.py
├── ROLO_demo_test.py
├── ROLO_evaluation.py
├── experiments
│   ├── testing
│   │   ├── ROLO_network_test_all.py
│   │   └── ROLO_network_test_single.py
│   └── training
│       ├── ROLO_step1_train_30_exp2.py
│       ├── ROLO_step3_train_30_exp2.py
│       ├── ROLO_step6_train_20_exp1.py
│       ├── ROLO_step6_train_30_exp2.py
│       ├── ROLO_step6_train_30_exp3.py
│       └── ROLO_step9_train_30_exp2.py
├── heatmap
│   ├── ROLO_heatmap_test.py
│   └── ROLO_heatmap_train.py
├── update
│   ├── src
│   │   ├── rnn.py
│   │   ├── rnn_cell.py
│   │   ├── testing.py
│   │   └── training.py
│   ├── unit_test
│   │   ├── test_all.py
│   │   ├── test_utils_convert_coord.py
│   │   ├── test_utils_dataset.py
│   │   ├── test_utils_io_coord.py
│   │   ├── test_utils_io_file.py
│   │   ├── test_utils_io_folder.py
│   │   ├── test_utils_io_folder.pyc
│   │   ├── test_utils_io_list.py
│   │   └── test_utils_natural_sort.py
│   └── utils
│       ├── utils_cal_iou.py
│       ├── utils_convert_coord.py
│       ├── utils_convert_heatmap.py
│       ├── utils_dataset.py
│       ├── utils_draw_coord.py
│       ├── utils_draw_heatmap.py
│       ├── utils_io_coord.py
│       ├── utils_io_file.py
│       ├── utils_io_folder.py
│       ├── utils_io_heatmap.py
│       ├── utils_io_list.py
│       └── utils_natural_sort.py
└── utils
    ├── MOLO_utils.py
    └── ROLO_utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2016 Guanghan Ning
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/MOLO/MOLO_network_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) <2016> . All Rights Reserved.
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | '''
16 | Script File: MOLO_network_test.py
17 |
18 | Description:
19 | 	MOLO is short for Multi-target ROLO, aimed at simultaneous detection and tracking of multiple targets
20 | Paper: http://arxiv.org/abs/1607.05781
21 | Author: Guanghan Ning
22 | Webpage: http://guanghan.info/
23 | '''
24 |
25 | # Imports
26 | import ROLO_utils as utils
27 |
28 | import tensorflow as tf
29 | from tensorflow.models.rnn import rnn, rnn_cell
30 | import cv2
31 |
32 | import numpy as np
33 | import os.path
34 | import time
35 | import random
36 |
37 |
38 | class ROLO_TF:
39 |     disp_console = False
40 |     restore_weights = False
41 |
42 |     # YOLO parameters
43 |     fromfile = None
44 |     tofile_img = 'test/output.jpg'
45 |     tofile_txt = 'test/output.txt'
46 |     imshow = True
47 |     filewrite_img = False
48 |     filewrite_txt = False
49 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
50 |     alpha = 0.1
51 |     threshold = 0.2
52 |     iou_threshold = 0.5
53 |     num_class = 20
54 |     num_box = 2
55 |     grid_size = 7
56 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
57 |     w_img, h_img = [352, 240]
58 |
59 |     # ROLO Network Parameters
60 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/MOLO/model_MOT.ckpt'
61 |     lstm_depth = 3
62 |     num_steps = 3  # number of frames as an input sequence
63 |     num_feat = 4096
64 |     num_predict = 6  # final output of LSTM: 6 location parameters
65 |     num_gt = 4
66 |     num_input = num_feat + num_predict  # data input per frame: 4096 + 6 = 4102
67 |
68 |     # ROLO Training Parameters
69 |     #learning_rate = 0.00001 #training
70 |     learning_rate = 0.00001 #testing
71 |
72 |     training_iters = 210 #100000
73 |     batch_size = 1 #128
74 |     display_step = 1
75 |
76 |     # tf Graph input
77 |     x = tf.placeholder("float32", [None, num_steps, num_input])
78 |     istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input
79 |     y = tf.placeholder("float32", [None, num_gt])
80 |
81 |     # Define weights
82 |     weights = {
83 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
84 |     }
85 |     biases = {
86 |         'out': tf.Variable(tf.random_normal([num_predict]))
87 |     }
88 |
89 |
90 |     def __init__(self, argvs = []):
91 |         print("ROLO init")
92 |         self.ROLO(argvs)
93 |
94 |
95 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
96 |         with tf.device('/gpu:0'):
97 |             # input shape: (batch_size, n_steps, n_input)
98 |             _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
99 |             # Reshape to prepare input to hidden activation
100 |             _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
101 |             # Split data because rnn cell needs a list of inputs for the RNN inner loop
102 |             _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
103 |             #print("_X: ", _X)
104 |
105 |             cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
106 |             state = _istate
107 |             for step in range(self.num_steps):
108 |                 outputs, state = tf.nn.rnn(cell, [_X[step]], state)
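                # Note (added for clarity): tf.nn.rnn runs a single unrolled RNN step here;
                # the first iteration creates the LSTM's variables, and enabling variable
                # reuse on the next line makes every later iteration share those same
                # weights, which is the standard TF 0.x weight-sharing idiom.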
109 |                 tf.get_variable_scope().reuse_variables()
110 |
111 |         #print("output: ", outputs)
112 |         #print("state: ", state)
113 |         return outputs
114 |
115 |
116 |     # Experiment with dropout
117 |     def dropout_features(self, feature, prob):
118 |         if prob == 0: return feature
119 |         else:
120 |             num_drop = int(prob * 4096)
121 |             drop_index = random.sample(xrange(4096), num_drop)
122 |             for i in range(len(drop_index)):
123 |                 index = drop_index[i]
124 |                 feature[index] = 0
125 |             return feature
126 |
127 |
128 |     '''---------------------------------------------------------------------------------------'''
129 |     def build_networks(self):
130 |         if self.disp_console : print "Building MOLO graph..."
131 |
132 |         # Build rolo layers
133 |         self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases)
134 |         self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious")
135 |         self.sess = tf.Session()
136 |         self.sess.run(tf.initialize_all_variables())
137 |         self.saver = tf.train.Saver()
138 |         #self.saver.restore(self.sess, self.rolo_weights_file)
139 |         if self.disp_console : print "Loading complete!" + '\n'
140 |
141 |
142 |     def merge_dets(self, dets_yolo, dets_rcnn):
143 |         # NOTE: unfinished helper -- test_7() below calls utils.merge_dets() instead;
144 |         # the body references names (dets, dets_last, dets_next, box_num, batch_xs_raw)
145 |         # that are never defined in this scope.
146 |         for person in range(len(dets)):
147 |             box_num += 1
148 |             #print('id, person = ', id, person)
149 |             person_id = dets[person][0]-1  # person_id starts from 1, but index starts from 0, so minus 1
150 |
151 |             # Merge the features with dets in batch_xs
152 |             loc_last = dets_last[dets_last[:,0]==person_id, 1:5]
153 |             loc_prst = dets[dets[:,0]==person_id, 1:5]
154 |             loc_next = dets_next[dets_next[:,0]==person_id, 1:5]
155 |             if len(loc_last) == 0 or len(loc_next)==0:
156 |                 continue
157 |             loc_last = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_last[0][:]])
158 |             loc_prst = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_prst[0][:]])
159 |             loc_next = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_next[0][:]])
160 |             batch_xs_raw[0][4097:4101] = loc_last[0][:]
161 |             batch_xs_raw[1][4097:4101] = loc_prst[0][:]
162 |             batch_xs_raw[2][4097:4101] = loc_next[0][:]
163 |
164 |             # Reshape data to get 3 seq of 4102 elements
165 |             batch_xs = np.reshape(batch_xs_raw, [self.batch_size, self.num_steps, self.num_input])
166 |         return
167 |
168 |
169 |     def test_7(self):
170 |         print("Testing MOLO...")
171 |         self.build_networks()
172 |
173 |         ''' TUNE THIS'''
174 |         offset = 37
175 |         num_videos = 7
176 |         epoches = 7
177 |
178 |         # Use rolo_input for LSTM training
179 |         pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases)
180 |         self.pred_location = pred[0][:, 4097:4101]
181 |         self.correct_prediction = tf.square(self.pred_location - self.y)
182 |         self.accuracy = tf.reduce_mean(self.correct_prediction) * 100
183 |         self.learning_rate = 0.00001
184 |         self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy)  # Adam Optimizer
185 |
186 |         # Initializing the variables
187 |         init = tf.initialize_all_variables()
188 |
189 |         # Launch the graph
190 |         with tf.Session() as sess:
191 |             if (self.restore_weights == True):
192 |                 sess.run(init)
193 |                 self.saver.restore(sess, self.rolo_weights_file)
194 |                 print "Loading complete!" + '\n'
195 |             else:
196 |                 sess.run(init)
197 |
198 |             for epoch in range(1, epoches):
199 |                 i = epoch % num_videos + offset
200 |                 [self.w_img, self.h_img, sequence_name, self.training_iters, self.testing_iters]= utils.choose_video_sequence(i)
201 |
202 |                 x_path = os.path.join('benchmark/MOT/MOT2016/test/', sequence_name, 'yolo_out/')
203 |                 seq_dets = np.loadtxt('3rd_party/sort-master/output/%s.txt'%(sequence_name),delimiter=',')  #load detections
204 |                 #y_path = os.path.join('benchmark/MOT/MOT2016/test/', sequence_name, 'gt/gt.txt')
205 |                 out_file = open('output/MOLO/%s.txt'%(sequence_name),'w')
206 |
207 |                 id = 1
208 |                 # Keep testing until reaching the max iterations
209 |                 while id < self.testing_iters- self.num_steps:
210 |                     # Load locs and feat from yolo output
211 |                     batch_xs_raw = self.rolo_utils.load_yolo_output_test_MOLO(x_path, self.batch_size, self.num_steps, id-1)  # 3 features: (id-1, id, id+1), start from 0.
212 |
213 |                     # Load dets from faster r-cnn
214 |                     dets_last = seq_dets[ (seq_dets[:,0]== id)&(seq_dets[:,6]==1) , 1:6]  # dets starts from 1
215 |                     dets = seq_dets[ (seq_dets[:,0]== (id+1))&(seq_dets[:,6]==1) , 1:6]
216 |                     dets_next = seq_dets[ (seq_dets[:,0]== (id+2))&(seq_dets[:,6]==1) , 1:6]
217 |
218 |                     # Need to load batch_xs in a different way, get the feature as well as the yolo locations
219 |                     # Need a function to leverage the yolo detections and faster r-cnn detections
220 |                     # assign the updated detection to dets\dets_last\dets_next
221 |
222 |                     final_dets, person_ids = utils.merge_dets(batch_xs_raw, [dets_last, dets, dets_next])  # Take in the two sources of locations
223 |
224 |                     for person in range(len(final_dets)):
225 |                         person_id = person_ids[person]
226 |
227 |                         # Reshape data to get 3 seq of 4102 elements
228 |                         batch_xs = np.reshape(final_dets[person], [self.batch_size, self.num_steps, self.num_input])
229 |
230 |                         # Output prediction to txt file
231 |                         pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.istate: np.zeros((self.batch_size, 2*self.num_input))})
232 |
233 |                         d = utils.locations_normal(self.w_img, self.h_img, pred_location[0])  # d = [x_mid, y_mid, w, h] in pixels
234 |                         out_file.write('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1\n'%(id+1, person_id, d[0]-d[2]/2.0, d[1]- d[3]/2.0, d[2], d[3]))
235 |                     id += 1
236 |                 out_file.close()
237 |         return
238 |
239 |
240 |     def ROLO(self, argvs):
241 |         self.rolo_utils= utils.ROLO_utils()
242 |         self.rolo_utils.loadCfg()
243 |         self.params = self.rolo_utils.params
244 |
245 |         arguments = self.rolo_utils.argv_parser(argvs)
246 |
247 |         if self.rolo_utils.flag_train is True:
248 |             self.training(utils.x_path, utils.y_path)
249 |         elif self.rolo_utils.flag_track is True:
250 |             self.build_networks()
251 |             self.track_from_file(utils.file_in_path)
252 |         elif self.rolo_utils.flag_detect is True:
253 |             self.build_networks()
254 |             self.detect_from_file(utils.file_in_path)
255 |         else:
256 |             self.test_7()
257 |
258 | '''----------------------------------------main-----------------------------------------------------'''
259 | def main(argvs):
260 |     ROLO_TF(argvs)
261 |
262 | if __name__=='__main__':
263 |     main(' ')
264 |
265 |
--------------------------------------------------------------------------------
/MOLO/MOLO_network_train.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) <2016> . All Rights Reserved.
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | '''
16 | Script File: MOLO_network_train.py
17 |
18 | Description:
19 | 	MOLO is short for Multi-target ROLO, aimed at simultaneous detection and tracking of multiple targets
20 | Paper: http://arxiv.org/abs/1607.05781
21 | Author: Guanghan Ning
22 | Webpage: http://guanghan.info/
23 | '''
24 |
25 | # Imports
26 | import ROLO_utils as utils
27 |
28 | import tensorflow as tf
29 | from tensorflow.models.rnn import rnn, rnn_cell
30 | import cv2
31 |
32 | import numpy as np
33 | import os.path
34 | import time
35 | import random
36 |
37 |
38 | class ROLO_TF:
39 |     disp_console = False
40 |     restore_weights = True
41 |
42 |     # YOLO parameters
43 |     fromfile = None
44 |     tofile_img = 'test/output.jpg'
45 |     tofile_txt = 'test/output.txt'
46 |     imshow = True
47 |     filewrite_img = False
48 |     filewrite_txt = False
49 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
50 |     alpha = 0.1
51 |     threshold = 0.2
52 |     iou_threshold = 0.5
53 |     num_class = 20
54 |     num_box = 2
55 |     grid_size = 7
56 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
57 |     w_img, h_img = [352, 240]
58 |
59 |     # ROLO Network Parameters
60 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/MOLO/model_MOT.ckpt'
61 |     lstm_depth = 3
62 |     num_steps = 3  # number of frames as an input sequence
63 |     num_feat = 4096
64 |     num_predict = 6  # final output of LSTM: 6 location parameters
65 |     num_gt = 4
66 |     num_input = num_feat + num_predict  # data input per frame: 4096 + 6 = 4102
67 |
68 |     # ROLO Training Parameters
69 |     #learning_rate = 0.00001 #training
70 |     learning_rate = 0.00001 #testing
71 |
72 |     training_iters = 210 #100000
73 |     batch_size = 1 #128
74 |     display_step = 1
75 |
76 |     # tf Graph input
77 |     x = tf.placeholder("float32", [None, num_steps, num_input])
78 |     istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input
79 |     y = tf.placeholder("float32", [None, num_gt])
80 |
81 |     # Define weights
82 |     weights = {
83 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
84 |     }
85 |     biases = {
86 |         'out': tf.Variable(tf.random_normal([num_predict]))
87 |     }
88 |
89 |
90 |     def __init__(self, argvs = []):
91 |         print("ROLO init")
92 |         self.ROLO(argvs)
93 |
94 |
95 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
96 |         with tf.device('/gpu:0'):
97 |             # input shape: (batch_size, n_steps, n_input)
98 |             _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
99 |             # Reshape to prepare input to hidden activation
100 |             _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
101 |             # Split data because rnn cell needs a list of inputs for the RNN inner loop
102 |             _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
103 |             #print("_X: ", _X)
104 |
105 |             cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
106 |             state = _istate
107 |             for step in range(self.num_steps):
108 |                 outputs, state = tf.nn.rnn(cell, [_X[step]], state)
109 |                 tf.get_variable_scope().reuse_variables()
110 |
111 |         #print("output: ", outputs)
112 |         #print("state: ", state)
113 |         return outputs
114 |
115 |
116 |     # Experiment with dropout
117 |     def dropout_features(self, feature, prob):
118 |         if prob == 0: return feature
119 |         else:
120 |             num_drop = int(prob * 4096)
121 |             drop_index = random.sample(xrange(4096), num_drop)
122 |             for i in range(len(drop_index)):
123 |                 index = drop_index[i]
124 |                 feature[index] = 0
125 |             return feature
126 |
127 |
128 |     # Experiment with input box noise (translate, scale)
129 |     def det_add_noise(self, det):
130 |         translate_rate = random.uniform(0.98, 1.02)
131 |         scale_rate = random.uniform(0.8, 1.2)
132 |
133 |         det[0] *= translate_rate
134 |         det[1] *= translate_rate
135 |         det[2] *= scale_rate
136 |         det[3] *= scale_rate
137 |
138 |         return det
139 |
140 |
141 |     '''---------------------------------------------------------------------------------------'''
142 |     def build_networks(self):
143 |         if self.disp_console : print "Building MOLO graph..."
144 |
145 |         # Build rolo layers
146 |         self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases)
147 |         self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious")
148 |         self.sess = tf.Session()
149 |         self.sess.run(tf.initialize_all_variables())
150 |         self.saver = tf.train.Saver()
151 |         #self.saver.restore(self.sess, self.rolo_weights_file)
152 |         if self.disp_console : print "Loading complete!" + '\n'
153 |
154 |
155 |     def train_7(self):
156 |         print("TRAINING MOLO...")
157 |         log_file = open("output/training-7-log.txt", "a")  # open in append mode
158 |         self.build_networks()
159 |
160 |         ''' TUNE THIS'''
161 |         offset = 30
162 |         num_videos = 7
163 |         epoches = 7 * 300
164 |
165 |
166 |         # Use rolo_input for LSTM training
167 |         pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases)
168 |         self.pred_location = pred[0][:, 4097:4101]
169 |         self.correct_prediction = tf.square(self.pred_location - self.y)
170 |         self.accuracy = tf.reduce_mean(self.correct_prediction) * 100
171 |         self.learning_rate = 0.00001
172 |         self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy)  # Adam Optimizer
173 |
174 |         # Initializing the variables
175 |         init = tf.initialize_all_variables()
176 |
177 |         # Launch the graph
178 |         with tf.Session() as sess:
179 |             if (self.restore_weights == True):
180 |                 sess.run(init)
181 |                 self.saver.restore(sess, self.rolo_weights_file)
182 |                 print "Loading complete!" + '\n'
183 |             else:
184 |                 sess.run(init)
185 |
186 |             avg_loss = 0
187 |             stay_epoch = 2
188 |             for epoch in range(2, epoches):
189 |                 #if(avg_loss > 0.1):
190 |                 #    epoch= stay_epoch
191 |                 #print(epoch)
192 |                 i = epoch % num_videos + offset
193 |                 [self.w_img, self.h_img, sequence_name, self.training_iters, self.testing_iters]= utils.choose_video_sequence(i)
194 |
195 |                 x_path = os.path.join('benchmark/MOT/MOT2016/train/', sequence_name, 'yolo_out/')
196 |                 seq_dets = np.loadtxt('3rd_party/sort-master/output/%s.txt'%(sequence_name),delimiter=',')  #load detections
197 |
198 |                 y_path = os.path.join('benchmark/MOT/MOT2016/train/', sequence_name, 'gt/gt.txt')
199 |
200 |                 #out_file = open('output/MOLO/%s.txt'%(sequence_name),'w')
201 |
202 |                 #self.output_path = os.path.join('benchmark/MOT/MOT2016/train/', sequence_name, 'molo_out_train/')
203 |                 #utils.createFolder(self.output_path)
204 |                 total_loss = 0
205 |                 id = 1
206 |                 box_num= 0
207 |
208 |                 # Keep training until reaching the max iterations
209 |                 while id < self.testing_iters- self.num_steps:  # + 1
210 |                     # Load training data & ground truth
211 |                     batch_xs_raw = self.rolo_utils.load_yolo_feat_test_MOLO(x_path, self.batch_size, self.num_steps, id-1)  # 3 features: (id-1, id, id+1), start from 0.
212 |                     dets_last = seq_dets[ (seq_dets[:,0]== (id))&(seq_dets[:,6]==1) , 1:6]  # dets starts from 1
213 |                     dets = seq_dets[ (seq_dets[:,0]== (id+1))&(seq_dets[:,6]==1) , 1:6]
214 |                     dets_next = seq_dets[ (seq_dets[:,0]== (id+2))&(seq_dets[:,6]==1) , 1:6]
215 |
216 |                     for person in range(len(dets)):
217 |                         box_num += 1
218 |
219 |                         batch_ys = [dets[person][1:5]]
220 |                         batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys)
221 |
222 |                         # Merge the features with dets in batch_xs
223 |                         person_id = dets[person][0]  # person IDs from SORT start at 1 and are matched by value below, so no -1 offset is applied (an earlier -1 here was a bug)
224 |                         loc_last = dets_last[dets_last[:,0]==person_id, 1:5]
225 |                         loc_prst = dets[dets[:,0]==person_id, 1:5]
226 |                         loc_next = dets_next[dets_next[:,0]==person_id, 1:5]
227 |                         #print('loca_last', loc_last[0][:])
228 |                         #print('batch_xs', batch_xs_raw[0])
229 |                         if len(loc_last) == 0 or len(loc_next)==0:
230 |                             continue
231 |                         loc_last = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_last[0][:]])
232 |                         loc_prst = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_prst[0][:]])
233 |                         loc_next = utils.locations_from_0_to_1(self.w_img, self.h_img, [loc_next[0][:]])
234 |
235 |                         # Add noise to batch_xs
236 |                         #loc_last[0]= self.det_add_noise( loc_last[0])
237 |                         #loc_prst[0]= self.det_add_noise( loc_prst[0])
238 |                         #loc_next[0]= self.det_add_noise( loc_next[0])
239 |
240 |                         #print('loca_last', loc_last[0][:])
241 |                         batch_xs_raw[0][4097:4101] = loc_last[0][:]
242 |                         batch_xs_raw[1][4097:4101] = loc_prst[0][:]
243 |                         batch_xs_raw[2][4097:4101] = loc_next[0][:]
244 |
245 |                         # Reshape data to get 3 seq of 4102 elements
246 |                         batch_xs = np.reshape(batch_xs_raw, [self.batch_size, self.num_steps, self.num_input])
247 |                         batch_ys = np.reshape(batch_ys, [self.batch_size, 4])
248 |                         #print("Batch_ys: ", batch_ys)
249 |
250 |                         # Output prediction to txt file
251 |                         pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))})
252 |                         d = utils.locations_normal(self.w_img, self.h_img, pred_location[0])  # d = [x_mid, y_mid, w, h] in pixels
253 |                         #print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(id+1, person_id, d[0]-d[2]/2.0, d[1]- d[3]/2.0, d[2], d[3]), out_file)
254 |                         #print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(id+1, person_id, d[0]-d[2]/2.0, d[1]- d[3]/2.0, d[2], d[3]))
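                        # Note (added for clarity): the commented-out writers above emit one
                        # MOT Challenge record per box,
                        #   frame, id, bb_left, bb_top, bb_width, bb_height, conf, x, y, z
                        # where the trailing x, y, z fields are fixed to -1 (unused in 2D
                        # tracking) and (d[0]-d[2]/2, d[1]-d[3]/2) converts the predicted
                        # box center to its top-left corner.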
255 |
256 |                         if self.disp_console: print("ROLO Pred: ", pred_location)
257 |                         if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img)
258 |
259 |                         sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))})
260 |
261 |                         # Calculate batch loss
262 |                         loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))})
263 |                         if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss)  #+ "{:.5f}".format(self.accuracy)
264 |                         total_loss += loss
265 |                     id += 1
266 |
267 |                 #out_file.close()
268 |
269 |                 #print "Optimization Finished!"
270 |                 avg_loss = total_loss/box_num
271 |                 print "Avg loss: " + sequence_name + ": " + str(avg_loss)
272 |
273 |                 log_file.write(str("{:.3f}".format(avg_loss)) + ' ')
274 |                 if epoch % num_videos == 0:
275 |                     log_file.write('\n')
276 |                     save_path = self.saver.save(sess, self.rolo_weights_file)
277 |                     print("Model saved in file: %s" % save_path)
278 |
279 |         log_file.close()
280 |         return
281 |
282 |
283 |     def ROLO(self, argvs):
284 |
285 |         self.rolo_utils= utils.ROLO_utils()
286 |         self.rolo_utils.loadCfg()
287 |         self.params = self.rolo_utils.params
288 |
289 |         arguments = self.rolo_utils.argv_parser(argvs)
290 |
291 |         if self.rolo_utils.flag_train is True:
292 |             self.training(utils.x_path, utils.y_path)
293 |         elif self.rolo_utils.flag_track is True:
294 |             self.build_networks()
295 |             self.track_from_file(utils.file_in_path)
296 |         elif self.rolo_utils.flag_detect is True:
297 |             self.build_networks()
298 |             self.detect_from_file(utils.file_in_path)
299 |         else:
300 |             self.train_7()
301 |
302 | '''----------------------------------------main-----------------------------------------------------'''
303 | def main(argvs):
304 |     ROLO_TF(argvs)
305 |
306 | if __name__=='__main__':
307 |     main(' ')
308 |
309 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ROLO
2 | =======
3 | --------
4 |
5 | Project Page: [http://guanghan.info/projects/ROLO/](http://guanghan.info/projects/ROLO/)
6 |
7 | ## Overview
8 |
9 | ROLO is short for Recurrent YOLO [[1]], aimed at simultaneous object detection and tracking.
10 |
11 | With the regression capability of LSTMs both spatially and temporally, ROLO is able to interpret a series of high-level visual features directly into coordinates of tracked objects. By concatenating high-level visual features with YOLO detection results, ROLO is spatially supervised into specific targets.
12 |
13 | The regression is twofold: (1) the regression within one unit, i.e.,
14 | between the visual features and the concatenated region representations. The LSTM is capable of inferring region locations from the visual features when they are concatenated into one unit. (2) The regression over the units of a sequence, i.e., between concatenated features over a sequence of frames.
15 |
16 | The supervision is helpful in two aspects:
17 | (1) When the LSTM interprets the high-level visual features, the preliminary location inference helps
18 | to regress the features into the location of specific visual elements/cues. The spatially supervised regression acts as an online appearance model. (2) Temporally, the LSTM learns over the sequence units to restrict the location prediction to a spatial range.
19 |
20 | ROLO is currently an offline approach, and is expected to gain a performance boost with proper online model updating. It is still a single-object tracker, and data association techniques are not yet explored for the simultaneous tracking of multiple targets.
21 |
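For intuition, the minimal sketch below shows how one unit of the input sequence is assembled in this codebase: a 4096-d visual feature vector is concatenated with the 6 values of a YOLO detection, giving the 4102-d vector that the LSTM consumes per frame (`num_input = num_feat + num_predict` in the network scripts). The `[class, x, y, w, h, confidence]` layout of the 6 detection slots is an assumption, inferred from the `[:, 4097:4101]` slice the code reads the predicted box from.

```python
import numpy as np

NUM_FEAT = 4096   # YOLO fc-layer feature length
NUM_PRED = 6      # detection slots; assumed [class, x, y, w, h, confidence]
NUM_INPUT = NUM_FEAT + NUM_PRED  # 4102 values per frame

def make_unit(feat, det):
    """Concatenate one frame's visual features with its detection."""
    unit = np.zeros(NUM_INPUT, dtype=np.float32)
    unit[:NUM_FEAT] = feat
    unit[NUM_FEAT:] = det
    return unit

# num_steps consecutive units form one LSTM input sequence; the predicted
# box is later read back from indices 4097:4101 (the x, y, w, h slots).
```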
22 | ----
23 | ## Prerequisites
24 | - Python 2.7 or Python 3.3+
25 | - TensorFlow
26 | - SciPy
27 |
28 | ----
29 | ## Getting Started
30 |
31 | ### 1. Download Data and Pre-trained Models
32 |
33 | As a generic object detector, YOLO can be trained to recognize arbitrary objects. Nevertheless, as the performance of ROLO depends on the YOLO part, we choose the default YOLO small model in order to provide a fair comparison. We believe it would be unfair to credit the tracking module if we trained a customized YOLO model. The model is pre-trained on the ImageNet dataset and fine-tuned on the VOC dataset, and is capable of detecting objects of only 20 classes. We therefore picked 30 out of the 100 videos from the benchmark [OTB100](http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html), whose tracking targets belong to these classes. This subset is referred to as OTB30.
34 |
35 | **DATA**
36 |
37 | - [DATA and Results for Demo](http://guanghan.info/projects/ROLO/DATA/DATA.zip)
38 |
39 | **Models**
40 |
41 | - [Model for demo](http://guanghan.info/projects/ROLO/demo/model_demo.ckpt)
42 |
43 | - [Model for experiment 1](http://guanghan.info/projects/ROLO/experiment_1/model_step6_exp1.ckpt)
44 |
45 | - Model for experiment 2: [step=1](http://guanghan.info/projects/ROLO/experiment_2/model_step1_exp2.ckpt), [step=3](http://guanghan.info/projects/ROLO/experiment_2/model_step3_exp2.ckpt), [step=6](http://guanghan.info/projects/ROLO/experiment_2/model_step6_exp2.ckpt), [step=9](http://guanghan.info/projects/ROLO/experiment_2/model_step9_exp2.ckpt)
46 |
47 | - [Model for experiment 3](http://guanghan.info/projects/ROLO/experiment_3/model_step3_exp3.ckpt)
48 |
49 | **Evaluation**
50 |
51 | - [Evaluation Results (including other trackers)](http://guanghan.info/projects/ROLO/output/evaluation.rar)
52 |
53 | ### 2. Run Demo
54 |
55 | Reproduce the results with the pre-trained model:
56 |
57 |     python ./experiments/testing/ROLO_network_test_all.py
58 |
59 | Or download the results at [Results](http://).
60 |
61 | Run the video demo:
62 |
63 |     python ./ROLO_demo_test.py
64 |
65 |
66 | ### 3. Training and Testing
67 |
68 | As deep learning applications mature, it will be more efficient to have multi-functional networks consisting of orthogonal modules. Feature representation, in this case, is better trained separately to provide shared features. Pre-training of the visual features on ImageNet is skipped, as already discussed in YOLO. We focus on training the LSTM module.
69 |
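The training objective itself is simple: all of the training scripts build `tf.reduce_mean(tf.square(pred_location - y)) * 100` and minimize it with Adam, i.e., a mean squared error between the LSTM's predicted box and the ground-truth box, both in `[x_mid, y_mid, w, h]` form normalized to `[0, 1]`. A minimal framework-free restatement of that loss:

```python
import numpy as np

def rolo_loss(pred_box, gt_box):
    """MSE between predicted and ground-truth [x_mid, y_mid, w, h] boxes,
    normalized to [0, 1]; the x100 scaling mirrors the training scripts."""
    pred_box, gt_box = np.asarray(pred_box), np.asarray(gt_box)
    return np.mean((pred_box - gt_box) ** 2) * 100
```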
70 |
71 | **Experiment 1**:
72 |
73 | The limitation of offline tracking is that the offline models need to be trained with large amounts of data, which are hard to find in publicly available object tracking benchmarks. Even considering the whole 100 videos of OTB100 [[2]], the amount is still smaller than that of image recognition tasks by orders of magnitude. Trackers are therefore prone to over-fitting.
74 |
75 | In order to test the generalization ability of ROLO, we conduct experiment 1.
76 | Training on 22 videos and testing on the remaining 8 videos of OTB30, the model is able to outperform all the traditional trackers from the benchmark [[2]].
77 |
78 |
79 | We also test on 3 additional videos that were not selected for OTB30, as their ground truth is the face rather than the human body. Since faces are not included in the default YOLO model, YOLO detects the human body instead and ROLO is supervised to track the human body.
80 | Demo videos are available here:
81 | [Video 1](https://www.youtube.com/watch?v=7dDsvVEt4ak),
82 | [Video 2](https://www.youtube.com/watch?v=w7Bxf4guddg),
83 | [Video 3](https://www.youtube.com/watch?v=qElDUVmYSpY).
84 |
85 |
86 |
87 | To reproduce experiment 1:
88 |
89 | - Training:
90 |
91 | ```
92 | python ./experiments/training/ROLO_step6_train_20_exp1.py
93 | ```
94 |
95 | - Testing:
96 |
97 | ```
98 | python ./experiments/testing/ROLO_network_test_all.py
99 | ```
100 |
101 | **Experiment 2**:
102 |
103 | If the model inevitably has to be trained with limited data, one way to remedy this is to train it on similar dynamics. (The same strategy is used by trackers that employ online model updating.) We train a 2nd LSTM model with the first 1/3 of the frames of OTB30 and test on the remaining frames. Results show that performance improves. We find that, once trained on auxiliary frames with similar dynamics, ROLO performs better on testing sequences. This attribute makes ROLO especially useful in surveillance environments, where models can be trained offline with pre-captured data.
104 |
105 | To reproduce experiment 2:
106 |
107 | - Training:
108 |
109 | ```
110 | python ./experiments/training/ROLO_step6_train_30_exp2.py
111 | ```
112 | - Testing:
113 | ```
114 | python ./experiments/testing/ROLO_network_test_all.py
115 | ```
116 |
117 |
118 | **Experiment 3**:
119 |
120 | Considering this attribute observed in experiment 2, we experiment with increasing the number of training frames.
121 | Training on all frames while using only 1/3 of the ground truths gives an additional boost to performance.
122 |
123 | To reproduce experiment 3:
124 |
125 | - Training:
126 |
127 | ```
128 | python ./experiments/training/ROLO_step6_train_30_exp3.py
129 | ```
130 | - Testing:
131 | ```
132 | python ./experiments/testing/ROLO_network_test_all.py
133 | ```
134 |
135 | **Limitations**
136 |
137 | Note that experiments 2 and 3 use 1/3 of the frames for training; these frames should be excluded during evaluation. Note also that using different frames from the same video sequences for training and testing can still be problematic. An online updating scheme for ROLO will be very useful in the future.
138 |
139 | We will update experiments using customized YOLO models, in order to be able to detect arbitrary objects and therefore test on the whole OTB100 dataset; we will then also be able to train and test on different datasets to perform cross-validation.
140 |
141 | **Parameter Sensitivity**
142 |
143 | Repeat experiment 2 with different step sizes, [1, 3, 6, 9], using the corresponding training scripts (a small driver that runs all four follows the command blocks below):
144 |
145 |
146 | ```
147 | python ./experiments/training/ROLO_step1_train_30_exp2.py
148 | ```
149 |
150 | ```
151 | python ./experiments/training/ROLO_step3_train_30_exp2.py
152 | ```
153 |
154 | ```
155 | python ./experiments/training/ROLO_step6_train_30_exp2.py
156 | ```
157 |
158 | ```
159 | python ./experiments/training/ROLO_step9_train_30_exp2.py
160 | ```
161 |
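For convenience, the sweep can be driven from a single script (a sketch; it only shells out to the four training scripts listed above):

```python
import subprocess

for step in [1, 3, 6, 9]:
    script = "./experiments/training/ROLO_step%d_train_30_exp2.py" % step
    subprocess.check_call(["python", script])
```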
162 | ![](http://guanghan.info/projects/ROLO/fps_over_steps.png)
163 | ![](http://guanghan.info/projects/ROLO/IOU_over_steps.png)
164 |
165 | ### 4. Visualization with Heatmap
166 |
167 | - Demo:
168 | ```
169 | python ./ROLO_demo_heat.py
170 | ```
171 | - Training:
172 | ```
173 | python ./heatmap/ROLO_heatmap_train.py
174 | ```
175 | - Testing:
176 | ```
177 | python ./heatmap/ROLO_heatmap_test.py
178 | ```
179 |
180 | ![](http://guanghan.info/projects/ROLO/heatmap_small1.png)
181 | ![](http://guanghan.info/projects/ROLO/heatmap_small2.png)
182 | - Blue: YOLO detection
183 | - Red: Ground Truth
184 |
185 | ### 5. Performance Evaluation
186 |
187 |     python ./ROLO_evaluation.py
188 |
189 |
190 | ### 6. Results
191 |
192 | More qualitative results can be found on the project page. For quantitative results, please refer to the arXiv paper.
193 |
194 | ![](http://guanghan.info/projects/ROLO/occlusion.jpeg)
195 | ![](http://guanghan.info/projects/ROLO/occlusion2.jpeg)
196 |
197 | - Blue: YOLO detection
198 | - Green: ROLO Tracking
199 | - Red: Ground Truth
200 |
201 |
202 | ---
203 | ## License
204 |
205 | ROLO is released under the Apache License Version 2.0 (refer to the LICENSE file for details).
206 |
207 | ---
208 | ## Citation
209 | The details are published as a technical report on arXiv. If you use the code and models, please cite the following paper:
210 | [arXiv:1607.05781](http://arxiv.org/abs/1607.05781).
211 |
212 |     @article{ning2016spatially,
213 |       title={Spatially Supervised Recurrent Convolutional Neural Networks for Visual Object Tracking},
214 |       author={Ning, Guanghan and Zhang, Zhi and Huang, Chen and He, Zhihai and Ren, Xiaobo and Wang, Haohong},
215 |       journal={arXiv preprint arXiv:1607.05781},
216 |       year={2016}
217 |     }
218 |
219 |
220 | ---
221 | ## Reference
222 | [[1]] Redmon, Joseph, et al. "You only look once: Unified, real-time object detection." CVPR (2016).
223 |
224 | [1]: http://arxiv.org/pdf/1506.02640.pdf "YOLO"
225 |
226 | [[2]] Wu, Yi, Jongwoo Lim, and Ming-Hsuan Yang. "Object tracking benchmark." IEEE Transactions on Pattern Analysis and Machine Intelligence 37.9 (2015): 1834-1848.
227 |
228 | [2]: http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=7001050&url=http%3A%2F%2Fieeexplore.ieee.org%2Fxpls%2Fabs_all.jsp%3Farnumber%3D7001050 "OTB100"
229 |
--------------------------------------------------------------------------------
/ROLO_demo_heat.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) <2016> . All Rights Reserved.
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | '''
16 | Script File: ROLO_demo_heat.py
17 |
18 | Description:
19 |
20 | 	ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 |
26 |
27 | import cv2
28 | import os
29 | import numpy as np
30 | import sys
31 | import ROLO_utils as utils
32 | '''----------------------------------------main-----------------------------------------------------'''
33 | def main(argv):
34 |     ''' PARAMETERS '''
35 |     num_steps= 6
36 |     test = 91
37 |
38 |     [wid, ht, sequence_name, dummy_1, dummy_2] = utils.choose_video_sequence(test)
39 |
40 |     img_fold_path = os.path.join('benchmark/DATA', sequence_name, 'img/')
41 |     gt_file_path= os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt')
42 |     yolo_out_path= os.path.join('benchmark/DATA', sequence_name, 'yolo_out/')
43 |     rolo_heat_path= os.path.join('benchmark/DATA', sequence_name, 'rolo_heat_test/')
44 |     rolo_out_path= os.path.join('benchmark/DATA', sequence_name, 'rolo_out_test/')
45 |
46 |     paths_imgs = utils.load_folder( img_fold_path)
47 |     paths_rolo= utils.load_folder( rolo_out_path)
48 |     lines = utils.load_dataset_gt( gt_file_path)
49 |
50 |     # Define the codec and create VideoWriter object
51 |     fourcc= cv2.cv.CV_FOURCC(*'DIVX')
52 |     video_name = sequence_name + '_test.avi'
53 |     video_path = os.path.join('output/videos/', video_name)
54 |     video = cv2.VideoWriter(video_path, fourcc, 20, (wid, ht))
55 |
56 |     total= 0
57 |     rolo_avgloss= 0
58 |     for i in range(len(paths_rolo)- num_steps):
59 |         id= i + 1
60 |         test_id= id + num_steps - 2  #* num_steps + 1
61 |
62 |         path = paths_imgs[test_id]
63 |         img = utils.file_to_img(path)
64 |
65 |         if(img is None): break
66 |
67 |         yolo_location= utils.find_yolo_location(yolo_out_path, test_id)
68 |         yolo_location= utils.locations_normal( wid, ht, yolo_location)
69 |
70 |         heatmap_vec= utils.find_rolo_location(rolo_heat_path, test_id)
71 |         heatmap = utils.heatmap_vec_to_heatmap(None, heatmap_vec)
72 |         utils.draw_heatmap(None, heatmap)
73 |
74 |         rolo_location= utils.find_rolo_location( rolo_out_path, test_id)
75 |         rolo_location = utils.locations_normal( wid, ht, rolo_location)
76 |         print(rolo_location)
77 |
78 |         gt_location = utils.find_gt_location( lines, test_id - 1)
79 |         #gt_location= locations_from_0_to_1(None, 480, 640, gt_location)
80 |         #gt_location = locations_normal(None, 480, 640, gt_location)
81 |         print('gt: ' + str(test_id))
82 |         print(gt_location)
83 |
84 |         frame = utils.debug_2_locations( img, gt_location, yolo_location)
85 |         video.write(frame)
86 |
87 |         utils.createFolder(os.path.join('output/frames/',sequence_name))
88 |         frame_name= os.path.join('output/frames/',sequence_name,str(test_id)+'.jpg')
89 |         print(frame_name)
90 |         cv2.imwrite(frame_name, frame)
91 |         #cv2.imshow('frame',frame)
92 |         #cv2.waitKey(100)
93 |
94 |         rolo_loss = utils.cal_rolo_IOU(rolo_location, gt_location)
95 |         rolo_avgloss += rolo_loss
96 |         total += 1
97 |
98 |     rolo_avgloss /= total
99 |     print("rolo_avg_iou = ", rolo_avgloss)
100 |     video.release()
101 |     cv2.destroyAllWindows()
102 |
103 |
104 |
105 | if __name__=='__main__':
106 |     main(sys.argv)
107 |
--------------------------------------------------------------------------------
/ROLO_demo_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) <2016> . All Rights Reserved.
2 |
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 |
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | '''
16 | Script File: ROLO_demo_test.py
17 |
18 | Description:
19 |
20 | 	ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 |
26 | import cv2
27 | import os
28 | import numpy as np
29 | import sys
30 | import ROLO_utils as utils
31 | '''----------------------------------------main-----------------------------------------------------'''
32 | def main(argv):
33 |     ''' PARAMETERS '''
34 |     num_steps= 6
35 |     test = 11
36 |
37 |     [wid, ht, sequence_name, dummy_1, dummy_2] = utils.choose_video_sequence(test)
38 |
39 |     img_fold_path = os.path.join('benchmark/DATA', sequence_name, 'img/')
40 |     gt_file_path= os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt')
41 |     yolo_out_path= os.path.join('benchmark/DATA', sequence_name, 'yolo_out/')
42 |     rolo_out_path= os.path.join('benchmark/DATA', sequence_name, 'rolo_out_test/')
43 |
44 |     paths_imgs = utils.load_folder( img_fold_path)
45 |     paths_rolo= utils.load_folder( rolo_out_path)
46 |     lines = utils.load_dataset_gt( gt_file_path)
47 |
48 |     # Define the codec and create VideoWriter object
49 |     fourcc= cv2.cv.CV_FOURCC(*'DIVX')
50 |     video_name = sequence_name + '_test.avi'
51 |     video_path = os.path.join('output/videos/', video_name)
52 |     video = cv2.VideoWriter(video_path, fourcc, 20, (wid, ht))
53 |
54 |     total= 0
55 |     rolo_avgloss= 0
56 |     yolo_avgloss= 0
57 |     for i in range(len(paths_rolo)- num_steps):
58 |         id= i + 1
59 |         test_id= id + num_steps - 2  #* num_steps + 1
60 |
61 |         path = paths_imgs[test_id]
62 |         img = utils.file_to_img(path)
63 |
64 |         if(img is None): break
65 |
66 |         yolo_location= utils.find_yolo_location(yolo_out_path, test_id)
67 |         yolo_location= utils.locations_normal( wid, ht, yolo_location)
68 |         print(yolo_location)
69 |
70 |         rolo_location= utils.find_rolo_location( rolo_out_path, test_id)
71 |         rolo_location = utils.locations_normal( wid, ht, rolo_location)
72 |         print(rolo_location)
73 |
74 |         gt_location = utils.find_gt_location( lines, test_id - 1)
75 |         #gt_location= locations_from_0_to_1(None, 480, 640, gt_location)
76 |         #gt_location = locations_normal(None, 480, 640, gt_location)
77 |         print('gt: ' + str(test_id))
78 |         print(gt_location)
79 |
80 |         frame = utils.debug_3_locations( img, gt_location, yolo_location, rolo_location)
81 |         video.write(frame)
82 |
83 |         utils.createFolder(os.path.join('output/frames/',sequence_name))
84 |         frame_name= os.path.join('output/frames/',sequence_name,str(test_id)+'.jpg')
85 |         print(frame_name)
86 |         cv2.imwrite(frame_name, frame)
87 |         #cv2.imshow('frame',frame)
88 |         #cv2.waitKey(100)
89 |
90 |         rolo_loss = utils.cal_rolo_IOU(rolo_location, gt_location)
91 |         rolo_avgloss += rolo_loss
92 |         yolo_loss= utils.cal_yolo_IOU(yolo_location, gt_location)
93 |         yolo_avgloss += yolo_loss
94 |         total += 1
95 |
96 |     rolo_avgloss /= total
97 |     yolo_avgloss /= total
| print("yolo_avg_iou = ", yolo_avgloss) 99 | print("rolo_avg_iou = ", rolo_avgloss) 100 | video.release() 101 | cv2.destroyAllWindows() 102 | 103 | 104 | 105 | if __name__=='__main__': 106 | main(sys.argv) 107 | -------------------------------------------------------------------------------- /experiments/testing/ROLO_network_test_all.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ''' 16 | Script File: ROLO_network_test_all.py 17 | 18 | Description: 19 | 20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking 21 | Paper: http://arxiv.org/abs/1607.05781 22 | Author: Guanghan Ning 23 | Webpage: http://guanghan.info/ 24 | ''' 25 | 26 | # Imports 27 | import ROLO_utils as utils 28 | 29 | import tensorflow as tf 30 | from tensorflow.models.rnn import rnn, rnn_cell 31 | import cv2 32 | 33 | import numpy as np 34 | import os.path 35 | import time 36 | import random 37 | 38 | 39 | class ROLO_TF: 40 | disp_console = True 41 | restore_weights = True#False 42 | 43 | # YOLO parameters 44 | fromfile = None 45 | tofile_img = 'test/output.jpg' 46 | tofile_txt = 'test/output.txt' 47 | imshow = True 48 | filewrite_img = False 49 | filewrite_txt = False 50 | disp_console = True 51 | yolo_weights_file = 'weights/YOLO_small.ckpt' 52 | alpha = 0.1 53 | threshold = 0.2 54 | iou_threshold = 0.5 55 | num_class = 20 56 | num_box = 2 57 | grid_size = 7 58 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"] 59 | w_img, h_img = [352, 240] 60 | 61 | # ROLO Network Parameters 62 | rolo_weights_file = 'null' 63 | # rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/model_dropout_30.ckpt' 64 | lstm_depth = 3 65 | num_steps = 3 # number of frames as an input sequence 66 | num_feat = 4096 67 | num_predict = 6 # final output of LSTM 6 loc parameters 68 | num_gt = 4 69 | num_input = num_feat + num_predict # data input: 4096+6= 5002 70 | 71 | # ROLO Parameters 72 | batch_size = 1 73 | display_step = 1 74 | 75 | # tf Graph input 76 | x = tf.placeholder("float32", [None, num_steps, num_input]) 77 | istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input 78 | y = tf.placeholder("float32", [None, num_gt]) 79 | 80 | # Define weights 81 | weights = { 82 | 'out': tf.Variable(tf.random_normal([num_input, num_predict])) 83 | } 84 | biases = { 85 | 'out': tf.Variable(tf.random_normal([num_predict])) 86 | } 87 | 88 | 89 | def __init__(self,argvs = []): 90 | print("ROLO init") 91 | self.ROLO(argvs) 92 | 93 | 94 | def LSTM_single(self, name, _X, _istate, _weights, _biases): 95 | with tf.device('/gpu:0'): 96 | # input shape: (batch_size, n_steps, n_input) 97 | _X = tf.transpose(_X, [1, 0, 2]) # permute num_steps and batch_size 98 | # Reshape to prepare 
input to hidden activation 99 | _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input]) # (num_steps*batch_size, num_input) 100 | # Split data because rnn cell needs a list of inputs for the RNN inner loop 101 | _X = tf.split(0, self.num_steps, _X) # n_steps * (batch_size, num_input) 102 | 103 | cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input) 104 | state = _istate 105 | for step in range(self.num_steps): 106 | outputs, state = tf.nn.rnn(cell, [_X[step]], state) 107 | tf.get_variable_scope().reuse_variables() 108 | return outputs 109 | 110 | 111 | # Experiment with dropout 112 | def dropout_features(self, feature, prob): 113 | num_drop = int(prob * 4096) 114 | drop_index = random.sample(xrange(4096), num_drop) 115 | for i in range(len(drop_index)): 116 | index = drop_index[i] 117 | feature[index] = 0 118 | return feature 119 | '''---------------------------------------------------------------------------------------''' 120 | def build_networks(self): 121 | if self.disp_console : print "Building ROLO graph..." 122 | # Build rolo layers 123 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 124 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 125 | self.sess = tf.Session() 126 | self.sess.run(tf.initialize_all_variables()) 127 | self.saver = tf.train.Saver() 128 | #self.saver.restore(self.sess, self.rolo_weights_file) 129 | if self.disp_console : print "Loading complete!" + '\n' 130 | 131 | 132 | def testing(self, x_path, y_path): 133 | total_loss = 0 134 | # Use rolo_input for LSTM training 135 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 136 | #print("pred: ", pred) 137 | self.pred_location = pred[0][:, 4097:4101] 138 | #print("pred_location: ", self.pred_location) 139 | #print("self.y: ", self.y) 140 | self.correct_prediction = tf.square(self.pred_location - self.y) 141 | #print("self.correct_prediction: ", self.correct_prediction) 142 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 143 | #print("self.accuracy: ", self.accuracy) 144 | #optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 145 | 146 | # Initializing the variables 147 | init = tf.initialize_all_variables() 148 | # Launch the graph 149 | with tf.Session() as sess: 150 | 151 | if (self.restore_weights == True): 152 | sess.run(init) 153 | self.saver.restore(sess, self.rolo_weights_file) 154 | print "Loading complete!" 
+ '\n' 155 | else: 156 | sess.run(init) 157 | 158 | id = 0 #don't change this 159 | total_time = 0.0 160 | #id= 1 161 | 162 | # Keep training until reach max iterations 163 | while id < self.testing_iters - self.num_steps: 164 | # Load training data & ground truth 165 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 166 | 167 | # Apply dropout to batch_xs 168 | #for item in range(len(batch_xs)): 169 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0.4) 170 | 171 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 172 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 173 | 174 | # Reshape data to get 3 seq of 5002 elements 175 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 176 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 177 | #print("Batch_ys: ", batch_ys) 178 | 179 | start_time = time.time() 180 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 181 | cycle_time = time.time() - start_time 182 | total_time += cycle_time 183 | 184 | #print("ROLO Pred: ", pred_location) 185 | #print("len(pred) = ", len(pred_location)) 186 | #print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 187 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 188 | 189 | # Save pred_location to file 190 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 191 | 192 | #sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 193 | 194 | if id % self.display_step == 0: 195 | # Calculate batch loss 196 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 197 | #print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 198 | total_loss += loss 199 | id += 1 200 | #print(id) 201 | 202 | #print "Testing Finished!" 203 | avg_loss = total_loss/id 204 | print "Avg loss: " + str(avg_loss) 205 | print "Time Spent on Tracking: " + str(total_time) 206 | print "fps: " + str(id/total_time) 207 | #save_path = self.saver.save(sess, self.rolo_weights_file) 208 | #print("Model saved in file: %s" % save_path) 209 | 210 | return None 211 | 212 | 213 | def ROLO(self, argvs): 214 | 215 | self.rolo_utils= utils.ROLO_utils() 216 | self.rolo_utils.loadCfg() 217 | self.params = self.rolo_utils.params 218 | 219 | arguments = self.rolo_utils.argv_parser(argvs) 220 | 221 | if self.rolo_utils.flag_train is True: 222 | self.training(utils.x_path, utils.y_path) 223 | elif self.rolo_utils.flag_track is True: 224 | self.build_networks() 225 | self.track_from_file(utils.file_in_path) 226 | elif self.rolo_utils.flag_detect is True: 227 | self.build_networks() 228 | self.detect_from_file(utils.file_in_path) 229 | else: 230 | print "Default: running ROLO test." 
231 | self.build_networks() 232 | 233 | evaluate_st = 0 234 | evaluate_ed = 29 235 | 236 | for test in range(evaluate_st, evaluate_ed + 1): 237 | 238 | [self.w_img, self.h_img, sequence_name, dummy_1, self.testing_iters] = utils.choose_video_sequence(test) 239 | 240 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 241 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 242 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_test/') 243 | utils.createFolder(self.output_path) 244 | 245 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_nodrop_30_2.ckpt' #no dropout 246 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_online.ckpt' 247 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/MOLO/model_MOT.ckpt' 248 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/MOLO/model_MOT_0.2.ckpt' 249 | 250 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp0.ckpt' 251 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step3_exp1.ckpt' 252 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp2.ckpt' 253 | 254 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step3_exp2.ckpt' 255 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step9_exp2.ckpt' 256 | #self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step1_exp2.ckpt' 257 | 258 | self.rolo_weights_file= '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step3_exp1_old.ckpt' 259 | 260 | self.num_steps = 3 # number of frames as an input sequence 261 | print("TESTING ROLO on video sequence: ", sequence_name) 262 | self.testing(x_path, y_path) 263 | 264 | 265 | '''----------------------------------------main-----------------------------------------------------''' 266 | def main(argvs): 267 | ROLO_TF(argvs) 268 | 269 | 270 | if __name__=='__main__': 271 | main(' ') 272 | 273 | -------------------------------------------------------------------------------- /experiments/testing/ROLO_network_test_single.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | '''
16 | Script File: ROLO_network_test_single.py
17 | 
18 | Description:
19 | 
20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 | 
26 | # Imports
27 | import ROLO_utils as utils
28 | 
29 | import tensorflow as tf
30 | from tensorflow.models.rnn import rnn, rnn_cell
31 | import cv2
32 | 
33 | import numpy as np
34 | import os.path
35 | import time
36 | import random
37 | 
38 | 
39 | class ROLO_TF:
40 |     disp_console = True
41 |     restore_weights = True  # False
42 | 
43 |     # YOLO parameters
44 |     fromfile = None
45 |     tofile_img = 'test/output.jpg'
46 |     tofile_txt = 'test/output.txt'
47 |     imshow = True
48 |     filewrite_img = False
49 |     filewrite_txt = False
50 |     disp_console = True  # note: also set at the top of the class
51 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
52 |     alpha = 0.1
53 |     threshold = 0.2
54 |     iou_threshold = 0.5
55 |     num_class = 20
56 |     num_box = 2
57 |     grid_size = 7
58 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
59 |     w_img, h_img = [352, 240]
60 | 
61 |     # ROLO Network Parameters
62 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/model_dropout_20.ckpt'
63 |     # rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/model_dropout_30.ckpt'
64 |     lstm_depth = 3
65 |     num_steps = 3  # number of frames as an input sequence
66 |     num_feat = 4096
67 |     num_predict = 6  # final output of LSTM: 6 location parameters
68 |     num_gt = 4
69 |     num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
70 | 
71 |     # ROLO Parameters
72 |     batch_size = 1
73 |     display_step = 1
74 | 
75 |     # tf Graph input
76 |     x = tf.placeholder("float32", [None, num_steps, num_input])
77 |     istate = tf.placeholder("float32", [None, 2*num_input])  # state & cell => 2x num_input
78 |     y = tf.placeholder("float32", [None, num_gt])
79 | 
80 |     # Define weights
81 |     weights = {
82 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
83 |     }
84 |     biases = {
85 |         'out': tf.Variable(tf.random_normal([num_predict]))
86 |     }
87 | 
88 | 
89 |     def __init__(self, argvs=[]):
90 |         print("ROLO init")
91 |         self.ROLO(argvs)
92 | 
93 | 
94 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
95 | 
96 |         # input shape: (batch_size, n_steps, n_input)
97 |         _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
98 |         # Reshape to prepare input to hidden activation
99 |         _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
100 |         # Split data because rnn cell needs a list of inputs for the RNN inner loop
101 |         _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
102 |         #print("_X: ", _X)
103 |         cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
104 |         state = _istate
105 |         for step in range(self.num_steps):
106 |             outputs, state = tf.nn.rnn(cell, [_X[step]], state)
107 |             tf.get_variable_scope().reuse_variables()
108 |         #print("output: ", outputs)
109 |         #print("state: ", state)
110 |         return outputs
111 | 
112 | 
113 |     # Experiment with dropout
114 |     def dropout_features(self, feature, prob):
115 |         num_drop = int(prob * 4096)
116 |         drop_index = random.sample(xrange(4096), num_drop)
117 |         for i in range(len(drop_index)):
118 |             index = drop_index[i]
119 |             feature[index] = 0
120 |         return feature
121 |
'''---------------------------------------------------------------------------------------''' 122 | def build_networks(self): 123 | if self.disp_console : print "Building ROLO graph..." 124 | 125 | # Build rolo layers 126 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 127 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 128 | self.sess = tf.Session() 129 | self.sess.run(tf.initialize_all_variables()) 130 | self.saver = tf.train.Saver() 131 | #self.saver.restore(self.sess, self.rolo_weights_file) 132 | if self.disp_console : print "Loading complete!" + '\n' 133 | 134 | 135 | def testing(self, x_path, y_path): 136 | total_loss = 0 137 | 138 | print("TESTING ROLO...") 139 | # Use rolo_input for LSTM training 140 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 141 | print("pred: ", pred) 142 | self.pred_location = pred[0][:, 4097:4101] 143 | print("pred_location: ", self.pred_location) 144 | print("self.y: ", self.y) 145 | 146 | self.correct_prediction = tf.square(self.pred_location - self.y) 147 | print("self.correct_prediction: ", self.correct_prediction) 148 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 149 | print("self.accuracy: ", self.accuracy) 150 | #optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 151 | 152 | # Initializing the variables 153 | init = tf.initialize_all_variables() 154 | 155 | # Launch the graph 156 | with tf.Session() as sess: 157 | 158 | if (self.restore_weights == True): 159 | sess.run(init) 160 | self.saver.restore(sess, self.rolo_weights_file) 161 | print "Loading complete!" + '\n' 162 | else: 163 | sess.run(init) 164 | 165 | id = 0 #don't change this 166 | 167 | # Keep training until reach max iterations 168 | while id < self.testing_iters - self.num_steps: 169 | # Load training data & ground truth 170 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 171 | 172 | # Apply dropout to batch_xs 173 | #for item in range(len(batch_xs)): 174 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0.4) 175 | 176 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 177 | print("Batch_ys_initial: ", batch_ys) 178 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 179 | 180 | 181 | # Reshape data to get 3 seq of 5002 elements 182 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 183 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 184 | print("Batch_ys: ", batch_ys) 185 | 186 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 187 | print("ROLO Pred: ", pred_location) 188 | #print("len(pred) = ", len(pred_location)) 189 | print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 190 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 191 | 192 | # Save pred_location to file 193 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 194 | 195 | #sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 196 | 197 | if id % self.display_step == 0: 198 | # Calculate 
batch loss 199 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 200 | print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 201 | total_loss += loss 202 | id += 1 203 | print(id) 204 | 205 | print "Testing Finished!" 206 | avg_loss = total_loss/id 207 | print "Avg loss: " + str(avg_loss) 208 | #save_path = self.saver.save(sess, self.rolo_weights_file) 209 | #print("Model saved in file: %s" % save_path) 210 | 211 | return None 212 | 213 | def ROLO(self, argvs): 214 | 215 | self.rolo_utils= utils.ROLO_utils() 216 | self.rolo_utils.loadCfg() 217 | self.params = self.rolo_utils.params 218 | 219 | arguments = self.rolo_utils.argv_parser(argvs) 220 | 221 | if self.rolo_utils.flag_train is True: 222 | self.training(utils.x_path, utils.y_path) 223 | elif self.rolo_utils.flag_track is True: 224 | self.build_networks() 225 | self.track_from_file(utils.file_in_path) 226 | elif self.rolo_utils.flag_detect is True: 227 | self.build_networks() 228 | self.detect_from_file(utils.file_in_path) 229 | else: 230 | print "Default: running ROLO test." 231 | self.build_networks() 232 | 233 | test= 8 234 | [self.w_img, self.h_img, sequence_name, dummy_1, self.testing_iters] = utils.choose_video_sequence(test) 235 | 236 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 237 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 238 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_test/') 239 | utils.createFolder(self.output_path) 240 | 241 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_dropout_20.ckpt' 242 | # self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_dropout_30.ckpt' 243 | #self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_dropout_30_2.ckpt' 244 | self.rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_30_2_nd_newfit.ckpt' 245 | self.testing(x_path, y_path) 246 | 247 | '''----------------------------------------main-----------------------------------------------------''' 248 | def main(argvs): 249 | ROLO_TF(argvs) 250 | 251 | if __name__=='__main__': 252 | main(' ') 253 | 254 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step1_train_30_exp2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | '''
16 | Script File: ROLO_step1_train_30_exp2.py
17 | 
18 | Description:
19 | 
20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 | 
26 | # Imports
27 | import ROLO_utils as utils
28 | 
29 | import tensorflow as tf
30 | from tensorflow.models.rnn import rnn, rnn_cell
31 | import cv2
32 | 
33 | import numpy as np
34 | import os.path
35 | import time
36 | import random
37 | 
38 | 
39 | class ROLO_TF:
40 |     disp_console = False
41 |     restore_weights = True  # False
42 | 
43 |     # YOLO parameters
44 |     fromfile = None
45 |     tofile_img = 'test/output.jpg'
46 |     tofile_txt = 'test/output.txt'
47 |     imshow = True
48 |     filewrite_img = False
49 |     filewrite_txt = False
50 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
51 |     alpha = 0.1
52 |     threshold = 0.2
53 |     iou_threshold = 0.5
54 |     num_class = 20
55 |     num_box = 2
56 |     grid_size = 7
57 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
58 |     w_img, h_img = [352, 240]
59 | 
60 |     # ROLO Network Parameters
61 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step1_exp2.ckpt'
62 |     lstm_depth = 3
63 |     num_steps = 1  # number of frames as an input sequence
64 |     num_feat = 4096
65 |     num_predict = 6  # final output of LSTM: 6 location parameters
66 |     num_gt = 4
67 |     num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
68 | 
69 |     # ROLO Training Parameters
70 |     # learning_rate = 0.00001  # training
71 |     learning_rate = 0.00001  # same value serves training and testing
72 | 
73 |     training_iters = 210  # 100000
74 |     batch_size = 1  # 128
75 |     display_step = 1
76 | 
77 |     # tf Graph input
78 |     x = tf.placeholder("float32", [None, num_steps, num_input])
79 |     istate = tf.placeholder("float32", [None, 2*num_input])  # state & cell => 2x num_input
80 |     y = tf.placeholder("float32", [None, num_gt])
81 | 
82 |     # Define weights
83 |     weights = {
84 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
85 |     }
86 |     biases = {
87 |         'out': tf.Variable(tf.random_normal([num_predict]))
88 |     }
89 | 
90 | 
91 |     def __init__(self, argvs=[]):
92 |         print("ROLO init")
93 |         self.ROLO(argvs)
94 | 
95 | 
96 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
97 | 
98 |         # input shape: (batch_size, n_steps, n_input)
99 |         _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
100 |         # Reshape to prepare input to hidden activation
101 |         _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
102 |         # Split data because rnn cell needs a list of inputs for the RNN inner loop
103 |         _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
104 |         #print("_X: ", _X)
105 | 
106 |         cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
107 |         state = _istate
108 |         for step in range(self.num_steps):
109 |             outputs, state = tf.nn.rnn(cell, [_X[step]], state)
110 |             tf.get_variable_scope().reuse_variables()
111 | 
112 |         #print("output: ", outputs)
113 |         #print("state: ", state)
114 |         return outputs
115 | 
116 | 
117 |     # Experiment with dropout
118 |     def dropout_features(self, feature, prob):
119 |         if prob == 0: return feature
120 |         else:
121 |             num_drop = int(prob * 4096)
122 |             drop_index = random.sample(xrange(4096), num_drop)
123 |             for i in range(len(drop_index)):
124 |                 index = drop_index[i]
125 |                 feature[index] = 0
126 |
return feature 127 | 128 | # Experiment with input box noise (translate, scale) 129 | def det_add_noise(self, det): 130 | translate_rate = random.uniform(0.98, 1.02) 131 | scale_rate = random.uniform(0.8, 1.2) 132 | 133 | det[0] *= translate_rate 134 | det[1] *= translate_rate 135 | det[2] *= scale_rate 136 | det[3]*= scale_rate 137 | 138 | return det 139 | 140 | '''---------------------------------------------------------------------------------------''' 141 | def build_networks(self): 142 | if self.disp_console : print "Building ROLO graph..." 143 | 144 | # Build rolo layers 145 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 146 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 147 | self.sess = tf.Session() 148 | self.sess.run(tf.initialize_all_variables()) 149 | self.saver = tf.train.Saver() 150 | #self.saver.restore(self.sess, self.rolo_weights_file) 151 | if self.disp_console : print "Loading complete!" + '\n' 152 | 153 | 154 | def training(self, x_path, y_path): 155 | total_loss = 0 156 | 157 | if self.disp_console: print("TRAINING ROLO...") 158 | # Use rolo_input for LSTM training 159 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 160 | if self.disp_console: print("pred: ", pred) 161 | self.pred_location = pred[0][:, 4097:4101] 162 | if self.disp_console: print("pred_location: ", self.pred_location) 163 | if self.disp_console: print("self.y: ", self.y) 164 | 165 | self.correct_prediction = tf.square(self.pred_location - self.y) 166 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 167 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 168 | if self.disp_console: print("self.accuracy: ", self.accuracy) 169 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 170 | 171 | # Initializing the variables 172 | init = tf.initialize_all_variables() 173 | 174 | # Launch the graph 175 | with tf.Session() as sess: 176 | 177 | if (self.restore_weights == True): 178 | sess.run(init) 179 | self.saver.restore(sess, self.rolo_weights_file) 180 | print "Loading complete!" 
+ '\n' 181 | else: 182 | sess.run(init) 183 | 184 | id = 0 185 | 186 | # Keep training until reach max iterations 187 | while id * self.batch_size < self.training_iters: 188 | # Load training data & ground truth 189 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 190 | print('len(batch_xs)= ', len(batch_xs)) 191 | # for item in range(len(batch_xs)): 192 | 193 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, id) 194 | batch_ys = self.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 195 | 196 | # Reshape data to get 3 seq of 5002 elements 197 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 198 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 199 | if self.disp_console: print("Batch_ys: ", batch_ys) 200 | 201 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 202 | if self.disp_console: print("ROLO Pred: ", pred_location) 203 | #print("len(pred) = ", len(pred_location)) 204 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 205 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 206 | 207 | # Save pred_location to file 208 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 209 | 210 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 211 | if id % self.display_step == 0: 212 | # Calculate batch loss 213 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 214 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 215 | total_loss += loss 216 | id += 1 217 | if self.disp_console: print(id) 218 | 219 | # show 3 kinds of locations, compare! 220 | 221 | print "Optimization Finished!" 222 | avg_loss = total_loss/id 223 | print "Avg loss: " + str(avg_loss) 224 | save_path = self.saver.save(sess, self.rolo_weights_file) 225 | print("Model saved in file: %s" % save_path) 226 | 227 | return avg_loss 228 | 229 | 230 | def train_30_2(self): 231 | print("TRAINING ROLO...") 232 | log_file = open("output/trainging-step1-exp2.txt", "a") #open in append mode 233 | self.build_networks() 234 | 235 | ''' TUNE THIS''' 236 | num_videos = 30 237 | epoches = 30 * 200 238 | 239 | # Use rolo_input for LSTM training 240 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 241 | self.pred_location = pred[0][:, 4097:4101] 242 | self.correct_prediction = tf.square(self.pred_location - self.y) 243 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 244 | self.learning_rate = 0.00001 245 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 246 | 247 | # Initializing the variables 248 | init = tf.initialize_all_variables() 249 | 250 | # Launch the graph 251 | with tf.Session() as sess: 252 | if (self.restore_weights == True): 253 | sess.run(init) 254 | self.saver.restore(sess, self.rolo_weights_file) 255 | print "Loading complete!" 
+ '\n' 256 | else: 257 | sess.run(init) 258 | 259 | for epoch in range(epoches): 260 | i = epoch % num_videos 261 | [self.w_img, self.h_img, sequence_name, self.training_iters, dummy]= utils.choose_video_sequence(i) 262 | 263 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 264 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 265 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 266 | utils.createFolder(self.output_path) 267 | total_loss = 0 268 | id = 1 269 | 270 | # Keep training until reach max iterations 271 | while id < self.training_iters- self.num_steps: 272 | # Load training data & ground truth 273 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 274 | 275 | # Apply dropout to batch_xs 276 | #for item in range(len(batch_xs)): 277 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0) 278 | 279 | #print(id) 280 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 281 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 282 | 283 | # Reshape data to get 3 seq of 5002 elements 284 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 285 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 286 | if self.disp_console: print("Batch_ys: ", batch_ys) 287 | 288 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 289 | if self.disp_console: print("ROLO Pred: ", pred_location) 290 | #print("len(pred) = ", len(pred_location)) 291 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 292 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 293 | 294 | # Save pred_location to file 295 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 296 | 297 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 298 | if id % self.display_step == 0: 299 | # Calculate batch loss 300 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 301 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 302 | total_loss += loss 303 | id += 1 304 | if self.disp_console: print(id) 305 | 306 | #print "Optimization Finished!" 
307 | avg_loss = total_loss/id 308 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 309 | 310 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 311 | if i+1==num_videos: 312 | log_file.write('\n') 313 | save_path = self.saver.save(sess, self.rolo_weights_file) 314 | print("Model saved in file: %s" % save_path) 315 | 316 | log_file.close() 317 | return 318 | 319 | 320 | def ROLO(self, argvs): 321 | 322 | self.rolo_utils= utils.ROLO_utils() 323 | self.rolo_utils.loadCfg() 324 | self.params = self.rolo_utils.params 325 | 326 | arguments = self.rolo_utils.argv_parser(argvs) 327 | 328 | if self.rolo_utils.flag_train is True: 329 | self.training(utils.x_path, utils.y_path) 330 | elif self.rolo_utils.flag_track is True: 331 | self.build_networks() 332 | self.track_from_file(utils.file_in_path) 333 | elif self.rolo_utils.flag_detect is True: 334 | self.build_networks() 335 | self.detect_from_file(utils.file_in_path) 336 | else: 337 | self.train_30_2() 338 | 339 | '''----------------------------------------main-----------------------------------------------------''' 340 | def main(argvs): 341 | ROLO_TF(argvs) 342 | 343 | if __name__=='__main__': 344 | main(' ') 345 | 346 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step3_train_30_exp2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
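# LSTM_single() below drives tf.nn.rnn one frame at a time, re-entering the
# variable scope between steps. Under the TF 0.x API this repo targets, a
# sketch of the equivalent single call over the whole sequence (same cell,
# same concatenated cell+hidden state) would be:
#
#   cell = tf.nn.rnn_cell.LSTMCell(num_input, num_input)
#   outputs, final_state = tf.nn.rnn(cell, _X, initial_state=_istate)
#   # outputs: list of num_steps tensors, one per frame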
14 | 
15 | '''
16 | Script File: ROLO_step3_train_30_exp2.py
17 | 
18 | Description:
19 | 
20 | ROLO is short for Recurrent YOLO, invented by Guanghan Ning for object detection, tracking and predicting
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 | 
26 | # Imports
27 | import ROLO_utils as utils
28 | 
29 | import tensorflow as tf
30 | from tensorflow.models.rnn import rnn, rnn_cell
31 | import cv2
32 | 
33 | import numpy as np
34 | import os.path
35 | import time
36 | import random
37 | 
38 | 
39 | class ROLO_TF:
40 |     disp_console = False
41 |     restore_weights = True  # False
42 | 
43 |     # YOLO parameters
44 |     fromfile = None
45 |     tofile_img = 'test/output.jpg'
46 |     tofile_txt = 'test/output.txt'
47 |     imshow = True
48 |     filewrite_img = False
49 |     filewrite_txt = False
50 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
51 |     alpha = 0.1
52 |     threshold = 0.2
53 |     iou_threshold = 0.5
54 |     num_class = 20
55 |     num_box = 2
56 |     grid_size = 7
57 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
58 |     w_img, h_img = [352, 240]
59 | 
60 |     # ROLO Network Parameters
61 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step3_exp2.ckpt'
62 |     lstm_depth = 3
63 |     num_steps = 3  # number of frames as an input sequence
64 |     num_feat = 4096
65 |     num_predict = 6  # final output of LSTM: 6 location parameters
66 |     num_gt = 4
67 |     num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
68 | 
69 |     # ROLO Training Parameters
70 |     # learning_rate = 0.00001  # training
71 |     learning_rate = 0.00001  # same value serves training and testing
72 | 
73 |     training_iters = 210  # 100000
74 |     batch_size = 1  # 128
75 |     display_step = 1
76 | 
77 |     # tf Graph input
78 |     x = tf.placeholder("float32", [None, num_steps, num_input])
79 |     istate = tf.placeholder("float32", [None, 2*num_input])  # state & cell => 2x num_input
80 |     y = tf.placeholder("float32", [None, num_gt])
81 | 
82 |     # Define weights
83 |     weights = {
84 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
85 |     }
86 |     biases = {
87 |         'out': tf.Variable(tf.random_normal([num_predict]))
88 |     }
89 | 
90 | 
91 |     def __init__(self, argvs=[]):
92 |         print("ROLO init")
93 |         self.ROLO(argvs)
94 | 
95 | 
96 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
97 | 
98 |         # input shape: (batch_size, n_steps, n_input)
99 |         _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
100 |         # Reshape to prepare input to hidden activation
101 |         _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
102 |         # Split data because rnn cell needs a list of inputs for the RNN inner loop
103 |         _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
104 |         #print("_X: ", _X)
105 | 
106 |         cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
107 |         state = _istate
108 |         for step in range(self.num_steps):
109 |             outputs, state = tf.nn.rnn(cell, [_X[step]], state)
110 |             tf.get_variable_scope().reuse_variables()
111 | 
112 |         #print("output: ", outputs)
113 |         #print("state: ", state)
114 |         return outputs
115 | 
116 | 
117 |     # Experiment with dropout
118 |     def dropout_features(self, feature, prob):
119 |         if prob == 0: return feature
120 |         else:
121 |             num_drop = int(prob * 4096)
122 |             drop_index = random.sample(xrange(4096), num_drop)
123 |             for i in range(len(drop_index)):
124 |                 index = drop_index[i]
125 |                 feature[index] = 0
126 |             return
feature 127 | 128 | 129 | '''---------------------------------------------------------------------------------------''' 130 | def build_networks(self): 131 | if self.disp_console : print "Building ROLO graph..." 132 | 133 | # Build rolo layers 134 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 135 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 136 | self.sess = tf.Session() 137 | self.sess.run(tf.initialize_all_variables()) 138 | self.saver = tf.train.Saver() 139 | #self.saver.restore(self.sess, self.rolo_weights_file) 140 | if self.disp_console : print "Loading complete!" + '\n' 141 | 142 | 143 | def training(self, x_path, y_path): 144 | total_loss = 0 145 | 146 | if self.disp_console: print("TRAINING ROLO...") 147 | # Use rolo_input for LSTM training 148 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 149 | if self.disp_console: print("pred: ", pred) 150 | self.pred_location = pred[0][:, 4097:4101] 151 | if self.disp_console: print("pred_location: ", self.pred_location) 152 | if self.disp_console: print("self.y: ", self.y) 153 | 154 | self.correct_prediction = tf.square(self.pred_location - self.y) 155 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 156 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 157 | if self.disp_console: print("self.accuracy: ", self.accuracy) 158 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 159 | 160 | # Initializing the variables 161 | init = tf.initialize_all_variables() 162 | 163 | # Launch the graph 164 | with tf.Session() as sess: 165 | 166 | if (self.restore_weights == True): 167 | sess.run(init) 168 | self.saver.restore(sess, self.rolo_weights_file) 169 | print "Loading complete!" 
+ '\n' 170 | else: 171 | sess.run(init) 172 | 173 | id = 0 174 | 175 | # Keep training until reach max iterations 176 | while id * self.batch_size < self.training_iters: 177 | # Load training data & ground truth 178 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 179 | print('len(batch_xs)= ', len(batch_xs)) 180 | # for item in range(len(batch_xs)): 181 | 182 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, id) 183 | batch_ys = self.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 184 | 185 | # Reshape data to get 3 seq of 5002 elements 186 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 187 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 188 | if self.disp_console: print("Batch_ys: ", batch_ys) 189 | 190 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 191 | if self.disp_console: print("ROLO Pred: ", pred_location) 192 | #print("len(pred) = ", len(pred_location)) 193 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 194 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 195 | 196 | # Save pred_location to file 197 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 198 | 199 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 200 | if id % self.display_step == 0: 201 | # Calculate batch loss 202 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 203 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 204 | total_loss += loss 205 | id += 1 206 | if self.disp_console: print(id) 207 | 208 | # show 3 kinds of locations, compare! 209 | 210 | print "Optimization Finished!" 211 | avg_loss = total_loss/id 212 | print "Avg loss: " + str(avg_loss) 213 | save_path = self.saver.save(sess, self.rolo_weights_file) 214 | print("Model saved in file: %s" % save_path) 215 | 216 | return avg_loss 217 | 218 | 219 | def train_30_2(self): 220 | print("TRAINING ROLO...") 221 | log_file = open("output/trainging-30-2-log.txt", "a") #open in append mode 222 | self.build_networks() 223 | 224 | ''' TUNE THIS''' 225 | num_videos = 30 226 | epoches = 30 * 300 227 | 228 | # Use rolo_input for LSTM training 229 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 230 | self.pred_location = pred[0][:, 4097:4101] 231 | self.correct_prediction = tf.square(self.pred_location - self.y) 232 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 233 | self.learning_rate = 0.00001 234 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 235 | 236 | # Initializing the variables 237 | init = tf.initialize_all_variables() 238 | 239 | # Launch the graph 240 | with tf.Session() as sess: 241 | if (self.restore_weights == True): 242 | sess.run(init) 243 | self.saver.restore(sess, self.rolo_weights_file) 244 | print "Loading complete!" 
+ '\n' 245 | else: 246 | sess.run(init) 247 | 248 | for epoch in range(epoches): 249 | i = epoch % num_videos 250 | [self.w_img, self.h_img, sequence_name, self.training_iters, dummy]= utils.choose_video_sequence(i) 251 | 252 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 253 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 254 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 255 | utils.createFolder(self.output_path) 256 | total_loss = 0 257 | id = 0 258 | 259 | # Keep training until reach max iterations 260 | while id < self.training_iters- self.num_steps: 261 | # Load training data & ground truth 262 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 263 | 264 | # Apply dropout to batch_xs 265 | #for item in range(len(batch_xs)): 266 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0) 267 | 268 | #print(id) 269 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 270 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 271 | 272 | # Reshape data to get 3 seq of 5002 elements 273 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 274 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 275 | if self.disp_console: print("Batch_ys: ", batch_ys) 276 | 277 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 278 | if self.disp_console: print("ROLO Pred: ", pred_location) 279 | #print("len(pred) = ", len(pred_location)) 280 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 281 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 282 | 283 | # Save pred_location to file 284 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 285 | 286 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 287 | if id % self.display_step == 0: 288 | # Calculate batch loss 289 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 290 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 291 | total_loss += loss 292 | id += 1 293 | if self.disp_console: print(id) 294 | 295 | #print "Optimization Finished!" 
296 | avg_loss = total_loss/id 297 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 298 | 299 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 300 | if i+1==num_videos: 301 | log_file.write('\n') 302 | save_path = self.saver.save(sess, self.rolo_weights_file) 303 | print("Model saved in file: %s" % save_path) 304 | 305 | log_file.close() 306 | return 307 | 308 | 309 | def ROLO(self, argvs): 310 | 311 | self.rolo_utils= utils.ROLO_utils() 312 | self.rolo_utils.loadCfg() 313 | self.params = self.rolo_utils.params 314 | 315 | arguments = self.rolo_utils.argv_parser(argvs) 316 | 317 | if self.rolo_utils.flag_train is True: 318 | self.training(utils.x_path, utils.y_path) 319 | elif self.rolo_utils.flag_track is True: 320 | self.build_networks() 321 | self.track_from_file(utils.file_in_path) 322 | elif self.rolo_utils.flag_detect is True: 323 | self.build_networks() 324 | self.detect_from_file(utils.file_in_path) 325 | else: 326 | self.train_30_2() 327 | 328 | '''----------------------------------------main-----------------------------------------------------''' 329 | def main(argvs): 330 | ROLO_TF(argvs) 331 | 332 | if __name__=='__main__': 333 | main(' ') 334 | 335 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step6_train_20_exp1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | '''
16 | Script File: ROLO_step6_train_20_exp1.py
17 | 
18 | Description:
19 | 
20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking
21 | Paper: http://arxiv.org/abs/1607.05781
22 | Author: Guanghan Ning
23 | Webpage: http://guanghan.info/
24 | '''
25 | 
26 | # Imports
27 | import ROLO_utils as utils
28 | 
29 | import tensorflow as tf
30 | from tensorflow.models.rnn import rnn, rnn_cell
31 | import cv2
32 | 
33 | import numpy as np
34 | import os.path
35 | import time
36 | import random
37 | 
38 | 
39 | class ROLO_TF:
40 |     disp_console = False
41 |     restore_weights = True  # False
42 | 
43 |     # YOLO parameters
44 |     fromfile = None
45 |     tofile_img = 'test/output.jpg'
46 |     tofile_txt = 'test/output.txt'
47 |     imshow = True
48 |     filewrite_img = False
49 |     filewrite_txt = False
50 |     yolo_weights_file = 'weights/YOLO_small.ckpt'
51 |     alpha = 0.1
52 |     threshold = 0.2
53 |     iou_threshold = 0.5
54 |     num_class = 20
55 |     num_box = 2
56 |     grid_size = 7
57 |     classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
58 |     w_img, h_img = [352, 240]
59 | 
60 |     # ROLO Network Parameters
61 |     rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp1.ckpt'
62 |     lstm_depth = 3
63 |     num_steps = 6  # number of frames as an input sequence
64 |     num_feat = 4096
65 |     num_predict = 6  # final output of LSTM: 6 location parameters
66 |     num_gt = 4
67 |     num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
68 | 
69 |     # ROLO Training Parameters
70 |     # learning_rate = 0.00001  # training
71 |     learning_rate = 0.00001  # same value serves training and testing
72 | 
73 |     training_iters = 210  # 100000
74 |     batch_size = 1  # 128
75 |     display_step = 1
76 | 
77 |     # tf Graph input
78 |     x = tf.placeholder("float32", [None, num_steps, num_input])
79 |     istate = tf.placeholder("float32", [None, 2*num_input])  # state & cell => 2x num_input
80 |     y = tf.placeholder("float32", [None, num_gt])
81 | 
82 |     # Define weights
83 |     weights = {
84 |         'out': tf.Variable(tf.random_normal([num_input, num_predict]))
85 |     }
86 |     biases = {
87 |         'out': tf.Variable(tf.random_normal([num_predict]))
88 |     }
89 | 
90 | 
91 |     def __init__(self, argvs=[]):
92 |         print("ROLO init")
93 |         self.ROLO(argvs)
94 | 
95 | 
96 |     def LSTM_single(self, name, _X, _istate, _weights, _biases):
97 | 
98 |         # input shape: (batch_size, n_steps, n_input)
99 |         _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
100 |         # Reshape to prepare input to hidden activation
101 |         _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
102 |         # Split data because rnn cell needs a list of inputs for the RNN inner loop
103 |         _X = tf.split(0, self.num_steps, _X)  # n_steps * (batch_size, num_input)
104 |         #print("_X: ", _X)
105 | 
106 |         cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input)
107 |         state = _istate
108 |         for step in range(self.num_steps):
109 |             outputs, state = tf.nn.rnn(cell, [_X[step]], state)
110 |             tf.get_variable_scope().reuse_variables()
111 | 
112 |         #print("output: ", outputs)
113 |         #print("state: ", state)
114 |         return outputs
115 | 
116 | 
117 |     # Experiment with dropout
118 |     def dropout_features(self, feature, prob):
119 |         num_drop = int(prob * 4096)
120 |         drop_index = random.sample(xrange(4096), num_drop)
121 |         for i in range(len(drop_index)):
122 |             index = drop_index[i]
123 |             feature[index] = 0
124 |         return feature
125 | 
126 | 
127 |
'''---------------------------------------------------------------------------------------''' 128 | def build_networks(self): 129 | if self.disp_console : print "Building ROLO graph..." 130 | 131 | # Build rolo layers 132 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 133 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 134 | self.sess = tf.Session() 135 | self.sess.run(tf.initialize_all_variables()) 136 | self.saver = tf.train.Saver() 137 | #self.saver.restore(self.sess, self.rolo_weights_file) 138 | if self.disp_console : print "Loading complete!" + '\n' 139 | 140 | 141 | def training(self, x_path, y_path): 142 | total_loss = 0 143 | 144 | if self.disp_console: print("TRAINING ROLO...") 145 | # Use rolo_input for LSTM training 146 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 147 | if self.disp_console: print("pred: ", pred) 148 | self.pred_location = pred[0][:, 4097:4101] 149 | if self.disp_console: print("pred_location: ", self.pred_location) 150 | if self.disp_console: print("self.y: ", self.y) 151 | 152 | self.correct_prediction = tf.square(self.pred_location - self.y) 153 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 154 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 155 | if self.disp_console: print("self.accuracy: ", self.accuracy) 156 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 157 | 158 | # Initializing the variables 159 | init = tf.initialize_all_variables() 160 | 161 | # Launch the graph 162 | with tf.Session() as sess: 163 | 164 | if (self.restore_weights == True): 165 | sess.run(init) 166 | self.saver.restore(sess, self.rolo_weights_file) 167 | print "Loading complete!" 
+ '\n' 168 | else: 169 | sess.run(init) 170 | 171 | id = 0 172 | 173 | # Keep training until reach max iterations 174 | while id * self.batch_size < self.training_iters: 175 | # Load training data & ground truth 176 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 177 | print('len(batch_xs)= ', len(batch_xs)) 178 | # for item in range(len(batch_xs)): 179 | 180 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, id) 181 | batch_ys = self.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 182 | 183 | # Reshape data to get 3 seq of 5002 elements 184 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 185 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 186 | if self.disp_console: print("Batch_ys: ", batch_ys) 187 | 188 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 189 | if self.disp_console: print("ROLO Pred: ", pred_location) 190 | #print("len(pred) = ", len(pred_location)) 191 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 192 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 193 | 194 | # Save pred_location to file 195 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 196 | 197 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 198 | if id % self.display_step == 0: 199 | # Calculate batch loss 200 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 201 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 202 | total_loss += loss 203 | id += 1 204 | if self.disp_console: print(id) 205 | 206 | # show 3 kinds of locations, compare! 207 | 208 | print "Optimization Finished!" 209 | avg_loss = total_loss/id 210 | print "Avg loss: " + str(avg_loss) 211 | save_path = self.saver.save(sess, self.rolo_weights_file) 212 | print("Model saved in file: %s" % save_path) 213 | 214 | return avg_loss 215 | 216 | 217 | def train_20(self): 218 | print("TRAINING ROLO...") 219 | log_file = open("output/trainging-20-log.txt", "a") #open in append mode 220 | self.build_networks() 221 | 222 | ''' TUNE THIS''' 223 | num_videos = 20 224 | epoches = 20 * 100 225 | 226 | # Use rolo_input for LSTM training 227 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 228 | self.pred_location = pred[0][:, 4097:4101] 229 | self.correct_prediction = tf.square(self.pred_location - self.y) 230 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 231 | self.learning_rate = 0.00001 232 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 233 | 234 | # Initializing the variables 235 | init = tf.initialize_all_variables() 236 | 237 | # Launch the graph 238 | with tf.Session() as sess: 239 | if (self.restore_weights == True): 240 | sess.run(init) 241 | self.saver.restore(sess, self.rolo_weights_file) 242 | print "Loading complete!" 
+ '\n' 243 | else: 244 | sess.run(init) 245 | 246 | for epoch in range(epoches): 247 | i = epoch % num_videos 248 | [self.w_img, self.h_img, sequence_name, dummy, self.training_iters]= utils.choose_video_sequence(i) 249 | 250 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 251 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 252 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 253 | utils.createFolder(self.output_path) 254 | total_loss = 0 255 | id = 0 256 | 257 | # Keep training until reach max iterations 258 | while id < self.training_iters- self.num_steps: 259 | # Load training data & ground truth 260 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 261 | 262 | # Apply dropout to batch_xs 263 | #for item in range(len(batch_xs)): 264 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0.4) 265 | 266 | #print(id) 267 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 268 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 269 | 270 | # Reshape data to get 3 seq of 5002 elements 271 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 272 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 273 | if self.disp_console: print("Batch_ys: ", batch_ys) 274 | 275 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 276 | if self.disp_console: print("ROLO Pred: ", pred_location) 277 | #print("len(pred) = ", len(pred_location)) 278 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 279 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 280 | 281 | # Save pred_location to file 282 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 283 | 284 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 285 | if id % self.display_step == 0: 286 | # Calculate batch loss 287 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 288 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 289 | total_loss += loss 290 | id += 1 291 | if self.disp_console: print(id) 292 | 293 | #print "Optimization Finished!" 
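            # saver.save() below rewrites the same checkpoint file after every
            # epoch, so earlier snapshots are overwritten. A sketch for keeping
            # per-epoch snapshots with the same Saver API:
            #
            #   save_path = self.saver.save(sess, self.rolo_weights_file,
            #                               global_step=epoch)  # appends -<epoch>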
294 | avg_loss = total_loss/id 295 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 296 | 297 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 298 | if i+1==num_videos: 299 | log_file.write('\n') 300 | save_path = self.saver.save(sess, self.rolo_weights_file) 301 | print("Model saved in file: %s" % save_path) 302 | 303 | log_file.close() 304 | return 305 | 306 | 307 | def ROLO(self, argvs): 308 | 309 | self.rolo_utils= utils.ROLO_utils() 310 | self.rolo_utils.loadCfg() 311 | self.params = self.rolo_utils.params 312 | 313 | arguments = self.rolo_utils.argv_parser(argvs) 314 | 315 | if self.rolo_utils.flag_train is True: 316 | self.training(utils.x_path, utils.y_path) 317 | elif self.rolo_utils.flag_track is True: 318 | self.build_networks() 319 | self.track_from_file(utils.file_in_path) 320 | elif self.rolo_utils.flag_detect is True: 321 | self.build_networks() 322 | self.detect_from_file(utils.file_in_path) 323 | else: 324 | self.train_20() 325 | 326 | '''----------------------------------------main-----------------------------------------------------''' 327 | def main(argvs): 328 | ROLO_TF(argvs) 329 | 330 | if __name__=='__main__': 331 | main(' ') 332 | 333 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step6_train_30_exp2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | ''' 16 | Script File: ROLO_step6_train_30_exp2.py 17 | 18 | Description: 19 | 20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking 21 | Paper: http://arxiv.org/abs/1607.05781 22 | Author: Guanghan Ning 23 | Webpage: http://guanghan.info/ 24 | ''' 25 | 26 | # Imports 27 | import ROLO_utils as utils 28 | 29 | import tensorflow as tf 30 | from tensorflow.models.rnn import rnn, rnn_cell 31 | import cv2 32 | 33 | import numpy as np 34 | import os.path 35 | import time 36 | import random 37 | 38 | 39 | class ROLO_TF: 40 | disp_console = False 41 | restore_weights = True  #False 42 | 43 | # YOLO parameters 44 | fromfile = None 45 | tofile_img = 'test/output.jpg' 46 | tofile_txt = 'test/output.txt' 47 | imshow = True 48 | filewrite_img = False 49 | filewrite_txt = False 50 | yolo_weights_file = 'weights/YOLO_small.ckpt' 51 | alpha = 0.1 52 | threshold = 0.2 53 | iou_threshold = 0.5 54 | num_class = 20 55 | num_box = 2 56 | grid_size = 7 57 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"] 58 | w_img, h_img = [352, 240] 59 | 60 | # ROLO Network Parameters 61 | rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp2.ckpt' 62 | lstm_depth = 3 63 | num_steps = 6 # number of frames as an input sequence 64 | num_feat = 4096 65 | num_predict = 6 # final output of LSTM: 6 location parameters 66 | num_gt = 4 67 | num_input = num_feat + num_predict # data input: 4096 + 6 = 4102 68 | 69 | # ROLO Training Parameters 70 | learning_rate = 0.00001 71 | 72 | training_iters = 210  #100000 73 | batch_size = 1 #128 74 | display_step = 1 75 | 76 | # tf Graph input 77 | x = tf.placeholder("float32", [None, num_steps, num_input]) 78 | istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input 79 | y = tf.placeholder("float32", [None, num_gt]) 80 | 81 | # Define weights 82 | weights = { 83 | 'out': tf.Variable(tf.random_normal([num_input, num_predict])) 84 | } 85 | biases = { 86 | 'out': tf.Variable(tf.random_normal([num_predict])) 87 | } 88 | 89 | 90 | def __init__(self,argvs = []): 91 | print("ROLO init") 92 | self.ROLO(argvs) 93 | 94 | 95 | def LSTM_single(self, name, _X, _istate, _weights, _biases): 96 | 97 | # input shape: (batch_size, n_steps, n_input) 98 | _X = tf.transpose(_X, [1, 0, 2]) # permute num_steps and batch_size 99 | # Reshape to prepare input to hidden activation 100 | _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input]) # (num_steps*batch_size, num_input) 101 | # Split data because rnn cell needs a list of inputs for the RNN inner loop 102 | _X = tf.split(0, self.num_steps, _X) # n_steps * (batch_size, num_input) 103 | #print("_X: ", _X) 104 | 105 | cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input) 106 | state = _istate 107 | for step in range(self.num_steps): 108 | outputs, state = tf.nn.rnn(cell, [_X[step]], state) 109 | tf.get_variable_scope().reuse_variables() 110 | return outputs 111 | 112 | 113 | # Experiment with dropout 114 | def dropout_features(self, feature, prob): 115 | if prob == 0: return feature 116 | else: 117 | num_drop = int(prob * 4096) 118 | drop_index = random.sample(xrange(4096), num_drop) 119 | for i in range(len(drop_index)): 120 | index = drop_index[i] 121 | feature[index] = 0 122 | return feature 123 | 124 | # Experiment with input box noise (translate, scale) 125 | def det_add_noise(self, det): 126 |
translate_rate = random.uniform(0.98, 1.02) 127 | scale_rate = random.uniform(0.8, 1.2) 128 | 129 | det[0] *= translate_rate 130 | det[1] *= translate_rate 131 | det[2] *= scale_rate 132 | det[3]*= scale_rate 133 | 134 | return det 135 | 136 | '''---------------------------------------------------------------------------------------''' 137 | def build_networks(self): 138 | if self.disp_console : print "Building ROLO graph..." 139 | 140 | # Build rolo layers 141 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 142 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 143 | self.sess = tf.Session() 144 | self.sess.run(tf.initialize_all_variables()) 145 | self.saver = tf.train.Saver() 146 | #self.saver.restore(self.sess, self.rolo_weights_file) 147 | if self.disp_console : print "Loading complete!" + '\n' 148 | 149 | 150 | def training(self, x_path, y_path): 151 | total_loss = 0 152 | 153 | if self.disp_console: print("TRAINING ROLO...") 154 | # Use rolo_input for LSTM training 155 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 156 | if self.disp_console: print("pred: ", pred) 157 | self.pred_location = pred[0][:, 4097:4101] 158 | if self.disp_console: print("pred_location: ", self.pred_location) 159 | if self.disp_console: print("self.y: ", self.y) 160 | 161 | self.correct_prediction = tf.square(self.pred_location - self.y) 162 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 163 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 164 | if self.disp_console: print("self.accuracy: ", self.accuracy) 165 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 166 | 167 | # Initializing the variables 168 | init = tf.initialize_all_variables() 169 | 170 | # Launch the graph 171 | with tf.Session() as sess: 172 | 173 | if (self.restore_weights == True): 174 | sess.run(init) 175 | self.saver.restore(sess, self.rolo_weights_file) 176 | print "Loading complete!" 
+ '\n' 177 | else: 178 | sess.run(init) 179 | 180 | id = 0 181 | 182 | # Keep training until reach max iterations 183 | while id * self.batch_size < self.training_iters: 184 | # Load training data & ground truth 185 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 186 | print('len(batch_xs)= ', len(batch_xs)) 187 | # for item in range(len(batch_xs)): 188 | 189 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, id) 190 | batch_ys = self.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 191 | 192 | # Reshape data to get 3 seq of 5002 elements 193 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 194 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 195 | if self.disp_console: print("Batch_ys: ", batch_ys) 196 | 197 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 198 | if self.disp_console: print("ROLO Pred: ", pred_location) 199 | #print("len(pred) = ", len(pred_location)) 200 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 201 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 202 | 203 | # Save pred_location to file 204 | utils.save_rolo_output(self.output_path, pred_location, id, self.num_steps, self.batch_size) 205 | 206 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 207 | if id % self.display_step == 0: 208 | # Calculate batch loss 209 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 210 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 211 | total_loss += loss 212 | id += 1 213 | if self.disp_console: print(id) 214 | 215 | # show 3 kinds of locations, compare! 216 | 217 | print "Optimization Finished!" 218 | avg_loss = total_loss/id 219 | print "Avg loss: " + str(avg_loss) 220 | save_path = self.saver.save(sess, self.rolo_weights_file) 221 | print("Model saved in file: %s" % save_path) 222 | 223 | return avg_loss 224 | 225 | 226 | def train_30_2(self): 227 | print("TRAINING ROLO...") 228 | log_file = open("output/trainging-step6-30-2-log.txt", "a") #open in append mode 229 | self.build_networks() 230 | 231 | ''' TUNE THIS''' 232 | num_videos = 30 233 | epoches = 30 * 300 234 | 235 | # Use rolo_input for LSTM training 236 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 237 | self.pred_location = pred[0][:, 4097:4101] 238 | self.correct_prediction = tf.square(self.pred_location - self.y) 239 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 240 | self.learning_rate = 0.00001 241 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 242 | 243 | # Initializing the variables 244 | init = tf.initialize_all_variables() 245 | 246 | # Launch the graph 247 | with tf.Session() as sess: 248 | if (self.restore_weights == True): 249 | sess.run(init) 250 | self.saver.restore(sess, self.rolo_weights_file) 251 | print "Loading complete!" 
+ '\n' 252 | else: 253 | sess.run(init) 254 | 255 | for epoch in range(epoches): 256 | i = epoch % num_videos 257 | [self.w_img, self.h_img, sequence_name, self.training_iters, dummy]= utils.choose_video_sequence(i) 258 | 259 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 260 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 261 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 262 | utils.createFolder(self.output_path) 263 | total_loss = 0 264 | id = 0 265 | 266 | # Keep training until reach max iterations 267 | while id < self.training_iters- self.num_steps: 268 | # Load training data & ground truth 269 | batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id) # [num_of_examples, num_input] (depth == 1) 270 | 271 | # Apply dropout to batch_xs 272 | #for item in range(len(batch_xs)): 273 | # batch_xs[item] = self.dropout_features(batch_xs[item], 0) 274 | 275 | #print(id) 276 | batch_ys = self.rolo_utils.load_rolo_gt_test(y_path, self.batch_size, self.num_steps, id) 277 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 278 | 279 | # Reshape data to get 3 seq of 5002 elements 280 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 281 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 282 | if self.disp_console: print("Batch_ys: ", batch_ys) 283 | 284 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 285 | if self.disp_console: print("ROLO Pred: ", pred_location) 286 | #print("len(pred) = ", len(pred_location)) 287 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 288 | #print("correct_prediction int: ", (pred_location + 0.1).astype(int)) 289 | 290 | # Save pred_location to file 291 | utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size) 292 | 293 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 294 | if id % self.display_step == 0: 295 | # Calculate batch loss 296 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 297 | if self.disp_console: print "Iter " + str(id*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 298 | total_loss += loss 299 | id += 1 300 | if self.disp_console: print(id) 301 | 302 | #print "Optimization Finished!" 
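                # Note: total_loss is only accumulated on iterations where
                # id % display_step == 0, so dividing by id below assumes
                # display_step == 1, which is its value in the class parameters above.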
303 | avg_loss = total_loss/id 304 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 305 | 306 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 307 | if i+1==num_videos: 308 | log_file.write('\n') 309 | save_path = self.saver.save(sess, self.rolo_weights_file) 310 | print("Model saved in file: %s" % save_path) 311 | 312 | log_file.close() 313 | return 314 | 315 | 316 | def ROLO(self, argvs): 317 | 318 | self.rolo_utils= utils.ROLO_utils() 319 | self.rolo_utils.loadCfg() 320 | self.params = self.rolo_utils.params 321 | 322 | arguments = self.rolo_utils.argv_parser(argvs) 323 | 324 | if self.rolo_utils.flag_train is True: 325 | self.training(utils.x_path, utils.y_path) 326 | elif self.rolo_utils.flag_track is True: 327 | self.build_networks() 328 | self.track_from_file(utils.file_in_path) 329 | elif self.rolo_utils.flag_detect is True: 330 | self.build_networks() 331 | self.detect_from_file(utils.file_in_path) 332 | else: 333 | self.train_30_2() 334 | 335 | '''----------------------------------------main-----------------------------------------------------''' 336 | def main(argvs): 337 | ROLO_TF(argvs) 338 | 339 | if __name__=='__main__': 340 | main(' ') 341 | 342 | -------------------------------------------------------------------------------- /experiments/training/ROLO_step6_train_30_exp3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) <2016> . All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | ''' 16 | Script File: ROLO_step6_train_30_exp3.py 17 | 18 | Description: 19 | 20 | ROLO is short for Recurrent YOLO, aimed at simultaneous object detection and tracking 21 | Paper: http://arxiv.org/abs/1607.05781 22 | Author: Guanghan Ning 23 | Webpage: http://guanghan.info/ 24 | ''' 25 | 26 | # Imports 27 | import ROLO_utils as utils 28 | 29 | import tensorflow as tf 30 | from tensorflow.models.rnn import rnn, rnn_cell 31 | import cv2 32 | 33 | import numpy as np 34 | import os.path 35 | import time 36 | import random 37 | 38 | 39 | class ROLO_TF: 40 | disp_console = False 41 | restore_weights = False 42 | 43 | # YOLO parameters 44 | fromfile = None 45 | tofile_img = 'test/output.jpg' 46 | tofile_txt = 'test/output.txt' 47 | imshow = True 48 | filewrite_img = False 49 | filewrite_txt = False 50 | yolo_weights_file = 'weights/YOLO_small.ckpt' 51 | alpha = 0.1 52 | threshold = 0.2 53 | iou_threshold = 0.5 54 | num_class = 20 55 | num_box = 2 56 | grid_size = 7 57 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"] 58 | w_img, h_img = [352, 240] 59 | 60 | # ROLO Network Parameters 61 | rolo_weights_file = '/u03/Guanghan/dev/ROLO-dev/output/ROLO_model/model_step6_exp3.ckpt' 62 | lstm_depth = 3 63 | num_steps = 6 # number of frames as an input sequence 64 | num_feat = 4096 65 | num_predict = 6 # final output of LSTM: 6 location parameters 66 | num_gt = 4 67 | num_input = num_feat + num_predict # data input: 4096 + 6 = 4102 68 | 69 | # ROLO Training Parameters 70 | learning_rate = 0.00001 71 | 72 | training_iters = 210 73 | batch_size = 1 #128 74 | display_step = 1 75 | 76 | # tf Graph input 77 | x = tf.placeholder("float32", [None, num_steps, num_input]) 78 | istate = tf.placeholder("float32", [None, 2*num_input]) #state & cell => 2x num_input 79 | y = tf.placeholder("float32", [None, num_gt]) 80 | 81 | # Define weights 82 | weights = { 83 | 'out': tf.Variable(tf.random_normal([num_input, num_predict])) 84 | } 85 | biases = { 86 | 'out': tf.Variable(tf.random_normal([num_predict])) 87 | } 88 | 89 | 90 | def __init__(self,argvs = []): 91 | print("ROLO init") 92 | self.ROLO(argvs) 93 | 94 | 95 | def createFolder(self, path): 96 | if not os.path.exists(path): 97 | os.makedirs(path) 98 | 99 | 100 | def LSTM_single(self, name, _X, _istate, _weights, _biases): 101 | 102 | # input shape: (batch_size, n_steps, n_input) 103 | _X = tf.transpose(_X, [1, 0, 2]) # permute num_steps and batch_size 104 | # Reshape to prepare input to hidden activation 105 | _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input]) # (num_steps*batch_size, num_input) 106 | # Split data because rnn cell needs a list of inputs for the RNN inner loop 107 | _X = tf.split(0, self.num_steps, _X) # n_steps * (batch_size, num_input) 108 | #print("_X: ", _X) 109 | 110 | cell = tf.nn.rnn_cell.LSTMCell(self.num_input, self.num_input) 111 | state = _istate 112 | for step in range(self.num_steps): 113 | outputs, state = tf.nn.rnn(cell, [_X[step]], state) 114 | tf.get_variable_scope().reuse_variables() 115 | return outputs 116 | 117 | 118 | # Experiment with dropout 119 | def dropout_features(self, feature, prob): 120 | num_drop = int(prob * 4096) 121 | drop_index = random.sample(xrange(4096), num_drop) 122 | for i in range(len(drop_index)): 123 | index = drop_index[i] 124 | feature[index] = 0 125 | return feature 126 | 127 | 128 |
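    # A minimal vectorized sketch of the same feature dropout, assuming `feature`
    # is a 1-D numpy array with at least 4096 entries; shown for illustration and
    # not called anywhere in this script:
    def dropout_features_vectorized(self, feature, prob):
        num_drop = int(prob * 4096)                                   # how many of the 4096 feature dims to zero
        drop_index = np.random.choice(4096, num_drop, replace=False)  # sample distinct indices
        feature[drop_index] = 0                                       # zero them in place via fancy indexing
        return feature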
'''---------------------------------------------------------------------------------------''' 129 | def build_networks(self): 130 | if self.disp_console : print "Building ROLO graph..." 131 | 132 | # Build rolo layers 133 | self.lstm_module = self.LSTM_single('lstm_test', self.x, self.istate, self.weights, self.biases) 134 | self.ious= tf.Variable(tf.zeros([self.batch_size]), name="ious") 135 | self.sess = tf.Session() 136 | self.sess.run(tf.initialize_all_variables()) 137 | self.saver = tf.train.Saver() 138 | #self.saver.restore(self.sess, self.rolo_weights_file) 139 | if self.disp_console : print "Loading complete!" + '\n' 140 | 141 | 142 | def training(self, x_path, y_path): 143 | total_loss = 0 144 | 145 | if self.disp_console: print("TRAINING ROLO...") 146 | # Use rolo_input for LSTM training 147 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 148 | if self.disp_console: print("pred: ", pred) 149 | self.pred_location = pred[0][:, 4097:4101] 150 | if self.disp_console: print("pred_location: ", self.pred_location) 151 | if self.disp_console: print("self.y: ", self.y) 152 | 153 | self.correct_prediction = tf.square(self.pred_location - self.y) 154 | if self.disp_console: print("self.correct_prediction: ", self.correct_prediction) 155 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 156 | if self.disp_console: print("self.accuracy: ", self.accuracy) 157 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 158 | 159 | # Initializing the variables 160 | init = tf.initialize_all_variables() 161 | 162 | # Launch the graph 163 | with tf.Session() as sess: 164 | 165 | if (self.restore_weights == True): 166 | sess.run(init) 167 | self.saver.restore(sess, self.rolo_weights_file) 168 | print "Loading complete!" 
+ '\n' 169 | else: 170 | sess.run(init) 171 | 172 | step = 0 173 | 174 | # Keep training until reach max iterations 175 | while step * self.batch_size < self.training_iters: 176 | # Load training data & ground truth 177 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, step) # [num_of_examples, num_input] (depth == 1) 178 | print('len(batch_xs)= ', len(batch_xs)) 179 | # for item in range(len(batch_xs)): 180 | 181 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, step) 182 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 183 | 184 | # Reshape data to get 3 seq of 5002 elements 185 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 186 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 187 | if self.disp_console: print("Batch_ys: ", batch_ys) 188 | 189 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 190 | if self.disp_console: print("ROLO Pred: ", pred_location) 191 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 192 | 193 | # Save pred_location to file 194 | utils.save_rolo_output(self.output_path, pred_location, step, self.num_steps, self.batch_size) 195 | 196 | sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 197 | if step % self.display_step == 0: 198 | # Calculate batch loss 199 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 200 | if self.disp_console: print "Iter " + str(step*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 201 | total_loss += loss 202 | step += 1 203 | if self.disp_console: print(step) 204 | # show 3 kinds of locations, compare! 205 | print "Optimization Finished!" 206 | avg_loss = total_loss/step 207 | print "Avg loss: " + str(avg_loss) 208 | save_path = self.saver.save(sess, self.rolo_weights_file) 209 | print("Model saved in file: %s" % save_path) 210 | return avg_loss 211 | 212 | 213 | def train_30(self): 214 | print("TRAINING ROLO...") 215 | log_file = open("output/trainging-30-log.txt", "a") #open in append mode 216 | self.build_networks() 217 | 218 | ''' TUNE THIS''' 219 | num_videos = 30 220 | epoches = 30 * 200 221 | 222 | # Use rolo_input for LSTM training 223 | pred = self.LSTM_single('lstm_train', self.x, self.istate, self.weights, self.biases) 224 | self.pred_location = pred[0][:, 4097:4101] 225 | self.correct_prediction = tf.square(self.pred_location - self.y) 226 | self.accuracy = tf.reduce_mean(self.correct_prediction) * 100 227 | self.learning_rate = 0.00001 228 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.accuracy) # Adam Optimizer 229 | 230 | # Initializing the variables 231 | init = tf.initialize_all_variables() 232 | 233 | # Launch the graph 234 | with tf.Session() as sess: 235 | if (self.restore_weights == True): 236 | sess.run(init) 237 | self.saver.restore(sess, self.rolo_weights_file) 238 | print "Loading complete!" 
+ '\n' 239 | else: 240 | sess.run(init) 241 | 242 | for epoch in range(epoches): 243 | i = epoch % num_videos 244 | [self.w_img, self.h_img, sequence_name, self.training_iters, dummy]= utils.choose_video_sequence(i) 245 | 246 | x_path = os.path.join('benchmark/DATA', sequence_name, 'yolo_out/') 247 | y_path = os.path.join('benchmark/DATA', sequence_name, 'groundtruth_rect.txt') 248 | self.output_path = os.path.join('benchmark/DATA', sequence_name, 'rolo_out_train/') 249 | self.createFolder(self.output_path) 250 | total_loss = 0 251 | step = 0 252 | 253 | # Keep training until reach max iterations 254 | num_iters= self.training_iters * 3 / self.num_steps 255 | print num_iters 256 | while step * self.batch_size < num_iters: 257 | # Load training data & ground truth 258 | batch_xs = self.rolo_utils.load_yolo_output(x_path, self.batch_size, self.num_steps, step) # [num_of_examples, num_input] (depth == 1) 259 | 260 | batch_ys = self.rolo_utils.load_rolo_gt(y_path, self.batch_size, self.num_steps, step) 261 | batch_ys = utils.locations_from_0_to_1(self.w_img, self.h_img, batch_ys) 262 | 263 | # Reshape data to get 3 seq of 5002 elements 264 | batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input]) 265 | batch_ys = np.reshape(batch_ys, [self.batch_size, 4]) 266 | if self.disp_console: print("Batch_ys: ", batch_ys) 267 | 268 | pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 269 | if self.disp_console: print("ROLO Pred: ", pred_location) 270 | 271 | if self.disp_console: print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img) 272 | # Save pred_location to file 273 | utils.save_rolo_output(self.output_path, pred_location, step, self.num_steps, self.batch_size) 274 | 275 | sess.run(self.optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 276 | if step % self.display_step == 0: 277 | # Calculate batch loss 278 | loss = sess.run(self.accuracy, feed_dict={self.x: batch_xs, self.y: batch_ys, self.istate: np.zeros((self.batch_size, 2*self.num_input))}) 279 | if self.disp_console: print "Iter " + str(step*self.batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) #+ "{:.5f}".format(self.accuracy) 280 | total_loss += loss 281 | step += 1 282 | if self.disp_console: print(step) 283 | # show 3 kinds of locations, compare! 284 | print "Optimization Finished!" 
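            # 'step' now counts the windows processed for this sequence (bounded by
            # num_iters = training_iters * 3 / num_steps, an integer division under
            # Python 2); with display_step == 1, avg_loss below is the mean
            # minibatch loss per window.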
285 | avg_loss = total_loss/step 286 | print "Avg loss: " + sequence_name + ": " + str(avg_loss) 287 | 288 | log_file.write(str("{:.3f}".format(avg_loss)) + ' ') 289 | if i+1==num_videos: 290 | log_file.write('\n') 291 | save_path = self.saver.save(sess, self.rolo_weights_file) 292 | print("Model saved in file: %s" % save_path) 293 | log_file.close() 294 | return 295 | 296 | 297 | def ROLO(self, argvs): 298 | self.rolo_utils= utils.ROLO_utils() 299 | self.rolo_utils.loadCfg() 300 | self.params = self.rolo_utils.params 301 | arguments = self.rolo_utils.argv_parser(argvs) 302 | if self.rolo_utils.flag_train is True: 303 | self.training(utils.x_path, utils.y_path) 304 | elif self.rolo_utils.flag_track is True: 305 | self.build_networks() 306 | self.track_from_file(utils.file_in_path) 307 | elif self.rolo_utils.flag_detect is True: 308 | self.build_networks() 309 | self.detect_from_file(utils.file_in_path) 310 | else: 311 | self.train_30() 312 | 313 | 314 | '''----------------------------------------main-----------------------------------------------------''' 315 | def main(argvs): 316 | ROLO_TF(argvs) 317 | 318 | if __name__=='__main__': 319 | main(' ') 320 | 321 | -------------------------------------------------------------------------------- /update/src/testing.py: -------------------------------------------------------------------------------- 1 | from utils_dataset import * 2 | from utils_draw_coord import debug_decimal_coord 3 | from utils_io_folder import * 4 | from utils_io_coord import * 5 | 6 | def get_batch_by_repeat(ndarray, batchsize): 7 | batch_ndarray = [] 8 | for id in range(batchsize): 9 | batch_ndarray.append(ndarray) 10 | return batch_ndarray 11 | 12 | 13 | def test(self, sess, loss, batch_pred_coords): 14 | print("\n\n\n--------------------------------------------TESTING OTB-50---------------------------------------------------------\n") 15 | num_videos = 50 16 | loss_dataset_total = 0 17 | OTB_folder_path = "/home/ngh/dev/ROLO-dev/benchmark/DATA/" 18 | 19 | for video_id in range(num_videos): 20 | if video_id in [1, 5, 16, 20, 21, 22, 23, 28, 30, 32, 36, 42, 43, 46]: continue 21 | 22 | [img_wid, img_ht, sequence_name, st_frame, self.training_iters] = choose_video_sequence_from_OTB50(video_id) 23 | print('testing sequence: ', sequence_name) 24 | 25 | x_path = os.path.join(OTB_folder_path, sequence_name, 'yolo_out/') 26 | y_path = os.path.join(OTB_folder_path, sequence_name, 'groundtruth_rect.txt') 27 | self.output_path = os.path.join(OTB_folder_path, sequence_name, 'rolo_loc_test/') 28 | create_folder(self.output_path) 29 | 30 | img_folder_path = os.path.join(OTB_folder_path, sequence_name, 'img/') 31 | img_paths = get_immediate_childfile_paths(img_folder_path) 32 | 33 | loss_seq_total = frame_id = 0 34 | offset_id = self.nsteps 35 | 36 | init_state_zeros = np.zeros((self.batchsize, 2*self.len_vec)) 37 | 38 | while frame_id < self.training_iters- self.nsteps: 39 | 40 | ''' The index start from zero, while the frame usually starts from one ''' 41 | st_id = st_frame - 1 42 | if frame_id < st_id: 43 | frame_id += 1 44 | continue 45 | 46 | ''' Load input data & ground truth ''' 47 | xs = load_vecs_of_stepsize_in_numpy_folder(x_path, 48 | frame_id - st_id, 49 | self.nsteps) 50 | ys = load_gt_decimal_coords_from_file(y_path, 51 | frame_id - st_id + offset_id, 52 | img_wid, 53 | img_ht) 54 | 55 | batch_xs = get_batch_by_repeat(xs, self.batchsize) 56 | batch_ys = get_batch_by_repeat(ys, self.batchsize) 57 | 58 | batch_xs = np.reshape(batch_xs, [self.batchsize, self.nsteps, 
self.len_vec]) 59 | batch_ys = np.reshape(batch_ys, [self.batchsize, 4]) 60 | 61 | ''' Save pred_location to file ''' 62 | #utils.save_rolo_output(self.output_path, pred_loc, id, self.nsteps, self.batchsize) 63 | 64 | init_state = init_state_zeros 65 | #init_state = sess.run(self.final_state, 66 | # feed_dict={self.x: batch_xs, 67 | # self.y: batch_ys, 68 | # self.istate: init_state_zeros}) 69 | batch_loss = sess.run(loss, 70 | feed_dict={self.x: batch_xs, 71 | self.y: batch_ys, 72 | self.istate: init_state}) 73 | loss_seq_total += batch_loss 74 | 75 | if self.display_validate is True: 76 | coord_decimal_gt = sess.run(self.y, 77 | feed_dict = {self.x: batch_xs, 78 | self.y: batch_ys, 79 | self.istate: init_state}) 80 | coord_decimal_pred = sess.run(batch_pred_coords, 81 | feed_dict = {self.x: batch_xs, 82 | self.y: batch_ys, 83 | self.istate: init_state} 84 | )[0] 85 | 86 | img = cv2.imread(img_paths[frame_id]) 87 | debug_decimal_coord(img, coord_decimal_pred) 88 | 89 | frame_id += 1 90 | 91 | loss_seq_avg = loss_seq_total / frame_id 92 | print "Avg loss for " + sequence_name + ": " + str(loss_seq_avg) 93 | loss_dataset_total += loss_seq_avg 94 | 95 | print('Total loss of Dataset: %f \n', loss_dataset_total) 96 | print("-----------------------------------------TESTING OTB-50 END---------------------------------------------------------\n\n\n") 97 | return loss_dataset_total 98 | -------------------------------------------------------------------------------- /update/src/training.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | import time, random 4 | 5 | from utils_io_coord import * 6 | from utils_io_list import * 7 | from utils_dataset import * 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | import matplotlib.pyplot as plt 12 | import rnn, rnn_cell, cv2 13 | 14 | from testing import test 15 | 16 | class ROLO_TF: 17 | # Buttons 18 | validate = True 19 | validate_step = 1000 20 | display_validate = True 21 | save_step = 1000 22 | display_step = 1 23 | restore_weights = True 24 | display_coords = False 25 | display_regu = False 26 | 27 | # Magic numbers 28 | learning_rate = 0.0001 29 | lamda = 1.0 30 | 31 | # Path 32 | list_pairs_numpy_file_path = '/home/ngh/dev/ROLO-TRACK/training_list/list_0.npy' 33 | dataset_annotation_folder_path = '/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000' 34 | numpy_folder_name = 'VID_loc_gt' # Alternatives: 'VID_loc_gt' and 'VID_loc' 35 | rolo_weights_file = '../rolo_weights.ckpt' 36 | rolo_current_save = '../rolo_weights_temp.ckpt' 37 | 38 | # Vector 39 | len_feat = 4096 40 | len_predict = 6 41 | len_coord = 4 42 | len_vec = 4102 43 | 44 | # Batch 45 | nsteps = 3 46 | batchsize = 16 47 | n_iters = 180000 48 | batch_offset = 0 49 | 50 | # Data 51 | x = tf.placeholder("float32", [None, nsteps, len_vec]) 52 | y = tf.placeholder("float32", [None, len_coord]) 53 | istate = tf.placeholder("float32", [None, 2*len_vec]) 54 | list_batch_pairs = [] 55 | 56 | # Initializing 57 | def __init__(self, argvs = []): 58 | print("ROLO Initializing") 59 | self.ROLO() 60 | 61 | 62 | # Routines: Data 63 | def load_training_list(self): 64 | self.list_batch_pairs = load_list_batch_pairs_from_numpy_file(self.list_pairs_numpy_file_path, 65 | self.batchsize) 66 | 67 | 68 | def load_batch(self, b_id): 69 | max_id = len(self.list_batch_pairs) 70 | if b_id <= max_id: 71 | batch_pairs = self.list_batch_pairs[b_id] 72 | 
batch_frame_ids = [int(batch_pair[1]) for batch_pair in batch_pairs] 73 | 74 | batch_subfolder_names = [batch_pair[0] for batch_pair in batch_pairs] 75 | batch_numpy_folder_paths = [os.path.join(self.dataset_annotation_folder_path, 76 | subfolder_name, 77 | self.numpy_folder_name) 78 | for subfolder_name in batch_subfolder_names] 79 | 80 | attempted_batch_yolovecs = batchload_yolovecs_from_numpy_folders(batch_numpy_folder_paths, 81 | batch_frame_ids, 82 | self.batchsize, 83 | self.nsteps) 84 | if b_id > max_id or attempted_batch_yolovecs == -1: 85 | self.update_dataset_annotation_folder_path() 86 | self.batch_offset = self.iter_id 87 | self.load_training_list() 88 | attempted_batch_yolovecs = False 89 | batch_subfolder_names = [] 90 | batch_frame_ids = [] 91 | return [attempted_batch_yolovecs, batch_subfolder_names, batch_frame_ids] 92 | 93 | 94 | def update_dataset_annotation_folder_path(self): 95 | try: 96 | list_folder_path = list(self.dataset_annotation_folder_path) 97 | list_file_path = list(self.list_pairs_numpy_file_path) 98 | 99 | last_int = int(self.dataset_annotation_folder_path[-1]) 100 | new_int = (last_int + 1)%4 101 | list_folder_path[-1] = str(new_int) 102 | list_file_path[-5] = str(new_int) 103 | 104 | self.dataset_annotation_folder_path = ''.join(list_folder_path) 105 | self.list_pairs_numpy_file_path = ''.join(list_file_path) 106 | print(self.dataset_annotation_folder_path) 107 | print(self.list_pairs_numpy_file_path) 108 | print("Finished 1/4 of all data. Annotation folder updated") 109 | except ValueError: 110 | print("Error updating dataset annotation folder") 111 | 112 | 113 | # Routines: Network 114 | def LSTM(self, name, _X, _istate): 115 | ''' shape: (batchsize, nsteps, len_vec) ''' 116 | _X = tf.transpose(_X, [1, 0, 2]) 117 | ''' shape: (nsteps, batchsize, len_vec) ''' 118 | _X = tf.reshape(_X, [self.nsteps * self.batchsize, self.len_vec]) 119 | ''' shape: n_steps * (batchsize, len_vec) ''' 120 | _X = tf.split(0, self.nsteps, _X) 121 | 122 | lstm_cell = tf.nn.rnn_cell.LSTMCell(self.len_vec, self.len_vec, state_is_tuple = False) 123 | state = _istate 124 | for step in range(self.nsteps): 125 | pred, state = rnn.rnn(lstm_cell, [_X[step]], state, dtype=tf.float32) 126 | tf.get_variable_scope().reuse_variables() 127 | if step == 0: output_state = state 128 | 129 | batch_pred_feats = pred[0][:, 0:4096] 130 | batch_pred_coords = pred[0][:, 4097:4101] 131 | return batch_pred_feats, batch_pred_coords, output_state 132 | 133 | 134 | # Routines: Train & Test 135 | def train(self): 136 | ''' Network ''' 137 | batch_pred_feats, batch_pred_coords, self.final_state = self.LSTM('lstm', self.x, self.istate) 138 | 139 | ''' Loss: L2 ''' 140 | loss = tf.reduce_mean(tf.square(self.y - batch_pred_coords)) * 100 141 | 142 | ''' regularization term: L2 ''' 143 | regularization_term = tf.reduce_mean(tf.square(self.x[:, self.nsteps-1, 0:4096] - batch_pred_feats)) * 100 144 | 145 | ''' Optimizer ''' 146 | optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss + self.lamda * regularization_term) # Adam Optimizer 147 | 148 | ''' Summary for tensorboard analysis ''' 149 | dataset_loss = -1 150 | dataset_loss_best = 100 151 | test_writer = tf.train.SummaryWriter('summary/test') 152 | tf.scalar_summary('dataset_loss', dataset_loss) 153 | summary_op = tf.merge_all_summaries() 154 | 155 | ''' Initializing the variables ''' 156 | init = tf.initialize_all_variables() 157 | self.saver = tf.train.Saver() 158 | batch_states = np.zeros((self.batchsize, 2*self.len_vec)) 159 
| 160 | ''' Launch the graph ''' 161 | with tf.Session() as sess: 162 | if self.restore_weights == True and os.path.isfile(self.rolo_current_save): 163 | sess.run(init) 164 | self.saver.restore(sess, self.rolo_current_save) 165 | print("Weight loaded, finetuning") 166 | else: 167 | sess.run(init) 168 | print("Training from scratch") 169 | 170 | self.load_training_list() 171 | 172 | for self.iter_id in range(self.n_iters): 173 | ''' Load training data & ground truth ''' 174 | batch_id = self.iter_id - self.batch_offset 175 | [batch_vecs, batch_seq_names, batch_frame_ids] = self.load_batch(batch_id) 176 | if batch_vecs is False: continue 177 | 178 | batch_xs = batch_vecs 179 | batch_ys = batchload_gt_decimal_coords_from_VID(self.dataset_annotation_folder_path, 180 | batch_seq_names, 181 | batch_frame_ids, 182 | offset = self.nsteps - 1) 183 | if batch_ys is False: continue 184 | 185 | ''' Reshape data ''' 186 | batch_xs = np.reshape(batch_xs, [self.batchsize, self.nsteps, self.len_vec]) 187 | batch_ys = np.reshape(batch_ys, [self.batchsize, 4]) 188 | 189 | ''' Update weights by back-propagation ''' 190 | sess.run(optimizer, feed_dict={self.x: batch_xs, 191 | self.y: batch_ys, 192 | self.istate: batch_states}) 193 | 194 | if self.iter_id % self.display_step == 0: 195 | ''' Calculate batch loss ''' 196 | batch_loss = sess.run(loss, 197 | feed_dict={self.x: batch_xs, 198 | self.y: batch_ys, 199 | self.istate: batch_states}) 200 | print("Batch loss for iteration %d: %.3f" % (self.iter_id, batch_loss)) 201 | 202 | if self.display_regu is True: 203 | ''' Caculate regularization term''' 204 | batch_regularization = sess.run(regularization_term, 205 | feed_dict={self.x: batch_xs, 206 | self.y: batch_ys, 207 | self.istate: batch_states}) 208 | print("Batch regu for iteration %d: %.3f" % (self.iter_id, batch_regularization)) 209 | 210 | if self.display_coords is True: 211 | ''' Caculate predicted coordinates ''' 212 | coords_predict = sess.run(batch_pred_coords, 213 | feed_dict={self.x: batch_xs, 214 | self.y: batch_ys, 215 | self.istate: batch_states}) 216 | print("predicted coords:" + str(coords_predict[0])) 217 | print("ground truth coords:" + str(batch_ys[0])) 218 | 219 | ''' Save model ''' 220 | if self.iter_id % self.save_step == 1: 221 | self.saver.save(sess, self.rolo_current_save) 222 | print("\n Model saved in file: %s" % self.rolo_current_save) 223 | 224 | ''' Validation ''' 225 | if self.validate == True and self.iter_id % self.validate_step == 0: 226 | dataset_loss = test(self, sess, loss, batch_pred_coords) 227 | 228 | ''' Early-stop regularization ''' 229 | if dataset_loss <= dataset_loss_best: 230 | dataset_loss_best = dataset_loss 231 | self.saver.save(sess, self.rolo_weights_file) 232 | print("\n Better Model saved in file: %s" % self.rolo_weights_file) 233 | 234 | ''' Write summary for tensorboard ''' 235 | summary = sess.run(summary_op, feed_dict={self.x: batch_xs, 236 | self.y: batch_ys, 237 | self.istate: batch_states}) 238 | test_writer.add_summary(summary, self.iter_id) 239 | return 240 | 241 | 242 | def ROLO(self): 243 | print("Initializing ROLO") 244 | self.train() 245 | print("Training Completed") 246 | 247 | '''----------------------------------------main-----------------------------------------------------''' 248 | def main(argvs): 249 | ROLO_TF(argvs) 250 | 251 | if __name__ == "__main__": 252 | main(' ') 253 | -------------------------------------------------------------------------------- /update/unit_test/test_all.py: 
-------------------------------------------------------------------------------- 1 | import sys, os, shutil 2 | sys.path.append(os.path.abspath("../utils/")) 3 | 4 | def test_script(script_name): 5 | cmd = os.path.join(os.getcwd(), script_name) 6 | os.system('{} {}'.format('python', cmd)) 7 | 8 | def clean(): 9 | shutil.rmtree('../temp_folder') 10 | 11 | def main(): 12 | scripts = ['test_utils_natural_sort.py', 13 | 'test_utils_io_file.py', 14 | 'test_utils_io_folder.py', 15 | 'test_utils_io_coord.py', 16 | 'test_utils_io_list.py', 17 | 'test_utils_dataset.py', 18 | 'test_utils_convert_coord.py'] 19 | 20 | for script in scripts: 21 | test_script(script) 22 | 23 | clean() 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_convert_coord.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_io_file import * 4 | from test_utils_io_folder import create_dummy_files_in_folder 5 | from utils_convert_coord import * 6 | import numpy as np 7 | 8 | def test_coord_decimal_to_regular(): 9 | [img_wid, img_ht] = [640, 480] 10 | coord_decimal = [0.44312766, 0.64272517, 0.15378259, 0.27607924] 11 | 12 | coord_regular_converted = coord_decimal_to_regular(coord_decimal, img_wid, img_ht) 13 | coord_decimal_converted = coord_regular_to_decimal(coord_regular_converted, img_wid, img_ht) 14 | 15 | print("\t decimal coords : " + str(coord_decimal)) 16 | print("\t decimal coords after conversion: " + str(coord_decimal_converted)) 17 | 18 | loss = sum(abs(np.array(coord_decimal_converted) - np.array(coord_decimal))) 19 | 20 | if loss <= 0.004: 21 | return True 22 | else: 23 | print("loss for decimal coords is: " + str(loss)) 24 | return False 25 | 26 | 27 | def test_coord_regular_to_decimal(): 28 | [img_wid, img_ht] = [640, 480] 29 | coord_regular = [234, 242, 98, 132] 30 | 31 | coord_decimal_converted = coord_regular_to_decimal(coord_regular, img_wid, img_ht) 32 | coord_regular_converted = coord_decimal_to_regular(coord_decimal_converted, img_wid, img_ht) 33 | 34 | print("\t regular coords : " + str(coord_regular)) 35 | print("\t regular coords after conversion: " + str(coord_regular_converted)) 36 | 37 | loss = sum(abs(np.array(coord_regular_converted) - np.array(coord_regular))) 38 | if loss <= 4: 39 | return True 40 | else: 41 | print("loss for regular coordinates is: " + str(loss)) 42 | return False 43 | 44 | 45 | def main(): 46 | print("Testing: utils_convert_coord") 47 | 48 | passed = test_coord_decimal_to_regular() 49 | if passed is False: 50 | print("\t test_coord_decimal_to_regular failed") 51 | 52 | passed = test_coord_regular_to_decimal() 53 | if passed is False: 54 | print("\t test_coord_regular_to_decimal failed") 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_dataset.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | 4 | from utils_io_coord import * 5 | from utils_io_list import * 6 | from utils_dataset import batchload_gt_decimal_coords_from_VID 7 | from utils_io_folder import create_folder 8 | 9 | def test_batchload_gt_decimal_coords_from_VID(): 10 | VID_annotation_path = 
'/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000' 11 | pairs_list_numpy_file_path = '/home/ngh/dev/ROLO-TRACK/training_list/list.npy' 12 | batchsize = 8 13 | nsteps = 3 14 | offset = nsteps - 1 15 | 16 | list_batch_pairs = load_list_batch_pairs_from_numpy_file(pairs_list_numpy_file_path, batchsize) 17 | 18 | for batch_pairs in list_batch_pairs[0:2]: 19 | batch_subfolder_names = [batch_pair[0] for batch_pair in batch_pairs] 20 | batch_frame_ids = [int(batch_pair[1]) for batch_pair in batch_pairs] 21 | 22 | batch_gt_decimal_coords = batchload_gt_decimal_coords_from_VID(VID_annotation_path, batch_subfolder_names, batch_frame_ids, offset) 23 | 24 | if batch_gt_decimal_coords is False: 25 | return False 26 | else: 27 | return True 28 | 29 | 30 | def main(): 31 | print("Testing: utils_io_dataset") 32 | 33 | finished = test_batchload_gt_decimal_coords_from_VID() 34 | if finished is not True: 35 | print("test_batchload_gt_decimal_coords failed") 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_coord.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | 4 | from utils_io_coord import * 5 | from utils_io_list import * 6 | from utils_io_folder import create_folder 7 | 8 | def test_batchload_yolovecs_from_numpy_folders(): 9 | pairs_list_numpy_file_path = '/home/ngh/dev/ROLO-TRACK/training_list/list.npy' 10 | dataset_annotation_folder_path = '/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000' 11 | batchsize = 8 12 | nsteps = 3 13 | 14 | list_batch_pairs = load_list_batch_pairs_from_numpy_file(pairs_list_numpy_file_path, batchsize) 15 | for batch_pairs in list_batch_pairs[0:10]: 16 | batch_frame_ids = [batch_pair[1] for batch_pair in batch_pairs] 17 | batch_subfolder_names = [batch_pair[0] for batch_pair in batch_pairs] 18 | batch_numpy_folder_paths = [os.path.join(dataset_annotation_folder_path, subfolder_name, 'VID_loc_gt') for subfolder_name in batch_subfolder_names] 19 | 20 | attempted_yolo_batch = batchload_yolovecs_from_numpy_folders(batch_numpy_folder_paths, batch_frame_ids, batchsize, nsteps) 21 | if attempted_yolo_batch is not False: 22 | output_batch_vecs = attempted_yolo_batch 23 | return True 24 | 25 | 26 | def test_save_vec_as_numpy(): 27 | output_folder_path = '../temp_folder' 28 | create_folder(output_folder_path) 29 | 30 | pairs_list_numpy_file_path = '/home/ngh/dev/ROLO-TRACK/training_list/list.npy' 31 | dataset_annotation_folder_path = '/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000' 32 | batchsize = 8 33 | nsteps = 3 34 | 35 | list_batch_pairs = load_list_batch_pairs_from_numpy_file(pairs_list_numpy_file_path, batchsize) 36 | for batch_pairs in list_batch_pairs[0:10]: 37 | batch_frame_ids = [int(batch_pair[1]) for batch_pair in batch_pairs] 38 | batch_subfolder_names = [batch_pair[0] for batch_pair in batch_pairs] 39 | batch_numpy_folder_paths = [os.path.join(dataset_annotation_folder_path, subfolder_name, 'VID_loc_gt') for subfolder_name in batch_subfolder_names] 40 | 41 | attempted_yolo_batch = batchload_yolovecs_from_numpy_folders(batch_numpy_folder_paths, batch_frame_ids, batchsize, nsteps) 42 | if attempted_yolo_batch is not False: 43 | batch_output_vecs = attempted_yolo_batch 44 | for id in range(batchsize): 45 | frame_id = 
batch_frame_ids[id] 46 | output_vec = batch_output_vecs[id][nsteps-1] 47 | save_vec_as_numpy_by_frame_id(output_folder_path, frame_id, output_vec) 48 | return True 49 | 50 | 51 | def main(): 52 | print("Testing: utils_io_coord") 53 | 54 | finished = test_batchload_yolovecs_from_numpy_folders() 55 | if finished is not True: 56 | print("test_batchload_yolovecs_from_numpy_folder failed") 57 | 58 | finished = test_save_vec_as_numpy() 59 | if finished is not True: 60 | print("test_batchsave_vecs_as_numpy failed") 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_file.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_io_file import * 4 | from test_utils_io_folder import create_dummy_files_in_folder 5 | 6 | def test_validate_file_format(): 7 | temp_folder = '../temp_folder' 8 | create_dummy_files_in_folder(temp_folder, file_format = 'txt') 9 | create_dummy_files_in_folder(temp_folder, file_format = 'png') 10 | txt_file_path = os.path.join(temp_folder, '1.txt') 11 | png_file_path = os.path.join(temp_folder, '1.png') 12 | allowed_format = ['txt', 'jpg'] 13 | 14 | expecting_true = validate_file_format(txt_file_path, allowed_format) 15 | expecting_false = validate_file_format(png_file_path, allowed_format) 16 | 17 | if expecting_true is True and expecting_false is False: 18 | return True 19 | else: 20 | return False 21 | 22 | 23 | def main(): 24 | print("Testing: utils_io_file") 25 | 26 | passed = test_validate_file_format() 27 | if passed is False: 28 | print("\t test_validate_file_format failed") 29 | 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_folder.py: -------------------------------------------------------------------------------- 1 | import sys, os, io, shutil 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_io_folder import * 4 | 5 | def test_create_folder(): 6 | folder_path = "../temp_folder/" 7 | 8 | if os.path.exists(folder_path): 9 | shutil.rmtree(folder_path) 10 | create_folder(folder_path) 11 | 12 | if os.path.exists(folder_path): 13 | return True 14 | else: 15 | return False 16 | 17 | 18 | def test_get_immediate_subfolder_paths(): 19 | folder_path = '../temp_folder/' 20 | subfolder_paths = ['../temp_folder/subfolder_1/', '../temp_folder/subfolder_2'] 21 | 22 | create_folder(folder_path) 23 | create_folder(subfolder_paths[0]) 24 | create_folder(subfolder_paths[1]) 25 | 26 | subfolder_paths_derived = get_immediate_subfolder_paths(folder_path) 27 | 28 | if set(subfolder_paths_derived).isdisjoint(subfolder_paths): 29 | return False 30 | else: 31 | return True 32 | 33 | 34 | def test_get_immediate_subfolder_names(): 35 | folder_path = '../temp_folder/' 36 | subfolder_paths = ['../temp_folder/subfolder_1/', '../temp_folder/subfolder_2'] 37 | subfolder_names = ['subfolder_1', 'subfolder_2'] 38 | 39 | create_folder(folder_path) 40 | create_folder(subfolder_paths[0]) 41 | create_folder(subfolder_paths[1]) 42 | 43 | subfolder_names_derived = get_immediate_subfolder_names(folder_path) 44 | 45 | if set(subfolder_names_derived).isdisjoint(subfolder_names): 46 | return False 47 | else: 48 | return True 49 | 50 | 51 | def test_get_immediate_childfile_paths(): 52 | temp_folder = '../temp_folder' 53 | 
create_dummy_files_in_folder(temp_folder) 53 | childfile_paths = [ os.path.join(temp_folder, (str(ct)+ '.txt')) for ct in range(10)] 54 | 55 | childfile_paths_derived = get_immediate_childfile_paths(temp_folder) 56 | shutil.rmtree(temp_folder) 57 | 58 | if set(childfile_paths_derived).isdisjoint(childfile_paths): 59 | return False 60 | else: 61 | return True 62 | 63 | 64 | def test_get_immediate_childfile_names(): 65 | temp_folder = '../temp_folder' 66 | create_dummy_files_in_folder(temp_folder) 67 | childfile_names = [(str(ct)+ '.txt') for ct in range(10)] 68 | 69 | childfile_names_derived = get_immediate_childfile_names(temp_folder) 70 | shutil.rmtree(temp_folder) 71 | 72 | if set(childfile_names_derived).isdisjoint(childfile_names): 73 | return False 74 | else: 75 | return True 76 | 77 | 78 | def create_dummy_files_in_folder(temp_folder, file_format = 'txt'): 79 | create_folder(temp_folder) 80 | for ct in range(10): 81 | file_name = str(ct) + '.' + file_format 82 | file_path = os.path.join(temp_folder, file_name) 83 | with io.FileIO(file_path, "w") as file: 84 | file.write("Hello!") 85 | 86 | 87 | def main(): 88 | print("Testing: utils_io_folder") 89 | 90 | passed = test_create_folder() 91 | if passed is False: 92 | print("\t create_folder failed") 93 | 94 | passed = test_get_immediate_subfolder_names() 95 | if passed is False: 96 | print("\t get_immediate_subfolder_names failed") 97 | 98 | passed = test_get_immediate_subfolder_paths() 99 | if passed is False: 100 | print("\t get_immediate_subfolder_paths failed") 101 | 102 | passed = test_get_immediate_childfile_names() 103 | if passed is False: 104 | print("\t get_immediate_childfile_names failed") 105 | 106 | passed = test_get_immediate_childfile_paths() 107 | if passed is False: 108 | print("\t get_immediate_childfile_paths failed") 109 | 110 | 111 | if __name__ == '__main__': 112 | main() 113 | 114 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_folder.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/ROLO/6612007e35edb73dac734e7a4dac2cd4c1dca6c1/update/unit_test/test_utils_io_folder.pyc -------------------------------------------------------------------------------- /update/unit_test/test_utils_io_list.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_io_list import * 4 | from test_utils_io_folder import * 5 | 6 | def test_generate_pairs_for_each_folder(): 7 | images_folder_path= "folder/path/example" 8 | num_of_frames = 2 9 | 10 | pairs = generate_pairs_for_each_folder(images_folder_path, num_of_frames) 11 | 12 | expected_pair = [("example", 0), ("example", 1)] 13 | if expected_pair == pairs: 14 | return True 15 | else: 16 | return False 17 | 18 | 19 | def test_generate_num_of_frames_list(): 20 | folders_paths_list = ['../temp_folder_1', '../temp_folder_2'] 21 | for folder_path in folders_paths_list: 22 | create_folder(folder_path) 23 | create_dummy_files_in_folder(folder_path) 24 | 25 | num_of_frames_list = generate_num_of_frames_list(folders_paths_list) 26 | 27 | for folder_path in folders_paths_list: 28 | shutil.rmtree(folder_path) 29 | 30 | expected_list = [10, 10] 31 | if expected_list == num_of_frames_list: 32 | return True 33 | else: 34 | return False 35 | 36 | 37 | def test_generate_pairs_with_two_lists(): 38 | folders_paths_list = ['../temp_folder_1',
'../temp_folder_2'] 39 | num_of_frames_list = [1, 2] 40 | 41 | pairs_list = generate_pairs_with_two_lists(folders_paths_list, num_of_frames_list) 42 | 43 | expected_list = [('temp_folder_1', 0), ('temp_folder_2', 0), ('temp_folder_2', 1)] 44 | if expected_list == pairs_list: 45 | return True 46 | else: 47 | return False 48 | 49 | 50 | def test_generate_pairs_list_for_training(): 51 | dataset_folder_path = '/home/ngh/dev/ROLO-dev/benchmark/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0000/' 52 | output_folder_path = '/home/ngh/dev/ROLO-TRACK/training_list/' 53 | create_folder(output_folder_path) 54 | 55 | txt_file_path = os.path.join(output_folder_path, 'list_0.txt') 56 | numpy_file_path = os.path.join(output_folder_path, 'list_0') 57 | 58 | finished = generate_pairs_list_for_training(dataset_folder_path, numpy_file_path, txt_file_path) 59 | 60 | if finished is True: 61 | return True 62 | else: 63 | return False 64 | 65 | 66 | def main(): 67 | print("Testing: utils_io_list") 68 | 69 | passed = test_generate_num_of_frames_list() 70 | if passed is False: 71 | print("test_generate_num_of_frames_list failed") 72 | 73 | passed = test_generate_pairs_for_each_folder() 74 | if passed is False: 75 | print("test_generate_pairs_for_each_folder failed") 76 | 77 | passed = test_generate_pairs_with_two_lists() 78 | if passed is False: 79 | print("test_generate_pairs_with_two_lists failed") 80 | 81 | passed = test_generate_pairs_list_for_training() 82 | if passed is False: 83 | print("test_generate_pairs_list_for_training failed") 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /update/unit_test/test_utils_natural_sort.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.abspath("../utils/")) 3 | from utils_natural_sort import * 4 | 5 | def test_natural_sort(): 6 | test_string_list_desired = ['A00001', 'A00002', 'A00010', 'A00011', 'B00001', 'B00002', 'B00010', 'B00011'] 7 | test_string_list = ['B00002', 'A00010', 'A00011', 'B00010', 'A00001', 'B00011', 'A00002', 'B00001'] 8 | 9 | natural_sort(test_string_list) 10 | 11 | if test_string_list == test_string_list_desired: 12 | return True 13 | else: 14 | return False 15 | 16 | 17 | def main(): 18 | print("Testing: utils_natural_sort") 19 | 20 | passed = test_natural_sort() 21 | if passed is False: 22 | print("\t natural_sort failed") 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /update/utils/utils_cal_iou.py: -------------------------------------------------------------------------------- 1 | 2 | def compute_iou_with_regular_coord(box1, box2): 3 | # Prevent NaN in benchmark results 4 | validate_box(box1) 5 | validate_box(box2) 6 | 7 | # change float to int, in order to prevent overflow 8 | box1 = map(int, box1) 9 | box2 = map(int, box2) 10 | 11 | tb = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2]) 12 | lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3]) 13 | if tb <= 0 or lr <= 0 : 14 | intersection = 0 15 | else : intersection = tb*lr 16 | return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection) 17 | 18 | 19 | def compute_iou_with_decimal_coord(box1, box2, w, h): 20 | box1 = coord_decimal_to_regular(w,h,box1) 21 | box2 = coord_decimal_to_regular(w,h,box2) 22 | return 
--------------------------------------------------------------------------------
/update/utils/utils_convert_coord.py:
--------------------------------------------------------------------------------
1 | # There are 3 kinds of representation of coordinates
2 | # 1. Coord_decimal: (x0, y0, w, h), each a float in [0, 1], a ratio to the image width and height, respectively.
3 | #    (x0, y0) is the middle point of the bounding box.
4 | #    Used by YOLO output, ROLO input and output.
5 | # 2. Coord_regular: (X1, Y1, W, H), pixel values in int.
6 | #    (X1, Y1) is the top-left point of the bounding box.
7 | #    The ground truth boxes read from files are usually of this format.
8 | # 3. Detection in Vector: [4096-d feature_vector] + (class, x0, y0, w, h, prob).
9 | #    The same as Coord_decimal, except that the detection carries more information.
10 | #-----------------------------------------------------------------------------------------------
11 | 
12 | def coord_regular_to_decimal(coord_regular, img_wid, img_ht):
13 |     img_wid *= 1.0
14 |     img_ht *= 1.0
15 |     coord_decimal = list(coord_regular)
16 | 
17 |     # convert the top-left point (x, y) to the mid point (x, y)
18 |     coord_decimal[0] += coord_regular[2] / 2.0
19 |     coord_decimal[1] += coord_regular[3] / 2.0
20 | 
21 |     # convert to [0, 1]
22 |     coord_decimal[0] /= img_wid
23 |     coord_decimal[1] /= img_ht
24 |     coord_decimal[2] /= img_wid
25 |     coord_decimal[3] /= img_ht
26 | 
27 |     return coord_decimal
28 | 
29 | 
30 | def coord_decimal_to_regular(coord_decimal, img_wid, img_ht):
31 |     w_box = int(coord_decimal[2] * img_wid)
32 |     h_box = int(coord_decimal[3] * img_ht)
33 |     x_topleft = int( img_wid * (coord_decimal[0] - coord_decimal[2]/2.0) )
34 |     y_topleft = int( img_ht * (coord_decimal[1] - coord_decimal[3]/2.0) )
35 | 
36 |     coord_regular = [x_topleft, y_topleft, w_box, h_box]
37 | 
38 |     return coord_regular
39 | 
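40 | 
41 | 
42 | # Round-trip sanity check (added for illustration): on a 640x480 image, the
43 | # regular box (100, 60, 40, 120) maps to decimal (0.1875, 0.25, 0.0625, 0.25)
44 | # and converts back without loss.
45 | if __name__ == '__main__':
46 |     coord_decimal = coord_regular_to_decimal([100, 60, 40, 120], 640, 480)
47 |     print(coord_decimal)                                      # [0.1875, 0.25, 0.0625, 0.25]
48 |     print(coord_decimal_to_regular(coord_decimal, 640, 480))  # [100, 60, 40, 120]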
--------------------------------------------------------------------------------
/update/utils/utils_convert_heatmap.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def coordinates_to_heatmap_vec(coord):
4 |     heatmap_vec = np.zeros(1024)
5 |     [x1, y1, x2, y2] = coord
6 |     for y in range(y1, y2+1):
7 |         for x in range(x1, x2+1):
8 |             index = y*32 + x
9 |             heatmap_vec[index] = 1.0  # alternatively: random.uniform(0.8, 1)
10 |     return heatmap_vec
11 | 
12 | 
13 | def heatmap_vec_to_heatmap(heatmap_vec):
14 |     size = 32
15 |     heatmap = np.zeros((size, size))
16 |     for y in range(0, size):
17 |         for x in range(0, size):
18 |             index = y*size + x
19 |             heatmap[y][x] = heatmap_vec[index]
20 |     return heatmap
21 | 
--------------------------------------------------------------------------------
/update/utils/utils_dataset.py:
--------------------------------------------------------------------------------
1 | # Licensed under the Apache License, Version 2.0 (the "License");
2 | # you may not use this file except in compliance with the License.
3 | # You may obtain a copy of the License at
4 | 
5 | #     http://www.apache.org/licenses/LICENSE-2.0
6 | 
7 | # Unless required by applicable law or agreed to in writing, software
8 | # distributed under the License is distributed on an "AS IS" BASIS,
9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | '''
13 |    Script File: utils_dataset.py
14 |    [Input] A network model, a file
15 |    [Output] A file with Detection or Tracking results
16 |    Description:
17 |        ROLO is short for Recurrent YOLO, aimed at object detection, tracking and prediction
18 |    Paper: http://arxiv.org/abs/1607.05781
19 |    Author: Guanghan Ning
20 |    Webpage: http://guanghan.info/
21 | '''
22 | 
23 | 
24 | import cv2
25 | import os, sys, time, math, re
26 | import numpy as np
27 | import tensorflow as tf
28 | import matplotlib.pyplot as plt
29 | from utils_io_folder import get_immediate_subfolder_names
30 | from utils_io_coord import load_lines_from_txt_file, load_regular_coord_by_line
31 | from utils_convert_coord import coord_regular_to_decimal
32 | 
33 | def batchload_gt_decimal_coords_from_VID(VID_annotation_path, batch_seq_names, batch_frame_ids, offset = 3):
34 |     batch_decimal_coords = []
35 |     batch_seq_paths = [os.path.join(VID_annotation_path, seq_name)
36 |                        for seq_name in batch_seq_names]
37 | 
38 |     for id, seq_path in enumerate(batch_seq_paths):
39 |         frame_id = batch_frame_ids[id]
40 |         line_id = frame_id + offset  # Prediction of future frame
41 | 
42 |         info_file_path = find_sequence_info_file_from_VID(seq_path)
43 |         [img_wid, img_ht] = load_sequence_info(info_file_path)
44 | 
45 |         gt_file_path = find_sequence_gt_file_from_VID(seq_path)
46 |         decimal_coord = load_gt_decimal_coords_from_file(gt_file_path, line_id, img_wid, img_ht)
47 |         batch_decimal_coords.append(decimal_coord)
48 | 
49 |     return batch_decimal_coords
50 | 
51 | 
52 | def load_gt_decimal_coords_from_file(gt_file_path, line_id, img_wid, img_ht):
53 |     lines = load_lines_from_txt_file(gt_file_path)
54 |     regular_coord = load_regular_coord_by_line(lines, line_id)
55 |     if regular_coord is False: return False
56 | 
57 |     decimal_coord = coord_regular_to_decimal(regular_coord, img_wid, img_ht)
58 |     return decimal_coord
59 | 
60 | 
61 | def find_sequence_info_file_from_VID(seq_path):
62 |     info_file_path = os.path.join(seq_path, "sequence_info.txt")
63 |     return info_file_path
64 | 
65 | 
66 | def find_sequence_gt_file_from_VID(seq_path):
67 |     gt_file_path = os.path.join(seq_path, "groundtruth_rect.txt")
68 |     return gt_file_path
69 | 
70 | 
71 | def load_sequence_info(info_file_path):
72 |     with open(info_file_path, "r") as text_file:
73 |         lines = text_file.read().split(' ')
74 |     [img_wid, img_ht, sequence_name, training_iters] = [int(lines[0]), int(lines[1]), lines[2], int(lines[3])]
75 |     return [img_wid, img_ht]
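76 | 
77 | 
78 | # Format example (added; the values shown are hypothetical): sequence_info.txt
79 | # is expected to hold a single space-separated line such as
80 | #     1280 720 ILSVRC2015_train_00000000 464
81 | # which load_sequence_info parses as [img_wid, img_ht, sequence_name, training_iters].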
82 | 
83 | 
84 | def choose_video_sequence_from_VID_by_id(folder, i):
85 |     if i < 1000:
86 |         mfolder = folder + '/ILSVRC2015_VID_train_0000'
87 |         j = i
88 |     elif i < 2000:
89 |         mfolder = folder + '/ILSVRC2015_VID_train_0001'
90 |         j = i % 1000
91 |     elif i < 3000:
92 |         mfolder = folder + '/ILSVRC2015_VID_train_0002'
93 |         j = i % 2000
94 |     else:
95 |         mfolder = folder + '/ILSVRC2015_VID_train_0003'
96 |         j = i % 3000
97 |     subfolders = get_immediate_subfolder_names(mfolder)
98 |     subfolder_sequence_info_file = os.path.join(mfolder, subfolders[j], 'sequence_info.txt')
99 |     with open(subfolder_sequence_info_file, "r") as text_file:
100 |         lines = text_file.read().split(' ')
101 |     [img_wid, img_ht, sequence_name, training_iters] = [int(lines[0]), int(lines[1]), lines[2], int(lines[3])]
102 |     return [img_wid, img_ht, sequence_name, training_iters]
103 | 
104 | 
105 | # For OTB-50, one entry per sequence:
106 | # (w_img, h_img, sequence_name, testing_iters, start_frame)
107 | OTB50_SEQUENCES = [
108 |     (576, 432, 'Basketball', 725, 1),
109 |     (640, 360, 'Biker', 142, 1),
110 |     (720, 400, 'Bird1', 408, 1),
111 |     (640, 480, 'BlurBody', 334, 1),
112 |     (640, 480, 'BlurCar2', 585, 1),
113 |     (640, 480, 'BlurFace', 493, 1),
114 |     (640, 480, 'BlurOwl', 631, 1),
115 |     (640, 360, 'Bolt', 350, 1),
116 |     (640, 480, 'Box', 1161, 1),
117 |     (320, 240, 'Car1', 1020, 1),
118 |     (360, 240, 'Car4', 659, 1),
119 |     (320, 240, 'CarDark', 393, 1),
120 |     (640, 272, 'CarScale', 252, 1),
121 |     (320, 240, 'ClifBar', 472, 1),
122 |     (320, 240, 'Couple', 140, 1),
123 |     (600, 480, 'Crowds', 347, 1),
124 |     (320, 240, 'David', 770, 300),
125 |     (704, 400, 'Deer', 71, 1),
126 |     (400, 224, 'Diving', 214, 1),
127 |     (640, 360, 'DragonBaby', 113, 1),
128 |     (720, 480, 'Dudek', 1145, 1),
129 |     (624, 352, 'Football', 74, 1),
130 |     (360, 240, 'Freeman4', 283, 1),
131 |     (128, 96, 'Girl', 500, 1),
132 |     (480, 640, 'Human3', 1698, 1),
133 |     (640, 480, 'Human4', 667, 1),
134 |     (480, 640, 'Human6', 792, 1),
135 |     (320, 240, 'Human9', 302, 1),
136 |     (720, 304, 'Ironman', 166, 1),
137 |     (416, 234, 'Jump', 122, 1),
138 |     (352, 288, 'Jumping', 313, 1),
139 |     (640, 480, 'Liquor', 1741, 1),
140 |     (800, 336, 'Matrix', 100, 1),
141 |     (640, 360, 'MotorRolling', 164, 1),
142 |     (312, 233, 'Panda', 1000, 1),
143 |     (352, 240, 'RedTeam', 1918, 1),
144 |     (624, 352, 'Shaking', 365, 1),
145 |     (624, 352, 'Singer2', 366, 1),
146 |     (640, 360, 'Skating1', 400, 1),
147 |     (640, 352, 'Skating2-1', 473, 1),
148 |     (640, 352, 'Skating2-2', 473, 1),
149 |     (640, 360, 'Skiing', 81, 1),
150 |     (640, 360, 'Soccer', 392, 1),
151 |     (480, 360, 'Surfer', 376, 1),
152 |     (320, 240, 'Sylvester', 1345, 1),
153 |     (640, 480, 'Tiger2', 365, 1),
154 |     (320, 240, 'Trellis', 569, 1),
155 |     (768, 576, 'Walking', 412, 1),
156 |     (384, 288, 'Walking2', 500, 1),
157 |     (352, 288, 'Woman', 597, 1),
158 | ]
159 | 
160 | 
161 | def choose_video_sequence_from_OTB50(test):
162 |     [w_img, h_img, sequence_name, testing_iters, start_frame] = OTB50_SEQUENCES[test]
163 |     # For VOT-2015, read the list.txt and get the corresponding sequences.
164 |     return [w_img, h_img, sequence_name, start_frame, testing_iters]
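165 | 
166 | 
167 | # Example (added): entry 16 ('David') is the only OTB-50 sequence whose
168 | # annotations start at frame 300 rather than frame 1.
169 | if __name__ == '__main__':
170 |     print(choose_video_sequence_from_OTB50(16))  # [320, 240, 'David', 300, 770]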
--------------------------------------------------------------------------------
/update/utils/utils_draw_coord.py:
--------------------------------------------------------------------------------
1 | from utils_convert_coord import coord_regular_to_decimal, coord_decimal_to_regular
2 | import cv2
3 | 
4 | def debug_decimal_coord(img, coord_decimal, prob = None, class_id = None):
5 |     img_ht, img_wid, nchannels = img.shape
6 | 
7 |     coord_regular = coord_decimal_to_regular(coord_decimal, img_wid, img_ht)
8 | 
9 |     debug_regular_coord(img, coord_regular, prob, class_id)
10 | 
11 | 
12 | def debug_regular_coord(img, coord_regular, prob = None, class_id = None):
13 |     img_cp = img.copy()
14 |     [x_topleft, y_topleft, w_box, h_box] = coord_regular
15 | 
16 |     cv2.rectangle(img_cp,
17 |                   (x_topleft, y_topleft),
18 |                   (x_topleft + w_box, y_topleft + h_box),
19 |                   (0,255,0), 2)
20 | 
21 |     if prob is not None and class_id is not None:
22 |         assert(isinstance(prob, (float)))
23 |         assert(isinstance(class_id, (int, long)))
24 |         cv2.rectangle(img_cp,
25 |                       (x_topleft, y_topleft - 20),
26 |                       (x_topleft + w_box, y_topleft),
27 |                       (125,125,125), -1)
28 |         cv2.putText(img_cp,
29 |                     str(class_id) + ' : %.2f' % prob,
30 |                     (x_topleft + 5, y_topleft - 7),
31 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)
32 | 
33 |     cv2.imshow('debug_detection', img_cp)
34 |     cv2.waitKey(1)
35 | 
36 | 
37 | def debug_3_locations(img, gt_location, yolo_location, rolo_location):
38 |     img_cp = img.copy()
39 |     for i in range(3):  # b-g-r channels
40 |         if i == 0: location = gt_location; color = (0, 0, 255)      # red for gt
41 |         elif i == 1: location = yolo_location; color = (255, 0, 0)  # blue for yolo
42 |         elif i == 2: location = rolo_location; color = (0, 255, 0)  # green for rolo
43 |         x = int(location[0])
44 |         y = int(location[1])
45 |         w = int(location[2])
46 |         h = int(location[3])
47 |         # yolo/rolo boxes are centre-based; the ground truth box is top-left based
48 |         if i == 1 or i == 2: cv2.rectangle(img_cp, (x-w//2, y-h//2), (x+w//2, y+h//2), color, 2)
49 |         elif i == 0: cv2.rectangle(img_cp, (x, y), (x+w, y+h), color, 2)
50 |     cv2.imshow('3 locations', img_cp)
51 |     cv2.waitKey(100)
52 |     return img_cp
53 | 
--------------------------------------------------------------------------------
/update/utils/utils_draw_heatmap.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | 
3 | def draw_heatmap(heatmap):
4 |     fig = plt.figure(1, figsize=(10,10))
5 |     ax2 = fig.add_subplot(222)
6 |     ax2.imshow(heatmap, origin='lower', aspect='auto')
7 |     ax2.set_title("heatmap")
8 |     plt.show()
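9 | 
10 | 
11 | # Example (added): render a dummy 32x32 heatmap built with the helpers from
12 | # utils_convert_heatmap in this package.
13 | if __name__ == '__main__':
14 |     from utils_convert_heatmap import coordinates_to_heatmap_vec, heatmap_vec_to_heatmap
15 |     vec = coordinates_to_heatmap_vec([8, 8, 24, 24])
16 |     draw_heatmap(heatmap_vec_to_heatmap(vec))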
--------------------------------------------------------------------------------
/update/utils/utils_io_coord.py:
--------------------------------------------------------------------------------
1 | 
2 | # There are 3 kinds of representation of coordinates
3 | # 1. Coord_decimal: (x0, y0, w, h), each a float in [0, 1], a ratio to the image width and height, respectively.
4 | #    (x0, y0) is the middle point of the bounding box.
5 | #    Used by YOLO output, ROLO input and output.
6 | # 2. Coord_regular: (X1, Y1, W, H), pixel values in int.
7 | #    (X1, Y1) is the top-left point of the bounding box.
8 | #    The ground truth boxes read from files are usually of this format.
9 | # 3. Detection in Vector: [4096-d feature_vector] + (class, x0, y0, w, h, prob).
10 | #    The same as Coord_decimal, except that the detection carries more information.
11 | #-----------------------------------------------------------------------------------------------
12 | from utils_io_folder import get_immediate_childfile_paths
13 | from utils_cal_iou import compute_iou_with_decimal_coord
14 | import numpy as np
15 | import math
16 | import os
17 | 
18 | ''' 1. I/O with numpy '''
19 | 
20 | ''' 1.1 Save '''
21 | def save_vec_as_numpy_by_frame_id(output_folder_path, frame_id, output_vec):
22 |     filename = str(frame_id)
23 |     save_vec_as_numpy_by_name(output_folder_path, filename, output_vec)
24 | 
25 | 
26 | def save_vec_as_numpy_by_name(output_folder_path, filename, output_vec):
27 |     filename_without_ext = os.path.splitext(filename)[0]
28 |     output_file_path = os.path.join(output_folder_path, filename_without_ext)
29 |     np.save(output_file_path, output_vec)
30 | 
31 | 
32 | ''' 1.2 Load '''
33 | def batchload_yolovecs_from_numpy_folders(batch_folders_paths, batch_frame_ids, batchsize, nsteps):
34 |     batch_vecs = batchload_vecs_from_numpy_folders(batch_folders_paths, batch_frame_ids, batchsize, nsteps)
35 | 
36 |     if batch_vecs is not False and batch_vecs != -1:
37 |         for vec in batch_vecs:
38 |             # zero out the class id and prob fields of the first step's vector
39 |             vec[0][4096] = 0
40 |             vec[0][4101] = 0
41 |     return batch_vecs
42 | 
43 | 
44 | def batchload_vecs_from_numpy_folders(batch_folders_paths, batch_frame_ids, batchsize, nsteps):
45 |     try:
46 |         assert(len(batch_folders_paths) == batchsize)
47 |     except AssertionError:
48 |         print("\t Not enough pairs to form a minibatch, skip")
49 |         return -1
50 | 
51 |     batch_vecs = []
52 |     for ct, folder_path in enumerate(batch_folders_paths):
53 |         frame_id = int(batch_frame_ids[ct])
54 |         nsteps_vecs = load_vecs_of_stepsize_in_numpy_folder(folder_path, frame_id, nsteps)
55 |         batch_vecs.append(nsteps_vecs)
56 | 
57 |     try:
58 |         # shape check only: verify the batch reshapes to [batchsize * nsteps, 4102]
59 |         test_vecs = np.reshape(batch_vecs, [batchsize * nsteps, 4102])
60 |         return batch_vecs
61 |     except ValueError:
62 |         print("\t Not enough frames in video (it's ok), skipped this minibatch")
63 |         return False
64 | 
65 | 
66 | def load_vecs_of_stepsize_in_numpy_folder(folder_path, frame_id, nsteps):
67 |     file_paths = get_file_paths_of_stepsize_in_numpy_folder(folder_path, frame_id, nsteps)
68 |     nsteps_vecs = []
69 |     for file_path in file_paths:
70 |         vec_from_file = load_vec_from_numpy_file(file_path)
71 |         nsteps_vecs.append(vec_from_file)
72 |     return nsteps_vecs
73 | 
74 | 
75 | def get_file_paths_of_stepsize_in_numpy_folder(folder_path, frame_id, nsteps):
76 |     file_paths = get_immediate_childfile_paths(folder_path)
77 |     [st, ed] = get_range_of_stepsize_by_frame_id(nsteps, frame_id)
78 |     file_paths_batch = file_paths[st:ed]
79 |     return file_paths_batch
80 | 
81 | 
82 | def load_vec_from_numpy_file(file_path):
83 |     vec_from_file = np.load(file_path)
84 |     vec_from_file = np.reshape(vec_from_file, 4102)
85 |     return vec_from_file
86 | 
87 | 
88 | def batchload_decimal_coords_from_vecs(batch_vecs):
89 |     batch_coords = [vec[4097:4101] for vec in batch_vecs]
90 |     return batch_coords
91 | 
92 | 
93 | def load_decimal_coord_from_vec(vec_from_file):
94 |     coord_decimal = vec_from_file[4097:4101]
95 |     return coord_decimal
96 | 
97 | 
98 | def get_range_of_stepsize_by_frame_id(nsteps, frame_id, offset = 0):
99 |     [st, ed] = [frame_id, frame_id + nsteps]
100 |     st_ed_range = [st + offset, ed + offset]
101 |     return st_ed_range
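102 | 
103 | 
104 | # Added illustration (this helper is not in the original module): how one
105 | # 4102-d detection vector decomposes, matching the slices used above.
106 | def split_detection_vec(vec_from_file):
107 |     feature_vec = vec_from_file[0:4096]       # 4096-d YOLO feature vector
108 |     class_id = vec_from_file[4096]            # zeroed out before training
109 |     coord_decimal = vec_from_file[4097:4101]  # (x0, y0, w, h)
110 |     prob = vec_from_file[4101]                # detection confidence, also zeroed
111 |     return feature_vec, class_id, coord_decimal, prob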
112 | 
113 | 
114 | ''' 2. I/O with text file '''
115 | 
116 | def load_lines_from_txt_file(txt_file_path):
117 |     with open(txt_file_path, "r") as txtfile:
118 |         lines = txtfile.read().split('\n')
119 |     return lines
120 | 
121 | 
122 | def load_regular_coord_by_line(lines, line_id):
123 |     line = lines[line_id]
124 |     elems = line.split('\t')
125 |     if len(elems) < 4:
126 |         elems = line.split(',')
127 |         if len(elems) < 4:
128 |             elems = line.split(' ')
129 | 
130 |     try:
131 |         [X1, Y1, W, H] = elems[0:4]
132 |         coord_regular = [int(X1), int(Y1), int(W), int(H)]
133 |         return coord_regular
134 |     except ValueError:
135 |         print("Not enough ground truth in text file.")
136 |         return False
137 | 
138 | 
139 | def find_best_decimal_coord(multiple_coords_decimal, gt_coord_decimal, img_wid, img_ht):
140 |     # img_wid and img_ht are needed to call compute_iou_with_decimal_coord
141 |     # with its full signature
142 |     max_iou = 0
143 |     index = 0
144 |     for id, coord_decimal in enumerate(multiple_coords_decimal):
145 |         iou = compute_iou_with_decimal_coord(coord_decimal, gt_coord_decimal, img_wid, img_ht)
146 |         if iou >= max_iou:
147 |             max_iou = iou
148 |             index = id
149 |     return multiple_coords_decimal[index]
150 | 
151 | 
152 | def validate_coord(box):
153 |     for i in range(len(box)):
154 |         if math.isnan(box[i]): box[i] = 0
155 | 
--------------------------------------------------------------------------------
/update/utils/utils_io_file.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | 
4 | def validate_file_format(file_in_path, allowed_format):
5 |     if os.path.isfile(file_in_path) and os.path.splitext(file_in_path)[1][1:] in allowed_format:
6 |         return True
7 |     else:
8 |         return False
9 | 
10 | 
11 | class Error(Exception):
12 |     """Base class for other exceptions"""
13 |     pass
14 | 
15 | 
16 | class FormatIncorrectError(Error):
17 |     """Raised when the file is of incorrect format"""
18 |     pass
19 | 
20 | 
21 | def is_image(file_in_path):
22 |     if validate_file_format(file_in_path, ['jpg', 'JPEG', 'png', 'JPG']):
23 |         return True
24 |     else:
25 |         return False
26 | 
27 | 
28 | def is_video(file_in_path):
29 |     if validate_file_format(file_in_path, ['avi', 'mkv', 'mp4']):
30 |         return True
31 |     else:
32 |         return False
33 | 
34 | 
35 | def file_to_img(filepath):
36 |     # cv2.imread does not raise on failure; it returns None
37 |     img = cv2.imread(filepath)
38 |     if img is None:
39 |         print('cannot open image file: ' + filepath)
40 |     return img
41 | 
42 | 
43 | def file_to_video(filepath):
44 |     video = cv2.VideoCapture(filepath)
45 |     if not video.isOpened():
46 |         print('cannot open video file: ' + filepath)
47 |     return video
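48 | 
49 | 
50 | # Example (added): the format check also requires the file to exist on disk,
51 | # so a missing path returns False even with an allowed extension.
52 | if __name__ == '__main__':
53 |     print(validate_file_format('missing.jpg', ['jpg']))  # False
54 |     print(is_video('missing.mp4'))                       # False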
--------------------------------------------------------------------------------
/update/utils/utils_io_folder.py:
--------------------------------------------------------------------------------
1 | import os
2 | from utils_natural_sort import natural_sort
3 | 
4 | def get_immediate_subfolder_paths(folder_path):
5 |     subfolder_names = get_immediate_subfolder_names(folder_path)
6 |     subfolder_paths = [os.path.join(folder_path, subfolder_name) for subfolder_name in subfolder_names]
7 |     return subfolder_paths
8 | 
9 | 
10 | def get_immediate_subfolder_names(folder_path):
11 |     subfolder_names = [folder_name for folder_name in os.listdir(folder_path)
12 |                        if os.path.isdir(os.path.join(folder_path, folder_name))]
13 |     natural_sort(subfolder_names)
14 |     return subfolder_names
15 | 
16 | 
17 | def get_immediate_childfile_paths(folder_path):
18 |     files_names = get_immediate_childfile_names(folder_path)
19 |     files_full_paths = [os.path.join(folder_path, file_name) for file_name in files_names]
20 |     return files_full_paths
21 | 
22 | 
23 | def get_immediate_childfile_names(folder_path):
24 |     files_names = [file_name for file_name in next(os.walk(folder_path))[2]]
25 |     natural_sort(files_names)
26 |     return files_names
27 | 
28 | 
29 | def get_folder_name_from_path(folder_path):
30 |     path, folder_name = os.path.split(folder_path)
31 |     return folder_name
32 | 
33 | 
34 | def create_folder(folder_path):
35 |     if not os.path.exists(folder_path):
36 |         os.makedirs(folder_path)
37 | 
--------------------------------------------------------------------------------
/update/utils/utils_io_heatmap.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from utils_io_folder import *
3 | from utils_convert_heatmap import coordinates_to_heatmap_vec
4 | 
5 | # NOTE: load_dataset_gt, find_gt_location, locations_from_0_to_1 and
6 | # loc_to_coordinates are expected from the legacy utils/ROLO_utils.py;
7 | # they have no counterparts in update/utils yet.
8 | def load_unready_heatmap(tensorflow_x_path, batchsize, nsteps, id, w_img, h_img):
9 |     lines = load_dataset_gt(tensorflow_x_path)
10 |     [st, ed] = [id, id + batchsize * nsteps]
11 | 
12 |     heatmap_vec_batch = []
13 |     for id in range(st, ed):
14 |         location = find_gt_location(lines, id)
15 |         location = locations_from_0_to_1(w_img, h_img, location)
16 |         coords = loc_to_coordinates(location)
17 |         heatmap_vec = [coordinates_to_heatmap_vec(coords)]
18 |         heatmap_vec_batch.append(heatmap_vec)
19 |     return heatmap_vec_batch
20 | 
21 | 
22 | def load_ready_heatmap(folder_path, params, id):
23 |     batchsize = params['batchsize']
24 |     nsteps = params['nsteps']
25 |     vec_len = params['vec_len']
26 | 
27 |     heatmap_files_paths = get_immediate_childfile_paths(folder_path)
28 |     [st, ed] = [id, id + batchsize * nsteps]
29 |     paths_batch = heatmap_files_paths[st:ed]
30 | 
31 |     heatmap_vec_batch = []
32 |     for path in paths_batch:
33 |         heatmap_vec = np.load(path)
34 |         heatmap_vec = np.reshape(heatmap_vec, vec_len)
35 |         heatmap_vec_batch.append(heatmap_vec)
36 |     heatmap_vec_batch = np.reshape(heatmap_vec_batch, [batchsize*nsteps, vec_len])
37 |     return heatmap_vec_batch
38 | 
--------------------------------------------------------------------------------
/update/utils/utils_io_list.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from random import shuffle
3 | from utils_io_folder import get_immediate_subfolder_paths, get_immediate_childfile_names, get_folder_name_from_path
4 | 
5 | ''' 1.
generate the list of pairs ''' 6 | 7 | def generate_pairs_list_for_training(dataset_folder_path, numpy_file_path, txt_file_path = None): 8 | folders_paths_list = generate_folders_paths_list(dataset_folder_path) 9 | num_of_frames_list = generate_num_of_frames_list(folders_paths_list) 10 | 11 | pairs_list = generate_pairs_with_two_lists(folders_paths_list, num_of_frames_list) 12 | shuffled_pairs_list= shuffle_list(pairs_list) 13 | 14 | generate_numpy_file_with_shuffled_list_of_pairs(numpy_file_path, shuffled_pairs_list) 15 | if txt_file_path is not None: 16 | generate_txt_file_with_shuffled_list_of_pairs(txt_file_path, shuffled_pairs_list) 17 | 18 | return True 19 | 20 | 21 | def generate_folders_paths_list(dataset_folder_path): 22 | folders_paths_list = get_immediate_subfolder_paths(dataset_folder_path) 23 | return folders_paths_list 24 | 25 | 26 | def generate_num_of_frames_list(folders_paths_list): 27 | num_of_frames_list = [len(get_immediate_childfile_names(folder_path)) 28 | for folder_path in folders_paths_list] 29 | return num_of_frames_list 30 | 31 | 32 | def generate_pairs_with_two_lists(folders_paths_list, num_of_frames_list): 33 | pairs_list = [] 34 | assert(len(folders_paths_list) == len(num_of_frames_list)) 35 | 36 | for folder_id, images_folder_path in enumerate(folders_paths_list): 37 | num_of_frames = num_of_frames_list[folder_id] 38 | pairs = generate_pairs_for_each_folder(images_folder_path, num_of_frames) 39 | 40 | for pair in pairs: 41 | pairs_list.append(pair) 42 | return pairs_list 43 | 44 | 45 | def generate_pairs_for_each_folder(images_folder_path, num_of_frames): 46 | pairs =[(get_folder_name_from_path(images_folder_path), ct) 47 | for ct in range(num_of_frames)] 48 | return pairs 49 | 50 | 51 | def generate_txt_file_with_shuffled_list_of_pairs(txt_file_path, shuffled_pairs_list): 52 | try: 53 | with open(txt_file_path, "w") as txt_file: 54 | for pairs in shuffled_pairs_list: 55 | line = str(pairs) + '\n' 56 | txt_file.write(line) 57 | except IOError: 58 | print('unable to open text file') 59 | 60 | 61 | def generate_numpy_file_with_shuffled_list_of_pairs(numpy_file_path, shuffled_pairs_list): 62 | np.save(numpy_file_path, shuffled_pairs_list) 63 | 64 | 65 | def shuffle_list(pairs_list): 66 | shuffle(pairs_list) 67 | return pairs_list 68 | 69 | 70 | ''' 2. 
Load the list of pairs '''
71 | def load_list_batch_pairs_from_numpy_file(pairs_list_numpy_file_path, batchsize):
72 |     shuffled_pairs_list = load_pairs_list_from_numpy_file(pairs_list_numpy_file_path)
73 |     list_batch_pairs = convert_pairs_to_list_batch_pairs(shuffled_pairs_list, batchsize)
74 |     return list_batch_pairs
75 | 
76 | 
77 | def load_pairs_list_from_numpy_file(pairs_list_numpy_file_path):
78 |     shuffled_pairs_list = np.load(pairs_list_numpy_file_path)
79 |     return shuffled_pairs_list
80 | 
81 | 
82 | def convert_pairs_to_list_batch_pairs(pairs_list, batchsize):
83 |     list_batch_pairs = []
84 |     for batch_id in range(0, len(pairs_list), batchsize):
85 |         st = batch_id
86 |         ed = st + batchsize
87 |         batch_pairs = pairs_list[st:ed]
88 |         list_batch_pairs.append(batch_pairs)
89 |     return list_batch_pairs
90 | 
--------------------------------------------------------------------------------
/update/utils/utils_natural_sort.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | def natural_sort(given_list):
4 |     """ Sort the given list in the way that humans expect. """
5 |     given_list.sort(key=alphanum_key)
6 | 
7 | 
8 | def alphanum_key(s):
9 |     """ Turn a string into a list of string and number chunks.
10 |         "z23a" -> ["z", 23, "a"] """
11 |     return [ tryint(c) for c in re.split('([0-9]+)', s) ]
12 | 
13 | 
14 | def tryint(s):
15 |     try:
16 |         return int(s)
17 |     except ValueError:
18 |         return s
19 | 
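20 | 
21 | # Example (added): numeric chunks compare numerically, so '2.txt' sorts
22 | # before '10.txt', unlike a plain lexicographic sort.
23 | if __name__ == '__main__':
24 |     names = ['10.txt', '2.txt', '1.txt']
25 |     natural_sort(names)
26 |     print(names)  # ['1.txt', '2.txt', '10.txt']
--------------------------------------------------------------------------------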