├── LICENSE
├── README.md
├── fhog.py
├── kcftracker.py
└── run.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 uoip
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # KCF tracker in Python
2 |
3 | Python implementation of
4 | > [High-Speed Tracking with Kernelized Correlation Filters](http://www.robots.ox.ac.uk/~joao/publications/henriques_tpami2015.pdf)
5 | > J. F. Henriques, R. Caseiro, P. Martins, J. Batista
6 | > TPAMI 2015
7 |
8 | It is translated from [KCFcpp](https://github.com/joaofaro/KCFcpp) (Authors: Joao Faro, Christian Bailer, Joao F. Henriques), a C++ implementation of Kernelized Correlation Filters. Find more references and code of KCF at http://www.robots.ox.ac.uk/~joao/circulant/
9 |
10 | ### Requirements
11 | - Python 2.7
12 | - NumPy
13 | - Numba (needed if you want to use the hog feature)
14 | - OpenCV (ensure that you can `import cv2` in python)
15 |
16 | Actually, I have installed Anaconda (for Python 2.7) and OpenCV 3.1 (from [opencv.org](http://opencv.org/)).
17 |
18 | ### Use
19 | Download the sources and execute
20 | ```shell
21 | git clone https://github.com/uoip/KCFpy.git
22 | cd KCFpy
23 | python run.py
24 | ```
25 | This opens the default camera of your computer. You can also open a different camera or a video file:
26 | ```shell
27 | python run.py 2
28 | ```
29 | ```shell
30 | python run.py ./test.avi
31 | ```
32 | Try different options (hog/gray, fixed/flexible window, singlescale/multiscale) of KCF tracker by modifying the arguments in line `tracker = kcftracker.KCFTracker(False, True, False) # hog, fixed_window, multiscale` in run.py.
33 |
34 |
35 | ### Problem
36 | I have struggled to make this Python implementation as fast as possible, but it is still 2 to 3 times slower than its C++ counterpart. Furthermore, the use of Numba introduces an unpleasant delay when initializing the tracker (***NEW:*** the problem has been solved in [KCFnb](https://github.com/uoip/KCFnb) by using AOT compilation).
37 |
38 | ***NEWER:*** I wrote a Python wrapper for KCFcpp, see [KCFcpp-py-wrapper](https://github.com/uoip/KCFcpp-py-wrapper), so we can now benefit from C++'s speed in Python.
39 |
--------------------------------------------------------------------------------
/fhog.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from numba import jit
4 |
5 | # constant
# Number of direction sectors; func1 tests NUM_SECTOR boundary directions and
# their negations, giving 2*NUM_SECTOR possible orientation indices.
NUM_SECTOR = 9
# Small constant added to every normalisation factor in func3 so the division
# never hits zero.
FLT_EPSILON = 1e-7
8 |
9 |
@jit
def func1(dx, dy, boundary_x, boundary_y, height, width, numChannels):
    """Compute per-pixel gradient magnitude and quantised orientation.

    For every interior pixel (the one-pixel image border is skipped and left
    at zero) the channel with the largest gradient magnitude is selected and
    its gradient is projected onto the NUM_SECTOR boundary directions.

    Args:
        dx, dy: gradient images indexed as [row, col, channel].
        boundary_x, boundary_y: x/y components of the sector directions.
        height, width: image size in pixels.
        numChannels: number of channels to scan in dx/dy.

    Returns:
        (r, alfa): r is a (height, width) float32 array of the winning
        magnitudes; alfa is a (height, width, 2) integer array where
        alfa[..., 1] is the winning index in [0, 2*NUM_SECTOR) and
        alfa[..., 0] is that index modulo NUM_SECTOR.
    """
    r = np.zeros((height, width), np.float32)
    # np.int was removed from NumPy; use an explicit integer dtype instead.
    alfa = np.zeros((height, width, 2), np.int64)

    for j in range(1, height - 1):
        for i in range(1, width - 1):
            # Start from channel 0, then keep the strongest channel.
            x = dx[j, i, 0]
            y = dy[j, i, 0]
            r[j, i] = np.sqrt(x*x + y*y)

            for ch in range(1, numChannels):
                tx = dx[j, i, ch]
                ty = dy[j, i, ch]
                magnitude = np.sqrt(tx*tx + ty*ty)
                if magnitude > r[j, i]:
                    r[j, i] = magnitude
                    x = tx
                    y = ty

            # Pick the direction (or its negation) with the largest projection.
            mmax = boundary_x[0]*x + boundary_y[0]*y
            maxi = 0

            for kk in range(0, NUM_SECTOR):
                dotProd = boundary_x[kk]*x + boundary_y[kk]*y
                if dotProd > mmax:
                    mmax = dotProd
                    maxi = kk
                elif -dotProd > mmax:
                    mmax = -dotProd
                    maxi = kk + NUM_SECTOR

            alfa[j, i, 0] = maxi % NUM_SECTOR
            alfa[j, i, 1] = maxi
    return r, alfa
47 |
@jit
def func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize):
    """Accumulate weighted gradient magnitudes into per-cell histograms.

    The image is divided into sizeY x sizeX cells of k x k pixels. Each
    interior pixel adds its magnitude r to two bins of its own cell
    (alfa[..., 0] and alfa[..., 1] + NUM_SECTOR) and, weighted by w, to the
    same bins of the neighbouring cells selected by `nearest`.

    Args:
        dx, dy, boundary_x, boundary_y: unused here; kept so the signature
            stays the same for callers.
        r, alfa: magnitude and orientation arrays as returned by func1.
        nearest: length-k array of row/column offsets (-1 or +1) to the
            neighbouring cell for each in-cell position.
        w: (k, 2) weights; column 0 applies to the pixel's own cell and
            column 1 to the neighbouring cell.
        k: cell size in pixels.
        height, width: image size in pixels.
        sizeX, sizeY: number of cells per row / column.
        p: number of histogram entries per cell.
        stringSize: number of entries per row of cells (sizeX * p).

    Returns:
        Flat float32 array of length sizeX*sizeY*p, indexed as
        i*stringSize + j*p + bin.
    """
    mapp = np.zeros((sizeX*sizeY*p), np.float32)
    for i in range(sizeY):
        for j in range(sizeX):
            for ii in range(k):
                for jj in range(k):
                    row = i * k + ii
                    col = j * k + jj
                    # Border pixels carry no gradient (see func1) and are skipped.
                    if (row > 0) and (row < height - 1) and (col > 0) and (col < width - 1):
                        mag = r[row, col]
                        bin0 = alfa[row, col, 0]
                        bin1 = alfa[row, col, 1] + NUM_SECTOR
                        ni = i + nearest[ii]
                        nj = j + nearest[jj]
                        row_ok = (ni >= 0) and (ni <= sizeY - 1)
                        col_ok = (nj >= 0) and (nj <= sizeX - 1)

                        # Own cell.
                        mapp[i*stringSize + j*p + bin0] += mag * w[ii, 0] * w[jj, 0]
                        mapp[i*stringSize + j*p + bin1] += mag * w[ii, 0] * w[jj, 0]
                        # Vertical neighbour.
                        if row_ok:
                            mapp[ni*stringSize + j*p + bin0] += mag * w[ii, 1] * w[jj, 0]
                            mapp[ni*stringSize + j*p + bin1] += mag * w[ii, 1] * w[jj, 0]
                        # Horizontal neighbour.
                        if col_ok:
                            mapp[i*stringSize + nj*p + bin0] += mag * w[ii, 0] * w[jj, 1]
                            mapp[i*stringSize + nj*p + bin1] += mag * w[ii, 0] * w[jj, 1]
                        # Diagonal neighbour.
                        if row_ok and col_ok:
                            mapp[ni*stringSize + nj*p + bin0] += mag * w[ii, 1] * w[jj, 1]
                            mapp[ni*stringSize + nj*p + bin1] += mag * w[ii, 1] * w[jj, 1]
    return mapp
68 |
@jit
def func3(partOfNorm, mappmap, sizeX, sizeY, p, xp, pp):
    """Normalise each interior cell by the energy of its four 2x2 neighbourhoods.

    The input map has (sizeX + 2) cells per row with xp features per cell;
    the output drops the outer ring of cells and stores pp features per cell:
    the first p input features normalised four ways, followed by the next 2*p
    input features normalised the same four ways.

    Args:
        partOfNorm: flat array with one squared norm per input cell.
        mappmap: flat input feature map.
        sizeX, sizeY: output size in cells (input size minus 2).
        p: length of the first feature group (the second group is 2*p long).
        xp: number of features per input cell.
        pp: number of features per output cell.

    Returns:
        Flat float32 array of length sizeY*sizeX*pp.
    """
    newData = np.zeros((sizeY*sizeX*pp), np.float32)
    stride = sizeX + 2  # cells per row in the input map
    for i in range(1, sizeY + 1):
        for j in range(1, sizeX + 1):
            cell = i * stride + j
            pos1 = i * (sizeX+2) * xp + j * xp
            pos2 = (i-1) * sizeX * pp + (j-1) * pp

            # Neighbourhood: this cell, right, below, below-right.
            valOfNorm = np.sqrt(partOfNorm[cell] +
                                partOfNorm[cell + 1] +
                                partOfNorm[cell + stride] +
                                partOfNorm[cell + stride + 1]) + FLT_EPSILON
            newData[pos2:pos2+p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+4*p:pos2+6*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            # Neighbourhood: this cell, right, above, above-right.
            valOfNorm = np.sqrt(partOfNorm[cell] +
                                partOfNorm[cell + 1] +
                                partOfNorm[cell - stride] +
                                partOfNorm[cell - stride + 1]) + FLT_EPSILON
            newData[pos2+p:pos2+2*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+6*p:pos2+8*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            # Neighbourhood: this cell, left, below, below-left.
            valOfNorm = np.sqrt(partOfNorm[cell] +
                                partOfNorm[cell - 1] +
                                partOfNorm[cell + stride] +
                                partOfNorm[cell + stride - 1]) + FLT_EPSILON
            newData[pos2+2*p:pos2+3*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+8*p:pos2+10*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            # Neighbourhood: this cell, left, above, above-left.
            valOfNorm = np.sqrt(partOfNorm[cell] +
                                partOfNorm[cell - 1] +
                                partOfNorm[cell - stride] +
                                partOfNorm[cell - stride - 1]) + FLT_EPSILON
            newData[pos2+3*p:pos2+4*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+10*p:pos2+12*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm
    return newData
105 |
@jit
def func4(mappmap, p, sizeX, sizeY, pp, yp, xp, nx, ny):
    """Project each cell's p features onto pp summed features.

    Per cell the output holds, in order: 2*xp sums over the yp blocks of the
    second feature group (scaled by ny), xp sums over the first feature group
    (scaled by ny), and yp per-block totals of the second group (scaled by nx).

    Args:
        mappmap: flat input map with p features per cell.
        p: number of input features per cell.
        sizeX, sizeY: map size in cells.
        pp: number of output features per cell.
        yp: number of normalisation blocks per feature group.
        xp: number of entries per block in the first feature group.
        nx, ny: scale factors applied to the sums.

    Returns:
        Flat float32 array of length sizeX*sizeY*pp.
    """
    newData = np.zeros((sizeX*sizeY*pp), np.float32)
    for i in range(sizeY):
        for j in range(sizeX):
            pos1 = (i*sizeX + j) * p
            pos2 = (i*sizeX + j) * pp

            for jj in range(2 * xp):
                newData[pos2 + jj] = np.sum(mappmap[pos1 + yp*xp + jj : pos1 + 3*yp*xp + jj : 2*xp]) * ny
            for jj in range(xp):
                newData[pos2 + 2*xp + jj] = np.sum(mappmap[pos1 + jj : pos1 + jj + yp*xp : xp]) * ny
            for ii in range(yp):
                newData[pos2 + 3*xp + ii] = np.sum(mappmap[pos1 + yp*xp + ii*xp*2 : pos1 + yp*xp + ii*xp*2 + 2*xp]) * nx
    return newData
121 |
122 |
123 |
def getFeatureMaps(image, k, mapp):
    """Build the raw per-cell gradient-orientation histogram of an image.

    Args:
        image: 3-dimensional array (rows, cols, channels); three channels are
            scanned.
        k: cell size in pixels.
        mapp: dict that receives the result; it is modified in place.

    Returns:
        The same `mapp` dict with 'sizeX', 'sizeY' (map size in cells),
        'numFeatures' (3 * NUM_SECTOR) and 'map' (flat float32 array).
    """
    kernel = np.array([[-1., 0., 1.]], np.float32)

    height = image.shape[0]
    width = image.shape[1]
    assert(image.ndim==3 and image.shape[2])
    numChannels = 3 #(1 if image.ndim==2 else image.shape[2])

    # Floor division keeps the cell counts integral on Python 2 and Python 3.
    sizeX = width // k
    sizeY = height // k
    p = 3 * NUM_SECTOR
    stringSize = sizeX * p

    mapp['sizeX'] = sizeX
    mapp['sizeY'] = sizeY
    mapp['numFeatures'] = p

    # The conversion to float32 is necessary before filtering.
    image32 = np.float32(image)
    dx = cv2.filter2D(image32, -1, kernel)
    dy = cv2.filter2D(image32, -1, kernel.T)

    arg_vector = np.arange(NUM_SECTOR+1).astype(np.float32) * np.pi / NUM_SECTOR
    boundary_x = np.cos(arg_vector)
    boundary_y = np.sin(arg_vector)

    # Per-pixel magnitude and orientation (numba-compiled loop).
    r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels)

    # Offset to the neighbouring cell for each in-cell position:
    # -1 for the first half of the cell, +1 for the second half.
    half = k // 2
    nearest = np.ones((k), np.int64)
    nearest[0:half] = -1

    # Interpolation weights: column 0 for the pixel's own cell, column 1 for
    # the neighbouring cell.
    w = np.zeros((k, 2), np.float32)
    a_x = np.concatenate((half - np.arange(half) - 0.5, np.arange(half, k) - half + 0.5)).astype(np.float32)
    b_x = np.concatenate((half + np.arange(half) + 0.5, -np.arange(half, k) + half - 0.5 + k)).astype(np.float32)
    w[:, 0] = 1.0 / a_x * ((a_x*b_x) / (a_x+b_x))
    w[:, 1] = 1.0 / b_x * ((a_x*b_x) / (a_x+b_x))

    # Per-cell histogram accumulation (numba-compiled loop).
    mapp['map'] = func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize)

    return mapp
188 |
189 |
def normalizeAndTruncate(mapp, alfa):
    """Normalise the feature map and clip every value at `alfa`.

    Args:
        mapp: feature-map dict as produced by getFeatureMaps; modified in
            place.
        alfa: upper bound applied to every normalised feature.

    Returns:
        The same `mapp` dict. 'sizeX' and 'sizeY' shrink by 2 (the outer ring
        of cells is dropped), 'numFeatures' becomes 12 * NUM_SECTOR and 'map'
        holds the new flat float32 array.
    """
    sizeX = mapp['sizeX']
    sizeY = mapp['sizeY']

    p = NUM_SECTOR
    xp = NUM_SECTOR * 3
    pp = NUM_SECTOR * 12

    # Squared norm of the first p features of every cell, computed with one
    # fancy-indexing gather instead of a Python loop over cells.
    idx = np.arange(0, sizeX*sizeY*mapp['numFeatures'], mapp['numFeatures']).reshape((sizeX*sizeY, 1)) + np.arange(p)
    partOfNorm = np.sum(mapp['map'][idx] ** 2, axis=1)

    # func3 only produces the interior cells.
    sizeX, sizeY = sizeX-2, sizeY-2

    # Normalisation by the four neighbourhoods (numba-compiled loop).
    newData = func3(partOfNorm, mapp['map'], sizeX, sizeY, p, xp, pp)

    # truncation
    newData[newData > alfa] = alfa

    mapp['numFeatures'] = pp
    mapp['sizeX'] = sizeX
    mapp['sizeY'] = sizeY
    mapp['map'] = newData

    return mapp
254 |
255 |
def PCAFeatureMaps(mapp):
    """Reduce each cell's feature vector to NUM_SECTOR * 3 + 4 values.

    Args:
        mapp: feature-map dict as produced by normalizeAndTruncate; modified
            in place.

    Returns:
        The same `mapp` dict with 'numFeatures' set to NUM_SECTOR * 3 + 4 and
        'map' replaced by the reduced flat float32 array. 'sizeX' and 'sizeY'
        are unchanged.
    """
    sizeX = mapp['sizeX']
    sizeY = mapp['sizeY']

    p = mapp['numFeatures']
    pp = NUM_SECTOR * 3 + 4
    yp = 4
    xp = NUM_SECTOR

    # Scale factors applied to the sums taken inside func4.
    nx = 1.0 / np.sqrt(xp*2)
    ny = 1.0 / np.sqrt(yp)

    # Summation over feature groups (numba-compiled loop).
    newData = func4(mapp['map'], p, sizeX, sizeY, pp, yp, xp, nx, ny)

    mapp['numFeatures'] = pp
    mapp['map'] = newData

    return mapp
309 |
--------------------------------------------------------------------------------
/kcftracker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | import fhog
5 |
6 | # ffttools
def fftd(img, backwards=False):
    """Run cv2.dft on `img` after converting it to float32.

    The forward transform requests complex output (DFT_COMPLEX_OUTPUT); the
    backward transform uses DFT_INVERSE | DFT_SCALE.

    img may have shape (m, n), (m, n, 1) or (m, n, 2).
    """
    # cv2.dft was measured by the original author to be faster than the
    # numpy/scipy FFTs. The flags must be passed by keyword.
    if backwards:
        dft_flags = cv2.DFT_INVERSE | cv2.DFT_SCALE
    else:
        dft_flags = cv2.DFT_COMPLEX_OUTPUT
    return cv2.dft(np.float32(img), flags=dft_flags)
11 |
def real(img):
    """Return channel 0 (the real part) of a two-channel complex image."""
    real_part = img[:, :, 0]
    return real_part
14 |
def imag(img):
    """Return channel 1 (the imaginary part) of a two-channel complex image."""
    imag_part = img[:, :, 1]
    return imag_part
17 |
def complexMultiplication(a, b):
    """Multiply two complex images element-wise.

    Both inputs store the real part in channel 0 and the imaginary part in
    channel 1. The result has the shape and dtype of `a`.
    """
    a_re, a_im = a[:, :, 0], a[:, :, 1]
    b_re, b_im = b[:, :, 0], b[:, :, 1]

    product = np.zeros(a.shape, a.dtype)
    # (a_re + i a_im)(b_re + i b_im)
    product[:, :, 0] = a_re*b_re - a_im*b_im
    product[:, :, 1] = a_re*b_im + a_im*b_re
    return product
24 |
def complexDivision(a, b):
    """Divide two complex images element-wise (a / b).

    Both inputs store the real part in channel 0 and the imaginary part in
    channel 1. The result has the shape and dtype of `a`.

    a / b = a * conj(b) / |b|^2, so the imaginary part is
    (a_im*b_re - a_re*b_im) / |b|^2. The previous code added the two terms,
    which is only correct when b is purely real.
    """
    a_re, a_im = a[:, :, 0], a[:, :, 1]
    b_re, b_im = b[:, :, 0], b[:, :, 1]

    res = np.zeros(a.shape, a.dtype)
    divisor = 1. / (b_re**2 + b_im**2)

    res[:, :, 0] = (a_re*b_re + a_im*b_im) * divisor
    res[:, :, 1] = (a_im*b_re - a_re*b_im) * divisor
    return res
32 |
def rearrange(img):
    """Swap diagonally opposite quadrants of a 2-D array.

    For even-sized arrays this matches np.fft.fftshift(img, axes=(0, 1)).
    Odd-sized arrays are not supported: the quadrants then have different
    shapes and the assignment raises ValueError.

    Returns a new array; `img` is not modified.
    """
    assert(img.ndim==2)
    img_ = np.zeros(img.shape, img.dtype)
    # Floor division: slice bounds must be integers on Python 3 as well.
    xh, yh = img.shape[1] // 2, img.shape[0] // 2
    # top-left <-> bottom-right
    img_[0:yh, 0:xh] = img[yh:img.shape[0], xh:img.shape[1]]
    img_[yh:img.shape[0], xh:img.shape[1]] = img[0:yh, 0:xh]
    # top-right <-> bottom-left
    img_[0:yh, xh:img.shape[1]] = img[yh:img.shape[0], 0:xh]
    img_[yh:img.shape[0], 0:xh] = img[0:yh, xh:img.shape[1]]
    return img_
41 |
42 |
43 | # recttools
def x2(rect):
    """Return the right edge (x + width) of a rect given as [x, y, width, height]."""
    left, width = rect[0], rect[2]
    return left + width
46 |
def y2(rect):
    """Return the bottom edge (y + height) of a rect given as [x, y, width, height]."""
    top, height = rect[1], rect[3]
    return top + height
49 |
def limit(rect, limit):
    """Clip `rect` to the bounds `limit`, in place.

    Both arguments are [x, y, width, height]. The far edges are clipped
    first, then the origin; a negative resulting width or height is set to 0.

    Returns the same (mutated) `rect` object.
    """
    right_bound = limit[0] + limit[2]
    bottom_bound = limit[1] + limit[3]

    # Clip the right and bottom edges.
    if rect[0] + rect[2] > right_bound:
        rect[2] = right_bound - rect[0]
    if rect[1] + rect[3] > bottom_bound:
        rect[3] = bottom_bound - rect[1]

    # Clip the left and top edges, shrinking the size by the amount cut off.
    for origin, size in ((0, 2), (1, 3)):
        if rect[origin] < limit[origin]:
            rect[size] -= (limit[origin] - rect[origin])
            rect[origin] = limit[origin]

    # A rect lying entirely outside the bounds collapses to zero size.
    for size in (2, 3):
        if rect[size] < 0:
            rect[size] = 0
    return rect
66 |
def getBorder(original, limited):
    """Return how far `original` extends beyond `limited` on each side.

    Both rects are [x, y, width, height]. The result is
    [left, top, right, bottom]; every entry must be non-negative, otherwise
    an AssertionError is raised.
    """
    left = limited[0] - original[0]
    top = limited[1] - original[1]
    right = (original[0] + original[2]) - (limited[0] + limited[2])
    bottom = (original[1] + original[3]) - (limited[1] + limited[3])
    res = [left, top, right, bottom]
    assert all(margin >= 0 for margin in res)
    return res
75 |
def subwindow(img, window, borderType=cv2.BORDER_CONSTANT):
    """Cut `window` ([x, y, width, height]) out of `img`.

    The window is first clipped to the image; any part of it that lay
    outside the image is then filled back in with cv2.copyMakeBorder using
    `borderType`. An AssertionError is raised when the clipped window is
    empty.
    """
    clipped = list(window)
    limit(clipped, [0, 0, img.shape[1], img.shape[0]])  # clips `clipped` in place
    assert(clipped[2] > 0 and clipped[3] > 0)

    left, top, right, bottom = getBorder(window, clipped)
    x0, y0, win_w, win_h = clipped
    res = img[y0:y0 + win_h, x0:x0 + win_w]

    if [left, top, right, bottom] != [0, 0, 0, 0]:
        res = cv2.copyMakeBorder(res, top, bottom, left, right, borderType)
    return res
86 |
87 |
88 |
89 | # KCF tracker
class KCFTracker:
    """Kernelized Correlation Filter tracker.

    Typical use: call init(roi, image) once with the initial bounding box,
    then update(image) for every following frame to get the new box.

    Rects are [x, y, width, height].
    """

    def __init__(self, hog=False, fixed_window=True, multiscale=False):
        """Set the tracker parameters.

        Args:
            hog: use HOG features (via the fhog module) instead of raw
                gray-scale pixels.
            fixed_window: use a fixed template size of 96; only consulted
                when `multiscale` is false.
            multiscale: also test a smaller and a larger scale on every
                update.
        """
        self.lambdar = 0.0001   # regularization
        self.padding = 2.5   # extra area surrounding the target
        self.output_sigma_factor = 0.125   # bandwidth of gaussian target

        if hog:   # HOG feature
            # VOT
            self.interp_factor = 0.012   # linear interpolation factor for adaptation
            self.sigma = 0.6   # gaussian kernel bandwidth
            # TPAMI   #interp_factor = 0.02   #sigma = 0.5
            self.cell_size = 4   # HOG cell size
            self._hogfeatures = True
        else:   # raw gray-scale image # aka CSK tracker
            self.interp_factor = 0.075
            self.sigma = 0.2
            self.cell_size = 1
            self._hogfeatures = False

        if multiscale:
            self.template_size = 96   # template size
            self.scale_step = 1.05   # scale step for multi-scale estimation
            self.scale_weight = 0.96   # to downweight detection scores of other scales for added stability
        elif fixed_window:
            self.template_size = 96
            self.scale_step = 1
        else:
            self.template_size = 1
            self.scale_step = 1

        self._tmpl_sz = [0, 0]   # [width, height], ints
        self._roi = [0., 0., 0., 0.]   # [x, y, width, height], floats
        self.size_patch = [0, 0, 0]   # [rows, cols, channels], ints
        self._scale = 1.   # float
        self._alphaf = None   # numpy.ndarray (size_patch[0], size_patch[1], 2)
        self._prob = None   # numpy.ndarray (size_patch[0], size_patch[1], 2)
        self._tmpl = None   # numpy.ndarray  raw: (size_patch[0], size_patch[1])   hog: (size_patch[2], size_patch[0]*size_patch[1])
        self.hann = None   # numpy.ndarray  raw: (size_patch[0], size_patch[1])   hog: (size_patch[2], size_patch[0]*size_patch[1])

    def subPixelPeak(self, left, center, right):
        """Return the sub-pixel offset of a peak from its two neighbours.

        Returns 0 when the three samples are (nearly) collinear.
        """
        divisor = 2*center - right - left   # float
        return (0 if abs(divisor) < 1e-3 else 0.5*(right-left)/divisor)

    def createHanningMats(self):
        """Build the Hann window for the current size_patch and store it in self.hann.

        Raw features get a (rows, cols) window; HOG features get the
        flattened window repeated for each of the size_patch[2] channels.
        """
        hann2t, hann1t = np.ogrid[0:self.size_patch[0], 0:self.size_patch[1]]

        hann1t = 0.5 * (1 - np.cos(2*np.pi*hann1t/(self.size_patch[1]-1)))
        hann2t = 0.5 * (1 - np.cos(2*np.pi*hann2t/(self.size_patch[0]-1)))
        hann2d = hann2t * hann1t

        if self._hogfeatures:
            hann1d = hann2d.reshape(self.size_patch[0]*self.size_patch[1])
            # Broadcasting against a (channels, 1) column repeats the window per channel.
            self.hann = np.zeros((self.size_patch[2], 1), np.float32) + hann1d
        else:
            self.hann = hann2d
        self.hann = self.hann.astype(np.float32)

    def createGaussianPeak(self, sizey, sizex):
        """Return the DFT of a Gaussian centred at (sizey // 2, sizex // 2)."""
        # Floor division keeps the centre on an integer pixel on Python 2 and 3.
        syh, sxh = sizey // 2, sizex // 2
        output_sigma = np.sqrt(sizex*sizey) / self.padding * self.output_sigma_factor
        mult = -0.5 / (output_sigma*output_sigma)
        y, x = np.ogrid[0:sizey, 0:sizex]
        y, x = (y-syh)**2, (x-sxh)**2
        res = np.exp(mult * (y+x))
        return fftd(res)

    def gaussianCorrelation(self, x1, x2):
        """Evaluate the Gaussian kernel between x1 and x2 for all shifts.

        The cross-correlation is computed in the Fourier domain (summed over
        channels for HOG features) and rearranged so that zero shift is at
        the centre.

        Raises:
            ValueError: if x1 and x2 are not both 2-D or both 3-D.
        """
        rows, cols, channels = self.size_patch[0], self.size_patch[1], self.size_patch[2]

        if self._hogfeatures:
            c = np.zeros((rows, cols), np.float32)
            for i in range(channels):
                x1aux = x1[i, :].reshape((rows, cols))
                x2aux = x2[i, :].reshape((rows, cols))
                caux = cv2.mulSpectrums(fftd(x1aux), fftd(x2aux), 0, conjB=True)
                caux = real(fftd(caux, True))
                c += caux
            c = rearrange(c)
        else:
            c = cv2.mulSpectrums(fftd(x1), fftd(x2), 0, conjB=True)   # 'conjB=' is necessary!
            c = fftd(c, True)
            c = real(c)
            c = rearrange(c)

        if x1.ndim == 3 and x2.ndim == 3:
            d = (np.sum(x1[:, :, 0]*x1[:, :, 0]) + np.sum(x2[:, :, 0]*x2[:, :, 0]) - 2.0*c) / (rows*cols*channels)
        elif x1.ndim == 2 and x2.ndim == 2:
            d = (np.sum(x1*x1) + np.sum(x2*x2) - 2.0*c) / (rows*cols*channels)
        else:
            # Previously this fell through and failed on an unbound local.
            raise ValueError("x1 and x2 must both be 2-D or both be 3-D")

        d = d * (d >= 0)   # clamp negative distances to zero
        d = np.exp(-d / (self.sigma*self.sigma))

        return d

    def getFeatures(self, image, inithann, scale_adjust=1.0):
        """Extract the windowed feature map around the current ROI.

        Args:
            image: current frame.
            inithann: when true, (re)compute the template size, the scale
                and the Hann window from the current ROI.
            scale_adjust: factor applied to the extracted window size.

        Returns:
            The feature map multiplied by the Hann window.

        Raises:
            ValueError: for raw features, if the extracted patch is neither
                2-D nor a 3-channel image.
        """
        extracted_roi = [0, 0, 0, 0]   # [x, y, width, height], ints
        cx = self._roi[0] + self._roi[2] / 2.0
        cy = self._roi[1] + self._roi[3] / 2.0

        if inithann:
            padded_w = self._roi[2] * self.padding
            padded_h = self._roi[3] * self.padding

            if self.template_size > 1:
                # Scale so that the longer padded side equals template_size.
                if padded_w >= padded_h:
                    self._scale = padded_w / float(self.template_size)
                else:
                    self._scale = padded_h / float(self.template_size)
                self._tmpl_sz[0] = int(padded_w / self._scale)
                self._tmpl_sz[1] = int(padded_h / self._scale)
            else:
                self._tmpl_sz[0] = int(padded_w)
                self._tmpl_sz[1] = int(padded_h)
                self._scale = 1.

            if self._hogfeatures:
                # Round down to a multiple of 2*cell_size, then add one more
                # 2*cell_size (the HOG normalisation drops a ring of cells).
                step = 2 * self.cell_size
                self._tmpl_sz[0] = int(self._tmpl_sz[0]) // step * step + step
                self._tmpl_sz[1] = int(self._tmpl_sz[1]) // step * step + step
            else:
                # Round down to an even size.
                self._tmpl_sz[0] = int(self._tmpl_sz[0]) // 2 * 2
                self._tmpl_sz[1] = int(self._tmpl_sz[1]) // 2 * 2

        extracted_roi[2] = int(scale_adjust * self._scale * self._tmpl_sz[0])
        extracted_roi[3] = int(scale_adjust * self._scale * self._tmpl_sz[1])
        # Integer half-size, as in the original Python 2 integer division.
        extracted_roi[0] = int(cx - extracted_roi[2] // 2)
        extracted_roi[1] = int(cy - extracted_roi[3] // 2)

        z = subwindow(image, extracted_roi, cv2.BORDER_REPLICATE)
        if z.shape[1] != self._tmpl_sz[0] or z.shape[0] != self._tmpl_sz[1]:
            z = cv2.resize(z, tuple(self._tmpl_sz))

        if self._hogfeatures:
            mapp = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
            mapp = fhog.getFeatureMaps(z, self.cell_size, mapp)
            mapp = fhog.normalizeAndTruncate(mapp, 0.2)
            mapp = fhog.PCAFeatureMaps(mapp)
            # A real list: size_patch is indexed, which a Python 3 map object does not allow.
            self.size_patch = [int(mapp['sizeY']), int(mapp['sizeX']), int(mapp['numFeatures'])]
            FeaturesMap = mapp['map'].reshape((self.size_patch[0]*self.size_patch[1], self.size_patch[2])).T   # (size_patch[2], size_patch[0]*size_patch[1])
        else:
            if z.ndim == 3 and z.shape[2] == 3:
                FeaturesMap = cv2.cvtColor(z, cv2.COLOR_BGR2GRAY)
            elif z.ndim == 2:
                FeaturesMap = z
            else:
                # Previously this fell through and failed on an unbound local.
                raise ValueError("image patch must be 2-D or have 3 channels")
            # Map pixel values from 0..255 to -0.5..0.5.
            FeaturesMap = FeaturesMap.astype(np.float32) / 255.0 - 0.5
            self.size_patch = [z.shape[0], z.shape[1], 1]

        if inithann:
            self.createHanningMats()   # createHanningMats needs size_patch

        FeaturesMap = self.hann * FeaturesMap
        return FeaturesMap

    # NOTE(review): the source text of detect() (after the minMaxLoc lines),
    # all of train(), and the first lines of init() was corrupted: everything
    # between a '<' and the next '>' had been stripped. These three methods
    # are reconstructed from the surviving fragments, from how update() and
    # init() call them, and from the KCF algorithm. Confirm against the
    # upstream file before relying on them.

    def detect(self, z, x):
        """Locate the target in features `x` using template `z`.

        Returns:
            (p, pv): p is the [x, y] displacement of the response peak from
            the patch centre (with sub-pixel refinement), pv the peak value.
        """
        k = self.gaussianCorrelation(x, z)
        res = real(fftd(complexMultiplication(self._alphaf, fftd(k)), True))

        _, pv, _, pi = cv2.minMaxLoc(res)   # pv: float   pi: tuple of int (x, y)
        p = [float(pi[0]), float(pi[1])]   # [x, y]

        # Refine only when both neighbours exist.
        if pi[0] > 0 and pi[0] < res.shape[1]-1:
            p[0] += self.subPixelPeak(res[pi[1], pi[0]-1], pv, res[pi[1], pi[0]+1])
        if pi[1] > 0 and pi[1] < res.shape[0]-1:
            p[1] += self.subPixelPeak(res[pi[1]-1, pi[0]], pv, res[pi[1]+1, pi[0]])

        p[0] -= res.shape[1] / 2.
        p[1] -= res.shape[0] / 2.

        return p, pv

    def train(self, x, train_interp_factor):
        """Update the template and filter coefficients from features `x`.

        `train_interp_factor` is the weight of the new sample; 1.0 replaces
        the model completely.
        """
        k = self.gaussianCorrelation(x, x)
        kf = fftd(k)
        # NOTE(review): the regularisation is a real number, so it is added to
        # the real channel only; the corrupted original may have added it to
        # both channels. Confirm against upstream.
        kf[:, :, 0] += self.lambdar
        alphaf = complexDivision(self._prob, kf)

        self._tmpl = (1-train_interp_factor)*self._tmpl + train_interp_factor*x
        self._alphaf = (1-train_interp_factor)*self._alphaf + train_interp_factor*alphaf

    def init(self, roi, image):
        """Start tracking the box `roi` ([x, y, width, height]) in `image`."""
        # A real list: _roi is indexed and mutated, which a Python 3 map object does not allow.
        self._roi = [float(v) for v in roi]
        assert(roi[2] > 0 and roi[3] > 0)
        self._tmpl = self.getFeatures(image, 1)
        self._prob = self.createGaussianPeak(self.size_patch[0], self.size_patch[1])
        self._alphaf = np.zeros((self.size_patch[0], self.size_patch[1], 2), np.float32)
        self.train(self._tmpl, 1.0)

    def update(self, image):
        """Track the target into `image` and retrain on the new position.

        Returns:
            The updated ROI [x, y, width, height] (the tracker's own list).
        """
        # Keep at least part of the box inside the image before extraction.
        if self._roi[0]+self._roi[2] <= 0:
            self._roi[0] = -self._roi[2] + 1
        if self._roi[1]+self._roi[3] <= 0:
            # Fixed: the y origin must be clamped with the height (index 3),
            # not the width.
            self._roi[1] = -self._roi[3] + 1
        if self._roi[0] >= image.shape[1]-1:
            self._roi[0] = image.shape[1] - 2
        if self._roi[1] >= image.shape[0]-1:
            self._roi[1] = image.shape[0] - 2

        cx = self._roi[0] + self._roi[2]/2.
        cy = self._roi[1] + self._roi[3]/2.

        loc, peak_value = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0))

        if self.scale_step != 1:
            # Test at a smaller _scale
            new_loc1, new_peak_value1 = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0/self.scale_step))
            # Test at a bigger _scale
            new_loc2, new_peak_value2 = self.detect(self._tmpl, self.getFeatures(image, 0, self.scale_step))

            if self.scale_weight*new_peak_value1 > peak_value and new_peak_value1 > new_peak_value2:
                loc = new_loc1
                peak_value = new_peak_value1
                self._scale /= self.scale_step
                self._roi[2] /= self.scale_step
                self._roi[3] /= self.scale_step
            elif self.scale_weight*new_peak_value2 > peak_value:
                loc = new_loc2
                peak_value = new_peak_value2
                self._scale *= self.scale_step
                self._roi[2] *= self.scale_step
                self._roi[3] *= self.scale_step

        # Convert the displacement from feature cells to image pixels.
        self._roi[0] = cx - self._roi[2]/2.0 + loc[0]*self.cell_size*self._scale
        self._roi[1] = cy - self._roi[3]/2.0 + loc[1]*self.cell_size*self._scale

        if self._roi[0] >= image.shape[1]-1:
            self._roi[0] = image.shape[1] - 1
        if self._roi[1] >= image.shape[0]-1:
            self._roi[1] = image.shape[0] - 1
        if self._roi[0]+self._roi[2] <= 0:
            self._roi[0] = -self._roi[2] + 2
        if self._roi[1]+self._roi[3] <= 0:
            self._roi[1] = -self._roi[3] + 2
        assert(self._roi[2] > 0 and self._roi[3] > 0)

        x = self.getFeatures(image, 0, 1.0)
        self.train(x, self.interp_factor)

        return self._roi
317 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import sys
4 | from time import time
5 |
6 | import kcftracker
7 |
# State shared between the mouse callback and the main loop.
selectingObject = False   # a box is being dragged with the left button
initTracking = False      # a new box is ready and the tracker must be (re)initialised
onTracking = False        # the tracker is running
ix = iy = cx = cy = -1    # box anchor (ix, iy) and current cursor (cx, cy)
w = h = 0                 # size of the selected box

inteval = 1               # delay passed to cv2.waitKey
duration = 0.01           # smoothed time of one tracker update, in seconds
16 |
17 | # mouse callback function
def draw_boundingbox(event, x, y, flags, param):
    """Mouse callback: select the box to track.

    Left button: press, drag and release to draw a new box; boxes that are
    not larger than 10 pixels in both directions are ignored and stop the
    tracking. Right button: re-centre a box of the previously selected size
    on the cursor.

    The result is communicated through the module-level state variables.
    """
    global selectingObject, initTracking, onTracking, ix, iy, cx, cy, w, h

    if event == cv2.EVENT_LBUTTONDOWN:
        selectingObject = True
        onTracking = False
        ix, iy = x, y
        cx, cy = x, y

    elif event == cv2.EVENT_MOUSEMOVE:
        cx, cy = x, y

    elif event == cv2.EVENT_LBUTTONUP:
        selectingObject = False
        if abs(x-ix) > 10 and abs(y-iy) > 10:
            w, h = abs(x - ix), abs(y - iy)
            ix, iy = min(x, ix), min(y, iy)
            initTracking = True
        else:
            onTracking = False

    elif event == cv2.EVENT_RBUTTONDOWN:
        onTracking = False
        if w > 0:
            # Floor division keeps the corner on integer pixel coordinates on
            # Python 3 as well; the main loop passes them to cv2.rectangle.
            ix, iy = x - w//2, y - h//2
            initTracking = True
44 |
45 |
46 |
if __name__ == '__main__':
    # Usage: run.py [camera index | video file]

    if len(sys.argv) == 1:
        cap = cv2.VideoCapture(0)
    elif len(sys.argv) == 2:
        if sys.argv[1].isdigit():   # True if sys.argv[1] is str of a nonnegative integer
            cap = cv2.VideoCapture(int(sys.argv[1]))
        else:
            cap = cv2.VideoCapture(sys.argv[1])
            inteval = 30
    else:
        # An explicit exit instead of assert, which is stripped under -O.
        sys.exit("too many arguments")

    tracker = kcftracker.KCFTracker(True, True, True)   # hog, fixed_window, multiscale
    # If you use the hog feature, there will be a short pause after you draw
    # the first bounding box; that is due to the use of Numba.

    cv2.namedWindow('tracking')
    cv2.setMouseCallback('tracking', draw_boundingbox)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if selectingObject:
                cv2.rectangle(frame, (ix, iy), (cx, cy), (0, 255, 255), 1)
            elif initTracking:
                cv2.rectangle(frame, (ix, iy), (ix+w, iy+h), (0, 255, 255), 2)

                tracker.init([ix, iy, w, h], frame)

                initTracking = False
                onTracking = True
            elif onTracking:
                t0 = time()
                boundingbox = tracker.update(frame)
                t1 = time()

                # A real list: the box is indexed below, which a Python 3 map
                # object does not allow.
                boundingbox = [int(v) for v in boundingbox]
                cv2.rectangle(frame, (boundingbox[0], boundingbox[1]), (boundingbox[0]+boundingbox[2], boundingbox[1]+boundingbox[3]), (0, 255, 255), 1)

                # Exponentially smoothed update time, shown as frames per second.
                duration = 0.8*duration + 0.2*(t1-t0)
                cv2.putText(frame, 'FPS: '+str(1/duration)[:4].strip('.'), (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

            cv2.imshow('tracking', frame)
            c = cv2.waitKey(inteval) & 0xFF
            if c == 27 or c == ord('q'):   # Esc or 'q' quits
                break
    finally:
        # Release the capture device and windows even if tracking raised.
        cap.release()
        cv2.destroyAllWindows()
98 |
--------------------------------------------------------------------------------