├── LICENSE ├── README.md ├── fhog.py ├── kcftracker.py └── run.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 uoip 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KCF tracker in Python 2 | 3 | Python implementation of 4 | > [High-Speed Tracking with Kernelized Correlation Filters](http://www.robots.ox.ac.uk/~joao/publications/henriques_tpami2015.pdf)
5 | > J. F. Henriques, R. Caseiro, P. Martins, J. Batista
6 | > TPAMI 2015 7 | 8 | It is translated from [KCFcpp](https://github.com/joaofaro/KCFcpp) (Authors: Joao Faro, Christian Bailer, Joao F. Henriques), a C++ implementation of Kernelized Correlation Filters. Find more references and code of KCF at http://www.robots.ox.ac.uk/~joao/circulant/ 9 | 10 | ### Requirements 11 | - Python 2.7 12 | - NumPy 13 | - Numba (needed if you want to use the hog feature) 14 | - OpenCV (ensure that you can `import cv2` in python) 15 | 16 | Actually, I have installed Anaconda(for Python 2.7), and OpenCV 3.1(from [opencv.org](http://opencv.org/)). 17 | 18 | ### Use 19 | Download the sources and execute 20 | ```shell 21 | git clone https://github.com/uoip/KCFpy.git 22 | cd KCFpy 23 | python run.py 24 | ``` 25 | It will open the default camera of your computer, you can also open a different camera or a video 26 | ```shell 27 | python run.py 2 28 | ``` 29 | ```shell 30 | python run.py ./test.avi 31 | ``` 32 | Try different options (hog/gray, fixed/flexible window, singlescale/multiscale) of KCF tracker by modifying the arguments in line `tracker = kcftracker.KCFTracker(False, True, False) # hog, fixed_window, multiscale` in run.py. 33 | 34 | 35 | ### Peoblem 36 | I have struggled to make this python implementation as fast as possible, but it's still 2 ~ 3 times slower than its C++ counterpart, furthermore, the use of Numba introduce some unpleasant delay when initializing tracker (***NEW:*** the problem has been solved in [KCFnb](https://github.com/uoip/KCFnb) by using AOT compilation). 37 | 38 | ***NEWER:*** I write a python wrapper for KCFcpp, see [KCFcpp-py-wrapper](https://github.com/uoip/KCFcpp-py-wrapper), so we can benefit from C++'s speed in python now. 39 | -------------------------------------------------------------------------------- /fhog.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from numba import jit 4 | 5 | # constant 6 | NUM_SECTOR = 9 7 | FLT_EPSILON = 1e-07 8 | 9 | 10 | @jit 11 | def func1(dx, dy, boundary_x, boundary_y, height, width, numChannels): 12 | r = np.zeros((height, width), np.float32) 13 | alfa = np.zeros((height, width, 2), np.int) 14 | 15 | for j in xrange(1, height-1): 16 | for i in xrange(1, width-1): 17 | c = 0 18 | x = dx[j, i, c] 19 | y = dy[j, i, c] 20 | r[j, i] = np.sqrt(x*x + y*y) 21 | 22 | for ch in xrange(1, numChannels): 23 | tx = dx[j, i, ch] 24 | ty = dy[j, i, ch] 25 | magnitude = np.sqrt(tx*tx + ty*ty) 26 | if(magnitude > r[j, i]): 27 | r[j, i] = magnitude 28 | c = ch 29 | x = tx 30 | y = ty 31 | 32 | mmax = boundary_x[0]*x + boundary_y[0]*y 33 | maxi = 0 34 | 35 | for kk in xrange(0, NUM_SECTOR): 36 | dotProd = boundary_x[kk]*x + boundary_y[kk]*y 37 | if(dotProd > mmax): 38 | mmax = dotProd 39 | maxi = kk 40 | elif(-dotProd > mmax): 41 | mmax = -dotProd 42 | maxi = kk + NUM_SECTOR 43 | 44 | alfa[j, i, 0] = maxi % NUM_SECTOR 45 | alfa[j, i, 1] = maxi 46 | return r, alfa 47 | 48 | @jit 49 | def func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize): 50 | mapp = np.zeros((sizeX*sizeY*p), np.float32) 51 | for i in xrange(sizeY): 52 | for j in xrange(sizeX): 53 | for ii in xrange(k): 54 | for jj in xrange(k): 55 | if((i * k + ii > 0) and (i * k + ii < height - 1) and (j * k + jj > 0) and (j * k + jj < width - 1)): 56 | mapp[i*stringSize + j*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,0] 57 | mapp[i*stringSize + j*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,0] 58 | if((i + nearest[ii] >= 0) and (i + nearest[ii] <= sizeY - 1)): 59 | mapp[(i+nearest[ii])*stringSize + j*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,0] 60 | mapp[(i+nearest[ii])*stringSize + j*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,0] 61 | if((j + nearest[jj] >= 0) and (j + nearest[jj] <= sizeX - 1)): 62 | mapp[i*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,1] 63 | mapp[i*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,1] 64 | if((i + nearest[ii] >= 0) and (i + nearest[ii] <= sizeY - 1) and (j + nearest[jj] >= 0) and (j + nearest[jj] <= sizeX - 1)): 65 | mapp[(i+nearest[ii])*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,1] 66 | mapp[(i+nearest[ii])*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,1] 67 | return mapp 68 | 69 | @jit 70 | def func3(partOfNorm, mappmap, sizeX, sizeY, p, xp, pp): 71 | newData = np.zeros((sizeY*sizeX*pp), np.float32) 72 | for i in xrange(1, sizeY+1): 73 | for j in xrange(1, sizeX+1): 74 | pos1 = i * (sizeX+2) * xp + j * xp 75 | pos2 = (i-1) * sizeX * pp + (j-1) * pp 76 | 77 | valOfNorm = np.sqrt(partOfNorm[(i )*(sizeX + 2) + (j )] + 78 | partOfNorm[(i )*(sizeX + 2) + (j + 1)] + 79 | partOfNorm[(i + 1)*(sizeX + 2) + (j )] + 80 | partOfNorm[(i + 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON 81 | newData[pos2:pos2+p] = mappmap[pos1:pos1+p] / valOfNorm 82 | newData[pos2+4*p:pos2+6*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm 83 | 84 | valOfNorm = np.sqrt(partOfNorm[(i )*(sizeX + 2) + (j )] + 85 | partOfNorm[(i )*(sizeX + 2) + (j + 1)] + 86 | partOfNorm[(i - 1)*(sizeX + 2) + (j )] + 87 | partOfNorm[(i - 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON 88 | newData[pos2+p:pos2+2*p] = mappmap[pos1:pos1+p] / valOfNorm 89 | newData[pos2+6*p:pos2+8*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm 90 | 91 | valOfNorm = np.sqrt(partOfNorm[(i )*(sizeX + 2) + (j )] + 92 | partOfNorm[(i )*(sizeX + 2) + (j - 1)] + 93 | partOfNorm[(i + 1)*(sizeX + 2) + (j )] + 94 | partOfNorm[(i + 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON 95 | newData[pos2+2*p:pos2+3*p] = mappmap[pos1:pos1+p] / valOfNorm 96 | newData[pos2+8*p:pos2+10*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm 97 | 98 | valOfNorm = np.sqrt(partOfNorm[(i )*(sizeX + 2) + (j )] + 99 | partOfNorm[(i )*(sizeX + 2) + (j - 1)] + 100 | partOfNorm[(i - 1)*(sizeX + 2) + (j )] + 101 | partOfNorm[(i - 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON 102 | newData[pos2+3*p:pos2+4*p] = mappmap[pos1:pos1+p] / valOfNorm 103 | newData[pos2+10*p:pos2+12*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm 104 | return newData 105 | 106 | @jit 107 | def func4(mappmap, p, sizeX, sizeY, pp, yp, xp, nx, ny): 108 | newData = np.zeros((sizeX*sizeY*pp), np.float32) 109 | for i in xrange(sizeY): 110 | for j in xrange(sizeX): 111 | pos1 = (i*sizeX + j) * p 112 | pos2 = (i*sizeX + j) * pp 113 | 114 | for jj in xrange(2 * xp): # 2*9 115 | newData[pos2 + jj] = np.sum(mappmap[pos1 + yp*xp + jj : pos1 + 3*yp*xp + jj : 2*xp]) * ny 116 | for jj in xrange(xp): # 9 117 | newData[pos2 + 2*xp + jj] = np.sum(mappmap[pos1 + jj : pos1 + jj + yp*xp : xp]) * ny 118 | for ii in xrange(yp): # 4 119 | newData[pos2 + 3*xp + ii] = np.sum(mappmap[pos1 + yp*xp + ii*xp*2 : pos1 + yp*xp + ii*xp*2 + 2*xp]) * nx 120 | return newData 121 | 122 | 123 | 124 | def getFeatureMaps(image, k, mapp): 125 | kernel = np.array([[-1., 0., 1.]], np.float32) 126 | 127 | height = image.shape[0] 128 | width = image.shape[1] 129 | assert(image.ndim==3 and image.shape[2]) 130 | numChannels = 3 #(1 if image.ndim==2 else image.shape[2]) 131 | 132 | sizeX = width / k 133 | sizeY = height / k 134 | px = 3 * NUM_SECTOR 135 | p = px 136 | stringSize = sizeX * p 137 | 138 | mapp['sizeX'] = sizeX 139 | mapp['sizeY'] = sizeY 140 | mapp['numFeatures'] = p 141 | mapp['map'] = np.zeros((mapp['sizeX']*mapp['sizeY']*mapp['numFeatures']), np.float32) 142 | 143 | dx = cv2.filter2D(np.float32(image), -1, kernel) # np.float32(...) is necessary 144 | dy = cv2.filter2D(np.float32(image), -1, kernel.T) 145 | 146 | arg_vector = np.arange(NUM_SECTOR+1).astype(np.float32) * np.pi / NUM_SECTOR 147 | boundary_x = np.cos(arg_vector) 148 | boundary_y = np.sin(arg_vector) 149 | 150 | ''' 151 | ### original implementation 152 | r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels) #func1 without @jit ### 153 | 154 | ### 40x speedup 155 | magnitude = np.sqrt(dx**2 + dy**2) 156 | r = np.max(magnitude, axis=2) 157 | c = np.argmax(magnitude, axis=2) 158 | idx = (np.arange(c.shape[0])[:,np.newaxis], np.arange(c.shape[1]), c) 159 | x, y = dx[idx], dy[idx] 160 | 161 | dotProd = x[:,:,np.newaxis] * boundary_x[np.newaxis,np.newaxis,:] + y[:,:,np.newaxis] * boundary_y[np.newaxis,np.newaxis,:] 162 | dotProd = np.concatenate((dotProd, -dotProd), axis=2) 163 | maxi = np.argmax(dotProd, axis=2) 164 | alfa = np.dstack((maxi % NUM_SECTOR, maxi)) ### 165 | ''' 166 | ### 200x speedup 167 | r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels) #with @jit 168 | ### ~0.001s 169 | 170 | nearest = np.ones((k), np.int) 171 | nearest[0:k/2] = -1 172 | 173 | w = np.zeros((k, 2), np.float32) 174 | a_x = np.concatenate((k/2 - np.arange(k/2) - 0.5, np.arange(k/2,k) - k/2 + 0.5)).astype(np.float32) 175 | b_x = np.concatenate((k/2 + np.arange(k/2) + 0.5, -np.arange(k/2,k) + k/2 - 0.5 + k)).astype(np.float32) 176 | w[:, 0] = 1.0 / a_x * ((a_x*b_x) / (a_x+b_x)) 177 | w[:, 1] = 1.0 / b_x * ((a_x*b_x) / (a_x+b_x)) 178 | 179 | ''' 180 | ### original implementation 181 | mapp['map'] = func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize) #func2 without @jit ### 182 | ''' 183 | ### 500x speedup 184 | mapp['map'] = func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize) #with @jit 185 | ### ~0.001s 186 | 187 | return mapp 188 | 189 | 190 | def normalizeAndTruncate(mapp, alfa): 191 | sizeX = mapp['sizeX'] 192 | sizeY = mapp['sizeY'] 193 | 194 | p = NUM_SECTOR 195 | xp = NUM_SECTOR * 3 196 | pp = NUM_SECTOR * 12 197 | 198 | ''' 199 | ### original implementation 200 | partOfNorm = np.zeros((sizeY*sizeX), np.float32) 201 | 202 | for i in xrange(sizeX*sizeY): 203 | pos = i * mapp['numFeatures'] 204 | partOfNorm[i] = np.sum(mapp['map'][pos:pos+p]**2) ### 205 | ''' 206 | ### 50x speedup 207 | idx = np.arange(0, sizeX*sizeY*mapp['numFeatures'], mapp['numFeatures']).reshape((sizeX*sizeY, 1)) + np.arange(p) 208 | partOfNorm = np.sum(mapp['map'][idx] ** 2, axis=1) ### ~0.0002s 209 | 210 | sizeX, sizeY = sizeX-2, sizeY-2 211 | 212 | 213 | ''' 214 | ### original implementation 215 | newData = func3(partOfNorm, mapp['map'], sizeX, sizeY, p, xp, pp) #func3 without @jit ### 216 | 217 | ### 30x speedup 218 | newData = np.zeros((sizeY*sizeX*pp), np.float32) 219 | idx = (np.arange(1,sizeY+1)[:,np.newaxis] * (sizeX+2) + np.arange(1,sizeX+1)).reshape((sizeY*sizeX, 1)) # much faster than it's List Comprehension counterpart (see next line) 220 | #idx = np.array([[i*(sizeX+2) + j] for i in xrange(1,sizeY+1) for j in xrange(1,sizeX+1)]) 221 | pos1 = idx * xp 222 | pos2 = np.arange(sizeY*sizeX)[:,np.newaxis] * pp 223 | 224 | valOfNorm1 = np.sqrt(partOfNorm[idx] + partOfNorm[idx+1] + partOfNorm[idx+sizeX+2] + partOfNorm[idx+sizeX+2+1]) + FLT_EPSILON 225 | valOfNorm2 = np.sqrt(partOfNorm[idx] + partOfNorm[idx+1] + partOfNorm[idx-sizeX-2] + partOfNorm[idx+sizeX-2+1]) + FLT_EPSILON 226 | valOfNorm3 = np.sqrt(partOfNorm[idx] + partOfNorm[idx-1] + partOfNorm[idx+sizeX+2] + partOfNorm[idx+sizeX+2-1]) + FLT_EPSILON 227 | valOfNorm4 = np.sqrt(partOfNorm[idx] + partOfNorm[idx-1] + partOfNorm[idx-sizeX-2] + partOfNorm[idx+sizeX-2-1]) + FLT_EPSILON 228 | 229 | map1 = mapp['map'][pos1 + np.arange(p)] 230 | map2 = mapp['map'][pos1 + np.arange(p,3*p)] 231 | 232 | newData[pos2 + np.arange(p)] = map1 / valOfNorm1 233 | newData[pos2 + np.arange(4*p,6*p)] = map2 / valOfNorm1 234 | newData[pos2 + np.arange(p,2*p)] = map1 / valOfNorm2 235 | newData[pos2 + np.arange(6*p,8*p)] = map2 / valOfNorm2 236 | newData[pos2 + np.arange(2*p,3*p)] = map1 / valOfNorm3 237 | newData[pos2 + np.arange(8*p,10*p)] = map2 / valOfNorm3 238 | newData[pos2 + np.arange(3*p,4*p)] = map1 / valOfNorm4 239 | newData[pos2 + np.arange(10*p,12*p)] = map2 / valOfNorm4 ### 240 | ''' 241 | ### 30x speedup 242 | newData = func3(partOfNorm, mapp['map'], sizeX, sizeY, p, xp, pp) #with @jit 243 | ### 244 | 245 | # truncation 246 | newData[newData > alfa] = alfa 247 | 248 | mapp['numFeatures'] = pp 249 | mapp['sizeX'] = sizeX 250 | mapp['sizeY'] = sizeY 251 | mapp['map'] = newData 252 | 253 | return mapp 254 | 255 | 256 | def PCAFeatureMaps(mapp): 257 | sizeX = mapp['sizeX'] 258 | sizeY = mapp['sizeY'] 259 | 260 | p = mapp['numFeatures'] 261 | pp = NUM_SECTOR * 3 + 4 262 | yp = 4 263 | xp = NUM_SECTOR 264 | 265 | nx = 1.0 / np.sqrt(xp*2) 266 | ny = 1.0 / np.sqrt(yp) 267 | 268 | ''' 269 | ### original implementation 270 | newData = func4(mapp['map'], p, sizeX, sizeY, pp, yp, xp, nx, ny) #func without @jit ### 271 | 272 | ### 7.5x speedup 273 | newData = np.zeros((sizeX*sizeY*pp), np.float32) 274 | idx1 = np.arange(2*xp).reshape((2*xp, 1)) + np.arange(xp*yp, 3*xp*yp, 2*xp) 275 | idx2 = np.arange(xp).reshape((xp, 1)) + np.arange(0, xp*yp, xp) 276 | idx3 = np.arange(0, 2*xp*yp, 2*xp).reshape((yp, 1)) + np.arange(xp*yp, xp*yp+2*xp) 277 | 278 | for i in xrange(sizeY): 279 | for j in xrange(sizeX): 280 | pos1 = (i*sizeX + j) * p 281 | pos2 = (i*sizeX + j) * pp 282 | 283 | newData[pos2 : pos2+2*xp] = np.sum(mapp['map'][pos1 + idx1], axis=1) * ny 284 | newData[pos2+2*xp : pos2+3*xp] = np.sum(mapp['map'][pos1 + idx2], axis=1) * ny 285 | newData[pos2+3*xp : pos2+3*xp+yp] = np.sum(mapp['map'][pos1 + idx3], axis=1) * nx ### 286 | 287 | ### 120x speedup 288 | newData = np.zeros((sizeX*sizeY*pp), np.float32) 289 | idx01 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(2*xp)).reshape((sizeX*sizeY*2*xp)) 290 | idx02 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(2*xp,3*xp)).reshape((sizeX*sizeY*xp)) 291 | idx03 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(3*xp,3*xp+yp)).reshape((sizeX*sizeY*yp)) 292 | 293 | idx11 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(2*xp)).reshape((sizeX*sizeY*2*xp, 1)) + np.arange(xp*yp, 3*xp*yp, 2*xp) 294 | idx12 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(xp)).reshape((sizeX*sizeY*xp, 1)) + np.arange(0, xp*yp, xp) 295 | idx13 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(0, 2*xp*yp, 2*xp)).reshape((sizeX*sizeY*yp, 1)) + np.arange(xp*yp, xp*yp+2*xp) 296 | 297 | newData[idx01] = np.sum(mapp['map'][idx11], axis=1) * ny 298 | newData[idx02] = np.sum(mapp['map'][idx12], axis=1) * ny 299 | newData[idx03] = np.sum(mapp['map'][idx13], axis=1) * nx ### 300 | ''' 301 | ### 190x speedup 302 | newData = func4(mapp['map'], p, sizeX, sizeY, pp, yp, xp, nx, ny) #with @jit 303 | ### 304 | 305 | mapp['numFeatures'] = pp 306 | mapp['map'] = newData 307 | 308 | return mapp 309 | -------------------------------------------------------------------------------- /kcftracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | import fhog 5 | 6 | # ffttools 7 | def fftd(img, backwards=False): 8 | # shape of img can be (m,n), (m,n,1) or (m,n,2) 9 | # in my test, fft provided by numpy and scipy are slower than cv2.dft 10 | return cv2.dft(np.float32(img), flags = ((cv2.DFT_INVERSE | cv2.DFT_SCALE) if backwards else cv2.DFT_COMPLEX_OUTPUT)) # 'flags =' is necessary! 11 | 12 | def real(img): 13 | return img[:,:,0] 14 | 15 | def imag(img): 16 | return img[:,:,1] 17 | 18 | def complexMultiplication(a, b): 19 | res = np.zeros(a.shape, a.dtype) 20 | 21 | res[:,:,0] = a[:,:,0]*b[:,:,0] - a[:,:,1]*b[:,:,1] 22 | res[:,:,1] = a[:,:,0]*b[:,:,1] + a[:,:,1]*b[:,:,0] 23 | return res 24 | 25 | def complexDivision(a, b): 26 | res = np.zeros(a.shape, a.dtype) 27 | divisor = 1. / (b[:,:,0]**2 + b[:,:,1]**2) 28 | 29 | res[:,:,0] = (a[:,:,0]*b[:,:,0] + a[:,:,1]*b[:,:,1]) * divisor 30 | res[:,:,1] = (a[:,:,1]*b[:,:,0] + a[:,:,0]*b[:,:,1]) * divisor 31 | return res 32 | 33 | def rearrange(img): 34 | #return np.fft.fftshift(img, axes=(0,1)) 35 | assert(img.ndim==2) 36 | img_ = np.zeros(img.shape, img.dtype) 37 | xh, yh = img.shape[1]/2, img.shape[0]/2 38 | img_[0:yh,0:xh], img_[yh:img.shape[0],xh:img.shape[1]] = img[yh:img.shape[0],xh:img.shape[1]], img[0:yh,0:xh] 39 | img_[0:yh,xh:img.shape[1]], img_[yh:img.shape[0],0:xh] = img[yh:img.shape[0],0:xh], img[0:yh,xh:img.shape[1]] 40 | return img_ 41 | 42 | 43 | # recttools 44 | def x2(rect): 45 | return rect[0] + rect[2] 46 | 47 | def y2(rect): 48 | return rect[1] + rect[3] 49 | 50 | def limit(rect, limit): 51 | if(rect[0]+rect[2] > limit[0]+limit[2]): 52 | rect[2] = limit[0]+limit[2]-rect[0] 53 | if(rect[1]+rect[3] > limit[1]+limit[3]): 54 | rect[3] = limit[1]+limit[3]-rect[1] 55 | if(rect[0] < limit[0]): 56 | rect[2] -= (limit[0]-rect[0]) 57 | rect[0] = limit[0] 58 | if(rect[1] < limit[1]): 59 | rect[3] -= (limit[1]-rect[1]) 60 | rect[1] = limit[1] 61 | if(rect[2] < 0): 62 | rect[2] = 0 63 | if(rect[3] < 0): 64 | rect[3] = 0 65 | return rect 66 | 67 | def getBorder(original, limited): 68 | res = [0,0,0,0] 69 | res[0] = limited[0] - original[0] 70 | res[1] = limited[1] - original[1] 71 | res[2] = x2(original) - x2(limited) 72 | res[3] = y2(original) - y2(limited) 73 | assert(np.all(np.array(res) >= 0)) 74 | return res 75 | 76 | def subwindow(img, window, borderType=cv2.BORDER_CONSTANT): 77 | cutWindow = [x for x in window] 78 | limit(cutWindow, [0,0,img.shape[1],img.shape[0]]) # modify cutWindow 79 | assert(cutWindow[2]>0 and cutWindow[3]>0) 80 | border = getBorder(window, cutWindow) 81 | res = img[cutWindow[1]:cutWindow[1]+cutWindow[3], cutWindow[0]:cutWindow[0]+cutWindow[2]] 82 | 83 | if(border != [0,0,0,0]): 84 | res = cv2.copyMakeBorder(res, border[1], border[3], border[0], border[2], borderType) 85 | return res 86 | 87 | 88 | 89 | # KCF tracker 90 | class KCFTracker: 91 | def __init__(self, hog=False, fixed_window=True, multiscale=False): 92 | self.lambdar = 0.0001 # regularization 93 | self.padding = 2.5 # extra area surrounding the target 94 | self.output_sigma_factor = 0.125 # bandwidth of gaussian target 95 | 96 | if(hog): # HOG feature 97 | # VOT 98 | self.interp_factor = 0.012 # linear interpolation factor for adaptation 99 | self.sigma = 0.6 # gaussian kernel bandwidth 100 | # TPAMI #interp_factor = 0.02 #sigma = 0.5 101 | self.cell_size = 4 # HOG cell size 102 | self._hogfeatures = True 103 | else: # raw gray-scale image # aka CSK tracker 104 | self.interp_factor = 0.075 105 | self.sigma = 0.2 106 | self.cell_size = 1 107 | self._hogfeatures = False 108 | 109 | if(multiscale): 110 | self.template_size = 96 # template size 111 | self.scale_step = 1.05 # scale step for multi-scale estimation 112 | self.scale_weight = 0.96 # to downweight detection scores of other scales for added stability 113 | elif(fixed_window): 114 | self.template_size = 96 115 | self.scale_step = 1 116 | else: 117 | self.template_size = 1 118 | self.scale_step = 1 119 | 120 | self._tmpl_sz = [0,0] # cv::Size, [width,height] #[int,int] 121 | self._roi = [0.,0.,0.,0.] # cv::Rect2f, [x,y,width,height] #[float,float,float,float] 122 | self.size_patch = [0,0,0] #[int,int,int] 123 | self._scale = 1. # float 124 | self._alphaf = None # numpy.ndarray (size_patch[0], size_patch[1], 2) 125 | self._prob = None # numpy.ndarray (size_patch[0], size_patch[1], 2) 126 | self._tmpl = None # numpy.ndarray raw: (size_patch[0], size_patch[1]) hog: (size_patch[2], size_patch[0]*size_patch[1]) 127 | self.hann = None # numpy.ndarray raw: (size_patch[0], size_patch[1]) hog: (size_patch[2], size_patch[0]*size_patch[1]) 128 | 129 | def subPixelPeak(self, left, center, right): 130 | divisor = 2*center - right - left #float 131 | return (0 if abs(divisor)<1e-3 else 0.5*(right-left)/divisor) 132 | 133 | def createHanningMats(self): 134 | hann2t, hann1t = np.ogrid[0:self.size_patch[0], 0:self.size_patch[1]] 135 | 136 | hann1t = 0.5 * (1 - np.cos(2*np.pi*hann1t/(self.size_patch[1]-1))) 137 | hann2t = 0.5 * (1 - np.cos(2*np.pi*hann2t/(self.size_patch[0]-1))) 138 | hann2d = hann2t * hann1t 139 | 140 | if(self._hogfeatures): 141 | hann1d = hann2d.reshape(self.size_patch[0]*self.size_patch[1]) 142 | self.hann = np.zeros((self.size_patch[2], 1), np.float32) + hann1d 143 | else: 144 | self.hann = hann2d 145 | self.hann = self.hann.astype(np.float32) 146 | 147 | def createGaussianPeak(self, sizey, sizex): 148 | syh, sxh = sizey/2, sizex/2 149 | output_sigma = np.sqrt(sizex*sizey) / self.padding * self.output_sigma_factor 150 | mult = -0.5 / (output_sigma*output_sigma) 151 | y, x = np.ogrid[0:sizey, 0:sizex] 152 | y, x = (y-syh)**2, (x-sxh)**2 153 | res = np.exp(mult * (y+x)) 154 | return fftd(res) 155 | 156 | def gaussianCorrelation(self, x1, x2): 157 | if(self._hogfeatures): 158 | c = np.zeros((self.size_patch[0], self.size_patch[1]), np.float32) 159 | for i in xrange(self.size_patch[2]): 160 | x1aux = x1[i, :].reshape((self.size_patch[0], self.size_patch[1])) 161 | x2aux = x2[i, :].reshape((self.size_patch[0], self.size_patch[1])) 162 | caux = cv2.mulSpectrums(fftd(x1aux), fftd(x2aux), 0, conjB = True) 163 | caux = real(fftd(caux, True)) 164 | #caux = rearrange(caux) 165 | c += caux 166 | c = rearrange(c) 167 | else: 168 | c = cv2.mulSpectrums(fftd(x1), fftd(x2), 0, conjB = True) # 'conjB=' is necessary! 169 | c = fftd(c, True) 170 | c = real(c) 171 | c = rearrange(c) 172 | 173 | if(x1.ndim==3 and x2.ndim==3): 174 | d = (np.sum(x1[:,:,0]*x1[:,:,0]) + np.sum(x2[:,:,0]*x2[:,:,0]) - 2.0*c) / (self.size_patch[0]*self.size_patch[1]*self.size_patch[2]) 175 | elif(x1.ndim==2 and x2.ndim==2): 176 | d = (np.sum(x1*x1) + np.sum(x2*x2) - 2.0*c) / (self.size_patch[0]*self.size_patch[1]*self.size_patch[2]) 177 | 178 | d = d * (d>=0) 179 | d = np.exp(-d / (self.sigma*self.sigma)) 180 | 181 | return d 182 | 183 | def getFeatures(self, image, inithann, scale_adjust=1.0): 184 | extracted_roi = [0,0,0,0] #[int,int,int,int] 185 | cx = self._roi[0] + self._roi[2]/2 #float 186 | cy = self._roi[1] + self._roi[3]/2 #float 187 | 188 | if(inithann): 189 | padded_w = self._roi[2] * self.padding 190 | padded_h = self._roi[3] * self.padding 191 | 192 | if(self.template_size > 1): 193 | if(padded_w >= padded_h): 194 | self._scale = padded_w / float(self.template_size) 195 | else: 196 | self._scale = padded_h / float(self.template_size) 197 | self._tmpl_sz[0] = int(padded_w / self._scale) 198 | self._tmpl_sz[1] = int(padded_h / self._scale) 199 | else: 200 | self._tmpl_sz[0] = int(padded_w) 201 | self._tmpl_sz[1] = int(padded_h) 202 | self._scale = 1. 203 | 204 | if(self._hogfeatures): 205 | self._tmpl_sz[0] = int(self._tmpl_sz[0]) / (2*self.cell_size) * 2*self.cell_size + 2*self.cell_size 206 | self._tmpl_sz[1] = int(self._tmpl_sz[1]) / (2*self.cell_size) * 2*self.cell_size + 2*self.cell_size 207 | else: 208 | self._tmpl_sz[0] = int(self._tmpl_sz[0]) / 2 * 2 209 | self._tmpl_sz[1] = int(self._tmpl_sz[1]) / 2 * 2 210 | 211 | extracted_roi[2] = int(scale_adjust * self._scale * self._tmpl_sz[0]) 212 | extracted_roi[3] = int(scale_adjust * self._scale * self._tmpl_sz[1]) 213 | extracted_roi[0] = int(cx - extracted_roi[2]/2) 214 | extracted_roi[1] = int(cy - extracted_roi[3]/2) 215 | 216 | z = subwindow(image, extracted_roi, cv2.BORDER_REPLICATE) 217 | if(z.shape[1]!=self._tmpl_sz[0] or z.shape[0]!=self._tmpl_sz[1]): 218 | z = cv2.resize(z, tuple(self._tmpl_sz)) 219 | 220 | if(self._hogfeatures): 221 | mapp = {'sizeX':0, 'sizeY':0, 'numFeatures':0, 'map':0} 222 | mapp = fhog.getFeatureMaps(z, self.cell_size, mapp) 223 | mapp = fhog.normalizeAndTruncate(mapp, 0.2) 224 | mapp = fhog.PCAFeatureMaps(mapp) 225 | self.size_patch = map(int, [mapp['sizeY'], mapp['sizeX'], mapp['numFeatures']]) 226 | FeaturesMap = mapp['map'].reshape((self.size_patch[0]*self.size_patch[1], self.size_patch[2])).T # (size_patch[2], size_patch[0]*size_patch[1]) 227 | else: 228 | if(z.ndim==3 and z.shape[2]==3): 229 | FeaturesMap = cv2.cvtColor(z, cv2.COLOR_BGR2GRAY) # z:(size_patch[0], size_patch[1], 3) FeaturesMap:(size_patch[0], size_patch[1]) #np.int8 #0~255 230 | elif(z.ndim==2): 231 | FeaturesMap = z #(size_patch[0], size_patch[1]) #np.int8 #0~255 232 | FeaturesMap = FeaturesMap.astype(np.float32) / 255.0 - 0.5 233 | self.size_patch = [z.shape[0], z.shape[1], 1] 234 | 235 | if(inithann): 236 | self.createHanningMats() # createHanningMats need size_patch 237 | 238 | FeaturesMap = self.hann * FeaturesMap 239 | return FeaturesMap 240 | 241 | def detect(self, z, x): 242 | k = self.gaussianCorrelation(x, z) 243 | res = real(fftd(complexMultiplication(self._alphaf, fftd(k)), True)) 244 | 245 | _, pv, _, pi = cv2.minMaxLoc(res) # pv:float pi:tuple of int 246 | p = [float(pi[0]), float(pi[1])] # cv::Point2f, [x,y] #[float,float] 247 | 248 | if(pi[0]>0 and pi[0]0 and pi[1]0 and roi[3]>0) 269 | self._tmpl = self.getFeatures(image, 1) 270 | self._prob = self.createGaussianPeak(self.size_patch[0], self.size_patch[1]) 271 | self._alphaf = np.zeros((self.size_patch[0], self.size_patch[1], 2), np.float32) 272 | self.train(self._tmpl, 1.0) 273 | 274 | def update(self, image): 275 | if(self._roi[0]+self._roi[2] <= 0): self._roi[0] = -self._roi[2] + 1 276 | if(self._roi[1]+self._roi[3] <= 0): self._roi[1] = -self._roi[2] + 1 277 | if(self._roi[0] >= image.shape[1]-1): self._roi[0] = image.shape[1] - 2 278 | if(self._roi[1] >= image.shape[0]-1): self._roi[1] = image.shape[0] - 2 279 | 280 | cx = self._roi[0] + self._roi[2]/2. 281 | cy = self._roi[1] + self._roi[3]/2. 282 | 283 | loc, peak_value = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0)) 284 | 285 | if(self.scale_step != 1): 286 | # Test at a smaller _scale 287 | new_loc1, new_peak_value1 = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0/self.scale_step)) 288 | # Test at a bigger _scale 289 | new_loc2, new_peak_value2 = self.detect(self._tmpl, self.getFeatures(image, 0, self.scale_step)) 290 | 291 | if(self.scale_weight*new_peak_value1 > peak_value and new_peak_value1>new_peak_value2): 292 | loc = new_loc1 293 | peak_value = new_peak_value1 294 | self._scale /= self.scale_step 295 | self._roi[2] /= self.scale_step 296 | self._roi[3] /= self.scale_step 297 | elif(self.scale_weight*new_peak_value2 > peak_value): 298 | loc = new_loc2 299 | peak_value = new_peak_value2 300 | self._scale *= self.scale_step 301 | self._roi[2] *= self.scale_step 302 | self._roi[3] *= self.scale_step 303 | 304 | self._roi[0] = cx - self._roi[2]/2.0 + loc[0]*self.cell_size*self._scale 305 | self._roi[1] = cy - self._roi[3]/2.0 + loc[1]*self.cell_size*self._scale 306 | 307 | if(self._roi[0] >= image.shape[1]-1): self._roi[0] = image.shape[1] - 1 308 | if(self._roi[1] >= image.shape[0]-1): self._roi[1] = image.shape[0] - 1 309 | if(self._roi[0]+self._roi[2] <= 0): self._roi[0] = -self._roi[2] + 2 310 | if(self._roi[1]+self._roi[3] <= 0): self._roi[1] = -self._roi[3] + 2 311 | assert(self._roi[2]>0 and self._roi[3]>0) 312 | 313 | x = self.getFeatures(image, 0, 1.0) 314 | self.train(x, self.interp_factor) 315 | 316 | return self._roi 317 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import sys 4 | from time import time 5 | 6 | import kcftracker 7 | 8 | selectingObject = False 9 | initTracking = False 10 | onTracking = False 11 | ix, iy, cx, cy = -1, -1, -1, -1 12 | w, h = 0, 0 13 | 14 | inteval = 1 15 | duration = 0.01 16 | 17 | # mouse callback function 18 | def draw_boundingbox(event, x, y, flags, param): 19 | global selectingObject, initTracking, onTracking, ix, iy, cx,cy, w, h 20 | 21 | if event == cv2.EVENT_LBUTTONDOWN: 22 | selectingObject = True 23 | onTracking = False 24 | ix, iy = x, y 25 | cx, cy = x, y 26 | 27 | elif event == cv2.EVENT_MOUSEMOVE: 28 | cx, cy = x, y 29 | 30 | elif event == cv2.EVENT_LBUTTONUP: 31 | selectingObject = False 32 | if(abs(x-ix)>10 and abs(y-iy)>10): 33 | w, h = abs(x - ix), abs(y - iy) 34 | ix, iy = min(x, ix), min(y, iy) 35 | initTracking = True 36 | else: 37 | onTracking = False 38 | 39 | elif event == cv2.EVENT_RBUTTONDOWN: 40 | onTracking = False 41 | if(w>0): 42 | ix, iy = x-w/2, y-h/2 43 | initTracking = True 44 | 45 | 46 | 47 | if __name__ == '__main__': 48 | 49 | if(len(sys.argv)==1): 50 | cap = cv2.VideoCapture(0) 51 | elif(len(sys.argv)==2): 52 | if(sys.argv[1].isdigit()): # True if sys.argv[1] is str of a nonnegative integer 53 | cap = cv2.VideoCapture(int(sys.argv[1])) 54 | else: 55 | cap = cv2.VideoCapture(sys.argv[1]) 56 | inteval = 30 57 | else: assert(0), "too many arguments" 58 | 59 | tracker = kcftracker.KCFTracker(True, True, True) # hog, fixed_window, multiscale 60 | #if you use hog feature, there will be a short pause after you draw a first boundingbox, that is due to the use of Numba. 61 | 62 | cv2.namedWindow('tracking') 63 | cv2.setMouseCallback('tracking',draw_boundingbox) 64 | 65 | while(cap.isOpened()): 66 | ret, frame = cap.read() 67 | if not ret: 68 | break 69 | 70 | if(selectingObject): 71 | cv2.rectangle(frame,(ix,iy), (cx,cy), (0,255,255), 1) 72 | elif(initTracking): 73 | cv2.rectangle(frame,(ix,iy), (ix+w,iy+h), (0,255,255), 2) 74 | 75 | tracker.init([ix,iy,w,h], frame) 76 | 77 | initTracking = False 78 | onTracking = True 79 | elif(onTracking): 80 | t0 = time() 81 | boundingbox = tracker.update(frame) 82 | t1 = time() 83 | 84 | boundingbox = map(int, boundingbox) 85 | cv2.rectangle(frame,(boundingbox[0],boundingbox[1]), (boundingbox[0]+boundingbox[2],boundingbox[1]+boundingbox[3]), (0,255,255), 1) 86 | 87 | duration = 0.8*duration + 0.2*(t1-t0) 88 | #duration = t1-t0 89 | cv2.putText(frame, 'FPS: '+str(1/duration)[:4].strip('.'), (8,20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,255), 2) 90 | 91 | cv2.imshow('tracking', frame) 92 | c = cv2.waitKey(inteval) & 0xFF 93 | if c==27 or c==ord('q'): 94 | break 95 | 96 | cap.release() 97 | cv2.destroyAllWindows() 98 | --------------------------------------------------------------------------------