├── .gitignore
├── LICENSE
├── run.py
├── README.md
├── fhog.py
└── tracker.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/__pycache__
/.vscode

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Ryan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
import cv2
from tracker import KCFTracker

def run_tracker(cam, frame, bbox):
    tracker = KCFTracker(True, True, True)  # (hog, fixed_window, multi_scale)
    tracker.init(bbox, frame)

    while True:
        ok, frame = cam.read()
        if not ok:
            break

        timer = cv2.getTickCount()
        bbox = tracker.update(frame)
        bbox = list(map(int, bbox))
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)

        # Draw the tracked bounding box
        p1 = (int(bbox[0]), int(bbox[1]))
        p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
        cv2.rectangle(frame, p1, p2, (255, 0, 0), 2, 1)

        # Put FPS
        cv2.putText(frame, "FPS : " + str(int(fps)), (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50, 170, 50), 2)

        cv2.imshow("Tracking", frame)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    cam.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    video = cv2.VideoCapture(0)
    ok, frame = video.read()
    bbox = cv2.selectROI('Select ROI', frame, False)

    # selectROI returns an all-zero box when the selection is cancelled
    if min(bbox) == 0: exit(0)
    run_tracker(video, frame, bbox)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# KCF-DSST-py
Python implementation of the DSST tracking algorithm, built on a KCF tracker.

This repository adds the DSST scale estimation algorithm from [Baseline 3] to the original KCF tracker: starting from the Python implementation of the KCF tracker in [Baseline 2], the DSST code was translated from C++ and merged into the Python KCF.
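
## Usage
A minimal sketch of how `run.py` drives the tracker (the webcam index 0 is an arbitrary choice; any `cv2.VideoCapture` source works):

```python
import cv2
from tracker import KCFTracker

video = cv2.VideoCapture(0)
ok, frame = video.read()
bbox = cv2.selectROI('Select ROI', frame, False)  # (x, y, w, h)

tracker = KCFTracker(True, True, True)  # hog, fixed_window, multi_scale
tracker.init(bbox, frame)

while True:
    ok, frame = video.read()
    if not ok:
        break
    x, y, w, h = map(int, tracker.update(frame))
    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
    cv2.imshow('Tracking', frame)
    if cv2.waitKey(1) & 0xff == 27:  # ESC to quit
        break
```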

## Requirements
- Python 2.7 (or 3)
- NumPy
- Numba (needed if you want to use the HOG feature)
- OpenCV (ensure that you can import cv2 in Python)

## Baseline
Some implementations of the KCF and DSST algorithms.

### 1. KCF Tracker in C++
[C++ KCF Tracker](https://github.com/joaofaro/KCFcpp): Original C++ implementation of the Kernelized Correlation Filter (KCF) [1, 2].

### 2. KCF Tracker in Python
[KCF tracker in Python](https://github.com/uoip/KCFpy): Python implementation of the KCF tracker.

### 3. DSST Tracker in C++
[KCF-DSST](https://github.com/liliumao/KCF-DSST): C++ implementation of the Discriminative Scale Space Tracker (DSST) [3].

## Reference
[1] J. F. Henriques, R. Caseiro, P. Martins, J. Batista, "High-Speed Tracking with Kernelized Correlation Filters", TPAMI 2015.

[2] J. F. Henriques, R. Caseiro, P. Martins, J. Batista, "Exploiting the Circulant Structure of Tracking-by-Detection with Kernels", ECCV 2012.

[3] M. Danelljan, G. Häger, F. Shahbaz Khan, M. Felsberg, "Accurate Scale Estimation for Robust Visual Tracking", BMVC 2014.

--------------------------------------------------------------------------------
/fhog.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
from numba import jit

import sys
PY3 = sys.version_info >= (3,)

if PY3:
    xrange = range

# constants
NUM_SECTOR = 9
FLT_EPSILON = 1e-07


@jit(cache=True)
def func1(dx, dy, boundary_x, boundary_y, height, width, numChannels):
    # Per pixel: gradient magnitude of the dominant channel and its orientation bin
    # (contrast-insensitive bin in alfa[...,0], contrast-sensitive bin in alfa[...,1])
    r = np.zeros((height, width), np.float32)
    alfa = np.zeros((height, width, 2), np.int32)

    for j in xrange(1, height-1):
        for i in xrange(1, width-1):
            c = 0
            x = dx[j, i, c]
            y = dy[j, i, c]
            r[j, i] = np.sqrt(x*x + y*y)

            for ch in xrange(1, numChannels):
                tx = dx[j, i, ch]
                ty = dy[j, i, ch]
                magnitude = np.sqrt(tx*tx + ty*ty)
                if(magnitude > r[j, i]):
                    r[j, i] = magnitude
                    c = ch
                    x = tx
                    y = ty

            mmax = boundary_x[0]*x + boundary_y[0]*y
            maxi = 0

            for kk in xrange(0, NUM_SECTOR):
                dotProd = boundary_x[kk]*x + boundary_y[kk]*y
                if(dotProd > mmax):
                    mmax = dotProd
                    maxi = kk
                elif(-dotProd > mmax):
                    mmax = -dotProd
                    maxi = kk + NUM_SECTOR

            alfa[j, i, 0] = maxi % NUM_SECTOR
            alfa[j, i, 1] = maxi
    return r, alfa

@jit(cache=True)
def func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize):
    mapp = np.zeros((sizeX*sizeY*p), np.float32)
    for i in xrange(sizeY):
        for j in xrange(sizeX):
            for ii in xrange(k):
                for jj in xrange(k):
                    if((i * k + ii > 0) and (i * k + ii < height - 1) and (j * k + jj > 0) and (j * k + jj < width - 1)):
                        mapp[i*stringSize + j*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,0]
                        mapp[i*stringSize + j*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,0]
                        if((i + nearest[ii] >= 0) and (i + nearest[ii] <= sizeY - 1)):
                            mapp[(i+nearest[ii])*stringSize + j*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,0]
                            mapp[(i+nearest[ii])*stringSize + j*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,0]
                        if((j + nearest[jj] >= 0) and (j + nearest[jj] <= sizeX - 1)):
                            mapp[i*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,1]
                            mapp[i*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,1]
                        if((i + nearest[ii] >= 0) and (i + nearest[ii] <= sizeY - 1) and (j + nearest[jj] >= 0) and (j + nearest[jj] <= sizeX - 1)):
                            mapp[(i+nearest[ii])*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,1]
                            mapp[(i+nearest[ii])*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,1]
    return mapp

@jit(cache=True)
def func3(partOfNorm, mappmap, sizeX, sizeY, p, xp, pp):
    newData = np.zeros((sizeY*sizeX*pp), np.float32)
    for i in xrange(1, sizeY+1):
        for j in xrange(1, sizeX+1):
            pos1 = i * (sizeX+2) * xp + j * xp
            pos2 = (i-1) * sizeX * pp + (j-1) * pp

            valOfNorm = np.sqrt(partOfNorm[(i    )*(sizeX + 2) + (j    )] +
                                partOfNorm[(i    )*(sizeX + 2) + (j + 1)] +
                                partOfNorm[(i + 1)*(sizeX + 2) + (j    )] +
                                partOfNorm[(i + 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON
            newData[pos2:pos2+p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+4*p:pos2+6*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            valOfNorm = np.sqrt(partOfNorm[(i    )*(sizeX + 2) + (j    )] +
                                partOfNorm[(i    )*(sizeX + 2) + (j + 1)] +
                                partOfNorm[(i - 1)*(sizeX + 2) + (j    )] +
                                partOfNorm[(i - 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON
            newData[pos2+p:pos2+2*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+6*p:pos2+8*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            valOfNorm = np.sqrt(partOfNorm[(i    )*(sizeX + 2) + (j    )] +
                                partOfNorm[(i    )*(sizeX + 2) + (j - 1)] +
                                partOfNorm[(i + 1)*(sizeX + 2) + (j    )] +
                                partOfNorm[(i + 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON
            newData[pos2+2*p:pos2+3*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+8*p:pos2+10*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            valOfNorm = np.sqrt(partOfNorm[(i    )*(sizeX + 2) + (j    )] +
                                partOfNorm[(i    )*(sizeX + 2) + (j - 1)] +
                                partOfNorm[(i - 1)*(sizeX + 2) + (j    )] +
                                partOfNorm[(i - 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON
            newData[pos2+3*p:pos2+4*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+10*p:pos2+12*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm
    return newData

@jit(cache=True)
def func4(mappmap, p, sizeX, sizeY, pp, yp, xp, nx, ny):
    newData = np.zeros((sizeX*sizeY*pp), np.float32)
    for i in xrange(sizeY):
        for j in xrange(sizeX):
            pos1 = (i*sizeX + j) * p
            pos2 = (i*sizeX + j) * pp

            for jj in xrange(2 * xp):  # 2*9
                newData[pos2 + jj] = np.sum(mappmap[pos1 + yp*xp + jj : pos1 + 3*yp*xp + jj : 2*xp]) * ny
            for jj in xrange(xp):  # 9
                newData[pos2 + 2*xp + jj] = np.sum(mappmap[pos1 + jj : pos1 + jj + yp*xp : xp]) * ny
            for ii in xrange(yp):  # 4
                newData[pos2 + 3*xp + ii] = np.sum(mappmap[pos1 + yp*xp + ii*xp*2 : pos1 + yp*xp + ii*xp*2 + 2*xp]) * nx
    return newData


def getFeatureMaps(image, k, mapp):
    kernel = np.array([[-1., 0., 1.]], np.float32)

    height = image.shape[0]
    width = image.shape[1]
    assert(image.ndim == 3 and image.shape[2] == 3)
    numChannels = 3  # (1 if image.ndim==2 else image.shape[2])

    sizeX = width // k
    sizeY = height // k
    px = 3 * NUM_SECTOR
    p = px
    stringSize = sizeX * p

    mapp['sizeX'] = sizeX
    mapp['sizeY'] = sizeY
    mapp['numFeatures'] = p
    mapp['map'] = np.zeros((mapp['sizeX']*mapp['sizeY']*mapp['numFeatures']), np.float32)

    dx = cv2.filter2D(np.float32(image), -1, kernel)  # np.float32(...) is necessary
    dy = cv2.filter2D(np.float32(image), -1, kernel.T)

    arg_vector = np.arange(NUM_SECTOR+1).astype(np.float32) * np.pi / NUM_SECTOR
    boundary_x = np.cos(arg_vector)
    boundary_y = np.sin(arg_vector)

    '''
    ### original implementation
    r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels)  # func1 without @jit ###
    ### 40x speedup
    magnitude = np.sqrt(dx**2 + dy**2)
    r = np.max(magnitude, axis=2)
    c = np.argmax(magnitude, axis=2)
    idx = (np.arange(c.shape[0])[:,np.newaxis], np.arange(c.shape[1]), c)
    x, y = dx[idx], dy[idx]
    dotProd = x[:,:,np.newaxis] * boundary_x[np.newaxis,np.newaxis,:] + y[:,:,np.newaxis] * boundary_y[np.newaxis,np.newaxis,:]
    dotProd = np.concatenate((dotProd, -dotProd), axis=2)
    maxi = np.argmax(dotProd, axis=2)
    alfa = np.dstack((maxi % NUM_SECTOR, maxi)) ###
    '''
    ### 200x speedup
    r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels)  # with @jit
    ### ~0.001s

    nearest = np.ones((k), np.int32)  # np.int is a removed NumPy alias; use a concrete type
    nearest[0:k//2] = -1

    w = np.zeros((k, 2), np.float32)
    a_x = np.concatenate((k/2 - np.arange(k/2) - 0.5, np.arange(k/2, k) - k/2 + 0.5)).astype(np.float32)
    b_x = np.concatenate((k/2 + np.arange(k/2) + 0.5, -np.arange(k/2, k) + k/2 - 0.5 + k)).astype(np.float32)
    w[:, 0] = 1.0 / a_x * ((a_x*b_x) / (a_x+b_x))
    w[:, 1] = 1.0 / b_x * ((a_x*b_x) / (a_x+b_x))

    '''
    ### original implementation
    mapp['map'] = func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize)  # func2 without @jit ###
    '''
    ### 500x speedup
    mapp['map'] = func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize)  # with @jit
    ### ~0.001s

    return mapp


def normalizeAndTruncate(mapp, alfa):
    sizeX = mapp['sizeX']
    sizeY = mapp['sizeY']

    p = NUM_SECTOR
    xp = NUM_SECTOR * 3
    pp = NUM_SECTOR * 12

    '''
    ### original implementation
    partOfNorm = np.zeros((sizeY*sizeX), np.float32)
    for i in xrange(sizeX*sizeY):
        pos = i * mapp['numFeatures']
        partOfNorm[i] = np.sum(mapp['map'][pos:pos+p]**2) ###
    '''
    ### 50x speedup
    idx = np.arange(0, sizeX*sizeY*mapp['numFeatures'], mapp['numFeatures']).reshape((sizeX*sizeY, 1)) + np.arange(p)
    partOfNorm = np.sum(mapp['map'][idx] ** 2, axis=1)  ### ~0.0002s

    sizeX, sizeY = sizeX-2, sizeY-2

    '''
    ### original implementation
    newData = func3(partOfNorm, mapp['map'], sizeX, sizeY, p, xp, pp)  # func3 without @jit ###

    ### 30x speedup
    newData = np.zeros((sizeY*sizeX*pp), np.float32)
    idx = (np.arange(1,sizeY+1)[:,np.newaxis] * (sizeX+2) + np.arange(1,sizeX+1)).reshape((sizeY*sizeX, 1))  # much faster than its list-comprehension counterpart (see next line)
    #idx = np.array([[i*(sizeX+2) + j] for i in xrange(1,sizeY+1) for j in xrange(1,sizeX+1)])
    pos1 = idx * xp
    pos2 = np.arange(sizeY*sizeX)[:,np.newaxis] * pp

    valOfNorm1 = np.sqrt(partOfNorm[idx] + partOfNorm[idx+1] + partOfNorm[idx+sizeX+2] + partOfNorm[idx+sizeX+2+1]) + FLT_EPSILON
    valOfNorm2 = np.sqrt(partOfNorm[idx] + partOfNorm[idx+1] + partOfNorm[idx-sizeX-2] + partOfNorm[idx-sizeX-2+1]) + FLT_EPSILON
    valOfNorm3 = np.sqrt(partOfNorm[idx] + partOfNorm[idx-1] + partOfNorm[idx+sizeX+2] + partOfNorm[idx+sizeX+2-1]) + FLT_EPSILON
    valOfNorm4 = np.sqrt(partOfNorm[idx] + partOfNorm[idx-1] + partOfNorm[idx-sizeX-2] + partOfNorm[idx-sizeX-2-1]) + FLT_EPSILON
    map1 = mapp['map'][pos1 + np.arange(p)]
    map2 = mapp['map'][pos1 + np.arange(p,3*p)]
    newData[pos2 + np.arange(p)] = map1 / valOfNorm1
    newData[pos2 + np.arange(4*p,6*p)] = map2 / valOfNorm1
    newData[pos2 + np.arange(p,2*p)] = map1 / valOfNorm2
    newData[pos2 + np.arange(6*p,8*p)] = map2 / valOfNorm2
    newData[pos2 + np.arange(2*p,3*p)] = map1 / valOfNorm3
    newData[pos2 + np.arange(8*p,10*p)] = map2 / valOfNorm3
    newData[pos2 + np.arange(3*p,4*p)] = map1 / valOfNorm4
    newData[pos2 + np.arange(10*p,12*p)] = map2 / valOfNorm4 ###
    '''
    ### 30x speedup
    newData = func3(partOfNorm, mapp['map'], sizeX, sizeY, p, xp, pp)  # with @jit
    ###

    # truncation
    newData[newData > alfa] = alfa

    mapp['numFeatures'] = pp
    mapp['sizeX'] = sizeX
    mapp['sizeY'] = sizeY
    mapp['map'] = newData

    return mapp


def PCAFeatureMaps(mapp):
    sizeX = mapp['sizeX']
    sizeY = mapp['sizeY']

    p = mapp['numFeatures']
    pp = NUM_SECTOR * 3 + 4
    yp = 4
    xp = NUM_SECTOR

    nx = 1.0 / np.sqrt(xp*2)
    ny = 1.0 / np.sqrt(yp)

    '''
    ### original implementation
    newData = func4(mapp['map'], p, sizeX, sizeY, pp, yp, xp, nx, ny)  # func4 without @jit ###
    ### 7.5x speedup
    newData = np.zeros((sizeX*sizeY*pp), np.float32)
    idx1 = np.arange(2*xp).reshape((2*xp, 1)) + np.arange(xp*yp, 3*xp*yp, 2*xp)
    idx2 = np.arange(xp).reshape((xp, 1)) + np.arange(0, xp*yp, xp)
    idx3 = np.arange(0, 2*xp*yp, 2*xp).reshape((yp, 1)) + np.arange(xp*yp, xp*yp+2*xp)
    for i in xrange(sizeY):
        for j in xrange(sizeX):
            pos1 = (i*sizeX + j) * p
            pos2 = (i*sizeX + j) * pp

            newData[pos2 : pos2+2*xp] = np.sum(mapp['map'][pos1 + idx1], axis=1) * ny
            newData[pos2+2*xp : pos2+3*xp] = np.sum(mapp['map'][pos1 + idx2], axis=1) * ny
            newData[pos2+3*xp : pos2+3*xp+yp] = np.sum(mapp['map'][pos1 + idx3], axis=1) * nx ###
    ### 120x speedup
    newData = np.zeros((sizeX*sizeY*pp), np.float32)
    idx01 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(2*xp)).reshape((sizeX*sizeY*2*xp))
    idx02 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(2*xp,3*xp)).reshape((sizeX*sizeY*xp))
    idx03 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(3*xp,3*xp+yp)).reshape((sizeX*sizeY*yp))
    idx11 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(2*xp)).reshape((sizeX*sizeY*2*xp, 1)) + np.arange(xp*yp, 3*xp*yp, 2*xp)
    idx12 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(xp)).reshape((sizeX*sizeY*xp, 1)) + np.arange(0, xp*yp, xp)
    idx13 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(0, 2*xp*yp, 2*xp)).reshape((sizeX*sizeY*yp, 1)) + np.arange(xp*yp, xp*yp+2*xp)
    newData[idx01] = np.sum(mapp['map'][idx11], axis=1) * ny
    newData[idx02] = np.sum(mapp['map'][idx12], axis=1) * ny
    newData[idx03] = np.sum(mapp['map'][idx13], axis=1) * nx ###
    '''
    ### 190x speedup
    newData = func4(mapp['map'], p, sizeX, sizeY, pp, yp, xp, nx, ny)  # with @jit
    ###

    mapp['numFeatures'] = pp
    mapp['map'] = newData

    return mapp
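

# A minimal smoke test of the full FHOG pipeline (a sketch, not used by the
# tracker; the image and cell sizes below are arbitrary). The final descriptor
# has 3*NUM_SECTOR + 4 = 31 channels per cell, and normalizeAndTruncate trims
# two border cells in each dimension.
if __name__ == '__main__':
    demo_img = np.random.randint(0, 256, (96, 96, 3), np.uint8)
    demo_map = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
    demo_map = getFeatureMaps(demo_img, 4, demo_map)   # 27 channels per cell
    demo_map = normalizeAndTruncate(demo_map, 0.2)     # 108 channels per cell
    demo_map = PCAFeatureMaps(demo_map)                # 31 channels per cell
    print(demo_map['sizeY'], demo_map['sizeX'], demo_map['numFeatures'])  # 22 22 31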

--------------------------------------------------------------------------------
/tracker.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
import fhog

import sys
PY3 = sys.version_info >= (3,)

if PY3:
    xrange = range


# ffttools
# Discrete Fourier transform and its inverse
def fftd(img, backwards=False, byRow=False):
    # shape of img can be (m,n), (m,n,1) or (m,n,2)
    # in my test, fft provided by numpy and scipy are slower than cv2.dft
    # return cv2.dft(np.float32(img), flags=((cv2.DFT_INVERSE | cv2.DFT_SCALE) if backwards else cv2.DFT_COMPLEX_OUTPUT))  # 'flags =' is necessary!
    # DFT_INVERSE: perform a 1D or 2D inverse transform instead of the default forward one.
    # DFT_SCALE: scale the result by the number of elements (for N elements the output is scaled by 1/N); usually combined with DFT_INVERSE.
    # DFT_COMPLEX_OUTPUT: forward transform of a 1D or 2D real array; the result is a complex array with conjugate symmetry.

    if byRow:
        return cv2.dft(np.float32(img), flags=(cv2.DFT_ROWS | cv2.DFT_COMPLEX_OUTPUT))
    else:
        return cv2.dft(np.float32(img), flags=((cv2.DFT_INVERSE | cv2.DFT_SCALE) if backwards else cv2.DFT_COMPLEX_OUTPUT))

# real part
def real(img):
    return img[:, :, 0]

# imaginary part
def imag(img):
    return img[:, :, 1]

# product of two complex numbers: (a+bi)(c+di) = (ac-bd) + (ad+bc)i
def complexMultiplication(a, b):
    res = np.zeros(a.shape, a.dtype)

    res[:, :, 0] = a[:, :, 0] * b[:, :, 0] - a[:, :, 1] * b[:, :, 1]
    res[:, :, 1] = a[:, :, 0] * b[:, :, 1] + a[:, :, 1] * b[:, :, 0]
    return res

# quotient of two complex numbers: (a+bi)/(c+di) = (ac+bd)/(c*c+d*d) + ((bc-ad)/(c*c+d*d))i
def complexDivision(a, b):
    res = np.zeros(a.shape, a.dtype)
    divisor = 1. / (b[:, :, 0] ** 2 + b[:, :, 1] ** 2)

    res[:, :, 0] = (a[:, :, 0] * b[:, :, 0] + a[:, :, 1] * b[:, :, 1]) * divisor
    # note the minus sign: the imaginary part is (bc - ad), as stated above
    res[:, :, 1] = (a[:, :, 1] * b[:, :, 0] - a[:, :, 0] * b[:, :, 1]) * divisor
    return res

def complexDivisionReal(a, b):
    res = np.zeros(a.shape, a.dtype)
    divisor = 1. / b

    res[:, :, 0] = a[:, :, 0] * divisor
    res[:, :, 1] = a[:, :, 1] * divisor
    return res
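
# A small self-check (a sketch; _check_complex_ops is a hypothetical helper,
# not called anywhere by the tracker): the two-channel (real, imag) layout
# used by the helpers above should agree with numpy's native complex numbers.
def _check_complex_ops():
    rng = np.random.RandomState(0)
    a = rng.rand(4, 4, 2).astype(np.float32)
    b = rng.rand(4, 4, 2).astype(np.float32) + 1.0  # keep the divisor away from zero
    ac, bc = a[:, :, 0] + 1j * a[:, :, 1], b[:, :, 0] + 1j * b[:, :, 1]
    prod, quot = complexMultiplication(a, b), complexDivision(a, b)
    assert np.allclose(prod[:, :, 0] + 1j * prod[:, :, 1], ac * bc, atol=1e-5)
    assert np.allclose(quot[:, :, 0] + 1j * quot[:, :, 1], ac / bc, atol=1e-5)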

# shift the DC component of an FFT output to the center of the spectrum
def rearrange(img):
    # return np.fft.fftshift(img, axes=(0,1))

    assert (img.ndim == 2)  # must hold, otherwise an exception is raised; ndim is the number of array dimensions
    img_ = np.zeros(img.shape, img.dtype)
    xh, yh = img.shape[1] // 2, img.shape[0] // 2  # shape[0] is the number of rows, shape[1] the number of columns
    img_[0:yh, 0:xh], img_[yh:img.shape[0], xh:img.shape[1]] = img[yh:img.shape[0], xh:img.shape[1]], img[0:yh, 0:xh]
    img_[0:yh, xh:img.shape[1]], img_[yh:img.shape[0], 0:xh] = img[yh:img.shape[0], 0:xh], img[0:yh, xh:img.shape[1]]
    return img_



# recttools
# rect = {x, y, w, h}
# right boundary (x)
def x2(rect):
    return rect[0] + rect[2]

# bottom boundary (y)
def y2(rect):
    return rect[1] + rect[3]

# clip the width and height
def limit(rect, limit):
    if rect[0] + rect[2] > limit[0] + limit[2]:
        rect[2] = limit[0] + limit[2] - rect[0]
    if rect[1] + rect[3] > limit[1] + limit[3]:
        rect[3] = limit[1] + limit[3] - rect[1]
    if rect[0] < limit[0]:
        rect[2] -= (limit[0] - rect[0])
        rect[0] = limit[0]
    if rect[1] < limit[1]:
        rect[3] -= (limit[1] - rect[1])
        rect[1] = limit[1]
    if rect[2] < 0:
        rect[2] = 0
    if rect[3] < 0:
        rect[3] = 0
    return rect

# get the part of the border that sticks out
def getBorder(original, limited):
    res = [0, 0, 0, 0]
    res[0] = limited[0] - original[0]
    res[1] = limited[1] - original[1]
    res[2] = x2(original) - x2(limited)
    res[3] = y2(original) - y2(limited)
    assert (np.all(np.array(res) >= 0))
    return res

# Spatial- or frequency-domain filtering often has to deal with the image boundary
# before the actual processing. The usual approach is to pad the image with a
# border so that the convolution kernel can operate at the original image boundary.
def subwindow(img, window, borderType=cv2.BORDER_CONSTANT):
    cutWindow = [x for x in window]
    limit(cutWindow, [0, 0, img.shape[1], img.shape[0]])  # modify cutWindow
    assert (cutWindow[2] > 0 and cutWindow[3] > 0)
    border = getBorder(window, cutWindow)
    res = img[cutWindow[1]:cutWindow[1] + cutWindow[3], cutWindow[0]:cutWindow[0] + cutWindow[2]]

    if (border != [0, 0, 0, 0]):
        res = cv2.copyMakeBorder(res, border[1], border[3], border[0], border[2], borderType)
    return res

def cutOutsize(num, limit):
    if num < 0: num = 0
    elif num > limit - 1: num = limit - 1
    return int(num)

def extractImage(img, cx, cy, patch_width, patch_height):
    xs_s = np.floor(cx) - np.floor(patch_width / 2)
    xs_s = cutOutsize(xs_s, img.shape[1])

    xs_e = np.floor(cx + patch_width - 1) - np.floor(patch_width / 2)
    xs_e = cutOutsize(xs_e, img.shape[1])

    ys_s = np.floor(cy) - np.floor(patch_height / 2)
    ys_s = cutOutsize(ys_s, img.shape[0])

    ys_e = np.floor(cy + patch_height - 1) - np.floor(patch_height / 2)
    ys_e = cutOutsize(ys_e, img.shape[0])

    return img[ys_s:ys_e, xs_s:xs_e]
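
# Illustration of the padding behaviour (a sketch with arbitrary numbers):
# a window reaching past the image is clipped, extracted, and padded back to
# the requested size with cv2.copyMakeBorder, e.g.
#   img = np.zeros((100, 100), np.uint8)
#   patch = subwindow(img, [-10, -10, 50, 50], cv2.BORDER_REPLICATE)
#   patch.shape  ->  (50, 50), with 10 replicated rows on top and 10 columns on the left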


# KCF tracker
class KCFTracker:
    def __init__(self, hog=False, fixed_window=True, multi_scale=False):
        self.lambdar = 0.0001  # regularization
        self.padding = 2.5  # extra area surrounding the target
        self.output_sigma_factor = 0.125  # bandwidth of the Gaussian target

        self._multiscale = multi_scale
        if multi_scale:
            self.template_size = 96  # template size: when computing _tmpl_sz, the longer side is normalized to 96 and the shorter side is scaled proportionally

            self.scale_padding = 1.0
            self.scale_step = 1.05  # default: 1.02; scale step of the multi-scale estimation
            self.scale_sigma_factor = 0.25
            self.n_scales = 33  # default: 33; number of samples of the scale estimator
            self.scale_lr = 0.025
            self.scale_max_area = 512
            self.scale_lambda = 0.01

            if hog == False:
                print('HOG feature is forced to turn on.')

        elif fixed_window:
            self.template_size = 96
            self.scale_step = 1
        else:
            self.template_size = 1
            self.scale_step = 1

        self._hogfeatures = True if hog or multi_scale else False
        if self._hogfeatures:  # HOG feature
            # VOT
            self.interp_factor = 0.012  # linear interpolation factor for adaptation
            self.sigma = 0.6  # gaussian kernel bandwidth
            # TPAMI   #interp_factor = 0.02   #sigma = 0.5
            self.cell_size = 4  # HOG cell size

            print('Numba Compiler initializing, wait for a while.')

        else:  # raw gray-scale image # aka CSK tracker
            self.interp_factor = 0.075
            self.sigma = 0.2
            self.cell_size = 1
            self._hogfeatures = False

        self._tmpl_sz = [0, 0]
        self._roi = [0., 0., 0., 0.]
        self.size_patch = [0, 0, 0]
        self._scale = 1.
        self._alphaf = None  # numpy.ndarray    (size_patch[0], size_patch[1], 2)
        self._prob = None  # numpy.ndarray    (size_patch[0], size_patch[1], 2)
        self._tmpl = None  # numpy.ndarray    raw: (size_patch[0], size_patch[1])   hog: (size_patch[2], size_patch[0]*size_patch[1])
        self.hann = None  # numpy.ndarray    raw: (size_patch[0], size_patch[1])   hog: (size_patch[2], size_patch[0]*size_patch[1])

        # Scale properties
        self.currentScaleFactor = 1
        self.base_width = 0  # initial ROI width
        self.base_height = 0  # initial ROI height
        self.scaleFactors = None  # all scale changing rates, from larger to smaller, with 1 in the middle
        self.scale_model_width = 0  # the model width for scaling
        self.scale_model_height = 0  # the model height for scaling
        self.min_scale_factor = 0.  # min scaling rate
        self.max_scale_factor = 0.  # max scaling rate

        # self._num = None
        # self._den = None

        self.sf_den = None
        self.sf_num = None

        self.s_hann = None
        self.ysf = None


    ##############################
    ### Translation estimator  ###
    ##############################

    # compute the 1D sub-pixel peak position
    def subPixelPeak(self, left, center, right):
        divisor = 2 * center - right - left  # float
        return (0 if abs(divisor) < 1e-3 else 0.5 * (right - left) / divisor)
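
    # Worked example (arbitrary values): for neighbouring responses
    # left = 1.0, center = 3.0, right = 2.0, the parabola fitted through the
    # three points peaks at 0.5*(2.0-1.0)/(2*3.0-2.0-1.0) = 1/6 ≈ +0.17,
    # i.e. the peak is shifted slightly toward the larger right neighbour.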

    # Initialize the Hanning window; only executed on the first frame.
    # The purpose is to assign different weights to the samples when sampling:
    # the 0.5 * (1 - cos(...)) form normalizes the Hanning window to [0,1], and the
    # values of the resulting matrix are the weights of the individual samples.
    def createHanningMats(self):
        hann2t, hann1t = np.ogrid[0:self.size_patch[0], 0:self.size_patch[1]]

        hann1t = 0.5 * (1 - np.cos(2 * np.pi * hann1t / (self.size_patch[1] - 1)))
        hann2t = 0.5 * (1 - np.cos(2 * np.pi * hann2t / (self.size_patch[0] - 1)))
        hann2d = hann2t * hann1t

        if self._hogfeatures:
            hann1d = hann2d.reshape(self.size_patch[0] * self.size_patch[1])
            self.hann = np.zeros((self.size_patch[2], 1), np.float32) + hann1d
            # equivalent to replicating the 1D Hanning window across all channels
        else:
            self.hann = hann2d

        self.hann = self.hann.astype(np.float32)

    # Create the Gaussian peak (target response); only executed on the first frame
    def createGaussianPeak(self, sizey, sizex):
        syh, sxh = sizey / 2, sizex / 2
        output_sigma = np.sqrt(sizex * sizey) / self.padding * self.output_sigma_factor
        mult = -0.5 / (output_sigma * output_sigma)
        y, x = np.ogrid[0:sizey, 0:sizex]
        y, x = (y - syh) ** 2, (x - sxh) ** 2
        res = np.exp(mult * (y + x))
        return fftd(res)

    # Compute the Gaussian kernel with bandwidth SIGMA for all relative
    # displacements between input images X and Y. Both must be MxN and periodic
    # (i.e., pre-processed with a cosine window).
    def gaussianCorrelation(self, x1, x2):
        if self._hogfeatures:
            c = np.zeros((self.size_patch[0], self.size_patch[1]), np.float32)
            for i in xrange(self.size_patch[2]):
                x1aux = x1[i, :].reshape((self.size_patch[0], self.size_patch[1]))
                x2aux = x2[i, :].reshape((self.size_patch[0], self.size_patch[1]))
                caux = cv2.mulSpectrums(fftd(x1aux), fftd(x2aux), 0, conjB=True)
                caux = real(fftd(caux, True))
                # caux = rearrange(caux)
                c += caux
            c = rearrange(c)
        else:
            # 'conjB=' is necessary! It takes the conjugate of the second input array before the multiplication.
            c = cv2.mulSpectrums(fftd(x1), fftd(x2), 0, conjB=True)
            c = fftd(c, True)
            c = real(c)
            c = rearrange(c)

        if x1.ndim == 3 and x2.ndim == 3:
            d = (np.sum(x1[:, :, 0] * x1[:, :, 0]) + np.sum(x2[:, :, 0] * x2[:, :, 0]) - 2.0 * c) / (
                    self.size_patch[0] * self.size_patch[1] * self.size_patch[2])
        elif x1.ndim == 2 and x2.ndim == 2:
            d = (np.sum(x1 * x1) + np.sum(x2 * x2) - 2.0 * c) / (
                    self.size_patch[0] * self.size_patch[1] * self.size_patch[2])

        d = d * (d >= 0)
        d = np.exp(-d / (self.sigma * self.sigma))

        return d
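
    # In formula form, the kernel value computed above is
    #   k = exp( -1/sigma^2 * max(0, (||x1||^2 + ||x2||^2 - 2 * F^-1(F(x1) * conj(F(x2)))) / N ) )
    # with N = size_patch[0] * size_patch[1] * size_patch[2]; the FFT evaluates
    # the cross-correlation against all cyclic shifts of the patch at once
    # (see [1] in the README).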

    # Initialize the KCF tracker with the first frame and its bounding box
    def init(self, roi, image):
        self._roi = list(map(float, roi))
        assert (roi[2] > 0 and roi[3] > 0)

        # _tmpl is the running weighted average of the extracted features
        self._tmpl = self.getFeatures(image, 1)
        # _prob is the Gaussian response map created at initialization
        self._prob = self.createGaussianPeak(self.size_patch[0], self.size_patch[1])
        # _alphaf is the correlation filter model in the frequency domain; its two channels are the real and imaginary parts
        self._alphaf = np.zeros((self.size_patch[0], self.size_patch[1], 2), np.float32)

        if self._multiscale:
            self.dsstInit(self._roi, image)

        self.train(self._tmpl, 1.0)

    # Extract a sub-window from the image, with padding, and detect features on it
    def getFeatures(self, image, inithann, scale_adjust=1.):
        extracted_roi = [0, 0, 0, 0]
        cx = self._roi[0] + self._roi[2] / 2
        cy = self._roi[1] + self._roi[3] / 2

        if inithann:
            padded_w = self._roi[2] * self.padding
            padded_h = self._roi[3] * self.padding

            if self.template_size > 1:
                # shrink the longer side to 96; _scale is the shrinking ratio
                # _tmpl_sz is the size of the filter template and also the size of the cropped patch
                if padded_w >= padded_h:
                    self._scale = padded_w / float(self.template_size)
                else:
                    self._scale = padded_h / float(self.template_size)
                self._tmpl_sz[0] = int(padded_w / self._scale)
                self._tmpl_sz[1] = int(padded_h / self._scale)
            else:
                self._tmpl_sz[0] = int(padded_w)
                self._tmpl_sz[1] = int(padded_h)
                self._scale = 1.

            if self._hogfeatures:
                self._tmpl_sz[0] = int(self._tmpl_sz[0]) // (2 * self.cell_size) * 2 * self.cell_size + 2 * self.cell_size
                self._tmpl_sz[1] = int(self._tmpl_sz[1]) // (2 * self.cell_size) * 2 * self.cell_size + 2 * self.cell_size
            else:
                self._tmpl_sz[0] = int(self._tmpl_sz[0]) // 2 * 2
                self._tmpl_sz[1] = int(self._tmpl_sz[1]) // 2 * 2

        # position and size of the patch to crop from the original image
        extracted_roi[2] = int(scale_adjust * self._scale * self._tmpl_sz[0] * self.currentScaleFactor)
        extracted_roi[3] = int(scale_adjust * self._scale * self._tmpl_sz[1] * self.currentScaleFactor)

        extracted_roi[0] = int(cx - extracted_roi[2] / 2)
        extracted_roi[1] = int(cy - extracted_roi[3] / 2)

        # z is the search region cropped from the current frame
        z = subwindow(image, extracted_roi, cv2.BORDER_REPLICATE)
        if z.shape[1] != self._tmpl_sz[0] or z.shape[0] != self._tmpl_sz[1]:  # resize down to 96
            z = cv2.resize(z, tuple(self._tmpl_sz))

        if self._hogfeatures:
            mapp = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
            mapp = fhog.getFeatureMaps(z, self.cell_size, mapp)
            mapp = fhog.normalizeAndTruncate(mapp, 0.2)
            mapp = fhog.PCAFeatureMaps(mapp)
            # size_patch is a list holding [height, width, channels] of the cropped feature map
            self.size_patch = list(map(int, [mapp['sizeY'], mapp['sizeX'], mapp['numFeatures']]))
            FeaturesMap = mapp['map'].reshape((self.size_patch[0] * self.size_patch[1], self.size_patch[2])).T  # (size_patch[2], size_patch[0]*size_patch[1])

        else:  # convert the RGB image into a single-channel gray-scale image
            if z.ndim == 3 and z.shape[2] == 3:
                FeaturesMap = cv2.cvtColor(z, cv2.COLOR_BGR2GRAY)
            elif z.ndim == 2:
                FeaturesMap = z

            # from here on, FeaturesMap ranges from -0.5 to 0.5
            FeaturesMap = FeaturesMap.astype(np.float32) / 255.0 - 0.5
            # size_patch is a list holding [height, width, 1] of the cropped feature map
            self.size_patch = [z.shape[0], z.shape[1], 1]

        if inithann:
            self.createHanningMats()

        FeaturesMap = self.hann * FeaturesMap  # apply a Hanning (cosine) window to reduce spectral leakage
        return FeaturesMap

    # Train the model with the detection result on the current frame
    # x is the feature of the current frame at the current scale; train_interp_factor is interp_factor
    def train(self, x, train_interp_factor):
        k = self.gaussianCorrelation(x, x)
        # alphaf is the correlation filter model in the frequency domain; its two channels are the real and imaginary parts
        # _prob is the Gaussian response map created at initialization, i.e. the regression target y
        alphaf = complexDivision(self._prob, fftd(k) + self.lambdar)

        # _tmpl is the running weighted average of the extracted features
        self._tmpl = (1 - train_interp_factor) * self._tmpl + train_interp_factor * x
        # _alphaf is the running weighted average of the frequency-domain filter model
        self._alphaf = (1 - train_interp_factor) * self._alphaf + train_interp_factor * alphaf

    # Detect the target in the current frame
    # z is the model trained on previous frames (initialized on the first frame),
    # x is the feature of the current frame at the current scale,
    # peak_value is the peak of the detection response
    def detect(self, z, x):
        k = self.gaussianCorrelation(x, z)
        # response map
        res = real(fftd(complexMultiplication(self._alphaf, fftd(k)), True))

        # pv: maximum response value; pi: index array of the maximum response point
        _, pv, _, pi = cv2.minMaxLoc(res)
        # float representation of the index of the maximum response
        p = [float(pi[0]), float(pi[1])]

        # locate the peak with sub-pixel accuracy using the neighbouring magnitudes
        if pi[0] > 0 and pi[0] < res.shape[1] - 1:
            p[0] += self.subPixelPeak(res[pi[1], pi[0] - 1], pv, res[pi[1], pi[0] + 1])
        if pi[1] > 0 and pi[1] < res.shape[0] - 1:
            p[1] += self.subPixelPeak(res[pi[1] - 1, pi[0]], pv, res[pi[1] + 1, pi[0]])

        # displacement from the sampling center
        p[0] -= res.shape[1] / 2.
        p[1] -= res.shape[0] / 2.

        # return the displacement from the sampling center and the peak value
        return p, pv

    # Update the target position based on the current frame
    def update(self, image):
        # clip to the frame boundary
        if self._roi[0] + self._roi[2] <= 0:  self._roi[0] = -self._roi[2] + 1
        if self._roi[1] + self._roi[3] <= 0:  self._roi[1] = -self._roi[3] + 1
        if self._roi[0] >= image.shape[1] - 1:  self._roi[0] = image.shape[1] - 2
        if self._roi[1] >= image.shape[0] - 1:  self._roi[1] = image.shape[0] - 2

        # center of the tracking / scale box
        cx = self._roi[0] + self._roi[2] / 2.
        cy = self._roi[1] + self._roi[3] / 2.

        # peak detection result at the unchanged scale
        loc, peak_value = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0))

        # only the center is returned, so adjust the target box using the scale and the center
        # loc is the relative displacement of the center
        self._roi[0] = cx - self._roi[2] / 2.0 + loc[0] * self.cell_size * self._scale * self.currentScaleFactor
        self._roi[1] = cy - self._roi[3] / 2.0 + loc[1] * self.cell_size * self._scale * self.currentScaleFactor

        # scale estimation
        if self._multiscale:
            if self._roi[0] >= image.shape[1] - 1:  self._roi[0] = image.shape[1] - 1
            if self._roi[1] >= image.shape[0] - 1:  self._roi[1] = image.shape[0] - 1
            if self._roi[0] + self._roi[2] <= 0:  self._roi[0] = -self._roi[2] + 2
            if self._roi[1] + self._roi[3] <= 0:  self._roi[1] = -self._roi[3] + 2

            # update the scale
            scale_pi = self.detect_scale(image)
            self.currentScaleFactor = self.currentScaleFactor * self.scaleFactors[scale_pi[0]]
            if self.currentScaleFactor < self.min_scale_factor:
                self.currentScaleFactor = self.min_scale_factor
            # elif self.currentScaleFactor > self.max_scale_factor:
            #     self.currentScaleFactor = self.max_scale_factor

            self.train_scale(image)

        if self._roi[0] >= image.shape[1] - 1:  self._roi[0] = image.shape[1] - 1
        if self._roi[1] >= image.shape[0] - 1:  self._roi[1] = image.shape[0] - 1
        if self._roi[0] + self._roi[2] <= 0:  self._roi[0] = -self._roi[2] + 2
        if self._roi[1] + self._roi[3] <= 0:  self._roi[1] = -self._roi[3] + 2
        assert (self._roi[2] > 0 and self._roi[3] > 0)

        # train the model with the current detection box
        x = self.getFeatures(image, 0, 1.0)
        self.train(x, self.interp_factor)

        return self._roi


    ##############################
    ### Scale estimator        ###
    ##############################

    def computeYsf(self):
        scale_sigma2 = (self.n_scales / self.n_scales ** 0.5 * self.scale_sigma_factor) ** 2
        _, res = np.ogrid[0:0, 0:self.n_scales]
        ceilS = np.ceil(self.n_scales / 2.0)
        res = np.exp(- 0.5 * (np.power(res + 1 - ceilS, 2)) / scale_sigma2)
        return fftd(res)

    def createHanningMatsForScale(self):
        _, hann_s = np.ogrid[0:0, 0:self.n_scales]
        hann_s = 0.5 * (1 - np.cos(2 * np.pi * hann_s / (self.n_scales - 1)))
        return hann_s
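
    # With the defaults n_scales = 33 and scale_step a = 1.05, dsstInit below
    # builds scaleFactors = a**(ceil(33/2) - 1 - s) for s = 0..32, i.e. factors
    # from 1.05**16 ≈ 2.18 down to 1.05**-16 ≈ 0.46 with 1.0 in the middle, so
    # each frame tests the target at 33 relative sizes around the current one.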

    # Initialize the scale estimator
    def dsstInit(self, roi, image):
        self.base_width = roi[2]
        self.base_height = roi[3]

        # Gaussian peak for scales (after fft)
        self.ysf = self.computeYsf()
        self.s_hann = self.createHanningMatsForScale()

        # Get all scale changing rates
        scaleFactors = np.arange(self.n_scales)
        ceilS = np.ceil(self.n_scales / 2.0)
        self.scaleFactors = np.power(self.scale_step, ceilS - scaleFactors - 1)

        # Get the scaling rate for compressing to the model size
        scale_model_factor = 1.
        if self.base_width * self.base_height > self.scale_max_area:
            scale_model_factor = (self.scale_max_area / (self.base_width * self.base_height)) ** 0.5

        self.scale_model_width = int(self.base_width * scale_model_factor)
        self.scale_model_height = int(self.base_height * scale_model_factor)

        # Compute min and max scaling rates
        # (the hard-coded 0.0086 is inherited from the C++ baseline; it equals
        #  log10(1.02), i.e. it implicitly assumes the original scale_step of 1.02)
        self.min_scale_factor = np.power(self.scale_step, np.ceil(np.log((max(5 / self.base_width, 5 / self.base_height) * (1 + self.scale_padding))) / 0.0086))
        self.max_scale_factor = np.power(self.scale_step, np.floor(np.log((min(image.shape[0] / self.base_width, image.shape[1] / self.base_height) * (1 + self.scale_padding))) / 0.0086))

        self.train_scale(image, True)

    # Get scale samples
    def get_scale_sample(self, image):
        xsf = None
        for i in range(self.n_scales):
            # Size of the subwindow waiting to be detected
            patch_width = self.base_width * self.scaleFactors[i] * self.currentScaleFactor
            patch_height = self.base_height * self.scaleFactors[i] * self.currentScaleFactor

            cx = self._roi[0] + self._roi[2] / 2.
            cy = self._roi[1] + self._roi[3] / 2.

            # Get the subwindow
            im_patch = extractImage(image, cx, cy, patch_width, patch_height)
            if self.scale_model_width > im_patch.shape[1]:
                im_patch_resized = cv2.resize(im_patch, (self.scale_model_width, self.scale_model_height), None, 0, 0, 1)
            else:
                im_patch_resized = cv2.resize(im_patch, (self.scale_model_width, self.scale_model_height), None, 0, 0, 3)

            mapp = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
            mapp = fhog.getFeatureMaps(im_patch_resized, self.cell_size, mapp)
            mapp = fhog.normalizeAndTruncate(mapp, 0.2)
            mapp = fhog.PCAFeatureMaps(mapp)

            if i == 0:
                totalSize = mapp['numFeatures'] * mapp['sizeX'] * mapp['sizeY']
                xsf = np.zeros((totalSize, self.n_scales))

            # Multiply the FHOG results by the hanning window and copy to the output
            FeaturesMap = mapp['map'].reshape((totalSize, 1))
            FeaturesMap = self.s_hann[0][i] * FeaturesMap
            xsf[:, i] = FeaturesMap[:, 0]

        return fftd(xsf, False, True)

    # Train the scale estimator
    def train_scale(self, image, ini=False):
        xsf = self.get_scale_sample(image)

        # Adjust ysf to the same size as xsf the first time
        if ini:
            totalSize = xsf.shape[0]
            self.ysf = cv2.repeat(self.ysf, totalSize, 1)

        # Get the new GF in the paper (delta A)
        new_sf_num = cv2.mulSpectrums(self.ysf, xsf, 0, conjB=True)

        new_sf_den = cv2.mulSpectrums(xsf, xsf, 0, conjB=True)
        new_sf_den = cv2.reduce(real(new_sf_den), 0, cv2.REDUCE_SUM)

        if ini:
            self.sf_den = new_sf_den
            self.sf_num = new_sf_num
        else:
            # Get new A and new B
            self.sf_den = cv2.addWeighted(self.sf_den, (1 - self.scale_lr), new_sf_den, self.scale_lr, 0)
            self.sf_num = cv2.addWeighted(self.sf_num, (1 - self.scale_lr), new_sf_num, self.scale_lr, 0)

        self.update_roi()
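
    # In the notation of the DSST paper ([3] in the README), with learning rate
    # eta = scale_lr, train_scale above maintains
    #   A_t = (1 - eta) * A_{t-1} + eta * Y * conj(X_t)            (sf_num)
    #   B_t = (1 - eta) * B_{t-1} + eta * sum_c X_t * conj(X_t)    (sf_den)
    # and detect_scale below evaluates the scale response
    #   y = F^-1( sum_c A * Z / (B + lambda) )
    # for a new sample Z; its argmax picks the relative scale change per frame.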

    # Detect the current scale of the target
    def detect_scale(self, image):
        xsf = self.get_scale_sample(image)

        # Compute AZ in the paper
        add_temp = cv2.reduce(complexMultiplication(self.sf_num, xsf), 0, cv2.REDUCE_SUM)

        # compute the final y
        scale_response = cv2.idft(complexDivisionReal(add_temp, (self.sf_den + self.scale_lambda)), None, cv2.DFT_REAL_OUTPUT)

        # Get the max point as the final scaling rate
        # pv: maximum response value; pi: index array of the maximum response point
        _, pv, _, pi = cv2.minMaxLoc(scale_response)

        return pi

    # Update the ROI with the new scale
    def update_roi(self):
        # center of the tracking / scale box
        cx = self._roi[0] + self._roi[2] / 2.
        cy = self._roi[1] + self._roi[3] / 2.

        # Recompute the ROI size from the base size and the current scale factor
        self._roi[2] = self.base_width * self.currentScaleFactor
        self._roi[3] = self.base_height * self.currentScaleFactor

        # only the center is kept, so recompute the left-upper corner from the scale and the center
        self._roi[0] = cx - self._roi[2] / 2.0
        self._roi[1] = cy - self._roi[3] / 2.0

--------------------------------------------------------------------------------