├── .gitignore
├── LICENSE
├── run.py
├── README.md
├── fhog.py
└── tracker.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/__pycache__
/.vscode

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Ryan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
import cv2
from tracker import KCFTracker

def run_tracker(cam, frame, bbox):
    tracker = KCFTracker(True, True, True)  # (hog, fixed_window, multi_scale)
    tracker.init(bbox, frame)

    while True:
        ok, frame = cam.read()
        if not ok:
            break

        timer = cv2.getTickCount()
        bbox = tracker.update(frame)
        bbox = list(map(int, bbox))
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)

        # Draw the tracked bounding box
        p1 = (int(bbox[0]), int(bbox[1]))
        p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
        cv2.rectangle(frame, p1, p2, (255, 0, 0), 2, 1)

        # Put FPS
        cv2.putText(frame, "FPS : " + str(int(fps)), (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50, 170, 50), 2)

        cv2.imshow("Tracking", frame)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    cam.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    video = cv2.VideoCapture(0)
    ok, frame = video.read()
    bbox = cv2.selectROI('Select ROI', frame, False)

    # selectROI returns an all-zero box when the selection is cancelled
    if min(bbox) == 0: exit(0)
    run_tracker(video, frame, bbox)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# KCF-DSST-py
Python implementation of the DSST tracking algorithm, built on a KCF tracker.

This repository adds the DSST scale estimation algorithm from [Baseline 3] to the original KCF tracker: starting from the Python implementation of the KCF tracker in [Baseline 2], the DSST code was translated from C++ and merged into the Python KCF.
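
## Usage
A minimal sketch of how `run.py` drives the tracker (the webcam index 0 is an arbitrary choice; any `cv2.VideoCapture` source works):

```python
import cv2
from tracker import KCFTracker

video = cv2.VideoCapture(0)
ok, frame = video.read()
bbox = cv2.selectROI('Select ROI', frame, False)  # (x, y, w, h)

tracker = KCFTracker(True, True, True)  # hog, fixed_window, multi_scale
tracker.init(bbox, frame)

while True:
    ok, frame = video.read()
    if not ok:
        break
    x, y, w, h = map(int, tracker.update(frame))
    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
    cv2.imshow('Tracking', frame)
    if cv2.waitKey(1) & 0xff == 27:  # ESC to quit
        break
```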

## Requirements
- Python 2.7 (or 3)
- NumPy
- Numba (needed if you want to use the HOG feature)
- OpenCV (ensure that you can import cv2 in Python)

## Baseline
Some implementations of the KCF and DSST algorithms.

### 1. KCF Tracker in C++
[C++ KCF Tracker](https://github.com/joaofaro/KCFcpp): Original C++ implementation of the Kernelized Correlation Filter (KCF) [1, 2].

### 2. KCF Tracker in Python
[KCF tracker in Python](https://github.com/uoip/KCFpy): Python implementation of the KCF tracker.

### 3. DSST Tracker in C++
[KCF-DSST](https://github.com/liliumao/KCF-DSST): C++ implementation of the Discriminative Scale Space Tracker (DSST) [3].

## Reference
[1] J. F. Henriques, R. Caseiro, P. Martins, J. Batista, "High-Speed Tracking with Kernelized Correlation Filters", TPAMI 2015.

[2] J. F. Henriques, R. Caseiro, P. Martins, J. Batista, "Exploiting the Circulant Structure of Tracking-by-Detection with Kernels", ECCV 2012.

[3] M. Danelljan, G. Häger, F. Shahbaz Khan, M. Felsberg, "Accurate Scale Estimation for Robust Visual Tracking", BMVC 2014.

--------------------------------------------------------------------------------
/fhog.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
from numba import jit

import sys
PY3 = sys.version_info >= (3,)

if PY3:
    xrange = range

# constants
NUM_SECTOR = 9
FLT_EPSILON = 1e-07


@jit(cache=True)
def func1(dx, dy, boundary_x, boundary_y, height, width, numChannels):
    # Per pixel: gradient magnitude of the dominant channel and its orientation bin
    # (contrast-insensitive bin in alfa[...,0], contrast-sensitive bin in alfa[...,1])
    r = np.zeros((height, width), np.float32)
    alfa = np.zeros((height, width, 2), np.int32)

    for j in xrange(1, height-1):
        for i in xrange(1, width-1):
            c = 0
            x = dx[j, i, c]
            y = dy[j, i, c]
            r[j, i] = np.sqrt(x*x + y*y)

            for ch in xrange(1, numChannels):
                tx = dx[j, i, ch]
                ty = dy[j, i, ch]
                magnitude = np.sqrt(tx*tx + ty*ty)
                if(magnitude > r[j, i]):
                    r[j, i] = magnitude
                    c = ch
                    x = tx
                    y = ty

            mmax = boundary_x[0]*x + boundary_y[0]*y
            maxi = 0

            for kk in xrange(0, NUM_SECTOR):
                dotProd = boundary_x[kk]*x + boundary_y[kk]*y
                if(dotProd > mmax):
                    mmax = dotProd
                    maxi = kk
                elif(-dotProd > mmax):
                    mmax = -dotProd
                    maxi = kk + NUM_SECTOR

            alfa[j, i, 0] = maxi % NUM_SECTOR
            alfa[j, i, 1] = maxi
    return r, alfa

@jit(cache=True)
def func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize):
    mapp = np.zeros((sizeX*sizeY*p), np.float32)
    for i in xrange(sizeY):
        for j in xrange(sizeX):
            for ii in xrange(k):
                for jj in xrange(k):
                    if((i * k + ii > 0) and (i * k + ii < height - 1) and (j * k + jj > 0) and (j * k + jj < width - 1)):
                        mapp[i*stringSize + j*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,0]
                        mapp[i*stringSize + j*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,0]
                        if((i + nearest[ii] >= 0) and (i + nearest[ii] <= sizeY - 1)):
                            mapp[(i+nearest[ii])*stringSize + j*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,0]
                            mapp[(i+nearest[ii])*stringSize + j*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,0]
                        if((j + nearest[jj] >= 0) and (j + nearest[jj] <= sizeX - 1)):
                            mapp[i*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,1]
                            mapp[i*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,0] * w[jj,1]
                        if((i + nearest[ii] >= 0) and (i + nearest[ii] <= sizeY - 1) and (j + nearest[jj] >= 0) and (j + nearest[jj] <= sizeX - 1)):
                            mapp[(i+nearest[ii])*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,0]] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,1]
                            mapp[(i+nearest[ii])*stringSize + (j+nearest[jj])*p + alfa[k*i+ii,j*k+jj,1] + NUM_SECTOR] += r[k*i+ii,j*k+jj] * w[ii,1] * w[jj,1]
    return mapp

@jit(cache=True)
def func3(partOfNorm, mappmap, sizeX, sizeY, p, xp, pp):
    newData = np.zeros((sizeY*sizeX*pp), np.float32)
    for i in xrange(1, sizeY+1):
        for j in xrange(1, sizeX+1):
            pos1 = i * (sizeX+2) * xp + j * xp
            pos2 = (i-1) * sizeX * pp + (j-1) * pp

            valOfNorm = np.sqrt(partOfNorm[(i    )*(sizeX + 2) + (j    )] +
                                partOfNorm[(i    )*(sizeX + 2) + (j + 1)] +
                                partOfNorm[(i + 1)*(sizeX + 2) + (j    )] +
                                partOfNorm[(i + 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON
            newData[pos2:pos2+p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+4*p:pos2+6*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            valOfNorm = np.sqrt(partOfNorm[(i    )*(sizeX + 2) + (j    )] +
                                partOfNorm[(i    )*(sizeX + 2) + (j + 1)] +
                                partOfNorm[(i - 1)*(sizeX + 2) + (j    )] +
                                partOfNorm[(i - 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON
            newData[pos2+p:pos2+2*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+6*p:pos2+8*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            valOfNorm = np.sqrt(partOfNorm[(i    )*(sizeX + 2) + (j    )] +
                                partOfNorm[(i    )*(sizeX + 2) + (j - 1)] +
                                partOfNorm[(i + 1)*(sizeX + 2) + (j    )] +
                                partOfNorm[(i + 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON
            newData[pos2+2*p:pos2+3*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+8*p:pos2+10*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm

            valOfNorm = np.sqrt(partOfNorm[(i    )*(sizeX + 2) + (j    )] +
                                partOfNorm[(i    )*(sizeX + 2) + (j - 1)] +
                                partOfNorm[(i - 1)*(sizeX + 2) + (j    )] +
                                partOfNorm[(i - 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON
            newData[pos2+3*p:pos2+4*p] = mappmap[pos1:pos1+p] / valOfNorm
            newData[pos2+10*p:pos2+12*p] = mappmap[pos1+p:pos1+3*p] / valOfNorm
    return newData

@jit(cache=True)
def func4(mappmap, p, sizeX, sizeY, pp, yp, xp, nx, ny):
    newData = np.zeros((sizeX*sizeY*pp), np.float32)
    for i in xrange(sizeY):
        for j in xrange(sizeX):
            pos1 = (i*sizeX + j) * p
            pos2 = (i*sizeX + j) * pp

            for jj in xrange(2 * xp):  # 2*9
                newData[pos2 + jj] = np.sum(mappmap[pos1 + yp*xp + jj : pos1 + 3*yp*xp + jj : 2*xp]) * ny
            for jj in xrange(xp):  # 9
                newData[pos2 + 2*xp + jj] = np.sum(mappmap[pos1 + jj : pos1 + jj + yp*xp : xp]) * ny
            for ii in xrange(yp):  # 4
                newData[pos2 + 3*xp + ii] = np.sum(mappmap[pos1 + yp*xp + ii*xp*2 : pos1 + yp*xp + ii*xp*2 + 2*xp]) * nx
    return newData


def getFeatureMaps(image, k, mapp):
    kernel = np.array([[-1., 0., 1.]], np.float32)

    height = image.shape[0]
    width = image.shape[1]
    assert(image.ndim == 3 and image.shape[2] == 3)
    numChannels = 3  # (1 if image.ndim==2 else image.shape[2])

    sizeX = width // k
    sizeY = height // k
    px = 3 * NUM_SECTOR
    p = px
    stringSize = sizeX * p

    mapp['sizeX'] = sizeX
    mapp['sizeY'] = sizeY
    mapp['numFeatures'] = p
    mapp['map'] = np.zeros((mapp['sizeX']*mapp['sizeY']*mapp['numFeatures']), np.float32)

    dx = cv2.filter2D(np.float32(image), -1, kernel)  # np.float32(...) is necessary
    dy = cv2.filter2D(np.float32(image), -1, kernel.T)

    arg_vector = np.arange(NUM_SECTOR+1).astype(np.float32) * np.pi / NUM_SECTOR
    boundary_x = np.cos(arg_vector)
    boundary_y = np.sin(arg_vector)

    '''
    ### original implementation
    r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels)  # func1 without @jit ###
    ### 40x speedup
    magnitude = np.sqrt(dx**2 + dy**2)
    r = np.max(magnitude, axis=2)
    c = np.argmax(magnitude, axis=2)
    idx = (np.arange(c.shape[0])[:,np.newaxis], np.arange(c.shape[1]), c)
    x, y = dx[idx], dy[idx]
    dotProd = x[:,:,np.newaxis] * boundary_x[np.newaxis,np.newaxis,:] + y[:,:,np.newaxis] * boundary_y[np.newaxis,np.newaxis,:]
    dotProd = np.concatenate((dotProd, -dotProd), axis=2)
    maxi = np.argmax(dotProd, axis=2)
    alfa = np.dstack((maxi % NUM_SECTOR, maxi)) ###
    '''
    ### 200x speedup
    r, alfa = func1(dx, dy, boundary_x, boundary_y, height, width, numChannels)  # with @jit
    ### ~0.001s

    nearest = np.ones((k), np.int32)  # np.int is a removed NumPy alias; use a concrete type
    nearest[0:k//2] = -1

    w = np.zeros((k, 2), np.float32)
    a_x = np.concatenate((k/2 - np.arange(k/2) - 0.5, np.arange(k/2, k) - k/2 + 0.5)).astype(np.float32)
    b_x = np.concatenate((k/2 + np.arange(k/2) + 0.5, -np.arange(k/2, k) + k/2 - 0.5 + k)).astype(np.float32)
    w[:, 0] = 1.0 / a_x * ((a_x*b_x) / (a_x+b_x))
    w[:, 1] = 1.0 / b_x * ((a_x*b_x) / (a_x+b_x))

    '''
    ### original implementation
    mapp['map'] = func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize)  # func2 without @jit ###
    '''
    ### 500x speedup
    mapp['map'] = func2(dx, dy, boundary_x, boundary_y, r, alfa, nearest, w, k, height, width, sizeX, sizeY, p, stringSize)  # with @jit
    ### ~0.001s

    return mapp


def normalizeAndTruncate(mapp, alfa):
    sizeX = mapp['sizeX']
    sizeY = mapp['sizeY']

    p = NUM_SECTOR
    xp = NUM_SECTOR * 3
    pp = NUM_SECTOR * 12

    '''
    ### original implementation
    partOfNorm = np.zeros((sizeY*sizeX), np.float32)
    for i in xrange(sizeX*sizeY):
        pos = i * mapp['numFeatures']
        partOfNorm[i] = np.sum(mapp['map'][pos:pos+p]**2) ###
    '''
    ### 50x speedup
    idx = np.arange(0, sizeX*sizeY*mapp['numFeatures'], mapp['numFeatures']).reshape((sizeX*sizeY, 1)) + np.arange(p)
    partOfNorm = np.sum(mapp['map'][idx] ** 2, axis=1)  ### ~0.0002s

    sizeX, sizeY = sizeX-2, sizeY-2

    '''
    ### original implementation
    newData = func3(partOfNorm, mapp['map'], sizeX, sizeY, p, xp, pp)  # func3 without @jit ###

    ### 30x speedup
    newData = np.zeros((sizeY*sizeX*pp), np.float32)
    idx = (np.arange(1,sizeY+1)[:,np.newaxis] * (sizeX+2) + np.arange(1,sizeX+1)).reshape((sizeY*sizeX, 1))  # much faster than its list-comprehension counterpart (see next line)
    #idx = np.array([[i*(sizeX+2) + j] for i in xrange(1,sizeY+1) for j in xrange(1,sizeX+1)])
    pos1 = idx * xp
    pos2 = np.arange(sizeY*sizeX)[:,np.newaxis] * pp

    valOfNorm1 = np.sqrt(partOfNorm[idx] + partOfNorm[idx+1] + partOfNorm[idx+sizeX+2] + partOfNorm[idx+sizeX+2+1]) + FLT_EPSILON
    valOfNorm2 = np.sqrt(partOfNorm[idx] + partOfNorm[idx+1] + partOfNorm[idx-sizeX-2] + partOfNorm[idx-sizeX-2+1]) + FLT_EPSILON
    valOfNorm3 = np.sqrt(partOfNorm[idx] + partOfNorm[idx-1] + partOfNorm[idx+sizeX+2] + partOfNorm[idx+sizeX+2-1]) + FLT_EPSILON
    valOfNorm4 = np.sqrt(partOfNorm[idx] + partOfNorm[idx-1] + partOfNorm[idx-sizeX-2] + partOfNorm[idx-sizeX-2-1]) + FLT_EPSILON
    map1 = mapp['map'][pos1 + np.arange(p)]
    map2 = mapp['map'][pos1 + np.arange(p,3*p)]
    newData[pos2 + np.arange(p)] = map1 / valOfNorm1
    newData[pos2 + np.arange(4*p,6*p)] = map2 / valOfNorm1
    newData[pos2 + np.arange(p,2*p)] = map1 / valOfNorm2
    newData[pos2 + np.arange(6*p,8*p)] = map2 / valOfNorm2
    newData[pos2 + np.arange(2*p,3*p)] = map1 / valOfNorm3
    newData[pos2 + np.arange(8*p,10*p)] = map2 / valOfNorm3
    newData[pos2 + np.arange(3*p,4*p)] = map1 / valOfNorm4
    newData[pos2 + np.arange(10*p,12*p)] = map2 / valOfNorm4 ###
    '''
    ### 30x speedup
    newData = func3(partOfNorm, mapp['map'], sizeX, sizeY, p, xp, pp)  # with @jit
    ###

    # truncation
    newData[newData > alfa] = alfa

    mapp['numFeatures'] = pp
    mapp['sizeX'] = sizeX
    mapp['sizeY'] = sizeY
    mapp['map'] = newData

    return mapp


def PCAFeatureMaps(mapp):
    sizeX = mapp['sizeX']
    sizeY = mapp['sizeY']

    p = mapp['numFeatures']
    pp = NUM_SECTOR * 3 + 4
    yp = 4
    xp = NUM_SECTOR

    nx = 1.0 / np.sqrt(xp*2)
    ny = 1.0 / np.sqrt(yp)

    '''
    ### original implementation
    newData = func4(mapp['map'], p, sizeX, sizeY, pp, yp, xp, nx, ny)  # func4 without @jit ###
    ### 7.5x speedup
    newData = np.zeros((sizeX*sizeY*pp), np.float32)
    idx1 = np.arange(2*xp).reshape((2*xp, 1)) + np.arange(xp*yp, 3*xp*yp, 2*xp)
    idx2 = np.arange(xp).reshape((xp, 1)) + np.arange(0, xp*yp, xp)
    idx3 = np.arange(0, 2*xp*yp, 2*xp).reshape((yp, 1)) + np.arange(xp*yp, xp*yp+2*xp)
    for i in xrange(sizeY):
        for j in xrange(sizeX):
            pos1 = (i*sizeX + j) * p
            pos2 = (i*sizeX + j) * pp

            newData[pos2 : pos2+2*xp] = np.sum(mapp['map'][pos1 + idx1], axis=1) * ny
            newData[pos2+2*xp : pos2+3*xp] = np.sum(mapp['map'][pos1 + idx2], axis=1) * ny
            newData[pos2+3*xp : pos2+3*xp+yp] = np.sum(mapp['map'][pos1 + idx3], axis=1) * nx ###
    ### 120x speedup
    newData = np.zeros((sizeX*sizeY*pp), np.float32)
    idx01 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(2*xp)).reshape((sizeX*sizeY*2*xp))
    idx02 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(2*xp,3*xp)).reshape((sizeX*sizeY*xp))
    idx03 = (np.arange(0,sizeX*sizeY*pp,pp)[:,np.newaxis] + np.arange(3*xp,3*xp+yp)).reshape((sizeX*sizeY*yp))
    idx11 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(2*xp)).reshape((sizeX*sizeY*2*xp, 1)) + np.arange(xp*yp, 3*xp*yp, 2*xp)
    idx12 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(xp)).reshape((sizeX*sizeY*xp, 1)) + np.arange(0, xp*yp, xp)
    idx13 = (np.arange(0,sizeX*sizeY*p,p)[:,np.newaxis] + np.arange(0, 2*xp*yp, 2*xp)).reshape((sizeX*sizeY*yp, 1)) + np.arange(xp*yp, xp*yp+2*xp)
    newData[idx01] = np.sum(mapp['map'][idx11], axis=1) * ny
    newData[idx02] = np.sum(mapp['map'][idx12], axis=1) * ny
    newData[idx03] = np.sum(mapp['map'][idx13], axis=1) * nx ###
    '''
    ### 190x speedup
    newData = func4(mapp['map'], p, sizeX, sizeY, pp, yp, xp, nx, ny)  # with @jit
    ###

    mapp['numFeatures'] = pp
    mapp['map'] = newData

    return mapp
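

# A minimal smoke test of the full FHOG pipeline (a sketch, not used by the
# tracker; the image and cell sizes below are arbitrary). The final descriptor
# has 3*NUM_SECTOR + 4 = 31 channels per cell, and normalizeAndTruncate trims
# two border cells in each dimension.
if __name__ == '__main__':
    demo_img = np.random.randint(0, 256, (96, 96, 3), np.uint8)
    demo_map = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
    demo_map = getFeatureMaps(demo_img, 4, demo_map)   # 27 channels per cell
    demo_map = normalizeAndTruncate(demo_map, 0.2)     # 108 channels per cell
    demo_map = PCAFeatureMaps(demo_map)                # 31 channels per cell
    print(demo_map['sizeY'], demo_map['sizeX'], demo_map['numFeatures'])  # 22 22 31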

--------------------------------------------------------------------------------
/tracker.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
import fhog

import sys
PY3 = sys.version_info >= (3,)

if PY3:
    xrange = range


# ffttools
# Discrete Fourier transform and its inverse
def fftd(img, backwards=False, byRow=False):
    # shape of img can be (m,n), (m,n,1) or (m,n,2)
    # in my test, fft provided by numpy and scipy are slower than cv2.dft
    # return cv2.dft(np.float32(img), flags=((cv2.DFT_INVERSE | cv2.DFT_SCALE) if backwards else cv2.DFT_COMPLEX_OUTPUT))  # 'flags =' is necessary!
    # DFT_INVERSE: perform a 1D or 2D inverse transform instead of the default forward one.
    # DFT_SCALE: scale the result by the number of elements (for N elements the output is scaled by 1/N); usually combined with DFT_INVERSE.
    # DFT_COMPLEX_OUTPUT: forward transform of a 1D or 2D real array; the result is a complex array with conjugate symmetry.

    if byRow:
        return cv2.dft(np.float32(img), flags=(cv2.DFT_ROWS | cv2.DFT_COMPLEX_OUTPUT))
    else:
        return cv2.dft(np.float32(img), flags=((cv2.DFT_INVERSE | cv2.DFT_SCALE) if backwards else cv2.DFT_COMPLEX_OUTPUT))

# real part
def real(img):
    return img[:, :, 0]

# imaginary part
def imag(img):
    return img[:, :, 1]

# product of two complex numbers: (a+bi)(c+di) = (ac-bd) + (ad+bc)i
def complexMultiplication(a, b):
    res = np.zeros(a.shape, a.dtype)

    res[:, :, 0] = a[:, :, 0] * b[:, :, 0] - a[:, :, 1] * b[:, :, 1]
    res[:, :, 1] = a[:, :, 0] * b[:, :, 1] + a[:, :, 1] * b[:, :, 0]
    return res

# quotient of two complex numbers: (a+bi)/(c+di) = (ac+bd)/(c*c+d*d) + ((bc-ad)/(c*c+d*d))i
def complexDivision(a, b):
    res = np.zeros(a.shape, a.dtype)
    divisor = 1. / (b[:, :, 0] ** 2 + b[:, :, 1] ** 2)

    res[:, :, 0] = (a[:, :, 0] * b[:, :, 0] + a[:, :, 1] * b[:, :, 1]) * divisor
    # note the minus sign: the imaginary part is (bc - ad), as stated above
    res[:, :, 1] = (a[:, :, 1] * b[:, :, 0] - a[:, :, 0] * b[:, :, 1]) * divisor
    return res

def complexDivisionReal(a, b):
    res = np.zeros(a.shape, a.dtype)
    divisor = 1. / b

    res[:, :, 0] = a[:, :, 0] * divisor
    res[:, :, 1] = a[:, :, 1] * divisor
    return res
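
# A small self-check (a sketch; _check_complex_ops is a hypothetical helper,
# not called anywhere by the tracker): the two-channel (real, imag) layout
# used by the helpers above should agree with numpy's native complex numbers.
def _check_complex_ops():
    rng = np.random.RandomState(0)
    a = rng.rand(4, 4, 2).astype(np.float32)
    b = rng.rand(4, 4, 2).astype(np.float32) + 1.0  # keep the divisor away from zero
    ac, bc = a[:, :, 0] + 1j * a[:, :, 1], b[:, :, 0] + 1j * b[:, :, 1]
    prod, quot = complexMultiplication(a, b), complexDivision(a, b)
    assert np.allclose(prod[:, :, 0] + 1j * prod[:, :, 1], ac * bc, atol=1e-5)
    assert np.allclose(quot[:, :, 0] + 1j * quot[:, :, 1], ac / bc, atol=1e-5)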

# shift the DC component of an FFT output to the center of the spectrum
def rearrange(img):
    # return np.fft.fftshift(img, axes=(0,1))

    assert (img.ndim == 2)  # must hold, otherwise an exception is raised; ndim is the number of array dimensions
    img_ = np.zeros(img.shape, img.dtype)
    xh, yh = img.shape[1] // 2, img.shape[0] // 2  # shape[0] is the number of rows, shape[1] the number of columns
    img_[0:yh, 0:xh], img_[yh:img.shape[0], xh:img.shape[1]] = img[yh:img.shape[0], xh:img.shape[1]], img[0:yh, 0:xh]
    img_[0:yh, xh:img.shape[1]], img_[yh:img.shape[0], 0:xh] = img[yh:img.shape[0], 0:xh], img[0:yh, xh:img.shape[1]]
    return img_



# recttools
# rect = {x, y, w, h}
# right boundary (x)
def x2(rect):
    return rect[0] + rect[2]

# bottom boundary (y)
def y2(rect):
    return rect[1] + rect[3]

# clip the width and height
def limit(rect, limit):
    if rect[0] + rect[2] > limit[0] + limit[2]:
        rect[2] = limit[0] + limit[2] - rect[0]
    if rect[1] + rect[3] > limit[1] + limit[3]:
        rect[3] = limit[1] + limit[3] - rect[1]
    if rect[0] < limit[0]:
        rect[2] -= (limit[0] - rect[0])
        rect[0] = limit[0]
    if rect[1] < limit[1]:
        rect[3] -= (limit[1] - rect[1])
        rect[1] = limit[1]
    if rect[2] < 0:
        rect[2] = 0
    if rect[3] < 0:
        rect[3] = 0
    return rect

# get the part of the border that sticks out
def getBorder(original, limited):
    res = [0, 0, 0, 0]
    res[0] = limited[0] - original[0]
    res[1] = limited[1] - original[1]
    res[2] = x2(original) - x2(limited)
    res[3] = y2(original) - y2(limited)
    assert (np.all(np.array(res) >= 0))
    return res

# Spatial- or frequency-domain filtering often has to deal with the image boundary
# before the actual processing. The usual approach is to pad the image with a
# border so that the convolution kernel can operate at the original image boundary.
def subwindow(img, window, borderType=cv2.BORDER_CONSTANT):
    cutWindow = [x for x in window]
    limit(cutWindow, [0, 0, img.shape[1], img.shape[0]])  # modify cutWindow
    assert (cutWindow[2] > 0 and cutWindow[3] > 0)
    border = getBorder(window, cutWindow)
    res = img[cutWindow[1]:cutWindow[1] + cutWindow[3], cutWindow[0]:cutWindow[0] + cutWindow[2]]

    if (border != [0, 0, 0, 0]):
        res = cv2.copyMakeBorder(res, border[1], border[3], border[0], border[2], borderType)
    return res

def cutOutsize(num, limit):
    if num < 0: num = 0
    elif num > limit - 1: num = limit - 1
    return int(num)

def extractImage(img, cx, cy, patch_width, patch_height):
    xs_s = np.floor(cx) - np.floor(patch_width / 2)
    xs_s = cutOutsize(xs_s, img.shape[1])

    xs_e = np.floor(cx + patch_width - 1) - np.floor(patch_width / 2)
    xs_e = cutOutsize(xs_e, img.shape[1])

    ys_s = np.floor(cy) - np.floor(patch_height / 2)
    ys_s = cutOutsize(ys_s, img.shape[0])

    ys_e = np.floor(cy + patch_height - 1) - np.floor(patch_height / 2)
    ys_e = cutOutsize(ys_e, img.shape[0])

    return img[ys_s:ys_e, xs_s:xs_e]
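
# Illustration of the padding behaviour (a sketch with arbitrary numbers):
# a window reaching past the image is clipped, extracted, and padded back to
# the requested size with cv2.copyMakeBorder, e.g.
#   img = np.zeros((100, 100), np.uint8)
#   patch = subwindow(img, [-10, -10, 50, 50], cv2.BORDER_REPLICATE)
#   patch.shape  ->  (50, 50), with 10 replicated rows on top and 10 columns on the left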


# KCF tracker
class KCFTracker:
    def __init__(self, hog=False, fixed_window=True, multi_scale=False):
        self.lambdar = 0.0001  # regularization
        self.padding = 2.5  # extra area surrounding the target
        self.output_sigma_factor = 0.125  # bandwidth of the Gaussian target

        self._multiscale = multi_scale
        if multi_scale:
            self.template_size = 96  # template size: when computing _tmpl_sz, the longer side is normalized to 96 and the shorter side is scaled proportionally

            self.scale_padding = 1.0
            self.scale_step = 1.05  # default: 1.02; scale step of the multi-scale estimation
            self.scale_sigma_factor = 0.25
            self.n_scales = 33  # default: 33; number of samples of the scale estimator
            self.scale_lr = 0.025
            self.scale_max_area = 512
            self.scale_lambda = 0.01

            if hog == False:
                print('HOG feature is forced to turn on.')

        elif fixed_window:
            self.template_size = 96
            self.scale_step = 1
        else:
            self.template_size = 1
            self.scale_step = 1

        self._hogfeatures = True if hog or multi_scale else False
        if self._hogfeatures:  # HOG feature
            # VOT
            self.interp_factor = 0.012  # linear interpolation factor for adaptation
            self.sigma = 0.6  # gaussian kernel bandwidth
            # TPAMI   #interp_factor = 0.02   #sigma = 0.5
            self.cell_size = 4  # HOG cell size

            print('Numba Compiler initializing, wait for a while.')

        else:  # raw gray-scale image # aka CSK tracker
            self.interp_factor = 0.075
            self.sigma = 0.2
            self.cell_size = 1
            self._hogfeatures = False

        self._tmpl_sz = [0, 0]
        self._roi = [0., 0., 0., 0.]
        self.size_patch = [0, 0, 0]
        self._scale = 1.
        self._alphaf = None  # numpy.ndarray    (size_patch[0], size_patch[1], 2)
        self._prob = None  # numpy.ndarray    (size_patch[0], size_patch[1], 2)
        self._tmpl = None  # numpy.ndarray    raw: (size_patch[0], size_patch[1])   hog: (size_patch[2], size_patch[0]*size_patch[1])
        self.hann = None  # numpy.ndarray    raw: (size_patch[0], size_patch[1])   hog: (size_patch[2], size_patch[0]*size_patch[1])

        # Scale properties
        self.currentScaleFactor = 1
        self.base_width = 0  # initial ROI width
        self.base_height = 0  # initial ROI height
        self.scaleFactors = None  # all scale changing rates, from larger to smaller, with 1 in the middle
        self.scale_model_width = 0  # the model width for scaling
        self.scale_model_height = 0  # the model height for scaling
        self.min_scale_factor = 0.  # min scaling rate
        self.max_scale_factor = 0.  # max scaling rate

        # self._num = None
        # self._den = None

        self.sf_den = None
        self.sf_num = None

        self.s_hann = None
        self.ysf = None


    ##############################
    ### Translation estimator  ###
    ##############################

    # compute the 1D sub-pixel peak position
    def subPixelPeak(self, left, center, right):
        divisor = 2 * center - right - left  # float
        return (0 if abs(divisor) < 1e-3 else 0.5 * (right - left) / divisor)
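
    # Worked example (arbitrary values): for neighbouring responses
    # left = 1.0, center = 3.0, right = 2.0, the parabola fitted through the
    # three points peaks at 0.5*(2.0-1.0)/(2*3.0-2.0-1.0) = 1/6 ≈ +0.17,
    # i.e. the peak is shifted slightly toward the larger right neighbour.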

    # Initialize the Hanning window; only executed on the first frame.
    # The purpose is to assign different weights to the samples when sampling:
    # the 0.5 * (1 - cos(...)) form normalizes the Hanning window to [0,1], and the
    # values of the resulting matrix are the weights of the individual samples.
    def createHanningMats(self):
        hann2t, hann1t = np.ogrid[0:self.size_patch[0], 0:self.size_patch[1]]

        hann1t = 0.5 * (1 - np.cos(2 * np.pi * hann1t / (self.size_patch[1] - 1)))
        hann2t = 0.5 * (1 - np.cos(2 * np.pi * hann2t / (self.size_patch[0] - 1)))
        hann2d = hann2t * hann1t

        if self._hogfeatures:
            hann1d = hann2d.reshape(self.size_patch[0] * self.size_patch[1])
            self.hann = np.zeros((self.size_patch[2], 1), np.float32) + hann1d
            # equivalent to replicating the 1D Hanning window across all channels
        else:
            self.hann = hann2d

        self.hann = self.hann.astype(np.float32)

    # Create the Gaussian peak (target response); only executed on the first frame
    def createGaussianPeak(self, sizey, sizex):
        syh, sxh = sizey / 2, sizex / 2
        output_sigma = np.sqrt(sizex * sizey) / self.padding * self.output_sigma_factor
        mult = -0.5 / (output_sigma * output_sigma)
        y, x = np.ogrid[0:sizey, 0:sizex]
        y, x = (y - syh) ** 2, (x - sxh) ** 2
        res = np.exp(mult * (y + x))
        return fftd(res)

    # Compute the Gaussian kernel with bandwidth SIGMA for all relative
    # displacements between input images X and Y. Both must be MxN and periodic
    # (i.e., pre-processed with a cosine window).
    def gaussianCorrelation(self, x1, x2):
        if self._hogfeatures:
            c = np.zeros((self.size_patch[0], self.size_patch[1]), np.float32)
            for i in xrange(self.size_patch[2]):
                x1aux = x1[i, :].reshape((self.size_patch[0], self.size_patch[1]))
                x2aux = x2[i, :].reshape((self.size_patch[0], self.size_patch[1]))
                caux = cv2.mulSpectrums(fftd(x1aux), fftd(x2aux), 0, conjB=True)
                caux = real(fftd(caux, True))
                # caux = rearrange(caux)
                c += caux
            c = rearrange(c)
        else:
            # 'conjB=' is necessary! It takes the conjugate of the second input array before the multiplication.
            c = cv2.mulSpectrums(fftd(x1), fftd(x2), 0, conjB=True)
            c = fftd(c, True)
            c = real(c)
            c = rearrange(c)

        if x1.ndim == 3 and x2.ndim == 3:
            d = (np.sum(x1[:, :, 0] * x1[:, :, 0]) + np.sum(x2[:, :, 0] * x2[:, :, 0]) - 2.0 * c) / (
                    self.size_patch[0] * self.size_patch[1] * self.size_patch[2])
        elif x1.ndim == 2 and x2.ndim == 2:
            d = (np.sum(x1 * x1) + np.sum(x2 * x2) - 2.0 * c) / (
                    self.size_patch[0] * self.size_patch[1] * self.size_patch[2])

        d = d * (d >= 0)
        d = np.exp(-d / (self.sigma * self.sigma))

        return d
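
    # In formula form, the kernel value computed above is
    #   k = exp( -1/sigma^2 * max(0, (||x1||^2 + ||x2||^2 - 2 * F^-1(F(x1) * conj(F(x2)))) / N ) )
    # with N = size_patch[0] * size_patch[1] * size_patch[2]; the FFT evaluates
    # the cross-correlation against all cyclic shifts of the patch at once
    # (see [1] in the README).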

    # Initialize the KCF tracker with the first frame and its bounding box
    def init(self, roi, image):
        self._roi = list(map(float, roi))
        assert (roi[2] > 0 and roi[3] > 0)

        # _tmpl is the running weighted average of the extracted features
        self._tmpl = self.getFeatures(image, 1)
        # _prob is the Gaussian response map created at initialization
        self._prob = self.createGaussianPeak(self.size_patch[0], self.size_patch[1])
        # _alphaf is the correlation filter model in the frequency domain; its two channels are the real and imaginary parts
        self._alphaf = np.zeros((self.size_patch[0], self.size_patch[1], 2), np.float32)

        if self._multiscale:
            self.dsstInit(self._roi, image)

        self.train(self._tmpl, 1.0)

    # Extract a sub-window from the image, with padding, and detect features on it
    def getFeatures(self, image, inithann, scale_adjust=1.):
        extracted_roi = [0, 0, 0, 0]
        cx = self._roi[0] + self._roi[2] / 2
        cy = self._roi[1] + self._roi[3] / 2

        if inithann:
            padded_w = self._roi[2] * self.padding
            padded_h = self._roi[3] * self.padding

            if self.template_size > 1:
                # shrink the longer side to 96; _scale is the shrinking ratio
                # _tmpl_sz is the size of the filter template and also the size of the cropped patch
                if padded_w >= padded_h:
                    self._scale = padded_w / float(self.template_size)
                else:
                    self._scale = padded_h / float(self.template_size)
                self._tmpl_sz[0] = int(padded_w / self._scale)
                self._tmpl_sz[1] = int(padded_h / self._scale)
            else:
                self._tmpl_sz[0] = int(padded_w)
                self._tmpl_sz[1] = int(padded_h)
                self._scale = 1.

            if self._hogfeatures:
                self._tmpl_sz[0] = int(self._tmpl_sz[0]) // (2 * self.cell_size) * 2 * self.cell_size + 2 * self.cell_size
                self._tmpl_sz[1] = int(self._tmpl_sz[1]) // (2 * self.cell_size) * 2 * self.cell_size + 2 * self.cell_size
            else:
                self._tmpl_sz[0] = int(self._tmpl_sz[0]) // 2 * 2
                self._tmpl_sz[1] = int(self._tmpl_sz[1]) // 2 * 2

        # position and size of the patch to crop from the original image
        extracted_roi[2] = int(scale_adjust * self._scale * self._tmpl_sz[0] * self.currentScaleFactor)
        extracted_roi[3] = int(scale_adjust * self._scale * self._tmpl_sz[1] * self.currentScaleFactor)

        extracted_roi[0] = int(cx - extracted_roi[2] / 2)
        extracted_roi[1] = int(cy - extracted_roi[3] / 2)

        # z is the search region cropped from the current frame
        z = subwindow(image, extracted_roi, cv2.BORDER_REPLICATE)
        if z.shape[1] != self._tmpl_sz[0] or z.shape[0] != self._tmpl_sz[1]:  # resize down to 96
            z = cv2.resize(z, tuple(self._tmpl_sz))

        if self._hogfeatures:
            mapp = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
            mapp = fhog.getFeatureMaps(z, self.cell_size, mapp)
            mapp = fhog.normalizeAndTruncate(mapp, 0.2)
            mapp = fhog.PCAFeatureMaps(mapp)
            # size_patch is a list holding [height, width, channels] of the cropped feature map
            self.size_patch = list(map(int, [mapp['sizeY'], mapp['sizeX'], mapp['numFeatures']]))
            FeaturesMap = mapp['map'].reshape((self.size_patch[0] * self.size_patch[1], self.size_patch[2])).T  # (size_patch[2], size_patch[0]*size_patch[1])

        else:  # convert the RGB image into a single-channel gray-scale image
            if z.ndim == 3 and z.shape[2] == 3:
                FeaturesMap = cv2.cvtColor(z, cv2.COLOR_BGR2GRAY)
            elif z.ndim == 2:
                FeaturesMap = z

            # from here on, FeaturesMap ranges from -0.5 to 0.5
            FeaturesMap = FeaturesMap.astype(np.float32) / 255.0 - 0.5
            # size_patch is a list holding [height, width, 1] of the cropped feature map
            self.size_patch = [z.shape[0], z.shape[1], 1]

        if inithann:
            self.createHanningMats()

        FeaturesMap = self.hann * FeaturesMap  # apply a Hanning (cosine) window to reduce spectral leakage
        return FeaturesMap

    # Train the model with the detection result on the current frame
    # x is the feature of the current frame at the current scale; train_interp_factor is interp_factor
    def train(self, x, train_interp_factor):
        k = self.gaussianCorrelation(x, x)
        # alphaf is the correlation filter model in the frequency domain; its two channels are the real and imaginary parts
        # _prob is the Gaussian response map created at initialization, i.e. the regression target y
        alphaf = complexDivision(self._prob, fftd(k) + self.lambdar)

        # _tmpl is the running weighted average of the extracted features
        self._tmpl = (1 - train_interp_factor) * self._tmpl + train_interp_factor * x
        # _alphaf is the running weighted average of the frequency-domain filter model
        self._alphaf = (1 - train_interp_factor) * self._alphaf + train_interp_factor * alphaf

    # Detect the target in the current frame
    # z is the model trained on previous frames (initialized on the first frame),
    # x is the feature of the current frame at the current scale,
    # peak_value is the peak of the detection response
    def detect(self, z, x):
        k = self.gaussianCorrelation(x, z)
        # response map
        res = real(fftd(complexMultiplication(self._alphaf, fftd(k)), True))

        # pv: maximum response value; pi: index array of the maximum response point
        _, pv, _, pi = cv2.minMaxLoc(res)
        # float representation of the index of the maximum response
        p = [float(pi[0]), float(pi[1])]

        # locate the peak with sub-pixel accuracy using the neighbouring magnitudes
        if pi[0] > 0 and pi[0] < res.shape[1] - 1:
            p[0] += self.subPixelPeak(res[pi[1], pi[0] - 1], pv, res[pi[1], pi[0] + 1])
        if pi[1] > 0 and pi[1] < res.shape[0] - 1:
            p[1] += self.subPixelPeak(res[pi[1] - 1, pi[0]], pv, res[pi[1] + 1, pi[0]])

        # displacement from the sampling center
        p[0] -= res.shape[1] / 2.
        p[1] -= res.shape[0] / 2.

        # return the displacement from the sampling center and the peak value
        return p, pv

    # Update the target position based on the current frame
    def update(self, image):
        # clip to the frame boundary
        if self._roi[0] + self._roi[2] <= 0:  self._roi[0] = -self._roi[2] + 1
        if self._roi[1] + self._roi[3] <= 0:  self._roi[1] = -self._roi[3] + 1
        if self._roi[0] >= image.shape[1] - 1:  self._roi[0] = image.shape[1] - 2
        if self._roi[1] >= image.shape[0] - 1:  self._roi[1] = image.shape[0] - 2

        # center of the tracking / scale box
        cx = self._roi[0] + self._roi[2] / 2.
        cy = self._roi[1] + self._roi[3] / 2.

        # peak detection result at the unchanged scale
        loc, peak_value = self.detect(self._tmpl, self.getFeatures(image, 0, 1.0))

        # only the center is returned, so adjust the target box using the scale and the center
        # loc is the relative displacement of the center
        self._roi[0] = cx - self._roi[2] / 2.0 + loc[0] * self.cell_size * self._scale * self.currentScaleFactor
        self._roi[1] = cy - self._roi[3] / 2.0 + loc[1] * self.cell_size * self._scale * self.currentScaleFactor

        # scale estimation
        if self._multiscale:
            if self._roi[0] >= image.shape[1] - 1:  self._roi[0] = image.shape[1] - 1
            if self._roi[1] >= image.shape[0] - 1:  self._roi[1] = image.shape[0] - 1
            if self._roi[0] + self._roi[2] <= 0:  self._roi[0] = -self._roi[2] + 2
            if self._roi[1] + self._roi[3] <= 0:  self._roi[1] = -self._roi[3] + 2

            # update the scale
            scale_pi = self.detect_scale(image)
            self.currentScaleFactor = self.currentScaleFactor * self.scaleFactors[scale_pi[0]]
            if self.currentScaleFactor < self.min_scale_factor:
                self.currentScaleFactor = self.min_scale_factor
            # elif self.currentScaleFactor > self.max_scale_factor:
            #     self.currentScaleFactor = self.max_scale_factor

            self.train_scale(image)

        if self._roi[0] >= image.shape[1] - 1:  self._roi[0] = image.shape[1] - 1
        if self._roi[1] >= image.shape[0] - 1:  self._roi[1] = image.shape[0] - 1
        if self._roi[0] + self._roi[2] <= 0:  self._roi[0] = -self._roi[2] + 2
        if self._roi[1] + self._roi[3] <= 0:  self._roi[1] = -self._roi[3] + 2
        assert (self._roi[2] > 0 and self._roi[3] > 0)

        # train the model with the current detection box
        x = self.getFeatures(image, 0, 1.0)
        self.train(x, self.interp_factor)

        return self._roi


    ##############################
    ### Scale estimator        ###
    ##############################

    def computeYsf(self):
        scale_sigma2 = (self.n_scales / self.n_scales ** 0.5 * self.scale_sigma_factor) ** 2
        _, res = np.ogrid[0:0, 0:self.n_scales]
        ceilS = np.ceil(self.n_scales / 2.0)
        res = np.exp(- 0.5 * (np.power(res + 1 - ceilS, 2)) / scale_sigma2)
        return fftd(res)

    def createHanningMatsForScale(self):
        _, hann_s = np.ogrid[0:0, 0:self.n_scales]
        hann_s = 0.5 * (1 - np.cos(2 * np.pi * hann_s / (self.n_scales - 1)))
        return hann_s
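
    # With the defaults n_scales = 33 and scale_step a = 1.05, dsstInit below
    # builds scaleFactors = a**(ceil(33/2) - 1 - s) for s = 0..32, i.e. factors
    # from 1.05**16 ≈ 2.18 down to 1.05**-16 ≈ 0.46 with 1.0 in the middle, so
    # each frame tests the target at 33 relative sizes around the current one.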

    # Initialize the scale estimator
    def dsstInit(self, roi, image):
        self.base_width = roi[2]
        self.base_height = roi[3]

        # Gaussian peak for scales (after fft)
        self.ysf = self.computeYsf()
        self.s_hann = self.createHanningMatsForScale()

        # Get all scale changing rates
        scaleFactors = np.arange(self.n_scales)
        ceilS = np.ceil(self.n_scales / 2.0)
        self.scaleFactors = np.power(self.scale_step, ceilS - scaleFactors - 1)

        # Get the scaling rate for compressing to the model size
        scale_model_factor = 1.
        if self.base_width * self.base_height > self.scale_max_area:
            scale_model_factor = (self.scale_max_area / (self.base_width * self.base_height)) ** 0.5

        self.scale_model_width = int(self.base_width * scale_model_factor)
        self.scale_model_height = int(self.base_height * scale_model_factor)

        # Compute min and max scaling rates
        # (the hard-coded 0.0086 is inherited from the C++ baseline; it equals
        #  log10(1.02), i.e. it implicitly assumes the original scale_step of 1.02)
        self.min_scale_factor = np.power(self.scale_step, np.ceil(np.log((max(5 / self.base_width, 5 / self.base_height) * (1 + self.scale_padding))) / 0.0086))
        self.max_scale_factor = np.power(self.scale_step, np.floor(np.log((min(image.shape[0] / self.base_width, image.shape[1] / self.base_height) * (1 + self.scale_padding))) / 0.0086))

        self.train_scale(image, True)

    # Get scale samples
    def get_scale_sample(self, image):
        xsf = None
        for i in range(self.n_scales):
            # Size of the subwindow waiting to be detected
            patch_width = self.base_width * self.scaleFactors[i] * self.currentScaleFactor
            patch_height = self.base_height * self.scaleFactors[i] * self.currentScaleFactor

            cx = self._roi[0] + self._roi[2] / 2.
            cy = self._roi[1] + self._roi[3] / 2.

            # Get the subwindow
            im_patch = extractImage(image, cx, cy, patch_width, patch_height)
            if self.scale_model_width > im_patch.shape[1]:
                im_patch_resized = cv2.resize(im_patch, (self.scale_model_width, self.scale_model_height), None, 0, 0, 1)
            else:
                im_patch_resized = cv2.resize(im_patch, (self.scale_model_width, self.scale_model_height), None, 0, 0, 3)

            mapp = {'sizeX': 0, 'sizeY': 0, 'numFeatures': 0, 'map': 0}
            mapp = fhog.getFeatureMaps(im_patch_resized, self.cell_size, mapp)
            mapp = fhog.normalizeAndTruncate(mapp, 0.2)
            mapp = fhog.PCAFeatureMaps(mapp)

            if i == 0:
                totalSize = mapp['numFeatures'] * mapp['sizeX'] * mapp['sizeY']
                xsf = np.zeros((totalSize, self.n_scales))

            # Multiply the FHOG results by the hanning window and copy to the output
            FeaturesMap = mapp['map'].reshape((totalSize, 1))
            FeaturesMap = self.s_hann[0][i] * FeaturesMap
            xsf[:, i] = FeaturesMap[:, 0]

        return fftd(xsf, False, True)

    # Train the scale estimator
    def train_scale(self, image, ini=False):
        xsf = self.get_scale_sample(image)

        # Adjust ysf to the same size as xsf the first time
        if ini:
            totalSize = xsf.shape[0]
            self.ysf = cv2.repeat(self.ysf, totalSize, 1)

        # Get the new GF in the paper (delta A)
        new_sf_num = cv2.mulSpectrums(self.ysf, xsf, 0, conjB=True)

        new_sf_den = cv2.mulSpectrums(xsf, xsf, 0, conjB=True)
        new_sf_den = cv2.reduce(real(new_sf_den), 0, cv2.REDUCE_SUM)

        if ini:
            self.sf_den = new_sf_den
            self.sf_num = new_sf_num
        else:
            # Get new A and new B
            self.sf_den = cv2.addWeighted(self.sf_den, (1 - self.scale_lr), new_sf_den, self.scale_lr, 0)
            self.sf_num = cv2.addWeighted(self.sf_num, (1 - self.scale_lr), new_sf_num, self.scale_lr, 0)

        self.update_roi()
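
    # In the notation of the DSST paper ([3] in the README), with learning rate
    # eta = scale_lr, train_scale above maintains
    #   A_t = (1 - eta) * A_{t-1} + eta * Y * conj(X_t)            (sf_num)
    #   B_t = (1 - eta) * B_{t-1} + eta * sum_c X_t * conj(X_t)    (sf_den)
    # and detect_scale below evaluates the scale response
    #   y = F^-1( sum_c A * Z / (B + lambda) )
    # for a new sample Z; its argmax picks the relative scale change per frame.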

    # Detect the current scale of the target
    def detect_scale(self, image):
        xsf = self.get_scale_sample(image)

        # Compute AZ in the paper
        add_temp = cv2.reduce(complexMultiplication(self.sf_num, xsf), 0, cv2.REDUCE_SUM)

        # compute the final y
        scale_response = cv2.idft(complexDivisionReal(add_temp, (self.sf_den + self.scale_lambda)), None, cv2.DFT_REAL_OUTPUT)

        # Get the max point as the final scaling rate
        # pv: maximum response value; pi: index array of the maximum response point
        _, pv, _, pi = cv2.minMaxLoc(scale_response)

        return pi

    # Update the ROI with the new scale
    def update_roi(self):
        # center of the tracking / scale box
        cx = self._roi[0] + self._roi[2] / 2.
        cy = self._roi[1] + self._roi[3] / 2.

        # Recompute the ROI size from the base size and the current scale factor
        self._roi[2] = self.base_width * self.currentScaleFactor
        self._roi[3] = self.base_height * self.currentScaleFactor

        # only the center is kept, so recompute the left-upper corner from the scale and the center
        self._roi[0] = cx - self._roi[2] / 2.0
        self._roi[1] = cy - self._roi[3] / 2.0

--------------------------------------------------------------------------------