forked from masa-nudt/KCFDPT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tracker.m
executable file
·272 lines (221 loc) · 11.9 KB
/
tracker.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
% Original code is from Kernelized/Dual Correlation Filter (KCF/DCF)
% by Joao F. Henriques, 2015
% Integrated and modified by Dafei Huang
function [rect_position, fps] = tracker(params, ...
    padding, sigma, lambda, output_sigma_factor, interp_factor, ...
    cell_size, hog_orientations, ...
    model, opts, scale_params)
%TRACKER Kernelized correlation filter tracking with proposal-based scale detection.
%
%   [RECT_POSITION, FPS] = TRACKER(PARAMS, PADDING, SIGMA, LAMBDA,
%   OUTPUT_SIGMA_FACTOR, INTERP_FACTOR, CELL_SIZE, HOG_ORIENTATIONS,
%   MODEL, OPTS, SCALE_PARAMS) processes the frames listed in
%   PARAMS.s_frames one by one. For every frame after the first it
%   (1) locates the target with the correlation filter response,
%   (2) asks edgeBoxesTrackParam for box proposals around that location
%   and re-scores the ones overlapping the current target, and
%   (3) moves/resizes the target towards the best candidate with damping.
%   The filter model is then (re)trained on the new target window.
%
%   Inputs
%     params        struct; fields read here:
%                     bSaveImage    - save a screenshot per frame when == 1
%                     res_path      - path fragment used for saved screenshots
%                     video_path    - only used in the figure title
%                     s_frames      - cell array of image file names
%                     init_pos      - initial target center, [row, col]
%                     wsize         - initial target size, [rows, cols]
%                     visualization - show the tracking GUI when >= 1
%     padding       extra area around the target: window = target * (1 + padding)
%     sigma         kernel bandwidth passed to gaussian_correlation
%     lambda        regularization term of the ridge regression
%     output_sigma_factor  bandwidth factor of the gaussian regression labels
%     interp_factor linear interpolation factor for the model update
%     cell_size, hog_orientations  passed through to get_features
%     model, opts   passed through to edgeBoxesTrackParam; opts.maxAspectRatio
%                   and opts.minBoxArea are overwritten on every frame
%     scale_params  struct; fields read here: scale_detect_window_factor,
%                   proposal_num_limit, pos_shift_damping, rescale_damping,
%                   EB_maxAR_factor, EB_minArea_factor, backSup_size
%
%   Outputs
%     rect_position numel(s_frames)-by-4 matrix, one [x, y, width, height]
%                   row per frame, in the coordinates of the input images.
%                   Rows of frames that were never processed (GUI closed
%                   early) stay zero.
%     fps           processed frames divided by the accumulated tracking
%                   time (image loading and visualization are not timed).
%
%   Note: variables ending with 'f' are in the Fourier domain.

    % general parameters
    bSaveImage = params.bSaveImage;
    res_path = params.res_path;
    video_path = params.video_path;
    s_frames = params.s_frames;
    pos = floor(params.init_pos);
    target_sz = floor(params.wsize);
    visualization = params.visualization;

    % scale detection parameters
    scale_detect_window_factor = scale_params.scale_detect_window_factor;
    proposal_num_limit = scale_params.proposal_num_limit;
    pos_shift_damping = scale_params.pos_shift_damping;
    rescale_damping = scale_params.rescale_damping;
    EB_maxAR_factor = scale_params.EB_maxAR_factor;
    EB_minArea_factor = scale_params.EB_minArea_factor;
    backSup_size = scale_params.backSup_size;

    num_frames = numel(s_frames);

    % if the target is large, lower the resolution, we don't need that much
    % detail (threshold is on the geometric mean of the target's sides)
    resize_image = (sqrt(prod(target_sz)) >= 100);
    if resize_image
        pos = floor(pos / 2);
        target_sz = floor(target_sz / 2);
    end

    % window size, taking padding into account. The "org_" size stays fixed
    % for the whole sequence: every later window is resized to it so that
    % the filter dimensions never change.
    window_sz = floor(target_sz * (1 + padding));
    org_window_sz = window_sz;

    % % we could choose a size that is a power of two, for better FFT
    % % performance. in practice it is slower, due to the larger window size.
    % window_sz = 2 .^ nextpow2(window_sz);

    % create regression labels, gaussian shaped, with a bandwidth
    % proportional to target size
    output_sigma = sqrt(prod(target_sz)) * output_sigma_factor / cell_size;
    yf = fft2(gaussian_shaped_labels(output_sigma, floor(window_sz / cell_size)));

    % store pre-computed cosine window
    cos_window = hann(size(yf,1)) * hann(size(yf,2))';

    rect_position = zeros(num_frames, 4);

    % lookup table handed to get_features
    temp = load('w2crs');
    w2c = temp.w2crs;

    time = 0;         % accumulated tracking time, to calculate FPS
    frames_done = 0;  % frames fully tracked (differs from num_frames only
                      % when the GUI is closed before the last frame)

    for frame = 1:num_frames
        % load image once; keep an unconverted copy next to the gray one
        im = imread(s_frames{frame});
        org_im = im;
        if size(im,3) > 1
            im = rgb2gray(im);
        end
        if resize_image
            im = imresize(im, 0.5);
            org_im = imresize(org_im, 0.5, 'bilinear');
        end

        tic()

        if frame > 1
            % obtain a subwindow for detection at the position from last
            % frame, scale it to the original window size and convert to
            % the Fourier domain
            patch = get_subwindow(im, pos, window_sz);
            patch = imresize(patch, org_window_sz, 'bilinear');
            org_patch = get_subwindow(org_im, pos, window_sz);
            org_patch = imresize(org_patch, org_window_sz, 'bilinear');
            zf = fft2(get_features(patch, org_patch, hog_orientations, cell_size, cos_window, w2c));

            % calculate response of the classifier at all shifts
            kzf = gaussian_correlation(zf, model_xf, sigma);
            response = real(ifft2(model_alphaf .* kzf));  % equation for fast detection

            % target location is at the maximum response. we must take into
            % account the fact that, if the target doesn't move, the peak
            % will appear at the top-left corner, not at the center (this is
            % discussed in the paper). the responses wrap around cyclically.
            max_response = max(response(:));
            [vert_delta, horiz_delta] = find(response == max_response, 1);
            if vert_delta > size(zf,1) / 2   % wrap around to negative half-space of vertical axis
                vert_delta = vert_delta - size(zf,1);
            end
            if horiz_delta > size(zf,2) / 2  % same for horizontal axis
                horiz_delta = horiz_delta - size(zf,2);
            end

            % the shift was measured in the resized window, so map it back
            % to the current window scale before applying it
            scale = window_sz ./ org_window_sz;
            pos = pos + ( (cell_size * [vert_delta - 1, horiz_delta - 1]) .* scale );
            % only consumed by the optional (commented-out) visualization
            pre_target_rect = [pos([2,1]) - target_sz([2,1])/2, target_sz([2,1])];

            % begin scale detection
            detect_sz = floor(target_sz * scale_detect_window_factor);  % window size for scale detection
            mid_pt = detect_sz * 0.5;  % center position in the window for scale detection

            % get the window for scale detection
            edgeBoxes_window = get_subwindow(org_im, pos, detect_sz);
            if size(org_im,3) == 1  % for gray sequences
                % Cast BEFORE dividing: on an integer-class window (imread
                % typically yields uint8; presumably get_subwindow keeps the
                % class - TODO confirm) "window / 255" is integer arithmetic
                % and would round every pixel to 0 or 1.
                edgeBoxes_window = single(edgeBoxes_window) / 255;
                edgeBoxes_window = cat(3, edgeBoxes_window, edgeBoxes_window, edgeBoxes_window);
            end

            % dynamically adjust edgeBoxes parameters
            opts.maxAspectRatio = max([target_sz(1)/target_sz(2), target_sz(2)/target_sz(1)]) * EB_maxAR_factor;
            opts.minBoxArea = floor( prod(target_sz) * EB_minArea_factor);

            % edgeBoxes proposals
            % Background Suppression is implemented within edgeBoxesTrackParam.m and edgeBoxesTrackParamMex.cpp
            edgeBoxes_window_proposals = edgeBoxesTrackParam(edgeBoxes_window, model, backSup_size, opts);

            % choose candidate proposals
            num_of_proposals = 0;
            proposals = zeros(proposal_num_limit,4);  % center_y, center_x, rows, cols
            proposals_xywh = zeros(proposal_num_limit,4);
            target_in_edgeBoxes_window = [mid_pt([2,1]) - target_sz([2,1])/2, target_sz([2,1])];

            % find candidate proposals among the top proposal_num_limit proposals detected by edgeBoxes
            for i = 1 : min([size(edgeBoxes_window_proposals,1) proposal_num_limit])
                % keep boxes whose calcRectInt score against the current
                % target lies strictly between 0.6 and 0.9
                overlap = calcRectInt(edgeBoxes_window_proposals(i,1:4), target_in_edgeBoxes_window);
                if overlap > 0.6 && overlap < 0.9
                    proposal_sz = [edgeBoxes_window_proposals(i,4) edgeBoxes_window_proposals(i,3)];
                    proposal_pos = [edgeBoxes_window_proposals(i,2) edgeBoxes_window_proposals(i,1)] + floor(proposal_sz/2);
                    num_of_proposals = num_of_proposals + 1;
                    % convert from detection-window to image coordinates
                    proposals(num_of_proposals,:) = [pos+proposal_pos-mid_pt, proposal_sz];
                    proposals_xywh(num_of_proposals,:) = [proposals(num_of_proposals,[2,1]) - proposal_sz([2,1])/2, proposal_sz([2,1])];
                end
            end

            % evaluate all the candidate proposals using kernel correlation;
            % a proposal only wins if it beats the plain detection response
            model_alpha = ifft2(model_alphaf);
            max_proposal_response = max_response;
            new_pos = pos;
            new_target_sz = target_sz;
            for j = 1 : num_of_proposals
                proposal_patch = get_subwindow( im, proposals(j,1:2), proposals(j,3:4)*(1 + padding) );
                proposal_patch = imresize(proposal_patch, org_window_sz, 'bilinear');
                proposal_org_patch = get_subwindow( org_im, proposals(j,1:2), proposals(j,3:4)*(1 + padding) );
                proposal_org_patch = imresize(proposal_org_patch, org_window_sz, 'bilinear');
                proposal_zf = fft2(get_features(proposal_patch, proposal_org_patch, hog_orientations, cell_size, cos_window, w2c));
                proposal_kz = gaussian_correlation_nofft(proposal_zf, model_xf, sigma);  % no fft needed here
                % calculate the response of the classifier without considering the cyclic shifts
                proposal_response = model_alpha(:)' * proposal_kz(:);
                if proposal_response > max_proposal_response
                    max_proposal_response = proposal_response;
                    new_pos = proposals(j,1:2);
                    new_target_sz = proposals(j,3:4);
                end
            end
            % only consumed by the optional (commented-out) visualization
            chosen_proposal = [new_pos([2,1]) - new_target_sz([2,1])/2, new_target_sz([2,1])];

            % damped update of position and size towards the winner
            pos = floor( ( 1 - pos_shift_damping ) * pos + pos_shift_damping * new_pos );
            target_sz = floor( ( 1 - rescale_damping ) * target_sz + rescale_damping * new_target_sz );
            window_sz = floor( target_sz * (1 + padding) );
        end

        % obtain a subwindow for training at newly estimated target position
        patch = get_subwindow(im, pos, window_sz);
        org_patch = get_subwindow(org_im, pos, window_sz);
        if frame > 1
            % resize to original window size then train the new model
            patch = imresize(patch, org_window_sz, 'bilinear');
            org_patch = imresize(org_patch, org_window_sz, 'bilinear');
        end
        xf = fft2(get_features(patch, org_patch, hog_orientations, cell_size, cos_window, w2c));

        % Kernel Ridge Regression, calculate alphas (in Fourier domain)
        kf = gaussian_correlation(xf, xf, sigma);
        % alphaf = yf ./ (kf + lambda);   % equation for fast training

        % utilize the updating scheme proposed in ACT: numerator and
        % denominator of alphaf are interpolated separately
        new_alphaf_num = yf .* kf;
        new_alphaf_den = kf .* (kf + lambda);

        if frame == 1  % first frame, train with a single image
            alphaf_num = new_alphaf_num;
            alphaf_den = new_alphaf_den;
            model_xf = xf;
        else
            % subsequent frames, interpolate model
            alphaf_num = (1 - interp_factor) * alphaf_num + interp_factor * new_alphaf_num;
            alphaf_den = (1 - interp_factor) * alphaf_den + interp_factor * new_alphaf_den;
            model_xf = (1 - interp_factor) * model_xf + interp_factor * xf;
        end
        model_alphaf = alphaf_num ./ alphaf_den;

        % save position and timing
        rect_position(frame,:) = [pos([2,1]) - target_sz([2,1])/2, target_sz([2,1])];
        time = time + toc();
        frames_done = frames_done + 1;

        % visualization (uncomment the commented code below for detailed visual effects)
        if visualization >= 1
            if frame == 1  % first frame, create GUI
                % 'NumberTitle' (not 'Number', which is a read-only figure
                % property in MATLAB R2014b and later) hides the
                % "Figure N:" prefix in the window title
                figure('NumberTitle','off', 'Name',['Tracker - ' video_path]);
                im_handle = imshow(uint8(org_im), 'Border','tight', 'InitialMag', 100 + 100 * (length(im) < 500));
                rect_handle = rectangle('Position',rect_position(frame,:), 'EdgeColor','g', 'LineWidth', 5);
                % top scored proposals
                % for j = 1 : 4
                %     proposal_handle(j) = rectangle('Position',[0,0,1,1], 'EdgeColor', 'r', 'LineWidth', 3);
                % end
                % preliminary location and previous size
                % pre_target_handle = rectangle('Position', [0,0,1,1], 'EdgeColor', [0.3,0.3,1], 'LineWidth', 4, 'lineStyle', '--');
                % the most promising proposal
                % chosen_proposal_handle = rectangle('Position', [0,0,1,1], 'EdgeColor', [1,1,0], 'LineWidth', 3);
                % frame index
                % text_handle = text(30, 30, ['# ' int2str(frame)]);
                % set(text_handle, 'color', [0 1 1], 'FontSize', 30, 'FontWeight', 'Bold');
            else
                try  % subsequent frames, update GUI
                    set(im_handle, 'CData', org_im)
                    set(rect_handle, 'Position', rect_position(frame,:));
                    % for j = 1 : min([num_of_proposals 4])
                    %     set(proposal_handle(j), 'Position', proposals_xywh(j,:));
                    % end
                    % for j = (min([num_of_proposals 4])+1) : 4
                    %     set(proposal_handle(j), 'Position', [0,0,1,1]);
                    % end
                    % set(pre_target_handle, 'Position', pre_target_rect);
                    % set(chosen_proposal_handle, 'Position', chosen_proposal);
                    % set(text_handle, 'string', ['# ' int2str(frame)]);
                catch
                    % The GUI update failed (presumably the figure was
                    % closed by the user): stop tracking, but leave the
                    % loop with break instead of returning so that the
                    % outputs below are still finalized.
                    break
                end
            end
            drawnow
            % pause
        end

        if bSaveImage == 1
            imwrite(frame2im(getframe(gcf)),['../.' res_path num2str(frame) '.jpg']);
        end
    end

    % report boxes in the coordinates of the full-resolution input
    if resize_image
        rect_position = rect_position * 2;
    end

    % equals num_frames/time whenever the whole sequence was processed
    fps = frames_done / time;
end