├── .gitignore
├── CarKeypoints
│   ├── README.md
│   ├── assets
│   │   └── carkeypoints.png
│   ├── inference-imageset.lua
│   ├── inference.lua
│   ├── valeval.lua
│   └── valid.txt
├── LICENSE
├── README.md
├── Video-Person-ReID
│   ├── Graph_ModelDataGen.py
│   ├── Graph_data_manager.py
│   ├── Graph_video_loader.py
│   ├── README.md
│   ├── bases.py
│   ├── data_manager.py
│   ├── data_util
│   │   ├── convert_metadata_imglistprob.py
│   │   ├── create_feature_files.py
│   │   ├── create_metadata_files.py
│   │   ├── xml_reader_testdata.py
│   │   └── xml_reader_traindata.py
│   ├── eval_metrics.py
│   ├── iotools.py
│   ├── losses.py
│   ├── main_video_person_reid.py
│   ├── models
│   │   ├── ResNet.py
│   │   ├── __init__.py
│   │   └── resnet3d.py
│   ├── re_ranking_metadata.py
│   ├── reidtools.py
│   ├── samplers.py
│   ├── transforms.py
│   ├── utils.py
│   ├── video2img
│   │   ├── crop_img.py
│   │   ├── crop_img_big.py
│   │   └── txt_GPS_new
│   │       ├── c006.txt
│   │       ├── c007.txt
│   │       ├── c008.txt
│   │       ├── c009.txt
│   │       ├── c010.txt
│   │       ├── c016.txt
│   │       ├── c017.txt
│   │       ├── c018.txt
│   │       ├── c019.txt
│   │       ├── c020.txt
│   │       ├── c021.txt
│   │       ├── c022.txt
│   │       ├── c023.txt
│   │       ├── c024.txt
│   │       ├── c025.txt
│   │       ├── c026.txt
│   │       ├── c027.txt
│   │       ├── c028.txt
│   │       ├── c029.txt
│   │       ├── c033.txt
│   │       ├── c034.txt
│   │       ├── c035.txt
│   │       └── c036.txt
│   └── video_loader.py
├── metadata
│   ├── README.md
│   ├── aic
│   │   └── trainer_metadata
│   │       └── Data
│   │           └── TestSet
│   │               └── data.txt
│   ├── data.txt
│   ├── data
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── dataset.cpython-36.pyc
│   │   │   ├── loader.cpython-36.pyc
│   │   │   └── transformer.cpython-36.pyc
│   │   ├── dataset.py
│   │   ├── loader.py
│   │   └── transformer.py
│   ├── deploy.py
│   ├── label.txt
│   ├── models
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── alexnet.cpython-36.pyc
│   │   │   ├── build_model.cpython-36.pyc
│   │   │   ├── lightcnn.cpython-36.pyc
│   │   │   ├── model.cpython-36.pyc
│   │   │   ├── resnet.cpython-36.pyc
│   │   │   └── vgg.cpython-36.pyc
│   │   ├── alexnet.py
│   │   ├── build_model.py
│   │   ├── lightcnn.py
│   │   ├── model.py
│   │   ├── resnet.py
│   │   └── vgg.py
│   ├── multi_label_classifier.py
│   ├── options
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   └── options.cpython-36.pyc
│   │   └── options.py
│   ├── reformat-log.py
│   ├── testdata.txt
│   └── util
│       ├── __pycache__
│       │   ├── __init__.cpython-36.pyc
│       │   ├── html.cpython-36.pyc
│       │   ├── util.cpython-36.pyc
│       │   └── webvisualizer.cpython-36.pyc
│       ├── html.py
│       ├── util.py
│       └── webvisualizer.py
├── requirements.txt
└── vehicle_keypoints
    ├── README.md
    └── carkeypoint_train.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.pyc
3 |
4 | .idea/
5 |
--------------------------------------------------------------------------------
/CarKeypoints/README.md:
--------------------------------------------------------------------------------
1 | The vehicle keypoints code is based on krrish94's CarKeypoints \[[code](https://github.com/krrish94/CarKeypoints)\].
2 |
3 | # CarKeypoints
4 |
5 | This repository contains inference code for using a modified [stacked-hourglass](https://github.com/krrish94/stacked-hourglass) to detect semantic keypoints on cars.
6 |
7 | The network outputs a likelihood of keypoint presence over every pixel of an input image (the input image is a 64 x 64 car bounding box).
8 |
9 | Here is a 3D wireframe with reference keypoints.
10 |
11 | ![Car keypoints reference](assets/carkeypoints.png)
12 |
13 |
14 | ## Setup
15 |
16 | This code assumes you have the following packages installed.
17 | * [Torch7](https://github.com/torch/torch7)
18 | * Torch packages: `nn`, `cunn`, `cudnn`, `image`, `nngraph`
19 |
20 |
21 | ## Downloading the pre-trained model
22 |
23 | Download the pre-trained model [here](https://www.dropbox.com/s/qezt3e02j4uawov/model.t7?dl=0).
24 |
25 |
26 | ## Running the inference code
27 |
28 | To perform inference on a set of images, first edit `valid.txt` and add paths to the images you need to run inference on. **These images must only contain cropped car bounding boxes** (i.e., from any image that contains a car, pick only one car bounding box and crop the region of the image contained within that bounding box). These are the only kind of images the model has been trained on.
29 |
30 | Then, run the inference script.
31 | ```
32 | th inference.lua
33 | ```
34 |
35 | This will write a `results.txt` file (you can edit the name and path of this output file in `inference.lua`).
36 |
37 | ## Running inference for AIC19
38 |
39 | To run inference on multiple images, put the image paths (one per line) in a text file (e.g. `fullpath_train.txt`) and point `data_file` at the top of the script to it. Then, run the inference script.
40 | ```
41 | th inference-imageset.lua
42 | ```
43 |
44 | The result will be saved in `keypoint-train.txt`. Example results can be downloaded [here](https://drive.google.com/open?id=1m96n_1gsHy3iI9ruRGDGqaVXqjJgVcKf).
45 |
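The per-line output format follows `inference-imageset.lua`: 36 keypoints, each written as `x y score` in the 64 x 64 crop coordinate frame, with one line per image in the same order as the input image list. As a reference (not part of this repo), here is a minimal Python sketch for parsing the result file into `(36, 3)` arrays:

```python
import numpy as np

def load_keypoints(results_file, num_keypoints=36):
    """Parse the inference output: one line per input image,
    num_keypoints x (x, y, score) values in 64x64 crop coordinates."""
    keypoints = []
    with open(results_file) as f:
        for line in f:
            values = np.array(line.split(), dtype=np.float32)
            keypoints.append(values.reshape(num_keypoints, 3))
    return keypoints  # list of (36, 3) arrays, same order as the image list

kps = load_keypoints('keypoint-train.txt')
print(len(kps), kps[0].shape)  # number of images, (36, 3)
```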
--------------------------------------------------------------------------------
/CarKeypoints/assets/carkeypoints.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/CarKeypoints/assets/carkeypoints.png
--------------------------------------------------------------------------------
/CarKeypoints/inference-imageset.lua:
--------------------------------------------------------------------------------
1 | require 'nn'
2 | require 'cunn'
3 | require 'cudnn'
4 | require 'image'
5 | require 'nngraph'
6 | require 'valeval.lua'
7 |
8 | imageset = 'train'
9 | -- imageset = 'query'
10 | -- imageset = 'test'
11 |
12 | -- File to read image paths from
13 | data_file = string.format('/home/ipl/twhuang/aic19/aic19-track2-reid/fullpath_%s.txt', imageset)
14 | -- Pretrained model weights
15 | model_file = '/home/ipl/twhuang/CarKeypoints/model.t7'
16 | -- This file is where results get written to
17 | results_file = string.format('/home/ipl/twhuang/CarKeypoints/keypoint-%s.txt', imageset)
18 |
19 |
20 | function get_predictions(heat_maps)
21 | assert(heat_maps:size():size() == 4, 'Input must be 4-D tensor')
22 |
23 | local elem, idx = torch.max(heat_maps:view(heat_maps:size(1), heat_maps:size(2), heat_maps:size(3)*heat_maps:size(4)), 3)
24 | local preds = torch.repeatTensor(idx, 1, 1, 2):float()
25 |
26 | preds[{{}, {}, 1}]:apply(function(x) return (x - 1) % heat_maps:size(4) + 1 end)
27 | preds[{{}, {}, 2}]:add(-1):div(heat_maps:size(3)):floor():add(1)
28 |
29 | return preds
30 | end
31 |
32 | function post_process(output, output_res)
33 | local preds = get_predictions(output)
34 | local scores = torch.zeros(preds:size(1), preds:size(2), 1)
35 |
36 | for i=1,preds:size(1) do
37 | for j=1,preds:size(2) do
38 | local heat_map = output[i][j]
39 | local pred_x, pred_y = preds[i][j][1], preds[i][j][2]
40 |
41 | scores[i][j] = heat_map[pred_x][pred_y]
42 | if pred_x > 1 and pred_x < output_res and pred_y > 1 and pred_y < output_res then
43 | local diff = torch.Tensor({heat_map[pred_y][pred_x+1]-heat_map[pred_y][pred_x-1], heat_map[pred_y+1][pred_x]-heat_map[pred_y-1][pred_x]})
44 | preds[i][j]:add(diff:sign():mul(.25))
45 | end
46 | end
47 | end
48 | preds:add(0.5)
49 |
50 | return preds:cat(preds, 3):cat(scores, 3)
51 | end
52 |
53 | function accuracy(output,label)
54 | if type(output) == 'table' then
55 | return heatmapAccuracy(output[#output],label[#output],nil,dataset.accIdxs)
56 | else
57 | return heatmapAccuracy(output,label,nil,dataset.accIdxs)
58 | end
59 | end
60 |
61 | torch.setdefaulttensortype('torch.FloatTensor')
62 |
63 | num_stacks = 2
64 | num_keypoints = 36
65 | output_res = 64
66 | pred_dims = {num_keypoints, 5}
67 | input_dims = {3, 64, 64}
68 |
69 | output_dims = {}
70 | for i=1,num_stacks do
71 | output_dims[i] = {num_keypoints, 64, 64}
72 | end
73 |
74 | num_images = 0
75 | for line in io.lines(data_file) do
76 | num_images = num_images + 1
77 | end
78 |
79 | nn.DataParallelTable.deserializeNGPUs = 1
80 | model = torch.load(model_file)
81 | model:cuda()
82 | model = model:get(1)
83 | print('\nModel Loading Done')
84 |
85 | iters = 0
86 | preds = {}
87 |
88 | local f = io.open(results_file, 'w')
89 |
90 | for line in io.lines(data_file) do
91 | iters = iters + 1
92 |
93 | img_path = string.sub(line, 1, #line-1)
94 | img = torch.FloatTensor(image.load(img_path))
95 | img = image.scale(img, 64, 64)
96 |
97 | input = torch.FloatTensor(1, 3, 64, 64)
98 | input[1] = img
99 |
100 | output = model:forward(input:cuda())
101 | if type(output) == 'table' then
102 | output = output[#output]
103 | end
104 |
105 | keypoints = post_process(output, output_res)
106 | coords = keypoints[1]:sub(1,36,3,5)
107 | table.insert(preds, keypoints[1])
108 | str = ''
109 | for i=1,coords:size(1) do
110 | for j=1,coords:size(2) do
111 | str = str .. tostring(coords[i][j]) .. ' '
112 | end
113 | end
114 |
115 | str = string.sub(str, 1, #str-1)
116 | str = str .. '\n'
117 |
118 | print('Done ' .. line)
119 | f:write(str)
120 | end
121 | f:close()
122 |
--------------------------------------------------------------------------------
/CarKeypoints/inference.lua:
--------------------------------------------------------------------------------
1 | require 'nn'
2 | require 'cunn'
3 | require 'cudnn'
4 | require 'image'
5 | require 'nngraph'
6 | require 'valeval.lua'
7 |
8 |
9 | -- File to read image paths from
10 | data_file = '/home/ipl/twhuang/aic19/aic19-track2-reid/fullpath_query.txt'
11 | -- Pretrained model weights
12 | model_file = '/home/ipl/twhuang/CarKeypoints/model.t7'
13 | -- This file is where results get written to
14 | results_file = '/home/ipl/twhuang/CarKeypoints/results-query.txt'
15 |
16 |
17 | function get_predictions(heat_maps)
18 | assert(heat_maps:size():size() == 4, 'Input must be 4-D tensor')
19 |
20 | local elem, idx = torch.max(heat_maps:view(heat_maps:size(1), heat_maps:size(2), heat_maps:size(3)*heat_maps:size(4)), 3)
21 | local preds = torch.repeatTensor(idx, 1, 1, 2):float()
22 |
23 | preds[{{}, {}, 1}]:apply(function(x) return (x - 1) % heat_maps:size(4) + 1 end)
24 | preds[{{}, {}, 2}]:add(-1):div(heat_maps:size(3)):floor():add(1)
25 |
26 | return preds
27 | end
28 |
29 | function post_process(output, output_res)
30 | local preds = get_predictions(output)
31 | local scores = torch.zeros(preds:size(1), preds:size(2), 1)
32 |
33 | for i=1,preds:size(1) do
34 | for j=1,preds:size(2) do
35 | local heat_map = output[i][j]
36 | local pred_x, pred_y = preds[i][j][1], preds[i][j][2]
37 |
38 | scores[i][j] = heat_map[pred_x][pred_y]
39 | if pred_x > 1 and pred_x < output_res and pred_y > 1 and pred_y < output_res then
40 | local diff = torch.Tensor({heat_map[pred_y][pred_x+1]-heat_map[pred_y][pred_x-1], heat_map[pred_y+1][pred_x]-heat_map[pred_y-1][pred_x]})
41 | preds[i][j]:add(diff:sign():mul(.25))
42 | end
43 | end
44 | end
45 | preds:add(0.5)
46 |
47 | return preds:cat(preds, 3):cat(scores, 3)
48 | end
49 |
50 | function accuracy(output,label)
51 | if type(output) == 'table' then
52 | return heatmapAccuracy(output[#output],label[#output],nil,dataset.accIdxs)
53 | else
54 | return heatmapAccuracy(output,label,nil,dataset.accIdxs)
55 | end
56 | end
57 |
58 | torch.setdefaulttensortype('torch.FloatTensor')
59 |
60 | num_stacks = 2
61 | num_keypoints = 36
62 | output_res = 64
63 | pred_dims = {num_keypoints, 5}
64 | input_dims = {3, 64, 64}
65 |
66 | output_dims = {}
67 | for i=1,num_stacks do
68 | output_dims[i] = {num_keypoints, 64, 64}
69 | end
70 |
71 | num_images = 0
72 | for line in io.lines(data_file) do
73 | num_images = num_images + 1
74 | end
75 |
76 | nn.DataParallelTable.deserializeNGPUs = 1
77 | model = torch.load(model_file)
78 | model:cuda()
79 | model = model:get(1)
80 | print('\nModel Loading Done')
81 |
82 | iters = 0
83 | preds = {}
84 |
85 | local f = io.open(results_file, 'w')
86 |
87 | for line in io.lines(data_file) do
88 | iters = iters + 1
89 |
90 | img_path = string.sub(line, 1, #line-1)
91 | img = torch.FloatTensor(image.load(img_path))
92 | img = image.scale(img, 64, 64)
93 |
94 | input = torch.FloatTensor(1, 3, 64, 64)
95 | input[1] = img
96 |
97 | output = model:forward(input:cuda())
98 | if type(output) == 'table' then
99 | output = output[#output]
100 | end
101 |
102 | keypoints = post_process(output, output_res)
103 | coords = keypoints[1]:sub(1,36,3,5)
104 | table.insert(preds, keypoints[1])
105 | str = ''
106 | for i=1,coords:size(1) do
107 | for j=1,coords:size(2) do
108 | str = str .. tostring(coords[i][j]) .. ' '
109 | end
110 | end
111 |
112 | str = string.sub(str, 1, #str-1)
113 | str = str .. '\n'
114 |
115 | print('Done ' .. line)
116 | f:write(str)
117 | end
118 | f:close()
119 |
--------------------------------------------------------------------------------
/CarKeypoints/valeval.lua:
--------------------------------------------------------------------------------
1 | ------------------------------------------------------------------------------
2 | -- Helpful functions for evaluation
3 | -------------------------------------------------------------------------------
4 |
5 | -- Load predictions from hdf5 file
6 | -- predFile: name to the .h5 file containing the predictions
7 | -- doHm: read the 'heatmaps' field of the hdf5 database
8 | -- doInp: read the 'input' field of the hdf5 database
9 | function loadPreds(predFile, doHm, doInp)
10 | local f = hdf5.open(projectDir .. '/exp/' .. predFile .. '.h5','r')
11 | local inp,hms
12 | local idxs = f:read('idxs'):all()
13 | local preds = f:read('preds'):all()
14 | if doHm then hms = f:read('heatmaps'):all() end
15 | if doInp then inp = f:read('input'):all() end
16 | return idxs, preds, hms, inp
17 | end
18 |
19 |
20 | -- Calculate the distance between the predicted keypoints and the label (ground-truth keypoints)
21 | -- ??? (Check if function signatures (dims) are correctly understood)
22 | -- Inputs
23 | -- preds: N-by-2 tensor of predicted coordinates
24 | -- label: N-by-2 tensor of ground-truth coordinates
25 | -- normalize: N-by-1 tensor of normalizing factors
26 | -- Output
27 | -- dists: 2-by-N tensor of computed distances between the predictions and the labels
28 | function calcDists(preds, label, normalize)
29 | -- Initialize a tensor to hold the distances
30 | local dists = torch.Tensor(preds:size(2), preds:size(1))
31 | -- Initialize a 2-by-1 tensor to hold the difference between a label and a prediction
32 | local diff = torch.Tensor(2)
33 | -- ???
34 | -- For each keypoint predicted
35 | for i = 1,preds:size(1) do
36 | -- ???
37 | -- For each dimension of the predicted keypoint (x, y)
38 | for j = 1,preds:size(2) do
39 | -- If that keypoint is visible in the image, compute the distance
40 | if label[i][j][1] > 1 and label[i][j][2] > 1 then
41 | dists[j][i] = torch.dist(label[i][j],preds[i][j])/normalize[i]
42 | -- If that keypoint is not visible, let the distance be -1
43 | else
44 | dists[j][i] = -1
45 | end
46 | end
47 | end
48 | -- Return the distance
49 | return dists
50 | end
51 |
52 |
53 | -- Recover predictions from a heatmap
54 | -- Input
55 | -- hm: heatmap (a 4-D tensor)
56 | -- Output
57 | -- preds: N-by-2 tensor of predicted keypoint locations obtained from maxima on the heatmap
58 | function getPreds(hm)
59 | -- ??? (assert the following statement)
60 | -- I'm assuming the 4 heatmap dimensions are for [num images] x [num kps per image] x [height] x [width]
61 |
62 | assert(hm:size():size() == 4, 'Input must be 4-D tensor')
63 | -- Reshape the heatmap so that [height] and [width] are flattened out to a single dimension
64 | -- Get the maxima over the third dimension (comprising of the [height * width] flattened values)
65 | local max, idx = torch.max(hm:view(hm:size(1), hm:size(2), hm:size(3) * hm:size(4)), 3)
66 | -- Allocate memory for a tensor to hold X,Y coordinates of maxima locations
67 | local preds = torch.repeatTensor(idx, 1, 1, 2):float()
68 | -- Obtain the X coordinate of each maxima
69 | preds[{{}, {}, 1}]:apply(function(x) return (x - 1) % hm:size(4) + 1 end)
70 | -- Obtain the Y coordinate of each maxima
71 | preds[{{}, {}, 2}]:add(-1):div(hm:size(3)):floor():add(1)
72 | -- Return the predicted locations
73 | --print(preds:size())
74 |
75 | return preds
76 | end
77 |
78 |
79 | -- ???
80 | -- Inputs
81 | -- dists: N-by-2 tensor of distances (between predictions and ground-truth)
82 | -- thr: threshold distance below which a detection is to be considered accurate
83 | -- Output
84 | -- percentage of keypoints that lie within the specified threshold (or -1 if no keypoint is visible)
85 | function distAccuracy(dists, thr)
86 | -- Return percentage below threshold while ignoring values with a -1
87 | if not thr then thr = .5 end
88 | -- Ignore distances that are -1 (since those keypoints are not visible in the image)
89 | if torch.ne(dists,-1):sum() > 0 then
90 | -- For all other keypoints, compute the percentage of keypoints that satisfy the distance threshold
91 | return dists:le(thr):eq(dists:ne(-1)):sum() / dists:ne(-1):sum()
92 | else
93 | return -1
94 | end
95 | end
96 |
97 |
98 | -- Calculate accuracy according to the PCK (Percentage of Correct Keypoints) metric, but use the
99 | -- ground-truth heatmap, rather than ground-truth X,Y locations
100 | -- Inputs
101 | -- output: output heatmap (from the hourglass network)
102 | -- label: ground-truth heatmap (??? confirm)
103 | -- thr: threshold distance below which a detection is considered correct
104 | -- idxs: average accuracy across 'idxs' is also returned by this function
105 | -- Output
106 | -- multiple values (each is an accuracy). The first value to be returned is the average accuracy
107 | -- across 'idxs'. This is followed by accuracies for individual keypoints.
108 | function heatmapAccuracy(output, label, thr, idxs)
109 | -- Compute predictions from the output heatmap (from the hourglass network)
110 | local preds = getPreds(output)
111 | -- Get predictions from the label (ground-truth)
112 | local gt = getPreds(label)
113 | -- Calculate the distance between the predictions and the labels
114 | -- The third argument here is the normalizing factor to be applied at each heatmap location
115 | -- ??? (find out what the /10 is for)
116 | local dists = calcDists(preds, gt, torch.ones(preds:size(1))*opt.outputRes/10)
117 |
118 | -- Table to store accuracies
119 | local acc = {}
120 | -- Variable to store the average accuracy (over specific keypoints as prescribed by 'idxs')
121 | local avgAcc = 0.0
122 | -- Number of indices that are bad (inaccurate)
123 | local badIdxCount = 0
124 |
125 | -- If average accuracy over 'idxs' is not specified
126 | if not idxs then
127 | -- Then compute it over all keypoint indices
128 | for i = 1,dists:size(1) do
129 | -- Compute the percentage of keypoints that are correct
130 | acc[i+1] = distAccuracy(dists[i])
131 | -- If at least one keypoint is correct, add it to the average accuracy
132 | if acc[i+1] >= 0 then avgAcc = avgAcc + acc[i+1]
133 | -- Otherwise, exclude it
134 | else badIdxCount = badIdxCount + 1 end
135 | end
136 | -- Compute the average accuracy for all keypoint indices
137 | -- In this evaluation, we consider only those images where at least one keypoint is accurately
138 | -- predicted.
139 | acc[1] = avgAcc / (dists:size(1) - badIdxCount)
140 | -- Compute average accuracy only over specified 'idxs'
141 | else
142 | for i = 1,#idxs do
143 | acc[i+1] = distAccuracy(dists[idxs[i]])
144 | if acc[i+1] >= 0 then avgAcc = avgAcc + acc[i+1]
145 | else badIdxCount = badIdxCount + 1 end
146 | end
147 | acc[1] = avgAcc / (#idxs - badIdxCount)
148 | end
149 | -- Return the accuracies
150 | return unpack(acc)
151 | end
152 |
153 |
154 | -- ???
155 | -- Calculate basic accuracy
156 | -- Inputs
157 | -- output: output coordinates (??? heatmap or coordinates)
158 | -- label: ground-truth keypoint coordinates
159 | -- thr: threshold
160 | -- Output
161 | -- Percentage of correct keypoints
162 | function basicAccuracy(output, label, thr)
163 | -- Default threshold of .5
164 | if not thr then thr = .5 end
165 | -- Flatten both the output and the label
166 | output = output:view(output:numel())
167 | label = label:view(label:numel())
168 | -- Check if the prediction is within the threshold of the label
169 | local rounded_output = torch.ceil(output - thr):typeAs(label)
170 | local eql = torch.eq(label,rounded_output):typeAs(label)
171 | -- Return PCK
172 | return eql:sum()/output:numel()
173 | end
174 |
175 |
176 | -- ???
177 | -- Generate standard PCK plot
178 | function displayPCK(dists, part_idx, label, title, show_key)
179 |
180 | if not (type(part_idx) == 'table') then
181 | part_idx = {part_idx}
182 | end
183 |
184 | curve_res = 11
185 | num_curves = #dists
186 | local t = torch.linspace(0,.5,curve_res)
187 | local pdj_scores = torch.zeros(num_curves, curve_res)
188 | local plot_args = {}
189 | print(title)
190 | for curve = 1,num_curves do
191 | for i = 1,curve_res do
192 | t[i] = (i-1)*.05
193 | local acc = 0.0
194 | for j = 1,#part_idx do
195 | acc = acc + distAccuracy(dists[curve][part_idx[j]], t[i])
196 | end
197 | pdj_scores[curve][i] = acc / #part_idx
198 | end
199 | plot_args[curve] = {label[curve],t,pdj_scores[curve],'-'}
200 | print(label[curve],pdj_scores[curve][curve_res])
201 | end
202 |
203 | require 'gnuplot'
204 | gnuplot.raw('set title "' .. title .. '"')
205 | if not show_key then gnuplot.raw('unset key')
206 | else gnuplot.raw('set key font ",6" right bottom') end
207 | gnuplot.raw('set xrange [0:.5]')
208 | gnuplot.raw('set yrange [0:1]')
209 | gnuplot.plot(unpack(plot_args))
210 | end
211 |
--------------------------------------------------------------------------------
/CarKeypoints/valid.txt:
--------------------------------------------------------------------------------
1 | /home/ipl/haotian/CarKeypoints/123/2.jpg
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Information Processing Lab, University of Washington
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 2019-CVPR-AIC-Track-2-UWIPL
2 | Repository for the 2019 CVPR AI City Challenge Track 2 from IPL @ University of Washington.
3 | Our method ranked 2nd in the competition.
4 |
5 | ## Code structure
6 | Our code consists of the following three components:
7 |
8 | ### 1. Video-Person-ReID
9 | The multi-view and metadata re-ranking vehicle reidentification model. The code is based on Jiyang Gao's Video-Person-ReID \[[code](https://github.com/jiyanggao/Video-Person-ReID)\].
10 |
11 | ### 2. Metadata
12 | Metadata model for vehicle type, brand, and color. The code is based on pangwong's pytorch-multi-label-classifier \[[code](https://github.com/pangwong/pytorch-multi-label-classifier)\].
13 |
14 | ### 3. CarKeypoints
15 | The vehicle keypoints code is based on krrish94's CarKeypoints \[[code](https://github.com/krrish94/CarKeypoints)\].
16 |
17 | ## Training
18 | Training of both Video-Person-ReID and the metadata model requires CarKeypoints' inference results on the training set. For CarKeypoints, we use the pre-trained model \[[model](https://github.com/krrish94/CarKeypoints)\]. Please refer to the README.md file in each subfolder.
19 |
20 | ## Testing
21 | Testing of both Video-Person-ReID and the metadata model requires CarKeypoints' inference results on the testing set. In addition, Video-Person-ReID needs the metadata model's inference results on the testing set.
22 |
--------------------------------------------------------------------------------
/Video-Person-ReID/Graph_ModelDataGen.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import
2 | import os
3 | import sys
4 | import time
5 | import datetime
6 | import argparse
7 | import os.path as osp
8 | import numpy as np
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.backends.cudnn as cudnn
13 | from torch.utils.data import DataLoader
14 | from torch.autograd import Variable
15 | from torch.optim import lr_scheduler
16 |
17 | import Graph_data_manager
18 | from Graph_video_loader import VideoDataset
19 | import transforms as T
20 | import models
21 | from models import resnet3d
22 | from losses import CrossEntropyLabelSmooth, TripletLoss
23 | from utils import AverageMeter, Logger, save_checkpoint
24 | from eval_metrics import evaluate
25 | from samplers import RandomIdentitySampler
26 | from reidtools import visualize_ranked_results # TH
27 |
28 |
29 |
30 |
31 | def testseq(dataset_name, use_gpu):
32 |
33 | dataset_root = './video2img/track1_sct_img_test_big/'
34 | dataset = Graph_data_manager.AICityTrack2(root=dataset_root)
35 |
36 |
37 | width = 224
38 | height = 224
39 | transform_train = T.Compose([
40 | T.Random2DTranslation(height, width),
41 | T.RandomHorizontalFlip(),
42 | T.ToTensor(),
43 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
44 | ])
45 |
46 | transform_test = T.Compose([
47 | T.Resize((height, width)),
48 | T.ToTensor(),
49 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
50 | ])
51 |
52 | pin_memory = True if use_gpu else False
53 | seq_len = 4
54 | num_instance = 4
55 | train_batch = 32
56 | test_batch = 1
57 |
58 | queryloader = DataLoader(
59 | VideoDataset(dataset.query, seq_len=seq_len, sample='dense', transform=transform_test),
60 | batch_size=test_batch, shuffle=False, num_workers=4,
61 | pin_memory=pin_memory, drop_last=False,
62 | )
63 |
64 | arch = "resnet50ta"
65 | pretrained_model = "./log/track12_ta224_checkpoint_ep500.pth.tar"
66 |
67 |
68 | start_epoch = 0
69 | print("Initializing model: {}".format(arch))
70 | dataset.num_train_pids = 517
71 | if arch=='resnet503d':
72 | model = resnet3d.resnet50(num_classes=dataset.num_train_pids, sample_width=width, sample_height=height, sample_duration=seq_len)
73 | if not os.path.exists(pretrained_model):
74 | raise IOError("Can't find pretrained model: {}".format(pretrained_model))
75 | print("Loading checkpoint from '{}'".format(pretrained_model))
76 | checkpoint = torch.load(pretrained_model)
77 | state_dict = {}
78 | for key in checkpoint['state_dict']:
79 | if 'fc' in key: continue
80 | state_dict[key.partition("module.")[2]] = checkpoint['state_dict'][key]
81 | model.load_state_dict(state_dict, strict=False)
82 | else:
83 | if not os.path.exists(pretrained_model):
84 | model = models.init_model(name=arch, num_classes=dataset.num_train_pids, loss={'xent', 'htri'})
85 | else:
86 | model = models.init_model(name=arch, num_classes=dataset.num_train_pids, loss={'xent', 'htri'})
87 | checkpoint = torch.load(pretrained_model)
88 | model.load_state_dict(checkpoint['state_dict'])
89 | start_epoch = checkpoint['epoch'] + 1
90 | print("Loaded checkpoint from '{}'".format(pretrained_model))
91 | print("- start_epoch: {}\n- rank1: {}".format(start_epoch, checkpoint['rank1']))
92 |
93 | print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0))
94 |
95 | criterion_xent = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu)
96 | criterion_htri = TripletLoss(margin=0.3)
97 |
98 | lr = 0.0003
99 | gamma = 0.1
100 | stepsize = 200
101 | weight_decay = 5e-04
102 |
103 | optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
104 | if stepsize > 0:
105 | scheduler = lr_scheduler.StepLR(optimizer, step_size=stepsize, gamma=gamma)
106 | start_epoch = start_epoch
107 |
108 | if use_gpu:
109 | model = nn.DataParallel(model).cuda()
110 |
111 | test(model, queryloader, 'avg', use_gpu, dataset, -1, meta_data_tab=None)
112 |
113 | def test(model, queryloader, pool, use_gpu, dataset, epoch, ranks=[1, 5, 10, 20], meta_data_tab = None):
114 | model.eval()
115 |
116 | qf, q_pids, q_camids = [], [], []
117 | if False:
118 | for batch_idx, (imgs, surfaces, pids, camids) in enumerate(queryloader):
119 | torch.cuda.empty_cache()
120 | if use_gpu:
121 | imgs = imgs.cuda()
122 | surfaces = surfaces.cuda()
123 | imgs = Variable(imgs, volatile=True)
124 | surfaces = Variable(surfaces, volatile=True)
125 | b, n, s, c, h, w = imgs.size()
126 | b_s, n_s, s_s, d_s = surfaces.size()
127 | assert(b == b_s and n == n_s and s == s_s)
128 | if n < 100:
129 | assert(b == 1)
130 | imgs = imgs.view(b * n, s, c, h, w)
131 | surfaces = surfaces.view(b * n, s, -1)
132 | features = model(imgs, surfaces)
133 | features = features.view(n, -1)
134 |
135 | else:
136 | imgs = imgs.data
137 | imgs.resize_(50, s, c, h, w)
138 | imgs = imgs.view(50, s, c, h, w)
139 | imgs = Variable(imgs, volatile=True)
140 | surfaces = surfaces.data
141 | surfaces.resize_(50, s, d_s)
142 | surfaces = surfaces.view(50, s, -1)
143 | surfaces = Variable(surfaces, volatile=True)
144 | features = model(imgs, surfaces)
145 | features = features.view(50, -1)
146 |
147 | features = torch.mean(features, 0)
148 | features = features.data.cpu()
149 | qf.append(features)
150 | q_pids.extend(pids)
151 | q_camids.extend(camids)
152 | else:
153 | for batch_idx, (imgs, pids, camids) in enumerate(queryloader):
154 | torch.cuda.empty_cache()
155 | if use_gpu:
156 | imgs = imgs.cuda()
157 | imgs = Variable(imgs, volatile=True)
158 | b, n, s, c, h, w = imgs.size()
159 | if n < 100:
160 | assert(b == 1)
161 | imgs = imgs.view(b * n, s, c, h, w)
162 | features = model(imgs)
163 | features = features.view(n, -1)
164 |
165 | else:
166 | imgs = imgs.data
167 | imgs.resize_(50, s, c, h, w)
168 | imgs = imgs.view(50, s, c, h, w)
169 | imgs = Variable(imgs, volatile=True)
170 | features = model(imgs)
171 | features = features.view(50, -1)
172 |
173 | features = torch.mean(features, 0)
174 | features = features.data.cpu()
175 | qf.append(features.numpy())
176 | q_pids.extend(pids.numpy())
177 | q_camids.extend(camids.numpy())
178 |
179 | qf = np.array(qf)
180 | q_pids = np.asarray(q_pids)
181 | q_camids = np.asarray(q_camids)
182 |
183 | np.save("qf3_no_nms_big0510.npy", qf)
184 | np.save("q_pids3_no_nms_big0510.npy", q_pids)
185 | np.save("q_camids3_no_nms_big0510.npy", q_camids)
186 |
187 |
188 | def main():
189 | seed = 1
190 | gpu_devices = '0'
191 | torch.manual_seed(seed)
192 | os.environ['CUDA_VISIBLE_DEVICES'] = gpu_devices
193 | use_gpu = torch.cuda.is_available()
194 | use_gpu = True
195 |
196 | if not True:
197 | sys.stdout = Logger(osp.join('track1_log', 'log_train.txt'))
198 | else:
199 | sys.stdout = Logger(osp.join('track1_log', 'log_test.txt'))
200 | print("==========\nArgs:{}\n==========")
201 |
202 | if use_gpu:
203 | print("Currently using GPU {}".format(gpu_devices))
204 | cudnn.benchmark = True
205 | torch.cuda.manual_seed_all(seed)
206 | else:
207 | print("Currently using CPU (GPU is highly recommended)")
208 |
209 | dataset = "aictrack2"
210 | print("Initializing dataset {}".format(dataset))
211 | testseq(dataset, use_gpu)
212 |
213 |
214 | if __name__ == '__main__':
215 |
216 | main()
217 |
--------------------------------------------------------------------------------
/Video-Person-ReID/Graph_data_manager.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import
2 | import os
3 | import glob
4 | import re
5 | import sys
6 | import urllib
7 | import tarfile
8 | import zipfile
9 | import os.path as osp
10 | from scipy.io import loadmat
11 | import numpy as np
12 |
13 | from utils import mkdir_if_missing, write_json, read_json
14 | from bases import BaseVideoDataset
15 | """Dataset classes"""
16 |
17 |
18 | class AICityTrack2(BaseVideoDataset):
19 |
20 | def __init__(self, root, min_seq_len=0, verbose=True, **kwargs):
21 | self.dataset_dir = root
22 | self.split_query_json_path = osp.join(self.dataset_dir, 'split_query.json')
23 | self.min_seq_len = min_seq_len
24 |
25 | print("Note: if root path is changed, the previously generated json files need to be re-generated (so delete them first)")
26 |
27 | query = self._process_dir3(self.dataset_dir, self.split_query_json_path, relabel=False)
28 |
29 |
30 | self.query = query
31 | self.num_query_pids, _, self.num_query_cams = self.get_videodata_info(self.query)
32 |
33 | def _process_dir3(self, dir_path, json_path, relabel):
34 | if osp.exists(json_path):
35 | print("=> {} generated before, awesome!".format(json_path))
36 | split = read_json(json_path)
37 | return split['tracklets']
38 |
39 | print("=> Automatically generating split (might take a while for the first time, have a coffe)")
40 | camids = glob.glob(osp.join(dir_path, '*')) # avoid .DS_Store
41 | print("Processing '{}' with {} cameras".format(dir_path, len(camids)))
42 |
43 |
44 | tracklets = []
45 | for camid in camids:
46 | ss = camid.split("/")
47 | cam = camid
48 |
49 | camid = int(osp.basename(ss[7].replace("c","")))
50 | print(camid)
51 |
52 | pidrs = glob.glob(osp.join(cam, '*'))
53 | for pdir in pidrs:
54 | raw_img_paths = glob.glob(osp.join(pdir, '*.jpg'))
55 | num_imgs = len(raw_img_paths)
56 |
57 | if num_imgs < self.min_seq_len:
58 | continue
59 |
60 | imgfiles = os.listdir(pdir)
61 | img_paths = []
62 |
63 | for imgfile in imgfiles:
64 | img_idx_name = imgfile
65 | img_paths.append(pdir+"/"+imgfile)
66 |
67 | ############### keep N largest images
68 | N_largest = 32
69 | if N_largest > 0 and len(img_paths) > N_largest:
70 | from PIL import Image
71 | w = 4 # window for average size
72 | area_first = 0
73 | area_last = 0
74 | for img_path in img_paths[:w]:
75 | img = Image.open(img_path)
76 | width, height = img.size
77 | area_first += width*height
78 | for img_path in img_paths[-w:]:
79 | img = Image.open(img_path)
80 | width, height = img.size
81 | area_last += width*height
82 | if area_first > area_last:
83 | img_paths = img_paths[:N_largest]
84 | else:
85 | img_paths = img_paths[-N_largest:]
86 | ##############################################
87 |
88 | img_name = osp.basename(img_paths[0])
89 |
90 | ss = pdir.split("/")
91 | pid = int(ss[8])
92 | img_paths = tuple(img_paths)
93 | tracklets.append((img_paths, pid, camid))
94 |
95 | print("Saving split to {}".format(json_path))
96 | split_dict = {
97 | 'tracklets': tracklets,
98 | }
99 | write_json(split_dict, json_path)
100 |
101 | return tracklets
102 |
103 |
104 |
105 | """Create dataset"""
106 |
107 | __factory = {
108 | 'aictrack2': AICityTrack2,
109 | }
110 |
111 | def get_names():
112 | return __factory.keys()
113 |
114 | def init_dataset(name, *args, **kwargs):
115 | if name not in __factory.keys():
116 | raise KeyError("Unknown dataset: {}".format(name))
117 | return __factory[name](*args, **kwargs)
118 |
119 | if __name__ == '__main__':
120 | dataset = AICityTrack2()
121 |
122 |
123 |
124 |
125 |
126 |
127 |
--------------------------------------------------------------------------------
/Video-Person-ReID/Graph_video_loader.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import
2 | import os
3 | from PIL import Image
4 | import numpy as np
5 |
6 | import torch
7 | from torch.utils.data import Dataset
8 | import random
9 |
10 | def read_image(img_path):
11 | """Keep reading image until succeed.
12 | This can avoid IOError incurred by heavy IO process."""
13 | got_img = False
14 | while not got_img:
15 | try:
16 | img = Image.open(img_path).convert('RGB')
17 | got_img = True
18 | except IOError:
19 | print("IOError incurred when reading '{}'. Will redo. Don't worry. Just chill.".format(img_path))
20 | pass
21 | return img
22 |
23 |
24 | class VideoDataset(Dataset):
25 | """Video Person ReID Dataset.
26 | Note batch data has shape (batch, seq_len, channel, height, width).
27 | """
28 | sample_methods = ['evenly', 'random', 'all']
29 |
30 | def __init__(self, dataset, seq_len=15, sample='evenly', transform=None):
31 | self.dataset = dataset
32 | self.seq_len = seq_len
33 | self.sample = sample
34 | self.transform = transform
35 |
36 | def __len__(self):
37 | return len(self.dataset)
38 |
39 | def __getitem__(self, index):
40 | img_paths, pid, camid= self.dataset[index]
41 | num = len(img_paths)
42 | if self.sample == 'random':
43 | """
44 | Randomly sample seq_len consecutive frames from num frames,
45 | if num is smaller than seq_len, then replicate items.
46 | This sampling strategy is used in training phase.
47 | """
48 | frame_indices = range(num)
49 | rand_end = max(0, len(frame_indices) - self.seq_len - 1)
50 | begin_index = random.randint(0, rand_end)
51 | end_index = min(begin_index + self.seq_len, len(frame_indices))
52 |
53 | indices = frame_indices[begin_index:end_index]
54 |
55 | for index in indices:
56 | if len(indices) >= self.seq_len:
57 | break
58 | indices.append(index)
59 | indices=np.array(indices)
60 | imgs = []
61 | for index in indices:
62 | index=int(index)
63 | img_path = img_paths[index]
64 | img = read_image(img_path)
65 | if self.transform is not None:
66 | img = self.transform(img)
67 | img = img.unsqueeze(0)
68 | imgs.append(img)
69 | imgs = torch.cat(imgs, dim=0)
70 | #imgs=imgs.permute(1,0,2,3)
71 | return imgs, pid, camid
72 |
73 | elif self.sample == 'dense':
74 | """
75 | Sample all frames in a video into a list of clips, each clip contains seq_len frames, batch_size needs to be set to 1.
76 | This sampling strategy is used in test phase.
77 | """
78 | cur_index=0
79 | frame_indices = list(range(num))
80 | indices_list=[]
81 | while num-cur_index > self.seq_len:
82 | indices_list.append(frame_indices[cur_index:cur_index+self.seq_len])
83 | cur_index+=self.seq_len
84 | last_seq=frame_indices[cur_index:]
85 | for index in last_seq:
86 | if len(last_seq) >= self.seq_len:
87 | break
88 | last_seq.append(index)
89 | indices_list.append(last_seq)
90 | imgs_list=[]
91 | for indices in indices_list:
92 | imgs = []
93 | for index in indices:
94 | index=int(index)
95 | img_path = img_paths[index]
96 | img = read_image(img_path)
97 | if self.transform is not None:
98 | img = self.transform(img)
99 | img = img.unsqueeze(0)
100 | imgs.append(img)
101 | imgs = torch.cat(imgs, dim=0)
102 | imgs_list.append(imgs)
103 | imgs_array = torch.stack(imgs_list)
104 | return imgs_array, pid, camid
105 |
106 | else:
107 | raise KeyError("Unknown sample method: {}. Expected one of {}".format(self.sample, self.sample_methods))
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
--------------------------------------------------------------------------------
/Video-Person-ReID/README.md:
--------------------------------------------------------------------------------
1 | # Video-Person-ReID for AIC19
2 |
3 | The code is for the video-based vehicle reidentification task in AIC19 track 1 and 2 \[[link](https://www.aicitychallenge.org/)\].
4 | The code is based on Jiyang Gao's Video-Person-ReID \[[code](https://github.com/jiyanggao/Video-Person-ReID)\].
5 |
6 | ### Requirements
7 | 
8 | * PyTorch 0.3.1
9 | * Torchvision 0.2.0
10 | * Python 2.7
11 |
12 | ### Dataset
13 |
14 | First, download the AIC19 dataset \[[link](https://www.aicitychallenge.org/)\] and use the Python scripts in `data_util/` to convert the images, keypoints and metadata into the desired file structure (sketched below the steps). For simplicity, copy the scripts into your `aic19-track2-reid` directory.
15 |
16 | 1. Run `xml_reader_testdata.py` and `xml_reader_traindata.py` to convert the images into the desired file structure: `image_train_deepreid/carId/camId/imgId.jpg`.
17 | 2. Run `create_feature_files.py` to convert the keypoints into the same file structure as the images: `keypoint_train_deepreid/carId/camId/imgId.txt`.
18 | 3. Run `convert_metadata_imglistprob.py` to convert the metadata inference results of the query (and test) tracks into `prob_v2m100_query.txt` and `imglist_v2m100_query.txt`. Then run `create_metadata_files.py` to convert the metadata into the same file structure as the images: `metadata_v2m100_query_deepreid/carId/camId/imgId.txt`. If using another metadata model, replace `v2m100` with its name. Example txt output from the provided metadata model \[[link](https://github.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/tree/master/metadata)\] can be downloaded [here](https://drive.google.com/open?id=1X4geSMtsHCztwmhuUimjFjEZGUImsA7L).
19 |
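For reference, the resulting layout looks roughly like this (sketched here for the query set; the train and test sets follow the same pattern, and `v2m100` is the example metadata model name):

```
aic19-track2-reid/
├── image_query_deepreid/<carId>/<camId>/<imgId>.jpg
├── keypoint_query_deepreid/<carId>/<camId>/<imgId>.txt
└── metadata_v2m100_query_deepreid/<carId>/<camId>/<imgId>.txt
```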
20 |
21 | ### Training
22 |
23 | To train the model, please run
24 |
25 | `
26 | python main_video_person_reid.py --train-batch 16 --workers 0 --seq-len 4 --arch resnet50ta_surface_nu --width 224 --height 224 --dataset aictrack2 --use-surface --save-dir log --learning-rate 0.0001 --eval-step 50 --save-step 50 --gpu-devices 0 --re-ranking --metadata-model v2m100 --bstri
27 | `
28 |
29 |
30 | `arch` can be `resnet50ta_surface_nu` (temporal attention with keypoint features, for AIC19 track 2) or `resnet50ta` (temporal attention, for AIC19 track 1). When using `resnet50ta`, do not pass `--use-surface`.
31 |
32 | ### Testing
33 |
34 | To test the model, please run
35 |
36 | `
37 | python main_video_person_reid.py --train-batch 16 --workers 0 --seq-len 4 --arch resnet50ta_surface_nu --width 224 --height 224 --dataset aictrack2 --use-surface --evaluate --pretrained-model log/checkpoint_ep300.pth.tar --save-dir log-test --gpu-devices 0 --re-ranking --metadata-model v2m100
38 | `
39 |
40 | Optionally, start from previously saved features without redoing inference:
41 |
42 | `
43 | python main_video_person_reid.py --dataset aictrack2 --save-dir log --re-ranking --metadata-model v2m100 --load-feature --feature-dir feature_dir
44 | `
45 |
46 | `feature_dir` can point to a previously saved feature directory, e.g. `log/feature_ep0300`.
47 |
48 | The pre-trained model can be downloaded [here](https://drive.google.com/open?id=1jjwQhk8i4X12_DjCz9LlgrvL-9uKa2mE).
49 | In addition, the confusion matrix of the metadata model needs to be placed under `metadata/`. An example confusion matrix can be downloaded [here](https://drive.google.com/open?id=178oG9f8H58YgVWsk_KaxpWf_i3dr2wER).
50 |
51 |
52 | ### AIC19 track 1
53 |
54 | To generate features for our AIC19 track 1 testing \[[code](https://github.com/ipl-uw/2019-CVPR-AIC-Track-1-UWIPL)\], run
55 |
56 | `
57 | python Graph_ModelDataGen.py
58 | `
59 |
60 | The pretrained model can be downloaded [here](https://drive.google.com/file/d/1C-uE8nPA3Rtu8tkHptRS8J87sayrB7Nj/view?usp=sharing) and should be put under `log/`.
61 | In addition, the data needs to be processed in a different manner:
62 | Create a `video2img` folder in the downloaded project (i.e., `Video-Person-ReID/video2img/`).
63 | Copy `crop_img.py` into the test folder of the downloaded dataset (i.e., `aic19-track1-mtmc/test`), create a folder `track1_test_img` in the same path (i.e., `aic19-track1-mtmc/test/track1_test_img`), and run `python crop_img.py`. After that, create a folder `track1_sct_img_test_big` and run `python crop_img_big.py`. Then, create a folder `log` in the downloaded project (i.e., `Video-Person-ReID/log`) and put the downloaded track 1 ReID model file in that folder. Finally, run `python Graph_ModelDataGen.py` to obtain the feature files (`q_camids3_no_nms_big0510.npy`, `qf3_no_nms_big0510.npy` and `q_pids3_no_nms_big0510.npy`).
64 |
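The saved feature files are plain NumPy arrays. A minimal sketch (assuming the default file names produced by `Graph_ModelDataGen.py`) for loading and inspecting them:

```python
import numpy as np

# Features and IDs saved by Graph_ModelDataGen.py (one row per query tracklet)
qf = np.load('qf3_no_nms_big0510.npy')               # tracklet feature vectors
q_pids = np.load('q_pids3_no_nms_big0510.npy')       # vehicle (tracklet) IDs
q_camids = np.load('q_camids3_no_nms_big0510.npy')   # camera IDs

print(qf.shape, q_pids.shape, q_camids.shape)
```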
65 | ### Reference
66 |
67 | The code is based on Jiyang Gao's [Video-Person-ReID](https://github.com/jiyanggao/Video-Person-ReID).
68 | The visualization code is adopted from KaiyangZhou's [deep-person-reid](https://github.com/KaiyangZhou/deep-person-reid).
69 | The re-ranking code is modified based on zhunzhong07's [person-re-ranking](https://github.com/zhunzhong07/person-re-ranking).
70 |
--------------------------------------------------------------------------------
/Video-Person-ReID/bases.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 |
4 | import numpy as np
5 |
6 |
7 | class BaseDataset(object):
8 | """
9 | Base class of reid dataset
10 | """
11 |
12 | def get_imagedata_info(self, data):
13 | pids, cams = [], []
14 | for _, pid, camid in data:
15 | pids += [pid]
16 | cams += [camid]
17 | pids = set(pids)
18 | cams = set(cams)
19 | num_pids = len(pids)
20 | num_cams = len(cams)
21 | num_imgs = len(data)
22 | return num_pids, num_imgs, num_cams
23 |
24 | def get_videodata_info(self, data, return_tracklet_stats=False):
25 | pids, cams, tracklet_stats = [], [], []
26 | for img_paths, pid, camid in data:
27 | pids += [pid]
28 | cams += [camid]
29 | tracklet_stats += [len(img_paths)]
30 | pids = set(pids)
31 | cams = set(cams)
32 | num_pids = len(pids)
33 | num_cams = len(cams)
34 | num_tracklets = len(data)
35 | if return_tracklet_stats:
36 | return num_pids, num_tracklets, num_cams, tracklet_stats
37 | return num_pids, num_tracklets, num_cams
38 |
39 |
40 |
41 | def get_imagedata_info_ori(self, data):
42 | pids, cams = [], []
43 | for _, pid, camid,orientation in data:
44 | pids += [pid]
45 | cams += [camid]
46 | pids = set(pids)
47 | cams = set(cams)
48 | num_pids = len(pids)
49 | num_cams = len(cams)
50 | num_imgs = len(data)
51 | return num_pids, num_imgs, num_cams
52 |
53 | def get_videodata_info_ori(self, data, return_tracklet_stats=False):
54 | pids, cams, tracklet_stats = [], [], []
55 | for img_paths, pid, camid,orientation in data:
56 | pids += [pid]
57 | cams += [camid]
58 | tracklet_stats += [len(img_paths)]
59 | pids = set(pids)
60 | cams = set(cams)
61 | num_pids = len(pids)
62 | num_cams = len(cams)
63 | num_tracklets = len(data)
64 | if return_tracklet_stats:
65 | return num_pids, num_tracklets, num_cams, tracklet_stats
66 | return num_pids, num_tracklets, num_cams
67 |
68 | def get_imagedata_info_ori_iou(self, data):
69 | pids, cams = [], []
70 | for _, pid, camid,orientation,iou in data:
71 | pids += [pid]
72 | cams += [camid]
73 | pids = set(pids)
74 | cams = set(cams)
75 | num_pids = len(pids)
76 | num_cams = len(cams)
77 | num_imgs = len(data)
78 | return num_pids, num_imgs, num_cams
79 |
80 | def get_videodata_info_ori_iou(self, data, return_tracklet_stats=False):
81 | pids, cams, tracklet_stats = [], [], []
82 | for img_paths, pid, camid,orientation,iou in data:
83 | pids += [pid]
84 | cams += [camid]
85 | tracklet_stats += [len(img_paths)]
86 | pids = set(pids)
87 | cams = set(cams)
88 | num_pids = len(pids)
89 | num_cams = len(cams)
90 | num_tracklets = len(data)
91 | if return_tracklet_stats:
92 | return num_pids, num_tracklets, num_cams, tracklet_stats
93 | return num_pids, num_tracklets, num_cams
94 |
95 | def print_dataset_statistics(self):
96 | raise NotImplementedError
97 |
98 |
99 | class BaseImageDataset(BaseDataset):
100 | """
101 | Base class of image reid dataset
102 | """
103 |
104 | def print_dataset_statistics(self, train, query, gallery):
105 | num_train_pids, num_train_imgs, num_train_cams = self.get_imagedata_info(train)
106 | num_query_pids, num_query_imgs, num_query_cams = self.get_imagedata_info(query)
107 | num_gallery_pids, num_gallery_imgs, num_gallery_cams = self.get_imagedata_info(gallery)
108 |
109 | print("Dataset statistics:")
110 | print(" ----------------------------------------")
111 | print(" subset | # ids | # images | # cameras")
112 | print(" ----------------------------------------")
113 | print(" train | {:5d} | {:8d} | {:9d}".format(num_train_pids, num_train_imgs, num_train_cams))
114 | print(" query | {:5d} | {:8d} | {:9d}".format(num_query_pids, num_query_imgs, num_query_cams))
115 | print(" gallery | {:5d} | {:8d} | {:9d}".format(num_gallery_pids, num_gallery_imgs, num_gallery_cams))
116 | print(" ----------------------------------------")
117 |
118 |
119 | class BaseVideoDataset(BaseDataset):
120 | """
121 | Base class of video reid dataset
122 | """
123 |
124 | def print_dataset_statistics(self, train, query, gallery):
125 | num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \
126 | self.get_videodata_info(train, return_tracklet_stats=True)
127 |
128 | num_query_pids, num_query_tracklets, num_query_cams, query_tracklet_stats = \
129 | self.get_videodata_info(query, return_tracklet_stats=True)
130 |
131 | num_gallery_pids, num_gallery_tracklets, num_gallery_cams, gallery_tracklet_stats = \
132 | self.get_videodata_info(gallery, return_tracklet_stats=True)
133 |
134 | tracklet_stats = train_tracklet_stats + query_tracklet_stats + gallery_tracklet_stats
135 | min_num = np.min(tracklet_stats)
136 | max_num = np.max(tracklet_stats)
137 | avg_num = np.mean(tracklet_stats)
138 |
139 | print("Dataset statistics:")
140 | print(" -------------------------------------------")
141 | print(" subset | # ids | # tracklets | # cameras")
142 | print(" -------------------------------------------")
143 | print(" train | {:5d} | {:11d} | {:9d}".format(num_train_pids, num_train_tracklets, num_train_cams))
144 | print(" query | {:5d} | {:11d} | {:9d}".format(num_query_pids, num_query_tracklets, num_query_cams))
145 | print(" gallery | {:5d} | {:11d} | {:9d}".format(num_gallery_pids, num_gallery_tracklets, num_gallery_cams))
146 | print(" -------------------------------------------")
147 | print(" number of images per tracklet: {} ~ {}, average {:.2f}".format(min_num, max_num, avg_num))
148 | print(" -------------------------------------------")
149 |
150 |
151 | def print_dataset_statistics_ori(self, train, query, gallery):
152 | num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \
153 | self.get_videodata_info_ori(train, return_tracklet_stats=True)
154 |
155 | num_query_pids, num_query_tracklets, num_query_cams, query_tracklet_stats = \
156 | self.get_videodata_info_ori(query, return_tracklet_stats=True)
157 |
158 | num_gallery_pids, num_gallery_tracklets, num_gallery_cams, gallery_tracklet_stats = \
159 | self.get_videodata_info_ori(gallery, return_tracklet_stats=True)
160 |
161 | tracklet_stats = train_tracklet_stats + query_tracklet_stats + gallery_tracklet_stats
162 | min_num = np.min(tracklet_stats)
163 | max_num = np.max(tracklet_stats)
164 | avg_num = np.mean(tracklet_stats)
165 |
166 | print("Dataset statistics:")
167 | print(" -------------------------------------------")
168 | print(" subset | # ids | # tracklets | # cameras")
169 | print(" -------------------------------------------")
170 | print(" train | {:5d} | {:11d} | {:9d}".format(num_train_pids, num_train_tracklets, num_train_cams))
171 | print(" query | {:5d} | {:11d} | {:9d}".format(num_query_pids, num_query_tracklets, num_query_cams))
172 | print(" gallery | {:5d} | {:11d} | {:9d}".format(num_gallery_pids, num_gallery_tracklets, num_gallery_cams))
173 | print(" -------------------------------------------")
174 | print(" number of images per tracklet: {} ~ {}, average {:.2f}".format(min_num, max_num, avg_num))
175 | print(" -------------------------------------------")
176 |
177 | def print_dataset_statistics_ori_iou(self, train, query, gallery):
178 | num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \
179 | self.get_videodata_info_ori_iou(train, return_tracklet_stats=True)
180 |
181 | num_query_pids, num_query_tracklets, num_query_cams, query_tracklet_stats = \
182 | self.get_videodata_info_ori_iou(query, return_tracklet_stats=True)
183 |
184 | num_gallery_pids, num_gallery_tracklets, num_gallery_cams, gallery_tracklet_stats = \
185 | self.get_videodata_info_ori_iou(gallery, return_tracklet_stats=True)
186 |
187 | tracklet_stats = train_tracklet_stats + query_tracklet_stats + gallery_tracklet_stats
188 | min_num = np.min(tracklet_stats)
189 | max_num = np.max(tracklet_stats)
190 | avg_num = np.mean(tracklet_stats)
191 |
192 | print("Dataset statistics:")
193 | print(" -------------------------------------------")
194 | print(" subset | # ids | # tracklets | # cameras")
195 | print(" -------------------------------------------")
196 | print(" train | {:5d} | {:11d} | {:9d}".format(num_train_pids, num_train_tracklets, num_train_cams))
197 | print(" query | {:5d} | {:11d} | {:9d}".format(num_query_pids, num_query_tracklets, num_query_cams))
198 | print(" gallery | {:5d} | {:11d} | {:9d}".format(num_gallery_pids, num_gallery_tracklets, num_gallery_cams))
199 | print(" -------------------------------------------")
200 | print(" number of images per tracklet: {} ~ {}, average {:.2f}".format(min_num, max_num, avg_num))
201 | print(" -------------------------------------------")
--------------------------------------------------------------------------------
/Video-Person-ReID/data_util/convert_metadata_imglistprob.py:
--------------------------------------------------------------------------------
1 | from os import listdir, mkdir
2 | from os.path import join, split, isfile, isdir
3 |
4 |
5 | conversions = [
6 | ('./track2-gallery-query-metadata-v2m100/test-prob-v2m100.log',
7 | './track2-gallery-query-metadata-v2m100/prob_v2m100.txt',
8 | './track2-gallery-query-metadata-v2m100/imglist_v2m100.txt'),
9 | ]
10 |
11 | img_gline = {}
12 | with open('test_track.txt', 'r') as f:
13 | for gg, line in enumerate(f):
14 | g_line = gg+1
15 | print(g_line)
16 |
17 | imgs = line.replace("\n", "").strip().split(" ")
18 | for i, img in enumerate(imgs):
19 | img_gline[img] = g_line
20 |
21 | img_qline = {}
22 | with open('query_track.txt', 'r') as f:
23 | for qq, line in enumerate(f):
24 | q_line = qq+1
25 | print(q_line)
26 |
27 | imgs = line.replace("\n", "").strip().split(" ")
28 | for i, img in enumerate(imgs):
29 | img_qline[img] = q_line
30 | assert int(imgs[0].replace('.jpg','')) == q_line # make sure is ordered
31 |
32 |
33 | for raw_filename, prob_filename, imglist_filename in conversions:
34 | metadatas = []
35 | with open(raw_filename, 'r') as f:
36 | buf = ''
37 | i = 0
38 | for line in f:
39 | line = line.strip()
40 | if i % 4 == 0:
41 | metadatas.append([])
42 | i += 1
43 | else:
44 | buf = buf + ' ' + line
45 | #if line[-2:] != ']]':
46 | # continue
47 | #print(buf)
48 | l = buf.rfind('[[')
49 | r = buf.find(']]')
50 | if l == -1 and r == -1:
51 | metadatas[-1].append(buf.strip())
52 | elif l < r:
53 | metadatas[-1].append(buf[l+2:r].strip())
54 | else:
55 | print('invalid buf: ' + buf)
56 | buf = ''
57 | i += 1
58 | if len(metadatas[-1]) == 0:
59 | metadatas = metadatas[:-1]
60 | print('images in metadatas: %d' % len(metadatas))
61 |
62 | prob_filename_test = prob_filename[:-4] + '_test.txt'
63 | imglist_filename_test = imglist_filename[:-4] + '_test.txt'
64 | f_prob = open(prob_filename_test, 'w')
65 | f_imglist = open(imglist_filename_test, 'w')
66 | i = 0
67 | for img in img_gline:
68 | f_prob.write('%d/%d image\n' % (i, len(img_gline)))
69 | for metadata in metadatas[img_gline[img]-1 + 1052]:
70 | f_prob.write(metadata+'\n')
71 | f_imglist.write(img+'\n')
72 | i+=1
73 | f_prob.close()
74 | f_imglist.close()
75 |
76 | prob_filename_query = prob_filename[:-4] + '_query.txt'
77 | imglist_filename_query = imglist_filename[:-4] + '_query.txt'
78 | f_prob = open(prob_filename_query, 'w')
79 | f_imglist = open(imglist_filename_query, 'w')
80 | i = 0
81 | for img in img_qline:
82 | f_prob.write('%d/%d image\n' % (i, len(img_qline)))
83 | for metadata in metadatas[img_qline[img]-1]:
84 | f_prob.write(metadata+'\n')
85 | f_imglist.write(img+'\n')
86 | i+=1
87 | f_prob.close()
88 | f_imglist.close()
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
--------------------------------------------------------------------------------
/Video-Person-ReID/data_util/create_feature_files.py:
--------------------------------------------------------------------------------
1 | from os import listdir, mkdir
2 | from os.path import join, split, isfile, isdir
3 |
4 |
5 | image_sets = [
6 | #'train',
7 | 'query',
8 | 'test',
9 | ]
10 |
11 | dummys = [
12 | '',
13 | #'_dummy',
14 | ]
15 |
16 | features = [
17 | 'keypoint',
18 | ]
19 |
20 | aic_track2_dir = '/path_to_aic19-track2-reid/'
21 |
22 | for image_set in image_sets:
23 | for dummy in dummys:
24 | image_path = aic_track2_dir + 'image_%s_deepreid%s' % (image_set, dummy)
25 | for feature in features:
26 | print((image_set, dummy, feature))
27 | feature_path = aic_track2_dir + '%s_%s_deepreid%s' % (feature, image_set, dummy)
28 | mkdir(feature_path)
29 |
30 | feature_file = aic_track2_dir + '%s-%s.txt' % (feature, image_set)
31 | lines = []
32 | with open(feature_file, 'r') as f:
33 | lines = f.readlines()
34 |
35 | pids = [f for f in listdir(image_path) if isdir(join(image_path, f))]
36 | pids.sort()
37 | for pid in pids:
38 | print(pid)
39 | pid_path = join(feature_path, pid)
40 | pid_path_img = join(image_path, pid)
41 | mkdir(pid_path)
42 | cids = [f for f in listdir(pid_path_img) if isdir(join(pid_path_img, f))]
43 | for cid in cids:
44 | cid_path = join(pid_path, cid)
45 | cid_path_img = join(pid_path_img, cid)
46 | mkdir(cid_path)
47 | imgs = [f for f in listdir(cid_path_img) if isfile(join(cid_path_img, f)) and f[-4:] == '.jpg']
48 | for img in imgs:
49 | imgname = img[:-4]
50 | imgid = imgname.split('_')[-1]
51 | feature_file = join(cid_path, imgname+'.txt')
52 | with open(feature_file, 'w') as file:
53 | file.write(lines[int(imgid)-1])
54 |
--------------------------------------------------------------------------------
/Video-Person-ReID/data_util/create_metadata_files.py:
--------------------------------------------------------------------------------
1 | from os import listdir, mkdir
2 | from os.path import join, split, isfile, isdir
3 |
4 |
5 | image_sets = [
6 | 'query',
7 | 'test',
8 | ]
9 |
10 | dummys = [
11 | '',
12 | #'_dummy',
13 | ]
14 |
15 | models = [
16 | 'v2m100',
17 | ]
18 |
19 | aic_track2_dir = '/path_to_aic19-track2-reid/'
20 |
21 | for model in models:
22 | for image_set in image_sets:
23 | for dummy in dummys:
24 | print((model, image_set, dummy))
25 | # parse metadata probability from file
26 | metadatas = []
27 | with open(aic_track2_dir + 'prob_%s_%s.txt'%(model, image_set), 'r') as f:
28 | for i, line in enumerate(f):
29 | line = line.strip()
30 | if i % 4 == 0:
31 | metadatas.append([])
32 | else:
33 | l = line.rfind('[')
34 | r = line.find(']')
35 | if l == -1 and r == -1:
36 | metadatas[-1].append(line.strip())
37 | elif l < r:
38 | metadatas[-1].append(line[l+1:r].strip())
39 | else:
40 | print('invalid line: ' + line)
41 | if len(metadatas[-1]) == 0:
42 | metadatas = metadatas[:-1]
43 | print('images in metadatas: %d' % len(metadatas))
44 |
45 | # read image filenames from file
46 | img_orders = {}
47 | with open(aic_track2_dir + 'imglist_%s_%s.txt'%(model, image_set), 'r') as f:
48 | for i, line in enumerate(f):
49 | pos = line.find('.jpg')
50 | imgid = line[pos-6:pos]
51 | #print(imgid)
52 | if imgid in img_orders:
53 | print('duplicate images: '+imgid)
54 | img_orders[imgid] = i
55 | print('images in image list: %d' % len(img_orders))
56 |
57 |
58 | image_path = aic_track2_dir + 'image_%s_deepreid%s' % (image_set, dummy)
59 | metadata_path = aic_track2_dir + 'metadata_%s_%s_deepreid%s' % (model, image_set, dummy)
60 | mkdir(metadata_path)
61 |
62 | pids = [f for f in listdir(image_path) if isdir(join(image_path, f))]
63 | pids.sort()
64 | for pid in pids:
65 | print(pid)
66 | pid_path = join(metadata_path, pid)
67 | pid_path_img = join(image_path, pid)
68 | mkdir(pid_path)
69 | cids = [f for f in listdir(pid_path_img) if isdir(join(pid_path_img, f))]
70 | for cid in cids:
71 | cid_path = join(pid_path, cid)
72 | cid_path_img = join(pid_path_img, cid)
73 | mkdir(cid_path)
74 | imgs = [f for f in listdir(cid_path_img) if isfile(join(cid_path_img, f)) and f[-4:] == '.jpg']
75 | for img in imgs:
76 | imgname = img[:-4]
77 | imgid = imgname.split('_')[-1]
78 | metadata_file = join(cid_path, imgname+'.txt')
79 | with open(metadata_file, 'w') as file:
80 | for metadata in metadatas[img_orders[imgid]]:
81 | file.write(metadata+'\n')
82 |
--------------------------------------------------------------------------------
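
The script above assumes the `prob_*_*.txt` files use the four-line record layout written by the conversion script earlier in this directory: an `"<i>/<total> image"` header line per image, followed by three bracketed probability vectors (vehicle type, brand, color). A minimal, self-contained sketch of that layout and of the same parsing loop, with made-up numbers and only three probabilities per attribute for brevity:

```python
# Hypothetical four-line records; all values below are invented for illustration.
sample = """0/2 image
[0.7 0.2 0.1]
[0.1 0.8 0.1]
[0.3 0.3 0.4]
1/2 image
[0.2 0.5 0.3]
[0.6 0.2 0.2]
[0.1 0.1 0.8]
"""

metadatas = []
for i, line in enumerate(sample.splitlines()):
    line = line.strip()
    if i % 4 == 0:                      # header line starts a new image record
        metadatas.append([])
    else:                               # keep only the text inside the brackets
        l, r = line.rfind('['), line.find(']')
        metadatas[-1].append(line[l + 1:r].strip())

print(len(metadatas))                   # 2 records, each holding 3 vectors
```
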
/Video-Person-ReID/data_util/xml_reader_testdata.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Feb 5 00:16:00 2019
4 |
5 | @author: hungminhsu
6 | """
7 |
8 | import os
9 | import shutil
10 | def copy_rename(src_dir,old_file_name,dst_dir ,new_file_name):
11 | src_file = os.path.join(src_dir, old_file_name)
12 | #print("src_file:"+src_file)
13 | shutil.copy(src_file,dst_dir)
14 |
15 | dst_file = os.path.join(dst_dir, old_file_name)
16 | #print("dst_file:"+dst_file)
17 | new_dst_file_name = os.path.join(dst_dir, new_file_name)
18 | #print("new_dst_file_name:"+new_dst_file_name)
19 | os.rename(dst_file, new_dst_file_name)
20 |
21 | ###########################################################################
22 |
23 | aic_track2_dir = '/path_to_aic19-track2-reid/'
24 |
25 | source_path_query = aic_track2_dir + "image_query/"
26 | path_query = aic_track2_dir + "image_query_deepreid/"
27 | os.mkdir(path_query)
28 |
29 | q_img_camID={}
30 | q_img_carID={}
31 |
32 | q_imgs = [f for f in os.listdir(source_path_query)]
33 | q_imgs.sort()
34 | with open('query_track.txt', 'w') as f:
35 | f.write('\n'.join(q_imgs))
36 | for i, img in enumerate(q_imgs):
37 |     q_img_camID[img] = 'c901' # assign the same dummy camID (c901) to every query image
38 | q_img_carID[img] = '%04d'%(i+1)
39 | for i, img in enumerate(q_imgs):
40 | print(i)
41 | #print(s)
42 | carID = q_img_carID[img]
43 | camID = q_img_camID[img]
44 |
45 |
46 | if not os.path.isdir(path_query+"/"+carID+"/"):
47 | os.mkdir(path_query+"/"+carID+"/")
48 | if not os.path.isdir(path_query+"/"+carID+"/"+camID+"/"):
49 | os.mkdir(path_query+"/"+carID+"/"+camID+"/")
50 | copy_rename(source_path_query,img,path_query+"/"+carID+"/"+camID+"/",'%s'%(img))
51 |
52 | source_path_test = aic_track2_dir + "image_test/"
53 | path_test = aic_track2_dir +"image_test_deepreid/"
54 | os.mkdir(path_test)
55 |
56 | g_img_camID={}
57 | g_img_carID={}
58 | g_imgs = []
59 | with open('test_track.txt', 'r') as f:
60 | for i, line in enumerate(f):
61 | s = line.replace('\n', '').strip().split(' ')
62 | g_imgs.append(s)
63 | for img in s:
64 | g_img_camID[img] = 'c001'
65 | g_img_carID[img] = '%04d'%(i+1)
66 | for l, s in enumerate(g_imgs):
67 | print(l)
68 | #print(s)
69 | for i in range(0,len(s)):
70 |
71 | carID = g_img_carID[s[i]]
72 | camID = g_img_camID[s[i]]
73 |
74 |
75 | if not os.path.isdir(path_test+"/"+carID+"/"):
76 | os.mkdir(path_test+"/"+carID+"/")
77 | if not os.path.isdir(path_test+"/"+carID+"/"+camID+"/"):
78 | os.mkdir(path_test+"/"+carID+"/"+camID+"/")
79 | copy_rename(source_path_test,s[i],path_test+"/"+carID+"/"+camID+"/",'%04d_%s'%(i, s[i]))
80 |
81 |
--------------------------------------------------------------------------------
/Video-Person-ReID/data_util/xml_reader_traindata.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Feb 5 00:16:00 2019
4 |
5 | @author: hungminhsu
6 | """
7 |
8 |
9 |
10 | import os
11 | import shutil
12 | def copy_rename(src_dir,old_file_name,dst_dir ,new_file_name):
13 | src_file = os.path.join(src_dir, old_file_name)
14 | #print("src_file:"+src_file)
15 | shutil.copy(src_file,dst_dir)
16 |
17 | dst_file = os.path.join(dst_dir, old_file_name)
18 | #print("dst_file:"+dst_file)
19 | new_dst_file_name = os.path.join(dst_dir, new_file_name)
20 | #print("new_dst_file_name:"+new_dst_file_name)
21 | os.rename(dst_file, new_dst_file_name)
22 |
23 | ###########################################################################
24 |
25 | import xml.etree.ElementTree as ET
26 | xmlp = ET.XMLParser(encoding="utf-8")
27 | tree = ET.parse('train_label.xml', parser=xmlp)
28 | root = tree.getroot()
29 |
30 |
31 | img_camID={}
32 | img_carID={}
33 |
34 | for neighbor in root.iter('Item'):
35 | #print(neighbor.attrib)
36 | #print(neighbor.get('imageName'))
37 | #print(neighbor.get('vehicleID'))
38 | #print(neighbor.get('cameraID'))
39 | img_camID[neighbor.get('imageName')] = neighbor.get('cameraID')
40 | img_carID[neighbor.get('imageName')] = neighbor.get('vehicleID')
41 |
42 | carID_num={}
43 |
44 | aic_track2_dir = '/path_to_aic19-track2-reid/'
45 |
46 | source_path = aic_track2_dir + "image_train/"
47 | path_train = aic_track2_dir + "image_train_deepreid/"
48 | path_query = aic_track2_dir + "image_train_deepreid_query/"
49 | path_query_single = aic_track2_dir + "image_train_deepreid_query_single/"
50 | path_gallery = aic_track2_dir + "image_train_deepreid_gallery/"
51 | os.mkdir(path_train)
52 | os.mkdir(path_query)
53 | os.mkdir(path_query_single)
54 | os.mkdir(path_gallery)
55 | file = open("train_track.txt","r")
56 | for line in file:
57 | #print(line)
58 | s = line.replace(" \n","").split(" ")
59 | #print(s)
60 | # find single query i as the minimum image number in s
61 | tmp = [int(c[:-4]) for c in s]
62 | sq = tmp.index(min(tmp))
63 | for i in range(0,len(s)):
64 |
65 | carID = img_carID[s[i]]
66 | camID = img_camID[s[i]]
67 |
68 |
69 | if len(carID_num)<160 or carID in carID_num:
70 | if carID in carID_num:
71 | #if len(carID_num[carID])==1 and carID_num[carID][0]!=camID:
72 | # carID_num[carID].append(camID) #ccc
73 | if not camID in carID_num[carID]:
74 | carID_num[carID].append(camID)
75 | else:
76 | carID_num[carID]=[]
77 | carID_num[carID].append(camID)
78 |
79 | #print(carID_num[carID])
80 | #print(len(carID_num))
81 | #if len(carID_num)<160:
82 | if carID in carID_num and False:
83 | if len(carID_num[carID])==1:
84 | ###camID = carID_num[carID][0] #ccc
85 | if not os.path.isdir(path_query+"/"+carID+"/"):
86 | os.mkdir(path_query+"/"+carID+"/")
87 | if not os.path.isdir(path_query+"/"+carID+"/"+camID+"/"):
88 | os.mkdir(path_query+"/"+carID+"/"+camID+"/")
89 | copy_rename(source_path,s[i],path_query+"/"+carID+"/"+camID+"/",'%04d_%s'%(i,s[i]))
90 | if i == sq: # single query
91 | if not os.path.isdir(path_query_single+"/"+carID+"/"):
92 | os.mkdir(path_query_single+"/"+carID+"/")
93 | if not os.path.isdir(path_query_single+"/"+carID+"/"+camID+"/"):
94 | os.mkdir(path_query_single+"/"+carID+"/"+camID+"/")
95 | copy_rename(source_path,s[i],path_query_single+"/"+carID+"/"+camID+"/",'%04d_%s'%(i,s[i]))
96 |
97 | #elif len(carID_num[carID])==2: #ccc
98 | else:
99 | #print("111111111")
100 | ###camID = carID_num[carID][1] #ccc
101 | if not os.path.isdir(path_gallery+"/"+carID+"/"):
102 | os.mkdir(path_gallery+"/"+carID+"/")
103 | if not os.path.isdir(path_gallery+"/"+carID+"/"+camID+"/"):
104 | os.mkdir(path_gallery+"/"+carID+"/"+camID+"/")
105 | copy_rename(source_path,s[i],path_gallery+"/"+carID+"/"+camID+"/",'%04d_%s'%(i,s[i]))
106 | else:
107 | #if carID not in carID_num:
108 | if not os.path.isdir(path_train+"/"+carID+"/"):
109 | os.mkdir(path_train+"/"+carID+"/")
110 | if not os.path.isdir(path_train+"/"+carID+"/"+camID+"/"):
111 | os.mkdir(path_train+"/"+carID+"/"+camID+"/")
112 | copy_rename(source_path,s[i],path_train+"/"+carID+"/"+camID+"/",'%04d_%s'%(i,s[i]))
113 |
114 |
115 |
--------------------------------------------------------------------------------
/Video-Person-ReID/eval_metrics.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import
2 | import numpy as np
3 | import copy
4 |
5 | import os.path as osp
6 | from os import mkdir
7 |
8 | def dump_matches_imgids(output_dir, matches_imgids):
9 | if not osp.isdir(output_dir):
10 | mkdir(output_dir)
11 |     for q_imgid, g_imgids in matches_imgids.items():
12 | with open(osp.join(output_dir, '%s.txt' % q_imgid), 'w') as f:
13 | for g_imgid in g_imgids:
14 | f.write('%s\n' % g_imgid)
15 |
16 | def dump_query_result(output_dir, matches_imgids, top_N=100):
17 | if not osp.isdir(output_dir):
18 | mkdir(output_dir)
19 | with open(osp.join(output_dir, 'track2.txt'), 'w') as f:
20 |         for q_imgid, g_imgids in sorted(matches_imgids.items()):
21 | g_imgids = [str(imgid) for imgid in g_imgids]
22 | if top_N > 0:
23 | g_imgids = g_imgids[:top_N]
24 | st = ' '.join(g_imgids)
25 | f.write(st + '\n')
26 |
27 | def evaluate_imgids(distmat, q_pids, g_pids, q_camids, g_camids, q_imgids, g_imgids, max_rank=50, top_N=0):
28 | '''
29 |     mAP and CMC computed on a per-image basis.
30 |     q_imgids, g_imgids: list of lists of imgids
31 | return all_cmc, mAP, and matches_imgids (map from q_imgids to g_imgids)
32 | '''
33 |
34 | num_q, num_g = distmat.shape
35 |
36 | assert(len(q_imgids) == num_q and len(g_imgids) == num_g)
37 |
38 | q_counts = [len(imgs) for imgs in q_imgids]
39 | g_counts = [len(imgs) for imgs in g_imgids]
40 | num_gi = sum(g_counts)
41 | #print('num_q = %d, num_g = %d, num_gi = %d' % (num_q, num_g, num_gi))
42 |
43 | if num_gi < max_rank:
44 | max_rank = num_gi
45 | print("Note: number of gallery samples is quite small, got {}".format(num_gi))
46 | indices = np.argsort(distmat, axis=1)
47 | # count gt and prediction (first imgid only)
48 | matches_gt_pred = {}
49 | for q_idx in range(num_q):
50 | q_pid = q_pids[q_idx]
51 | q_imgid = q_imgids[q_idx][0]
52 | matches_gt_pred[(q_pid, q_imgid)] = ([], [])
53 | for gi_idx in range(num_g):
54 | g_idx = indices[q_idx, gi_idx]
55 | g_pid = g_pids[g_idx]
56 | g_imgid = g_imgids[g_idx][0]
57 | matches_gt_pred[(q_pid, q_imgid)][1].append((g_pid, g_imgid))
58 | if g_pid == q_pid:
59 | matches_gt_pred[(q_pid, q_imgid)][0].append((g_pid, g_imgid))
60 | # expand to per-gallery image
61 | indices_expanded = np.zeros((num_q, num_gi), dtype=np.int32)
62 | for q_idx in range(num_q):
63 | pos = 0
64 | for s_idx in range(num_g):
65 | g_idx = indices[q_idx][s_idx]
66 | g_count = g_counts[g_idx]
67 | indices_expanded[q_idx][pos:pos+g_count] = g_idx
68 | pos += g_count
69 | indices = indices_expanded
70 | # create matches_imgids from indices_expanded
71 | matches_imgids = {}
72 | for q_idx in range(num_q):
73 | matches_imgids[q_imgids[q_idx][0]] = []
74 | g_poss = [0] * num_g
75 | for gi_idx in range(num_gi):
76 | g_idx = indices_expanded[q_idx][gi_idx]
77 | #print('q_idx = %d, gi_idx = %d, g_idx = %d' % (q_idx, gi_idx, g_idx))
78 | #print('g_poss = ' + str(g_poss))
79 | matches_imgids[q_imgids[q_idx][0]].append(g_imgids[g_idx][g_poss[g_idx]])
80 | g_poss[g_idx] += 1
81 | if top_N > 0:
82 | matches_imgids[q_imgids[q_idx][0]] = matches_imgids[q_imgids[q_idx][0]][:top_N]
83 | #print(str(q_imgids[q_idx][0]) + ': ' + str(matches_imgids[q_imgids[q_idx][0]]))
84 |
85 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)
86 |
87 | # find false positive result
88 | matches_imgids_FP = {}
89 | top_FP = 3
90 | for q_idx in range(num_q):
91 | matches_imgids_FP[q_imgids[q_idx][0]] = []
92 | FPs = []
93 | for gi_idx in range(min(top_FP, num_g)):
94 | if matches[q_idx, gi_idx] == 0:
95 | FPs.append(indices[q_idx, gi_idx])
96 |
97 | g_poss = [0] * num_g
98 | for gi_idx in range(num_gi):
99 | g_idx = indices_expanded[q_idx][gi_idx]
100 | if g_idx in FPs:
101 | #print('q_idx = %d, gi_idx = %d, g_idx = %d' % (q_idx, gi_idx, g_idx))
102 | #print('g_poss = ' + str(g_poss))
103 | matches_imgids_FP[q_imgids[q_idx][0]].append(g_imgids[g_idx][g_poss[g_idx]])
104 | g_poss[g_idx] += 1
105 |
106 | # compute cmc curve for each query
107 | all_cmc = []
108 | all_AP = []
109 | num_valid_q = 0.
110 | for q_idx in range(num_q):
111 | # get query pid and camid
112 | q_pid = q_pids[q_idx]
113 | q_camid = q_camids[q_idx]
114 |
115 | # remove gallery samples that have the same pid and camid with query
116 | order = indices[q_idx]
117 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
118 | keep = np.invert(remove)
119 | #keep += True ###### keep everything
120 |
121 | # compute cmc curve
122 | orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches
123 | if not np.any(orig_cmc):
124 | # this condition is true when query identity does not appear in gallery
125 | continue
126 |
127 | cmc = orig_cmc.cumsum()
128 | cmc[cmc > 1] = 1
129 |
130 | all_cmc.append(cmc[:max_rank])
131 | num_valid_q += 1.
132 |
133 | # compute average precision
134 | # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
135 | num_rel = orig_cmc.sum()
136 | tmp_cmc = orig_cmc.cumsum()
137 | tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)]
138 | tmp_cmc = np.asarray(tmp_cmc) * orig_cmc
139 | if top_N == 0:
140 | AP = tmp_cmc.sum() / num_rel
141 | else:
142 | AP = tmp_cmc[:top_N].sum() / num_rel
143 | all_AP.append(AP)
144 |
145 | #print('%s %s AP: %f, cmc[0]: %f' % (q_pids[q_idx], q_imgids[q_idx], AP, cmc[0]))
146 | #if AP < cmc[0]:
147 | # print(orig_cmc[:top_N])
148 |
149 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery"
150 |
151 | all_cmc = np.asarray(all_cmc).astype(np.float32)
152 | all_cmc = all_cmc.sum(0) / num_valid_q
153 | mAP = np.mean(all_AP)
154 |
155 | return all_cmc, mAP, matches_imgids, matches_imgids_FP, matches_gt_pred
156 |
157 |
158 |
159 | def evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50, top_N=0):
160 | num_q, num_g = distmat.shape
161 | if num_g < max_rank:
162 | max_rank = num_g
163 | print("Note: number of gallery samples is quite small, got {}".format(num_g))
164 | indices = np.argsort(distmat, axis=1)
165 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)
166 |
167 | # compute cmc curve for each query
168 | all_cmc = []
169 | all_AP = []
170 | num_valid_q = 0.
171 | for q_idx in range(num_q):
172 | # get query pid and camid
173 | q_pid = q_pids[q_idx]
174 | q_camid = q_camids[q_idx]
175 |
176 | # remove gallery samples that have the same pid and camid with query
177 | order = indices[q_idx]
178 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
179 | keep = np.invert(remove)
180 | #keep += True ###### keep everything
181 |
182 | # compute cmc curve
183 | orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches
184 | if not np.any(orig_cmc):
185 | # this condition is true when query identity does not appear in gallery
186 | continue
187 |
188 | cmc = orig_cmc.cumsum()
189 | cmc[cmc > 1] = 1
190 |
191 | all_cmc.append(cmc[:max_rank])
192 | num_valid_q += 1.
193 |
194 | # compute average precision
195 | # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
196 | num_rel = orig_cmc.sum()
197 | tmp_cmc = orig_cmc.cumsum()
198 | tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)]
199 | tmp_cmc = np.asarray(tmp_cmc) * orig_cmc
200 | if top_N == 0:
201 | AP = tmp_cmc.sum() / num_rel
202 | else:
203 | AP = tmp_cmc[:top_N].sum() / num_rel
204 | all_AP.append(AP)
205 |
206 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery"
207 |
208 | all_cmc = np.asarray(all_cmc).astype(np.float32)
209 | all_cmc = all_cmc.sum(0) / num_valid_q
210 | mAP = np.mean(all_AP)
211 |
212 | return all_cmc, mAP
213 |
214 |
215 |
--------------------------------------------------------------------------------
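
As a quick sanity check of the CMC/mAP computation in `evaluate`, here is a tiny synthetic example (assuming `eval_metrics.py` is importable); with every query ranking its true match first, both rank-1 accuracy and mAP come out as 1.0:

```python
import numpy as np
from eval_metrics import evaluate  # assumes eval_metrics.py is on the Python path

# Two queries, three gallery tracklets. Gallery camids differ from the query
# camids, so nothing is filtered out by the same-pid/same-camid rule.
distmat = np.array([[0.1, 0.9, 0.5],
                    [0.8, 0.2, 0.4]])
q_pids = np.array([1, 2])
g_pids = np.array([1, 2, 3])
q_camids = np.array([0, 0])
g_camids = np.array([1, 1, 1])

cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids)
print(cmc[0], mAP)  # 1.0 1.0 -- both queries rank their correct match first
```
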
/Video-Person-ReID/iotools.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import os
4 | import os.path as osp
5 | import errno
6 | import json
7 | import shutil
8 |
9 | import torch
10 |
11 |
12 | def mkdir_if_missing(directory):
13 | if not osp.exists(directory):
14 | try:
15 | os.makedirs(directory)
16 | except OSError as e:
17 | if e.errno != errno.EEXIST:
18 | raise
19 |
20 |
21 | def check_isfile(path):
22 | isfile = osp.isfile(path)
23 | if not isfile:
24 | print("=> Warning: no file found at '{}' (ignored)".format(path))
25 | return isfile
26 |
27 |
28 | def read_json(fpath):
29 | with open(fpath, 'r') as f:
30 | obj = json.load(f)
31 | return obj
32 |
33 |
34 | def write_json(obj, fpath):
35 | mkdir_if_missing(osp.dirname(fpath))
36 | with open(fpath, 'w') as f:
37 | json.dump(obj, f, indent=4, separators=(',', ': '))
38 |
39 |
40 | def save_checkpoint(state, is_best=False, fpath='checkpoint.pth.tar'):
41 | if len(osp.dirname(fpath)) != 0:
42 | mkdir_if_missing(osp.dirname(fpath))
43 | torch.save(state, fpath)
44 | if is_best:
45 | shutil.copy(fpath, osp.join(osp.dirname(fpath), 'best_model.pth.tar'))
--------------------------------------------------------------------------------
/Video-Person-ReID/losses.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import torch
4 | from torch import nn
5 | import torch.nn.functional as F
6 | from torch.autograd import Variable
7 |
8 | import numpy as np
9 |
10 | """
11 | Shorthands for loss:
12 | - CrossEntropyLabelSmooth: xent
13 | - TripletLoss: htri
14 | - CenterLoss: cent
15 | """
16 | __all__ = ['CrossEntropyLabelSmooth', 'TripletLoss', 'CenterLoss']
17 |
18 | class CrossEntropyLabelSmooth(nn.Module):
19 | """Cross entropy loss with label smoothing regularizer.
20 |
21 | Reference:
22 | Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.
23 | Equation: y = (1 - epsilon) * y + epsilon / K.
24 |
25 | Args:
26 | num_classes (int): number of classes.
27 | epsilon (float): weight.
28 |         epsilon (float): label-smoothing weight.
29 | def __init__(self, num_classes, epsilon=0.1, use_gpu=True):
30 | super(CrossEntropyLabelSmooth, self).__init__()
31 | self.num_classes = num_classes
32 | self.epsilon = epsilon
33 | self.use_gpu = use_gpu
34 | self.logsoftmax = nn.LogSoftmax(dim=1)
35 |
36 | def forward(self, inputs, targets):
37 | """
38 | Args:
39 | inputs: prediction matrix (before softmax) with shape (batch_size, num_classes)
40 |             targets: ground truth labels with shape (batch_size)
41 | """
42 | log_probs = self.logsoftmax(inputs)
43 | targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1)
44 | if self.use_gpu: targets = targets.cuda()
45 | targets = Variable(targets, requires_grad=False)
46 | targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
47 | loss = (- targets * log_probs).mean(0).sum()
48 | return loss
49 |
50 | def augment_surfaces(inputs, targets, surfaces, thresh_cos=0.95, aug_ratio=0.5):
51 | n = inputs.size(0)
52 | #print(n)
53 | #print('surfaces.size')
54 | #print(surfaces.size(0))
55 | #print(surfaces.size(1))
56 | n, d = surfaces.size(0), surfaces.size(1)
57 |
58 | '''surfaces_np = surfaces.data.cpu().numpy()
59 | mask = targets.expand(n, n).eq(targets.expand(n, n).t())
60 | mask_np = mask.data.cpu().numpy()
61 | import sklearn
62 | cosine_sim = sklearn.metrics.pairwise.cosine_similarity(surfaces_np,surfaces_np)
63 | cosine_sim -= mask_np.astype(np.float32)'''
64 |
65 | cosine_sim = F.cosine_similarity(surfaces.view(1, n, d).expand(n, n, d), surfaces.view(n, 1, d).expand(n, n, d), 2)
66 | '''cosine_sim = torch.pow(surfaces, 2).sum(dim=1, keepdim=True).expand(n, n)
67 | cosine_sim = cosine_sim + cosine_sim.t()
68 | cosine_sim.addmm_(1, -2, surfaces, surfaces.t())
69 | cosine_sim = cosine_sim.clamp(min=1e-12).sqrt()
70 | cosine_sim = 1 - cosine_sim
71 | cos = nn.CosineSimilarity(dim=1, eps=1e-6)
72 | cosine_sim = cos(surfaces, surfaces)'''
73 | #print(cosine_sim.data.cpu().numpy())
74 | mask = targets.expand(n, n).eq(targets.expand(n, n).t())
75 | #mask_np = mask.data.cpu().numpy()
76 | #print('mask_np')
77 | #print(mask_np)
78 | cosine_sim = cosine_sim - mask.float()
79 | m = nn.Threshold(thresh_cos, -1, inplace=True)
80 | cosine_sim = m(cosine_sim)
81 | cosine_sim = cosine_sim.data.cpu().numpy()
82 | targets_np = targets.data.cpu().numpy()
83 | #print('cosine_sim.shape')
84 | #print(cosine_sim.shape)
85 | #print(cosine_sim)
86 | #print('targets_np.shape')
87 | #print(targets_np.shape)
88 | #print(targets_np)
89 | num_pids = np.unique(targets_np).shape[0]
90 | #print('num_pids')
91 | #print(num_pids)
92 | aug_pairs = []
93 | aug_idxs = []
94 | inputs_aug = inputs.clone()
95 | while (np.max(cosine_sim) > thresh_cos and len(aug_pairs) < num_pids*aug_ratio):
96 | imax = np.argmax(cosine_sim)
97 | imax, jmax = np.unravel_index(imax, (n, n))
98 | i = targets_np[imax]
99 | j = targets_np[jmax]
100 | assert i != j
101 | aug_pairs.append((i,j))
102 | aug_pairs.append((j,i))
103 | idxi = np.where(targets_np == i)[0].tolist()
104 | idxj = np.where(targets_np == j)[0].tolist()
105 | aug_idxs.extend(idxi)
106 | aug_idxs.extend(idxj)
107 | dfij = inputs[jmax,:] - inputs[imax,:]
108 | for idx in idxi:
109 | inputs_aug[idx,:] = inputs[idx,:] + dfij
110 | targets[idx] = j
111 | cosine_sim[idx,:] = -1
112 | cosine_sim[:,idx] = -1
113 | for idx in idxj:
114 | inputs_aug[idx,:] = inputs[idx,:] - dfij
115 | targets[idx] = i
116 | cosine_sim[idx,:] = -1
117 | cosine_sim[:,idx] = -1
118 | for idx in range(n):
119 | if idx not in aug_idxs:
120 | inputs_aug[idx,:] = inputs[idx,:]
121 | #print('aug_pairs')
122 | #print(aug_pairs)
123 | #print('aug_idxs')
124 | #print(aug_idxs)
125 | #targets_np = targets.data.cpu().numpy()
126 | #print(targets_np)
127 |
128 | return inputs_aug, targets
129 |
130 | class TripletLoss(nn.Module):
131 | """Triplet loss with hard positive/negative mining.
132 |
133 | Reference:
134 | Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
135 |
136 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py.
137 |
138 | Args:
139 | margin (float): margin for triplet.
140 | """
141 | def __init__(self, margin=0.3):
142 | super(TripletLoss, self).__init__()
143 | self.margin = margin
144 | self.ranking_loss = nn.MarginRankingLoss(margin=margin)
145 |
146 | def forward(self, inputs, targets, surfaces=None):
147 | """
148 | Args:
149 | inputs: feature matrix with shape (batch_size, feat_dim)
150 | targets: ground truth labels with shape (num_classes)
151 | """
152 | if surfaces is not None:
153 | inputs, targets = augment_surfaces(inputs, targets, surfaces)
154 | n = inputs.size(0)
155 | # Compute pairwise distance, replace by the official when merged
156 | dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
157 | dist = dist + dist.t()
158 | dist.addmm_(1, -2, inputs, inputs.t())
159 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
160 | # For each anchor, find the hardest positive and negative
161 | mask = targets.expand(n, n).eq(targets.expand(n, n).t())
162 | dist_ap, dist_an = [], []
163 | for i in range(n):
164 | dist_ap.append(dist[i][mask[i]].max())
165 | dist_an.append(dist[i][mask[i] == 0].min())
166 | dist_ap = torch.cat(dist_ap)
167 | dist_an = torch.cat(dist_an)
168 | # Compute ranking hinge loss
169 | y = dist_an.data.new()
170 | y.resize_as_(dist_an.data)
171 | y.fill_(1)
172 | y = Variable(y)
173 | loss = self.ranking_loss(dist_an, dist_ap, y)
174 | return loss
175 |
176 | def _apply_margin(x, m):
177 | if isinstance(m, float):
178 | #return (x + m).clamp(min=0)
179 | return torch.mean((x + m).clamp(min=0))
180 | elif m.lower() == "soft":
181 | return F.softplus(x)
182 | elif m.lower() == "none":
183 | return x
184 | else:
185 | raise NotImplementedError("The margin %s is not implemented in BatchHard!" % m)
186 |
187 | def batch_soft(cdist, pids, margin, T=1.0):
188 | """Calculates the batch soft.
189 | Instead of picking the hardest example through argmax or argmin,
190 | a softmax (softmin) is used to sample and use less difficult examples as well.
191 | Args:
192 | cdist (2D Tensor): All-to-all distance matrix, sized (B,B).
193 | pids (1D tensor): PIDs (classes) of the identities, sized (B,).
194 | margin: The margin to use, can be 'soft', 'none', or a number.
195 | T (float): The temperature of the softmax operation.
196 | """
197 |     # mask where all positives are set to true
198 | mask_pos = pids[None, :] == pids[:, None]
199 | mask_neg = 1 - mask_pos.data
200 |
201 | # only one copy
202 | cdist_max = cdist.clone()
203 | cdist_max[mask_neg] = -float('inf')
204 | cdist_min = cdist.clone()
205 | cdist_min[mask_pos] = float('inf')
206 |
207 | # NOTE: We could even take multiple ones by increasing num_samples,
208 | # the following `gather` call does the right thing!
209 | idx_pos = torch.multinomial(F.softmax(cdist_max/T, dim=1), num_samples=1)
210 | idx_neg = torch.multinomial(F.softmin(cdist_min/T, dim=1), num_samples=1)
211 | positive = cdist.gather(dim=1, index=idx_pos)[:,0] # Drop the extra (samples) dim
212 | negative = cdist.gather(dim=1, index=idx_neg)[:,0]
213 |
214 | return _apply_margin(positive - negative, margin)
215 |
216 | class BatchSoft(nn.Module):
217 | """BatchSoft implementation using softmax.
218 |
219 |     Also referred to by Ristani as the Adaptive Weighted Triplet Loss.
220 | """
221 |
222 | def __init__(self, m, T=1.0, **kwargs):
223 | """
224 | Args:
225 | m: margin
226 | T: Softmax temperature
227 | """
228 | super(BatchSoft, self).__init__()
229 | self.name = "BatchSoft(m={}, T={})".format(m, T)
230 | self.m = m
231 | self.T = T
232 |
233 | def forward(self, inputs, targets):
234 | """
235 | Args:
236 | inputs: feature matrix with shape (batch_size, feat_dim)
237 |             targets: ground truth labels with shape (batch_size)
238 | """
239 | n = inputs.size(0)
240 | # Compute pairwise distance, replace by the official when merged
241 | dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
242 | dist = dist + dist.t()
243 | dist.addmm_(1, -2, inputs, inputs.t())
244 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
245 |
246 | return batch_soft(dist, targets, self.m, self.T)
247 |         # NOTE: unreachable leftover from TripletLoss.forward (it follows the return above); `mask` and `self.ranking_loss` are not defined in BatchSoft.
248 |         #dist_ap, dist_an = [], []
249 |         #for i in range(n):
250 |         #    dist_ap.append(dist[i][mask[i]].max())
251 |         #    dist_an.append(dist[i][mask[i] == 0].min())
252 |         #dist_ap = torch.cat(dist_ap)
253 |         #dist_an = torch.cat(dist_an)
254 |         ## Compute ranking hinge loss
255 |         #y = dist_an.data.new()
256 |         #y.resize_as_(dist_an.data)
257 |         #y.fill_(1)
258 |         #y = Variable(y)
259 |         #loss = self.ranking_loss(dist_an, dist_ap, y)
260 |         #return loss
261 |
262 | #def forward(self, dist, pids):
263 | # return batch_soft(dist, pids, self.m, self.T)
264 |
265 | class CenterLoss(nn.Module):
266 | """Center loss.
267 |
268 | Reference:
269 | Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
270 |
271 | Args:
272 | num_classes (int): number of classes.
273 | feat_dim (int): feature dimension.
274 | """
275 | def __init__(self, num_classes=10, feat_dim=2, use_gpu=True):
276 | super(CenterLoss, self).__init__()
277 | self.num_classes = num_classes
278 | self.feat_dim = feat_dim
279 | self.use_gpu = use_gpu
280 |
281 | if self.use_gpu:
282 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda())
283 | else:
284 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim))
285 |
286 | def forward(self, x, labels):
287 | """
288 | Args:
289 | x: feature matrix with shape (batch_size, feat_dim).
290 |             labels: ground truth labels with shape (batch_size).
291 | """
292 | batch_size = x.size(0)
293 | distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
294 | torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
295 | distmat.addmm_(1, -2, x, self.centers.t())
296 |
297 | classes = torch.arange(self.num_classes).long()
298 | if self.use_gpu: classes = classes.cuda()
299 | classes = Variable(classes)
300 | labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
301 | mask = labels.eq(classes.expand(batch_size, self.num_classes))
302 |
303 | dist = []
304 | for i in range(batch_size):
305 | value = distmat[i][mask[i]]
306 | value = value.clamp(min=1e-12, max=1e+12) # for numerical stability
307 | dist.append(value)
308 | dist = torch.cat(dist)
309 | loss = dist.mean()
310 |
311 | return loss
312 |
313 | if __name__ == '__main__':
314 | pass
--------------------------------------------------------------------------------
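
For reference, a minimal CPU-only sketch of how the label-smoothing loss above can be called (a batch of 4 samples and 10 classes, values arbitrary); per the equation in the docstring, each one-hot target keeps 1 - epsilon of its mass and spreads epsilon/K over all K classes:

```python
import torch
from losses import CrossEntropyLabelSmooth  # assumes losses.py is importable

criterion = CrossEntropyLabelSmooth(num_classes=10, epsilon=0.1, use_gpu=False)
logits = torch.randn(4, 10)            # raw scores, before softmax
targets = torch.tensor([0, 3, 3, 7])   # one class index per sample
loss = criterion(logits, targets)
print(loss.item())                     # scalar label-smoothed cross entropy
```
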
/Video-Person-ReID/models/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from .ResNet import *
4 |
5 | __factory = {
6 | 'resnet50tp': ResNet50TP,
7 | 'resnet50ta': ResNet50TA,
8 | 'myresnet50ta': myResNet50TA,
9 | 'resnet50rnn': ResNet50RNN,
10 | 'resnet50tp_ori': ResNet50TP_ORIENTATION,
11 | 'resnet50tp_ori_iou': ResNet50TP_ORIENTATION_IOU,
12 | 'resnet50ta_ori': ResNet50TA_ORIENTATION,
13 | 'resnet50ta_ori_iou': ResNet50TA_ORIENTATION_IOU,
14 | 'resnet50ta_surface': ResNet50TA_SURFACE,
15 | 'resnet50ta_surface_nu': ResNet50TA_SURFACE_NU,
16 | 'resnet50ta_surface_nu4': ResNet50TA_SURFACE_NU4,
17 | 'resnet50ta_surface_nu2': ResNet50TA_SURFACE_NU2,
18 | 'resnet50ta_surface_nu2f1': ResNet50TA_SURFACE_NU2F1,
19 | 'resnet50ta_surface_n1': ResNet50TA_SURFACE_N1,
20 | 'resnet50ta_surface_n2': ResNet50TA_SURFACE_N2,
21 | }
22 |
23 |
24 | def get_names():
25 | return __factory.keys()
26 |
27 |
28 | def init_model(name, *args, **kwargs):
29 | if name not in __factory.keys():
30 | raise KeyError("Unknown model: {}".format(name))
31 | return __factory[name](*args, **kwargs)
32 |
--------------------------------------------------------------------------------
/Video-Person-ReID/models/resnet3d.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | import math
6 | from functools import partial
7 |
8 | __all__ = [
9 | 'ResNet', 'resnet10', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
10 | 'resnet152', 'resnet200'
11 | ]
12 |
13 |
14 | def conv3x3x3(in_planes, out_planes, stride=1):
15 | # 3x3x3 convolution with padding
16 | return nn.Conv3d(
17 | in_planes,
18 | out_planes,
19 | kernel_size=3,
20 | stride=stride,
21 | padding=1,
22 | bias=False)
23 |
24 |
25 | def downsample_basic_block(x, planes, stride):
26 | out = F.avg_pool3d(x, kernel_size=1, stride=stride)
27 | zero_pads = torch.Tensor(
28 | out.size(0), planes - out.size(1), out.size(2), out.size(3),
29 | out.size(4)).zero_()
30 | if isinstance(out.data, torch.cuda.FloatTensor):
31 | zero_pads = zero_pads.cuda()
32 |
33 | out = Variable(torch.cat([out.data, zero_pads], dim=1))
34 |
35 | return out
36 |
37 |
38 | class BasicBlock(nn.Module):
39 | expansion = 1
40 |
41 | def __init__(self, inplanes, planes, stride=1, downsample=None):
42 | super(BasicBlock, self).__init__()
43 | self.conv1 = conv3x3x3(inplanes, planes, stride)
44 | self.bn1 = nn.BatchNorm3d(planes)
45 | self.relu = nn.ReLU(inplace=True)
46 | self.conv2 = conv3x3x3(planes, planes)
47 | self.bn2 = nn.BatchNorm3d(planes)
48 | self.downsample = downsample
49 | self.stride = stride
50 |
51 | def forward(self, x):
52 | residual = x
53 |
54 | out = self.conv1(x)
55 | out = self.bn1(out)
56 | out = self.relu(out)
57 |
58 | out = self.conv2(out)
59 | out = self.bn2(out)
60 |
61 | if self.downsample is not None:
62 | residual = self.downsample(x)
63 |
64 | out += residual
65 | out = self.relu(out)
66 |
67 | return out
68 |
69 |
70 | class Bottleneck(nn.Module):
71 | expansion = 4
72 |
73 | def __init__(self, inplanes, planes, stride=1, downsample=None):
74 | super(Bottleneck, self).__init__()
75 | self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False)
76 | self.bn1 = nn.BatchNorm3d(planes)
77 | self.conv2 = nn.Conv3d(
78 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
79 | self.bn2 = nn.BatchNorm3d(planes)
80 | self.conv3 = nn.Conv3d(planes, planes * 4, kernel_size=1, bias=False)
81 | self.bn3 = nn.BatchNorm3d(planes * 4)
82 | self.relu = nn.ReLU(inplace=True)
83 | self.downsample = downsample
84 | self.stride = stride
85 |
86 | def forward(self, x):
87 | residual = x
88 |
89 | out = self.conv1(x)
90 | out = self.bn1(out)
91 | out = self.relu(out)
92 |
93 | out = self.conv2(out)
94 | out = self.bn2(out)
95 | out = self.relu(out)
96 |
97 | out = self.conv3(out)
98 | out = self.bn3(out)
99 |
100 | if self.downsample is not None:
101 | residual = self.downsample(x)
102 |
103 | out += residual
104 | out = self.relu(out)
105 |
106 | return out
107 |
108 |
109 | class ResNet(nn.Module):
110 |
111 | def __init__(self,
112 | block,
113 | layers,
114 | sample_height,
115 | sample_width,
116 | sample_duration,
117 | shortcut_type='B',
118 | num_classes=400):
119 | self.inplanes = 64
120 | super(ResNet, self).__init__()
121 | self.conv1 = nn.Conv3d(
122 | 3,
123 | 64,
124 | kernel_size=7,
125 | stride=(1, 2, 2),
126 | padding=(3, 3, 3),
127 | bias=False)
128 | self.bn1 = nn.BatchNorm3d(64)
129 | self.relu = nn.ReLU(inplace=True)
130 | self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)
131 | self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
132 | self.layer2 = self._make_layer(
133 | block, 128, layers[1], shortcut_type, stride=2)
134 | self.layer3 = self._make_layer(
135 | block, 256, layers[2], shortcut_type, stride=2)
136 | self.layer4 = self._make_layer(
137 | block, 512, layers[3], shortcut_type, stride=2)
138 | last_duration = int(math.ceil(sample_duration / 16.0))
139 | last_height = int(math.ceil(sample_height / 32.0))
140 | last_width = int(math.ceil(sample_width / 32.0))
141 | self.avgpool = nn.AvgPool3d(
142 | (last_duration, last_height, last_width), stride=1)
143 | self.fc = nn.Linear(512 * block.expansion, num_classes)
144 |
145 | for m in self.modules():
146 | if isinstance(m, nn.Conv3d):
147 | m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
148 | elif isinstance(m, nn.BatchNorm3d):
149 | m.weight.data.fill_(1)
150 | m.bias.data.zero_()
151 |
152 | def _make_layer(self, block, planes, blocks, shortcut_type, stride=1):
153 | downsample = None
154 | if stride != 1 or self.inplanes != planes * block.expansion:
155 | if shortcut_type == 'A':
156 | downsample = partial(
157 | downsample_basic_block,
158 | planes=planes * block.expansion,
159 | stride=stride)
160 | else:
161 | downsample = nn.Sequential(
162 | nn.Conv3d(
163 | self.inplanes,
164 | planes * block.expansion,
165 | kernel_size=1,
166 | stride=stride,
167 | bias=False), nn.BatchNorm3d(planes * block.expansion))
168 |
169 | layers = []
170 | layers.append(block(self.inplanes, planes, stride, downsample))
171 | self.inplanes = planes * block.expansion
172 | for i in range(1, blocks):
173 | layers.append(block(self.inplanes, planes))
174 |
175 | return nn.Sequential(*layers)
176 |
177 | def load_matched_state_dict(self, state_dict):
178 |
179 | own_state = self.state_dict()
180 | for name, param in state_dict.items():
181 | if name not in own_state:
182 | continue
183 | #if isinstance(param, Parameter):
184 | # backwards compatibility for serialized parameters
185 | param = param.data
186 | print("loading "+name)
187 | own_state[name].copy_(param)
188 |
189 | def forward(self, x):
190 | # default size is (b, s, c, w, h), s for seq_len, c for channel
191 | # convert for 3d cnn, (b, c, s, w, h)
192 | x=x.permute(0,2,1,3,4)
193 | x = self.conv1(x)
194 | x = self.bn1(x)
195 | x = self.relu(x)
196 | x = self.maxpool(x)
197 |
198 | x = self.layer1(x)
199 | x = self.layer2(x)
200 | x = self.layer3(x)
201 | x = self.layer4(x)
202 | x = self.avgpool(x)
203 | x = x.view(x.size(0), -1)
204 | y = self.fc(x)
205 |
206 | return y, x
207 |
208 |
209 | def get_fine_tuning_parameters(model, ft_begin_index):
210 | if ft_begin_index == 0:
211 | return model.parameters()
212 |
213 | ft_module_names = []
214 | for i in range(ft_begin_index, 5):
215 | ft_module_names.append('layer{}'.format(i))
216 | ft_module_names.append('fc')
217 |
218 | parameters = []
219 | for k, v in model.named_parameters():
220 | for ft_module in ft_module_names:
221 | if ft_module in k:
222 | parameters.append({'params': v})
223 | break
224 | else:
225 | parameters.append({'params': v, 'lr': 0.0})
226 |
227 | return parameters
228 |
229 |
230 | def resnet10(**kwargs):
231 |     """Constructs a ResNet-10 model.
232 | """
233 | model = ResNet(BasicBlock, [1, 1, 1, 1], **kwargs)
234 | return model
235 |
236 |
237 | def resnet18(**kwargs):
238 | """Constructs a ResNet-18 model.
239 | """
240 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
241 | return model
242 |
243 |
244 | def resnet34(**kwargs):
245 | """Constructs a ResNet-34 model.
246 | """
247 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
248 | return model
249 |
250 |
251 | def resnet50(**kwargs):
252 | """Constructs a ResNet-50 model.
253 | """
254 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
255 | return model
256 |
257 |
258 | def resnet101(**kwargs):
259 | """Constructs a ResNet-101 model.
260 | """
261 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
262 | return model
263 |
264 |
265 | def resnet152(**kwargs):
266 |     """Constructs a ResNet-152 model.
267 | """
268 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
269 | return model
270 |
271 |
272 | def resnet200(**kwargs):
273 |     """Constructs a ResNet-200 model.
274 | """
275 | model = ResNet(Bottleneck, [3, 24, 36, 3], **kwargs)
276 | return model
277 |
--------------------------------------------------------------------------------
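
A short usage sketch for the 3D ResNet above (an assumption about intended usage, matching the (batch, seq_len, channel, height, width) layout noted in `forward`); the class count and clip size below are arbitrary:

```python
import torch
from models.resnet3d import resnet18  # assumes the repo root is on the Python path

# The sample_* arguments only size the final average-pooling window.
model = resnet18(sample_height=112, sample_width=112,
                 sample_duration=8, num_classes=333)
model.eval()

clip = torch.randn(2, 8, 3, 112, 112)  # (batch, seq_len, channel, height, width)
with torch.no_grad():
    logits, feat = model(clip)         # forward() permutes to (b, c, s, h, w)
print(logits.shape, feat.shape)        # (2, 333) logits and (2, 512) features
```
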
/Video-Person-ReID/reidtools.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 |
4 | import numpy as np
5 | import os
6 | import os.path as osp
7 | import shutil
8 |
9 | from iotools import mkdir_if_missing
10 |
11 |
12 | def visualize_ranked_results(distmat, dataset, save_dir='log/ranked_results', topk=20):
13 | """
14 | Visualize ranked results
15 |
16 | Support both imgreid and vidreid
17 |
18 | Args:
19 | - distmat: distance matrix of shape (num_query, num_gallery).
20 | - dataset: a 2-tuple containing (query, gallery), each contains a list of (img_path, pid, camid);
21 | for imgreid, img_path is a string, while for vidreid, img_path is a tuple containing
22 | a sequence of strings.
23 | - save_dir: directory to save output images.
24 | - topk: int, denoting top-k images in the rank list to be visualized.
25 | """
26 | num_q, num_g = distmat.shape
27 |
28 | print("Visualizing top-{} ranks".format(topk))
29 | print("# query: {}\n# gallery {}".format(num_q, num_g))
30 | print("Saving images to '{}'".format(save_dir))
31 |
32 | query, gallery = dataset
33 | assert num_q == len(query)
34 | assert num_g == len(gallery)
35 |
36 | indices = np.argsort(distmat, axis=1)
37 | mkdir_if_missing(save_dir)
38 |
39 | def _cp_img_to(src, dst, rank, prefix):
40 | """
41 | - src: image path or tuple (for vidreid)
42 | - dst: target directory
43 | - rank: int, denoting ranked position, starting from 1
44 | - prefix: string
45 | """
46 | if isinstance(src, tuple) or isinstance(src, list):
47 | dst = osp.join(dst, prefix + '_top' + str(rank).zfill(3))
48 | mkdir_if_missing(dst)
49 | for img_path in src:
50 | shutil.copy(img_path, dst)
51 | else:
52 | dst = osp.join(dst, prefix + '_top' + str(rank).zfill(3) + '_name_' + osp.basename(src))
53 | shutil.copy(src, dst)
54 |
55 | for q_idx in range(num_q):
56 | #qimg_path, qpid, qcamid = query[q_idx]
57 | qimg_path, qpid, qcamid, qangle = query[q_idx]
58 | if isinstance(qimg_path, tuple) or isinstance(qimg_path, list):
59 | qdir = osp.join(save_dir, osp.basename(qimg_path[0]))
60 | else:
61 | qdir = osp.join(save_dir, osp.basename(qimg_path))
62 | mkdir_if_missing(qdir)
63 | _cp_img_to(qimg_path, qdir, rank=0, prefix='query')
64 |
65 | rank_idx = 1
66 | for g_idx in indices[q_idx, :]:
67 | #gimg_path, gpid, gcamid = gallery[g_idx]
68 | gimg_path, gpid, gcamid, gangle = gallery[g_idx]
69 | invalid = (qpid == gpid) & (qcamid == gcamid)
70 | if not invalid:
71 | _cp_img_to(gimg_path, qdir, rank=rank_idx, prefix='gallery')
72 | rank_idx += 1
73 | if rank_idx > topk:
74 | break
75 |
76 | print("Done")
77 |
--------------------------------------------------------------------------------
/Video-Person-ReID/samplers.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from collections import defaultdict
3 | import numpy as np
4 |
5 | import torch
6 |
7 | #class RandomIdentitySampler(object):
8 | class RandomIdentitySampler(torch.utils.data.sampler.Sampler):
9 | """
10 | Randomly sample N identities, then for each identity,
11 | randomly sample K instances, therefore batch size is N*K.
12 |
13 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/data/sampler.py.
14 |
15 | Args:
16 | data_source (Dataset): dataset to sample from.
17 | num_instances (int): number of instances per identity.
18 | """
19 | def __init__(self, data_source, num_instances=4):
20 | self.data_source = data_source
21 | self.num_instances = num_instances
22 | self.index_dic = defaultdict(list)
23 | #import pdb; pdb.set_trace()
24 | for index, (_, pid, _) in enumerate(data_source):
25 | #for index, (_, pid, _,_) in enumerate(data_source):
26 | self.index_dic[pid].append(index)
27 | self.pids = list(self.index_dic.keys())
28 | self.num_identities = len(self.pids)
29 |
30 | def __iter__(self):
31 | indices = torch.randperm(self.num_identities)
32 | ret = []
33 | for i in indices:
34 | pid = self.pids[i]
35 | t = self.index_dic[pid]
36 | replace = False if len(t) >= self.num_instances else True
37 | t = np.random.choice(t, size=self.num_instances, replace=replace)
38 | ret.extend(t)
39 | return iter(ret)
40 |
41 | def __len__(self):
42 | return self.num_identities * self.num_instances
43 |
44 | class RandomIdentitySamplerOrientation(torch.utils.data.sampler.Sampler):
45 | """
46 | Randomly sample N identities, then for each identity,
47 | randomly sample K instances, therefore batch size is N*K.
48 |
49 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/data/sampler.py.
50 |
51 | Args:
52 | data_source (Dataset): dataset to sample from.
53 | num_instances (int): number of instances per identity.
54 | """
55 | def __init__(self, data_source, num_instances=4):
56 | self.data_source = data_source
57 | self.num_instances = num_instances
58 | self.index_dic = defaultdict(list)
59 | #for index, (_, pid, _) in enumerate(data_source):
60 | #for index, (_, pid, _,_) in enumerate(data_source):
61 | for index, (_, pid, _,_,_) in enumerate(data_source):
62 | self.index_dic[pid].append(index)
63 | self.pids = list(self.index_dic.keys())
64 | self.num_identities = len(self.pids)
65 |
66 | def __iter__(self):
67 | indices = torch.randperm(self.num_identities)
68 | ret = []
69 | for i in indices:
70 | pid = self.pids[i]
71 | t = self.index_dic[pid]
72 | replace = False if len(t) >= self.num_instances else True
73 | t = np.random.choice(t, size=self.num_instances, replace=replace)
74 | ret.extend(t)
75 | return iter(ret)
76 |
77 | def __len__(self):
78 | return self.num_identities * self.num_instances
79 | class RandomIdentitySamplerOrientationIOU(torch.utils.data.sampler.Sampler):
80 | """
81 | Randomly sample N identities, then for each identity,
82 | randomly sample K instances, therefore batch size is N*K.
83 |
84 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/data/sampler.py.
85 |
86 | Args:
87 | data_source (Dataset): dataset to sample from.
88 | num_instances (int): number of instances per identity.
89 | """
90 | def __init__(self, data_source, num_instances=4):
91 | self.data_source = data_source
92 | self.num_instances = num_instances
93 | self.index_dic = defaultdict(list)
94 | #for index, (_, pid, _) in enumerate(data_source):
95 | for index, (_, pid, _,_,_) in enumerate(data_source):
96 | self.index_dic[pid].append(index)
97 | self.pids = list(self.index_dic.keys())
98 | self.num_identities = len(self.pids)
99 |
100 | def __iter__(self):
101 | indices = torch.randperm(self.num_identities)
102 | ret = []
103 | for i in indices:
104 | pid = self.pids[i]
105 | t = self.index_dic[pid]
106 | replace = False if len(t) >= self.num_instances else True
107 | t = np.random.choice(t, size=self.num_instances, replace=replace)
108 | ret.extend(t)
109 | return iter(ret)
110 |
111 | def __len__(self):
112 | return self.num_identities * self.num_instances
113 |
--------------------------------------------------------------------------------
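
A tiny illustration of the P x K sampling implemented above (assuming `samplers.py` is importable): with two identities and `num_instances=2`, one pass over the sampler yields 2 * 2 = 4 indices, grouped by identity in a random identity order:

```python
from samplers import RandomIdentitySampler  # assumes samplers.py is on the path

# Toy data_source of (img_path, pid, camid) tuples covering two identities.
dataset = [
    ('a.jpg', 0, 0),
    ('b.jpg', 0, 1),
    ('c.jpg', 1, 0),
    ('d.jpg', 1, 1),
]

sampler = RandomIdentitySampler(dataset, num_instances=2)
print(len(sampler))   # 4 -- num_identities * num_instances
print(list(sampler))  # e.g. [2, 3, 0, 1] -- two indices per identity
```
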
/Video-Person-ReID/transforms.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from torchvision.transforms import *
4 | from PIL import Image
5 | import random
6 | import numpy as np
7 |
8 | class Random2DTranslation(object):
9 | """
10 | With a probability, first increase image size to (1 + 1/8), and then perform random crop.
11 |
12 | Args:
13 | height (int): target height.
14 | width (int): target width.
15 | p (float): probability of performing this transformation. Default: 0.5.
16 | """
17 | def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR):
18 | self.height = height
19 | self.width = width
20 | self.p = p
21 | self.interpolation = interpolation
22 |
23 | def __call__(self, img):
24 | """
25 | Args:
26 | img (PIL Image): Image to be cropped.
27 |
28 | Returns:
29 | PIL Image: Cropped image.
30 | """
31 | if random.random() < self.p:
32 | return img.resize((self.width, self.height), self.interpolation)
33 | new_width, new_height = int(round(self.width * 1.125)), int(round(self.height * 1.125))
34 | resized_img = img.resize((new_width, new_height), self.interpolation)
35 | x_maxrange = new_width - self.width
36 | y_maxrange = new_height - self.height
37 | x1 = int(round(random.uniform(0, x_maxrange)))
38 | y1 = int(round(random.uniform(0, y_maxrange)))
39 |         cropped_img = resized_img.crop((x1, y1, x1 + self.width, y1 + self.height))
40 |         return cropped_img
41 |
42 | if __name__ == '__main__':
43 | pass
44 |
--------------------------------------------------------------------------------
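
A minimal usage sketch for `Random2DTranslation` (assuming `transforms.py` is importable): whichever branch is taken, the output is always `width` x `height`, either by a direct resize or by a random crop from a 1.125x-enlarged resize:

```python
from PIL import Image
from transforms import Random2DTranslation  # assumes transforms.py is on the path

aug = Random2DTranslation(height=224, width=112, p=0.5)
img = Image.new('RGB', (300, 400))  # PIL sizes are (width, height)
out = aug(img)
print(out.size)                     # (112, 224) in both branches
```
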
/Video-Person-ReID/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import os
3 | import sys
4 | import errno
5 | import shutil
6 | import json
7 | import os.path as osp
8 |
9 | import torch
10 |
11 | def mkdir_if_missing(directory):
12 | if not osp.exists(directory):
13 | try:
14 | os.makedirs(directory)
15 | except OSError as e:
16 | if e.errno != errno.EEXIST:
17 | raise
18 |
19 | class AverageMeter(object):
20 | """Computes and stores the average and current value.
21 |
22 | Code imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
23 | """
24 | def __init__(self):
25 | self.reset()
26 |
27 | def reset(self):
28 | self.val = 0
29 | self.avg = 0
30 | self.sum = 0
31 | self.count = 0
32 |
33 | def update(self, val, n=1):
34 | self.val = val
35 | self.sum += val * n
36 | self.count += n
37 | self.avg = self.sum / self.count
38 |
39 | def save_checkpoint(state, is_best, fpath='checkpoint.pth.tar'):
40 | mkdir_if_missing(osp.dirname(fpath))
41 | torch.save(state, fpath)
42 | if is_best:
43 | shutil.copy(fpath, osp.join(osp.dirname(fpath), 'best_model.pth.tar'))
44 |
45 | class Logger(object):
46 | """
47 | Write console output to external text file.
48 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py.
49 | """
50 | def __init__(self, fpath=None):
51 | self.console = sys.stdout
52 | self.file = None
53 | if fpath is not None:
54 | mkdir_if_missing(os.path.dirname(fpath))
55 | self.file = open(fpath, 'w')
56 |
57 | def __del__(self):
58 | self.close()
59 |
60 | def __enter__(self):
61 | pass
62 |
63 | def __exit__(self, *args):
64 | self.close()
65 |
66 | def write(self, msg):
67 | self.console.write(msg)
68 | if self.file is not None:
69 | self.file.write(msg)
70 |
71 | def flush(self):
72 | self.console.flush()
73 | if self.file is not None:
74 | self.file.flush()
75 | os.fsync(self.file.fileno())
76 |
77 | def close(self):
78 | self.console.close()
79 | if self.file is not None:
80 | self.file.close()
81 |
82 | def read_json(fpath):
83 | with open(fpath, 'r') as f:
84 | obj = json.load(f)
85 | return obj
86 |
87 | def write_json(obj, fpath):
88 | mkdir_if_missing(osp.dirname(fpath))
89 | with open(fpath, 'w') as f:
90 | json.dump(obj, f, indent=4, separators=(',', ': '))
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
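
`AverageMeter` above keeps a running, count-weighted average; a short sketch (values invented) of how it is typically fed per-batch statistics:

```python
from utils import AverageMeter  # assumes this repo's utils.py is the one imported

meter = AverageMeter()
meter.update(0.8, n=32)       # e.g. mean loss 0.8 over a batch of 32
meter.update(0.6, n=16)       # next batch
print(meter.val, meter.avg)   # 0.6 and (0.8*32 + 0.6*16) / 48 ~= 0.733
```
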
/Video-Person-ReID/video2img/crop_img.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import sys
4 | import numpy as np
5 | import os.path as osp
6 |
7 | FILE_LEN = 10
8 |
9 | lev1s = ["./S02/", "./S05/"]
10 |
11 | OUT_DIR = "./track1_test_img/"
12 |
13 |
14 | for lev1 in lev1s:
15 | lev2s = os.listdir(lev1)
16 | for lev2 in lev2s:
17 | camera_path = osp.join(lev1, lev2)
18 | path_to_vid = osp.join(camera_path, "vdo.avi")
19 |
20 | vid = cv2.VideoCapture(path_to_vid)
21 |
22 | suc = True
23 | img = None
24 |
25 | count = 1
26 |
27 | out_path = osp.join(OUT_DIR, lev2)
28 | if not osp.isdir(out_path):
29 | os.makedirs(out_path)
30 |
31 | while suc:
32 | suc, img = vid.read()
33 | if img is None:
34 | break
35 |
36 | f_name = osp.join(out_path, str(count).zfill(10) + ".jpg")
37 |
38 | cv2.imwrite(f_name, img)
39 | count += 1
40 |
--------------------------------------------------------------------------------
/Video-Person-ReID/video2img/crop_img_big.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import os.path as osp
4 |
5 | IMG_DIR = "./track1_test_img/"
6 | OUT_DIR = "./track1_sct_img_test_big/"
7 |
8 | for res_f in os.listdir("./txt_GPS_new/"):
9 | camid = res_f.split(".")[0]
10 | cam_img_path = osp.join(IMG_DIR, camid)
11 | out_cam_path = osp.join(OUT_DIR, camid)
12 |
13 | if not osp.isdir(out_cam_path):
14 | os.makedirs(out_cam_path)
15 |
16 | for line in open(osp.join("./txt_GPS_new/", res_f)).readlines():
17 | tmp = line.strip("\n").split(",")
18 | f_id = tmp[0]
19 | obj_id = tmp[1]
20 |
21 | img_f = osp.join(cam_img_path, f_id.zfill(10) + ".jpg")
22 | img = cv2.imread(img_f)
23 |
24 | height, width = img.shape[:2]
25 |
26 |
27 | left = int(tmp[2])-20
28 | top = int(tmp[3])-20
29 | w = int(tmp[4])+40
30 | h = int(tmp[5])+40
31 |
32 | right = left + w
33 | bot = top + h
34 |
35 | if left<0:
36 | left = 0
37 | if top<0:
38 | top=0
39 |
40 | if right>width:
41 | right = width
42 | if bot>height:
43 | bot=height
44 |
45 |
46 |
47 | crop_img = img[top: bot, left:right]
48 |
49 | out_obj_path = osp.join(out_cam_path, obj_id)
50 | if not osp.isdir(out_obj_path):
51 | os.makedirs(out_obj_path)
52 |
53 | out_path = osp.join(out_obj_path, f_id.zfill(10) + ".jpg")
54 | cv2.imwrite(out_path, crop_img)
55 |
56 |
--------------------------------------------------------------------------------
/Video-Person-ReID/video_loader.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import
2 | import os
3 | from PIL import Image
4 | import numpy as np
5 |
6 | import torch
7 | from torch.utils.data import Dataset
8 | import random
9 |
10 | from math import exp, atan2
11 | #import cv2
12 |
13 | def read_image(img_path):
14 |     """Keep trying to read the image until it succeeds.
15 |     This avoids IOErrors incurred by a heavy IO load."""
16 | got_img = False
17 | while not got_img:
18 | try:
19 | img = Image.open(img_path).convert('RGB')
20 | got_img = True
21 | except IOError:
22 | print("IOError incurred when reading '{}'. Will redo. Don't worry. Just chill.".format(img_path))
23 | pass
24 | #print(img_path)
25 | return img
26 |
27 | def read_metadata(img_path, metadata_model, verbose=True):
28 |     """Read metadata probabilities (type, brand, color) from file"""
29 | if metadata_model[:2] == 'v1':
30 | metadata_dim = 26 # 6, 12, 8 for type, brand, color
31 | elif metadata_model[:2] == 'v2':
32 | metadata_dim = 46 # 7, 30, 9 for type, brand, color
33 | else: # the oldest version
34 | metadata_dim = 26 # 6, 12, 8 for type, brand, color
35 | metadata_path = img_path.replace('image', 'metadata_%s'%metadata_model).replace('.jpg', '.txt')
36 | if os.path.isfile(metadata_path):
37 | #print(metadata_path)
38 | with open(metadata_path, 'r') as f:
39 | metadata = []
40 | for line in f:
41 | #print(line)
42 | if ',' in line:
43 | line = line.strip().replace(' ', '').split(',')
44 | line = [s for s in line if len(s) > 0]
45 | else:
46 | line = line.strip().split(' ')
47 | line = [s for s in line if len(s) > 0]
48 | #print(line)
49 | metadata.append(np.array(line, dtype=np.float32))
50 | metadata = np.concatenate(metadata) ### concat all probability vector
51 | assert metadata.shape[0] == metadata_dim
52 | return metadata
53 | else:
54 | if verbose:
55 | print('warning: metadata not exist: ' + str(metadata_path))
56 | return np.zeros(metadata_dim, dtype=np.float32) ### if no metadata
57 |
58 | def PolyArea(pts):
59 | return -0.5*(np.dot(pts[:,0],np.roll(pts[:,1],1))-np.dot(pts[:,1],np.roll(pts[:,0],1)))
60 |
61 | def keypointsArea(keypoints, ids):
62 | pts = np.array([(keypoints[i][0], keypoints[i][1]) for i in ids])
63 | #return cv2.contourArea(pts, oriented=True)
64 | return PolyArea(pts)
65 |
66 | def keypointsSymmetry(keypoints):
67 | area0 = abs(keypointsArea(keypoints, [i for i in range(2, 18)])) + 1
68 | area1 = abs(keypointsArea(keypoints, [i for i in range(2+18, 18+18)])) + 1
69 | ratio = area1 / area0 if area1 < area0 else area0 / area1
70 | #print('area0: %f, area1: %f' % (area0, area1))
71 | return ratio
72 |
73 | def keypointsParallel(keypoints):
74 | NUM_PAIRS = 18
75 | vecs = np.zeros((NUM_PAIRS, 2), dtype=np.float32)
76 | for i in range(NUM_PAIRS):
77 | vecs[i][0] = keypoints[i+18][0] - keypoints[i][0]
78 | vecs[i][1] = keypoints[i+18][1] - keypoints[i][1]
79 | vec_mean = np.mean(vecs, axis=0)
80 | vec_diff = np.subtract(vecs, vec_mean)
81 | vec_err = np.linalg.norm(vec_diff, axis=1) / np.linalg.norm(vec_mean)
82 | vec_errmean = np.mean(vec_err)
83 | return exp(-vec_errmean)
84 |
85 | def keypointsConfidence(keypoints):
86 | parallel_conf = keypointsParallel(keypoints)
87 | symmetry_conf = keypointsSymmetry(keypoints)
88 | keypoint_conf = pow(parallel_conf**2 + symmetry_conf**2, 0.5) / pow(2, 0.5)
89 | return keypoint_conf
90 |
91 | def keypointsSurface(keypoints):
92 | surfaces = []
93 | idss = []
94 | idss.append([i for i in range(2, 18)])
95 | idss.append([i for i in range(20, 36)][::-1])
96 | for i in range(16):
97 | idss.append([i%16+2, i%16+2+18, (i+1)%16+2+18, (i+1)%16+2])
98 | for ids in idss:
99 | surfaces.append(keypointsArea(keypoints, ids))
100 | surfaces = np.array(surfaces, dtype=np.float32)
101 | surfaces /= np.linalg.norm(surfaces)
102 | #surfaces *= 999
103 | #print(surfaces)
104 | return surfaces
105 |
106 | def surfacesAngle(surfaces):
107 | x = surfaces[0] - surfaces[1]
108 | y = surfaces[16] + surfaces[15] + surfaces[14] - surfaces[10] - surfaces[11] - surfaces[12]
109 | return atan2(y, x) # between -pi and pi
110 |
111 | def read_keypoint(img_path):
112 | """Read keypoint from file"""
113 | keypoint_path = img_path.replace('image', 'keypoint').replace('.jpg', '.txt')
114 | with open(keypoint_path, 'r') as f:
115 | keypoints = np.loadtxt(f, dtype=np.float32).flatten()
116 | keypoints = np.reshape(keypoints, (-1,3))
117 | #print(keypoints)
118 | return keypoints
119 |
120 | class VideoDataset(Dataset):
121 | """Video Person ReID Dataset.
122 | Note batch data has shape (batch, seq_len, channel, height, width).
123 | """
124 | sample_methods = ['evenly', 'random', 'all']
125 |
126 | def __init__(self, dataset, metadata_model, seq_len=15, sample='evenly', transform=None):
127 | self.dataset = dataset
128 | self.metadata_model = metadata_model
129 | self.seq_len = seq_len
130 | self.sample = sample
131 | self.transform = transform
132 |
133 | def __len__(self):
134 | return len(self.dataset)
135 |
136 | def __getitem__(self, index):
137 | img_paths, pid, camid = self.dataset[index]
138 | num = len(img_paths)
139 | if self.sample == 'random':
140 | """
141 | Randomly sample seq_len consecutive frames from num frames,
142 | if num is smaller than seq_len, then replicate items.
143 | This sampling strategy is used in training phase.
144 | """
145 | frame_indices = list(range(num))  # list, not range, so the slice taken below supports append
146 | rand_end = max(0, len(frame_indices) - self.seq_len - 1)
147 | begin_index = random.randint(0, rand_end)
148 | end_index = min(begin_index + self.seq_len, len(frame_indices))
149 |
150 | indices = frame_indices[begin_index:end_index]
151 |
152 | for index in indices:
153 | if len(indices) >= self.seq_len:
154 | break
155 | indices.append(index)
156 | indices = np.array(indices)
157 | imgs = []
158 | metadatas = []
159 | for index in indices:
160 | index = int(index)
161 | img_path = img_paths[index]
162 | img = read_image(img_path)
163 | if self.transform is not None:
164 | img = self.transform(img)
165 | img = img.unsqueeze(0)
166 | imgs.append(img)
167 | metadata = read_metadata(img_path, self.metadata_model, False)
168 | metadata = torch.from_numpy(metadata)
169 | metadata = metadata.unsqueeze(0)
170 | metadatas.append(metadata)
171 | imgs = torch.cat(imgs, dim=0)
172 | # imgs=imgs.permute(1,0,2,3)
173 | metadatas = torch.cat(metadatas, dim=0)
174 | return imgs, pid, camid, metadatas
175 |
176 | elif self.sample == 'dense':
177 | """
178 | Sample all frames in a video into a list of clips, each clip contains seq_len frames, batch_size needs to be set to 1.
179 | This sampling strategy is used in test phase.
180 | """
181 | cur_index = 0
182 | # frame_indices = range(num)
183 | frame_indices = list(range(num))
184 | indices_list = []
185 | while num - cur_index > self.seq_len:
186 | indices_list.append(frame_indices[cur_index:cur_index + self.seq_len])
187 | cur_index += self.seq_len
188 | last_seq = frame_indices[cur_index:]
189 | for index in last_seq:
190 | if len(last_seq) >= self.seq_len:
191 | break
192 | last_seq.append(index)
193 | indices_list.append(last_seq)
194 | imgs_list = []
195 | metadatas_list = []
196 | for indices in indices_list:
197 | imgs = []
198 | metadatas = []
199 | for index in indices:
200 | index = int(index)
201 | img_path = img_paths[index]
202 | img = read_image(img_path)
203 | if self.transform is not None:
204 | img = self.transform(img)
205 | img = img.unsqueeze(0)
206 | imgs.append(img)
207 | metadata = read_metadata(img_path, self.metadata_model, False) ####################
208 | metadata = torch.from_numpy(metadata)
209 | metadata = metadata.unsqueeze(0)
210 | metadatas.append(metadata)
211 | imgs = torch.cat(imgs, dim=0)
212 | # imgs=imgs.permute(1,0,2,3)
213 | imgs_list.append(imgs)
214 | metadatas = torch.cat(metadatas, dim=0)
215 | metadatas_list.append(metadatas)
216 | imgs_array = torch.stack(imgs_list)
217 | metadatas_array = torch.stack(metadatas_list)
218 |
219 | return imgs_array, pid, camid, metadatas_array, img_paths
220 |
221 | else:
222 | raise KeyError("Unknown sample method: {}. Expected one of {}".format(self.sample, self.sample_methods))
223 |
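# Usage sketch (hypothetical variable names): `dataset` is a list of
# (img_paths, pid, camid) tuples, e.g. the train/query/gallery lists built by
# data_manager.py. Note that only the 'random' (training) and 'dense'
# (testing, batch_size=1) sample modes are handled in __getitem__ above.
#
#   train_set = VideoDataset(train_list, metadata_model='v2', seq_len=4,
#                            sample='random', transform=train_transform)
#   imgs, pid, camid, metadatas = train_set[0]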
224 |
225 | class VideoDataset_SURFACE(Dataset):
226 | """Video Person ReID Dataset.
227 | Note batch data has shape (batch, seq_len, channel, height, width).
228 | """
229 | sample_methods = ['evenly', 'random', 'all']
230 |
231 | def __init__(self, dataset, metadata_model, seq_len=15, sample='evenly', transform=None):
232 | self.dataset = dataset
233 | self.metadata_model = metadata_model
234 | self.seq_len = seq_len
235 | self.sample = sample
236 | self.transform = transform
237 |
238 | def __len__(self):
239 | return len(self.dataset)
240 |
241 | def __getitem__(self, index):
242 | img_paths, pid, camid = self.dataset[index]
243 | num = len(img_paths)
244 | keypoint_conf_thresh = 0.6#999
245 | if self.sample == 'random':
246 | """
247 | Randomly sample seq_len consecutive frames from num frames,
248 | if num is smaller than seq_len, then replicate items.
249 | This sampling strategy is used in training phase.
250 | """
251 | frame_indices = list(range(num))  # list, not range, so the slice taken below supports append
252 | rand_end = max(0, len(frame_indices) - self.seq_len - 1)
253 | begin_index = random.randint(0, rand_end)
254 | end_index = min(begin_index + self.seq_len, len(frame_indices))
255 |
256 | indices = frame_indices[begin_index:end_index]
257 |
258 | for index in indices:
259 | if len(indices) >= self.seq_len:
260 | break
261 | indices.append(index)
262 | indices = np.array(indices)
263 | imgs = []
264 | surfaces = []
265 | metadatas = []
266 | for index in indices:
267 | index = int(index)
268 | img_path = img_paths[index]
269 | img = read_image(img_path)
270 | if self.transform is not None:
271 | img = self.transform(img)
272 | img = img.unsqueeze(0)
273 | imgs.append(img)
274 | # TH surface
275 | keypoints = read_keypoint(img_path)
276 | surface = keypointsSurface(keypoints)
277 | keypoint_conf = keypointsConfidence(keypoints)
278 | if keypoint_conf < keypoint_conf_thresh:
279 | surface = surface * 0
280 | #print('surface = ' + str(surface))
281 | surface = torch.from_numpy(surface)
282 | surface = surface.unsqueeze(0)
283 | surfaces.append(surface)
284 | metadata = read_metadata(img_path, self.metadata_model, False)
285 | metadata = torch.from_numpy(metadata)
286 | metadata = metadata.unsqueeze(0)
287 | metadatas.append(metadata)
288 | imgs = torch.cat(imgs, dim=0)
289 | # imgs=imgs.permute(1,0,2,3)
290 | surfaces = torch.cat(surfaces, dim=0)
291 | metadatas = torch.cat(metadatas, dim=0)
292 | return imgs, surfaces, pid, camid, metadatas
293 |
294 | elif self.sample == 'dense':
295 | """
296 | Sample all frames in a video into a list of clips, each clip contains seq_len frames, batch_size needs to be set to 1.
297 | This sampling strategy is used in test phase.
298 | """
299 | cur_index = 0
300 | # frame_indices = range(num)
301 | frame_indices = list(range(num))
302 | indices_list = []
303 | while num - cur_index > self.seq_len:
304 | indices_list.append(frame_indices[cur_index:cur_index + self.seq_len])
305 | cur_index += self.seq_len
306 | last_seq = frame_indices[cur_index:]
307 | for index in last_seq:
308 | if len(last_seq) >= self.seq_len:
309 | break
310 | last_seq.append(index)
311 | indices_list.append(last_seq)
312 | imgs_list = []
313 | surfaces_list = []
314 | metadatas_list = []
315 | for indices in indices_list:
316 | imgs = []
317 | surfaces = []
318 | metadatas = []
319 | for index in indices:
320 | index = int(index)
321 | img_path = img_paths[index]
322 | img = read_image(img_path)
323 | if self.transform is not None:
324 | img = self.transform(img)
325 | img = img.unsqueeze(0)
326 | imgs.append(img)
327 | # TH surface
328 | keypoints = read_keypoint(img_path)
329 | surface = keypointsSurface(keypoints)
330 | keypoint_conf = keypointsConfidence(keypoints)
331 | if keypoint_conf < keypoint_conf_thresh:
332 | surface = surface * 0
333 | #print('surface = ' + str(surface))
334 | surface = torch.from_numpy(surface)
335 | surface = surface.unsqueeze(0)
336 | surfaces.append(surface)
337 | metadata = read_metadata(img_path, self.metadata_model)
338 | metadata = torch.from_numpy(metadata)
339 | metadata = metadata.unsqueeze(0)
340 | metadatas.append(metadata)
341 | imgs = torch.cat(imgs, dim=0)
342 | # imgs=imgs.permute(1,0,2,3)
343 | imgs_list.append(imgs)
344 | surfaces = torch.cat(surfaces, dim=0)
345 | surfaces_list.append(surfaces)
346 | metadatas = torch.cat(metadatas, dim=0)
347 | metadatas_list.append(metadatas)
348 | imgs_array = torch.stack(imgs_list)
349 | surfaces_array = torch.stack(surfaces_list)
350 | metadatas_array = torch.stack(metadatas_list)
351 |
352 | return imgs_array, surfaces_array, pid, camid, metadatas_array, img_paths
353 |
354 | else:
355 | raise KeyError("Unknown sample method: {}. Expected one of {}".format(self.sample, self.sample_methods))
356 |
--------------------------------------------------------------------------------
/metadata/README.md:
--------------------------------------------------------------------------------
1 | The metadata classifier code is based on \[[code](https://github.com/pangwong/pytorch-multi-label-classifier)\]. We use the 29-layer Light CNN model with modifications to the transformations, the input size, and the initial layers (*Wu, X., He, R., Sun, Z. and Tan, T., 2018. A light CNN for deep face representation with noisy labels. IEEE Transactions on Information Forensics and Security, 13(11), pp.2884-2896*).
2 |
3 | ## Setup
4 |
5 | This code assumes you have the following packages installed (a sample `pip` install command is given after the list).
6 | - Python 3.6
7 | - Pytorch 0.4.0
8 | - Torchvision 0.2.1
9 | - Numpy 1.16.2
10 | - PIL 5.1.0
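A minimal install sketch with pip (package versions as listed above; adjust the exact wheels, e.g. for your CUDA setup, as needed):

pip install torch==0.4.0 torchvision==0.2.1 numpy==1.16.2 Pillow==5.1.0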
11 |
12 | Running system:
13 | - Linux Ubuntu 18.04
14 | - CUDA 9.0
15 | - 2 NVIDIA Titan Xp GPU
16 |
17 | ## Our pre-trained model
18 |
19 | Download the pre-trained model [here](https://drive.google.com/file/d/119GdCtKDkJCGc_AX0Try_CoMij5rhcg4/view?usp=sharing).
20 | - The model is pretrained on the CompCars dataset ([link](http://mmlab.ie.cuhk.edu.hk/datasets/comp_cars/index.html)).
21 | - We cleaned up the CompCars dataset by keeping only the brands that exist in the AIC dataset (based on the country/type/year/version of the vehicle models) and correcting wrong labels.
22 | - The training set can be downloaded [here](https://drive.google.com/file/d/1dbcjbdPBC19dCzq-v8TE-z1OHI4nLBFg/view?usp=sharing).
23 | - Some images from the Track 2 training set and traffic videos from surveillance cameras are used for further training. Due to privacy concerns, we are not allowed to release the source videos. Please follow [their](http://www.uwstarlab.org/) future work on the dataset.
24 | - Once you run the testing code, you will obtain the metadata probabilities. For simplicity and consistency with our ReID code, the output can be downloaded [here](https://drive.google.com/file/d/1korVAg0W_VKNYKiwc3cYwvmC6pDhiuxs/view?usp=sharing). An illustrative example of the per-image output format is given below.
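For reference, the metadata files read by `read_metadata` in `Video-Person-ReID/video_loader.py` are small text files with one comma- or space-separated row of probabilities per attribute group (for the v2 model: 7 type, 30 brand and 9 color values, 46 in total). A made-up type row might look like:

0.01, 0.90, 0.02, 0.01, 0.03, 0.02, 0.01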
25 |
26 | ## Training
27 |
28 | - `label.txt` lists the categories.
29 | - `data.txt` contains the training image paths and labels (see the example line below). Note that the example here is set up for training vehicle type and brand; the weights for the losses need to be revised in the code accordingly (`multi_label_classifier.py`, lines 68-74). The training data should follow this format.
30 | - For other training/testing/visualization options, please refer to `options.py`.
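Each line of `data.txt` describes one object as a dictionary with the fields parsed in `data/dataset.py`: `image_file`, a `box` with `x`/`y`/`w`/`h`, and an `id` list of [type, brand, color]. A made-up example (follow the provided `data.txt`/`testdata.txt` for the exact quoting/escaping):

{'image_file': '/path/to/image.jpg', 'box': {'x': 0, 'y': 0, 'w': 128, 'h': 128}, 'id': ['Sedan', 'Toyota', 'White']}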
31 |
32 | Use the following command to run the training code.
33 |
34 | python multi_label_classifier.py --dir "./YOUR_DIRPATH_OF_data.txt_and_label.txt/" --mode "Train" --model "LightenB" --name "YOURMODELNAME" --batch_size 8 --gpu_ids 0 --input_channel 3 --load_size 512 --input_size 512 --ratio "[0.7, 0.1, 0.2]" --load_thread 4 --sum_epoch 500 --lr_decay_in_epoch 1 --display_port 8900 --validate_ratio 0.5 --top_k "(1,)" --score_thres 0.1 --display_train_freq 1000 --display_validate_freq 1000 --save_epoch_freq 2000 --display_image_ratio 0.1 --shuffle
35 |
36 |
37 | ## Testing
38 |
39 | To test the model, make sure you have the image ids and paths under `./your_model_name/Data/Test/data.txt`. To keep it simple, use `testdata.txt`. Then run:
40 |
41 | python multi_label_classifier.py --dir "./YOUR_DIRPATH_OF_data.txt_and_label.txt/" --mode "Test" --model "LightenB" --name "YOURMODELNAME" --checkpoint_name "/path_to_model.pth"
42 |
43 |
44 | The probabilities of each label will be saved in `test.log`. Run `reformat-log.py` to reformat the log file for reranking.
45 |
46 |
--------------------------------------------------------------------------------
/metadata/data/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/data/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/data/__pycache__/dataset.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/data/__pycache__/dataset.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/data/__pycache__/loader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/data/__pycache__/loader.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/data/__pycache__/transformer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/data/__pycache__/transformer.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/data/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json,ast
4 | import random
5 | import logging
6 | import torch.utils.data as data
7 |
8 | from .transformer import get_transformer, load_image
9 |
10 | class BaseDataset(data.Dataset):
11 | def __init__(self, opt, data_type, id2rid):
12 | super(BaseDataset, self).__init__()
13 | self.opt = opt
14 | self.data_type = data_type
15 | self.dataset = self._load_data(opt.data_dir+ '/' + data_type + '/data.txt')
16 | self.id2rid = id2rid
17 | self.data_size = len(self.dataset)
18 | self.transformer = get_transformer(opt)
19 |
20 | def __getitem__(self, index):
21 | image_file, box, attr_ids = self.dataset[index % self.data_size]
22 |
23 | input = load_image(image_file, box, self.opt, self.transformer)
24 | #input = load_image(image_file, self.opt, self.transformer)
25 |
26 | # label
27 | labels = list()
28 | for index, attr_id in enumerate(attr_ids):
29 | labels.append(self.id2rid[index][attr_id])
30 |
31 | return input, labels
32 |
33 | def __len__(self):
34 | return self.data_size
35 |
36 | def _load_data(self, data_file):
37 | print(data_file)
38 | dataset = list()
39 | if not os.path.exists(data_file):
40 | return dataset
41 | with open(data_file) as d:
42 | for line in d.readlines():
43 | line = json.dumps(ast.literal_eval(line))
44 | dataset.append(self.readline(line))
45 | #import pdb; pdb.set_trace()
46 | if self.opt.shuffle:
47 | logging.info("Shuffle %s Data" %(self.data_type))
48 | random.shuffle(dataset)
49 | else:
50 | logging.info("Not Shuffle %s Data" %(self.data_type))
51 | return dataset
52 |
53 | def readline(self, line):
54 | vbrand_list = ['Dodge', 'Ford', 'Chevrolet', 'GMC', 'Honda', 'Chrysler', 'Jeep', 'Hyundai',\
55 | 'Subaru', 'Toyota', 'Buick', 'others', 'KIA', 'Nissan', 'Volkswagen',\
56 | 'Oldsmobile', 'BMW', 'Cadillac', 'Volvo', 'Pontiac', 'Mercury', 'Lexus',\
57 | 'Saturn', 'Benz', 'Mazda', 'Scion', 'RAM', 'Mini', 'Lincoln', 'Audi',\
58 | 'Mitsubishi']
59 | vtype_list = ['SUV', 'PickupTruck', 'Sedan', 'Minivan', 'Truck', 'Hatchback', 'Bus']
60 | vcolor_list = ['Black', 'White', 'Red', 'Gray', 'Silver', 'Blue', 'Gold', 'Green', 'Yellow']
61 | data = [None, None,None]
62 | #print(line)
63 | line = ast.literal_eval(line)
64 | line = ast.literal_eval(line)
65 |
66 | #line = json.loads(line)
67 | if "image_file" in line:
68 | data[0] = line["image_file"]
69 | if 'box' in line:
70 | data[1] = line["box"]
71 | if 'id' in line:
72 | data[2] = line["id"]
73 | vtype = data[2][0]
74 | vbrand = data[2][1]
75 | vcolor = data[2][2]
76 |
77 | if (vtype not in vtype_list) or (vbrand not in vbrand_list) or (vcolor not in vcolor_list):
78 | print(data[0],data[2])
79 |
80 | return data
81 |
--------------------------------------------------------------------------------
/metadata/data/loader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import random
5 | import logging
6 | import collections
7 | from torch.utils.data import DataLoader
8 | from data.dataset import BaseDataset
9 | import ast
10 | import json
11 |
12 | sys.path.append('../')
13 | from util.util import rmdir, load_label
14 |
15 | class MultiLabelDataLoader():
16 | def __init__(self, opt):
17 | self.opt = opt
18 | assert os.path.exists(opt.dir + "/data.txt"), "No data.txt found in specified dir"
19 | assert os.path.exists(opt.dir + "/label.txt"), "No label.txt found in specified dir"
20 |
21 | train_dir = opt.data_dir + "/TrainSet/"
22 | val_dir = opt.data_dir + "/ValidateSet/"
23 | test_dir = opt.data_dir + "/TestSet/"
24 |
25 | # split data
26 | if not all([os.path.exists(train_dir), os.path.exists(val_dir), os.path.exists(test_dir)]):
27 | # rm existing directories
28 | rmdir(train_dir)
29 | rmdir(val_dir)
30 | rmdir(test_dir)
31 |
32 | # split data to Train, Val, Test
33 | logging.info("Split raw data to Train, Val and Test")
34 | ratios = opt.ratio
35 | dataset = collections.defaultdict(list)
36 | with open(opt.dir + '/data.txt') as d:
37 | for line in d.readlines():
38 | #print(line)
39 | line = ast.literal_eval(line)
40 | line = json.dumps(line)
41 |
42 | #line = json.loads(line)
43 | # if data has been specified data_type yet, load data as what was specified before
44 | #if "type" in line:
45 | # dataset[line["type"]].append(line)
46 | # continue
47 | # specified data_type randomly
48 | rand = random.random()
49 | if rand < ratios[0]:
50 | data_type = "Train"
51 | elif rand < ratios[0] + ratios[1]:
52 | data_type = "Validate"
53 | else:
54 | data_type = "Test"
55 | dataset[data_type].append(line)
56 | # write to file
57 | self._WriteDataToFile(dataset["Train"], train_dir)
58 | self._WriteDataToFile(dataset["Validate"], val_dir)
59 | self._WriteDataToFile(dataset["Test"], test_dir)
60 |
61 | self.rid2name, self.id2rid, self.rid2id = load_label(opt.dir + '/label.txt')
62 | print(self.rid2name, self.id2rid, self.rid2id)
63 | self.num_classes = [len(item)-2 for item in self.rid2name]
64 |
65 | # load dataset
66 | if opt.mode == "Train":
67 | logging.info("Load Train Dataset...")
68 | self.train_set = BaseDataset(self.opt, "TrainSet", self.rid2id)
69 | logging.info("Load Validate Dataset...")
70 | self.val_set = BaseDataset(self.opt, "ValidateSet", self.rid2id)
71 | else:
72 | # force batch_size for test to 1
73 | self.opt.batch_size = 1
74 | self.opt.load_thread = 1
75 | logging.info("Load Test Dataset...")
76 | self.test_set = BaseDataset(self.opt, "TestSet", self.rid2id)
77 |
78 | def GetTrainSet(self):
79 | if self.opt.mode == "Train":
80 | return self._DataLoader(self.train_set)
81 | else:
82 | raise RuntimeError("Train Set DataLoader NOT implemented in Test Mode")
83 |
84 | def GetValSet(self):
85 | if self.opt.mode == "Train":
86 | return self._DataLoader(self.val_set)
87 | else:
88 | raise RuntimeError("Validation Set DataLoader NOT implemented in Test Mode")
89 |
90 | def GetTestSet(self):
91 | if self.opt.mode == "Test":
92 | return self._DataLoader(self.test_set)
93 | else:
94 | raise RuntimeError("Test Set DataLoader NOT implemented in Train Mode")
95 |
96 | def GetNumClasses(self):
97 | return self.num_classes
98 |
99 | def GetRID2Name(self):
100 | return self.rid2name
101 |
102 | def GetID2RID(self):
103 | return self.id2rid
104 |
105 | def GetiRID2ID(self):
106 | return self.rid2id  # the loader only defines rid2id (from label.txt); there is no irid2id attribute
107 |
108 | def _WriteDataToFile(self, src_data, dst_dir):
109 | """
110 | write info of each objects to data.txt as predefined format
111 | """
112 | if not os.path.exists(dst_dir):
113 | os.mkdir(dst_dir)
114 | with open(dst_dir + "/data.txt", 'w') as d:
115 | for line in src_data:
116 | d.write(json.dumps(line, separators=(',',':'))+'\n')
117 |
118 |
119 | def _DataLoader(self, dataset):
120 | """
121 | create data loder
122 | """
123 | dataloader = DataLoader(
124 | dataset,
125 | batch_size=self.opt.batch_size,
126 | shuffle=False,
127 | num_workers=int(self.opt.load_thread),
128 | pin_memory=self.opt.cuda,
129 | drop_last=False)
130 | return dataloader
131 |
132 |
--------------------------------------------------------------------------------
/metadata/data/transformer.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from PIL import Image
3 | from torchvision import transforms
4 |
5 |
6 | def get_transformer(opt):
7 | transform_list = []
8 |
9 | # resize
10 | osize = [opt.load_size, opt.load_size]
11 | #transform_list.append(transforms.functional.resize(osize,Image.BICUBIC))
12 | transform_list.append(transforms.Resize(osize, Image.BICUBIC))
13 |
14 | # grayscales
15 | if opt.input_channel == 1:
16 | transform_list.append(transforms.Grayscale())
17 |
18 | # crop
19 | if opt.crop == "RandomCrop":
20 | transform_list.append(transforms.RandomCrop(opt.fineSize))
21 | elif opt.crop == "CenterCrop":
22 | transform_list.append(transforms.CenterCrop(opt.input_size))
23 | elif opt.crop == "FiveCrop":
24 | transform_list.append(transforms.FiveCrop(opt.input_size))
25 | elif opt.crop == "TenCrop":
26 | transform_list.append(transforms.TenCrop(opt.input_size))
27 |
28 | # flip
29 | if opt.mode == "Train" and opt.flip:
30 | transform_list.append(transforms.RandomHorizontalFlip())
31 |
32 | # to tensor
33 | transform_list.append(transforms.ToTensor())
34 |
35 | # If you make changes here, you should also modify
36 | # function `tensor2im` in util/util.py accordingly
37 | transform_list1 = [
38 | transforms.ToTensor(),
39 | transforms.Normalize(opt.mean, opt.std)]
40 | transform_list.append(transforms.Normalize(opt.mean, opt.std))
41 |
42 | return transforms.Compose(transform_list1)
43 |
44 | def fix_box(box, width, height, ratio=-1, scale=1.0):
45 | if scale < 0:
46 | scale = 1.0
47 | box = copy.deepcopy(box)
48 | w = box["w"]
49 | h = box["h"]
50 | x = box["x"] + w / 2
51 | y = box["y"] + h / 2
52 | mw = 2 * min(x, width - x)
53 | mh = 2 * min(y, height - y)
54 | w = max(1, min(int(w * scale), mw))
55 | h = max(1, min(int(h * scale), mh))
56 | if ratio > 0:
57 | if 1.0 * w / h > ratio:
58 | h = int(w / ratio)
59 | h = min(h, mh)
60 | w = int(h * ratio)
61 | else:
62 | w = int(h * ratio)
63 | w = min(w, mw)
64 | h = int(w / ratio)
65 | box["x"] = x - w / 2
66 | box["y"] = y - h / 2
67 | box["w"] = w
68 | box["h"] = h
69 | return box
70 |
71 | def load_image(image_file, box, opt, transformer):
72 | img = Image.open(image_file)
73 | if opt.input_channel == 3:
74 | img = img.convert('RGB')
75 |
76 | # box crop
77 | #if box is not None and opt.region == True:
78 | # box = fix_box(box, width, height, opt.box_ratio, opt.box_scale)
79 | # area = (box['x'], box['y'], box['x']+box['w'], box['y']+box['h'])
80 | # img = img.crop(area)
81 | # transform
82 | osize = opt.load_size
83 | old_size = img.size # old_size[0] is in (width, height) format
84 | ratio = float(osize)/max(old_size)
85 | new_size = tuple([int(x*ratio) for x in old_size])
86 | im = img.resize(new_size, Image.ANTIALIAS)
87 | new_im = Image.new("RGB", (osize,osize))
88 | new_im.paste(im, ((osize-new_size[0])//2,
89 | (osize-new_size[1])//2))
90 |
91 |
92 | input = transformer(new_im)
93 | # and a column of 0s at pos 10
94 | #result = F.pad(input=source, pad=(1, 1, 0, 1), mode='constant', value=0)
95 | #if width>height:
96 |
97 |
98 | return input
99 |
100 |
--------------------------------------------------------------------------------
/metadata/deploy.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import logging
5 | import torch
6 | import torch.backends.cudnn as cudnn
7 | import torch.nn.functional as F
8 | from torch.autograd import Variable
9 | from torchvision import transforms
10 | from collections import OrderedDict, defaultdict
11 |
12 | from options.options import Options
13 | from models.model import load_model
14 | from data.transformer import get_transformer, load_image
15 | from util.util import load_label, opt2file
16 | from util.webvisualizer import WebVisualizer
17 |
18 | def main():
19 | # parse options
20 | op = Options()
21 | opt = op.parse()
22 |
23 | # special setting
24 | opt.shuffle = False
25 | opt.batch_size = 1
26 | opt.load_thread = 1
27 |
28 | # initialize train or test working dir
29 | test_dir = os.path.join(opt.classify_dir , opt.name)
30 | opt.model_dir = opt.dir + "/trainer_" + opt.name + "/Train/"
31 | if not os.path.exists(test_dir):
32 | os.mkdir(test_dir)
33 |
34 | # save options to disk
35 | opt2file(opt, os.path.join(test_dir, "opt.txt"))
36 |
37 | # log setting
38 | log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
39 | formatter = logging.Formatter(log_format)
40 | fh = logging.FileHandler(test_dir + "/deploy.log", 'a')
41 | fh.setFormatter(formatter)
42 | ch = logging.StreamHandler()
43 | ch.setFormatter(formatter)
44 | logging.getLogger().addHandler(fh)
45 | logging.getLogger().addHandler(ch)
46 | logging.getLogger().setLevel(logging.INFO)
47 |
48 | # load label
49 | if opt.label_file == "":
50 | opt.label_file = opt.dir + "/label.txt"
51 | rid2name, id2rid, rid2id = load_label(opt.label_file)
52 | num_classes = [len(rid2name[index])-2 for index in range(len(rid2name))]
53 |
54 | # load transformer
55 | transformer = get_transformer(opt)
56 |
57 | # load model
58 | model = load_model(opt, num_classes)
59 | model.eval()
60 |
61 | # use cuda
62 | if opt.cuda:
63 | model = model.cuda(opt.devices[0])
64 | cudnn.benchmark = True
65 |
66 | l = open(test_dir + "/classify_res_data.txt", 'w')
67 | with open(opt.classify_dir + "/data.txt") as data:
68 | for num, line in enumerate(data):
69 | logging.info(str(num+1))
70 | line = json.loads(line)
71 | input_tensor = load_image(line["image_file"], line["box"], opt, transformer)
72 | input_tensor = input_tensor.unsqueeze(0)
73 | if opt.cuda:
74 | input_tensor = input_tensor.cuda(opt.devices[0])
75 | outputs = model(Variable(input_tensor, volatile=True))
76 | if not isinstance(outputs, list):
77 | outputs = [outputs]
78 | line["classify_res"] = list()
79 | for index, out in enumerate(outputs):
80 | out = out.cpu()
81 | #print "out:", out
82 | softmax = F.softmax(out, dim=1).data.squeeze()
83 | #print "softmax:", softmax
84 | probs, ids = softmax.sort(0, True)
85 | classify_res = {}
86 | for i in range(len(probs)):
87 | classify_res[rid2name[index][id2rid[index][ids[i]]]] = probs[i]
88 | classify_res["max_score"] = probs[0]
89 | classify_res["best_label"] = rid2name[index][id2rid[index][ids[0]]]
90 | line["classify_res"].append(classify_res)
91 | l.write(json.dumps(line, separators=(',', ':'))+'\n')
92 | l.close()
93 | logging.info("classification done")
94 |
95 |
96 | if __name__ == "__main__":
97 | main()
98 |
--------------------------------------------------------------------------------
/metadata/label.txt:
--------------------------------------------------------------------------------
1 | 7;type;type
2 | Sedan;Sedan
3 | SUV;SUV
4 | Truck;Truck
5 | Minivan;Minivan
6 | PickupTruck;PickupTruck
7 | Hatchback;Hatchback
8 | Bus;Bus
9 | 30;brand;brand
10 | Dodge;Dodge
11 | Ford;Ford
12 | Chevrolet;Chevrolet
13 | GMC;GMC
14 | Honda;Honda
15 | Chrysler;Chrysler
16 | Jeep;Jeep
17 | Hyundai;Hyundai
18 | Subaru;Subaru
19 | Toyota;Toyota
20 | Buick;Buick
21 | KIA;KIA
22 | Nissan;Nissan
23 | Volkswagen;Volkswagen
24 | Oldsmobile;Oldsmobile
25 | BMW;BMW
26 | Cadillac;Cadillac
27 | Volvo;Volvo
28 | Pontiac;Pontiac
29 | Mercury;Mercury
30 | Lexus;Lexus
31 | Saturn;Saturn
32 | Benz;Benz
33 | Mazda;Mazda
34 | Scion;Scion
35 | Mini;Mini
36 | Lincoln;Lincoln
37 | Audi;Audi
38 | Mitsubishi;Mitsubishi
39 | others;others
40 | 9;color;color
41 | Black;Black
42 | White;White
43 | Gray;Gray
44 | Blue;Blue
45 | Red;Red
46 | Gold;Gold
47 | Silver;Silver
48 | Green;Green
49 | Yellow;Yellow
50 |
--------------------------------------------------------------------------------
/metadata/models/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/models/__pycache__/alexnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/alexnet.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/models/__pycache__/build_model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/build_model.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/models/__pycache__/lightcnn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/lightcnn.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/models/__pycache__/model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/model.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/models/__pycache__/resnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/resnet.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/models/__pycache__/vgg.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/vgg.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/models/alexnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.utils.model_zoo as model_zoo
3 | from models.build_model import *
4 |
5 | __all__ = ['AlexNet', 'alexnet']
6 |
7 |
8 | model_urls = {
9 | 'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
10 | }
11 |
12 |
13 | class AlexNet(nn.Module):
14 |
15 | def __init__(self, num_classes=1000):
16 | super(AlexNet, self).__init__()
17 | self.features = nn.Sequential(
18 | nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
19 | nn.ReLU(inplace=True),
20 | nn.MaxPool2d(kernel_size=3, stride=2),
21 | nn.Conv2d(64, 192, kernel_size=5, padding=2),
22 | nn.ReLU(inplace=True),
23 | nn.MaxPool2d(kernel_size=3, stride=2),
24 | nn.Conv2d(192, 384, kernel_size=3, padding=1),
25 | nn.ReLU(inplace=True),
26 | nn.Conv2d(384, 256, kernel_size=3, padding=1),
27 | nn.ReLU(inplace=True),
28 | nn.Conv2d(256, 256, kernel_size=3, padding=1),
29 | nn.ReLU(inplace=True),
30 | nn.MaxPool2d(kernel_size=3, stride=2),
31 | )
32 | self.classifier = nn.Sequential(
33 | nn.Dropout(),
34 | nn.Linear(256 * 6 * 6, 4096),
35 | nn.ReLU(inplace=True),
36 | nn.Dropout(),
37 | nn.Linear(4096, 4096),
38 | nn.ReLU(inplace=True),
39 | nn.Linear(4096, num_classes),
40 | )
41 |
42 | def forward(self, x):
43 | x = self.features(x)
44 | x = x.view(x.size(0), 256 * 6 * 6)
45 | x = self.classifier(x)
46 | return x
47 |
48 | class AlexNetTemplet(nn.Module):
49 | def __init__(self, input_channel):
50 | super(AlexNetTemplet, self).__init__()
51 | self.features = nn.Sequential(
52 | nn.Conv2d(input_channel, 64, kernel_size=11, stride=4, padding=2),
53 | nn.ReLU(inplace=True),
54 | nn.MaxPool2d(kernel_size=3, stride=2),
55 | nn.Conv2d(64, 192, kernel_size=5, padding=2),
56 | nn.ReLU(inplace=True),
57 | nn.MaxPool2d(kernel_size=3, stride=2),
58 | nn.Conv2d(192, 384, kernel_size=3, padding=1),
59 | nn.ReLU(inplace=True),
60 | nn.Conv2d(384, 256, kernel_size=3, padding=1),
61 | nn.ReLU(inplace=True),
62 | nn.Conv2d(256, 256, kernel_size=3, padding=1),
63 | nn.ReLU(inplace=True),
64 | nn.MaxPool2d(kernel_size=3, stride=2),
65 | )
66 | self.classifier = nn.Sequential(
67 | nn.Dropout(),
68 | nn.Linear(256 * 6 * 6, 4096),
69 | nn.ReLU(inplace=True),
70 | nn.Dropout(),
71 | nn.Linear(4096, 4096),
72 | nn.ReLU(inplace=True),
73 | )
74 |
75 | def forward(self, x):
76 | x = self.features(x)
77 | x = x.view(x.size(0), 256 * 6 * 6)
78 | x = self.classifier(x)
79 | return x
80 |
81 |
82 | def alexnet(pretrained=False, **kwargs):
83 | r"""AlexNet model architecture from the
84 | `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
85 | Args:
86 | pretrained (bool): If True, returns a model pre-trained on ImageNet
87 | """
88 | model = AlexNet(**kwargs)
89 | if pretrained:
90 | model.load_state_dict(model_zoo.load_url(model_urls['alexnet']))
91 | return model
92 |
93 | def AlexnetTemplet(input_channel, pretrained=False, **kwargs):
94 | r"""AlexNet model architecture from the
95 | `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
96 | Args:
97 | pretrained (bool): If True, returns a model pre-trained on ImageNet
98 | """
99 | model = AlexNetTemplet(input_channel)
100 | if pretrained:
101 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['alexnet']))
102 | model.load_state_dict(model_dict)
103 | return model
104 |
--------------------------------------------------------------------------------
/metadata/models/build_model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | class MultiLabelModel(nn.Module):
4 | def __init__(self, basemodel, basemodel_output, num_classes):
5 | super(MultiLabelModel, self).__init__()
6 | self.basemodel = basemodel
7 | self.num_classes = num_classes
8 | for index, num_class in enumerate(num_classes):
9 | setattr(self, "FullyConnectedLayer_" + str(index), nn.Linear(basemodel_output, num_class))
10 |
11 | def forward(self, x):
12 | x = self.basemodel.forward(x)
13 | #print(fc.shape,x.shape)
14 | outs = list()
15 | dir(self)
16 | for index, num_class in enumerate(self.num_classes):
17 | fun = eval("self.FullyConnectedLayer_" + str(index))
18 | out = fun(x)
19 | outs.append(out)
20 | return outs
21 |
22 | def LoadPretrainedModel(model, pretrained_state_dict):
23 | model_dict = model.state_dict()
24 | union_dict = {k : v for k,v in pretrained_state_dict.items() if k in model_dict}
25 | model_dict.update(union_dict)
26 | return model_dict
27 |
28 | def BuildMultiLabelModel(basemodel, basemodel_output, num_classes):
29 | return MultiLabelModel(basemodel, basemodel_output, num_classes)
30 |
--------------------------------------------------------------------------------
/metadata/models/lightcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | class mfm(nn.Module):
6 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, type=1):
7 | super(mfm, self).__init__()
8 | self.out_channels = out_channels
9 | if type == 1:
10 | self.filter = nn.Conv2d(in_channels, 2*out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
11 | else:
12 | self.filter = nn.Linear(in_channels, 2*out_channels)
13 |
14 | def forward(self, x):
15 | x = self.filter(x)
16 | out = torch.split(x, self.out_channels, 1)
17 | return torch.max(out[0], out[1])
18 |
19 | class group(nn.Module):
20 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
21 | super(group, self).__init__()
22 | self.conv_a = mfm(in_channels, in_channels, 1, 1, 0)
23 | self.conv = mfm(in_channels, out_channels, kernel_size, stride, padding)
24 |
25 | def forward(self, x):
26 | x = self.conv_a(x)
27 | x = self.conv(x)
28 | return x
29 |
30 | class resblock(nn.Module):
31 | def __init__(self, in_channels, out_channels):
32 | super(resblock, self).__init__()
33 | self.conv1 = mfm(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
34 | self.conv2 = mfm(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
35 |
36 | def forward(self, x):
37 | res = x
38 | out = self.conv1(x)
39 | out = self.conv2(out)
40 | out = out + res
41 | return out
42 |
43 | class network_9layers(nn.Module):
44 | def __init__(self, num_classes=79077):
45 | super(network_9layers, self).__init__()
46 | self.features = nn.Sequential(
47 | mfm(1, 48, 5, 1, 2),
48 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
49 | group(48, 96, 3, 1, 1),
50 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
51 | group(96, 192, 3, 1, 1),
52 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
53 | group(192, 128, 3, 1, 1),
54 | group(128, 128, 3, 1, 1),
55 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
56 | )
57 | self.fc1 = mfm(8*8*128, 256, type=0)
58 | self.fc2 = nn.Linear(256, num_classes)
59 |
60 | def forward(self, x):
61 | x = self.features(x)
62 | x = x.view(x.size(0), -1)
63 | x = self.fc1(x)
64 | x = F.dropout(x, training=self.training)
65 | out = self.fc2(x)
66 | return out, x
67 |
68 | class network_29layers(nn.Module):
69 | def __init__(self, block, layers, num_classes=79077):
70 | super(network_29layers, self).__init__()
71 | self.conv1 = mfm(1, 48, 5, 1, 2)
72 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
73 | self.block1 = self._make_layer(block, layers[0], 48, 48)
74 | self.group1 = group(48, 96, 3, 1, 1)
75 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
76 | self.block2 = self._make_layer(block, layers[1], 96, 96)
77 | self.group2 = group(96, 192, 3, 1, 1)
78 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
79 | self.block3 = self._make_layer(block, layers[2], 192, 192)
80 | self.group3 = group(192, 128, 3, 1, 1)
81 | self.block4 = self._make_layer(block, layers[3], 128, 128)
82 | self.group4 = group(128, 128, 3, 1, 1)
83 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
84 | self.fc = mfm(8*8*128, 256, type=0)
85 | self.fc2 = nn.Linear(256, num_classes)
86 |
87 |
88 | def _make_layer(self, block, num_blocks, in_channels, out_channels):
89 | layers = []
90 | for i in range(0, num_blocks):
91 | layers.append(block(in_channels, out_channels))
92 | return nn.Sequential(*layers)
93 |
94 | def forward(self, x):
95 | x = self.conv1(x)
96 | x = self.pool1(x)
97 |
98 | x = self.block1(x)
99 | x = self.group1(x)
100 | x = self.pool2(x)
101 |
102 | x = self.block2(x)
103 | x = self.group2(x)
104 | x = self.pool3(x)
105 |
106 | x = self.block3(x)
107 | x = self.group3(x)
108 | x = self.block4(x)
109 | x = self.group4(x)
110 | x = self.pool4(x)
111 |
112 | x = x.view(x.size(0), -1)
113 | fc = self.fc(x)
114 | fc = F.dropout(fc, training=self.training)
115 | out = self.fc2(fc)
116 | return out, fc
117 |
118 |
119 | class network_29layers_v2(nn.Module):
120 | def __init__(self, block, layers, num_classes=79077):
121 | super(network_29layers_v2, self).__init__()
122 | self.conv1 = mfm(1, 48, 5, 1, 2)
123 | self.block1 = self._make_layer(block, layers[0], 48, 48)
124 | self.group1 = group(48, 96, 3, 1, 1)
125 | self.block2 = self._make_layer(block, layers[1], 96, 96)
126 | self.group2 = group(96, 192, 3, 1, 1)
127 | self.block3 = self._make_layer(block, layers[2], 192, 192)
128 | self.group3 = group(192, 128, 3, 1, 1)
129 | self.block4 = self._make_layer(block, layers[3], 128, 128)
130 | self.group4 = group(128, 128, 3, 1, 1)
131 | self.fc = nn.Linear(8*8*128, 256)
132 | self.fc2 = nn.Linear(256, num_classes[0], bias=False)
133 |
134 | def _make_layer(self, block, num_blocks, in_channels, out_channels):
135 | layers = []
136 | for i in range(0, num_blocks):
137 | layers.append(block(in_channels, out_channels))
138 | return nn.Sequential(*layers)
139 |
140 | def forward(self, x):
141 | x = self.conv1(x)
142 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
143 |
144 | x = self.block1(x)
145 | x = self.group1(x)
146 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
147 |
148 | x = self.block2(x)
149 | x = self.group2(x)
150 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
151 |
152 | x = self.block3(x)
153 | x = self.group3(x)
154 | x = self.block4(x)
155 | x = self.group4(x)
156 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
157 |
158 | x = x.view(x.size(0), -1)
159 | fc = self.fc(x)
160 | x = F.dropout(fc, training=self.training)
161 |
162 | output = list()
163 | for name, fun in self.fc_dict.items():
164 | out = fun(x)
165 | output.append(out)
166 |
167 | return output, fc
168 |
169 | class network_9layers_templet(nn.Module):
170 | def __init__(self, in_channel):
171 | super(network_9layers_templet, self).__init__()
172 | self.features = nn.Sequential(
173 | mfm(in_channel, 48, 5, 1, 2),
174 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
175 | group(48, 96, 3, 1, 1),
176 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
177 | group(96, 192, 3, 1, 1),
178 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
179 | group(192, 128, 3, 1, 1),
180 | group(128, 128, 3, 1, 1),
181 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
182 | )
183 | self.fc1 = mfm(8*8*128, 256, type=0)
184 |
185 | def forward(self, x):
186 | x = self.features(x)
187 | x = x.view(x.size(0), -1)
188 | x = self.fc1(x)
189 | out = F.dropout(x, training=self.training)
190 | return out
191 |
192 | class network_29layers_v2_templet(nn.Module):
193 | def __init__(self, in_channel, block, layers):
194 | super(network_29layers_v2_templet, self).__init__()
195 | self.conv1 = mfm(in_channel, 48, 5, 1, 2)
196 | self.block1 = self._make_layer(block, layers[0], 48, 48)
197 | self.group1 = group(48, 96, 3, 1, 1)
198 | self.block2 = self._make_layer(block, layers[1], 96, 96)
199 | self.group2 = group(96, 192, 3, 1, 1)
200 | self.block3 = self._make_layer(block, layers[2], 192, 192)
201 | self.group3 = group(192, 256, 3, 1, 1)
202 | self.block4 = self._make_layer(block, layers[3], 256, 256)
203 | self.group4 = group(256, 128, 3, 1, 1)
204 | self.block5 = self._make_layer(block, layers[4], 128, 128)
205 | self.group5 = group(128, 64, 3, 1, 1)
206 | self.block6 = self._make_layer(block, layers[5], 64, 64)
207 | self.group6 = group(64, 64, 3, 1, 1)
208 |
209 | self.fc = nn.Linear(8*8*64, 256)
210 |
211 | def _make_layer(self, block, num_blocks, in_channels, out_channels):
212 | layers = []
213 | for i in range(0, num_blocks):
214 | layers.append(block(in_channels, out_channels))
215 | return nn.Sequential(*layers)
216 |
217 | def forward(self, x):
218 | '''
219 | x = self.conv1(x)
220 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
221 |
222 | x = self.block1(x)
223 | x = self.group1(x)
224 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
225 |
226 | x = self.block2(x)
227 | x = self.group2(x)
228 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
229 |
230 | x = self.block3(x)
231 | x = self.group3(x)
232 | x = self.block4(x)
233 | x = self.group4(x)
234 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
235 |
236 | x = x.view(x.size(0), -1)
237 | fc = self.fc(x)
238 | x = F.dropout(fc, training=self.training)
239 | '''
240 | x = self.conv1(x)
241 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
242 |
243 | x = self.block1(x)
244 | x = self.group1(x)
245 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
246 |
247 | x = self.block2(x)
248 | x = self.group2(x)
249 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
250 |
251 | x = self.block3(x)
252 | x = self.group3(x)
253 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
254 |
255 | x = self.block4(x)
256 | x = self.group4(x)
257 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
258 |
259 | x = self.block5(x)
260 | x = self.group5(x)
261 | x = self.block6(x)
262 | x = self.group6(x)
263 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
264 |
265 | x = x.view(x.size(0), -1)
266 | fc = self.fc(x)
267 | x = F.dropout(fc, training=self.training)
268 | return x
269 |
270 |
271 | def LightCNN_9Layers(**kwargs):
272 | model = network_9layers(**kwargs)
273 | return model
274 |
275 | def LightCNN_29Layers(**kwargs):
276 | model = network_29layers(resblock, [1, 2, 3, 4], **kwargs)
277 | return model
278 |
279 | def LightCNN_29Layers_v2(**kwargs):
280 | model = network_29layers_v2(resblock, [1, 2, 3, 4], **kwargs)
281 | return model
282 |
283 | def LightCNN_9Layers_templet(in_channel, pretrained=False):
284 | model = network_9layers_templet(in_channel)
285 | return model
286 |
287 | def LightCNN_29Layers_v2_templet(in_channel, pretrained=False):
288 | model = network_29layers_v2_templet(in_channel, resblock, [1,2,3,4,5,6])
289 | return model
290 |
291 |
292 | if __name__ == "__main__":
293 | model = LightCNN_29Layers_v2_templet(3)
294 | print(model)
--------------------------------------------------------------------------------
/metadata/models/model.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import torch
3 | import logging
4 | from torch.autograd import Variable
5 |
6 | from .build_model import BuildMultiLabelModel, LoadPretrainedModel
7 | from .lightcnn import LightCNN_29Layers_v2_templet, LightCNN_9Layers_templet
8 | from .alexnet import AlexnetTemplet
9 | from .resnet import Resnet50Templet
10 | from .vgg import VGG16Templet
11 |
12 | def load_model(opt, num_classes):
13 | # load templet
14 | if opt.model == "Alexnet":
15 | templet = AlexnetTemplet(opt.input_channel, opt.pretrain)
16 | elif opt.model == "LightenB":
17 | templet = LightCNN_29Layers_v2_templet(opt.input_channel, opt.pretrain)
18 | elif opt.model == "Lighten9":
19 | templet = LightCNN_9Layers_templet(opt.input_channel, opt.pretrain)
20 | elif opt.model == "Resnet50":
21 | templet = Resnet50Templet(opt.input_channel, opt.pretrain)
22 | elif opt.model == "VGG16":
23 | templet = VGG16Templet(opt.input_channel, opt.pretrain)
24 | else:
25 | logging.error("unknown model type")
26 | sys.exit(0)
27 |
28 | # build model
29 | tmp_input = Variable(torch.FloatTensor(1, opt.input_channel, opt.input_size, opt.input_size))
30 | if opt.model == "LightenB":
31 | tmp_output = templet(tmp_input)
32 | else:
33 | tmp_output = templet(tmp_input)
34 | output_dim = int(tmp_output.size()[-1])
35 | model = BuildMultiLabelModel(templet, output_dim, num_classes)
36 | logging.info(model)
37 |
38 | # imagenet pretrain model
39 | if opt.pretrain:
40 | logging.info("use imagenet pretrained model")
41 |
42 | # load existing model
43 | if opt.checkpoint_name != "":
44 | if os.path.exists(opt.checkpoint_name):
45 | logging.info("load pretrained model from "+opt.checkpoint_name)
46 | model.load_state_dict(torch.load(opt.checkpoint_name))
47 | elif os.path.exists(opt.model_dir):
48 | checkpoint_name = opt.model_dir + "/" + opt.checkpoint_name
49 | model.load_state_dict(torch.load(checkpoint_name))
50 | logging.info("load pretrained model from "+ checkpoint_name)
51 | else:
52 | opt.checkpoint_name = ""
53 | logging.warning("WARNING: unknown pretrained model, skip it.")
54 |
55 | return model
56 |
57 | def save_model(model, opt, epoch):
58 | checkpoint_name = opt.model_dir + "/epoch_%s_snapshot.pth" %(epoch)
59 | torch.save(model.cpu().state_dict(), checkpoint_name)
60 | if opt.cuda and torch.cuda.is_available():
61 | model.cuda(opt.devices[0])
62 |
63 | def modify_last_layer_lr(named_params, base_lr, lr_mult_w, lr_mult_b):
64 | params = list()
65 | for name, param in named_params:
66 | if 'bias' in name:
67 | if 'FullyConnectedLayer_' in name:
68 | params += [{'params':param, 'lr': base_lr * lr_mult_b, 'weight_decay': 0}]
69 | else:
70 | params += [{'params':param, 'lr': base_lr * 2, 'weight_decay': 0}]
71 | else:
72 | if 'FullyConnectedLayer_' in name:
73 | params += [{'params':param, 'lr': base_lr * lr_mult_w}]
74 | else:
75 | params += [{'params':param, 'lr': base_lr * 1}]
76 | return params
77 |
--------------------------------------------------------------------------------
/metadata/models/resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import math
3 | import torch.utils.model_zoo as model_zoo
4 | from models.build_model import LoadPretrainedModel
5 |
6 |
7 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
8 | 'resnet152']
9 |
10 |
11 | model_urls = {
12 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
13 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
14 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
15 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
16 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
17 | }
18 |
19 |
20 | def conv3x3(in_planes, out_planes, stride=1):
21 | "3x3 convolution with padding"
22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
23 | padding=1, bias=False)
24 |
25 |
26 | class BasicBlock(nn.Module):
27 | expansion = 1
28 |
29 | def __init__(self, inplanes, planes, stride=1, downsample=None):
30 | super(BasicBlock, self).__init__()
31 | self.conv1 = conv3x3(inplanes, planes, stride)
32 | self.bn1 = nn.BatchNorm2d(planes)
33 | self.relu = nn.ReLU(inplace=True)
34 | self.conv2 = conv3x3(planes, planes)
35 | self.bn2 = nn.BatchNorm2d(planes)
36 | self.downsample = downsample
37 | self.stride = stride
38 |
39 | def forward(self, x):
40 | residual = x
41 |
42 | out = self.conv1(x)
43 | out = self.bn1(out)
44 | out = self.relu(out)
45 |
46 | out = self.conv2(out)
47 | out = self.bn2(out)
48 |
49 | if self.downsample is not None:
50 | residual = self.downsample(x)
51 |
52 | out += residual
53 | out = self.relu(out)
54 |
55 | return out
56 |
57 |
58 | class Bottleneck(nn.Module):
59 | expansion = 4
60 |
61 | def __init__(self, inplanes, planes, stride=1, downsample=None):
62 | super(Bottleneck, self).__init__()
63 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
64 | self.bn1 = nn.BatchNorm2d(planes)
65 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
66 | padding=1, bias=False)
67 | self.bn2 = nn.BatchNorm2d(planes)
68 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
69 | self.bn3 = nn.BatchNorm2d(planes * 4)
70 | self.relu = nn.ReLU(inplace=True)
71 | self.downsample = downsample
72 | self.stride = stride
73 |
74 | def forward(self, x):
75 | residual = x
76 |
77 | out = self.conv1(x)
78 | out = self.bn1(out)
79 | out = self.relu(out)
80 |
81 | out = self.conv2(out)
82 | out = self.bn2(out)
83 | out = self.relu(out)
84 |
85 | out = self.conv3(out)
86 | out = self.bn3(out)
87 |
88 | if self.downsample is not None:
89 | residual = self.downsample(x)
90 |
91 | out += residual
92 | out = self.relu(out)
93 |
94 | return out
95 |
96 |
97 | class ResNet(nn.Module):
98 |
99 | def __init__(self, block, layers, num_classes=1000):
100 | self.inplanes = 64
101 | super(ResNet, self).__init__()
102 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
103 | bias=False)
104 | self.bn1 = nn.BatchNorm2d(64)
105 | self.relu = nn.ReLU(inplace=True)
106 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
107 | self.layer1 = self._make_layer(block, 64, layers[0])
108 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
109 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
110 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
111 | self.avgpool = nn.AvgPool2d(7, stride=1)
112 | self.fc = nn.Linear(512 * block.expansion, num_classes)
113 |
114 | for m in self.modules():
115 | if isinstance(m, nn.Conv2d):
116 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
117 | m.weight.data.normal_(0, math.sqrt(2. / n))
118 | elif isinstance(m, nn.BatchNorm2d):
119 | m.weight.data.fill_(1)
120 | m.bias.data.zero_()
121 |
122 | def _make_layer(self, block, planes, blocks, stride=1):
123 | downsample = None
124 | if stride != 1 or self.inplanes != planes * block.expansion:
125 | downsample = nn.Sequential(
126 | nn.Conv2d(self.inplanes, planes * block.expansion,
127 | kernel_size=1, stride=stride, bias=False),
128 | nn.BatchNorm2d(planes * block.expansion),
129 | )
130 |
131 | layers = []
132 | layers.append(block(self.inplanes, planes, stride, downsample))
133 | self.inplanes = planes * block.expansion
134 | for i in range(1, blocks):
135 | layers.append(block(self.inplanes, planes))
136 |
137 | return nn.Sequential(*layers)
138 |
139 | def forward(self, x):
140 | x = self.conv1(x)
141 | x = self.bn1(x)
142 | x = self.relu(x)
143 | x = self.maxpool(x)
144 |
145 | x = self.layer1(x)
146 | x = self.layer2(x)
147 | x = self.layer3(x)
148 | x = self.layer4(x)
149 |
150 | x = self.avgpool(x)
151 | x = x.view(x.size(0), -1)
152 | x = self.fc(x)
153 |
154 | return x
155 |
156 | class ResNetTemplet(nn.Module):
157 |
158 | def __init__(self, block, layers, input_channel):
159 | self.inplanes = 64
160 | super(ResNetTemplet, self).__init__()
161 | self.conv1 = nn.Conv2d(input_channel, 64, kernel_size=7, stride=2, padding=3,
162 | bias=False)
163 | self.bn1 = nn.BatchNorm2d(64)
164 | self.relu = nn.ReLU(inplace=True)
165 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
166 | self.layer1 = self._make_layer(block, 64, layers[0])
167 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
168 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
169 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
170 | self.avgpool = nn.AvgPool2d(7, stride=1)
171 | self.fc = nn.Linear(512 * block.expansion, 1000)
172 |
173 | for m in self.modules():
174 | if isinstance(m, nn.Conv2d):
175 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
176 | m.weight.data.normal_(0, math.sqrt(2. / n))
177 | elif isinstance(m, nn.BatchNorm2d):
178 | m.weight.data.fill_(1)
179 | m.bias.data.zero_()
180 |
181 | def _make_layer(self, block, planes, blocks, stride=1):
182 | downsample = None
183 | if stride != 1 or self.inplanes != planes * block.expansion:
184 | downsample = nn.Sequential(
185 | nn.Conv2d(self.inplanes, planes * block.expansion,
186 | kernel_size=1, stride=stride, bias=False),
187 | nn.BatchNorm2d(planes * block.expansion),
188 | )
189 | layers = []
190 | layers.append(block(self.inplanes, planes, stride, downsample))
191 | self.inplanes = planes * block.expansion
192 | for i in range(1, blocks):
193 | layers.append(block(self.inplanes, planes))
194 | return nn.Sequential(*layers)
195 |
196 | def forward(self, x):
197 | x = self.conv1(x)
198 | x = self.bn1(x)
199 | x = self.relu(x)
200 | x = self.maxpool(x)
201 |
202 | x = self.layer1(x)
203 | x = self.layer2(x)
204 | x = self.layer3(x)
205 | x = self.layer4(x)
206 |
207 | x = self.avgpool(x)
208 | x = x.view(x.size(0), -1)
209 |
210 | return x
211 |
212 |
213 | def resnet18(pretrained=False, **kwargs):
214 | """Constructs a ResNet-18 model.
215 |
216 | Args:
217 | pretrained (bool): If True, returns a model pre-trained on ImageNet
218 | """
219 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
220 | if pretrained:
221 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
222 | return model
223 |
224 |
225 | def Resnet18Templet(input_channel, pretrained=False, **kwargs):
226 | """Constructs a ResNet-18 model.
227 |
228 | Args:
229 | pretrained (bool): If True, returns a model pre-trained on ImageNet
230 | """
231 | model = ResNetTemplet(BasicBlock, [2, 2, 2, 2], input_channel, **kwargs)
232 | if pretrained:
233 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet18']))
234 | model.load_state_dict(model_dict)
235 | return model
236 |
237 |
238 | def resnet34(pretrained=False, **kwargs):
239 | """Constructs a ResNet-34 model.
240 |
241 | Args:
242 | pretrained (bool): If True, returns a model pre-trained on ImageNet
243 | """
244 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
245 | if pretrained:
246 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
247 | return model
248 |
249 |
250 | def Resnet34Templet(input_channel, pretrained=False, **kwargs):
251 | """Constructs a ResNet-34 model.
252 |
253 | Args:
254 | pretrained (bool): If True, returns a model pre-trained on ImageNet
255 | """
256 | model = ResNetTemplet(BasicBlock, [3, 4, 6, 3], input_channel, **kwargs)
257 | if pretrained:
258 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet34']))
259 | model.load_state_dict(model_dict)
260 | return model
261 |
262 |
263 | def resnet50(pretrained=False, **kwargs):
264 | """Constructs a ResNet-50 model.
265 |
266 | Args:
267 | pretrained (bool): If True, returns a model pre-trained on ImageNet
268 | """
269 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
270 | if pretrained:
271 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
272 | return model
273 |
274 |
275 | def Resnet50Templet(input_channel, pretrained=False, **kwargs):
276 | """Constructs a ResNet-50 model.
277 |
278 | Args:
279 | pretrained (bool): If True, returns a model pre-trained on ImageNet
280 | """
281 | model = ResNetTemplet(Bottleneck, [3, 4, 6, 3], input_channel, **kwargs)
282 | if pretrained:
283 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet50']))
284 | model.load_state_dict(model_dict)
285 | return model
286 |
287 |
288 | def resnet101(pretrained=False, **kwargs):
289 | """Constructs a ResNet-101 model.
290 |
291 | Args:
292 | pretrained (bool): If True, returns a model pre-trained on ImageNet
293 | """
294 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
295 | if pretrained:
296 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
297 | return model
298 |
299 | def Resnet101Templet(input_channel, pretrained=False, **kwargs):
300 | """Constructs a ResNet-101 model.
301 |
302 | Args:
303 | pretrained (bool): If True, returns a model pre-trained on ImageNet
304 | """
305 | model = ResNetTemplet(Bottleneck, [3, 4, 23, 3], input_channel, **kwargs)
306 | if pretrained:
307 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet101']))
308 | model.load_state_dict(model_dict)
309 | return model
310 |
311 | def resnet152(pretrained=False, **kwargs):
312 | """Constructs a ResNet-152 model.
313 |
314 | Args:
315 | pretrained (bool): If True, returns a model pre-trained on ImageNet
316 | """
317 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
318 | if pretrained:
319 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
320 | return model
321 |
322 | def Resnet152Templet(input_channel, pretrained=False, **kwargs):
323 | """Constructs a ResNet-152 model.
324 |
325 | Args:
326 | pretrained (bool): If True, returns a model pre-trained on ImageNet
327 | """
328 | model = ResNetTemplet(Bottleneck, [3, 8, 36, 3], input_channel, **kwargs)
329 | if pretrained:
330 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet152']))
331 | model.load_state_dict(model_dict)
332 | return model
333 |
334 |
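The Templet variants above differ from the plain ResNet classes in that their forward() stops after global average pooling and returns the flattened feature vector instead of class scores, so a task-specific head can be attached elsewhere (e.g. in build_model.py). A minimal sketch of exercising such a backbone follows; the import path `models.resnet` is an assumption based on this repository layout.

```python
import torch
from models.resnet import Resnet18Templet  # assumed import path for this file

# Build an 18-layer feature backbone for 3-channel input; pretrained=False skips the download.
backbone = Resnet18Templet(input_channel=3, pretrained=False)
backbone.eval()

# ResNet expects 224x224 inputs so that AvgPool2d(7) collapses the final 7x7 feature map.
dummy = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    feats = backbone(dummy)
print(feats.shape)  # (1, 512): 512 * BasicBlock.expansion
```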
--------------------------------------------------------------------------------
/metadata/models/vgg.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.utils.model_zoo as model_zoo
3 | import math
4 | from models.build_model import LoadPretrainedModel
5 |
6 | __all__ = [
7 | 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
8 | 'vgg19_bn', 'vgg19',
9 | ]
10 |
11 |
12 | model_urls = {
13 | 'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
14 | 'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
15 | 'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
16 | 'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
17 | 'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
18 | 'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
19 | 'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
20 | 'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
21 | }
22 |
23 |
24 | class VGG(nn.Module):
25 |
26 | def __init__(self, features, num_classes=1000):
27 | super(VGG, self).__init__()
28 | self.features = features
29 | self.classifier = nn.Sequential(
30 | nn.Linear(512 * 7 * 7, 4096),
31 | nn.ReLU(True),
32 | nn.Dropout(),
33 | nn.Linear(4096, 4096),
34 | nn.ReLU(True),
35 | nn.Dropout(),
36 | nn.Linear(4096, num_classes),
37 | )
38 | self._initialize_weights()
39 |
40 | def forward(self, x):
41 | x = self.features(x)
42 | x = x.view(x.size(0), -1)
43 | x = self.classifier(x)
44 | return x
45 |
46 | def _initialize_weights(self):
47 | for m in self.modules():
48 | if isinstance(m, nn.Conv2d):
49 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
50 | m.weight.data.normal_(0, math.sqrt(2. / n))
51 | if m.bias is not None:
52 | m.bias.data.zero_()
53 | elif isinstance(m, nn.BatchNorm2d):
54 | m.weight.data.fill_(1)
55 | m.bias.data.zero_()
56 | elif isinstance(m, nn.Linear):
57 | m.weight.data.normal_(0, 0.01)
58 | m.bias.data.zero_()
59 |
60 | class VGGTemplet(nn.Module):
61 |
62 | def __init__(self, features):
63 | super(VGGTemplet, self).__init__()
64 | self.features = features
65 | self.classifier = nn.Sequential(
66 | nn.Linear(512 * 7 * 7, 4096),
67 | nn.ReLU(True),
68 | nn.Dropout(),
69 | nn.Linear(4096, 4096),
70 | nn.ReLU(True),
71 | nn.Dropout(),
72 | #nn.Linear(4096, num_classes),
73 | )
74 | self._initialize_weights()
75 |
76 | def forward(self, x):
77 | x = self.features(x)
78 | x = x.view(x.size(0), -1)
79 | x = self.classifier(x)
80 | return x
81 |
82 | def _initialize_weights(self):
83 | for m in self.modules():
84 | if isinstance(m, nn.Conv2d):
85 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
86 | m.weight.data.normal_(0, math.sqrt(2. / n))
87 | if m.bias is not None:
88 | m.bias.data.zero_()
89 | elif isinstance(m, nn.BatchNorm2d):
90 | m.weight.data.fill_(1)
91 | m.bias.data.zero_()
92 | elif isinstance(m, nn.Linear):
93 | m.weight.data.normal_(0, 0.01)
94 | m.bias.data.zero_()
95 |
96 | def make_layers(cfg, in_channels, batch_norm=False):
97 | layers = []
98 | #in_channels = 3
99 | for v in cfg:
100 | if v == 'M':
101 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
102 | else:
103 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
104 | if batch_norm:
105 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
106 | else:
107 | layers += [conv2d, nn.ReLU(inplace=True)]
108 | in_channels = v
109 | return nn.Sequential(*layers)
110 |
111 |
112 | cfg = {
113 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
114 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
115 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
116 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
117 | }
118 |
119 |
120 | def vgg11(input_channel=3, pretrained=False, **kwargs):
121 | """VGG 11-layer model (configuration "A")
122 |
123 | Args:
124 | pretrained (bool): If True, returns a model pre-trained on ImageNet
125 | """
126 | model = VGG(make_layers(cfg['A'], input_channel), **kwargs)
127 | if pretrained:
128 | model.load_state_dict(model_zoo.load_url(model_urls['vgg11']))
129 | return model
130 |
131 | def VGG11Templet(input_channel=3, pretrained=False, **kwargs):
132 | """VGG 11-layer model (configuration "A")
133 |
134 | Args:
135 | pretrained (bool): If True, returns a model pre-trained on ImageNet
136 | """
137 | model = VGGTemplet(make_layers(cfg['A'], input_channel), **kwargs)
138 | if pretrained:
139 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg11']))
140 | model.load_state_dict(model_dict)
141 | return model
142 |
143 | def vgg11_bn(input_channel=3, pretrained=False, **kwargs):
144 | """VGG 11-layer model (configuration "A") with batch normalization
145 |
146 | Args:
147 | pretrained (bool): If True, returns a model pre-trained on ImageNet
148 | """
149 | model = VGG(make_layers(cfg['A'], input_channel, batch_norm=True), **kwargs)
150 | if pretrained:
151 | model.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']))
152 | return model
153 |
154 | def VGG11BNTemplet(input_channel=3, pretrained=False, **kwargs):
155 | """VGG 11-layer model (configuration "A")
156 |
157 | Args:
158 | pretrained (bool): If True, returns a model pre-trained on ImageNet
159 | """
160 | model = VGGTemplet(make_layers(cfg['A'], input_channel, batch_norm=True), **kwargs)
161 | if pretrained:
162 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg11_bn']))
163 | model.load_state_dict(model_dict)
164 | return model
165 |
166 | def vgg13(input_channel=3, pretrained=False, **kwargs):
167 | """VGG 13-layer model (configuration "B")
168 |
169 | Args:
170 | pretrained (bool): If True, returns a model pre-trained on ImageNet
171 | """
172 | model = VGG(make_layers(cfg['B'], input_channel), **kwargs)
173 | if pretrained:
174 | model.load_state_dict(model_zoo.load_url(model_urls['vgg13']))
175 | return model
176 |
177 | def VGG13Templet(input_channel=3, pretrained=False, **kwargs):
178 | """VGG 13-layer model (configuration "B")
179 |
180 | Args:
181 | pretrained (bool): If True, returns a model pre-trained on ImageNet
182 | """
183 | model = VGGTemplet(make_layers(cfg['B'], input_channel), **kwargs)
184 | if pretrained:
185 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg13']))
186 | model.load_state_dict(model_dict)
187 | return model
188 |
189 | def vgg13_bn(input_channel=3, pretrained=False, **kwargs):
190 | """VGG 13-layer model (configuration "B") with batch normalization
191 |
192 | Args:
193 | pretrained (bool): If True, returns a model pre-trained on ImageNet
194 | """
195 | model = VGG(make_layers(cfg['B'], input_channel, batch_norm=True), **kwargs)
196 | if pretrained:
197 | model.load_state_dict(model_zoo.load_url(model_urls['vgg13_bn']))
198 | return model
199 |
200 | def VGG13BNTemplet(input_channel=3, pretrained=False, **kwargs):
201 | """VGG 13-layer model (configuration "B") with batch normalization
202 |
203 | Args:
204 | pretrained (bool): If True, returns a model pre-trained on ImageNet
205 | """
206 | model = VGGTemplet(make_layers(cfg['B'], input_channel, batch_norm=True), **kwargs)
207 | if pretrained:
208 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg13_bn']))
209 | model.load_state_dict(model_dict)
210 | return model
211 |
212 | def vgg16(input_channel=3, pretrained=False, **kwargs):
213 | """VGG 16-layer model (configuration "D")
214 |
215 | Args:
216 | pretrained (bool): If True, returns a model pre-trained on ImageNet
217 | """
218 | model = VGG(make_layers(cfg['D'], input_channel), **kwargs)
219 | if pretrained:
220 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16']))
221 | return model
222 |
223 | def VGG16Templet(input_channel=3, pretrained=False, **kwargs):
224 | """VGG 16-layer model (configuration "D")
225 |
226 | Args:
227 | pretrained (bool): If True, returns a model pre-trained on ImageNet
228 | """
229 | model = VGGTemplet(make_layers(cfg['D'], input_channel), **kwargs)
230 | if pretrained:
231 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg16']))
232 | model.load_state_dict(model_dict)
233 | return model
234 |
235 | def vgg16_bn(input_channel=3, pretrained=False, **kwargs):
236 | """VGG 16-layer model (configuration "D") with batch normalization
237 |
238 | Args:
239 | pretrained (bool): If True, returns a model pre-trained on ImageNet
240 | """
241 | model = VGG(make_layers(cfg['D'], input_channel, batch_norm=True), **kwargs)
242 | if pretrained:
243 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn']))
244 | return model
245 |
246 | def VGG16BNTemplet(input_channel=3, pretrained=False, **kwargs):
247 | """VGG 16-layer model (configuration "D") with batch normalization
248 |
249 | Args:
250 | pretrained (bool): If True, returns a model pre-trained on ImageNet
251 | """
252 | model = VGGTemplet(make_layers(cfg['D'], input_channel, batch_norm=True), **kwargs)
253 | if pretrained:
254 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg16_bn']))
255 | model.load_state_dict(model_dict)
256 | return model
257 |
258 | def vgg19(input_channel=3, pretrained=False, **kwargs):
259 | """VGG 19-layer model (configuration "E")
260 |
261 | Args:
262 | pretrained (bool): If True, returns a model pre-trained on ImageNet
263 | """
264 | model = VGG(make_layers(cfg['E'], input_channel), **kwargs)
265 | if pretrained:
266 | model.load_state_dict(model_zoo.load_url(model_urls['vgg19']))
267 | return model
268 |
269 | def VGG19Templet(input_channel=3, pretrained=False, **kwargs):
270 | """VGG 19-layer model (configuration "E")
271 |
272 | Args:
273 | pretrained (bool): If True, returns a model pre-trained on ImageNet
274 | """
275 | model = VGGTemplet(make_layers(cfg['E'], input_channel), **kwargs)
276 | if pretrained:
277 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg19']))
278 | model.load_state_dict(model_dict)
279 | return model
280 |
281 | def vgg19_bn(input_channel=3, pretrained=False, **kwargs):
282 | """VGG 19-layer model (configuration 'E') with batch normalization
283 |
284 | Args:
285 | pretrained (bool): If True, returns a model pre-trained on ImageNet
286 | """
287 | model = VGG(make_layers(cfg['E'], input_channel, batch_norm=True), **kwargs)
288 | if pretrained:
289 | model.load_state_dict(model_zoo.load_url(model_urls['vgg19_bn']))
290 | return model
291 |
292 | def VGG19BNTemplet(input_channel=3, pretrained=False, **kwargs):
293 | """VGG 19-layer model (configuration 'E') with batch normalization
294 |
295 | Args:
296 | pretrained (bool): If True, returns a model pre-trained on ImageNet
297 | """
298 | model = VGGTemplet(make_layers(cfg['E'], input_channel, batch_norm=True), **kwargs)
299 | if pretrained:
300 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg19_bn']))
301 | model.load_state_dict(model_dict)
302 | return model
303 |
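As with the ResNet file, the *Templet constructors return a truncated network: the final nn.Linear to num_classes is commented out, so forward() yields the 4096-dimensional output of the second fully connected layer. A quick sanity-check sketch, again assuming the file is importable as `models.vgg`:

```python
import torch
from models.vgg import VGG16BNTemplet  # assumed import path for this file

# VGG-16 feature extractor with batch norm for RGB input.
net = VGG16BNTemplet(input_channel=3, pretrained=False)
net.eval()

# With a 224x224 input, the five 'M' poolings in cfg['D'] leave a 7x7x512 map,
# which the truncated classifier maps to a 4096-d feature vector.
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    out = net(x)
print(out.shape)  # (1, 4096)
```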
--------------------------------------------------------------------------------
/metadata/options/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/options/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/options/__pycache__/options.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/options/__pycache__/options.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/options/options.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import argparse
4 |
5 | class Options():
6 | def __init__(self):
7 | self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
8 |
9 | self.parser.add_argument('--dir', required=True, default='./', help='path to the data directory containing data.txt and label.txt')
10 | self.parser.add_argument('--name', required=True, default='test', help='subdirectory name for training or testing; snapshots, the split dataset and test results are stored here')
11 | self.parser.add_argument('--mode', required=True, default='Train', help='run mode of training or testing. [Train | Test | train | test]')
12 | self.parser.add_argument('--model', required=True, default='LightenB', help='model type. [Alexnet | LightenB | VGG16 | Resnet18 | ...]')
13 | self.parser.add_argument('--load_size', type=int, default=512, help='scale image to this size in preparation for cropping')
14 | self.parser.add_argument('--input_size', type=int, default=512, help='then crop image to this size as the network input')
15 | self.parser.add_argument('--ratio', type=str, default='[1, 0, 0]', help='ratio of the whole dataset used for Train, Validate, Test respectively')
16 | self.parser.add_argument('--batch_size', type=int, default=1, help='batch size of network input. Note that batch_size should only be set to 1 in Test mode')
17 | self.parser.add_argument('--shuffle', action='store_true', help='default false. If true, data will be shuffled when splitting the dataset and within batches')
18 | self.parser.add_argument('--flip', action='store_true', help='if true, flip the image randomly before feeding it into the network')
19 | self.parser.add_argument('--region', action='store_false', help='if true, crop image by input box')
20 | self.parser.add_argument('--load_thread', type=int, default=2, help='how many subprocesses to use for data loading')
21 | self.parser.add_argument('--crop', type=str, default='NoCrop', help='crop type, candidates are [NoCrop | RandomCrop | CenterCrop | FiveCrop | TenCrop]')
22 | self.parser.add_argument('--gray', action='store_true', help='default false. If true, image will be converted to grayscale')
23 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids, e.g. 0 or 0,1,2 or 0,2; use -1 for CPU')
24 | self.parser.add_argument('--box_ratio', type=float, default=-1, help='modify box ratio of width and height to specified ratio')
25 | self.parser.add_argument('--box_scale', type=float, default=1.0, help='scale box to specified ratio. Default 1.0 means no change')
26 | self.parser.add_argument('--input_channel', type=int, default=3, help='set input image channel, 1 for gray and 3 for color')
27 | self.parser.add_argument('--mean', type=str, default='(0.485, 0.456, 0.406)', help='sequence of means for each channel used for normalization')
28 | self.parser.add_argument('--std', type=str, default='(0.229, 0.224, 0.225)', help='sequence of standard deviations for each channel used for normalization')
29 | self.parser.add_argument('--padding', action='store_true', help='default false. If true, image will be padded if scaled box is out of image boundary')
30 | self.parser.add_argument('--checkpoint_name', type=str, default='_YOUR_MODEL_PATH_', help='path to pretrained model or model to deploy')
31 | self.parser.add_argument('--pretrain', action='store_true', help='default false. If true, load a pretrained model to initialize the model state_dict')
32 | ## for train
33 | self.parser.add_argument('--validate_ratio', type=float, default=1, help='ratio of the validation set used when validating the model')
34 | self.parser.add_argument('--sum_epoch', type=int, default=200, help='total number of epochs for training')
35 | self.parser.add_argument('--save_epoch_freq', type=int, default=10, help='save a snapshot every $save_epoch_freq training epochs')
36 | self.parser.add_argument('--save_batch_iter_freq', type=int, default=2000, help='save a snapshot every $save_batch_iter_freq training iterations')
37 | self.parser.add_argument('--lr', type=float, default=0.00001, help='initial learning rate')
38 | self.parser.add_argument('--gamma', type=float, default=0.1, help='multiplicative factor of learning rate decay.')
39 | self.parser.add_argument('--lr_mult_w', type=float, default=20, help='learning rate of W of last layer parameter will be lr*lr_mult_w')
40 | self.parser.add_argument('--lr_mult_b', type=float, default=20, help='learning rate of b of last layer parameter will be lr*lr_mult_b')
41 | self.parser.add_argument('--lr_policy', type=str, default='step', help='learning rate policy: lambda|step|plateau')
42 | self.parser.add_argument('--lr_decay_in_epoch', type=int, default=50, help='multiply by a gamma every lr_decay_in_epoch iterations')
43 | self.parser.add_argument('--momentum', type=float, default=0.5, help='momentum of SGD')
44 | self.parser.add_argument('--weight_decay', type=float, default=1e-5, help='weight decay of the optimizer')
45 | self.parser.add_argument('--loss_weight', type=str, default='', help='list. Loss weights for the cross entropy loss. For example, set $loss_weight to [1, 0.8, 0.8] for a 3-label classification')
46 |
47 | ## for test
48 | self.parser.add_argument('--top_k', type=str, default='(3,)', help='tuple. We only take top k classification results into accuracy consideration')
49 | self.parser.add_argument('--score_thres', type=str, default='0.1', help='float or list. We only take classification results whose score is bigger than score_thres into recall consideration')
50 | # these two params below are used only in deploy.py
51 | self.parser.add_argument('--label_file', type=str, default="", help='label file only for deploy a checkpoint model')
52 | self.parser.add_argument('--classify_dir', type=str, default="", help='directory where data.txt to be classified exists')
53 |
54 | ## for visualization
55 | self.parser.add_argument('--display_winsize', type=int, default=128, help='display window size')
56 | self.parser.add_argument('--display_id', type=int, default=1, help='window id of the web display. Less than 1 will display nothing')
57 | self.parser.add_argument('--display_port', type=int, default=8097, help='port of visdom server for web display. Result will show on `localhost:$display_port`')
58 | self.parser.add_argument('--image_ncols', type=int, default=0, help='if positive, display all images in a single visdom web panel with certain number of images per row.')
59 | self.parser.add_argument('--html', action='store_false', help='default true. If given, do not save intermediate training results to [opt.dir]/[opt.name]/web/')
60 | self.parser.add_argument('--update_html_freq', type=int, default=10, help='frequency of saving training results to html')
61 | self.parser.add_argument('--display_train_freq', type=int, default=10, help='print train loss and accuracy every $train_freq batch iterations')
62 | self.parser.add_argument('--display_validate_freq', type=int, default=10, help='evaluate the validation dataset every $validate_freq batch iterations')
63 | self.parser.add_argument('--display_data_freq', type=int, default=10, help='frequency of showing training data on web browser')
64 | self.parser.add_argument('--display_image_ratio', type=float, default=1.0, help='ratio of images in a batch showing on web browser')
65 |
66 | def parse(self):
67 | opt = self.parser.parse_args()
68 |
69 | # mode
70 | if opt.mode not in ["Train", "Test", "Test-Train", "train", "test","test-train"]:
71 | raise Exception("cannot recognize flag `mode`")
72 | opt.mode = opt.mode.capitalize()
73 | if opt.mode == "Test":
74 | opt.batch_size = 1
75 | opt.shuffle = False
76 |
77 | # devices id
78 | gpu_ids = opt.gpu_ids.split(',')
79 | opt.devices = []
80 | for id in gpu_ids:
81 | if eval(id) >= 0:
82 | opt.devices.append(eval(id))
83 | # cuda
84 | opt.cuda = False
85 | if len(opt.devices) > 0 and torch.cuda.is_available():
86 | opt.cuda = True
87 |
88 |
89 | opt.top_k = eval(opt.top_k)
90 | opt.mean = eval(opt.mean)
91 | opt.std = eval(opt.std)
92 | opt.ratio = eval(opt.ratio)
93 | if opt.loss_weight == "":
94 | opt.loss_weight=None
95 | else:
96 | opt.loss_weight = torch.FloatTensor(eval(opt.loss_weight))
97 |
98 | return opt
99 |
100 | if __name__ == "__main__":
101 | op = Options()
102 | op.parse()
103 |
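Note that parse() runs eval() on several string-valued flags (--top_k, --mean, --std, --ratio, --loss_weight), so list- and tuple-valued options must be passed as Python literals. Below is a hedged sketch of driving the parser programmatically; the script name and flag values are illustrative only, and the import path `options.options` is assumed from this repository layout.

```python
import sys
from options.options import Options  # assumed import path for this file

# Illustrative argv; --dir/--name/--mode/--model are required by the parser.
sys.argv = [
    'multi_label_classifier.py',
    '--dir', './',
    '--name', 'demo',
    '--mode', 'Train',
    '--model', 'Resnet18',
    '--batch_size', '32',
    '--gpu_ids', '0',
    '--top_k', '(1,2)',
    '--loss_weight', '[1, 0.8, 0.8]',
]
opt = Options().parse()
print(opt.top_k)        # (1, 2), eval()'d from the string flag
print(opt.mean)         # (0.485, 0.456, 0.406)
print(opt.loss_weight)  # a FloatTensor built from the literal
print(opt.cuda)         # True only if a GPU is visible
```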
--------------------------------------------------------------------------------
/metadata/reformat-log.py:
--------------------------------------------------------------------------------
1 | import os,re
2 | rawlogpath = './test.log'
3 | newlogpath = './newtest.log'
4 | f = open(rawlogpath,'r')
5 | cnt = 0
6 | attr = []
7 |
8 | for line in f:
9 | if line[0:2] == '[[':
10 | tmp = []
11 | for x in re.split('\[|\n| ',line):
12 | if x!='':
13 | tmp.append(x)
14 | elif line[0:4]=='test':
15 | pass
16 | elif line[0:4]=='data':
17 | break
18 | elif line[-3:] == ']]\n':
19 | for x in re.split('\]|\n| ',line):
20 | if x!='':
21 | tmp.append(x)
22 | attr.append(tmp)
23 | else:
24 | for x in re.split('\n| ',line):
25 | if x!='':
26 | tmp.append(x)
27 | cnt+=1
28 | f.close()
29 |
30 | f = open(newlogpath,'w')
31 | for cnt in range(int(len(attr)/3)):
32 | f.write('test %s image \n'%(cnt))
33 | for x in attr[cnt*3]:
34 | f.write(x+' ')
35 | f.write('\n')
36 |
37 | for x in attr[cnt*3+1]:
38 | f.write(x)
39 | f.write('\n')
40 |
41 | for x in attr[cnt*3+2]:
42 | f.write(x)
43 | f.write('\n')
44 |
45 | print(len(attr))
46 | f.close()
47 |
--------------------------------------------------------------------------------
/metadata/util/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/util/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/util/__pycache__/html.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/util/__pycache__/html.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/util/__pycache__/util.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/util/__pycache__/util.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/util/__pycache__/webvisualizer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/util/__pycache__/webvisualizer.cpython-36.pyc
--------------------------------------------------------------------------------
/metadata/util/html.py:
--------------------------------------------------------------------------------
1 | import dominate
2 | from dominate.tags import *
3 | import os
4 |
5 |
6 | class HTML:
7 | def __init__(self, web_dir, title, reflesh=0):
8 | self.title = title
9 | self.web_dir = web_dir
10 | self.img_dir = os.path.join(self.web_dir, 'images')
11 | if not os.path.exists(self.web_dir):
12 | os.makedirs(self.web_dir)
13 | if not os.path.exists(self.img_dir):
14 | os.makedirs(self.img_dir)
15 | # print(self.img_dir)
16 |
17 | self.doc = dominate.document(title=title)
18 | if reflesh > 0:
19 | with self.doc.head:
20 | meta(http_equiv="reflesh", content=str(reflesh))
21 |
22 | def get_image_dir(self):
23 | return self.img_dir
24 |
25 | def add_header(self, str):
26 | with self.doc:
27 | h3(str)
28 |
29 | def add_table(self, border=1):
30 | self.t = table(border=border, style="table-layout: fixed;")
31 | self.doc.add(self.t)
32 |
33 | def add_images(self, ims, txts, links, width=400):
34 | self.add_table()
35 | with self.t:
36 | with tr():
37 | for im, txt, link in zip(ims, txts, links):
38 | with td(style="word-wrap: break-word;", halign="center", valign="top"):
39 | with p():
40 | with a(href=os.path.join('images', link)):
41 | img(style="width:%dpx" % width, src=os.path.join('images', im))
42 | br()
43 | p(txt)
44 |
45 | def save(self):
46 | html_file = '%s/index.html' % self.web_dir
47 | f = open(html_file, 'wt')
48 | f.write(self.doc.render())
49 | f.close()
50 |
51 |
52 | if __name__ == '__main__':
53 | html = HTML('web/', 'test_html')
54 | html.add_header('hello world')
55 |
56 | ims = []
57 | txts = []
58 | links = []
59 | for n in range(4):
60 | ims.append('image_%d.png' % n)
61 | txts.append('text_%d' % n)
62 | links.append('image_%d.png' % n)
63 | html.add_images(ims, txts, links)
64 | html.save()
65 |
--------------------------------------------------------------------------------
/metadata/util/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import copy
3 | import numpy as np
4 | import logging
5 | import collections
6 | from PIL import Image
7 |
8 |
9 | def tensor2im(image_tensor, mean, std, imtype=np.uint8):
10 | image_numpy = image_tensor.cpu().float().numpy()
11 | if image_numpy.shape[0] == 1:
12 | image_numpy = np.tile(image_numpy, (3, 1, 1))
13 | image_numpy = image_numpy.transpose(1, 2, 0)
14 | image_numpy *= std
15 | image_numpy += mean
16 | image_numpy *= 255.0
17 | return image_numpy.astype(imtype)
18 |
19 | def save_image(image_numpy, image_path):
20 | image_pil = Image.fromarray(image_numpy)
21 | image_pil.save(image_path)
22 |
23 | def mkdirs(paths):
24 | if isinstance(paths, list) and not isinstance(paths, str):
25 | for path in paths:
26 | mkdir(path)
27 | else:
28 | mkdir(paths)
29 |
30 | def mkdir(path):
31 | if not os.path.exists(path):
32 | os.makedirs(path)
33 |
34 | def rmdir(path):
35 | if os.path.exists(path):
36 | os.system('rm -rf ' + path)
37 |
38 | def print_loss(loss_list, label, epoch=0, batch_iter=0):
39 | if label == "Test":
40 | logging.info("[ %s Loss ] of Test Dataset:" % (label))
41 | else:
42 | logging.info("[ %s Loss ] of Epoch %d Batch %d" % (label, epoch, batch_iter))
43 |
44 | for index, loss in enumerate(loss_list):
45 | logging.info("----Attribute %d: %f" %(index, loss))
46 |
47 | def print_accuracy(accuracy_list, label, epoch=0, batch_iter=0):
48 | if label == "Test":
49 | logging.info("[ %s Accu ] of Test Dataset:" % (label))
50 | else:
51 | logging.info("[ %s Accu ] of Epoch %d Batch %d" %(label, epoch, batch_iter))
52 |
53 | for index, item in enumerate(accuracy_list):
54 | for top_k, value in item.items():
55 | logging.info("----Attribute %d Top%d: %f" %(index, top_k, value["ratio"]))
56 |
57 | def opt2file(opt, dst_file):
58 | args = vars(opt)
59 | with open(dst_file, 'wt') as opt_file:
60 | opt_file.write('------------ Options -------------\n')
61 | print('------------ Options -------------')
62 | for k, v in sorted(args.items()):
63 | opt_file.write('%s: %s\n' % (str(k), str(v)))
64 | print("%s: %s" %(str(k), str(v)))
65 | opt_file.write('-------------- End ----------------\n')
66 | print('-------------- End ----------------')
67 |
68 | def load_label(label_file):
69 | rid2name = list() # rid: real id, same as the id in label.txt
70 | id2rid = list() # id: number from 0 to len(rids)-1 corresponding to the order of rids
71 | rid2id = list()
72 | with open(label_file) as l:
73 | rid2name_dict = collections.defaultdict(str)
74 | id2rid_dict = collections.defaultdict(str)
75 | rid2id_dict = collections.defaultdict(str)
76 | new_id = 0
77 | for line in l.readlines():
78 | line = line.strip('\n\r').split(';')
79 | if len(line) == 3: # attr description
80 | if len(rid2name_dict) != 0:
81 | rid2name.append(rid2name_dict)
82 | id2rid.append(id2rid_dict)
83 | rid2id.append(rid2id_dict)
84 | rid2name_dict = collections.defaultdict(str)
85 | id2rid_dict = collections.defaultdict(str)
86 | rid2id_dict = collections.defaultdict(str)
87 | new_id = 0
88 | rid2name_dict["__name__"] = line[2]
89 | rid2name_dict["__attr_id__"] = line[1]
90 | elif len(line) == 2: # attr value description
91 | rid2name_dict[line[0]] = line[1]
92 | id2rid_dict[new_id] = line[0]
93 | rid2id_dict[line[0]] = new_id
94 | new_id += 1
95 | if len(rid2name_dict) != 0:
96 | rid2name.append(rid2name_dict)
97 | id2rid.append(id2rid_dict)
98 | rid2id.append(rid2id_dict)
99 | return rid2name, id2rid, rid2id
100 |
101 |
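load_label builds per-attribute lookup tables from label.txt: a line with three ';'-separated fields starts a new attribute, and each following two-field line adds one value to it. The real label.txt in this repository defines the vehicle metadata attributes; the file below is a hypothetical stand-in used only to show the parsing behaviour, and the import path `util.util` is assumed.

```python
import os
import tempfile
from util.util import load_label  # assumed import path for this file

# Hypothetical label.txt: attribute headers have 3 fields, attribute values have 2.
sample = (
    "color;0;vehicle color\n"
    "1;black\n"
    "2;white\n"
    "type;1;vehicle type\n"
    "10;sedan\n"
    "11;suv\n"
)
path = os.path.join(tempfile.mkdtemp(), 'label.txt')
with open(path, 'w') as f:
    f.write(sample)

rid2name, id2rid, rid2id = load_label(path)
print(rid2name[0]['__name__'])   # 'vehicle color'
print(id2rid[1][0])              # '10' (first value id of the second attribute)
print(rid2id[1]['11'])           # 1 (real id '11' maps to contiguous id 1)
```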
--------------------------------------------------------------------------------
/metadata/util/webvisualizer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import collections
4 | import numpy as np
5 | from . import util
6 | from . import html
7 |
8 |
9 | class WebVisualizer():
10 | def __init__(self, opt):
11 | self.opt = opt
12 | self.display_id = opt.display_id
13 | self.win_size = opt.display_winsize
14 | self.use_html = (opt.html and (opt.mode == "Train"))
15 | self.name = opt.name
16 | self.saved = False
17 | self.type2id = {"Loss":0, "Accuracy": 1, "Other": 2}
18 | self.phase2id = {"Train": 0, "Validate": 1, "Test": 2}
19 |
20 | def ManualType():
21 | return collections.defaultdict(list)
22 | # store all the points for regular backup
23 | self.plot_data = collections.defaultdict(ManualType)
24 | # line window info
25 | self.win_info = collections.defaultdict(ManualType)
26 | if self.display_id > 0:
27 | import visdom
28 | self.vis = visdom.Visdom(port=opt.display_port)
29 |
30 | if self.use_html:
31 | self.web_dir = os.path.join(opt.model_dir, "web")
32 | self.img_dir = os.path.join(opt.model_dir, "image")
33 | print("Create web directory %s ..." %(self.web_dir))
34 | util.mkdirs([self.web_dir, self.img_dir])
35 |
36 |
37 | def reset(self):
38 | self.saved = False
39 |
40 | """
41 | type: [Accuracy | Loss | Other]
42 | phase: [Train | Validate | Test]
43 | """
44 | def plot_points(self, x, y, data_type, phase):
45 | line_name = data_type + "@" + phase
46 | self.plot_data[data_type][phase].append((x,y))
47 | # draw initial line objects if not initialized
48 | if len(self.win_info[data_type][phase]) == 0:
49 | for index in range(len(y)):
50 | win_id = self.type2id[data_type]*len(y) + index
51 | win = self.vis.line(X=np.array([0]),
52 | Y=np.array([0]),
53 | opts=dict(
54 | title=data_type + " of Attribute " + str(index) + " Over Time",
55 | xlabel="epoch",
56 | ylabel=data_type,
57 | showlegend=True,
58 | width=900,
59 | height=450),
60 | win=win_id,
61 | name=line_name)
62 | self.win_info[data_type][phase].append(win)
63 |
64 | for index, value in enumerate(y):
65 | win_id = self.win_info[data_type][phase][index]
66 | self.vis.line(X=np.array([x]),
67 | Y=np.array([value]),
68 | win=win_id,
69 | name=line_name,
70 | update="append")
71 |
72 | def plot_images(self, image_dict, start_display_id, epoch, save_result):
73 | if self.display_id > 0: # show images in the browser
74 | ncols = self.opt.image_ncols
75 | if ncols > 0:
76 | h, w = next(iter(image_dict.values())).shape[:2]
77 | table_css = """""" % (w, h)
81 | title = self.name
82 | label_html = ''
83 | label_html_row = ''
84 | nrows = int(np.ceil(len(image_dict.items()) / ncols))
85 | images = []
86 | idx = 0
87 | for label, image_numpy in image_dict.items():
88 | label_html_row += '<td>%s</td>' % label
89 | images.append(image_numpy.transpose([2, 0, 1]))
90 | idx += 1
91 | if idx % ncols == 0:
92 | label_html += '<tr>%s</tr>' % label_html_row
93 | label_html_row = ''
94 | white_image = np.ones_like(image_numpy.transpose([2, 0, 1])) * 255
95 | while idx % ncols != 0:
96 | images.append(white_image)
97 | label_html_row += '<td></td>'
98 | idx += 1
99 | if label_html_row != '':
100 | label_html += '<tr>%s</tr>' % label_html_row
101 | # pane col = image row
102 | self.vis.images(images, nrow=ncols, win=start_display_id + 1,
103 | padding=2, opts=dict(title=title + ' images'))
104 | label_html = '<table>%s</table>' % label_html
105 | self.vis.text(table_css + label_html, win=start_display_id + 2,
106 | opts=dict(title=title + ' labels'))
107 | else:
108 | idx = 1
109 | for label, image_numpy in image_dict.items():
110 | self.vis.image(image_numpy.transpose([2, 0, 1]), opts=dict(title=label),
111 | win=start_display_id + idx)
112 | idx += 1
113 |
114 | if self.use_html and (save_result or not self.saved): # save images to a html file
115 | self.saved = True
116 | for label, image_numpy in image_dict.items():
117 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label))
118 | util.save_image(image_numpy, img_path)
119 | # update website
120 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1)
121 | for n in range(epoch, 0, -1):
122 | webpage.add_header('epoch [%d]' % n)
123 | ims = []
124 | txts = []
125 | links = []
126 |
127 | for label, image_numpy in image_dict.items():
128 | img_path = 'epoch%.3d_%s.png' % (n, label)
129 | ims.append(img_path)
130 | txts.append(label)
131 | links.append(img_path)
132 | webpage.add_images(ims, txts, links, width=self.win_size)
133 | webpage.save()
134 |
135 | def backup(self, name):
136 | pass
137 |
138 | def test(self):
139 | pass
140 |
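WebVisualizer expects a running visdom server on opt.display_port and a specific set of option fields. A minimal sketch of plotting a loss curve with a stub options object follows; the attribute values are illustrative only, and in the real pipeline opt comes from Options.parse().

```python
from types import SimpleNamespace
from util.webvisualizer import WebVisualizer  # assumed import path for this file

# Stub with the fields the constructor reads; start visdom first: python -m visdom.server
opt = SimpleNamespace(display_id=1, display_winsize=128, html=False, mode='Train',
                      name='demo', image_ncols=0, display_port=8097, model_dir='./demo')
vis = WebVisualizer(opt)

# One value per attribute: here two attributes, plotted at epoch 0.5.
vis.plot_points(x=0.5, y=[0.91, 1.27], data_type='Loss', phase='Train')
```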
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file may be used to create an environment using:
2 | # $ conda create --name <env> --file <this file>
3 | # platform: linux-64
4 | _libgcc_mutex=0.1=main
5 | backports=1.0=py_2
6 | backports.functools_lru_cache=1.6.1=py_0
7 | backports_abc=0.5=py_1
8 | blas=1.0=mkl
9 | bzip2=1.0.8=h7b6447c_0
10 | ca-certificates=2020.7.22=0
11 | certifi=2019.11.28=py27_0
12 | cffi=1.14.0=py27he30daa8_1
13 | cuda90=1.0=h6433d27_0
14 | cycler=0.10.0=py27_0
15 | dbus=1.13.16=hb2f20db_0
16 | expat=2.2.9=he6710b0_2
17 | ffmpeg=4.3.1=h167e202_0
18 | fontconfig=2.13.0=h9420a91_0
19 | freetype=2.10.2=h5ab3b9f_0
20 | functools32=3.2.3.2=py27_1
21 | futures=3.3.0=py27_0
22 | glib=2.65.0=h3eb4bd4_0
23 | gmp=6.2.0=he1b5a44_2
24 | gnutls=3.6.13=h79a8f9a_0
25 | gst-plugins-base=1.14.0=hbbd80ab_1
26 | gstreamer=1.14.0=hb31296c_0
27 | icu=58.2=he6710b0_3
28 | intel-openmp=2020.2=254
29 | jpeg=9b=h024ee3a_2
30 | kiwisolver=1.1.0=py27he6710b0_0
31 | lame=3.100=h7b6447c_0
32 | libedit=3.1.20191231=h14c3975_1
33 | libffi=3.3=he6710b0_2
34 | libgcc-ng=9.1.0=hdf63c60_0
35 | libgfortran-ng=7.3.0=hdf63c60_0
36 | libiconv=1.15=h63c8f33_5
37 | libpng=1.6.37=hbc83047_0
38 | libstdcxx-ng=9.1.0=hdf63c60_0
39 | libtiff=4.1.0=h2733197_1
40 | libuuid=1.0.3=h1bed415_2
41 | libxcb=1.14=h7b6447c_0
42 | libxml2=2.9.10=he19cac6_1
43 | lz4-c=1.9.2=he6710b0_1
44 | matplotlib=2.2.3=py27hb69df0a_0
45 | mkl=2020.2=256
46 | mkl-service=2.3.0=py27he904b0f_0
47 | mkl_fft=1.0.15=py27ha843d7b_0
48 | mkl_random=1.1.0=py27hd6b4f25_0
49 | ncurses=6.2=he6710b0_1
50 | nettle=3.4.1=hbb512f6_0
51 | numpy=1.16.6=py27hbc911f0_0
52 | numpy-base=1.16.6=py27hde5b4d6_0
53 | olefile=0.46=py27_0
54 | opencv=2.4.11=nppy27_0
55 | openh264=2.1.1=h8b12597_0
56 | openssl=1.1.1g=h7b6447c_0
57 | pcre=8.44=he6710b0_0
58 | pillow=6.2.1=py27h34e0f95_0
59 | pip=19.3.1=py27_0
60 | pycparser=2.20=py_2
61 | pyparsing=2.4.7=py_0
62 | pyqt=5.9.2=py27h05f1152_2
63 | python=2.7.18=h15b4118_1
64 | python-dateutil=2.8.1=py_0
65 | pytorch=0.3.1=py27_cuda9.0.176_cudnn7.0.5_2
66 | pytz=2020.1=py_0
67 | qt=5.9.7=h5867ecd_1
68 | readline=8.0=h7b6447c_0
69 | scikit-learn=0.20.3=py27hd81dba3_0
70 | scipy=1.2.1=py27h7c811a0_0
71 | setuptools=44.0.0=py27_0
72 | singledispatch=3.4.0.3=py27_0
73 | sip=4.19.8=py27hf484d3e_0
74 | six=1.15.0=py_0
75 | sqlite=3.33.0=h62c20be_0
76 | subprocess32=3.5.4=py27h7b6447c_0
77 | tk=8.6.10=hbc83047_0
78 | torchvision=0.2.0=py27hfb27419_1
79 | tornado=5.1.1=py27h7b6447c_0
80 | wheel=0.35.1=py_0
81 | x264=1!152.20180806=h7b6447c_0
82 | xz=5.2.5=h7b6447c_0
83 | zlib=1.2.11=h7b6447c_3
84 | zstd=1.4.5=h9ceee32_0
85 |
--------------------------------------------------------------------------------
/vehicle_keypoints/README.md:
--------------------------------------------------------------------------------
1 | This code estimates the vehicle viewpoint. You need to change the data paths before running.
2 |
--------------------------------------------------------------------------------