├── .gitignore ├── CarKeypoints ├── README.md ├── assets │ └── carkeypoints.png ├── inference-imageset.lua ├── inference.lua ├── valeval.lua └── valid.txt ├── LICENSE ├── README.md ├── Video-Person-ReID ├── Graph_ModelDataGen.py ├── Graph_data_manager.py ├── Graph_video_loader.py ├── README.md ├── bases.py ├── data_manager.py ├── data_util │ ├── convert_metadata_imglistprob.py │ ├── create_feature_files.py │ ├── create_metadata_files.py │ ├── xml_reader_testdata.py │ └── xml_reader_traindata.py ├── eval_metrics.py ├── iotools.py ├── losses.py ├── main_video_person_reid.py ├── models │ ├── ResNet.py │ ├── __init__.py │ └── resnet3d.py ├── re_ranking_metadata.py ├── reidtools.py ├── samplers.py ├── transforms.py ├── utils.py ├── video2img │ ├── crop_img.py │ ├── crop_img_big.py │ └── txt_GPS_new │ │ ├── c006.txt │ │ ├── c007.txt │ │ ├── c008.txt │ │ ├── c009.txt │ │ ├── c010.txt │ │ ├── c016.txt │ │ ├── c017.txt │ │ ├── c018.txt │ │ ├── c019.txt │ │ ├── c020.txt │ │ ├── c021.txt │ │ ├── c022.txt │ │ ├── c023.txt │ │ ├── c024.txt │ │ ├── c025.txt │ │ ├── c026.txt │ │ ├── c027.txt │ │ ├── c028.txt │ │ ├── c029.txt │ │ ├── c033.txt │ │ ├── c034.txt │ │ ├── c035.txt │ │ └── c036.txt └── video_loader.py ├── metadata ├── README.md ├── aic │ └── trainer_metadata │ │ └── Data │ │ └── TestSet │ │ └── data.txt ├── data.txt ├── data │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── dataset.cpython-36.pyc │ │ ├── loader.cpython-36.pyc │ │ └── transformer.cpython-36.pyc │ ├── dataset.py │ ├── loader.py │ └── transformer.py ├── deploy.py ├── label.txt ├── models │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── alexnet.cpython-36.pyc │ │ ├── build_model.cpython-36.pyc │ │ ├── lightcnn.cpython-36.pyc │ │ ├── model.cpython-36.pyc │ │ ├── resnet.cpython-36.pyc │ │ └── vgg.cpython-36.pyc │ ├── alexnet.py │ ├── build_model.py │ ├── lightcnn.py │ ├── model.py │ ├── resnet.py │ └── vgg.py ├── multi_label_classifier.py ├── options │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── options.cpython-36.pyc │ └── options.py ├── reformat-log.py ├── testdata.txt └── util │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── html.cpython-36.pyc │ ├── util.cpython-36.pyc │ └── webvisualizer.cpython-36.pyc │ ├── html.py │ ├── util.py │ └── webvisualizer.py ├── requirements.txt └── vehicle_keypoints ├── README.md └── carkeypoint_train.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | 4 | .idea/ 5 | -------------------------------------------------------------------------------- /CarKeypoints/README.md: -------------------------------------------------------------------------------- 1 | The vehicle keypoints code is based on krrish94's CarKeypoints \[[code](https://github.com/krrish94/CarKeypoints)\]. 2 | 3 | # CarKeypoints 4 | 5 | This repository contains inference code for using a modified [stacked-hourglass](https://github.com/krrish94/stacked-hourglass) to detect semantic keypoints on cars. 6 | 7 | The network outputs a likelihood of keypoint presence over every pixel of an input image (the input image is a 64 x 64 car bounding box). 8 | 9 | Here is a 3D wireframe with reference keypoints. 10 |

11 | ![3D wireframe with reference keypoints](assets/carkeypoints.png) 12 |
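For readers who want to consume the raw network output outside of the provided Lua scripts, here is a minimal NumPy sketch (not part of this repository, which uses Torch7/Lua) of how the per-pixel likelihood maps can be turned into keypoint coordinates with an argmax, mirroring `getPreds` in `valeval.lua`. The function name and array shapes are illustrative, based on the 36-keypoint, 64 x 64 output described above.

```python
import numpy as np

def heatmaps_to_keypoints(heatmaps):
    """Convert hourglass output heatmaps into keypoint predictions.

    heatmaps: array of shape (num_keypoints, H, W), e.g. (36, 64, 64),
              holding a per-pixel likelihood for each keypoint.
    Returns an array of shape (num_keypoints, 3) with (x, y, score),
    where x and y are 1-indexed to match the Lua convention in valeval.lua.
    """
    num_kp, h, w = heatmaps.shape
    flat = heatmaps.reshape(num_kp, h * w)
    idx = flat.argmax(axis=1)                 # flat index of the maximum per keypoint
    scores = flat[np.arange(num_kp), idx]     # likelihood at that maximum
    xs = idx % w + 1                          # column (1-indexed)
    ys = idx // w + 1                         # row (1-indexed)
    return np.stack([xs, ys, scores], axis=1)
```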

13 | 14 | ## Setup 15 | 16 | This code assumes you have the following packages installed. 17 | * [Torch7](https://github.com/torch/torch7) 18 | * Torch packages: `nn`, `cunn`, `cudnn`, `image`, `nngraph` 19 | 20 | 21 | ## Downloading the pre-trained model 22 | 23 | Download the pre-trained model [here](https://www.dropbox.com/s/qezt3e02j4uawov/model.t7?dl=0). 24 | 25 | 26 | ## Running the inference code 27 | 28 | To perform inference on a set of images, first edit `valid.txt` and add paths to the images you need to run inference on. **These images must only contain cropped car bounding boxes** (i.e., from any image that contains a car, pick only one car bounding box and crop the region of the image contained within that bounding box). These are the only kind of images the model has been trained on. 29 | 30 | Then, run the inference script. 31 | ``` 32 | inference.lua 33 | ``` 34 | 35 | This will write a `results.txt` file (you can edit the name and path of this output file in `inference.lua`). 36 | 37 | ## Running inference for AIC19 38 | 39 | To run inference on multiple images, put image paths in a text file (e.g. `fullpath_train.txt`). Then, run the inference script. 40 | ``` 41 | inference-imageset.lua 42 | ``` 43 | 44 | The result will be saved in `keypoint-train.txt`. Example results can be downloaded [here](https://drive.google.com/open?id=1m96n_1gsHy3iI9ruRGDGqaVXqjJgVcKf). 45 | -------------------------------------------------------------------------------- /CarKeypoints/assets/carkeypoints.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/CarKeypoints/assets/carkeypoints.png -------------------------------------------------------------------------------- /CarKeypoints/inference-imageset.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require 'cunn' 3 | require 'cudnn' 4 | require 'image' 5 | require 'nngraph' 6 | require 'valeval.lua' 7 | 8 | imageset = 'train' 9 | -- imageset = 'query' 10 | -- imageset = 'test' 11 | 12 | -- File to read image paths from 13 | data_file = '/home/ipl/twhuang/aic19/aic19-track2-reid/fullpath_%s.txt'%imageset 14 | -- Pretrained model weights 15 | model_file = '/home/ipl/twhuang/CarKeypoints/model.t7' 16 | -- This file is where results get written to 17 | results_file = '/home/ipl/twhuang/CarKeypoints/keypoint-%s.txt'%imageset 18 | 19 | 20 | function get_predictions(heat_maps) 21 | assert(heat_maps:size():size() == 4, 'Input must be 4-D tensor') 22 | 23 | local elem, idx = torch.max(heat_maps:view(heat_maps:size(1), heat_maps:size(2), heat_maps:size(3)*heat_maps:size(4)), 3) 24 | local preds = torch.repeatTensor(idx, 1, 1, 2):float() 25 | 26 | preds[{{}, {}, 1}]:apply(function(x) return (x - 1) % heat_maps:size(4) + 1 end) 27 | preds[{{}, {}, 2}]:add(-1):div(heat_maps:size(3)):floor():add(1) 28 | 29 | return preds 30 | end 31 | 32 | function post_process(output, output_res) 33 | local preds = get_predictions(output) 34 | local scores = torch.zeros(preds:size(1), preds:size(2), 1) 35 | 36 | for i=1,preds:size(1) do 37 | for j=1,preds:size(2) do 38 | local heat_map = output[i][j] 39 | local pred_x, pred_y = preds[i][j][1], preds[i][j][2] 40 | 41 | scores[i][j] = heat_map[pred_x][pred_y] 42 | if pred_x > 1 and pred_x < output_res and pred_y > 1 and pred_y < output_res then 43 | local diff = 
torch.Tensor({heat_map[pred_y][pred_x+1]-heat_map[pred_y][pred_x-1], heat_map[pred_y+1][pred_x]-heat_map[pred_y-1][pred_x]}) 44 | preds[i][j]:add(diff:sign():mul(.25)) 45 | end 46 | end 47 | end 48 | preds:add(0.5) 49 | 50 | return preds:cat(preds, 3):cat(scores, 3) 51 | end 52 | 53 | function accuracy(output,label) 54 | if type(output) == 'table' then 55 | return heatmapAccuracy(output[#output],label[#output],nil,dataset.accIdxs) 56 | else 57 | return heatmapAccuracy(output,label,nil,dataset.accIdxs) 58 | end 59 | end 60 | 61 | torch.setdefaulttensortype('torch.FloatTensor') 62 | 63 | num_stacks = 2 64 | num_keypoints = 36 65 | output_res = 64 66 | pred_dims = {num_keypoints, 5} 67 | input_dims = {3, 64, 64} 68 | 69 | output_dims = {} 70 | for i=1,num_stacks do 71 | output_dims[i] = {num_keypoints, 64, 64} 72 | end 73 | 74 | num_images = 0 75 | for line in io.lines(data_file) do 76 | num_images = num_images + 1 77 | end 78 | 79 | nn.DataParallelTable.deserializeNGPUs = 1 80 | model = torch.load(model_file) 81 | model:cuda() 82 | model = model:get(1) 83 | print('\nModel Loading Done') 84 | 85 | iters = 0 86 | preds = {} 87 | 88 | local f = io.open(results_file, 'w') 89 | 90 | for line in io.lines(data_file) do 91 | iters = iters + 1 92 | 93 | img_path = string.sub(line, 1, #line-1) 94 | img = torch.FloatTensor(image.load(img_path)) 95 | img = image.scale(img, 64, 64) 96 | 97 | input = torch.FloatTensor(1, 3, 64, 64) 98 | input[1] = img 99 | 100 | output = model:forward(input:cuda()) 101 | if type(output) == 'table' then 102 | output = output[#output] 103 | end 104 | 105 | keypoints = post_process(output, output_res) 106 | coords = keypoints[1]:sub(1,36,3,5) 107 | table.insert(preds, keypoints[1]) 108 | str = '' 109 | for i=1,coords:size(1) do 110 | for j=1,coords:size(2) do 111 | str = str .. tostring(coords[i][j]) .. ' ' 112 | end 113 | end 114 | 115 | str = string.sub(str, 1, #str-1) 116 | str = str .. '\n' 117 | 118 | print('Done ' .. 
line) 119 | f:write(str) 120 | end 121 | f:close() 122 | -------------------------------------------------------------------------------- /CarKeypoints/inference.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require 'cunn' 3 | require 'cudnn' 4 | require 'image' 5 | require 'nngraph' 6 | require 'valeval.lua' 7 | 8 | 9 | -- File to read image paths from 10 | data_file = '/home/ipl/twhuang/aic19/aic19-track2-reid/fullpath_query.txt' 11 | -- Pretrained model weights 12 | model_file = '/home/ipl/twhuang/CarKeypoints/model.t7' 13 | -- This file is where results get written to 14 | results_file = '/home/ipl/twhuang/CarKeypoints/results-query.txt' 15 | 16 | 17 | function get_predictions(heat_maps) 18 | assert(heat_maps:size():size() == 4, 'Input must be 4-D tensor') 19 | 20 | local elem, idx = torch.max(heat_maps:view(heat_maps:size(1), heat_maps:size(2), heat_maps:size(3)*heat_maps:size(4)), 3) 21 | local preds = torch.repeatTensor(idx, 1, 1, 2):float() 22 | 23 | preds[{{}, {}, 1}]:apply(function(x) return (x - 1) % heat_maps:size(4) + 1 end) 24 | preds[{{}, {}, 2}]:add(-1):div(heat_maps:size(3)):floor():add(1) 25 | 26 | return preds 27 | end 28 | 29 | function post_process(output, output_res) 30 | local preds = get_predictions(output) 31 | local scores = torch.zeros(preds:size(1), preds:size(2), 1) 32 | 33 | for i=1,preds:size(1) do 34 | for j=1,preds:size(2) do 35 | local heat_map = output[i][j] 36 | local pred_x, pred_y = preds[i][j][1], preds[i][j][2] 37 | 38 | scores[i][j] = heat_map[pred_x][pred_y] 39 | if pred_x > 1 and pred_x < output_res and pred_y > 1 and pred_y < output_res then 40 | local diff = torch.Tensor({heat_map[pred_y][pred_x+1]-heat_map[pred_y][pred_x-1], heat_map[pred_y+1][pred_x]-heat_map[pred_y-1][pred_x]}) 41 | preds[i][j]:add(diff:sign():mul(.25)) 42 | end 43 | end 44 | end 45 | preds:add(0.5) 46 | 47 | return preds:cat(preds, 3):cat(scores, 3) 48 | end 49 | 50 | function accuracy(output,label) 51 | if type(output) == 'table' then 52 | return heatmapAccuracy(output[#output],label[#output],nil,dataset.accIdxs) 53 | else 54 | return heatmapAccuracy(output,label,nil,dataset.accIdxs) 55 | end 56 | end 57 | 58 | torch.setdefaulttensortype('torch.FloatTensor') 59 | 60 | num_stacks = 2 61 | num_keypoints = 36 62 | output_res = 64 63 | pred_dims = {num_keypoints, 5} 64 | input_dims = {3, 64, 64} 65 | 66 | output_dims = {} 67 | for i=1,num_stacks do 68 | output_dims[i] = {num_keypoints, 64, 64} 69 | end 70 | 71 | num_images = 0 72 | for line in io.lines(data_file) do 73 | num_images = num_images + 1 74 | end 75 | 76 | nn.DataParallelTable.deserializeNGPUs = 1 77 | model = torch.load(model_file) 78 | model:cuda() 79 | model = model:get(1) 80 | print('\nModel Loading Done') 81 | 82 | iters = 0 83 | preds = {} 84 | 85 | local f = io.open(results_file, 'w') 86 | 87 | for line in io.lines(data_file) do 88 | iters = iters + 1 89 | 90 | img_path = string.sub(line, 1, #line-1) 91 | img = torch.FloatTensor(image.load(img_path)) 92 | img = image.scale(img, 64, 64) 93 | 94 | input = torch.FloatTensor(1, 3, 64, 64) 95 | input[1] = img 96 | 97 | output = model:forward(input:cuda()) 98 | if type(output) == 'table' then 99 | output = output[#output] 100 | end 101 | 102 | keypoints = post_process(output, output_res) 103 | coords = keypoints[1]:sub(1,36,3,5) 104 | table.insert(preds, keypoints[1]) 105 | str = '' 106 | for i=1,coords:size(1) do 107 | for j=1,coords:size(2) do 108 | str = str .. tostring(coords[i][j]) .. 
' ' 109 | end 110 | end 111 | 112 | str = string.sub(str, 1, #str-1) 113 | str = str .. '\n' 114 | 115 | print('Done ' .. line) 116 | f:write(str) 117 | end 118 | f:close() 119 | -------------------------------------------------------------------------------- /CarKeypoints/valeval.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------ 2 | -- Helpful functions for evaluation 3 | ------------------------------------------------------------------------------- 4 | 5 | -- Load predictions from hdf5 file 6 | -- predFile: name to the .h5 file containing the predictions 7 | -- doHm: read the 'heatmaps' field of the hdf5 database 8 | -- doInp: read the 'input' field of the hdf5 database 9 | function loadPreds(predFile, doHm, doInp) 10 | local f = hdf5.open(projectDir .. '/exp/' .. predFile .. '.h5','r') 11 | local inp,hms 12 | local idxs = f:read('idxs'):all() 13 | local preds = f:read('preds'):all() 14 | if doHm then hms = f:read('heatmaps'):all() end 15 | if doInp then inp = f:read('input'):all() end 16 | return idxs, preds, hms, inp 17 | end 18 | 19 | 20 | -- Calculate the distance between the predicted keypoints and the label (ground-truth keypoints) 21 | -- ??? (Check if function signatures (dims) are correctly understood) 22 | -- Inputs 23 | -- preds: N-by-2 tensor of predicted coordinates 24 | -- label: N-by-2 tensor of predicted coordinates 25 | -- normalize: N-by-1 tensor of normalizing factors 26 | -- Output 27 | -- dists: 2-by-N tensor of computed distances between the predictions and the labels 28 | function calcDists(preds, label, normalize) 29 | -- Initialize a tensor to hold the distances 30 | local dists = torch.Tensor(preds:size(2), preds:size(1)) 31 | -- Initialize a 2-by-1 tensor to hold the difference between a label and a prediction 32 | local diff = torch.Tensor(2) 33 | -- ??? 34 | -- For each keypoint predicted 35 | for i = 1,preds:size(1) do 36 | -- ??? 37 | -- For each dimension of the predicted keypoint (x, y) 38 | for j = 1,preds:size(2) do 39 | -- If that keypoint is visible in the image, compute the distance 40 | if label[i][j][1] > 1 and label[i][j][2] > 1 then 41 | dists[j][i] = torch.dist(label[i][j],preds[i][j])/normalize[i] 42 | -- If that keypoint is not visible, let the distance be -1 43 | else 44 | dists[j][i] = -1 45 | end 46 | end 47 | end 48 | -- Return the distance 49 | return dists 50 | end 51 | 52 | 53 | -- Recover predictions from a heatmap 54 | -- Input 55 | -- hm: heatmap (a 4-D tensor) 56 | -- Output 57 | -- preds: N-by-2 tensor of predicted keypoint locations obtained from maxima on the heatmap 58 | function getPreds(hm) 59 | -- ??? 
(assert the following statement) 60 | -- I'm assuming the 4 heatmap dimensions are for [num images] x [num kps per image] x [height] x [width] 61 | 62 | assert(hm:size():size() == 4, 'Input must be 4-D tensor') 63 | -- Reshape the heatmap so that [height] and [width] are flattened out to a single dimension 64 | -- Get the maxima over the third dimension (comprising of the [height * width] flattened values) 65 | local max, idx = torch.max(hm:view(hm:size(1), hm:size(2), hm:size(3) * hm:size(4)), 3) 66 | -- Allocate memory for a tensor to hold X,Y coordinates of maxima locations 67 | local preds = torch.repeatTensor(idx, 1, 1, 2):float() 68 | -- Obtain the X coordinate of each maxima 69 | preds[{{}, {}, 1}]:apply(function(x) return (x - 1) % hm:size(4) + 1 end) 70 | -- Obtain the Y coordinate of each maxima 71 | preds[{{}, {}, 2}]:add(-1):div(hm:size(3)):floor():add(1) 72 | -- Return the predicted locations 73 | --print(preds:size()) 74 | 75 | return preds 76 | end 77 | 78 | 79 | -- ??? 80 | -- Inputs 81 | -- dists: N-by-2 tensor of distances (between predictions and ground-truth) 82 | -- thr: threshold distance below which a detection is to be considered accurate 83 | -- Output 84 | -- percentage of keypoints that lie within the specified threshold (or -1 if no keypoint is visible) 85 | function distAccuracy(dists, thr) 86 | -- Return percentage below threshold while ignoring values with a -1 87 | if not thr then thr = .5 end 88 | -- Ignore distances that are -1 (since those keypoints are not visible in the image) 89 | if torch.ne(dists,-1):sum() > 0 then 90 | -- For all other keypoints, compute the percentage of keypoints that satisfy the distance threshold 91 | return dists:le(thr):eq(dists:ne(-1)):sum() / dists:ne(-1):sum() 92 | else 93 | return -1 94 | end 95 | end 96 | 97 | 98 | -- Calculate accuracy according to the PCK (Percentage of Correct Keypoints) metric, but use the 99 | -- ground-truth heatmap, rather than ground-truth X,Y locations 100 | -- Inputs 101 | -- output: output heatmap (from the hourglass network) 102 | -- label: ground-truth heatmap (??? confirm) 103 | -- thr: threshold distance below which a detection is considered correct 104 | -- idxs: average accuracy across 'idxs' is also returned by this function 105 | -- Output 106 | -- multiple values (each is an accuracy). The first value to be returned is the average accuracy 107 | -- across 'idxs'. This is followed by accuracies for individual keypoints. 108 | function heatmapAccuracy(output, label, thr, idxs) 109 | -- Compute predictions from the output heatmap (from the hourglass network) 110 | local preds = getPreds(output) 111 | -- Get predictions from the label (ground-truth) 112 | local gt = getPreds(label) 113 | -- Calculate the distance between the predictions and the labels 114 | -- The third argument here is the normalizing factor to be applied at each heatmap location 115 | -- ??? 
(find out what the /10 is for) 116 | local dists = calcDists(preds, gt, torch.ones(preds:size(1))*opt.outputRes/10) 117 | 118 | -- Table to store accuracies 119 | local acc = {} 120 | -- Variable to store the average accuracy (over specific keypoints as prescribed by 'idxs') 121 | local avgAcc = 0.0 122 | -- Number of indices that are bad (inaccurate) 123 | local badIdxCount = 0 124 | 125 | -- If average accuracy over 'idxs' is not specified 126 | if not idxs then 127 | -- Then compute it over all keypoint indices 128 | for i = 1,dists:size(1) do 129 | -- Compute the percentage of keypoints that are correct 130 | acc[i+1] = distAccuracy(dists[i]) 131 | -- If at least one keypoint is correct, add it to the average accuracy 132 | if acc[i+1] >= 0 then avgAcc = avgAcc + acc[i+1] 133 | -- Otherwise, exclude it 134 | else badIdxCount = badIdxCount + 1 end 135 | end 136 | -- Compute the average accuracy for all keypoint indices 137 | -- In this evaluation, we consider only those images where at least one keypoint is accurately 138 | -- predicted. 139 | acc[1] = avgAcc / (dists:size(1) - badIdxCount) 140 | -- Compute average accuracy only over specified 'idxs' 141 | else 142 | for i = 1,#idxs do 143 | acc[i+1] = distAccuracy(dists[idxs[i]]) 144 | if acc[i+1] >= 0 then avgAcc = avgAcc + acc[i+1] 145 | else badIdxCount = badIdxCount + 1 end 146 | end 147 | acc[1] = avgAcc / (#idxs - badIdxCount) 148 | end 149 | -- Return the accuracies 150 | return unpack(acc) 151 | end 152 | 153 | 154 | -- ??? 155 | -- Calculate basic accuracy 156 | -- Inputs 157 | -- output: output coordinates (??? heatmap or coordinates) 158 | -- label: ground-truth keypoint coordinates 159 | -- thr: threshold 160 | -- Output 161 | -- Percentage of correct keypoints 162 | function basicAccuracy(output, label, thr) 163 | -- Default threshold of .5 164 | if not thr then thr = .5 end 165 | -- Flatten both the output and the label 166 | output = output:view(output:numel()) 167 | label = label:view(label:numel()) 168 | -- Check if the prediction is within the threshold of the label 169 | local rounded_output = torch.ceil(output - thr):typeAs(label) 170 | local eql = torch.eq(label,rounded_output):typeAs(label) 171 | -- Return PCK 172 | return eql:sum()/output:numel() 173 | end 174 | 175 | 176 | -- ??? 177 | -- Generate standard PCK plot 178 | function displayPCK(dists, part_idx, label, title, show_key) 179 | 180 | if not (type(part_idx) == 'table') then 181 | part_idx = {part_idx} 182 | end 183 | 184 | curve_res = 11 185 | num_curves = #dists 186 | local t = torch.linspace(0,.5,curve_res) 187 | local pdj_scores = torch.zeros(num_curves, curve_res) 188 | local plot_args = {} 189 | print(title) 190 | for curve = 1,num_curves do 191 | for i = 1,curve_res do 192 | t[i] = (i-1)*.05 193 | local acc = 0.0 194 | for j = 1,#part_idx do 195 | acc = acc + distAccuracy(dists[curve][part_idx[j]], t[i]) 196 | end 197 | pdj_scores[curve][i] = acc / #part_idx 198 | end 199 | plot_args[curve] = {label[curve],t,pdj_scores[curve],'-'} 200 | print(label[curve],pdj_scores[curve][curve_res]) 201 | end 202 | 203 | require 'gnuplot' 204 | gnuplot.raw('set title "' .. title .. 
'"') 205 | if not show_key then gnuplot.raw('unset key') 206 | else gnuplot.raw('set key font ",6" right bottom') end 207 | gnuplot.raw('set xrange [0:.5]') 208 | gnuplot.raw('set yrange [0:1]') 209 | gnuplot.plot(unpack(plot_args)) 210 | end 211 | -------------------------------------------------------------------------------- /CarKeypoints/valid.txt: -------------------------------------------------------------------------------- 1 | /home/ipl/haotian/CarKeypoints/123/2.jpg 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Information Processing Lab, University of Washington 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 2019-CVPR-AIC-Track-2-UWIPL 2 | Repository for 2019 CVPR AI City Challenge Track 2 from IPL @University of Washington. 3 | Our method ranks 2nd in the competition. 4 | 5 | ## Code structure 6 | Our code consists of the following three components: 7 | 8 | ### 1. Video-Person-ReID 9 | The multi-view and metadata re-ranking vehicle reidentification model. The code is based on Jiyang Gao's Video-Person-ReID \[[code](https://github.com/jiyanggao/Video-Person-ReID)\]. 10 | 11 | ### 2. Metadata 12 | Metadata model for vehicle's type, brand and color. The code is based on \[[code](https://github.com/pangwong/pytorch-multi-label-classifier)\]. 13 | 14 | ### 3. CarKeypoints 15 | The vehicle keypoints code is based on krrish94's CarKeypoints \[[code](https://github.com/krrish94/CarKeypoints)\]. 16 | 17 | ## Training 18 | Training of both Video-Person-ReID and metadata requires CarKeypoints's inference result on training set. For CarKeypoints, we use the pre-trained model \[[model](https://github.com/krrish94/CarKeypoints)\]. Please refer to the README.md files in each subfolder. 19 | 20 | ## Testing 21 | Testing of both Video-Person-ReID and metadata requires CarKeypoints's inference result on testing set. In addition, Video-Person-ReID needs metadata's inference result on testing set. 
22 | -------------------------------------------------------------------------------- /Video-Person-ReID/Graph_ModelDataGen.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import os 3 | import sys 4 | import time 5 | import datetime 6 | import argparse 7 | import os.path as osp 8 | import numpy as np 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.backends.cudnn as cudnn 13 | from torch.utils.data import DataLoader 14 | from torch.autograd import Variable 15 | from torch.optim import lr_scheduler 16 | 17 | import Graph_data_manager 18 | from Graph_video_loader import VideoDataset 19 | import transforms as T 20 | import models 21 | from models import resnet3d 22 | from losses import CrossEntropyLabelSmooth, TripletLoss 23 | from utils import AverageMeter, Logger, save_checkpoint 24 | from eval_metrics import evaluate 25 | from samplers import RandomIdentitySampler 26 | from reidtools import visualize_ranked_results # TH 27 | 28 | 29 | 30 | 31 | def testseq(dataset_name, use_gpu): 32 | 33 | dataset_root = './video2img/track1_sct_img_test_big/' 34 | dataset = Graph_data_manager.AICityTrack2(root=dataset_root) 35 | 36 | 37 | width = 224 38 | height = 224 39 | transform_train = T.Compose([ 40 | T.Random2DTranslation(height, width), 41 | T.RandomHorizontalFlip(), 42 | T.ToTensor(), 43 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 44 | ]) 45 | 46 | transform_test = T.Compose([ 47 | T.Resize((height, width)), 48 | T.ToTensor(), 49 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 50 | ]) 51 | 52 | pin_memory = True if use_gpu else False 53 | seq_len = 4 54 | num_instance = 4 55 | train_batch = 32 56 | test_batch = 1 57 | 58 | queryloader = DataLoader( 59 | VideoDataset(dataset.query, seq_len=seq_len, sample='dense', transform=transform_test), 60 | batch_size=test_batch, shuffle=False, num_workers=4, 61 | pin_memory=pin_memory, drop_last=False, 62 | ) 63 | 64 | arch = "resnet50ta" 65 | pretrained_model = "./log/track12_ta224_checkpoint_ep500.pth.tar" 66 | 67 | 68 | start_epoch = 0 69 | print("Initializing model: {}".format(arch)) 70 | dataset.num_train_pids = 517 71 | if arch=='resnet503d': 72 | model = resnet3d.resnet50(num_classes=dataset.num_train_pids, sample_width=width, sample_height=height, sample_duration=seq_len) 73 | if not os.path.exists(pretrained_model): 74 | raise IOError("Can't find pretrained model: {}".format(pretrained_model)) 75 | print("Loading checkpoint from '{}'".format(pretrained_model)) 76 | checkpoint = torch.load(pretrained_model) 77 | state_dict = {} 78 | for key in checkpoint['state_dict']: 79 | if 'fc' in key: continue 80 | state_dict[key.partition("module.")[2]] = checkpoint['state_dict'][key] 81 | model.load_state_dict(state_dict, strict=False) 82 | else: 83 | if not os.path.exists(pretrained_model): 84 | model = models.init_model(name=arch, num_classes=dataset.num_train_pids, loss={'xent', 'htri'}) 85 | else: 86 | model = models.init_model(name=arch, num_classes=dataset.num_train_pids, loss={'xent', 'htri'}) 87 | checkpoint = torch.load(pretrained_model) 88 | model.load_state_dict(checkpoint['state_dict']) 89 | start_epoch = checkpoint['epoch'] + 1 90 | print("Loaded checkpoint from '{}'".format(pretrained_model)) 91 | print("- start_epoch: {}\n- rank1: {}".format(start_epoch, checkpoint['rank1'])) 92 | 93 | print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0)) 94 | 95 | 
criterion_xent = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu) 96 | criterion_htri = TripletLoss(margin=0.3) 97 | 98 | lr = 0.0003 99 | gamma = 0.1 100 | stepsize = 200 101 | weight_decay = 5e-04 102 | 103 | optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) 104 | if stepsize > 0: 105 | scheduler = lr_scheduler.StepLR(optimizer, step_size=stepsize, gamma=gamma) 106 | start_epoch = start_epoch 107 | 108 | if use_gpu: 109 | model = nn.DataParallel(model).cuda() 110 | 111 | test(model, queryloader, 'avg', use_gpu, dataset, -1, meta_data_tab=None) 112 | 113 | def test(model, queryloader, pool, use_gpu, dataset, epoch, ranks=[1, 5, 10, 20], meta_data_tab = None): 114 | model.eval() 115 | 116 | qf, q_pids, q_camids = [], [], [] 117 | if False: 118 | for batch_idx, (imgs, surfaces, pids, camids) in enumerate(queryloader): 119 | torch.cuda.empty_cache() 120 | if use_gpu: 121 | imgs = imgs.cuda() 122 | surfaces = surfaces.cuda() 123 | imgs = Variable(imgs, volatile=True) 124 | surfaces = Variable(surfaces, volatile=True) 125 | b, n, s, c, h, w = imgs.size() 126 | b_s, n_s, s_s, d_s = surfaces.size() 127 | assert(b == b_s and n == n_s and s == s_s) 128 | if n < 100: 129 | assert(b == 1) 130 | imgs = imgs.view(b * n, s, c, h, w) 131 | surfaces = surfaces.view(b * n, s, -1) 132 | features = model(imgs, surfaces) 133 | features = features.view(n, -1) 134 | 135 | else: 136 | imgs = imgs.data 137 | imgs.resize_(50, s, c, h, w) 138 | imgs = imgs.view(50, s, c, h, w) 139 | imgs = Variable(imgs, volatile=True) 140 | surfaces = surfaces.data 141 | surfaces.resize_(50, s, d_s) 142 | surfaces = surfaces.view(50, s, -1) 143 | surfaces = Variable(surfaces, volatile=True) 144 | features = model(imgs, surfaces) 145 | features = features.view(50, -1) 146 | 147 | features = torch.mean(features, 0) 148 | features = features.data.cpu() 149 | qf.append(features) 150 | q_pids.extend(pids) 151 | q_camids.extend(camids) 152 | else: 153 | for batch_idx, (imgs, pids, camids) in enumerate(queryloader): 154 | torch.cuda.empty_cache() 155 | if use_gpu: 156 | imgs = imgs.cuda() 157 | imgs = Variable(imgs, volatile=True) 158 | b, n, s, c, h, w = imgs.size() 159 | if n < 100: 160 | assert(b == 1) 161 | imgs = imgs.view(b * n, s, c, h, w) 162 | features = model(imgs) 163 | features = features.view(n, -1) 164 | 165 | else: 166 | imgs = imgs.data 167 | imgs.resize_(50, s, c, h, w) 168 | imgs = imgs.view(50, s, c, h, w) 169 | imgs = Variable(imgs, volatile=True) 170 | features = model(imgs) 171 | features = features.view(50, -1) 172 | 173 | features = torch.mean(features, 0) 174 | features = features.data.cpu() 175 | qf.append(features.numpy()) 176 | q_pids.extend(pids.numpy()) 177 | q_camids.extend(camids.numpy()) 178 | 179 | qf = np.array(qf) 180 | q_pids = np.asarray(q_pids) 181 | q_camids = np.asarray(q_camids) 182 | 183 | np.save("qf3_no_nms_big0510.npy", qf) 184 | np.save("q_pids3_no_nms_big0510.npy", q_pids) 185 | np.save("q_camids3_no_nms_big0510.npy", q_camids) 186 | 187 | 188 | def main(): 189 | seed = 1 190 | gpu_devices = '0' 191 | torch.manual_seed(seed) 192 | os.environ['CUDA_VISIBLE_DEVICES'] = gpu_devices 193 | use_gpu = torch.cuda.is_available() 194 | use_gpu = True 195 | 196 | if not True: 197 | sys.stdout = Logger(osp.join('track1_log', 'log_train.txt')) 198 | else: 199 | sys.stdout = Logger(osp.join('track1_log', 'log_test.txt')) 200 | print("==========\nArgs:{}\n==========") 201 | 202 | if use_gpu: 203 | print("Currently using GPU 
{}".format(gpu_devices)) 204 | cudnn.benchmark = True 205 | torch.cuda.manual_seed_all(seed) 206 | else: 207 | print("Currently using CPU (GPU is highly recommended)") 208 | 209 | dataset = "aictrack2" 210 | print("Initializing dataset {}".format(dataset)) 211 | testseq(dataset, use_gpu) 212 | 213 | 214 | if __name__ == '__main__': 215 | 216 | main() 217 | -------------------------------------------------------------------------------- /Video-Person-ReID/Graph_data_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import os 3 | import glob 4 | import re 5 | import sys 6 | import urllib 7 | import tarfile 8 | import zipfile 9 | import os.path as osp 10 | from scipy.io import loadmat 11 | import numpy as np 12 | 13 | from utils import mkdir_if_missing, write_json, read_json 14 | from bases import BaseVideoDataset 15 | """Dataset classes""" 16 | 17 | 18 | class AICityTrack2(BaseVideoDataset): 19 | 20 | def __init__(self, root, min_seq_len=0, verbose=True, **kwargs): 21 | self.dataset_dir = root 22 | self.split_query_json_path = osp.join(self.dataset_dir, 'split_query.json') 23 | self.min_seq_len = min_seq_len 24 | 25 | print("Note: if root path is changed, the previously generated json files need to be re-generated (so delete them first)") 26 | 27 | query = self._process_dir3(self.dataset_dir, self.split_query_json_path, relabel=False) 28 | 29 | 30 | self.query = query 31 | self.num_query_pids, _, self.num_query_cams = self.get_videodata_info(self.query) 32 | 33 | def _process_dir3(self, dir_path, json_path, relabel): 34 | if osp.exists(json_path): 35 | print("=> {} generated before, awesome!".format(json_path)) 36 | split = read_json(json_path) 37 | return split['tracklets'] 38 | 39 | print("=> Automatically generating split (might take a while for the first time, have a coffe)") 40 | camids = glob.glob(osp.join(dir_path, '*')) # avoid .DS_Store 41 | print("Processing '{}' with {} cameras".format(dir_path, len(camids))) 42 | 43 | 44 | tracklets = [] 45 | for camid in camids: 46 | ss = camid.split("/") 47 | cam = camid 48 | 49 | camid = int(osp.basename(ss[7].replace("c",""))) 50 | print(camid) 51 | 52 | pidrs = glob.glob(osp.join(cam, '*')) 53 | for pdir in pidrs: 54 | raw_img_paths = glob.glob(osp.join(pdir, '*.jpg')) 55 | num_imgs = len(raw_img_paths) 56 | 57 | if num_imgs < self.min_seq_len: 58 | continue 59 | 60 | imgfiles = os.listdir(pdir) 61 | img_paths = [] 62 | 63 | for imgfile in imgfiles: 64 | img_idx_name = imgfile 65 | img_paths.append(pdir+"/"+imgfile) 66 | 67 | ############### keep N largest images 68 | N_largest = 32 69 | if N_largest > 0 and len(img_paths) > N_largest: 70 | from PIL import Image 71 | w = 4 # window for average size 72 | area_first = 0 73 | area_last = 0 74 | for img_path in img_paths[:w]: 75 | img = Image.open(img_path) 76 | width, height = img.size 77 | area_first += width*height 78 | for img_path in img_paths[-w:]: 79 | img = Image.open(img_path) 80 | width, height = img.size 81 | area_last += width*height 82 | if area_first > area_last: 83 | img_paths = img_paths[:N_largest] 84 | else: 85 | img_paths = img_paths[-N_largest:] 86 | ############################################## 87 | 88 | img_name = osp.basename(img_paths[0]) 89 | 90 | ss = pdir.split("/") 91 | pid = int(ss[8]) 92 | img_paths = tuple(img_paths) 93 | tracklets.append((img_paths, pid, camid)) 94 | 95 | print("Saving split to {}".format(json_path)) 96 | split_dict = { 97 | 'tracklets': 
tracklets, 98 | } 99 | write_json(split_dict, json_path) 100 | 101 | return tracklets 102 | 103 | 104 | 105 | """Create dataset""" 106 | 107 | __factory = { 108 | 'aictrack2': AICityTrack2, 109 | } 110 | 111 | def get_names(): 112 | return __factory.keys() 113 | 114 | def init_dataset(name, *args, **kwargs): 115 | if name not in __factory.keys(): 116 | raise KeyError("Unknown dataset: {}".format(name)) 117 | return __factory[name](*args, **kwargs) 118 | 119 | if __name__ == '__main__': 120 | dataset = AICityTrack2() 121 | 122 | 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /Video-Person-ReID/Graph_video_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import os 3 | from PIL import Image 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | import random 9 | 10 | def read_image(img_path): 11 | """Keep reading image until succeed. 12 | This can avoid IOError incurred by heavy IO process.""" 13 | got_img = False 14 | while not got_img: 15 | try: 16 | img = Image.open(img_path).convert('RGB') 17 | got_img = True 18 | except IOError: 19 | print("IOError incurred when reading '{}'. Will redo. Don't worry. Just chill.".format(img_path)) 20 | pass 21 | return img 22 | 23 | 24 | class VideoDataset(Dataset): 25 | """Video Person ReID Dataset. 26 | Note batch data has shape (batch, seq_len, channel, height, width). 27 | """ 28 | sample_methods = ['evenly', 'random', 'all'] 29 | 30 | def __init__(self, dataset, seq_len=15, sample='evenly', transform=None): 31 | self.dataset = dataset 32 | self.seq_len = seq_len 33 | self.sample = sample 34 | self.transform = transform 35 | 36 | def __len__(self): 37 | return len(self.dataset) 38 | 39 | def __getitem__(self, index): 40 | img_paths, pid, camid= self.dataset[index] 41 | num = len(img_paths) 42 | if self.sample == 'random': 43 | """ 44 | Randomly sample seq_len consecutive frames from num frames, 45 | if num is smaller than seq_len, then replicate items. 46 | This sampling strategy is used in training phase. 47 | """ 48 | frame_indices = range(num) 49 | rand_end = max(0, len(frame_indices) - self.seq_len - 1) 50 | begin_index = random.randint(0, rand_end) 51 | end_index = min(begin_index + self.seq_len, len(frame_indices)) 52 | 53 | indices = frame_indices[begin_index:end_index] 54 | 55 | for index in indices: 56 | if len(indices) >= self.seq_len: 57 | break 58 | indices.append(index) 59 | indices=np.array(indices) 60 | imgs = [] 61 | for index in indices: 62 | index=int(index) 63 | img_path = img_paths[index] 64 | img = read_image(img_path) 65 | if self.transform is not None: 66 | img = self.transform(img) 67 | img = img.unsqueeze(0) 68 | imgs.append(img) 69 | imgs = torch.cat(imgs, dim=0) 70 | #imgs=imgs.permute(1,0,2,3) 71 | return imgs, pid, camid 72 | 73 | elif self.sample == 'dense': 74 | """ 75 | Sample all frames in a video into a list of clips, each clip contains seq_len frames, batch_size needs to be set to 1. 76 | This sampling strategy is used in test phase. 
77 | """ 78 | cur_index=0 79 | frame_indices = list(range(num)) 80 | indices_list=[] 81 | while num-cur_index > self.seq_len: 82 | indices_list.append(frame_indices[cur_index:cur_index+self.seq_len]) 83 | cur_index+=self.seq_len 84 | last_seq=frame_indices[cur_index:] 85 | for index in last_seq: 86 | if len(last_seq) >= self.seq_len: 87 | break 88 | last_seq.append(index) 89 | indices_list.append(last_seq) 90 | imgs_list=[] 91 | for indices in indices_list: 92 | imgs = [] 93 | for index in indices: 94 | index=int(index) 95 | img_path = img_paths[index] 96 | img = read_image(img_path) 97 | if self.transform is not None: 98 | img = self.transform(img) 99 | img = img.unsqueeze(0) 100 | imgs.append(img) 101 | imgs = torch.cat(imgs, dim=0) 102 | imgs_list.append(imgs) 103 | imgs_array = torch.stack(imgs_list) 104 | return imgs_array, pid, camid 105 | 106 | else: 107 | raise KeyError("Unknown sample method: {}. Expected one of {}".format(self.sample, self.sample_methods)) 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /Video-Person-ReID/README.md: -------------------------------------------------------------------------------- 1 | # Video-Person-ReID for AIC19 2 | 3 | The code is for the video-based vehicle reidentification task in AIC19 track 1 and 2 \[[link](https://www.aicitychallenge.org/)\]. 4 | The code is based on Jiyang Gao's Video-Person-ReID \[[code](https://github.com/jiyanggao/Video-Person-ReID)\]. 5 | 6 | ### Requirement 7 | 8 | PyTorch 0.3.1
9 | Torchvision 0.2.0
10 | Python 2.7
11 | 12 | ### Dataset 13 | 14 | First download the AIC19 dataset \[[link](https://www.aicitychallenge.org/)\], and use the Python scripts in `data_util/` to convert the images, keypoints and metadata into the desired file structure. For simplicity, copy the scripts into your `aic19-track2-reid` path. 15 | 16 | 1. Run `xml_reader_testdata.py` and `xml_reader_traindata.py` to convert the images into the desired file structure: `image_train_deepreid/carId/camId/imgId.jpg`. 17 | 2. Run `create_feature_files.py` to convert the keypoints into the same file structure as the images: `keypoint_train_deepreid/carId/camId/imgId.txt`. 18 | 3. Run `convert_metadata_imglistprob.py` to convert the metadata inference results of the query (and test) tracks into `prob_v2m100_query.txt` and `imglist_v2m100_query.txt`. Then run `create_metadata_files.py` to convert the metadata into the same file structure as the images: `metadata_v2m100_query_deepreid/carId/camId/imgId.txt`. If using other metadata models, replace `v2m100` with the corresponding model name. Example txt output from the provided metadata model \[[link](https://github.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/tree/master/metadata)\] can be downloaded [here](https://drive.google.com/open?id=1X4geSMtsHCztwmhuUimjFjEZGUImsA7L). 19 | 20 | 21 | ### Training 22 | 23 | To train the model, please run 24 |
25 | ` 26 | python main_video_person_reid.py --train-batch 16 --workers 0 --seq-len 4 --arch resnet50ta_surface_nu --width 224 --height 224 --dataset aictrack2 --use-surface --save-dir log --learning-rate 0.0001 --eval-step 50 --save-step 50 --gpu-devices 0 --re-ranking --metadata-model v2m100 --bstri 27 | ` 28 |
29 | 30 | `arch` can be `resnet50ta_surface_nu` (Temporal Attention with keypoint features, for AIC19 track 2) or `resnet50ta` (Temporal Attention, for AIC19 track 1). If using `resnet50ta`, do not use `--use-surface`. A sketch of the keypoint records consumed with `--use-surface` is shown below.
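For reference, each keypoint record produced by the CarKeypoints inference scripts (see `CarKeypoints/inference-imageset.lua`) is a single line of 36 x 3 space-separated numbers, i.e. (x, y, score) for each of the 36 keypoints, and `create_feature_files.py` copies one such line into each `keypoint_*_deepreid/carId/camId/imgId.txt` file. Below is a minimal parsing sketch; the function name is illustrative only, and the repository's own data loaders handle this internally.

```python
import numpy as np

def load_keypoint_file(path, num_keypoints=36):
    """Parse one keypoint txt file written by create_feature_files.py.

    The file holds a single line of num_keypoints * 3 numbers:
    x, y and confidence score for every keypoint, as produced by
    CarKeypoints/inference-imageset.lua on a 64 x 64 crop.
    """
    with open(path) as f:
        values = [float(v) for v in f.read().split()]
    assert len(values) == num_keypoints * 3, 'unexpected number of values'
    return np.asarray(values, dtype=np.float32).reshape(num_keypoints, 3)
```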
31 | 32 | ### Testing 33 | 34 | To test the model, please run 35 |
36 | ` 37 | python main_video_person_reid.py --train-batch 16 --workers 0 --seq-len 4 --arch resnet50ta_surface_nu --width 224 --height 224 --dataset aictrack2 --use-surface --evaluate --pretrained-model log/checkpoint_ep300.pth.tar --save-dir log-test --gpu-devices 0 --re-ranking --metadata-model v2m100 38 | ` 39 |
40 | Optionally, start from previously saved features without redoing inference 41 |
42 | ` 43 | python main_video_person_reid.py --dataset aictrack2 --save-dir log --re-ranking --metadata-model v2m100 --load-feature --feature-dir feature_dir 44 | ` 45 |
46 | `feature_dir` can point to a previously saved feature directory, e.g. `log/feature_ep0300`.
47 | 48 | The pre-trained model can be downloaded [here](https://drive.google.com/open?id=1jjwQhk8i4X12_DjCz9LlgrvL-9uKa2mE).
49 | In addition, the confusion matrix of the metadata model needs to be placed under `metadata/`. An example confusion matrix can be downloaded [here](https://drive.google.com/open?id=178oG9f8H58YgVWsk_KaxpWf_i3dr2wER). 50 | 51 | 52 | ### AIC19 track 1 53 | 54 | To generate features for our AIC19 track 1 testing \[[code](https://github.com/ipl-uw/2019-CVPR-AIC-Track-1-UWIPL)\], run 55 |
56 | ` 57 | python Graph_ModelDataGen.py 58 | ` 59 |
60 | The pretrained model can be downloaded [here](https://drive.google.com/file/d/1C-uE8nPA3Rtu8tkHptRS8J87sayrB7Nj/view?usp=sharing). The model should be put under `log/`.
61 | Besides, the data should be processed in a different manner:
62 | Create a `video2img` folder in the downloaded project (i.e., Video-Person-ReID/video2img/). 63 | Copy `crop_img.py` into the test folder of the downloaded dataset (i.e., aic19-track2-mtmc's aic19-track1-mtmc/test), create a folder `track1_test_img` in the same path (i.e., aic19-track1-mtmc/test/track1_test_img), and run `python crop_img.py`. After that, create a folder `track1_sct_img_test_big` and run `python crop_img_big.py`. Then, create a folder `log` in the downloaded project (i.e., Video-Person-ReID/log) and put the downloaded track 1 ReID model file in this folder. Finally, run `python Graph_ModelDataGen.py` to obtain the feature files (q_camids3_no_nms_big0510.npy, qf3_no_nms_big0510.npy and q_pids3_no_nms_big0510.npy). A minimal sketch of these steps is shown below.
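The following is a minimal sketch of the folder setup and script order described above. The paths are illustrative placeholders (adjust them to your setup), and the cropping scripts must first be copied from `video2img/` into the dataset's test folder as explained in the paragraph above.

```python
import os
import subprocess

def mkdir_if_missing(path):
    if not os.path.isdir(path):
        os.makedirs(path)

# Illustrative paths -- adjust to your local setup.
dataset_test = '/path_to_aic19-track1-mtmc/test'   # downloaded AIC19 track 1 test set
project_root = '/path_to_Video-Person-ReID'        # this repository

# Folders expected by the cropping scripts and by Graph_ModelDataGen.py.
mkdir_if_missing(os.path.join(project_root, 'video2img'))
mkdir_if_missing(os.path.join(dataset_test, 'track1_test_img'))
mkdir_if_missing(os.path.join(dataset_test, 'track1_sct_img_test_big'))
mkdir_if_missing(os.path.join(project_root, 'log'))  # put the downloaded track 1 ReID model here

# Run the cropping scripts from inside the dataset's test folder
# (copy crop_img.py and crop_img_big.py there from video2img/ first).
subprocess.check_call(['python', 'crop_img.py'], cwd=dataset_test)
subprocess.check_call(['python', 'crop_img_big.py'], cwd=dataset_test)

# Finally, generate the feature files (q_camids3_no_nms_big0510.npy, etc.).
subprocess.check_call(['python', 'Graph_ModelDataGen.py'], cwd=project_root)
```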
64 | 65 | ### Reference 66 | 67 | The code is based on Jiyang Gao's [Video-Person-ReID](https://github.com/jiyanggao/Video-Person-ReID).
68 | The visualization code is adapted from KaiyangZhou's [deep-person-reid](https://github.com/KaiyangZhou/deep-person-reid).
69 | The re-ranking code is modified based on zhunzhong07's [person-re-ranking](https://github.com/zhunzhong07/person-re-ranking). 70 | -------------------------------------------------------------------------------- /Video-Person-ReID/bases.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | 6 | 7 | class BaseDataset(object): 8 | """ 9 | Base class of reid dataset 10 | """ 11 | 12 | def get_imagedata_info(self, data): 13 | pids, cams = [], [] 14 | for _, pid, camid in data: 15 | pids += [pid] 16 | cams += [camid] 17 | pids = set(pids) 18 | cams = set(cams) 19 | num_pids = len(pids) 20 | num_cams = len(cams) 21 | num_imgs = len(data) 22 | return num_pids, num_imgs, num_cams 23 | 24 | def get_videodata_info(self, data, return_tracklet_stats=False): 25 | pids, cams, tracklet_stats = [], [], [] 26 | for img_paths, pid, camid in data: 27 | pids += [pid] 28 | cams += [camid] 29 | tracklet_stats += [len(img_paths)] 30 | pids = set(pids) 31 | cams = set(cams) 32 | num_pids = len(pids) 33 | num_cams = len(cams) 34 | num_tracklets = len(data) 35 | if return_tracklet_stats: 36 | return num_pids, num_tracklets, num_cams, tracklet_stats 37 | return num_pids, num_tracklets, num_cams 38 | 39 | 40 | 41 | def get_imagedata_info_ori(self, data): 42 | pids, cams = [], [] 43 | for _, pid, camid,orientation in data: 44 | pids += [pid] 45 | cams += [camid] 46 | pids = set(pids) 47 | cams = set(cams) 48 | num_pids = len(pids) 49 | num_cams = len(cams) 50 | num_imgs = len(data) 51 | return num_pids, num_imgs, num_cams 52 | 53 | def get_videodata_info_ori(self, data, return_tracklet_stats=False): 54 | pids, cams, tracklet_stats = [], [], [] 55 | for img_paths, pid, camid,orientation in data: 56 | pids += [pid] 57 | cams += [camid] 58 | tracklet_stats += [len(img_paths)] 59 | pids = set(pids) 60 | cams = set(cams) 61 | num_pids = len(pids) 62 | num_cams = len(cams) 63 | num_tracklets = len(data) 64 | if return_tracklet_stats: 65 | return num_pids, num_tracklets, num_cams, tracklet_stats 66 | return num_pids, num_tracklets, num_cams 67 | 68 | def get_imagedata_info_ori_iou(self, data): 69 | pids, cams = [], [] 70 | for _, pid, camid,orientation,iou in data: 71 | pids += [pid] 72 | cams += [camid] 73 | pids = set(pids) 74 | cams = set(cams) 75 | num_pids = len(pids) 76 | num_cams = len(cams) 77 | num_imgs = len(data) 78 | return num_pids, num_imgs, num_cams 79 | 80 | def get_videodata_info_ori_iou(self, data, return_tracklet_stats=False): 81 | pids, cams, tracklet_stats = [], [], [] 82 | for img_paths, pid, camid,orientation,iou in data: 83 | pids += [pid] 84 | cams += [camid] 85 | tracklet_stats += [len(img_paths)] 86 | pids = set(pids) 87 | cams = set(cams) 88 | num_pids = len(pids) 89 | num_cams = len(cams) 90 | num_tracklets = len(data) 91 | if return_tracklet_stats: 92 | return num_pids, num_tracklets, num_cams, tracklet_stats 93 | return num_pids, num_tracklets, num_cams 94 | 95 | def print_dataset_statistics(self): 96 | raise NotImplementedError 97 | 98 | 99 | class BaseImageDataset(BaseDataset): 100 | """ 101 | Base class of image reid dataset 102 | """ 103 | 104 | def print_dataset_statistics(self, train, query, gallery): 105 | num_train_pids, num_train_imgs, num_train_cams = self.get_imagedata_info(train) 106 | num_query_pids, num_query_imgs, num_query_cams = self.get_imagedata_info(query) 107 | num_gallery_pids, num_gallery_imgs, num_gallery_cams = 
self.get_imagedata_info(gallery) 108 | 109 | print("Dataset statistics:") 110 | print(" ----------------------------------------") 111 | print(" subset | # ids | # images | # cameras") 112 | print(" ----------------------------------------") 113 | print(" train | {:5d} | {:8d} | {:9d}".format(num_train_pids, num_train_imgs, num_train_cams)) 114 | print(" query | {:5d} | {:8d} | {:9d}".format(num_query_pids, num_query_imgs, num_query_cams)) 115 | print(" gallery | {:5d} | {:8d} | {:9d}".format(num_gallery_pids, num_gallery_imgs, num_gallery_cams)) 116 | print(" ----------------------------------------") 117 | 118 | 119 | class BaseVideoDataset(BaseDataset): 120 | """ 121 | Base class of video reid dataset 122 | """ 123 | 124 | def print_dataset_statistics(self, train, query, gallery): 125 | num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \ 126 | self.get_videodata_info(train, return_tracklet_stats=True) 127 | 128 | num_query_pids, num_query_tracklets, num_query_cams, query_tracklet_stats = \ 129 | self.get_videodata_info(query, return_tracklet_stats=True) 130 | 131 | num_gallery_pids, num_gallery_tracklets, num_gallery_cams, gallery_tracklet_stats = \ 132 | self.get_videodata_info(gallery, return_tracklet_stats=True) 133 | 134 | tracklet_stats = train_tracklet_stats + query_tracklet_stats + gallery_tracklet_stats 135 | min_num = np.min(tracklet_stats) 136 | max_num = np.max(tracklet_stats) 137 | avg_num = np.mean(tracklet_stats) 138 | 139 | print("Dataset statistics:") 140 | print(" -------------------------------------------") 141 | print(" subset | # ids | # tracklets | # cameras") 142 | print(" -------------------------------------------") 143 | print(" train | {:5d} | {:11d} | {:9d}".format(num_train_pids, num_train_tracklets, num_train_cams)) 144 | print(" query | {:5d} | {:11d} | {:9d}".format(num_query_pids, num_query_tracklets, num_query_cams)) 145 | print(" gallery | {:5d} | {:11d} | {:9d}".format(num_gallery_pids, num_gallery_tracklets, num_gallery_cams)) 146 | print(" -------------------------------------------") 147 | print(" number of images per tracklet: {} ~ {}, average {:.2f}".format(min_num, max_num, avg_num)) 148 | print(" -------------------------------------------") 149 | 150 | 151 | def print_dataset_statistics_ori(self, train, query, gallery): 152 | num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \ 153 | self.get_videodata_info_ori(train, return_tracklet_stats=True) 154 | 155 | num_query_pids, num_query_tracklets, num_query_cams, query_tracklet_stats = \ 156 | self.get_videodata_info_ori(query, return_tracklet_stats=True) 157 | 158 | num_gallery_pids, num_gallery_tracklets, num_gallery_cams, gallery_tracklet_stats = \ 159 | self.get_videodata_info_ori(gallery, return_tracklet_stats=True) 160 | 161 | tracklet_stats = train_tracklet_stats + query_tracklet_stats + gallery_tracklet_stats 162 | min_num = np.min(tracklet_stats) 163 | max_num = np.max(tracklet_stats) 164 | avg_num = np.mean(tracklet_stats) 165 | 166 | print("Dataset statistics:") 167 | print(" -------------------------------------------") 168 | print(" subset | # ids | # tracklets | # cameras") 169 | print(" -------------------------------------------") 170 | print(" train | {:5d} | {:11d} | {:9d}".format(num_train_pids, num_train_tracklets, num_train_cams)) 171 | print(" query | {:5d} | {:11d} | {:9d}".format(num_query_pids, num_query_tracklets, num_query_cams)) 172 | print(" gallery | {:5d} | {:11d} | {:9d}".format(num_gallery_pids, 
num_gallery_tracklets, num_gallery_cams)) 173 | print(" -------------------------------------------") 174 | print(" number of images per tracklet: {} ~ {}, average {:.2f}".format(min_num, max_num, avg_num)) 175 | print(" -------------------------------------------") 176 | 177 | def print_dataset_statistics_ori_iou(self, train, query, gallery): 178 | num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \ 179 | self.get_videodata_info_ori_iou(train, return_tracklet_stats=True) 180 | 181 | num_query_pids, num_query_tracklets, num_query_cams, query_tracklet_stats = \ 182 | self.get_videodata_info_ori_iou(query, return_tracklet_stats=True) 183 | 184 | num_gallery_pids, num_gallery_tracklets, num_gallery_cams, gallery_tracklet_stats = \ 185 | self.get_videodata_info_ori_iou(gallery, return_tracklet_stats=True) 186 | 187 | tracklet_stats = train_tracklet_stats + query_tracklet_stats + gallery_tracklet_stats 188 | min_num = np.min(tracklet_stats) 189 | max_num = np.max(tracklet_stats) 190 | avg_num = np.mean(tracklet_stats) 191 | 192 | print("Dataset statistics:") 193 | print(" -------------------------------------------") 194 | print(" subset | # ids | # tracklets | # cameras") 195 | print(" -------------------------------------------") 196 | print(" train | {:5d} | {:11d} | {:9d}".format(num_train_pids, num_train_tracklets, num_train_cams)) 197 | print(" query | {:5d} | {:11d} | {:9d}".format(num_query_pids, num_query_tracklets, num_query_cams)) 198 | print(" gallery | {:5d} | {:11d} | {:9d}".format(num_gallery_pids, num_gallery_tracklets, num_gallery_cams)) 199 | print(" -------------------------------------------") 200 | print(" number of images per tracklet: {} ~ {}, average {:.2f}".format(min_num, max_num, avg_num)) 201 | print(" -------------------------------------------") -------------------------------------------------------------------------------- /Video-Person-ReID/data_util/convert_metadata_imglistprob.py: -------------------------------------------------------------------------------- 1 | from os import listdir, mkdir 2 | from os.path import join, split, isfile, isdir 3 | 4 | 5 | conversions = [ 6 | ('./track2-gallery-query-metadata-v2m100/test-prob-v2m100.log', 7 | './track2-gallery-query-metadata-v2m100/prob_v2m100.txt', 8 | './track2-gallery-query-metadata-v2m100/imglist_v2m100.txt'), 9 | ] 10 | 11 | img_gline = {} 12 | with open('test_track.txt', 'r') as f: 13 | for gg, line in enumerate(f): 14 | g_line = gg+1 15 | print(g_line) 16 | 17 | imgs = line.replace("\n", "").strip().split(" ") 18 | for i, img in enumerate(imgs): 19 | img_gline[img] = g_line 20 | 21 | img_qline = {} 22 | with open('query_track.txt', 'r') as f: 23 | for qq, line in enumerate(f): 24 | q_line = qq+1 25 | print(q_line) 26 | 27 | imgs = line.replace("\n", "").strip().split(" ") 28 | for i, img in enumerate(imgs): 29 | img_qline[img] = q_line 30 | assert int(imgs[0].replace('.jpg','')) == q_line # make sure is ordered 31 | 32 | 33 | for raw_filename, prob_filename, imglist_filename in conversions: 34 | metadatas = [] 35 | with open(raw_filename, 'r') as f: 36 | buf = '' 37 | i = 0 38 | for line in f: 39 | line = line.strip() 40 | if i % 4 == 0: 41 | metadatas.append([]) 42 | i += 1 43 | else: 44 | buf = buf + ' ' + line 45 | #if line[-2:] != ']]': 46 | # continue 47 | #print(buf) 48 | l = buf.rfind('[[') 49 | r = buf.find(']]') 50 | if l == -1 and r == -1: 51 | metadatas[-1].append(buf.strip()) 52 | elif l < r: 53 | metadatas[-1].append(buf[l+2:r].strip()) 54 | else: 55 | 
print('invalid buf: ' + buf) 56 | buf = '' 57 | i += 1 58 | if len(metadatas[-1]) == 0: 59 | metadatas = metadatas[:-1] 60 | print('images in metadatas: %d' % len(metadatas)) 61 | 62 | prob_filename_test = prob_filename[:-4] + '_test.txt' 63 | imglist_filename_test = imglist_filename[:-4] + '_test.txt' 64 | f_prob = open(prob_filename_test, 'w') 65 | f_imglist = open(imglist_filename_test, 'w') 66 | i = 0 67 | for img in img_gline: 68 | f_prob.write('%d/%d image\n' % (i, len(img_gline))) 69 | for metadata in metadatas[img_gline[img]-1 + 1052]: 70 | f_prob.write(metadata+'\n') 71 | f_imglist.write(img+'\n') 72 | i+=1 73 | f_prob.close() 74 | f_imglist.close() 75 | 76 | prob_filename_query = prob_filename[:-4] + '_query.txt' 77 | imglist_filename_query = imglist_filename[:-4] + '_query.txt' 78 | f_prob = open(prob_filename_query, 'w') 79 | f_imglist = open(imglist_filename_query, 'w') 80 | i = 0 81 | for img in img_qline: 82 | f_prob.write('%d/%d image\n' % (i, len(img_qline))) 83 | for metadata in metadatas[img_qline[img]-1]: 84 | f_prob.write(metadata+'\n') 85 | f_imglist.write(img+'\n') 86 | i+=1 87 | f_prob.close() 88 | f_imglist.close() 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /Video-Person-ReID/data_util/create_feature_files.py: -------------------------------------------------------------------------------- 1 | from os import listdir, mkdir 2 | from os.path import join, split, isfile, isdir 3 | 4 | 5 | image_sets = [ 6 | #'train', 7 | 'query', 8 | 'test', 9 | ] 10 | 11 | dummys = [ 12 | '', 13 | #'_dummy', 14 | ] 15 | 16 | features = [ 17 | 'keypoint', 18 | ] 19 | 20 | aic_track2_dir = '/path_to_aic19-track2-reid/' 21 | 22 | for image_set in image_sets: 23 | for dummy in dummys: 24 | image_path = aic_track2_dir + 'image_%s_deepreid%s' % (image_set, dummy) 25 | for feature in features: 26 | print((image_set, dummy, feature)) 27 | feature_path = aic_track2_dir + '%s_%s_deepreid%s' % (feature, image_set, dummy) 28 | mkdir(feature_path) 29 | 30 | feature_file = aic_track2_dir + '%s-%s.txt' % (feature, image_set) 31 | lines = [] 32 | with open(feature_file, 'r') as f: 33 | lines = f.readlines() 34 | 35 | pids = [f for f in listdir(image_path) if isdir(join(image_path, f))] 36 | pids.sort() 37 | for pid in pids: 38 | print(pid) 39 | pid_path = join(feature_path, pid) 40 | pid_path_img = join(image_path, pid) 41 | mkdir(pid_path) 42 | cids = [f for f in listdir(pid_path_img) if isdir(join(pid_path_img, f))] 43 | for cid in cids: 44 | cid_path = join(pid_path, cid) 45 | cid_path_img = join(pid_path_img, cid) 46 | mkdir(cid_path) 47 | imgs = [f for f in listdir(cid_path_img) if isfile(join(cid_path_img, f)) and f[-4:] == '.jpg'] 48 | for img in imgs: 49 | imgname = img[:-4] 50 | imgid = imgname.split('_')[-1] 51 | feature_file = join(cid_path, imgname+'.txt') 52 | with open(feature_file, 'w') as file: 53 | file.write(lines[int(imgid)-1]) 54 | -------------------------------------------------------------------------------- /Video-Person-ReID/data_util/create_metadata_files.py: -------------------------------------------------------------------------------- 1 | from os import listdir, mkdir 2 | from os.path import join, split, isfile, isdir 3 | 4 | 5 | image_sets = [ 6 | 'query', 7 | 'test', 8 | ] 9 | 10 | dummys = [ 11 | '', 12 | #'_dummy', 13 | ] 14 | 15 | models = [ 16 | 'v2m100', 17 | ] 18 | 19 | aic_track2_dir = '/path_to_aic19-track2-reid/' 20 | 21 | for model in models: 22 | for image_set in image_sets: 23 | 
for dummy in dummys: 24 | print((model, image_set, dummy)) 25 | # parse metadata probability from file 26 | metadatas = [] 27 | with open(aic_track2_dir + 'prob_%s_%s.txt'%(model, image_set), 'r') as f: 28 | for i, line in enumerate(f): 29 | line = line.strip() 30 | if i % 4 == 0: 31 | metadatas.append([]) 32 | else: 33 | l = line.rfind('[') 34 | r = line.find(']') 35 | if l == -1 and r == -1: 36 | metadatas[-1].append(line.strip()) 37 | elif l < r: 38 | metadatas[-1].append(line[l+1:r].strip()) 39 | else: 40 | print('invalid line: ' + line) 41 | if len(metadatas[-1]) == 0: 42 | metadatas = metadatas[:-1] 43 | print('images in metadatas: %d' % len(metadatas)) 44 | 45 | # read image filenames from file 46 | img_orders = {} 47 | with open(aic_track2_dir + 'imglist_%s_%s.txt'%(model, image_set), 'r') as f: 48 | for i, line in enumerate(f): 49 | pos = line.find('.jpg') 50 | imgid = line[pos-6:pos] 51 | #print(imgid) 52 | if imgid in img_orders: 53 | print('duplicate images: '+imgid) 54 | img_orders[imgid] = i 55 | print('images in image list: %d' % len(img_orders)) 56 | 57 | 58 | image_path = aic_track2_dir + 'image_%s_deepreid%s' % (image_set, dummy) 59 | metadata_path = aic_track2_dir + 'metadata_%s_%s_deepreid%s' % (model, image_set, dummy) 60 | mkdir(metadata_path) 61 | 62 | pids = [f for f in listdir(image_path) if isdir(join(image_path, f))] 63 | pids.sort() 64 | for pid in pids: 65 | print(pid) 66 | pid_path = join(metadata_path, pid) 67 | pid_path_img = join(image_path, pid) 68 | mkdir(pid_path) 69 | cids = [f for f in listdir(pid_path_img) if isdir(join(pid_path_img, f))] 70 | for cid in cids: 71 | cid_path = join(pid_path, cid) 72 | cid_path_img = join(pid_path_img, cid) 73 | mkdir(cid_path) 74 | imgs = [f for f in listdir(cid_path_img) if isfile(join(cid_path_img, f)) and f[-4:] == '.jpg'] 75 | for img in imgs: 76 | imgname = img[:-4] 77 | imgid = imgname.split('_')[-1] 78 | metadata_file = join(cid_path, imgname+'.txt') 79 | with open(metadata_file, 'w') as file: 80 | for metadata in metadatas[img_orders[imgid]]: 81 | file.write(metadata+'\n') 82 | -------------------------------------------------------------------------------- /Video-Person-ReID/data_util/xml_reader_testdata.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Feb 5 00:16:00 2019 4 | 5 | @author: hungminhsu 6 | """ 7 | 8 | import os 9 | import shutil 10 | def copy_rename(src_dir,old_file_name,dst_dir ,new_file_name): 11 | src_file = os.path.join(src_dir, old_file_name) 12 | #print("src_file:"+src_file) 13 | shutil.copy(src_file,dst_dir) 14 | 15 | dst_file = os.path.join(dst_dir, old_file_name) 16 | #print("dst_file:"+dst_file) 17 | new_dst_file_name = os.path.join(dst_dir, new_file_name) 18 | #print("new_dst_file_name:"+new_dst_file_name) 19 | os.rename(dst_file, new_dst_file_name) 20 | 21 | ########################################################################### 22 | 23 | aic_track2_dir = '/path_to_aic19-track2-reid/' 24 | 25 | source_path_query = aic_track2_dir + "image_query/" 26 | path_query = aic_track2_dir + "image_query_deepreid/" 27 | os.mkdir(path_query) 28 | 29 | q_img_camID={} 30 | q_img_carID={} 31 | 32 | q_imgs = [f for f in os.listdir(source_path_query)] 33 | q_imgs.sort() 34 | with open('query_track.txt', 'w') as f: 35 | f.write('\n'.join(q_imgs)) 36 | for i, img in enumerate(q_imgs): 37 | q_img_camID[img] = 'c901' # camID for query starts from 901 38 | q_img_carID[img] = '%04d'%(i+1) 39 | for i, img in 
enumerate(q_imgs): 40 | print(i) 41 | #print(s) 42 | carID = q_img_carID[img] 43 | camID = q_img_camID[img] 44 | 45 | 46 | if not os.path.isdir(path_query+"/"+carID+"/"): 47 | os.mkdir(path_query+"/"+carID+"/") 48 | if not os.path.isdir(path_query+"/"+carID+"/"+camID+"/"): 49 | os.mkdir(path_query+"/"+carID+"/"+camID+"/") 50 | copy_rename(source_path_query,img,path_query+"/"+carID+"/"+camID+"/",'%s'%(img)) 51 | 52 | source_path_test = aic_track2_dir + "image_test/" 53 | path_test = aic_track2_dir +"image_test_deepreid/" 54 | os.mkdir(path_test) 55 | 56 | g_img_camID={} 57 | g_img_carID={} 58 | g_imgs = [] 59 | with open('test_track.txt', 'r') as f: 60 | for i, line in enumerate(f): 61 | s = line.replace('\n', '').strip().split(' ') 62 | g_imgs.append(s) 63 | for img in s: 64 | g_img_camID[img] = 'c001' 65 | g_img_carID[img] = '%04d'%(i+1) 66 | for l, s in enumerate(g_imgs): 67 | print(l) 68 | #print(s) 69 | for i in range(0,len(s)): 70 | 71 | carID = g_img_carID[s[i]] 72 | camID = g_img_camID[s[i]] 73 | 74 | 75 | if not os.path.isdir(path_test+"/"+carID+"/"): 76 | os.mkdir(path_test+"/"+carID+"/") 77 | if not os.path.isdir(path_test+"/"+carID+"/"+camID+"/"): 78 | os.mkdir(path_test+"/"+carID+"/"+camID+"/") 79 | copy_rename(source_path_test,s[i],path_test+"/"+carID+"/"+camID+"/",'%04d_%s'%(i, s[i])) 80 | 81 | -------------------------------------------------------------------------------- /Video-Person-ReID/data_util/xml_reader_traindata.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Feb 5 00:16:00 2019 4 | 5 | @author: hungminhsu 6 | """ 7 | 8 | 9 | 10 | import os 11 | import shutil 12 | def copy_rename(src_dir,old_file_name,dst_dir ,new_file_name): 13 | src_file = os.path.join(src_dir, old_file_name) 14 | #print("src_file:"+src_file) 15 | shutil.copy(src_file,dst_dir) 16 | 17 | dst_file = os.path.join(dst_dir, old_file_name) 18 | #print("dst_file:"+dst_file) 19 | new_dst_file_name = os.path.join(dst_dir, new_file_name) 20 | #print("new_dst_file_name:"+new_dst_file_name) 21 | os.rename(dst_file, new_dst_file_name) 22 | 23 | ########################################################################### 24 | 25 | import xml.etree.ElementTree as ET 26 | xmlp = ET.XMLParser(encoding="utf-8") 27 | tree = ET.parse('train_label.xml', parser=xmlp) 28 | root = tree.getroot() 29 | 30 | 31 | img_camID={} 32 | img_carID={} 33 | 34 | for neighbor in root.iter('Item'): 35 | #print(neighbor.attrib) 36 | #print(neighbor.get('imageName')) 37 | #print(neighbor.get('vehicleID')) 38 | #print(neighbor.get('cameraID')) 39 | img_camID[neighbor.get('imageName')] = neighbor.get('cameraID') 40 | img_carID[neighbor.get('imageName')] = neighbor.get('vehicleID') 41 | 42 | carID_num={} 43 | 44 | aic_track2_dir = '/path_to_aic19-track2-reid/' 45 | 46 | source_path = aic_track2_dir + "image_train/" 47 | path_train = aic_track2_dir + "image_train_deepreid/" 48 | path_query = aic_track2_dir + "image_train_deepreid_query/" 49 | path_query_single = aic_track2_dir + "image_train_deepreid_query_single/" 50 | path_gallery = aic_track2_dir + "image_train_deepreid_gallery/" 51 | os.mkdir(path_train) 52 | os.mkdir(path_query) 53 | os.mkdir(path_query_single) 54 | os.mkdir(path_gallery) 55 | file = open("train_track.txt","r") 56 | for line in file: 57 | #print(line) 58 | s = line.replace(" \n","").split(" ") 59 | #print(s) 60 | # find single query i as the minimum image number in s 61 | tmp = [int(c[:-4]) for c in s] 62 | sq = 
tmp.index(min(tmp)) 63 | for i in range(0,len(s)): 64 | 65 | carID = img_carID[s[i]] 66 | camID = img_camID[s[i]] 67 | 68 | 69 | if len(carID_num)<160 or carID in carID_num: 70 | if carID in carID_num: 71 | #if len(carID_num[carID])==1 and carID_num[carID][0]!=camID: 72 | # carID_num[carID].append(camID) #ccc 73 | if not camID in carID_num[carID]: 74 | carID_num[carID].append(camID) 75 | else: 76 | carID_num[carID]=[] 77 | carID_num[carID].append(camID) 78 | 79 | #print(carID_num[carID]) 80 | #print(len(carID_num)) 81 | #if len(carID_num)<160: 82 | if carID in carID_num and False: 83 | if len(carID_num[carID])==1: 84 | ###camID = carID_num[carID][0] #ccc 85 | if not os.path.isdir(path_query+"/"+carID+"/"): 86 | os.mkdir(path_query+"/"+carID+"/") 87 | if not os.path.isdir(path_query+"/"+carID+"/"+camID+"/"): 88 | os.mkdir(path_query+"/"+carID+"/"+camID+"/") 89 | copy_rename(source_path,s[i],path_query+"/"+carID+"/"+camID+"/",'%04d_%s'%(i,s[i])) 90 | if i == sq: # single query 91 | if not os.path.isdir(path_query_single+"/"+carID+"/"): 92 | os.mkdir(path_query_single+"/"+carID+"/") 93 | if not os.path.isdir(path_query_single+"/"+carID+"/"+camID+"/"): 94 | os.mkdir(path_query_single+"/"+carID+"/"+camID+"/") 95 | copy_rename(source_path,s[i],path_query_single+"/"+carID+"/"+camID+"/",'%04d_%s'%(i,s[i])) 96 | 97 | #elif len(carID_num[carID])==2: #ccc 98 | else: 99 | #print("111111111") 100 | ###camID = carID_num[carID][1] #ccc 101 | if not os.path.isdir(path_gallery+"/"+carID+"/"): 102 | os.mkdir(path_gallery+"/"+carID+"/") 103 | if not os.path.isdir(path_gallery+"/"+carID+"/"+camID+"/"): 104 | os.mkdir(path_gallery+"/"+carID+"/"+camID+"/") 105 | copy_rename(source_path,s[i],path_gallery+"/"+carID+"/"+camID+"/",'%04d_%s'%(i,s[i])) 106 | else: 107 | #if carID not in carID_num: 108 | if not os.path.isdir(path_train+"/"+carID+"/"): 109 | os.mkdir(path_train+"/"+carID+"/") 110 | if not os.path.isdir(path_train+"/"+carID+"/"+camID+"/"): 111 | os.mkdir(path_train+"/"+carID+"/"+camID+"/") 112 | copy_rename(source_path,s[i],path_train+"/"+carID+"/"+camID+"/",'%04d_%s'%(i,s[i])) 113 | 114 | 115 | -------------------------------------------------------------------------------- /Video-Person-ReID/eval_metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import numpy as np 3 | import copy 4 | 5 | import os.path as osp 6 | from os import mkdir 7 | 8 | def dump_matches_imgids(output_dir, matches_imgids): 9 | if not osp.isdir(output_dir): 10 | mkdir(output_dir) 11 | for q_imgid, g_imgids in matches_imgids.iteritems(): 12 | with open(osp.join(output_dir, '%s.txt' % q_imgid), 'w') as f: 13 | for g_imgid in g_imgids: 14 | f.write('%s\n' % g_imgid) 15 | 16 | def dump_query_result(output_dir, matches_imgids, top_N=100): 17 | if not osp.isdir(output_dir): 18 | mkdir(output_dir) 19 | with open(osp.join(output_dir, 'track2.txt'), 'w') as f: 20 | for q_imgid, g_imgids in sorted(matches_imgids.iteritems()): 21 | g_imgids = [str(imgid) for imgid in g_imgids] 22 | if top_N > 0: 23 | g_imgids = g_imgids[:top_N] 24 | st = ' '.join(g_imgids) 25 | f.write(st + '\n') 26 | 27 | def evaluate_imgids(distmat, q_pids, g_pids, q_camids, g_camids, q_imgids, g_imgids, max_rank=50, top_N=0): 28 | ''' 29 | mAP and cmc in per-image basis 30 | g_imgids, g_imgids: list of list of imgid 31 | return all_cmc, mAP, and matches_imgids (map from q_imgids to g_imgids) 32 | ''' 33 | 34 | num_q, num_g = distmat.shape 35 | 36 | assert(len(q_imgids) 
== num_q and len(g_imgids) == num_g) 37 | 38 | q_counts = [len(imgs) for imgs in q_imgids] 39 | g_counts = [len(imgs) for imgs in g_imgids] 40 | num_gi = sum(g_counts) 41 | #print('num_q = %d, num_g = %d, num_gi = %d' % (num_q, num_g, num_gi)) 42 | 43 | if num_gi < max_rank: 44 | max_rank = num_gi 45 | print("Note: number of gallery samples is quite small, got {}".format(num_gi)) 46 | indices = np.argsort(distmat, axis=1) 47 | # count gt and prediction (first imgid only) 48 | matches_gt_pred = {} 49 | for q_idx in range(num_q): 50 | q_pid = q_pids[q_idx] 51 | q_imgid = q_imgids[q_idx][0] 52 | matches_gt_pred[(q_pid, q_imgid)] = ([], []) 53 | for gi_idx in range(num_g): 54 | g_idx = indices[q_idx, gi_idx] 55 | g_pid = g_pids[g_idx] 56 | g_imgid = g_imgids[g_idx][0] 57 | matches_gt_pred[(q_pid, q_imgid)][1].append((g_pid, g_imgid)) 58 | if g_pid == q_pid: 59 | matches_gt_pred[(q_pid, q_imgid)][0].append((g_pid, g_imgid)) 60 | # expand to per-gallery image 61 | indices_expanded = np.zeros((num_q, num_gi), dtype=np.int32) 62 | for q_idx in range(num_q): 63 | pos = 0 64 | for s_idx in range(num_g): 65 | g_idx = indices[q_idx][s_idx] 66 | g_count = g_counts[g_idx] 67 | indices_expanded[q_idx][pos:pos+g_count] = g_idx 68 | pos += g_count 69 | indices = indices_expanded 70 | # create matches_imgids from indices_expanded 71 | matches_imgids = {} 72 | for q_idx in range(num_q): 73 | matches_imgids[q_imgids[q_idx][0]] = [] 74 | g_poss = [0] * num_g 75 | for gi_idx in range(num_gi): 76 | g_idx = indices_expanded[q_idx][gi_idx] 77 | #print('q_idx = %d, gi_idx = %d, g_idx = %d' % (q_idx, gi_idx, g_idx)) 78 | #print('g_poss = ' + str(g_poss)) 79 | matches_imgids[q_imgids[q_idx][0]].append(g_imgids[g_idx][g_poss[g_idx]]) 80 | g_poss[g_idx] += 1 81 | if top_N > 0: 82 | matches_imgids[q_imgids[q_idx][0]] = matches_imgids[q_imgids[q_idx][0]][:top_N] 83 | #print(str(q_imgids[q_idx][0]) + ': ' + str(matches_imgids[q_imgids[q_idx][0]])) 84 | 85 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) 86 | 87 | # find false positive result 88 | matches_imgids_FP = {} 89 | top_FP = 3 90 | for q_idx in range(num_q): 91 | matches_imgids_FP[q_imgids[q_idx][0]] = [] 92 | FPs = [] 93 | for gi_idx in range(min(top_FP, num_g)): 94 | if matches[q_idx, gi_idx] == 0: 95 | FPs.append(indices[q_idx, gi_idx]) 96 | 97 | g_poss = [0] * num_g 98 | for gi_idx in range(num_gi): 99 | g_idx = indices_expanded[q_idx][gi_idx] 100 | if g_idx in FPs: 101 | #print('q_idx = %d, gi_idx = %d, g_idx = %d' % (q_idx, gi_idx, g_idx)) 102 | #print('g_poss = ' + str(g_poss)) 103 | matches_imgids_FP[q_imgids[q_idx][0]].append(g_imgids[g_idx][g_poss[g_idx]]) 104 | g_poss[g_idx] += 1 105 | 106 | # compute cmc curve for each query 107 | all_cmc = [] 108 | all_AP = [] 109 | num_valid_q = 0. 110 | for q_idx in range(num_q): 111 | # get query pid and camid 112 | q_pid = q_pids[q_idx] 113 | q_camid = q_camids[q_idx] 114 | 115 | # remove gallery samples that have the same pid and camid with query 116 | order = indices[q_idx] 117 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) 118 | keep = np.invert(remove) 119 | #keep += True ###### keep everything 120 | 121 | # compute cmc curve 122 | orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches 123 | if not np.any(orig_cmc): 124 | # this condition is true when query identity does not appear in gallery 125 | continue 126 | 127 | cmc = orig_cmc.cumsum() 128 | cmc[cmc > 1] = 1 129 | 130 | all_cmc.append(cmc[:max_rank]) 131 | num_valid_q += 1. 
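        # Illustrative note (added annotation, not from the original source):
        # the block below computes standard retrieval average precision. For a
        # hypothetical orig_cmc of [1, 0, 1, 0], precision at the two correct
        # matches is 1/1 and 2/3, so AP = (1 + 2/3) / 2, about 0.833.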
132 | 133 | # compute average precision 134 | # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision 135 | num_rel = orig_cmc.sum() 136 | tmp_cmc = orig_cmc.cumsum() 137 | tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)] 138 | tmp_cmc = np.asarray(tmp_cmc) * orig_cmc 139 | if top_N == 0: 140 | AP = tmp_cmc.sum() / num_rel 141 | else: 142 | AP = tmp_cmc[:top_N].sum() / num_rel 143 | all_AP.append(AP) 144 | 145 | #print('%s %s AP: %f, cmc[0]: %f' % (q_pids[q_idx], q_imgids[q_idx], AP, cmc[0])) 146 | #if AP < cmc[0]: 147 | # print(orig_cmc[:top_N]) 148 | 149 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery" 150 | 151 | all_cmc = np.asarray(all_cmc).astype(np.float32) 152 | all_cmc = all_cmc.sum(0) / num_valid_q 153 | mAP = np.mean(all_AP) 154 | 155 | return all_cmc, mAP, matches_imgids, matches_imgids_FP, matches_gt_pred 156 | 157 | 158 | 159 | def evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50, top_N=0): 160 | num_q, num_g = distmat.shape 161 | if num_g < max_rank: 162 | max_rank = num_g 163 | print("Note: number of gallery samples is quite small, got {}".format(num_g)) 164 | indices = np.argsort(distmat, axis=1) 165 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) 166 | 167 | # compute cmc curve for each query 168 | all_cmc = [] 169 | all_AP = [] 170 | num_valid_q = 0. 171 | for q_idx in range(num_q): 172 | # get query pid and camid 173 | q_pid = q_pids[q_idx] 174 | q_camid = q_camids[q_idx] 175 | 176 | # remove gallery samples that have the same pid and camid with query 177 | order = indices[q_idx] 178 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) 179 | keep = np.invert(remove) 180 | #keep += True ###### keep everything 181 | 182 | # compute cmc curve 183 | orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches 184 | if not np.any(orig_cmc): 185 | # this condition is true when query identity does not appear in gallery 186 | continue 187 | 188 | cmc = orig_cmc.cumsum() 189 | cmc[cmc > 1] = 1 190 | 191 | all_cmc.append(cmc[:max_rank]) 192 | num_valid_q += 1. 193 | 194 | # compute average precision 195 | # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision 196 | num_rel = orig_cmc.sum() 197 | tmp_cmc = orig_cmc.cumsum() 198 | tmp_cmc = [x / (i+1.) 
for i, x in enumerate(tmp_cmc)] 199 | tmp_cmc = np.asarray(tmp_cmc) * orig_cmc 200 | if top_N == 0: 201 | AP = tmp_cmc.sum() / num_rel 202 | else: 203 | AP = tmp_cmc[:top_N].sum() / num_rel 204 | all_AP.append(AP) 205 | 206 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery" 207 | 208 | all_cmc = np.asarray(all_cmc).astype(np.float32) 209 | all_cmc = all_cmc.sum(0) / num_valid_q 210 | mAP = np.mean(all_AP) 211 | 212 | return all_cmc, mAP 213 | 214 | 215 | -------------------------------------------------------------------------------- /Video-Person-ReID/iotools.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import os 4 | import os.path as osp 5 | import errno 6 | import json 7 | import shutil 8 | 9 | import torch 10 | 11 | 12 | def mkdir_if_missing(directory): 13 | if not osp.exists(directory): 14 | try: 15 | os.makedirs(directory) 16 | except OSError as e: 17 | if e.errno != errno.EEXIST: 18 | raise 19 | 20 | 21 | def check_isfile(path): 22 | isfile = osp.isfile(path) 23 | if not isfile: 24 | print("=> Warning: no file found at '{}' (ignored)".format(path)) 25 | return isfile 26 | 27 | 28 | def read_json(fpath): 29 | with open(fpath, 'r') as f: 30 | obj = json.load(f) 31 | return obj 32 | 33 | 34 | def write_json(obj, fpath): 35 | mkdir_if_missing(osp.dirname(fpath)) 36 | with open(fpath, 'w') as f: 37 | json.dump(obj, f, indent=4, separators=(',', ': ')) 38 | 39 | 40 | def save_checkpoint(state, is_best=False, fpath='checkpoint.pth.tar'): 41 | if len(osp.dirname(fpath)) != 0: 42 | mkdir_if_missing(osp.dirname(fpath)) 43 | torch.save(state, fpath) 44 | if is_best: 45 | shutil.copy(fpath, osp.join(osp.dirname(fpath), 'best_model.pth.tar')) -------------------------------------------------------------------------------- /Video-Person-ReID/losses.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | 8 | import numpy as np 9 | 10 | """ 11 | Shorthands for loss: 12 | - CrossEntropyLabelSmooth: xent 13 | - TripletLoss: htri 14 | - CenterLoss: cent 15 | """ 16 | __all__ = ['CrossEntropyLabelSmooth', 'TripletLoss', 'CenterLoss'] 17 | 18 | class CrossEntropyLabelSmooth(nn.Module): 19 | """Cross entropy loss with label smoothing regularizer. 20 | 21 | Reference: 22 | Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016. 23 | Equation: y = (1 - epsilon) * y + epsilon / K. 24 | 25 | Args: 26 | num_classes (int): number of classes. 27 | epsilon (float): weight. 
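    Illustrative example (annotation, not part of the original docstring):
    with num_classes = 4 and epsilon = 0.1, a one-hot target [1, 0, 0, 0]
    is smoothed to [0.925, 0.025, 0.025, 0.025].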
28 | """ 29 | def __init__(self, num_classes, epsilon=0.1, use_gpu=True): 30 | super(CrossEntropyLabelSmooth, self).__init__() 31 | self.num_classes = num_classes 32 | self.epsilon = epsilon 33 | self.use_gpu = use_gpu 34 | self.logsoftmax = nn.LogSoftmax(dim=1) 35 | 36 | def forward(self, inputs, targets): 37 | """ 38 | Args: 39 | inputs: prediction matrix (before softmax) with shape (batch_size, num_classes) 40 | targets: ground truth labels with shape (num_classes) 41 | """ 42 | log_probs = self.logsoftmax(inputs) 43 | targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1) 44 | if self.use_gpu: targets = targets.cuda() 45 | targets = Variable(targets, requires_grad=False) 46 | targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes 47 | loss = (- targets * log_probs).mean(0).sum() 48 | return loss 49 | 50 | def augment_surfaces(inputs, targets, surfaces, thresh_cos=0.95, aug_ratio=0.5): 51 | n = inputs.size(0) 52 | #print(n) 53 | #print('surfaces.size') 54 | #print(surfaces.size(0)) 55 | #print(surfaces.size(1)) 56 | n, d = surfaces.size(0), surfaces.size(1) 57 | 58 | '''surfaces_np = surfaces.data.cpu().numpy() 59 | mask = targets.expand(n, n).eq(targets.expand(n, n).t()) 60 | mask_np = mask.data.cpu().numpy() 61 | import sklearn 62 | cosine_sim = sklearn.metrics.pairwise.cosine_similarity(surfaces_np,surfaces_np) 63 | cosine_sim -= mask_np.astype(np.float32)''' 64 | 65 | cosine_sim = F.cosine_similarity(surfaces.view(1, n, d).expand(n, n, d), surfaces.view(n, 1, d).expand(n, n, d), 2) 66 | '''cosine_sim = torch.pow(surfaces, 2).sum(dim=1, keepdim=True).expand(n, n) 67 | cosine_sim = cosine_sim + cosine_sim.t() 68 | cosine_sim.addmm_(1, -2, surfaces, surfaces.t()) 69 | cosine_sim = cosine_sim.clamp(min=1e-12).sqrt() 70 | cosine_sim = 1 - cosine_sim 71 | cos = nn.CosineSimilarity(dim=1, eps=1e-6) 72 | cosine_sim = cos(surfaces, surfaces)''' 73 | #print(cosine_sim.data.cpu().numpy()) 74 | mask = targets.expand(n, n).eq(targets.expand(n, n).t()) 75 | #mask_np = mask.data.cpu().numpy() 76 | #print('mask_np') 77 | #print(mask_np) 78 | cosine_sim = cosine_sim - mask.float() 79 | m = nn.Threshold(thresh_cos, -1, inplace=True) 80 | cosine_sim = m(cosine_sim) 81 | cosine_sim = cosine_sim.data.cpu().numpy() 82 | targets_np = targets.data.cpu().numpy() 83 | #print('cosine_sim.shape') 84 | #print(cosine_sim.shape) 85 | #print(cosine_sim) 86 | #print('targets_np.shape') 87 | #print(targets_np.shape) 88 | #print(targets_np) 89 | num_pids = np.unique(targets_np).shape[0] 90 | #print('num_pids') 91 | #print(num_pids) 92 | aug_pairs = [] 93 | aug_idxs = [] 94 | inputs_aug = inputs.clone() 95 | while (np.max(cosine_sim) > thresh_cos and len(aug_pairs) < num_pids*aug_ratio): 96 | imax = np.argmax(cosine_sim) 97 | imax, jmax = np.unravel_index(imax, (n, n)) 98 | i = targets_np[imax] 99 | j = targets_np[jmax] 100 | assert i != j 101 | aug_pairs.append((i,j)) 102 | aug_pairs.append((j,i)) 103 | idxi = np.where(targets_np == i)[0].tolist() 104 | idxj = np.where(targets_np == j)[0].tolist() 105 | aug_idxs.extend(idxi) 106 | aug_idxs.extend(idxj) 107 | dfij = inputs[jmax,:] - inputs[imax,:] 108 | for idx in idxi: 109 | inputs_aug[idx,:] = inputs[idx,:] + dfij 110 | targets[idx] = j 111 | cosine_sim[idx,:] = -1 112 | cosine_sim[:,idx] = -1 113 | for idx in idxj: 114 | inputs_aug[idx,:] = inputs[idx,:] - dfij 115 | targets[idx] = i 116 | cosine_sim[idx,:] = -1 117 | cosine_sim[:,idx] = -1 118 | for idx in range(n): 119 | if idx not in aug_idxs: 120 | 
inputs_aug[idx,:] = inputs[idx,:] 121 | #print('aug_pairs') 122 | #print(aug_pairs) 123 | #print('aug_idxs') 124 | #print(aug_idxs) 125 | #targets_np = targets.data.cpu().numpy() 126 | #print(targets_np) 127 | 128 | return inputs_aug, targets 129 | 130 | class TripletLoss(nn.Module): 131 | """Triplet loss with hard positive/negative mining. 132 | 133 | Reference: 134 | Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737. 135 | 136 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py. 137 | 138 | Args: 139 | margin (float): margin for triplet. 140 | """ 141 | def __init__(self, margin=0.3): 142 | super(TripletLoss, self).__init__() 143 | self.margin = margin 144 | self.ranking_loss = nn.MarginRankingLoss(margin=margin) 145 | 146 | def forward(self, inputs, targets, surfaces=None): 147 | """ 148 | Args: 149 | inputs: feature matrix with shape (batch_size, feat_dim) 150 | targets: ground truth labels with shape (num_classes) 151 | """ 152 | if surfaces is not None: 153 | inputs, targets = augment_surfaces(inputs, targets, surfaces) 154 | n = inputs.size(0) 155 | # Compute pairwise distance, replace by the official when merged 156 | dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n) 157 | dist = dist + dist.t() 158 | dist.addmm_(1, -2, inputs, inputs.t()) 159 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 160 | # For each anchor, find the hardest positive and negative 161 | mask = targets.expand(n, n).eq(targets.expand(n, n).t()) 162 | dist_ap, dist_an = [], [] 163 | for i in range(n): 164 | dist_ap.append(dist[i][mask[i]].max()) 165 | dist_an.append(dist[i][mask[i] == 0].min()) 166 | dist_ap = torch.cat(dist_ap) 167 | dist_an = torch.cat(dist_an) 168 | # Compute ranking hinge loss 169 | y = dist_an.data.new() 170 | y.resize_as_(dist_an.data) 171 | y.fill_(1) 172 | y = Variable(y) 173 | loss = self.ranking_loss(dist_an, dist_ap, y) 174 | return loss 175 | 176 | def _apply_margin(x, m): 177 | if isinstance(m, float): 178 | #return (x + m).clamp(min=0) 179 | return torch.mean((x + m).clamp(min=0)) 180 | elif m.lower() == "soft": 181 | return F.softplus(x) 182 | elif m.lower() == "none": 183 | return x 184 | else: 185 | raise NotImplementedError("The margin %s is not implemented in BatchHard!" % m) 186 | 187 | def batch_soft(cdist, pids, margin, T=1.0): 188 | """Calculates the batch soft. 189 | Instead of picking the hardest example through argmax or argmin, 190 | a softmax (softmin) is used to sample and use less difficult examples as well. 191 | Args: 192 | cdist (2D Tensor): All-to-all distance matrix, sized (B,B). 193 | pids (1D tensor): PIDs (classes) of the identities, sized (B,). 194 | margin: The margin to use, can be 'soft', 'none', or a number. 195 | T (float): The temperature of the softmax operation. 196 | """ 197 | # mask where all positivies are set to true 198 | mask_pos = pids[None, :] == pids[:, None] 199 | mask_neg = 1 - mask_pos.data 200 | 201 | # only one copy 202 | cdist_max = cdist.clone() 203 | cdist_max[mask_neg] = -float('inf') 204 | cdist_min = cdist.clone() 205 | cdist_min[mask_pos] = float('inf') 206 | 207 | # NOTE: We could even take multiple ones by increasing num_samples, 208 | # the following `gather` call does the right thing! 
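    # Illustrative note (annotation): F.softmax(cdist_max/T, dim=1) and
    # F.softmin(cdist_min/T, dim=1) are each (B, B); torch.multinomial with
    # num_samples=1 draws one column index per row, so idx_pos and idx_neg
    # have shape (B, 1). A smaller temperature T concentrates the sampling on
    # the hardest positives and negatives, approaching hard-example mining.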
209 | idx_pos = torch.multinomial(F.softmax(cdist_max/T, dim=1), num_samples=1) 210 | idx_neg = torch.multinomial(F.softmin(cdist_min/T, dim=1), num_samples=1) 211 | positive = cdist.gather(dim=1, index=idx_pos)[:,0] # Drop the extra (samples) dim 212 | negative = cdist.gather(dim=1, index=idx_neg)[:,0] 213 | 214 | return _apply_margin(positive - negative, margin) 215 | 216 | class BatchSoft(nn.Module): 217 | """BatchSoft implementation using softmax. 218 | 219 | Also by Tristani as Adaptivei Weighted Triplet Loss. 220 | """ 221 | 222 | def __init__(self, m, T=1.0, **kwargs): 223 | """ 224 | Args: 225 | m: margin 226 | T: Softmax temperature 227 | """ 228 | super(BatchSoft, self).__init__() 229 | self.name = "BatchSoft(m={}, T={})".format(m, T) 230 | self.m = m 231 | self.T = T 232 | 233 | def forward(self, inputs, targets): 234 | """ 235 | Args: 236 | inputs: feature matrix with shape (batch_size, feat_dim) 237 | targets: ground truth labels with shape (num_classes) 238 | """ 239 | n = inputs.size(0) 240 | # Compute pairwise distance, replace by the official when merged 241 | dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n) 242 | dist = dist + dist.t() 243 | dist.addmm_(1, -2, inputs, inputs.t()) 244 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 245 | 246 | return batch_soft(dist, targets, self.m, self.T) 247 | 248 | dist_ap, dist_an = [], [] 249 | for i in range(n): 250 | dist_ap.append(dist[i][mask[i]].max()) 251 | dist_an.append(dist[i][mask[i] == 0].min()) 252 | dist_ap = torch.cat(dist_ap) 253 | dist_an = torch.cat(dist_an) 254 | # Compute ranking hinge loss 255 | y = dist_an.data.new() 256 | y.resize_as_(dist_an.data) 257 | y.fill_(1) 258 | y = Variable(y) 259 | loss = self.ranking_loss(dist_an, dist_ap, y) 260 | return loss 261 | 262 | #def forward(self, dist, pids): 263 | # return batch_soft(dist, pids, self.m, self.T) 264 | 265 | class CenterLoss(nn.Module): 266 | """Center loss. 267 | 268 | Reference: 269 | Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. 270 | 271 | Args: 272 | num_classes (int): number of classes. 273 | feat_dim (int): feature dimension. 274 | """ 275 | def __init__(self, num_classes=10, feat_dim=2, use_gpu=True): 276 | super(CenterLoss, self).__init__() 277 | self.num_classes = num_classes 278 | self.feat_dim = feat_dim 279 | self.use_gpu = use_gpu 280 | 281 | if self.use_gpu: 282 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda()) 283 | else: 284 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)) 285 | 286 | def forward(self, x, labels): 287 | """ 288 | Args: 289 | x: feature matrix with shape (batch_size, feat_dim). 290 | labels: ground truth labels with shape (num_classes). 
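        Note (clarifying annotation): each entry of `labels` is a class index,
        so its effective shape is (batch_size,); the value returned below is
        the mean squared Euclidean distance between each feature and the
        center of its own class.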
291 | """ 292 | batch_size = x.size(0) 293 | distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ 294 | torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t() 295 | distmat.addmm_(1, -2, x, self.centers.t()) 296 | 297 | classes = torch.arange(self.num_classes).long() 298 | if self.use_gpu: classes = classes.cuda() 299 | classes = Variable(classes) 300 | labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) 301 | mask = labels.eq(classes.expand(batch_size, self.num_classes)) 302 | 303 | dist = [] 304 | for i in range(batch_size): 305 | value = distmat[i][mask[i]] 306 | value = value.clamp(min=1e-12, max=1e+12) # for numerical stability 307 | dist.append(value) 308 | dist = torch.cat(dist) 309 | loss = dist.mean() 310 | 311 | return loss 312 | 313 | if __name__ == '__main__': 314 | pass -------------------------------------------------------------------------------- /Video-Person-ReID/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .ResNet import * 4 | 5 | __factory = { 6 | 'resnet50tp': ResNet50TP, 7 | 'resnet50ta': ResNet50TA, 8 | 'myresnet50ta': myResNet50TA, 9 | 'resnet50rnn': ResNet50RNN, 10 | 'resnet50tp_ori': ResNet50TP_ORIENTATION, 11 | 'resnet50tp_ori_iou': ResNet50TP_ORIENTATION_IOU, 12 | 'resnet50ta_ori': ResNet50TA_ORIENTATION, 13 | 'resnet50ta_ori_iou': ResNet50TA_ORIENTATION_IOU, 14 | 'resnet50ta_surface': ResNet50TA_SURFACE, 15 | 'resnet50ta_surface_nu': ResNet50TA_SURFACE_NU, 16 | 'resnet50ta_surface_nu4': ResNet50TA_SURFACE_NU4, 17 | 'resnet50ta_surface_nu2': ResNet50TA_SURFACE_NU2, 18 | 'resnet50ta_surface_nu2f1': ResNet50TA_SURFACE_NU2F1, 19 | 'resnet50ta_surface_n1': ResNet50TA_SURFACE_N1, 20 | 'resnet50ta_surface_n2': ResNet50TA_SURFACE_N2, 21 | } 22 | 23 | 24 | def get_names(): 25 | return __factory.keys() 26 | 27 | 28 | def init_model(name, *args, **kwargs): 29 | if name not in __factory.keys(): 30 | raise KeyError("Unknown model: {}".format(name)) 31 | return __factory[name](*args, **kwargs) 32 | -------------------------------------------------------------------------------- /Video-Person-ReID/models/resnet3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import math 6 | from functools import partial 7 | 8 | __all__ = [ 9 | 'ResNet', 'resnet10', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 10 | 'resnet152', 'resnet200' 11 | ] 12 | 13 | 14 | def conv3x3x3(in_planes, out_planes, stride=1): 15 | # 3x3x3 convolution with padding 16 | return nn.Conv3d( 17 | in_planes, 18 | out_planes, 19 | kernel_size=3, 20 | stride=stride, 21 | padding=1, 22 | bias=False) 23 | 24 | 25 | def downsample_basic_block(x, planes, stride): 26 | out = F.avg_pool3d(x, kernel_size=1, stride=stride) 27 | zero_pads = torch.Tensor( 28 | out.size(0), planes - out.size(1), out.size(2), out.size(3), 29 | out.size(4)).zero_() 30 | if isinstance(out.data, torch.cuda.FloatTensor): 31 | zero_pads = zero_pads.cuda() 32 | 33 | out = Variable(torch.cat([out.data, zero_pads], dim=1)) 34 | 35 | return out 36 | 37 | 38 | class BasicBlock(nn.Module): 39 | expansion = 1 40 | 41 | def __init__(self, inplanes, planes, stride=1, downsample=None): 42 | super(BasicBlock, self).__init__() 43 | self.conv1 = conv3x3x3(inplanes, planes, stride) 44 | self.bn1 = 
nn.BatchNorm3d(planes) 45 | self.relu = nn.ReLU(inplace=True) 46 | self.conv2 = conv3x3x3(planes, planes) 47 | self.bn2 = nn.BatchNorm3d(planes) 48 | self.downsample = downsample 49 | self.stride = stride 50 | 51 | def forward(self, x): 52 | residual = x 53 | 54 | out = self.conv1(x) 55 | out = self.bn1(out) 56 | out = self.relu(out) 57 | 58 | out = self.conv2(out) 59 | out = self.bn2(out) 60 | 61 | if self.downsample is not None: 62 | residual = self.downsample(x) 63 | 64 | out += residual 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class Bottleneck(nn.Module): 71 | expansion = 4 72 | 73 | def __init__(self, inplanes, planes, stride=1, downsample=None): 74 | super(Bottleneck, self).__init__() 75 | self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False) 76 | self.bn1 = nn.BatchNorm3d(planes) 77 | self.conv2 = nn.Conv3d( 78 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 79 | self.bn2 = nn.BatchNorm3d(planes) 80 | self.conv3 = nn.Conv3d(planes, planes * 4, kernel_size=1, bias=False) 81 | self.bn3 = nn.BatchNorm3d(planes * 4) 82 | self.relu = nn.ReLU(inplace=True) 83 | self.downsample = downsample 84 | self.stride = stride 85 | 86 | def forward(self, x): 87 | residual = x 88 | 89 | out = self.conv1(x) 90 | out = self.bn1(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv2(out) 94 | out = self.bn2(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv3(out) 98 | out = self.bn3(out) 99 | 100 | if self.downsample is not None: 101 | residual = self.downsample(x) 102 | 103 | out += residual 104 | out = self.relu(out) 105 | 106 | return out 107 | 108 | 109 | class ResNet(nn.Module): 110 | 111 | def __init__(self, 112 | block, 113 | layers, 114 | sample_height, 115 | sample_width, 116 | sample_duration, 117 | shortcut_type='B', 118 | num_classes=400): 119 | self.inplanes = 64 120 | super(ResNet, self).__init__() 121 | self.conv1 = nn.Conv3d( 122 | 3, 123 | 64, 124 | kernel_size=7, 125 | stride=(1, 2, 2), 126 | padding=(3, 3, 3), 127 | bias=False) 128 | self.bn1 = nn.BatchNorm3d(64) 129 | self.relu = nn.ReLU(inplace=True) 130 | self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1) 131 | self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type) 132 | self.layer2 = self._make_layer( 133 | block, 128, layers[1], shortcut_type, stride=2) 134 | self.layer3 = self._make_layer( 135 | block, 256, layers[2], shortcut_type, stride=2) 136 | self.layer4 = self._make_layer( 137 | block, 512, layers[3], shortcut_type, stride=2) 138 | last_duration = int(math.ceil(sample_duration / 16.0)) 139 | last_height = int(math.ceil(sample_height / 32.0)) 140 | last_width = int(math.ceil(sample_width / 32.0)) 141 | self.avgpool = nn.AvgPool3d( 142 | (last_duration, last_height, last_width), stride=1) 143 | self.fc = nn.Linear(512 * block.expansion, num_classes) 144 | 145 | for m in self.modules(): 146 | if isinstance(m, nn.Conv3d): 147 | m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out') 148 | elif isinstance(m, nn.BatchNorm3d): 149 | m.weight.data.fill_(1) 150 | m.bias.data.zero_() 151 | 152 | def _make_layer(self, block, planes, blocks, shortcut_type, stride=1): 153 | downsample = None 154 | if stride != 1 or self.inplanes != planes * block.expansion: 155 | if shortcut_type == 'A': 156 | downsample = partial( 157 | downsample_basic_block, 158 | planes=planes * block.expansion, 159 | stride=stride) 160 | else: 161 | downsample = nn.Sequential( 162 | nn.Conv3d( 163 | self.inplanes, 164 | planes * block.expansion, 165 | 
kernel_size=1, 166 | stride=stride, 167 | bias=False), nn.BatchNorm3d(planes * block.expansion)) 168 | 169 | layers = [] 170 | layers.append(block(self.inplanes, planes, stride, downsample)) 171 | self.inplanes = planes * block.expansion 172 | for i in range(1, blocks): 173 | layers.append(block(self.inplanes, planes)) 174 | 175 | return nn.Sequential(*layers) 176 | 177 | def load_matched_state_dict(self, state_dict): 178 | 179 | own_state = self.state_dict() 180 | for name, param in state_dict.items(): 181 | if name not in own_state: 182 | continue 183 | #if isinstance(param, Parameter): 184 | # backwards compatibility for serialized parameters 185 | param = param.data 186 | print("loading "+name) 187 | own_state[name].copy_(param) 188 | 189 | def forward(self, x): 190 | # default size is (b, s, c, w, h), s for seq_len, c for channel 191 | # convert for 3d cnn, (b, c, s, w, h) 192 | x=x.permute(0,2,1,3,4) 193 | x = self.conv1(x) 194 | x = self.bn1(x) 195 | x = self.relu(x) 196 | x = self.maxpool(x) 197 | 198 | x = self.layer1(x) 199 | x = self.layer2(x) 200 | x = self.layer3(x) 201 | x = self.layer4(x) 202 | x = self.avgpool(x) 203 | x = x.view(x.size(0), -1) 204 | y = self.fc(x) 205 | 206 | return y, x 207 | 208 | 209 | def get_fine_tuning_parameters(model, ft_begin_index): 210 | if ft_begin_index == 0: 211 | return model.parameters() 212 | 213 | ft_module_names = [] 214 | for i in range(ft_begin_index, 5): 215 | ft_module_names.append('layer{}'.format(i)) 216 | ft_module_names.append('fc') 217 | 218 | parameters = [] 219 | for k, v in model.named_parameters(): 220 | for ft_module in ft_module_names: 221 | if ft_module in k: 222 | parameters.append({'params': v}) 223 | break 224 | else: 225 | parameters.append({'params': v, 'lr': 0.0}) 226 | 227 | return parameters 228 | 229 | 230 | def resnet10(**kwargs): 231 | """Constructs a ResNet-18 model. 232 | """ 233 | model = ResNet(BasicBlock, [1, 1, 1, 1], **kwargs) 234 | return model 235 | 236 | 237 | def resnet18(**kwargs): 238 | """Constructs a ResNet-18 model. 239 | """ 240 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 241 | return model 242 | 243 | 244 | def resnet34(**kwargs): 245 | """Constructs a ResNet-34 model. 246 | """ 247 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 248 | return model 249 | 250 | 251 | def resnet50(**kwargs): 252 | """Constructs a ResNet-50 model. 253 | """ 254 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 255 | return model 256 | 257 | 258 | def resnet101(**kwargs): 259 | """Constructs a ResNet-101 model. 260 | """ 261 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 262 | return model 263 | 264 | 265 | def resnet152(**kwargs): 266 | """Constructs a ResNet-101 model. 267 | """ 268 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 269 | return model 270 | 271 | 272 | def resnet200(**kwargs): 273 | """Constructs a ResNet-101 model. 
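    (Clarifying annotation: this factory actually builds the 200-layer
    configuration, Bottleneck blocks [3, 24, 36, 3]; resnet152 above likewise
    uses [3, 8, 36, 3] despite its copied docstring.)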
274 | """ 275 | model = ResNet(Bottleneck, [3, 24, 36, 3], **kwargs) 276 | return model 277 | -------------------------------------------------------------------------------- /Video-Person-ReID/reidtools.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | import os 6 | import os.path as osp 7 | import shutil 8 | 9 | from iotools import mkdir_if_missing 10 | 11 | 12 | def visualize_ranked_results(distmat, dataset, save_dir='log/ranked_results', topk=20): 13 | """ 14 | Visualize ranked results 15 | 16 | Support both imgreid and vidreid 17 | 18 | Args: 19 | - distmat: distance matrix of shape (num_query, num_gallery). 20 | - dataset: a 2-tuple containing (query, gallery), each contains a list of (img_path, pid, camid); 21 | for imgreid, img_path is a string, while for vidreid, img_path is a tuple containing 22 | a sequence of strings. 23 | - save_dir: directory to save output images. 24 | - topk: int, denoting top-k images in the rank list to be visualized. 25 | """ 26 | num_q, num_g = distmat.shape 27 | 28 | print("Visualizing top-{} ranks".format(topk)) 29 | print("# query: {}\n# gallery {}".format(num_q, num_g)) 30 | print("Saving images to '{}'".format(save_dir)) 31 | 32 | query, gallery = dataset 33 | assert num_q == len(query) 34 | assert num_g == len(gallery) 35 | 36 | indices = np.argsort(distmat, axis=1) 37 | mkdir_if_missing(save_dir) 38 | 39 | def _cp_img_to(src, dst, rank, prefix): 40 | """ 41 | - src: image path or tuple (for vidreid) 42 | - dst: target directory 43 | - rank: int, denoting ranked position, starting from 1 44 | - prefix: string 45 | """ 46 | if isinstance(src, tuple) or isinstance(src, list): 47 | dst = osp.join(dst, prefix + '_top' + str(rank).zfill(3)) 48 | mkdir_if_missing(dst) 49 | for img_path in src: 50 | shutil.copy(img_path, dst) 51 | else: 52 | dst = osp.join(dst, prefix + '_top' + str(rank).zfill(3) + '_name_' + osp.basename(src)) 53 | shutil.copy(src, dst) 54 | 55 | for q_idx in range(num_q): 56 | #qimg_path, qpid, qcamid = query[q_idx] 57 | qimg_path, qpid, qcamid, qangle = query[q_idx] 58 | if isinstance(qimg_path, tuple) or isinstance(qimg_path, list): 59 | qdir = osp.join(save_dir, osp.basename(qimg_path[0])) 60 | else: 61 | qdir = osp.join(save_dir, osp.basename(qimg_path)) 62 | mkdir_if_missing(qdir) 63 | _cp_img_to(qimg_path, qdir, rank=0, prefix='query') 64 | 65 | rank_idx = 1 66 | for g_idx in indices[q_idx, :]: 67 | #gimg_path, gpid, gcamid = gallery[g_idx] 68 | gimg_path, gpid, gcamid, gangle = gallery[g_idx] 69 | invalid = (qpid == gpid) & (qcamid == gcamid) 70 | if not invalid: 71 | _cp_img_to(gimg_path, qdir, rank=rank_idx, prefix='gallery') 72 | rank_idx += 1 73 | if rank_idx > topk: 74 | break 75 | 76 | print("Done") 77 | -------------------------------------------------------------------------------- /Video-Person-ReID/samplers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from collections import defaultdict 3 | import numpy as np 4 | 5 | import torch 6 | 7 | #class RandomIdentitySampler(object): 8 | class RandomIdentitySampler(torch.utils.data.sampler.Sampler): 9 | """ 10 | Randomly sample N identities, then for each identity, 11 | randomly sample K instances, therefore batch size is N*K. 12 | 13 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/data/sampler.py. 
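    Illustrative usage (a sketch; `train_set` and the batch size are
    placeholders, not names taken from this repository):
        sampler = RandomIdentitySampler(train_set, num_instances=4)
        loader = torch.utils.data.DataLoader(train_set, batch_size=32,
                                             sampler=sampler)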
14 | 15 | Args: 16 | data_source (Dataset): dataset to sample from. 17 | num_instances (int): number of instances per identity. 18 | """ 19 | def __init__(self, data_source, num_instances=4): 20 | self.data_source = data_source 21 | self.num_instances = num_instances 22 | self.index_dic = defaultdict(list) 23 | #import pdb; pdb.set_trace() 24 | for index, (_, pid, _) in enumerate(data_source): 25 | #for index, (_, pid, _,_) in enumerate(data_source): 26 | self.index_dic[pid].append(index) 27 | self.pids = list(self.index_dic.keys()) 28 | self.num_identities = len(self.pids) 29 | 30 | def __iter__(self): 31 | indices = torch.randperm(self.num_identities) 32 | ret = [] 33 | for i in indices: 34 | pid = self.pids[i] 35 | t = self.index_dic[pid] 36 | replace = False if len(t) >= self.num_instances else True 37 | t = np.random.choice(t, size=self.num_instances, replace=replace) 38 | ret.extend(t) 39 | return iter(ret) 40 | 41 | def __len__(self): 42 | return self.num_identities * self.num_instances 43 | 44 | class RandomIdentitySamplerOrientation(torch.utils.data.sampler.Sampler): 45 | """ 46 | Randomly sample N identities, then for each identity, 47 | randomly sample K instances, therefore batch size is N*K. 48 | 49 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/data/sampler.py. 50 | 51 | Args: 52 | data_source (Dataset): dataset to sample from. 53 | num_instances (int): number of instances per identity. 54 | """ 55 | def __init__(self, data_source, num_instances=4): 56 | self.data_source = data_source 57 | self.num_instances = num_instances 58 | self.index_dic = defaultdict(list) 59 | #for index, (_, pid, _) in enumerate(data_source): 60 | #for index, (_, pid, _,_) in enumerate(data_source): 61 | for index, (_, pid, _,_,_) in enumerate(data_source): 62 | self.index_dic[pid].append(index) 63 | self.pids = list(self.index_dic.keys()) 64 | self.num_identities = len(self.pids) 65 | 66 | def __iter__(self): 67 | indices = torch.randperm(self.num_identities) 68 | ret = [] 69 | for i in indices: 70 | pid = self.pids[i] 71 | t = self.index_dic[pid] 72 | replace = False if len(t) >= self.num_instances else True 73 | t = np.random.choice(t, size=self.num_instances, replace=replace) 74 | ret.extend(t) 75 | return iter(ret) 76 | 77 | def __len__(self): 78 | return self.num_identities * self.num_instances 79 | class RandomIdentitySamplerOrientationIOU(torch.utils.data.sampler.Sampler): 80 | """ 81 | Randomly sample N identities, then for each identity, 82 | randomly sample K instances, therefore batch size is N*K. 83 | 84 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/data/sampler.py. 85 | 86 | Args: 87 | data_source (Dataset): dataset to sample from. 88 | num_instances (int): number of instances per identity. 
89 | """ 90 | def __init__(self, data_source, num_instances=4): 91 | self.data_source = data_source 92 | self.num_instances = num_instances 93 | self.index_dic = defaultdict(list) 94 | #for index, (_, pid, _) in enumerate(data_source): 95 | for index, (_, pid, _,_,_) in enumerate(data_source): 96 | self.index_dic[pid].append(index) 97 | self.pids = list(self.index_dic.keys()) 98 | self.num_identities = len(self.pids) 99 | 100 | def __iter__(self): 101 | indices = torch.randperm(self.num_identities) 102 | ret = [] 103 | for i in indices: 104 | pid = self.pids[i] 105 | t = self.index_dic[pid] 106 | replace = False if len(t) >= self.num_instances else True 107 | t = np.random.choice(t, size=self.num_instances, replace=replace) 108 | ret.extend(t) 109 | return iter(ret) 110 | 111 | def __len__(self): 112 | return self.num_identities * self.num_instances 113 | -------------------------------------------------------------------------------- /Video-Person-ReID/transforms.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from torchvision.transforms import * 4 | from PIL import Image 5 | import random 6 | import numpy as np 7 | 8 | class Random2DTranslation(object): 9 | """ 10 | With a probability, first increase image size to (1 + 1/8), and then perform random crop. 11 | 12 | Args: 13 | height (int): target height. 14 | width (int): target width. 15 | p (float): probability of performing this transformation. Default: 0.5. 16 | """ 17 | def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR): 18 | self.height = height 19 | self.width = width 20 | self.p = p 21 | self.interpolation = interpolation 22 | 23 | def __call__(self, img): 24 | """ 25 | Args: 26 | img (PIL Image): Image to be cropped. 27 | 28 | Returns: 29 | PIL Image: Cropped image. 30 | """ 31 | if random.random() < self.p: 32 | return img.resize((self.width, self.height), self.interpolation) 33 | new_width, new_height = int(round(self.width * 1.125)), int(round(self.height * 1.125)) 34 | resized_img = img.resize((new_width, new_height), self.interpolation) 35 | x_maxrange = new_width - self.width 36 | y_maxrange = new_height - self.height 37 | x1 = int(round(random.uniform(0, x_maxrange))) 38 | y1 = int(round(random.uniform(0, y_maxrange))) 39 | croped_img = resized_img.crop((x1, y1, x1 + self.width, y1 + self.height)) 40 | return croped_img 41 | 42 | if __name__ == '__main__': 43 | pass 44 | -------------------------------------------------------------------------------- /Video-Person-ReID/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import os 3 | import sys 4 | import errno 5 | import shutil 6 | import json 7 | import os.path as osp 8 | 9 | import torch 10 | 11 | def mkdir_if_missing(directory): 12 | if not osp.exists(directory): 13 | try: 14 | os.makedirs(directory) 15 | except OSError as e: 16 | if e.errno != errno.EEXIST: 17 | raise 18 | 19 | class AverageMeter(object): 20 | """Computes and stores the average and current value. 
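    Illustrative usage (a sketch, not part of the original docstring):
        losses = AverageMeter()
        losses.update(0.5, n=32)   # val=0.5, sum=16.0, count=32
        print(losses.avg)          # 0.5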
21 | 22 | Code imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 23 | """ 24 | def __init__(self): 25 | self.reset() 26 | 27 | def reset(self): 28 | self.val = 0 29 | self.avg = 0 30 | self.sum = 0 31 | self.count = 0 32 | 33 | def update(self, val, n=1): 34 | self.val = val 35 | self.sum += val * n 36 | self.count += n 37 | self.avg = self.sum / self.count 38 | 39 | def save_checkpoint(state, is_best, fpath='checkpoint.pth.tar'): 40 | mkdir_if_missing(osp.dirname(fpath)) 41 | torch.save(state, fpath) 42 | if is_best: 43 | shutil.copy(fpath, osp.join(osp.dirname(fpath), 'best_model.pth.tar')) 44 | 45 | class Logger(object): 46 | """ 47 | Write console output to external text file. 48 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py. 49 | """ 50 | def __init__(self, fpath=None): 51 | self.console = sys.stdout 52 | self.file = None 53 | if fpath is not None: 54 | mkdir_if_missing(os.path.dirname(fpath)) 55 | self.file = open(fpath, 'w') 56 | 57 | def __del__(self): 58 | self.close() 59 | 60 | def __enter__(self): 61 | pass 62 | 63 | def __exit__(self, *args): 64 | self.close() 65 | 66 | def write(self, msg): 67 | self.console.write(msg) 68 | if self.file is not None: 69 | self.file.write(msg) 70 | 71 | def flush(self): 72 | self.console.flush() 73 | if self.file is not None: 74 | self.file.flush() 75 | os.fsync(self.file.fileno()) 76 | 77 | def close(self): 78 | self.console.close() 79 | if self.file is not None: 80 | self.file.close() 81 | 82 | def read_json(fpath): 83 | with open(fpath, 'r') as f: 84 | obj = json.load(f) 85 | return obj 86 | 87 | def write_json(obj, fpath): 88 | mkdir_if_missing(osp.dirname(fpath)) 89 | with open(fpath, 'w') as f: 90 | json.dump(obj, f, indent=4, separators=(',', ': ')) 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /Video-Person-ReID/video2img/crop_img.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import sys 4 | import numpy as np 5 | import os.path as osp 6 | 7 | FILE_LEN = 10 8 | 9 | lev1s = ["./S02/", "./S05/"] 10 | 11 | OUT_DIR = "./track1_test_img/" 12 | 13 | 14 | for lev1 in lev1s: 15 | lev2s = os.listdir(lev1) 16 | for lev2 in lev2s: 17 | camera_path = osp.join(lev1, lev2) 18 | path_to_vid = osp.join(camera_path, "vdo.avi") 19 | 20 | vid = cv2.VideoCapture(path_to_vid) 21 | 22 | suc = True 23 | img = None 24 | 25 | count = 1 26 | 27 | out_path = osp.join(OUT_DIR, lev2) 28 | if not osp.isdir(out_path): 29 | os.makedirs(out_path) 30 | 31 | while suc: 32 | suc, img = vid.read() 33 | if img is None: 34 | break 35 | 36 | f_name = osp.join(out_path, str(count).zfill(10) + ".jpg") 37 | 38 | cv2.imwrite(f_name, img) 39 | count += 1 40 | -------------------------------------------------------------------------------- /Video-Person-ReID/video2img/crop_img_big.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import os.path as osp 4 | 5 | IMG_DIR = "./track1_test_img/" 6 | OUT_DIR = "./track1_sct_img_test_big/" 7 | 8 | for res_f in os.listdir("./txt_GPS_new/"): 9 | camid = res_f.split(".")[0] 10 | cam_img_path = osp.join(IMG_DIR, camid) 11 | out_cam_path = osp.join(OUT_DIR, camid) 12 | 13 | if not osp.isdir(out_cam_path): 14 | os.makedirs(out_cam_path) 15 | 16 | for line in open(osp.join("./txt_GPS_new/", res_f)).readlines(): 17 | tmp = line.strip("\n").split(",") 18 
| f_id = tmp[0] 19 | obj_id = tmp[1] 20 | 21 | img_f = osp.join(cam_img_path, f_id.zfill(10) + ".jpg") 22 | img = cv2.imread(img_f) 23 | 24 | height, width = img.shape[:2] 25 | 26 | 27 | left = int(tmp[2])-20 28 | top = int(tmp[3])-20 29 | w = int(tmp[4])+40 30 | h = int(tmp[5])+40 31 | 32 | right = left + w 33 | bot = top + h 34 | 35 | if left<0: 36 | left = 0 37 | if top<0: 38 | top=0 39 | 40 | if right>width: 41 | right = width 42 | if bot>height: 43 | bot=height 44 | 45 | 46 | 47 | crop_img = img[top: bot, left:right] 48 | 49 | out_obj_path = osp.join(out_cam_path, obj_id) 50 | if not osp.isdir(out_obj_path): 51 | os.makedirs(out_obj_path) 52 | 53 | out_path = osp.join(out_obj_path, f_id.zfill(10) + ".jpg") 54 | cv2.imwrite(out_path, crop_img) 55 | 56 | -------------------------------------------------------------------------------- /Video-Person-ReID/video_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import os 3 | from PIL import Image 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | import random 9 | 10 | from math import exp, atan2 11 | #import cv2 12 | 13 | def read_image(img_path): 14 | """Keep reading image until succeed. 15 | This can avoid IOError incurred by heavy IO process.""" 16 | got_img = False 17 | while not got_img: 18 | try: 19 | img = Image.open(img_path).convert('RGB') 20 | got_img = True 21 | except IOError: 22 | print("IOError incurred when reading '{}'. Will redo. Don't worry. Just chill.".format(img_path)) 23 | pass 24 | #print(img_path) 25 | return img 26 | 27 | def read_metadata(img_path, metadata_model, verbose=True): 28 | """Read sruface from file""" 29 | if metadata_model[:2] == 'v1': 30 | metadata_dim = 26 # 6, 12, 8 for type, brand, color 31 | elif metadata_model[:2] == 'v2': 32 | metadata_dim = 46 # 7, 30, 9 for type, brand, color 33 | else: # the oldest version 34 | metadata_dim = 26 # 6, 12, 8 for type, brand, color 35 | metadata_path = img_path.replace('image', 'metadata_%s'%metadata_model).replace('.jpg', '.txt') 36 | if os.path.isfile(metadata_path): 37 | #print(metadata_path) 38 | with open(metadata_path, 'r') as f: 39 | metadata = [] 40 | for line in f: 41 | #print(line) 42 | if ',' in line: 43 | line = line.strip().replace(' ', '').split(',') 44 | line = [s for s in line if len(s) > 0] 45 | else: 46 | line = line.strip().split(' ') 47 | line = [s for s in line if len(s) > 0] 48 | #print(line) 49 | metadata.append(np.array(line, dtype=np.float32)) 50 | metadata = np.concatenate(metadata) ### concat all probability vector 51 | assert metadata.shape[0] == metadata_dim 52 | return metadata 53 | else: 54 | if verbose: 55 | print('warning: metadata not exist: ' + str(metadata_path)) 56 | return np.zeros(metadata_dim, dtype=np.float32) ### if no metadata 57 | 58 | def PolyArea(pts): 59 | return -0.5*(np.dot(pts[:,0],np.roll(pts[:,1],1))-np.dot(pts[:,1],np.roll(pts[:,0],1))) 60 | 61 | def keypointsArea(keypoints, ids): 62 | pts = np.array([(keypoints[i][0], keypoints[i][1]) for i in ids]) 63 | #return cv2.contourArea(pts, oriented=True) 64 | return PolyArea(pts) 65 | 66 | def keypointsSymmetry(keypoints): 67 | area0 = abs(keypointsArea(keypoints, [i for i in range(2, 18)])) + 1 68 | area1 = abs(keypointsArea(keypoints, [i for i in range(2+18, 18+18)])) + 1 69 | ratio = area1 / area0 if area1 < area0 else area0 / area1 70 | #print('area0: %f, area1: %f' % (area0, area1)) 71 | return ratio 72 | 73 | def 
keypointsParallel(keypoints): 74 | NUM_PAIRS = 18 75 | vecs = np.zeros((NUM_PAIRS, 2), dtype=np.float32) 76 | for i in range(NUM_PAIRS): 77 | vecs[i][0] = keypoints[i+18][0] - keypoints[i][0] 78 | vecs[i][1] = keypoints[i+18][1] - keypoints[i][1] 79 | vec_mean = np.mean(vecs, axis=0) 80 | vec_diff = np.subtract(vecs, vec_mean) 81 | vec_err = np.linalg.norm(vec_diff, axis=1) / np.linalg.norm(vec_mean) 82 | vec_errmean = np.mean(vec_err) 83 | return exp(-vec_errmean) 84 | 85 | def keypointsConfidence(keypoints): 86 | parallel_conf = keypointsParallel(keypoints) 87 | symmetry_conf = keypointsSymmetry(keypoints) 88 | keypoint_conf = pow(parallel_conf**2 + symmetry_conf**2, 0.5) / pow(2, 0.5) 89 | return keypoint_conf 90 | 91 | def keypointsSurface(keypoints): 92 | surfaces = [] 93 | idss = [] 94 | idss.append([i for i in range(2, 18)]) 95 | idss.append([i for i in range(20, 36)][::-1]) 96 | for i in range(16): 97 | idss.append([i%16+2, i%16+2+18, (i+1)%16+2+18, (i+1)%16+2]) 98 | for ids in idss: 99 | surfaces.append(keypointsArea(keypoints, ids)) 100 | surfaces = np.array(surfaces, dtype=np.float32) 101 | surfaces /= np.linalg.norm(surfaces) 102 | #surfaces *= 999 103 | #print(surfaces) 104 | return surfaces 105 | 106 | def surfacesAngle(surfaces): 107 | x = surfaces[0] - surfaces[1] 108 | y = surfaces[16] + surfaces[15] + surfaces[14] - surfaces[10] - surfaces[11] - surfaces[12] 109 | return atan2(y, x) # between -pi and pi 110 | 111 | def read_keypoint(img_path): 112 | """Read keypoint from file""" 113 | keypoint_path = img_path.replace('image', 'keypoint').replace('.jpg', '.txt') 114 | with open(keypoint_path, 'r') as f: 115 | keypoints = np.loadtxt(f, dtype=np.float32).flatten() 116 | keypoints = np.reshape(keypoints, (-1,3)) 117 | #print(keypoints) 118 | return keypoints 119 | 120 | class VideoDataset(Dataset): 121 | """Video Person ReID Dataset. 122 | Note batch data has shape (batch, seq_len, channel, height, width). 123 | """ 124 | sample_methods = ['evenly', 'random', 'all'] 125 | 126 | def __init__(self, dataset, metadata_model, seq_len=15, sample='evenly', transform=None): 127 | self.dataset = dataset 128 | self.metadata_model = metadata_model 129 | self.seq_len = seq_len 130 | self.sample = sample 131 | self.transform = transform 132 | 133 | def __len__(self): 134 | return len(self.dataset) 135 | 136 | def __getitem__(self, index): 137 | img_paths, pid, camid = self.dataset[index] 138 | num = len(img_paths) 139 | if self.sample == 'random': 140 | """ 141 | Randomly sample seq_len consecutive frames from num frames, 142 | if num is smaller than seq_len, then replicate items. 143 | This sampling strategy is used in training phase. 
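            Illustrative example (annotation): with num = 40 and seq_len = 15,
            a start index in [0, 24] is drawn and 15 consecutive frames are
            returned; when num < seq_len the clip is meant to be padded by
            repeating frames (under Python 3 this requires
            frame_indices = list(range(num)), as in the 'dense' branch below,
            since a range object has no append).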
144 | """ 145 | frame_indices = range(num) 146 | rand_end = max(0, len(frame_indices) - self.seq_len - 1) 147 | begin_index = random.randint(0, rand_end) 148 | end_index = min(begin_index + self.seq_len, len(frame_indices)) 149 | 150 | indices = frame_indices[begin_index:end_index] 151 | 152 | for index in indices: 153 | if len(indices) >= self.seq_len: 154 | break 155 | indices.append(index) 156 | indices = np.array(indices) 157 | imgs = [] 158 | metadatas = [] 159 | for index in indices: 160 | index = int(index) 161 | img_path = img_paths[index] 162 | img = read_image(img_path) 163 | if self.transform is not None: 164 | img = self.transform(img) 165 | img = img.unsqueeze(0) 166 | imgs.append(img) 167 | metadata = read_metadata(img_path, self.metadata_model, False) 168 | metadata = torch.from_numpy(metadata) 169 | metadata = metadata.unsqueeze(0) 170 | metadatas.append(metadata) 171 | imgs = torch.cat(imgs, dim=0) 172 | # imgs=imgs.permute(1,0,2,3) 173 | metadatas = torch.cat(metadatas, dim=0) 174 | return imgs, pid, camid, metadatas 175 | 176 | elif self.sample == 'dense': 177 | """ 178 | Sample all frames in a video into a list of clips, each clip contains seq_len frames, batch_size needs to be set to 1. 179 | This sampling strategy is used in test phase. 180 | """ 181 | cur_index = 0 182 | # frame_indices = range(num) 183 | frame_indices = list(range(num)) 184 | indices_list = [] 185 | while num - cur_index > self.seq_len: 186 | indices_list.append(frame_indices[cur_index:cur_index + self.seq_len]) 187 | cur_index += self.seq_len 188 | last_seq = frame_indices[cur_index:] 189 | for index in last_seq: 190 | if len(last_seq) >= self.seq_len: 191 | break 192 | last_seq.append(index) 193 | indices_list.append(last_seq) 194 | imgs_list = [] 195 | metadatas_list = [] 196 | for indices in indices_list: 197 | imgs = [] 198 | metadatas = [] 199 | for index in indices: 200 | index = int(index) 201 | img_path = img_paths[index] 202 | img = read_image(img_path) 203 | if self.transform is not None: 204 | img = self.transform(img) 205 | img = img.unsqueeze(0) 206 | imgs.append(img) 207 | metadata = read_metadata(img_path, self.metadata_model, False) #################### 208 | metadata = torch.from_numpy(metadata) 209 | metadata = metadata.unsqueeze(0) 210 | metadatas.append(metadata) 211 | imgs = torch.cat(imgs, dim=0) 212 | # imgs=imgs.permute(1,0,2,3) 213 | imgs_list.append(imgs) 214 | metadatas = torch.cat(metadatas, dim=0) 215 | metadatas_list.append(metadatas) 216 | imgs_array = torch.stack(imgs_list) 217 | metadatas_array = torch.stack(metadatas_list) 218 | 219 | return imgs_array, pid, camid, metadatas_array, img_paths 220 | 221 | else: 222 | raise KeyError("Unknown sample method: {}. Expected one of {}".format(self.sample, self.sample_methods)) 223 | 224 | 225 | class VideoDataset_SURFACE(Dataset): 226 | """Video Person ReID Dataset. 227 | Note batch data has shape (batch, seq_len, channel, height, width). 
228 | """ 229 | sample_methods = ['evenly', 'random', 'all'] 230 | 231 | def __init__(self, dataset, metadata_model, seq_len=15, sample='evenly', transform=None): 232 | self.dataset = dataset 233 | self.metadata_model = metadata_model 234 | self.seq_len = seq_len 235 | self.sample = sample 236 | self.transform = transform 237 | 238 | def __len__(self): 239 | return len(self.dataset) 240 | 241 | def __getitem__(self, index): 242 | img_paths, pid, camid = self.dataset[index] 243 | num = len(img_paths) 244 | keypoint_conf_thresh = 0.6#999 245 | if self.sample == 'random': 246 | """ 247 | Randomly sample seq_len consecutive frames from num frames, 248 | if num is smaller than seq_len, then replicate items. 249 | This sampling strategy is used in training phase. 250 | """ 251 | frame_indices = range(num) 252 | rand_end = max(0, len(frame_indices) - self.seq_len - 1) 253 | begin_index = random.randint(0, rand_end) 254 | end_index = min(begin_index + self.seq_len, len(frame_indices)) 255 | 256 | indices = frame_indices[begin_index:end_index] 257 | 258 | for index in indices: 259 | if len(indices) >= self.seq_len: 260 | break 261 | indices.append(index) 262 | indices = np.array(indices) 263 | imgs = [] 264 | surfaces = [] 265 | metadatas = [] 266 | for index in indices: 267 | index = int(index) 268 | img_path = img_paths[index] 269 | img = read_image(img_path) 270 | if self.transform is not None: 271 | img = self.transform(img) 272 | img = img.unsqueeze(0) 273 | imgs.append(img) 274 | # TH surface 275 | keypoints = read_keypoint(img_path) 276 | surface = keypointsSurface(keypoints) 277 | keypoint_conf = keypointsConfidence(keypoints) 278 | if keypoint_conf < keypoint_conf_thresh: 279 | surface = surface * 0 280 | #print('surface = ' + str(surface)) 281 | surface = torch.from_numpy(surface) 282 | surface = surface.unsqueeze(0) 283 | surfaces.append(surface) 284 | metadata = read_metadata(img_path, self.metadata_model, False) 285 | metadata = torch.from_numpy(metadata) 286 | metadata = metadata.unsqueeze(0) 287 | metadatas.append(metadata) 288 | imgs = torch.cat(imgs, dim=0) 289 | # imgs=imgs.permute(1,0,2,3) 290 | surfaces = torch.cat(surfaces, dim=0) 291 | metadatas = torch.cat(metadatas, dim=0) 292 | return imgs, surfaces, pid, camid, metadatas 293 | 294 | elif self.sample == 'dense': 295 | """ 296 | Sample all frames in a video into a list of clips, each clip contains seq_len frames, batch_size needs to be set to 1. 297 | This sampling strategy is used in test phase. 
298 | """ 299 | cur_index = 0 300 | # frame_indices = range(num) 301 | frame_indices = list(range(num)) 302 | indices_list = [] 303 | while num - cur_index > self.seq_len: 304 | indices_list.append(frame_indices[cur_index:cur_index + self.seq_len]) 305 | cur_index += self.seq_len 306 | last_seq = frame_indices[cur_index:] 307 | for index in last_seq: 308 | if len(last_seq) >= self.seq_len: 309 | break 310 | last_seq.append(index) 311 | indices_list.append(last_seq) 312 | imgs_list = [] 313 | surfaces_list = [] 314 | metadatas_list = [] 315 | for indices in indices_list: 316 | imgs = [] 317 | surfaces = [] 318 | metadatas = [] 319 | for index in indices: 320 | index = int(index) 321 | img_path = img_paths[index] 322 | img = read_image(img_path) 323 | if self.transform is not None: 324 | img = self.transform(img) 325 | img = img.unsqueeze(0) 326 | imgs.append(img) 327 | # TH surface 328 | keypoints = read_keypoint(img_path) 329 | surface = keypointsSurface(keypoints) 330 | keypoint_conf = keypointsConfidence(keypoints) 331 | if keypoint_conf < keypoint_conf_thresh: 332 | surface = surface * 0 333 | #print('surface = ' + str(surface)) 334 | surface = torch.from_numpy(surface) 335 | surface = surface.unsqueeze(0) 336 | surfaces.append(surface) 337 | metadata = read_metadata(img_path, self.metadata_model) 338 | metadata = torch.from_numpy(metadata) 339 | metadata = metadata.unsqueeze(0) 340 | metadatas.append(metadata) 341 | imgs = torch.cat(imgs, dim=0) 342 | # imgs=imgs.permute(1,0,2,3) 343 | imgs_list.append(imgs) 344 | surfaces = torch.cat(surfaces, dim=0) 345 | surfaces_list.append(surfaces) 346 | metadatas = torch.cat(metadatas, dim=0) 347 | metadatas_list.append(metadatas) 348 | imgs_array = torch.stack(imgs_list) 349 | surfaces_array = torch.stack(surfaces_list) 350 | metadatas_array = torch.stack(metadatas_list) 351 | 352 | return imgs_array, surfaces_array, pid, camid, metadatas_array, img_paths 353 | 354 | else: 355 | raise KeyError("Unknown sample method: {}. Expected one of {}".format(self.sample, self.sample_methods)) 356 | -------------------------------------------------------------------------------- /metadata/README.md: -------------------------------------------------------------------------------- 1 | The metadata classifier code is based on \[[code](https://github.com/pangwong/pytorch-multi-label-classifier)\]. We use the 29 -layer light CNN model with modifications on transformations, input size and the beginning layers.(*Wu, X., He, R., Sun, Z. and Tan, T., 2018. A light cnn for deep face representation with noisy labels. IEEE Transactions on Information Forensics and Security, 13(11), pp.2884-2896*). 2 | 3 | ## Setup 4 | 5 | This code assumes you have the following packages installed. 6 | - Python 3.6 7 | - Pytorch 0.4.0 8 | - Torchvision 0.2.1 9 | - Numpy 1.16.2 10 | - PIL 5.1.0 11 | 12 | Running system: 13 | - Linux Ubuntu 18.04 14 | - CUDA 9.0 15 | - 2 NVIDIA Titan Xp GPU 16 | 17 | ## Our pre-trained model 18 | 19 | Download the pre-trained model [here](https://drive.google.com/file/d/119GdCtKDkJCGc_AX0Try_CoMij5rhcg4/view?usp=sharing). 20 | - The model is pretrained on CompCar dataset [link](http://mmlab.ie.cuhk.edu.hk/datasets/comp_cars/index.html). 21 | - We cleaned up CompCar dataset by only keeping the brands exist in AIC dataset (based on country/type/year/version of the vehicle models) and correcting wrong labels. 22 | - The training set could be downloaded at [here](https://drive.google.com/file/d/1dbcjbdPBC19dCzq-v8TE-z1OHI4nLBFg/view?usp=sharing). 
23 | - Some images from the track2 training set and traffic videos from surveillance cameras are used for further training. Due to privacy concerns, we are not allowed to release the source videos. Please follow [their](http://www.uwstarlab.org/) future work on the dataset. 24 | - Once you run the testing code, you will obtain the metadata probabilities. For simplicity and consistency with our ReID code, the output can be downloaded [here](https://drive.google.com/file/d/1korVAg0W_VKNYKiwc3cYwvmC6pDhiuxs/view?usp=sharing). 25 | 26 | ## Training 27 | 28 | - `label.txt` lists the categories. 29 | - `data.txt` contains the training image paths and labels. Note that the example here is set up for training vehicle type and brand; the loss weights need to be revised accordingly in the code (`multi_label_classifier.py`, lines 68-74). The training data should follow this format. 30 | - For other training/testing/visualization options, please refer to `options.py`. 31 | 32 | Use the following command to run the training code. 33 | 34 | python multi_label_classifier.py --dir "./YOUR_DIRPATH_OF_data.txt_and_label.txt/" --mode "Train" --model "LightenB" --name "YOURMODELNAME" --batch_size 8 --gpu_ids 0 --input_channel 3 --load_size 512 --input_size 512 --ratio "[0.7, 0.1, 0.2]" --load_thread 4 --sum_epoch 500 --lr_decay_in_epoch 1 --display_port 8900 --validate_ratio 0.5 --top_k "(1,)" --score_thres 0.1 --display_train_freq 1000 --display_validate_freq 1000 --save_epoch_freq 2000 --display_image_ratio 0.1 --shuffle 35 | 36 | 37 | ## Testing 38 | 39 | To test the model, make sure the image IDs and paths are listed in ./your_model_name/Data/Test/data.txt. For a quick start, use `testdata.txt`. Then run, 40 | 41 | python multi_label_classifier.py --dir "./YOUR_DIRPATH_OF_data.txt_and_label.txt/" --mode "Test" --model "LightenB" --name "YOURMODELNAME" --checkpoint_name "/path_to_model.pth" 42 | 43 | 44 | The probabilities of each label will be saved in `test.log`. Run `reformat-log.py` to reformat the log file for reranking.
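
For reference, the per-image metadata files consumed by the ReID code (`read_metadata` in `Video-Person-ReID/video_loader.py`) hold the type, brand, and color probability vectors, which are concatenated into a single 46-dimensional vector for the v2 label set above (7 types, 30 brands, 9 colors). The sketch below only illustrates that layout under those assumptions; the function name and the `v2_0` model suffix are hypothetical and not part of the released pipeline.

```python
import os
import numpy as np

def write_metadata_txt(img_path, type_probs, brand_probs, color_probs,
                       metadata_model='v2_0'):
    """Write one metadata .txt per image in the layout read_metadata() parses.

    Illustration only: assumes the v2 label set (7 types, 30 brands, 9 colors)
    and the 'image' -> 'metadata_<model>' path convention of video_loader.py.
    """
    assert len(type_probs) == 7 and len(brand_probs) == 30 and len(color_probs) == 9
    out_path = img_path.replace('image', 'metadata_%s' % metadata_model).replace('.jpg', '.txt')
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, 'w') as f:
        # one line per attribute head, probabilities separated by spaces
        for probs in (type_probs, brand_probs, color_probs):
            f.write(' '.join('%.6f' % p for p in np.asarray(probs, dtype=np.float32)) + '\n')
```

`read_metadata` reads the three lines back, concatenates them, and checks that exactly 46 values are present.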
45 | 46 | -------------------------------------------------------------------------------- /metadata/data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/data/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/data/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/data/__pycache__/loader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/data/__pycache__/loader.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/data/__pycache__/transformer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/data/__pycache__/transformer.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/data/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json,ast 4 | import random 5 | import logging 6 | import torch.utils.data as data 7 | 8 | from .transformer import get_transformer, load_image 9 | 10 | class BaseDataset(data.Dataset): 11 | def __init__(self, opt, data_type, id2rid): 12 | super(BaseDataset, self).__init__() 13 | self.opt = opt 14 | self.data_type = data_type 15 | self.dataset = self._load_data(opt.data_dir+ '/' + data_type + '/data.txt') 16 | self.id2rid = id2rid 17 | self.data_size = len(self.dataset) 18 | self.transformer = get_transformer(opt) 19 | 20 | def __getitem__(self, index): 21 | image_file, box, attr_ids = self.dataset[index % self.data_size] 22 | 23 | input = load_image(image_file, box, self.opt, self.transformer) 24 | #input = load_image(image_file, self.opt, self.transformer) 25 | 26 | # label 27 | labels = list() 28 | for index, attr_id in enumerate(attr_ids): 29 | labels.append(self.id2rid[index][attr_id]) 30 | 31 | return input, labels 32 | 33 | def __len__(self): 34 | return self.data_size 35 | 36 | def _load_data(self, data_file): 37 | print(data_file) 38 | dataset = list() 39 | if not os.path.exists(data_file): 40 | return dataset 41 | with open(data_file) as d: 42 | for line in d.readlines(): 43 | line = json.dumps(ast.literal_eval(line)) 44 | dataset.append(self.readline(line)) 45 | #import pdb; pdb.set_trace() 46 | if self.opt.shuffle: 47 | logging.info("Shuffle %s Data" %(self.data_type)) 48 | random.shuffle(dataset) 49 | else: 50 | logging.info("Not Shuffle %s Data" %(self.data_type)) 51 | return dataset 52 | 53 | def readline(self, line): 54 | vbrand_list = ['Dodge', 'Ford', 'Chevrolet', 'GMC', 'Honda', 'Chrysler', 'Jeep', 'Hyundai',\ 55 | 'Subaru', 'Toyota', 'Buick', 'others', 'KIA', 'Nissan', 'Volkswagen',\ 56 | 'Oldsmobile', 'BMW', 'Cadillac', 'Volvo', 'Pontiac', 'Mercury', 'Lexus',\ 57 | 
'Saturn', 'Benz', 'Mazda', 'Scion', 'RAM', 'Mini', 'Lincoln', 'Audi',\ 58 | 'Mitsubishi'] 59 | vtype_list = ['SUV', 'PickupTruck', 'Sedan', 'Minivan', 'Truck', 'Hatchback', 'Bus'] 60 | vcolor_list = ['Black', 'White', 'Red', 'Gray', 'Silver', 'Blue', 'Gold', 'Green', 'Yellow'] 61 | data = [None, None,None] 62 | #print(line) 63 | line = ast.literal_eval(line) 64 | line = ast.literal_eval(line) 65 | 66 | #line = json.loads(line) 67 | if "image_file" in line: 68 | data[0] = line["image_file"] 69 | if 'box' in line: 70 | data[1] = line["box"] 71 | if 'id' in line: 72 | data[2] = line["id"] 73 | vtype = data[2][0] 74 | vbrand = data[2][1] 75 | vcolor = data[2][2] 76 | 77 | if (vtype not in vtype_list) or (vbrand not in vbrand_list) or (vcolor not in vcolor_list): 78 | print(data[0],data[2]) 79 | 80 | return data 81 | -------------------------------------------------------------------------------- /metadata/data/loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import random 5 | import logging 6 | import collections 7 | from torch.utils.data import DataLoader 8 | from data.dataset import BaseDataset 9 | import ast 10 | import json 11 | 12 | sys.path.append('../') 13 | from util.util import rmdir, load_label 14 | 15 | class MultiLabelDataLoader(): 16 | def __init__(self, opt): 17 | self.opt = opt 18 | assert os.path.exists(opt.dir + "/data.txt"), "No data.txt found in specified dir" 19 | assert os.path.exists(opt.dir + "/label.txt"), "No label.txt found in specified dir" 20 | 21 | train_dir = opt.data_dir + "/TrainSet/" 22 | val_dir = opt.data_dir + "/ValidateSet/" 23 | test_dir = opt.data_dir + "/TestSet/" 24 | 25 | # split data 26 | if not all([os.path.exists(train_dir), os.path.exists(val_dir), os.path.exists(test_dir)]): 27 | # rm existing directories 28 | rmdir(train_dir) 29 | rmdir(val_dir) 30 | rmdir(test_dir) 31 | 32 | # split data to Train, Val, Test 33 | logging.info("Split raw data to Train, Val and Test") 34 | ratios = opt.ratio 35 | dataset = collections.defaultdict(list) 36 | with open(opt.dir + '/data.txt') as d: 37 | for line in d.readlines(): 38 | #print(line) 39 | line = ast.literal_eval(line) 40 | line = json.dumps(line) 41 | 42 | #line = json.loads(line) 43 | # if data has been specified data_type yet, load data as what was specified before 44 | #if "type" in line: 45 | # dataset[line["type"]].append(line) 46 | # continue 47 | # specified data_type randomly 48 | rand = random.random() 49 | if rand < ratios[0]: 50 | data_type = "Train" 51 | elif rand < ratios[0] + ratios[1]: 52 | data_type = "Validate" 53 | else: 54 | data_type = "Test" 55 | dataset[data_type].append(line) 56 | # write to file 57 | self._WriteDataToFile(dataset["Train"], train_dir) 58 | self._WriteDataToFile(dataset["Validate"], val_dir) 59 | self._WriteDataToFile(dataset["Test"], test_dir) 60 | 61 | self.rid2name, self.id2rid, self.rid2id = load_label(opt.dir + '/label.txt') 62 | print(self.rid2name, self.id2rid, self.rid2id) 63 | self.num_classes = [len(item)-2 for item in self.rid2name] 64 | 65 | # load dataset 66 | if opt.mode == "Train": 67 | logging.info("Load Train Dataset...") 68 | self.train_set = BaseDataset(self.opt, "TrainSet", self.rid2id) 69 | logging.info("Load Validate Dataset...") 70 | self.val_set = BaseDataset(self.opt, "ValidateSet", self.rid2id) 71 | else: 72 | # force batch_size for test to 1 73 | self.opt.batch_size = 1 74 | self.opt.load_thread = 1 75 | logging.info("Load Test Dataset...") 76 | 
self.test_set = BaseDataset(self.opt, "TestSet", self.rid2id) 77 | 78 | def GetTrainSet(self): 79 | if self.opt.mode == "Train": 80 | return self._DataLoader(self.train_set) 81 | else: 82 | raise("Train Set DataLoader NOT implemented in Test Mode") 83 | 84 | def GetValSet(self): 85 | if self.opt.mode == "Train": 86 | return self._DataLoader(self.val_set) 87 | else: 88 | raise("Validation Set DataLoader NOT implemented in Test Mode") 89 | 90 | def GetTestSet(self): 91 | if self.opt.mode == "Test": 92 | return self._DataLoader(self.test_set) 93 | else: 94 | raise("Test Set DataLoader NOT implemented in Train Mode") 95 | 96 | def GetNumClasses(self): 97 | return self.num_classes 98 | 99 | def GetRID2Name(self): 100 | return self.rid2name 101 | 102 | def GetID2RID(self): 103 | return self.id2rid 104 | 105 | def GetiRID2ID(self): 106 | return self.irid2id 107 | 108 | def _WriteDataToFile(self, src_data, dst_dir): 109 | """ 110 | write info of each objects to data.txt as predefined format 111 | """ 112 | if not os.path.exists(dst_dir): 113 | os.mkdir(dst_dir) 114 | with open(dst_dir + "/data.txt", 'w') as d: 115 | for line in src_data: 116 | d.write(json.dumps(line, separators=(',',':'))+'\n') 117 | 118 | 119 | def _DataLoader(self, dataset): 120 | """ 121 | create data loder 122 | """ 123 | dataloader = DataLoader( 124 | dataset, 125 | batch_size=self.opt.batch_size, 126 | shuffle=False, 127 | num_workers=int(self.opt.load_thread), 128 | pin_memory=self.opt.cuda, 129 | drop_last=False) 130 | return dataloader 131 | 132 | -------------------------------------------------------------------------------- /metadata/data/transformer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from PIL import Image 3 | from torchvision import transforms 4 | 5 | 6 | def get_transformer(opt): 7 | transform_list = [] 8 | 9 | # resize 10 | osize = [opt.load_size, opt.load_size] 11 | #transform_list.append(transforms.functional.resize(osize,Image.BICUBIC)) 12 | transform_list.append(transforms.Resize(osize, Image.BICUBIC)) 13 | 14 | # grayscales 15 | if opt.input_channel == 1: 16 | transform_list.append(transforms.Grayscale()) 17 | 18 | # crop 19 | if opt.crop == "RandomCrop": 20 | transform_list.append(transforms.RandomCrop(opt.fineSize)) 21 | elif opt.crop == "CenterCrop": 22 | transform_list.append(transforms.CenterCrop(opt.input_size)) 23 | elif opt.crop == "FiveCrop": 24 | transform_list.append(transforms.FiveCrop(opt.input_size)) 25 | elif opt.crop == "TenCrop": 26 | transform_list.append(transforms.TenCrop(opt.input_size)) 27 | 28 | # flip 29 | if opt.mode == "Train" and opt.flip: 30 | transform_list.append(transforms.RandomHorizontalFlip()) 31 | 32 | # to tensor 33 | transform_list.append(transforms.ToTensor()) 34 | 35 | # If you make changes here, you should also modified 36 | # function `tensor2im` in util/util.py accordingly 37 | transform_list1 = [ 38 | transforms.ToTensor(), 39 | transforms.Normalize(opt.mean, opt.std)] 40 | transform_list.append(transforms.Normalize(opt.mean, opt.std)) 41 | 42 | return transforms.Compose(transform_list1) 43 | 44 | def fix_box(box, width, height, ratio=-1, scale=1.0): 45 | if scale < 0: 46 | scale = 1.0 47 | box = copy.deepcopy(box) 48 | w = box["w"] 49 | h = box["h"] 50 | x = box["x"] + w / 2 51 | y = box["y"] + h / 2 52 | mw = 2 * min(x, width - x) 53 | mh = 2 * min(y, height - y) 54 | w = max(1, min(int(w * scale), mw)) 55 | h = max(1, min(int(h * scale), mh)) 56 | if ratio > 0: 57 | if 1.0 * w / h > ratio: 58 | h 
= int(w / ratio) 59 | h = min(h, mh) 60 | w = int(h * ratio) 61 | else: 62 | w = int(h * ratio) 63 | w = min(w, mw) 64 | h = int(w / ratio) 65 | box["x"] = x - w / 2 66 | box["y"] = y - h / 2 67 | box["w"] = w 68 | box["h"] = h 69 | return box 70 | 71 | def load_image(image_file, box, opt, transformer): 72 | img = Image.open(image_file) 73 | if opt.input_channel == 3: 74 | img = img.convert('RGB') 75 | 76 | # box crop 77 | #if box is not None and opt.region == True: 78 | # box = fix_box(box, width, height, opt.box_ratio, opt.box_scale) 79 | # area = (box['x'], box['y'], box['x']+box['w'], box['y']+box['h']) 80 | # img = img.crop(area) 81 | # transform 82 | osize = opt.load_size 83 | old_size = img.size # old_size[0] is in (width, height) format 84 | ratio = float(osize)/max(old_size) 85 | new_size = tuple([int(x*ratio) for x in old_size]) 86 | im = img.resize(new_size, Image.ANTIALIAS) 87 | new_im = Image.new("RGB", (osize,osize)) 88 | new_im.paste(im, ((osize-new_size[0])//2, 89 | (osize-new_size[1])//2)) 90 | 91 | 92 | input = transformer(new_im) 93 | # and a column of 0s at pos 10 94 | #result = F.pad(input=source, pad=(1, 1, 0, 1), mode='constant', value=0) 95 | #if width>height:fpaf 96 | 97 | 98 | return input 99 | 100 | -------------------------------------------------------------------------------- /metadata/deploy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import logging 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | from torchvision import transforms 10 | from collections import OrderedDict, defaultdict 11 | 12 | from options.options import Options 13 | from models.model import load_model 14 | from data.transformer import get_transformer, load_image 15 | from util.util import load_label, opt2file 16 | from util.webvisualizer import WebVisualizer 17 | 18 | def main(): 19 | # parse options 20 | op = Options() 21 | opt = op.parse() 22 | 23 | # special setting 24 | opt.shuffle = False 25 | opt.batch_size = 1 26 | opt.load_thread = 1 27 | 28 | # initialize train or test working dir 29 | test_dir = os.path.join(opt.classify_dir , opt.name) 30 | opt.model_dir = opt.dir + "/trainer_" + opt.name + "/Train/" 31 | if not os.path.exists(test_dir): 32 | os.mkdir(test_dir) 33 | 34 | # save options to disk 35 | opt2file(opt, os.path.join(test_dir, "opt.txt")) 36 | 37 | # log setting 38 | log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 39 | formatter = logging.Formatter(log_format) 40 | fh = logging.FileHandler(test_dir + "/deploy.log", 'a') 41 | fh.setFormatter(formatter) 42 | ch = logging.StreamHandler() 43 | ch.setFormatter(formatter) 44 | logging.getLogger().addHandler(fh) 45 | logging.getLogger().addHandler(ch) 46 | logging.getLogger().setLevel(logging.INFO) 47 | 48 | # load label 49 | if opt.label_file == "": 50 | opt.label_file = opt.dir + "/label.txt" 51 | rid2name, id2rid, rid2id = load_label(opt.label_file) 52 | num_classes = [len(rid2name[index])-2 for index in range(len(rid2name))] 53 | 54 | # load transformer 55 | transformer = get_transformer(opt) 56 | 57 | # load model 58 | model = load_model(opt, num_classes) 59 | model.eval() 60 | 61 | # use cuda 62 | if opt.cuda: 63 | model = model.cuda(opt.devices[0]) 64 | cudnn.benchmark = True 65 | 66 | l = open(test_dir + "/classify_res_data.txt", 'w') 67 | with open(opt.classify_dir + "/data.txt") as data: 68 | for num, line in 
enumerate(data): 69 | logging.info(str(num+1)) 70 | line = json.loads(line) 71 | input_tensor = load_image(line["image_file"], line["box"], opt, transformer) 72 | input_tensor = input_tensor.unsqueeze(0) 73 | if opt.cuda: 74 | input_tensor = input_tensor.cuda(opt.devices[0]) 75 | outputs = model(Variable(input_tensor, volatile=True)) 76 | if not isinstance(outputs, list): 77 | outputs = [outputs] 78 | line["classify_res"] = list() 79 | for index, out in enumerate(outputs): 80 | out = out.cpu() 81 | #print "out:", out 82 | softmax = F.softmax(out, dim=1).data.squeeze() 83 | #print "softmax:", softmax 84 | probs, ids = softmax.sort(0, True) 85 | classify_res = {} 86 | for i in range(len(probs)): 87 | classify_res[rid2name[index][id2rid[index][ids[i]]]] = probs[i] 88 | classify_res["max_score"] = probs[0] 89 | classify_res["best_label"] = rid2name[index][id2rid[index][ids[0]]] 90 | line["classify_res"].append(classify_res) 91 | l.write(json.dumps(line, separators=(',', ':'))+'\n') 92 | l.close() 93 | logging.info("classification done") 94 | 95 | 96 | if __name__ == "__main__": 97 | main() 98 | -------------------------------------------------------------------------------- /metadata/label.txt: -------------------------------------------------------------------------------- 1 | 7;type;type 2 | Sedan;Sedan 3 | SUV;SUV 4 | Truck;Truck 5 | Minivan;Minivan 6 | PickupTruck;PickupTruck 7 | Hatchback;Hatchback 8 | Bus;Bus 9 | 30;brand;brand 10 | Dodge;Dodge 11 | Ford;Ford 12 | Chevrolet;Chevrolet 13 | GMC;GMC 14 | Honda;Honda 15 | Chrysler;Chrysler 16 | Jeep;Jeep 17 | Hyundai;Hyundai 18 | Subaru;Subaru 19 | Toyota;Toyota 20 | Buick;Buick 21 | KIA;KIA 22 | Nissan;Nissan 23 | Volkswagen;Volkswagen 24 | Oldsmobile;Oldsmobile 25 | BMW;BMW 26 | Cadillac;Cadillac 27 | Volvo;Volvo 28 | Pontiac;Pontiac 29 | Mercury;Mercury 30 | Lexus;Lexus 31 | Saturn;Saturn 32 | Benz;Benz 33 | Mazda;Mazda 34 | Scion;Scion 35 | Mini;Mini 36 | Lincoln;Lincoln 37 | Audi;Audi 38 | Mitsubishi;Mitsubishi 39 | others;others 40 | 9;color;color 41 | Black;Black 42 | White;White 43 | Gray;Gray 44 | Blue;Blue 45 | Red;Red 46 | Gold;Gold 47 | Silver;Silver 48 | Green;Green 49 | Yellow;Yellow 50 | -------------------------------------------------------------------------------- /metadata/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/models/__pycache__/alexnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/alexnet.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/models/__pycache__/build_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/build_model.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/models/__pycache__/lightcnn.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/lightcnn.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/models/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/models/__pycache__/resnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/resnet.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/models/__pycache__/vgg.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/models/__pycache__/vgg.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/models/alexnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.model_zoo as model_zoo 3 | from models.build_model import * 4 | 5 | __all__ = ['AlexNet', 'alexnet'] 6 | 7 | 8 | model_urls = { 9 | 'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth', 10 | } 11 | 12 | 13 | class AlexNet(nn.Module): 14 | 15 | def __init__(self, num_classes=1000): 16 | super(AlexNet, self).__init__() 17 | self.features = nn.Sequential( 18 | nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), 19 | nn.ReLU(inplace=True), 20 | nn.MaxPool2d(kernel_size=3, stride=2), 21 | nn.Conv2d(64, 192, kernel_size=5, padding=2), 22 | nn.ReLU(inplace=True), 23 | nn.MaxPool2d(kernel_size=3, stride=2), 24 | nn.Conv2d(192, 384, kernel_size=3, padding=1), 25 | nn.ReLU(inplace=True), 26 | nn.Conv2d(384, 256, kernel_size=3, padding=1), 27 | nn.ReLU(inplace=True), 28 | nn.Conv2d(256, 256, kernel_size=3, padding=1), 29 | nn.ReLU(inplace=True), 30 | nn.MaxPool2d(kernel_size=3, stride=2), 31 | ) 32 | self.classifier = nn.Sequential( 33 | nn.Dropout(), 34 | nn.Linear(256 * 6 * 6, 4096), 35 | nn.ReLU(inplace=True), 36 | nn.Dropout(), 37 | nn.Linear(4096, 4096), 38 | nn.ReLU(inplace=True), 39 | nn.Linear(4096, num_classes), 40 | ) 41 | 42 | def forward(self, x): 43 | x = self.features(x) 44 | x = x.view(x.size(0), 256 * 6 * 6) 45 | x = self.classifier(x) 46 | return x 47 | 48 | class AlexNetTemplet(nn.Module): 49 | def __init__(self, input_channel): 50 | super(AlexNetTemplet, self).__init__() 51 | self.features = nn.Sequential( 52 | nn.Conv2d(input_channel, 64, kernel_size=11, stride=4, padding=2), 53 | nn.ReLU(inplace=True), 54 | nn.MaxPool2d(kernel_size=3, stride=2), 55 | nn.Conv2d(64, 192, kernel_size=5, padding=2), 56 | nn.ReLU(inplace=True), 57 | nn.MaxPool2d(kernel_size=3, stride=2), 58 | nn.Conv2d(192, 384, kernel_size=3, padding=1), 59 | nn.ReLU(inplace=True), 60 | nn.Conv2d(384, 256, kernel_size=3, padding=1), 61 | nn.ReLU(inplace=True), 62 | nn.Conv2d(256, 256, kernel_size=3, padding=1), 63 | nn.ReLU(inplace=True), 64 | nn.MaxPool2d(kernel_size=3, stride=2), 65 | ) 66 | 
self.classifier = nn.Sequential( 67 | nn.Dropout(), 68 | nn.Linear(256 * 6 * 6, 4096), 69 | nn.ReLU(inplace=True), 70 | nn.Dropout(), 71 | nn.Linear(4096, 4096), 72 | nn.ReLU(inplace=True), 73 | ) 74 | 75 | def forward(self, x): 76 | x = self.features(x) 77 | x = x.view(x.size(0), 256 * 6 * 6) 78 | x = self.classifier(x) 79 | return x 80 | 81 | 82 | def alexnet(pretrained=False, **kwargs): 83 | r"""AlexNet model architecture from the 84 | `"One weird trick..." `_ paper. 85 | Args: 86 | pretrained (bool): If True, returns a model pre-trained on ImageNet 87 | """ 88 | model = AlexNet(**kwargs) 89 | if pretrained: 90 | model.load_state_dict(model_zoo.load_url(model_urls['alexnet'])) 91 | return model 92 | 93 | def AlexnetTemplet(input_channel, pretrained=False, **kwargs): 94 | r"""AlexNet model architecture from the 95 | `"One weird trick..." `_ paper. 96 | Args: 97 | pretrained (bool): If True, returns a model pre-trained on ImageNet 98 | """ 99 | model = AlexNetTemplet(input_channel) 100 | if pretrained: 101 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['alexnet'])) 102 | model.load_state_dict(model_dict) 103 | return model 104 | -------------------------------------------------------------------------------- /metadata/models/build_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class MultiLabelModel(nn.Module): 4 | def __init__(self, basemodel, basemodel_output, num_classes): 5 | super(MultiLabelModel, self).__init__() 6 | self.basemodel = basemodel 7 | self.num_classes = num_classes 8 | for index, num_class in enumerate(num_classes): 9 | setattr(self, "FullyConnectedLayer_" + str(index), nn.Linear(basemodel_output, num_class)) 10 | 11 | def forward(self, x): 12 | x = self.basemodel.forward(x) 13 | #print(fc.shape,x.shape) 14 | outs = list() 15 | dir(self) 16 | for index, num_class in enumerate(self.num_classes): 17 | fun = eval("self.FullyConnectedLayer_" + str(index)) 18 | out = fun(x) 19 | outs.append(out) 20 | return outs 21 | 22 | def LoadPretrainedModel(model, pretrained_state_dict): 23 | model_dict = model.state_dict() 24 | union_dict = {k : v for k,v in pretrained_state_dict.iteritems() if k in model_dict} 25 | model_dict.update(union_dict) 26 | return model_dict 27 | 28 | def BuildMultiLabelModel(basemodel, basemodel_output, num_classes): 29 | return MultiLabelModel(basemodel, basemodel_output, num_classes) 30 | -------------------------------------------------------------------------------- /metadata/models/lightcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class mfm(nn.Module): 6 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, type=1): 7 | super(mfm, self).__init__() 8 | self.out_channels = out_channels 9 | if type == 1: 10 | self.filter = nn.Conv2d(in_channels, 2*out_channels, kernel_size=kernel_size, stride=stride, padding=padding) 11 | else: 12 | self.filter = nn.Linear(in_channels, 2*out_channels) 13 | 14 | def forward(self, x): 15 | x = self.filter(x) 16 | out = torch.split(x, self.out_channels, 1) 17 | return torch.max(out[0], out[1]) 18 | 19 | class group(nn.Module): 20 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding): 21 | super(group, self).__init__() 22 | self.conv_a = mfm(in_channels, in_channels, 1, 1, 0) 23 | self.conv = mfm(in_channels, out_channels, kernel_size, stride, 
padding) 24 | 25 | def forward(self, x): 26 | x = self.conv_a(x) 27 | x = self.conv(x) 28 | return x 29 | 30 | class resblock(nn.Module): 31 | def __init__(self, in_channels, out_channels): 32 | super(resblock, self).__init__() 33 | self.conv1 = mfm(in_channels, out_channels, kernel_size=3, stride=1, padding=1) 34 | self.conv2 = mfm(in_channels, out_channels, kernel_size=3, stride=1, padding=1) 35 | 36 | def forward(self, x): 37 | res = x 38 | out = self.conv1(x) 39 | out = self.conv2(out) 40 | out = out + res 41 | return out 42 | 43 | class network_9layers(nn.Module): 44 | def __init__(self, num_classes=79077): 45 | super(network_9layers, self).__init__() 46 | self.features = nn.Sequential( 47 | mfm(1, 48, 5, 1, 2), 48 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 49 | group(48, 96, 3, 1, 1), 50 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 51 | group(96, 192, 3, 1, 1), 52 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 53 | group(192, 128, 3, 1, 1), 54 | group(128, 128, 3, 1, 1), 55 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 56 | ) 57 | self.fc1 = mfm(8*8*128, 256, type=0) 58 | self.fc2 = nn.Linear(256, num_classes) 59 | 60 | def forward(self, x): 61 | x = self.features(x) 62 | x = x.view(x.size(0), -1) 63 | x = self.fc1(x) 64 | x = F.dropout(x, training=self.training) 65 | out = self.fc2(x) 66 | return out, x 67 | 68 | class network_29layers(nn.Module): 69 | def __init__(self, block, layers, num_classes=79077): 70 | super(network_29layers, self).__init__() 71 | self.conv1 = mfm(1, 48, 5, 1, 2) 72 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True) 73 | self.block1 = self._make_layer(block, layers[0], 48, 48) 74 | self.group1 = group(48, 96, 3, 1, 1) 75 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True) 76 | self.block2 = self._make_layer(block, layers[1], 96, 96) 77 | self.group2 = group(96, 192, 3, 1, 1) 78 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True) 79 | self.block3 = self._make_layer(block, layers[2], 192, 192) 80 | self.group3 = group(192, 128, 3, 1, 1) 81 | self.block4 = self._make_layer(block, layers[3], 128, 128) 82 | self.group4 = group(128, 128, 3, 1, 1) 83 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True) 84 | self.fc = mfm(8*8*128, 256, type=0) 85 | self.fc2 = nn.Linear(256, num_classes) 86 | 87 | 88 | def _make_layer(self, block, num_blocks, in_channels, out_channels): 89 | layers = [] 90 | for i in range(0, num_blocks): 91 | layers.append(block(in_channels, out_channels)) 92 | return nn.Sequential(*layers) 93 | 94 | def forward(self, x): 95 | x = self.conv1(x) 96 | x = self.pool1(x) 97 | 98 | x = self.block1(x) 99 | x = self.group1(x) 100 | x = self.pool2(x) 101 | 102 | x = self.block2(x) 103 | x = self.group2(x) 104 | x = self.pool3(x) 105 | 106 | x = self.block3(x) 107 | x = self.group3(x) 108 | x = self.block4(x) 109 | x = self.group4(x) 110 | x = self.pool4(x) 111 | 112 | x = x.view(x.size(0), -1) 113 | fc = self.fc(x) 114 | fc = F.dropout(fc, training=self.training) 115 | out = self.fc2(fc) 116 | return out, fc 117 | 118 | 119 | class network_29layers_v2(nn.Module): 120 | def __init__(self, block, layers, num_classes=79077): 121 | super(network_29layers_v2, self).__init__() 122 | self.conv1 = mfm(1, 48, 5, 1, 2) 123 | self.block1 = self._make_layer(block, layers[0], 48, 48) 124 | self.group1 = group(48, 96, 3, 1, 1) 125 | self.block2 = self._make_layer(block, layers[1], 96, 96) 126 | self.group2 = group(96, 192, 3, 1, 1) 127 | self.block3 = 
self._make_layer(block, layers[2], 192, 192) 128 | self.group3 = group(192, 128, 3, 1, 1) 129 | self.block4 = self._make_layer(block, layers[3], 128, 128) 130 | self.group4 = group(128, 128, 3, 1, 1) 131 | self.fc = nn.Linear(8*8*128, 256) 132 | self.fc2 = nn.Linear(256, num_classes[0], bias=False) 133 | 134 | def _make_layer(self, block, num_blocks, in_channels, out_channels): 135 | layers = [] 136 | for i in range(0, num_blocks): 137 | layers.append(block(in_channels, out_channels)) 138 | return nn.Sequential(*layers) 139 | 140 | def forward(self, x): 141 | x = self.conv1(x) 142 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 143 | 144 | x = self.block1(x) 145 | x = self.group1(x) 146 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 147 | 148 | x = self.block2(x) 149 | x = self.group2(x) 150 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 151 | 152 | x = self.block3(x) 153 | x = self.group3(x) 154 | x = self.block4(x) 155 | x = self.group4(x) 156 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 157 | 158 | x = x.view(x.size(0), -1) 159 | fc = self.fc(x) 160 | x = F.dropout(fc, training=self.training) 161 | 162 | output = list() 163 | for name, fun in self.fc_dict.iteritems(): 164 | out = fun(x) 165 | output.append(out) 166 | 167 | return output, fc 168 | 169 | class network_9layers_templet(nn.Module): 170 | def __init__(self, in_channel): 171 | super(network_9layers_templet, self).__init__() 172 | self.features = nn.Sequential( 173 | mfm(in_channel, 48, 5, 1, 2), 174 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 175 | group(48, 96, 3, 1, 1), 176 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 177 | group(96, 192, 3, 1, 1), 178 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 179 | group(192, 128, 3, 1, 1), 180 | group(128, 128, 3, 1, 1), 181 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 182 | ) 183 | self.fc1 = mfm(8*8*128, 256, type=0) 184 | 185 | def forward(self, x): 186 | x = self.features(x) 187 | x = x.view(x.size(0), -1) 188 | x = self.fc1(x) 189 | out = F.dropout(x, training=self.training) 190 | return out 191 | 192 | class network_29layers_v2_templet(nn.Module): 193 | def __init__(self, in_channel, block, layers): 194 | super(network_29layers_v2_templet, self).__init__() 195 | self.conv1 = mfm(in_channel, 48, 5, 1, 2) 196 | self.block1 = self._make_layer(block, layers[0], 48, 48) 197 | self.group1 = group(48, 96, 3, 1, 1) 198 | self.block2 = self._make_layer(block, layers[1], 96, 96) 199 | self.group2 = group(96, 192, 3, 1, 1) 200 | self.block3 = self._make_layer(block, layers[2], 192, 192) 201 | self.group3 = group(192, 256, 3, 1, 1) 202 | self.block4 = self._make_layer(block, layers[3], 256, 256) 203 | self.group4 = group(256, 128, 3, 1, 1) 204 | self.block5 = self._make_layer(block, layers[4], 128, 128) 205 | self.group5 = group(128, 64, 3, 1, 1) 206 | self.block6 = self._make_layer(block, layers[5], 64, 64) 207 | self.group6 = group(64, 64, 3, 1, 1) 208 | 209 | self.fc = nn.Linear(8*8*64, 256) 210 | 211 | def _make_layer(self, block, num_blocks, in_channels, out_channels): 212 | layers = [] 213 | for i in range(0, num_blocks): 214 | layers.append(block(in_channels, out_channels)) 215 | return nn.Sequential(*layers) 216 | 217 | def forward(self, x): 218 | ''' 219 | x = self.conv1(x) 220 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 221 | 222 | x = self.block1(x) 223 | x = self.group1(x) 224 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 225 | 226 | x = self.block2(x) 227 | x = self.group2(x) 228 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 229 | 
230 | x = self.block3(x) 231 | x = self.group3(x) 232 | x = self.block4(x) 233 | x = self.group4(x) 234 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 235 | 236 | x = x.view(x.size(0), -1) 237 | fc = self.fc(x) 238 | x = F.dropout(fc, training=self.training) 239 | ''' 240 | x = self.conv1(x) 241 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 242 | 243 | x = self.block1(x) 244 | x = self.group1(x) 245 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 246 | 247 | x = self.block2(x) 248 | x = self.group2(x) 249 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 250 | 251 | x = self.block3(x) 252 | x = self.group3(x) 253 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 254 | 255 | x = self.block4(x) 256 | x = self.group4(x) 257 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 258 | 259 | x = self.block5(x) 260 | x = self.group5(x) 261 | x = self.block6(x) 262 | x = self.group6(x) 263 | x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2) 264 | 265 | x = x.view(x.size(0), -1) 266 | fc = self.fc(x) 267 | x = F.dropout(fc, training=self.training) 268 | return x 269 | 270 | 271 | def LightCNN_9Layers(**kwargs): 272 | model = network_9layers(**kwargs) 273 | return model 274 | 275 | def LightCNN_29Layers(**kwargs): 276 | model = network_29layers(resblock, [1, 2, 3, 4], **kwargs) 277 | return model 278 | 279 | def LightCNN_29Layers_v2(**kwargs): 280 | model = network_29layers_v2(resblock, [1, 2, 3, 4], **kwargs) 281 | return model 282 | 283 | def LightCNN_9Layers_templet(in_channel, pretrained=False): 284 | model = network_9layers_templet(in_channel) 285 | return model 286 | 287 | def LightCNN_29Layers_v2_templet(in_channel, pretrained=False): 288 | model = network_29layers_v2_templet(in_channel, resblock, [1,2,3,4,5,6]) 289 | return model 290 | 291 | 292 | if __name__ == "__main__": 293 | model = LightCNN_29Layers_v2_templet(3) 294 | print(model) -------------------------------------------------------------------------------- /metadata/models/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import logging 4 | from torch.autograd import Variable 5 | 6 | from .build_model import BuildMultiLabelModel, LoadPretrainedModel 7 | from .lightcnn import LightCNN_29Layers_v2_templet, LightCNN_9Layers_templet 8 | from .alexnet import AlexnetTemplet 9 | from .resnet import Resnet50Templet 10 | from .vgg import VGG16Templet 11 | 12 | def load_model(opt, num_classes): 13 | # load templet 14 | if opt.model == "Alexnet": 15 | templet = AlexnetTemplet(opt.input_channel, opt.pretrain) 16 | elif opt.model == "LightenB": 17 | templet = LightCNN_29Layers_v2_templet(opt.input_channel, opt.pretrain) 18 | elif opt.model == "Lighten9": 19 | templet = LightCNN_9Layers_templet(opt.input_channel, opt.pretrain) 20 | elif opt.model == "Resnet50": 21 | templet = Resnet50Templet(opt.input_channel, opt.pretrain) 22 | elif opt.model == "VGG16": 23 | templet = VGG16Templet(opt.input_channel, opt.pretrain) 24 | else: 25 | logging.error("unknown model type") 26 | sys.exit(0) 27 | 28 | # build model 29 | tmp_input = Variable(torch.FloatTensor(1, opt.input_channel, opt.input_size, opt.input_size)) 30 | if opt.model == "LightenB": 31 | tmp_output = templet(tmp_input) 32 | else: 33 | tmp_output = templet(tmp_input) 34 | output_dim = int(tmp_output.size()[-1]) 35 | model = BuildMultiLabelModel(templet, output_dim, num_classes) 36 | logging.info(model) 37 | 38 | # imagenet pretrain model 39 | if opt.pretrain: 40 | logging.info("use imagenet pretrained model") 41 | 42 | # load exsiting model 
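    # Note: the checkpoint lookup below tries opt.checkpoint_name as a full path first,
    # then as a file name relative to opt.model_dir; if neither exists, checkpoint_name is
    # cleared with a warning and the randomly initialized model is returned.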
43 | if opt.checkpoint_name != "": 44 | if os.path.exists(opt.checkpoint_name): 45 | logging.info("load pretrained model from "+opt.checkpoint_name) 46 | model.load_state_dict(torch.load(opt.checkpoint_name)) 47 | elif os.path.exists(opt.model_dir): 48 | checkpoint_name = opt.model_dir + "/" + opt.checkpoint_name 49 | model.load_state_dict(torch.load(checkpoint_name)) 50 | logging.info("load pretrained model from "+ checkpoint_name) 51 | else: 52 | opt.checkpoint_name = "" 53 | logging.warning("WARNING: unknown pretrained model, skip it.") 54 | 55 | return model 56 | 57 | def save_model(model, opt, epoch): 58 | checkpoint_name = opt.model_dir + "/epoch_%s_snapshot.pth" %(epoch) 59 | torch.save(model.cpu().state_dict(), checkpoint_name) 60 | if opt.cuda and torch.cuda.is_available(): 61 | model.cuda(opt.devices[0]) 62 | 63 | def modify_last_layer_lr(named_params, base_lr, lr_mult_w, lr_mult_b): 64 | params = list() 65 | for name, param in named_params: 66 | if 'bias' in name: 67 | if 'FullyConnectedLayer_' in name: 68 | params += [{'params':param, 'lr': base_lr * lr_mult_b, 'weight_decay': 0}] 69 | else: 70 | params += [{'params':param, 'lr': base_lr * 2, 'weight_decay': 0}] 71 | else: 72 | if 'FullyConnectedLayer_' in name: 73 | params += [{'params':param, 'lr': base_lr * lr_mult_w}] 74 | else: 75 | params += [{'params':param, 'lr': base_lr * 1}] 76 | return params 77 | -------------------------------------------------------------------------------- /metadata/models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | from models.build_model import LoadPretrainedModel 5 | 6 | 7 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 8 | 'resnet152'] 9 | 10 | 11 | model_urls = { 12 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 13 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 14 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 15 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 16 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 17 | } 18 | 19 | 20 | def conv3x3(in_planes, out_planes, stride=1): 21 | "3x3 convolution with padding" 22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 23 | padding=1, bias=False) 24 | 25 | 26 | class BasicBlock(nn.Module): 27 | expansion = 1 28 | 29 | def __init__(self, inplanes, planes, stride=1, downsample=None): 30 | super(BasicBlock, self).__init__() 31 | self.conv1 = conv3x3(inplanes, planes, stride) 32 | self.bn1 = nn.BatchNorm2d(planes) 33 | self.relu = nn.ReLU(inplace=True) 34 | self.conv2 = conv3x3(planes, planes) 35 | self.bn2 = nn.BatchNorm2d(planes) 36 | self.downsample = downsample 37 | self.stride = stride 38 | 39 | def forward(self, x): 40 | residual = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | 49 | if self.downsample is not None: 50 | residual = self.downsample(x) 51 | 52 | out += residual 53 | out = self.relu(out) 54 | 55 | return out 56 | 57 | 58 | class Bottleneck(nn.Module): 59 | expansion = 4 60 | 61 | def __init__(self, inplanes, planes, stride=1, downsample=None): 62 | super(Bottleneck, self).__init__() 63 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 64 | self.bn1 = nn.BatchNorm2d(planes) 65 | self.conv2 = 
nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 66 | padding=1, bias=False) 67 | self.bn2 = nn.BatchNorm2d(planes) 68 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 69 | self.bn3 = nn.BatchNorm2d(planes * 4) 70 | self.relu = nn.ReLU(inplace=True) 71 | self.downsample = downsample 72 | self.stride = stride 73 | 74 | def forward(self, x): 75 | residual = x 76 | 77 | out = self.conv1(x) 78 | out = self.bn1(out) 79 | out = self.relu(out) 80 | 81 | out = self.conv2(out) 82 | out = self.bn2(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv3(out) 86 | out = self.bn3(out) 87 | 88 | if self.downsample is not None: 89 | residual = self.downsample(x) 90 | 91 | out += residual 92 | out = self.relu(out) 93 | 94 | return out 95 | 96 | 97 | class ResNet(nn.Module): 98 | 99 | def __init__(self, block, layers, num_classes=1000): 100 | self.inplanes = 64 101 | super(ResNet, self).__init__() 102 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 103 | bias=False) 104 | self.bn1 = nn.BatchNorm2d(64) 105 | self.relu = nn.ReLU(inplace=True) 106 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 107 | self.layer1 = self._make_layer(block, 64, layers[0]) 108 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 109 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 110 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 111 | self.avgpool = nn.AvgPool2d(7, stride=1) 112 | self.fc = nn.Linear(512 * block.expansion, num_classes) 113 | 114 | for m in self.modules(): 115 | if isinstance(m, nn.Conv2d): 116 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 117 | m.weight.data.normal_(0, math.sqrt(2. / n)) 118 | elif isinstance(m, nn.BatchNorm2d): 119 | m.weight.data.fill_(1) 120 | m.bias.data.zero_() 121 | 122 | def _make_layer(self, block, planes, blocks, stride=1): 123 | downsample = None 124 | if stride != 1 or self.inplanes != planes * block.expansion: 125 | downsample = nn.Sequential( 126 | nn.Conv2d(self.inplanes, planes * block.expansion, 127 | kernel_size=1, stride=stride, bias=False), 128 | nn.BatchNorm2d(planes * block.expansion), 129 | ) 130 | 131 | layers = [] 132 | layers.append(block(self.inplanes, planes, stride, downsample)) 133 | self.inplanes = planes * block.expansion 134 | for i in range(1, blocks): 135 | layers.append(block(self.inplanes, planes)) 136 | 137 | return nn.Sequential(*layers) 138 | 139 | def forward(self, x): 140 | x = self.conv1(x) 141 | x = self.bn1(x) 142 | x = self.relu(x) 143 | x = self.maxpool(x) 144 | 145 | x = self.layer1(x) 146 | x = self.layer2(x) 147 | x = self.layer3(x) 148 | x = self.layer4(x) 149 | 150 | x = self.avgpool(x) 151 | x = x.view(x.size(0), -1) 152 | x = self.fc(x) 153 | 154 | return x 155 | 156 | class ResNetTemplet(nn.Module): 157 | 158 | def __init__(self, block, layers, input_channel): 159 | self.inplanes = 64 160 | super(ResNetTemplet, self).__init__() 161 | self.conv1 = nn.Conv2d(input_channel, 64, kernel_size=7, stride=2, padding=3, 162 | bias=False) 163 | self.bn1 = nn.BatchNorm2d(64) 164 | self.relu = nn.ReLU(inplace=True) 165 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 166 | self.layer1 = self._make_layer(block, 64, layers[0]) 167 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 168 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 169 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 170 | self.avgpool = nn.AvgPool2d(7, stride=1) 171 | self.fc = nn.Linear(512 * 
block.expansion, 1000) 172 | 173 | for m in self.modules(): 174 | if isinstance(m, nn.Conv2d): 175 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 176 | m.weight.data.normal_(0, math.sqrt(2. / n)) 177 | elif isinstance(m, nn.BatchNorm2d): 178 | m.weight.data.fill_(1) 179 | m.bias.data.zero_() 180 | 181 | def _make_layer(self, block, planes, blocks, stride=1): 182 | downsample = None 183 | if stride != 1 or self.inplanes != planes * block.expansion: 184 | downsample = nn.Sequential( 185 | nn.Conv2d(self.inplanes, planes * block.expansion, 186 | kernel_size=1, stride=stride, bias=False), 187 | nn.BatchNorm2d(planes * block.expansion), 188 | ) 189 | layers = [] 190 | layers.append(block(self.inplanes, planes, stride, downsample)) 191 | self.inplanes = planes * block.expansion 192 | for i in range(1, blocks): 193 | layers.append(block(self.inplanes, planes)) 194 | return nn.Sequential(*layers) 195 | 196 | def forward(self, x): 197 | x = self.conv1(x) 198 | x = self.bn1(x) 199 | x = self.relu(x) 200 | x = self.maxpool(x) 201 | 202 | x = self.layer1(x) 203 | x = self.layer2(x) 204 | x = self.layer3(x) 205 | x = self.layer4(x) 206 | 207 | x = self.avgpool(x) 208 | x = x.view(x.size(0), -1) 209 | 210 | return x 211 | 212 | 213 | def resnet18(pretrained=False, **kwargs): 214 | """Constructs a ResNet-18 model. 215 | 216 | Args: 217 | pretrained (bool): If True, returns a model pre-trained on ImageNet 218 | """ 219 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 220 | if pretrained: 221 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 222 | return model 223 | 224 | 225 | def Resnet18Templet(input_channel, pretrained=False, **kwargs): 226 | """Constructs a ResNet-18 model. 227 | 228 | Args: 229 | pretrained (bool): If True, returns a model pre-trained on ImageNet 230 | """ 231 | model = ResNetTemplet(BasicBlock, [2, 2, 2, 2], input_channel, **kwargs) 232 | if pretrained: 233 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet18'])) 234 | model.load_state_dict(model_dict) 235 | return model 236 | 237 | 238 | def resnet34(pretrained=False, **kwargs): 239 | """Constructs a ResNet-34 model. 240 | 241 | Args: 242 | pretrained (bool): If True, returns a model pre-trained on ImageNet 243 | """ 244 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 245 | if pretrained: 246 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 247 | return model 248 | 249 | 250 | def Resnet34Templet(input_channel, pretrained=False, **kwargs): 251 | """Constructs a ResNet-34 model. 252 | 253 | Args: 254 | pretrained (bool): If True, returns a model pre-trained on ImageNet 255 | """ 256 | model = ResNetTemplet(BasicBlock, [3, 4, 6, 3], input_channel, **kwargs) 257 | if pretrained: 258 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet34'])) 259 | model.load_state_dict(model_dict) 260 | return model 261 | 262 | 263 | def resnet50(pretrained=False, **kwargs): 264 | """Constructs a ResNet-50 model. 265 | 266 | Args: 267 | pretrained (bool): If True, returns a model pre-trained on ImageNet 268 | """ 269 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 270 | if pretrained: 271 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 272 | return model 273 | 274 | 275 | def Resnet50Templet(input_channel, pretrained=False, **kwargs): 276 | """Constructs a ResNet-50 model. 
277 | 278 | Args: 279 | pretrained (bool): If True, returns a model pre-trained on ImageNet 280 | """ 281 | model = ResNetTemplet(Bottleneck, [3, 4, 6, 3], input_channel, **kwargs) 282 | if pretrained: 283 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet50'])) 284 | model.load_state_dict(model_dict) 285 | return model 286 | 287 | 288 | def resnet101(pretrained=False, **kwargs): 289 | """Constructs a ResNet-101 model. 290 | 291 | Args: 292 | pretrained (bool): If True, returns a model pre-trained on ImageNet 293 | """ 294 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 295 | if pretrained: 296 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 297 | return model 298 | 299 | def Resnet101Templet(input_channel, pretrained=False, **kwargs): 300 | """Constructs a ResNet-101 model. 301 | 302 | Args: 303 | pretrained (bool): If True, returns a model pre-trained on ImageNet 304 | """ 305 | model = ResNetTemplet(Bottleneck, [3, 4, 23, 3], input_channel, **kwargs) 306 | if pretrained: 307 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet101'])) 308 | model.load_state_dict(model_dict) 309 | return model 310 | 311 | def resnet152(pretrained=False, **kwargs): 312 | """Constructs a ResNet-152 model. 313 | 314 | Args: 315 | pretrained (bool): If True, returns a model pre-trained on ImageNet 316 | """ 317 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 318 | if pretrained: 319 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 320 | return model 321 | 322 | def Resnet152Templet(input_channel, pretrained=False, **kwargs): 323 | """Constructs a ResNet-152 model. 324 | 325 | Args: 326 | pretrained (bool): If True, returns a model pre-trained on ImageNet 327 | """ 328 | model = ResNetTemplet(Bottleneck, [3, 8, 36, 3], input_channel, **kwargs) 329 | if pretrained: 330 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['resnet152'])) 331 | model.load_state_dict(model_dict) 332 | return model 333 | 334 | -------------------------------------------------------------------------------- /metadata/models/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.model_zoo as model_zoo 3 | import math 4 | from models.build_model import LoadPretrainedModel 5 | 6 | __all__ = [ 7 | 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 8 | 'vgg19_bn', 'vgg19', 9 | ] 10 | 11 | 12 | model_urls = { 13 | 'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth', 14 | 'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth', 15 | 'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth', 16 | 'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth', 17 | 'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth', 18 | 'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth', 19 | 'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth', 20 | 'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth', 21 | } 22 | 23 | 24 | class VGG(nn.Module): 25 | 26 | def __init__(self, features, num_classes=1000): 27 | super(VGG, self).__init__() 28 | self.features = features 29 | self.classifier = nn.Sequential( 30 | nn.Linear(512 * 7 * 7, 4096), 31 | nn.ReLU(True), 32 | nn.Dropout(), 33 | nn.Linear(4096, 4096), 34 | nn.ReLU(True), 35 | nn.Dropout(), 36 | nn.Linear(4096, num_classes), 37 | ) 38 | self._initialize_weights() 39 | 
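# Note: the classifier above flattens a 7x7x512 feature map into nn.Linear(512 * 7 * 7, 4096);
# with the VGG configurations defined later in this file (five 2x2 max-pools) that corresponds to 224x224 input images,
# so other input resolutions would presumably require resizing that first linear layer.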
40 | def forward(self, x): 41 | x = self.features(x) 42 | x = x.view(x.size(0), -1) 43 | x = self.classifier(x) 44 | return x 45 | 46 | def _initialize_weights(self): 47 | for m in self.modules(): 48 | if isinstance(m, nn.Conv2d): 49 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 50 | m.weight.data.normal_(0, math.sqrt(2. / n)) 51 | if m.bias is not None: 52 | m.bias.data.zero_() 53 | elif isinstance(m, nn.BatchNorm2d): 54 | m.weight.data.fill_(1) 55 | m.bias.data.zero_() 56 | elif isinstance(m, nn.Linear): 57 | m.weight.data.normal_(0, 0.01) 58 | m.bias.data.zero_() 59 | 60 | class VGGTemplet(nn.Module): 61 | 62 | def __init__(self, features): 63 | super(VGGTemplet, self).__init__() 64 | self.features = features 65 | self.classifier = nn.Sequential( 66 | nn.Linear(512 * 7 * 7, 4096), 67 | nn.ReLU(True), 68 | nn.Dropout(), 69 | nn.Linear(4096, 4096), 70 | nn.ReLU(True), 71 | nn.Dropout(), 72 | #nn.Linear(4096, num_classes), 73 | ) 74 | self._initialize_weights() 75 | 76 | def forward(self, x): 77 | x = self.features(x) 78 | x = x.view(x.size(0), -1) 79 | x = self.classifier(x) 80 | return x 81 | 82 | def _initialize_weights(self): 83 | for m in self.modules(): 84 | if isinstance(m, nn.Conv2d): 85 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 86 | m.weight.data.normal_(0, math.sqrt(2. / n)) 87 | if m.bias is not None: 88 | m.bias.data.zero_() 89 | elif isinstance(m, nn.BatchNorm2d): 90 | m.weight.data.fill_(1) 91 | m.bias.data.zero_() 92 | elif isinstance(m, nn.Linear): 93 | m.weight.data.normal_(0, 0.01) 94 | m.bias.data.zero_() 95 | 96 | def make_layers(cfg, in_channels, batch_norm=False): 97 | layers = [] 98 | #in_channels = 3 99 | for v in cfg: 100 | if v == 'M': 101 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 102 | else: 103 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 104 | if batch_norm: 105 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 106 | else: 107 | layers += [conv2d, nn.ReLU(inplace=True)] 108 | in_channels = v 109 | return nn.Sequential(*layers) 110 | 111 | 112 | cfg = { 113 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 114 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 115 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 116 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 117 | } 118 | 119 | 120 | def vgg11(input_channel=3, pretrained=False, **kwargs): 121 | """VGG 11-layer model (configuration "A") 122 | 123 | Args: 124 | pretrained (bool): If True, returns a model pre-trained on ImageNet 125 | """ 126 | model = VGG(make_layers(cfg['A'], input_channel), **kwargs) 127 | if pretrained: 128 | model.load_state_dict(model_zoo.load_url(model_urls['vgg11'])) 129 | return model 130 | 131 | def VGG11Templet(input_channel=3, pretrained=False, **kwargs): 132 | """VGG 11-layer model (configuration "A") 133 | 134 | Args: 135 | pretrained (bool): If True, returns a model pre-trained on ImageNet 136 | """ 137 | model = VGGTemplet(make_layers(cfg['A'], input_channel), **kwargs) 138 | if pretrained: 139 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg11'])) 140 | model.load_state_dict(model_dict) 141 | return model 142 | 143 | def vgg11_bn(input_channel=3, pretrained=False, **kwargs): 144 | """VGG 11-layer model (configuration "A") with batch normalization 145 | 146 | Args: 147 | pretrained (bool): If True, returns a model 
pre-trained on ImageNet 148 | """ 149 | model = VGG(make_layers(cfg['A'], input_channel, batch_norm=True), **kwargs) 150 | if pretrained: 151 | model.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn'])) 152 | return model 153 | 154 | def VGG11BNTemplet(input_channel=3, pretrained=False, **kwargs): 155 | """VGG 11-layer model (configuration "A") 156 | 157 | Args: 158 | pretrained (bool): If True, returns a model pre-trained on ImageNet 159 | """ 160 | model = VGGTemplet(make_layers(cfg['A'], input_channel, batch_norm=True), **kwargs) 161 | if pretrained: 162 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg11_bn'])) 163 | model.load_state_dict(model_dict) 164 | return model 165 | 166 | def vgg13(input_channel=3, pretrained=False, **kwargs): 167 | """VGG 13-layer model (configuration "B") 168 | 169 | Args: 170 | pretrained (bool): If True, returns a model pre-trained on ImageNet 171 | """ 172 | model = VGG(make_layers(cfg['B'], input_channel), **kwargs) 173 | if pretrained: 174 | model.load_state_dict(model_zoo.load_url(model_urls['vgg13'])) 175 | return model 176 | 177 | def VGG13Templet(input_channel=3, pretrained=False, **kwargs): 178 | """VGG 13-layer model (configuration "B") 179 | 180 | Args: 181 | pretrained (bool): If True, returns a model pre-trained on ImageNet 182 | """ 183 | model = VGGTemplet(make_layers(cfg['B'], input_channel), **kwargs) 184 | if pretrained: 185 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg13'])) 186 | model.load_state_dict(model_dict) 187 | return model 188 | 189 | def vgg13_bn(input_channel=3, pretrained=False, **kwargs): 190 | """VGG 13-layer model (configuration "B") with batch normalization 191 | 192 | Args: 193 | pretrained (bool): If True, returns a model pre-trained on ImageNet 194 | """ 195 | model = VGG(make_layers(cfg['B'], input_channel, batch_norm=True), **kwargs) 196 | if pretrained: 197 | model.load_state_dict(model_zoo.load_url(model_urls['vgg13_bn'])) 198 | return model 199 | 200 | def VGG13BNTemplet(input_channel=3, pretrained=False, **kwargs): 201 | """VGG 13-layer model (configuration "B") with batch normalization 202 | 203 | Args: 204 | pretrained (bool): If True, returns a model pre-trained on ImageNet 205 | """ 206 | model = VGGTemplet(make_layers(cfg['B'], input_channel, batch_norm=True), **kwargs) 207 | if pretrained: 208 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg13_bn'])) 209 | model.load_state_dict(model_dict) 210 | return model 211 | 212 | def vgg16(input_channel=3, pretrained=False, **kwargs): 213 | """VGG 16-layer model (configuration "D") 214 | 215 | Args: 216 | pretrained (bool): If True, returns a model pre-trained on ImageNet 217 | """ 218 | model = VGG(make_layers(cfg['D'], input_channel), **kwargs) 219 | if pretrained: 220 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16'])) 221 | return model 222 | 223 | def VGG16Templet(input_channel=3, pretrained=False, **kwargs): 224 | """VGG 16-layer model (configuration "D") 225 | 226 | Args: 227 | pretrained (bool): If True, returns a model pre-trained on ImageNet 228 | """ 229 | model = VGGTemplet(make_layers(cfg['D'], input_channel), **kwargs) 230 | if pretrained: 231 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg16'])) 232 | model.load_state_dict(model_dict) 233 | return model 234 | 235 | def vgg16_bn(input_channel=3, pretrained=False, **kwargs): 236 | """VGG 16-layer model (configuration "D") with batch normalization 237 | 238 | Args: 239 | 
pretrained (bool): If True, returns a model pre-trained on ImageNet 240 | """ 241 | model = VGG(make_layers(cfg['D'], input_channel, batch_norm=True), **kwargs) 242 | if pretrained: 243 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn'])) 244 | return model 245 | 246 | def VGG16BNTemplet(input_channel=3, pretrained=False, **kwargs): 247 | """VGG 16-layer model (configuration "D") with batch normalization 248 | 249 | Args: 250 | pretrained (bool): If True, returns a model pre-trained on ImageNet 251 | """ 252 | model = VGGTemplet(make_layers(cfg['D'], input_channel, batch_norm=True), **kwargs) 253 | if pretrained: 254 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg16_bn'])) 255 | model.load_state_dict(model_dict) 256 | return model 257 | 258 | def vgg19(input_channel=3, pretrained=False, **kwargs): 259 | """VGG 19-layer model (configuration "E") 260 | 261 | Args: 262 | pretrained (bool): If True, returns a model pre-trained on ImageNet 263 | """ 264 | model = VGG(make_layers(cfg['E'], input_channel), **kwargs) 265 | if pretrained: 266 | model.load_state_dict(model_zoo.load_url(model_urls['vgg19'])) 267 | return model 268 | 269 | def VGG19Templet(input_channel=3, pretrained=False, **kwargs): 270 | """VGG 19-layer model (configuration "E") 271 | 272 | Args: 273 | pretrained (bool): If True, returns a model pre-trained on ImageNet 274 | """ 275 | model = VGGTemplet(make_layers(cfg['E'], input_channel), **kwargs) 276 | if pretrained: 277 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg19'])) 278 | model.load_state_dict(model_dict) 279 | return model 280 | 281 | def vgg19_bn(input_channel=3, pretrained=False, **kwargs): 282 | """VGG 19-layer model (configuration 'E') with batch normalization 283 | 284 | Args: 285 | pretrained (bool): If True, returns a model pre-trained on ImageNet 286 | """ 287 | model = VGG(make_layers(cfg['E'], input_channel, batch_norm=True), **kwargs) 288 | if pretrained: 289 | model.load_state_dict(model_zoo.load_url(model_urls['vgg19_bn'])) 290 | return model 291 | 292 | def VGG19BNTemplet(input_channel=3, pretrained=False, **kwargs): 293 | """VGG 19-layer model (configuration 'E') with batch normalization 294 | 295 | Args: 296 | pretrained (bool): If True, returns a model pre-trained on ImageNet 297 | """ 298 | model = VGGTemplet(make_layers(cfg['E'], input_channel, batch_norm=True), **kwargs) 299 | if pretrained: 300 | model_dict = LoadPretrainedModel(model, model_zoo.load_url(model_urls['vgg19_bn'])) 301 | model.load_state_dict(model_dict) 302 | return model 303 | -------------------------------------------------------------------------------- /metadata/options/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/options/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/options/__pycache__/options.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/options/__pycache__/options.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/options/options.py: -------------------------------------------------------------------------------- 1 
| import os 2 | import torch 3 | import argparse 4 | 5 | class Options(): 6 | def __init__(self): 7 | self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 8 | 9 | self.parser.add_argument('--dir', required=True, default='./', help='path to the data directory containing data.txt and label.txt') 10 | self.parser.add_argument('--name', required=True, default='test', help='subdirectory name for training or testing; snapshots, the split dataset and test results exist here') 11 | self.parser.add_argument('--mode', required=True, default='Train', help='run mode of training or testing. [Train | Test | train | test]') 12 | self.parser.add_argument('--model', required=True, default='LightenB', help='model type. [Alexnet | LightenB | VGG16 | Resnet18 | ...]') 13 | self.parser.add_argument('--load_size', type=int, default=512, help='scale image to the size prepared for cropping') 14 | self.parser.add_argument('--input_size', type=int, default=512, help='then crop image to the size as network input') 15 | self.parser.add_argument('--ratio', type=str, default='[1, 0, 0]', help='ratio of whole dataset for Train, Validate, Test respectively') 16 | self.parser.add_argument('--batch_size', type=int, default=1, help='batch size of network input. Note that batch_size should only be set to 1 in Test mode') 17 | self.parser.add_argument('--shuffle', action='store_true', help='default false. If true, data will be shuffled when splitting the dataset and in batches') 18 | self.parser.add_argument('--flip', action='store_true', help='if true, flip image randomly before input into network') 19 | self.parser.add_argument('--region', action='store_false', help='if true, crop image by input box') 20 | self.parser.add_argument('--load_thread', type=int, default=2, help='how many subprocesses to use for data loading') 21 | self.parser.add_argument('--crop', type=str, default='NoCrop', help='crop type, candidates are [NoCrop | RandomCrop | CenterCrop | FiveCrop | TenCrop]') 22 | self.parser.add_argument('--gray', action='store_true', help='default false. If true, image will be converted to gray_scale') 23 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 24 | self.parser.add_argument('--box_ratio', type=float, default=-1, help='modify box ratio of width and height to specified ratio') 25 | self.parser.add_argument('--box_scale', type=float, default=1.0, help='scale box to specified ratio. Default 1.0 means no change') 26 | self.parser.add_argument('--input_channel', type=int, default=3, help='set input image channel, 1 for gray and 3 for color') 27 | self.parser.add_argument('--mean', type=str, default='(0.485, 0.456, 0.406)', help='sequence of means for each channel used for normalization') 28 | self.parser.add_argument('--std', type=str, default='(0.229, 0.224, 0.225)', help='sequence of standard deviations for each channel used for normalization') 29 | self.parser.add_argument('--padding', action='store_true', help='default false. If true, image will be padded if scaled box is out of image boundary') 30 | self.parser.add_argument('--checkpoint_name', type=str, default='_YOUR_MODEL_PATH_', help='path to pretrained model or model to deploy') 31 | self.parser.add_argument('--pretrain', action='store_true', help='default false. If true, load pretrained model to initialize model state_dict')
32 | ## for train 33 | self.parser.add_argument('--validate_ratio', type=float, default=1, help='ratio of validation set used when validating the model') 34 | self.parser.add_argument('--sum_epoch', type=int, default=200, help='total number of epochs for training') 35 | self.parser.add_argument('--save_epoch_freq', type=int, default=10, help='save snapshot every $save_epoch_freq epochs of training') 36 | self.parser.add_argument('--save_batch_iter_freq', type=int, default=2000, help='save snapshot every $save_batch_iter_freq training batches') 37 | self.parser.add_argument('--lr', type=float, default=0.00001, help='initial learning rate') 38 | self.parser.add_argument('--gamma', type=float, default=0.1, help='multiplicative factor of learning rate decay.') 39 | self.parser.add_argument('--lr_mult_w', type=float, default=20, help='learning rate of W of last layer parameter will be lr*lr_mult_w') 40 | self.parser.add_argument('--lr_mult_b', type=float, default=20, help='learning rate of b of last layer parameter will be lr*lr_mult_b') 41 | self.parser.add_argument('--lr_policy', type=str, default='step', help='learning rate policy: lambda|step|plateau') 42 | self.parser.add_argument('--lr_decay_in_epoch', type=int, default=50, help='multiply by a gamma every lr_decay_in_epoch iterations') 43 | self.parser.add_argument('--momentum', type=float, default=0.5, help='momentum of SGD') 44 | self.parser.add_argument('--weight_decay', type=float, default=1e-5, help='weight decay (L2 penalty)') 45 | self.parser.add_argument('--loss_weight', type=str, default='', help='list. Loss weight for cross entropy loss. For example, set $loss_weight to [1, 0.8, 0.8] for a 3-label classification') 46 | 47 | ## for test 48 | self.parser.add_argument('--top_k', type=str, default='(3,)', help='tuple. We only take top k classification results into accuracy consideration') 49 | self.parser.add_argument('--score_thres', type=str, default='0.1', help='float or list. We only take classification results whose score is bigger than score_thres into recall consideration') 50 | # these two params below are used only in deploy.py 51 | self.parser.add_argument('--label_file', type=str, default="", help='label file used only for deploying a checkpoint model') 52 | self.parser.add_argument('--classify_dir', type=str, default="", help='directory where data.txt to be classified exists') 53 | 54 | ## for visualization 55 | self.parser.add_argument('--display_winsize', type=int, default=128, help='display window size') 56 | self.parser.add_argument('--display_id', type=int, default=1, help='window id of the web display. Less than 1 will display nothing') 57 | self.parser.add_argument('--display_port', type=int, default=8097, help='port of visdom server for web display. Result will show on `localhost:$display_port`') 58 | self.parser.add_argument('--image_ncols', type=int, default=0, help='if positive, display all images in a single visdom web panel with a certain number of images per row.') 59 | self.parser.add_argument('--html', action='store_false', help='default true. Do not save intermediate training results to [opt.dir]/[opt.name]/web/')
60 | self.parser.add_argument('--update_html_freq', type=int, default=10, help='frequency of saving training results to html') 61 | self.parser.add_argument('--display_train_freq', type=int, default=10, help='print train loss and accuracy every $display_train_freq batch iterations') 62 | self.parser.add_argument('--display_validate_freq', type=int, default=10, help='test validation dataset every $display_validate_freq batch iterations') 63 | self.parser.add_argument('--display_data_freq', type=int, default=10, help='frequency of showing training data on web browser') 64 | self.parser.add_argument('--display_image_ratio', type=float, default=1.0, help='ratio of images in a batch showing on web browser') 65 | 66 | def parse(self): 67 | opt = self.parser.parse_args() 68 | 69 | # mode 70 | if opt.mode not in ["Train", "Test", "Test-Train", "train", "test","test-train"]: 71 | raise Exception("cannot recognize flag `mode`") 72 | opt.mode = opt.mode.capitalize() 73 | if opt.mode == "Test": 74 | opt.batch_size = 1 75 | opt.shuffle = False 76 | 77 | # devices id 78 | gpu_ids = opt.gpu_ids.split(',') 79 | opt.devices = [] 80 | for id in gpu_ids: 81 | if eval(id) >= 0: 82 | opt.devices.append(eval(id)) 83 | # cuda 84 | opt.cuda = False 85 | if len(opt.devices) > 0 and torch.cuda.is_available(): 86 | opt.cuda = True 87 | 88 | 89 | opt.top_k = eval(opt.top_k) 90 | opt.mean = eval(opt.mean) 91 | opt.std = eval(opt.std) 92 | opt.ratio = eval(opt.ratio) 93 | if opt.loss_weight == "": 94 | opt.loss_weight=None 95 | else: 96 | opt.loss_weight = torch.FloatTensor(eval(opt.loss_weight)) 97 | 98 | return opt 99 | 100 | if __name__ == "__main__": 101 | op = Options() 102 | op.parse() 103 | -------------------------------------------------------------------------------- /metadata/reformat-log.py: -------------------------------------------------------------------------------- 1 | import os,re 2 | rawlogpath = './test.log' 3 | newlogpath = './newtest.log' 4 | f = open(rawlogpath,'r') 5 | cnt = 0 6 | attr = [] 7 | 8 | for line in f: 9 | if line[0:2] == '[[': 10 | tmp = [] 11 | for x in re.split('\[|\n| ',line): 12 | if x!='': 13 | tmp.append(x) 14 | elif line[0:4]=='test': 15 | pass 16 | elif line[0:4]=='data': 17 | break 18 | elif line[-3:] == ']]\n': 19 | for x in re.split('\]|\n| ',line): 20 | if x!='': 21 | tmp.append(x) 22 | attr.append(tmp) 23 | else: 24 | for x in re.split('\n| ',line): 25 | if x!='': 26 | tmp.append(x) 27 | cnt+=1 28 | f.close() 29 | 30 | f = open(newlogpath,'w') 31 | for cnt in range(int(len(attr)/3)): 32 | f.write('test %s image \n'%(cnt)) 33 | for x in attr[cnt*3]: 34 | f.write(x+' ') 35 | f.write('\n') 36 | 37 | for x in attr[cnt*3+1]: 38 | f.write(x) 39 | f.write('\n') 40 | 41 | for x in attr[cnt*3+2]: 42 | f.write(x) 43 | f.write('\n') 44 | 45 | print(len(attr)) 46 | f.close() 47 | -------------------------------------------------------------------------------- /metadata/util/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/util/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/util/__pycache__/html.cpython-36.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/util/__pycache__/html.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/util/__pycache__/util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/util/__pycache__/util.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/util/__pycache__/webvisualizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipl-uw/2019-CVPR-AIC-Track-2-UWIPL/387924b1e33e0594977cd095c26a147e4a7f8192/metadata/util/__pycache__/webvisualizer.cpython-36.pyc -------------------------------------------------------------------------------- /metadata/util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import * 3 | import os 4 | 5 | 6 | class HTML: 7 | def __init__(self, web_dir, title, reflesh=0): 8 | self.title = title 9 | self.web_dir = web_dir 10 | self.img_dir = os.path.join(self.web_dir, 'images') 11 | if not os.path.exists(self.web_dir): 12 | os.makedirs(self.web_dir) 13 | if not os.path.exists(self.img_dir): 14 | os.makedirs(self.img_dir) 15 | # print(self.img_dir) 16 | 17 | self.doc = dominate.document(title=title) 18 | if reflesh > 0: 19 | with self.doc.head: 20 | meta(http_equiv="reflesh", content=str(reflesh)) 21 | 22 | def get_image_dir(self): 23 | return self.img_dir 24 | 25 | def add_header(self, str): 26 | with self.doc: 27 | h3(str) 28 | 29 | def add_table(self, border=1): 30 | self.t = table(border=border, style="table-layout: fixed;") 31 | self.doc.add(self.t) 32 | 33 | def add_images(self, ims, txts, links, width=400): 34 | self.add_table() 35 | with self.t: 36 | with tr(): 37 | for im, txt, link in zip(ims, txts, links): 38 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 39 | with p(): 40 | with a(href=os.path.join('images', link)): 41 | img(style="width:%dpx" % width, src=os.path.join('images', im)) 42 | br() 43 | p(txt) 44 | 45 | def save(self): 46 | html_file = '%s/index.html' % self.web_dir 47 | f = open(html_file, 'wt') 48 | f.write(self.doc.render()) 49 | f.close() 50 | 51 | 52 | if __name__ == '__main__': 53 | html = HTML('web/', 'test_html') 54 | html.add_header('hello world') 55 | 56 | ims = [] 57 | txts = [] 58 | links = [] 59 | for n in range(4): 60 | ims.append('image_%d.png' % n) 61 | txts.append('text_%d' % n) 62 | links.append('image_%d.png' % n) 63 | html.add_images(ims, txts, links) 64 | html.save() 65 | -------------------------------------------------------------------------------- /metadata/util/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import numpy as np 4 | import logging 5 | import collections 6 | from PIL import Image 7 | 8 | 9 | def tensor2im(image_tensor, mean, std, imtype=np.uint8): 10 | image_numpy = image_tensor.cpu().float().numpy() 11 | if image_numpy.shape[0] == 1: 12 | image_numpy = np.tile(image_numpy, (3, 1, 1)) 13 | image_numpy = image_numpy.transpose(1, 2, 0) 14 | image_numpy *= std 15 | image_numpy += mean 16 | image_numpy *= 255.0 17 | return image_numpy.astype(imtype) 18 | 19 | def save_image(image_numpy, 
image_path): 20 | image_pil = Image.fromarray(image_numpy) 21 | image_pil.save(image_path) 22 | 23 | def mkdirs(paths): 24 | if isinstance(paths, list) and not isinstance(paths, str): 25 | for path in paths: 26 | mkdir(path) 27 | else: 28 | mkdir(paths) 29 | 30 | def mkdir(path): 31 | if not os.path.exists(path): 32 | os.makedirs(path) 33 | 34 | def rmdir(path): 35 | if os.path.exists(path): 36 | os.system('rm -rf ' + path) 37 | 38 | def print_loss(loss_list, label, epoch=0, batch_iter=0): 39 | if label == "Test": 40 | logging.info("[ %s Loss ] of Test Dataset:" % (label)) 41 | else: 42 | logging.info("[ %s Loss ] of Epoch %d Batch %d" % (label, epoch, batch_iter)) 43 | 44 | for index, loss in enumerate(loss_list): 45 | logging.info("----Attribute %d: %f" %(index, loss)) 46 | 47 | def print_accuracy(accuracy_list, label, epoch=0, batch_iter=0): 48 | if label == "Test": 49 | logging.info("[ %s Accu ] of Test Dataset:" % (label)) 50 | else: 51 | logging.info("[ %s Accu ] of Epoch %d Batch %d" %(label, epoch, batch_iter)) 52 | 53 | for index, item in enumerate(accuracy_list): 54 | for top_k, value in item.items(): 55 | logging.info("----Attribute %d Top%d: %f" %(index, top_k, value["ratio"])) 56 | 57 | def opt2file(opt, dst_file): 58 | args = vars(opt) 59 | with open(dst_file, 'wt') as opt_file: 60 | opt_file.write('------------ Options -------------\n') 61 | print('------------ Options -------------') 62 | for k, v in sorted(args.items()): 63 | opt_file.write('%s: %s\n' % (str(k), str(v))) 64 | print("%s: %s" %(str(k), str(v))) 65 | opt_file.write('-------------- End ----------------\n') 66 | print('-------------- End ----------------') 67 | 68 | def load_label(label_file): 69 | rid2name = list() # rid: real id, same as the id in label.txt 70 | id2rid = list() # id: number from 0 to len(rids)-1 corresponding to the order of rids 71 | rid2id = list() 72 | with open(label_file) as l: 73 | rid2name_dict = collections.defaultdict(str) 74 | id2rid_dict = collections.defaultdict(str) 75 | rid2id_dict = collections.defaultdict(str) 76 | new_id = 0 77 | for line in l.readlines(): 78 | line = line.strip('\n\r').split(';') 79 | if len(line) == 3: # attr description 80 | if len(rid2name_dict) != 0: 81 | rid2name.append(rid2name_dict) 82 | id2rid.append(id2rid_dict) 83 | rid2id.append(rid2id_dict) 84 | rid2name_dict = collections.defaultdict(str) 85 | id2rid_dict = collections.defaultdict(str) 86 | rid2id_dict = collections.defaultdict(str) 87 | new_id = 0 88 | rid2name_dict["__name__"] = line[2] 89 | rid2name_dict["__attr_id__"] = line[1] 90 | elif len(line) == 2: # attr value description 91 | rid2name_dict[line[0]] = line[1] 92 | id2rid_dict[new_id] = line[0] 93 | rid2id_dict[line[0]] = new_id 94 | new_id += 1 95 | if len(rid2name_dict) != 0: 96 | rid2name.append(rid2name_dict) 97 | id2rid.append(id2rid_dict) 98 | rid2id.append(rid2id_dict) 99 | return rid2name, id2rid, rid2id 100 | 101 | -------------------------------------------------------------------------------- /metadata/util/webvisualizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import collections 4 | import numpy as np 5 | from . import util 6 | from . 
import html 7 | 8 | 9 | class WebVisualizer(): 10 | def __init__(self, opt): 11 | self.opt = opt 12 | self.display_id = opt.display_id 13 | self.win_size = opt.display_winsize 14 | self.use_html = (opt.html and (opt.mode == "Train")) 15 | self.name = opt.name 16 | self.saved = False 17 | self.type2id = {"Loss":0, "Accuracy": 1, "Other": 2} 18 | self.phase2id = {"Train": 0, "Validate": 1, "Test": 2} 19 | 20 | def ManualType(): 21 | return collections.defaultdict(list) 22 | # store all the points for regular backup 23 | self.plot_data = collections.defaultdict(ManualType) 24 | # line window info 25 | self.win_info = collections.defaultdict(ManualType) 26 | if self.display_id > 0: 27 | import visdom 28 | self.vis = visdom.Visdom(port=opt.display_port) 29 | 30 | if self.use_html: 31 | self.web_dir = os.path.join(opt.model_dir, "web") 32 | self.img_dir = os.path.join(opt.model_dir, "image") 33 | print("Create web directory %s ..." %(self.web_dir)) 34 | util.mkdirs([self.web_dir, self.img_dir]) 35 | 36 | 37 | def reset(self): 38 | self.saved = False 39 | 40 | """ 41 | type: [Accuracy | Loss | Other] 42 | phase: [Train | Validate | Test] 43 | """ 44 | def plot_points(self, x, y, data_type, phase): 45 | line_name = data_type + "@" + phase 46 | self.plot_data[data_type][phase].append((x,y)) 47 | # draw ininial line objects if not initialized 48 | if len(self.win_info[data_type][phase]) == 0: 49 | for index in range(len(y)): 50 | win_id = self.type2id[data_type]*len(y) + index 51 | win = self.vis.line(X=np.array([0]), 52 | Y=np.array([0]), 53 | opts=dict( 54 | title=data_type + " of Attribute " + str(index) + " Over Time", 55 | xlabel="epoch", 56 | ylabel=data_type, 57 | showlegend=True, 58 | width=900, 59 | height=450), 60 | win=win_id, 61 | name=line_name) 62 | self.win_info[data_type][phase].append(win) 63 | 64 | for index, value in enumerate(y): 65 | win_id = self.win_info[data_type][phase][index] 66 | self.vis.line(X=np.array([x]), 67 | Y=np.array([value]), 68 | win=win_id, 69 | name=line_name, 70 | update="append") 71 | 72 | def plot_images(self, image_dict, start_display_id, epoch, save_result): 73 | if self.display_id > 0: # show images in the browser 74 | ncols = self.opt.image_ncols 75 | if ncols > 0: 76 | h, w = next(iter(image_dict.values())).shape[:2] 77 | table_css = """""" % (w, h) 81 | title = self.name 82 | label_html = '' 83 | label_html_row = '' 84 | nrows = int(np.ceil(len(image_dict.items()) / ncols)) 85 | images = [] 86 | idx = 0 87 | for label, image_numpy in image_dict.items(): 88 | label_html_row += '%s' % label 89 | images.append(image_numpy.transpose([2, 0, 1])) 90 | idx += 1 91 | if idx % ncols == 0: 92 | label_html += '%s' % label_html_row 93 | label_html_row = '' 94 | white_image = np.ones_like(image_numpy.transpose([2, 0, 1])) * 255 95 | while idx % ncols != 0: 96 | images.append(white_image) 97 | label_html_row += '' 98 | idx += 1 99 | if label_html_row != '': 100 | label_html += '%s' % label_html_row 101 | # pane col = image row 102 | self.vis.images(images, nrow=ncols, win=start_display_id + 1, 103 | padding=2, opts=dict(title=title + ' images')) 104 | label_html = '%s
' % label_html 105 | self.vis.text(table_css + label_html, win=start_display_id + 2, 106 | opts=dict(title=title + ' labels')) 107 | else: 108 | idx = 1 109 | for label, image_numpy in image_dict.items(): 110 | self.vis.image(image_numpy.transpose([2, 0, 1]), opts=dict(title=label), 111 | win=start_display_id + idx) 112 | idx += 1 113 | 114 | if self.use_html and (save_result or not self.saved): # save images to a html file 115 | self.saved = True 116 | for label, image_numpy in image_dict.items(): 117 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label)) 118 | util.save_image(image_numpy, img_path) 119 | # update website 120 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1) 121 | for n in range(epoch, 0, -1): 122 | webpage.add_header('epoch [%d]' % n) 123 | ims = [] 124 | txts = [] 125 | links = [] 126 | 127 | for label, image_numpy in image_dict.items(): 128 | img_path = 'epoch%.3d_%s.png' % (n, label) 129 | ims.append(img_path) 130 | txts.append(label) 131 | links.append(img_path) 132 | webpage.add_images(ims, txts, links, width=self.win_size) 133 | webpage.save() 134 | 135 | def backup(self, name): 136 | pass 137 | 138 | def test(self): 139 | pass 140 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | _libgcc_mutex=0.1=main 5 | backports=1.0=py_2 6 | backports.functools_lru_cache=1.6.1=py_0 7 | backports_abc=0.5=py_1 8 | blas=1.0=mkl 9 | bzip2=1.0.8=h7b6447c_0 10 | ca-certificates=2020.7.22=0 11 | certifi=2019.11.28=py27_0 12 | cffi=1.14.0=py27he30daa8_1 13 | cuda90=1.0=h6433d27_0 14 | cycler=0.10.0=py27_0 15 | dbus=1.13.16=hb2f20db_0 16 | expat=2.2.9=he6710b0_2 17 | ffmpeg=4.3.1=h167e202_0 18 | fontconfig=2.13.0=h9420a91_0 19 | freetype=2.10.2=h5ab3b9f_0 20 | functools32=3.2.3.2=py27_1 21 | futures=3.3.0=py27_0 22 | glib=2.65.0=h3eb4bd4_0 23 | gmp=6.2.0=he1b5a44_2 24 | gnutls=3.6.13=h79a8f9a_0 25 | gst-plugins-base=1.14.0=hbbd80ab_1 26 | gstreamer=1.14.0=hb31296c_0 27 | icu=58.2=he6710b0_3 28 | intel-openmp=2020.2=254 29 | jpeg=9b=h024ee3a_2 30 | kiwisolver=1.1.0=py27he6710b0_0 31 | lame=3.100=h7b6447c_0 32 | libedit=3.1.20191231=h14c3975_1 33 | libffi=3.3=he6710b0_2 34 | libgcc-ng=9.1.0=hdf63c60_0 35 | libgfortran-ng=7.3.0=hdf63c60_0 36 | libiconv=1.15=h63c8f33_5 37 | libpng=1.6.37=hbc83047_0 38 | libstdcxx-ng=9.1.0=hdf63c60_0 39 | libtiff=4.1.0=h2733197_1 40 | libuuid=1.0.3=h1bed415_2 41 | libxcb=1.14=h7b6447c_0 42 | libxml2=2.9.10=he19cac6_1 43 | lz4-c=1.9.2=he6710b0_1 44 | matplotlib=2.2.3=py27hb69df0a_0 45 | mkl=2020.2=256 46 | mkl-service=2.3.0=py27he904b0f_0 47 | mkl_fft=1.0.15=py27ha843d7b_0 48 | mkl_random=1.1.0=py27hd6b4f25_0 49 | ncurses=6.2=he6710b0_1 50 | nettle=3.4.1=hbb512f6_0 51 | numpy=1.16.6=py27hbc911f0_0 52 | numpy-base=1.16.6=py27hde5b4d6_0 53 | olefile=0.46=py27_0 54 | opencv=2.4.11=nppy27_0 55 | openh264=2.1.1=h8b12597_0 56 | openssl=1.1.1g=h7b6447c_0 57 | pcre=8.44=he6710b0_0 58 | pillow=6.2.1=py27h34e0f95_0 59 | pip=19.3.1=py27_0 60 | pycparser=2.20=py_2 61 | pyparsing=2.4.7=py_0 62 | pyqt=5.9.2=py27h05f1152_2 63 | python=2.7.18=h15b4118_1 64 | python-dateutil=2.8.1=py_0 65 | pytorch=0.3.1=py27_cuda9.0.176_cudnn7.0.5_2 66 | pytz=2020.1=py_0 67 | qt=5.9.7=h5867ecd_1 68 | readline=8.0=h7b6447c_0 69 | scikit-learn=0.20.3=py27hd81dba3_0 70 | 
scipy=1.2.1=py27h7c811a0_0 71 | setuptools=44.0.0=py27_0 72 | singledispatch=3.4.0.3=py27_0 73 | sip=4.19.8=py27hf484d3e_0 74 | six=1.15.0=py_0 75 | sqlite=3.33.0=h62c20be_0 76 | subprocess32=3.5.4=py27h7b6447c_0 77 | tk=8.6.10=hbc83047_0 78 | torchvision=0.2.0=py27hfb27419_1 79 | tornado=5.1.1=py27h7b6447c_0 80 | wheel=0.35.1=py_0 81 | x264=1!152.20180806=h7b6447c_0 82 | xz=5.2.5=h7b6447c_0 83 | zlib=1.2.11=h7b6447c_3 84 | zstd=1.4.5=h9ceee32_0 85 | -------------------------------------------------------------------------------- /vehicle_keypoints/README.md: -------------------------------------------------------------------------------- 1 | This code is for vehicle viewpoint estimation. You need to change the data paths before running. 2 | --------------------------------------------------------------------------------