├── .idea
│   ├── DRNet.iml
│   ├── encodings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── workspace.xml
├── README.md
├── __pycache__
│   ├── config.cpython-37.pyc
│   └── train.cpython-37.pyc
├── config.py
├── datasets
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   ├── dataset.cpython-37.pyc
│   │   └── samplers.cpython-37.pyc
│   ├── dataset.py
│   ├── dataset_prepare
│   │   ├── __init__.py
│   │   ├── functions.py
│   │   ├── info.json
│   │   ├── scene_label.py
│   │   ├── train_val_divide.py
│   │   └── video_vis.py
│   ├── samplers.py
│   └── setting
│       ├── HT21.py
│       ├── SENSE.py
│       ├── __init__.py
│       └── __pycache__
│           ├── HT21.cpython-37.pyc
│           ├── SENSE.cpython-37.pyc
│           └── __init__.cpython-37.pyc
├── demo_code
│   ├── image2video.py
│   ├── test_CroHD.py
│   ├── test_beijng.py
│   └── video2img.py
├── figures
│   ├── demo_screen1.png
│   ├── framework1.png
│   └── utils
│       ├── 0.png
│       ├── 30.png
│       ├── __init__.py
│       ├── frame_figure
│       │   ├── 112500_112501_matches.png
│       │   ├── 112500_112501_matches_vis.png
│       │   ├── 112501_b_vis_.jpg
│       │   ├── 112501_vis_.jpg
│       │   ├── assign.png
│       │   ├── assign_P.npy
│       │   ├── cost.png
│       │   ├── cost_c.npy
│       │   ├── cost_c_.npy
│       │   ├── hot_map.py
│       │   └── id.npy
│       ├── info.json
│       └── intro.py
├── misc
│   ├── KPI_pool.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── KPI_pool.cpython-37.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── dot_ops.cpython-37.pyc
│   │   ├── get_bbox.cpython-37.pyc
│   │   ├── inflation.cpython-37.pyc
│   │   ├── layer.cpython-37.pyc
│   │   ├── nms.cpython-37.pyc
│   │   ├── transforms.cpython-37.pyc
│   │   └── utils.cpython-37.pyc
│   ├── cal_mean.py
│   ├── dot_ops.py
│   ├── evaluation_code.py
│   ├── get_bbox.py
│   ├── inflation.py
│   ├── layer.py
│   ├── modelsummary.py
│   ├── nms.py
│   ├── post_process.py
│   ├── transforms.py
│   └── utils.py
├── model
│   ├── MatchTool
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── compute_metric.cpython-37.pyc
│   │   │   └── utils.cpython-37.pyc
│   │   ├── compute_metric.py
│   │   └── utils.py
│   ├── PreciseRoIPooling
│   │   ├── .gitignore
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── _assets
│   │   │   └── prroi_visualization.png
│   │   ├── pytorch
│   │   │   ├── prroi_pool
│   │   │   │   ├── .gitignore
│   │   │   │   ├── __init__.py
│   │   │   │   ├── functional.py
│   │   │   │   ├── prroi_pool.py
│   │   │   │   └── src
│   │   │   │       ├── prroi_pooling_gpu.c
│   │   │   │       ├── prroi_pooling_gpu.h
│   │   │   │       ├── prroi_pooling_gpu_impl.cu
│   │   │   │       └── prroi_pooling_gpu_impl.cuh
│   │   │   └── tests
│   │   │       └── test_prroi_pooling2d.py
│   │   ├── src
│   │   │   ├── prroi_pooling_gpu_impl.cu
│   │   │   └── prroi_pooling_gpu_impl.cuh
│   │   └── tensorflow
│   │       ├── prroi_pool
│   │       │   ├── CMakeLists.txt
│   │       │   ├── __init__.py
│   │       │   ├── precise_roi_pooling_ops.py
│   │       │   └── src
│   │       │       ├── kernels
│   │       │       │   ├── build_cuda.py
│   │       │       │   ├── external
│   │       │       │   │   ├── prroi_pooling_gpu_impl.cu
│   │       │       │   │   └── prroi_pooling_gpu_impl.cuh
│   │       │       │   ├── precise_roi_pooling.h
│   │       │       │   ├── precise_roi_pooling_kernels.cc
│   │       │       │   └── precise_roi_pooling_kernels.cu.cc
│   │       │       └── ops
│   │       │           └── precise_roi_pooling_ops.cc
│   │       └── tests
│   │           ├── precise_roi_pooling_ops_test.py
│   │           └── test_binaries
│   │               └── 2_2_0.5
│   │                   ├── features.npy
│   │                   ├── gradients0.npy
│   │                   ├── gradients1.npy
│   │                   ├── real_outputs.npy
│   │                   └── rois.npy
│   ├── VGG
│   │   ├── VGG16_FPN.py
│   │   └── conv.py
│   ├── VIC.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── VIC.cpython-37.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── optimal_transport_layer.cpython-37.pyc
│   │   └── points_from_den.cpython-37.pyc
│   ├── necks
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   └── fpn.cpython-37.pyc
│   │   └── fpn.py
│   ├── optimal_transport_layer.py
│   └── points_from_den.py
├── requirements.txt
├── results
│   ├── LOI_SENSE_metric.py
│   ├── Tracking_HT21_metric.py
│   └── sense_result_CFM
├── test_HT21.py
├── test_SENSE.py
├── train.py
└── vision
    ├── engine.py
    ├── transform.py
    └── utils.py
/README.md:
--------------------------------------------------------------------------------
1 | # DRNet for Video Individual Counting (CVPR 2022)
2 | ## Introduction
3 | This is the official PyTorch implementation of the paper: [**DR.VIC: Decomposition and Reasoning for Video Individual Counting**](https://arxiv.org/abs/2203.12335). Unlike single-image counting methods, it counts the total number of pedestrians in a video sequence, with each person counted only once even if they appear in multiple frames. DRNet decomposes this new task into estimating the initial crowd number in the first frame and integrating differential crowd numbers over a set of subsequent image pairs (namely, the current frame and its preceding frame).
4 | 
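The decomposition amounts to: total video count = count in the first frame + the summed inflow (newly appearing pedestrians) over consecutive frame pairs. A minimal sketch of this bookkeeping (illustrative only, not the repo's API; `first_frame_count` and `pair_inflows` are hypothetical inputs):

```python
def video_individual_count(first_frame_count, pair_inflows):
    # total = initial crowd + sum of per-pair inflows
    # (people in the current frame not matched to the preceding frame)
    return first_frame_count + sum(pair_inflows)

# e.g. 10 people in frame 1, then 2, 0, and 3 newcomers in the following pairs -> 15
print(video_individual_count(10, [2, 0, 3]))
```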
5 |
6 | # Catalog
7 | - [x] Testing Code (2022.3.19)
8 | - [x] PyTorch pretrained models (2022.3.19)
9 | - [x] Training Code
10 | - [x] HT21
11 | - [x] SenseCrowd (2022.9.30)
12 |
13 | # Getting started
14 |
15 | ## Preparation
16 |
17 | - Clone this repo into the directory ```Root/DRNet```.
18 | - Install dependencies. We use Python 3.7 and PyTorch >= 1.6.0 (http://pytorch.org):
19 |
20 | ```bash
21 | conda create -n DRNet python=3.7
22 | conda activate DRNet
23 | conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch
24 | cd ${DRNet}
25 | pip install -r requirements.txt
26 | ```
27 |
28 | - [PreciseRoIPooling](https://github.com/vacancy/PreciseRoIPooling) for extracting the feature descriptors
29 |
30 | Note: the PreciseRoIPooling [1] module is included in this repo, but you may run into some problems when compiling or running it:
31 |
32 | 1. If you are prompted to install ninja, the following commands will help you.
33 | ```bash
34 | wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
35 | sudo unzip ninja-linux.zip -d /usr/local/bin/
36 | sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force
37 | ```
38 | 2. If you encounter errors when compiling the PreciseRoIPooling, you can look up the original repo's [issues](https://github.com/vacancy/PreciseRoIPooling/issues) for help. One solution to the most common errors can be found in this [blog](https://blog.csdn.net/weixin_42098198/article/details/124756432?spm=1001.2014.3001.5502).
39 | - Datasets
40 | - **HT21** dataset: Download the CroHD dataset from this [link](https://motchallenge.net/data/Head_Tracking_21/). Unzip ```HT21.zip``` and place ```HT21``` into the folder (```Root/dataset/```).
41 | - **SenseCrowd** dataset: Download the dataset from [Baidu disk](https://pan.baidu.com/s/1OYBSPxgwvRMrr6UTStq7ZQ?pwd=64xm) or from the original dataset [link](https://github.com/HopLee6/VSCrowd-Dataset).
42 | - Download the lists of `train/val/test` sets at [link1](https://1drv.ms/u/s!AgKz_E1uf260nWeqa86-o9FMIqMt?e=0scDuw) or [link2](https://pan.baidu.com/s/13X3-egn0fYSd6NUTxB4cuw?pwd=ew8f), and place them to each dataset folder, respectively.
43 | ## Training
44 | Check some parameters in ```config.py``` before training:
45 | * Use `__C.DATASET = 'HT21'` to set the dataset (default: `HT21`).
46 | * Use `__C.GPU_ID = '0'` to set the GPU.
47 | * Use `__C.MAX_EPOCH = 20` to set the number of training epochs (default: 20).
48 | * Use `__C.EXP_PATH = os.path.join('./exp', __C.DATASET)` to set the directory for saving the code, weights, and resume point.
49 |
50 | Check the other parameters (`TRAIN_BATCH_SIZE`, `TRAIN_SIZE`, etc.) in ```Root/DRNet/datasets/setting``` in case your GPU's memory cannot support the default setting.
51 | - Run ```python train.py```.
52 |
53 |
54 | Tip: the training process takes **~10 hours** on the HT21 dataset with **one TITAN RTX (24GB memory)**.
55 |
56 | ## Testing
57 | To reproduce the performance, download the pre-trained models from [OneDrive](https://1drv.ms/u/s!AgKz_E1uf260nWeqa86-o9FMIqMt?e=0scDuw) or [Baidu disk](https://pan.baidu.com/s/13X3-egn0fYSd6NUTxB4cuw?pwd=ew8f), and then place the ```pretrained_models``` folder in ```Root/DRNet/model/```.
58 | - For HT21:
59 | - Run ```python test_HT21.py```.
60 | - For SenseCrowd:
61 | - Run ```python test_SENSE.py```.
62 | Then the output file (```*_SENSE_cnt.py```) will be generated.
63 | ## Performance
64 | The results on HT21 and SenseCrowd.
65 |
66 | - HT21 dataset
67 |
68 | | Method | CroHD11~CroHD15 | MAE/MSE/MRAE(%) |
69 | |------------|-------- |-------|
70 | | Paper: VGG+FPN [2,3]| 164.6/1075.5/752.8/784.5/382.3|141.1/192.3/27.4|
71 | | This Repo's Reproduction: VGG+FPN [2,3]|138.4/1017.5/623.9/659.8/348.5|160.7/217.3/25.1|
72 |
73 | - SenseCrowd dataset
74 |
75 | | Method | MAE/MSE/MRAE(%)| MIAE/MOAE | D0~D4 (for MAE) |
76 | |------------|---------|-------|-------|
77 | | Paper: VGG+FPN [2,3]| 12.3/24.7/12.7 |1.98/2.01 |4.1/8.0/23.3/50.0/77.0|
78 | | This Repo's Reproduction: VGG+FPN [2,3] | 11.7/24.6/11.7 | 1.99/1.88| 3.6/6.8/22.4/42.6/85.2 |
79 |
80 | # Video Demo
81 | Please visit [bilibili](https://www.bilibili.com/video/BV1cY411H7hr/) or [YouTube]() to watch the video demonstration.
82 | 
83 | # References
84 | 1. Acquisition of Localization Confidence for Accurate Object Detection, ECCV, 2018.
85 | 2. Very Deep Convolutional Networks for Large-scale Image Recognition, arXiv, 2014.
86 | 3. Feature Pyramid Networks for Object Detection, CVPR, 2017.
87 |
88 | # Citation
89 | If you find this project useful for your research, please cite:
90 | ```
91 | @inproceedings{han2022drvic,
92 | title={DR.VIC: Decomposition and Reasoning for Video Individual Counting},
93 | author={Han, Tao and Bai, Lei and Gao, Junyu and Wang, Qi and Ouyang, Wanli},
94 | booktitle={CVPR},
95 | year={2022}
96 | }
97 | ```
98 |
99 | # Acknowledgement
100 | The released PyTorch training script borrows some code from the [C^3 Framework](https://github.com/gjy3035/C-3-Framework) and [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork) repositories. If this repo is helpful for your research, please consider citing them as well.
101 |
--------------------------------------------------------------------------------
/__pycache__/config.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/__pycache__/config.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/train.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/__pycache__/train.cpython-37.pyc
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from easydict import EasyDict as edict
3 | import time
4 | import torch
5 |
6 | # init
7 | __C = edict()
8 | cfg = __C
9 |
10 | #------------------------------TRAIN------------------------
11 | __C.SEED = 3035 # random seed, for reproducibility
12 | __C.DATASET = 'HT21' # dataset selection: HT21, SENSE
13 | __C.NET = 'VGG16_FPN' # 'VGG16_FPN'
14 |
15 | __C.RESUME = False # continue training
16 | __C.RESUME_PATH = './exp/SENSE/11-23_04-55_SENSE_Res50_FPN_5e-05/latest_state.pth'
17 | __C.GPU_ID = '0' # single gpu: '0'; multi gpus: '0,1'
18 |
19 | __C.sinkhorn_iterations = 100
20 | __C.FEATURE_DIM = 256
21 | __C.ROI_RADIUS = 4.
22 | if __C.DATASET == 'SENSE':
23 | __C.VAL_INTERVALS = 15
24 | else:
25 | __C.VAL_INTERVALS = 50
26 | # learning rate settings
27 | __C.LR_Base = 5e-5 # learning rate
28 | __C.LR_Thre = 1e-2
29 |
30 | __C.LR_DECAY = 0.95
31 | __C.WEIGHT_DECAY = 1e-5 # decay rate
32 | # once the training epoch exceeds the threshold, the learning rate begins to decay
33 |
34 | __C.MAX_EPOCH = 20
35 |
36 | # print
37 | __C.PRINT_FREQ = 20
38 |
39 | now = time.strftime("%m-%d_%H-%M", time.localtime())
40 |
41 | __C.EXP_NAME = now \
42 | + '_' + __C.DATASET \
43 | + '_' + __C.NET \
44 | + '_' + str(__C.LR_Base)
45 |
46 | __C.VAL_VIS_PATH = './exp/'+__C.DATASET+'_val'
47 | __C.EXP_PATH = os.path.join('./exp', __C.DATASET) # the path of logs, checkpoints, and current codes
48 | if not os.path.exists(__C.EXP_PATH):
49 | os.makedirs(__C.EXP_PATH)
50 | #------------------------------VAL------------------------
51 |
52 | if __C.DATASET == 'HT21':
53 | __C.VAL_FREQ = 1 # before __C.VAL_DENSE_START epochs, the validation frequency is __C.VAL_FREQ
54 | __C.VAL_DENSE_START = 2
55 | else:
56 | __C.VAL_FREQ = 1
57 | __C.VAL_DENSE_START = 0
58 | #------------------------------VIS------------------------
59 |
60 | #================================================================================
61 |
--------------------------------------------------------------------------------
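For a quick sanity check of the settings above, importing `cfg` from the repo root is enough (a hedged sketch; note that importing `config` creates `./exp/<DATASET>` as a side effect):

```python
from config import cfg

print(cfg.DATASET, cfg.NET, cfg.MAX_EPOCH)  # e.g. HT21 VGG16_FPN 20
print(cfg.EXP_NAME)                         # timestamped experiment name
```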
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import torch
5 | import torch.nn.functional as F
6 | from importlib import import_module
7 | import misc.transforms as own_transforms
8 | from misc.transforms import check_image
9 | import torchvision.transforms as standard_transforms
10 | from . import dataset
11 | from . import setting
12 | from . import samplers
13 | from torch.utils.data import DataLoader
14 | from torch.utils.data import RandomSampler
15 | from config import cfg
16 |
17 | import random
18 | class train_pair_transform(object):
19 | def __init__(self,cfg_data, check_dim = True):
20 | self.cfg_data = cfg_data
21 | self.pair_flag = 0
22 | self.scale_factor = 1
23 | self.last_cw_ch =(0,0)
24 | self.crop_left = (0,0)
25 | self.last_crop_left = (0, 0)
26 | self.rate_range = (0.8,1.2)
27 | self.resize_and_crop= own_transforms.RandomCrop( cfg_data.TRAIN_SIZE)
28 | self.scale_to_setting = own_transforms.ScaleByRateWithMin(cfg_data.TRAIN_SIZE[1], cfg_data.TRAIN_SIZE[0])
29 |
30 | self.flip_flag = 0
31 | self.horizontal_flip = own_transforms.RandomHorizontallyFlip()
32 |
33 | self.last_frame_size = (0,0)
34 |
35 | self.check_dim = check_dim
36 | def __call__(self,img,target):
37 | import numpy as np
38 | w_ori, h_ori = img.size
39 | if self.pair_flag == 1 and self.check_dim: # make sure the two frames of a pair have the same shape
40 | assert self.last_frame_size == (w_ori, h_ori)
41 | # self.last_frame_size = (w_ori, h_ori)
42 | self.scale_factor = random.uniform(self.rate_range[0], self.rate_range[1])
43 | self.c_h,self.c_w = int(self.cfg_data.TRAIN_SIZE[0]/self.scale_factor), int(self.cfg_data.TRAIN_SIZE[1]/self.scale_factor)
44 | img, target = check_image(img, target, (self.c_h,self.c_w)) # make sure the img size is larger than the crop we need
45 | w, h = img.size
46 | if self.pair_flag % 2 == 0:
47 | self.last_cw_ch = (self.c_w,self.c_h)
48 | self.pair_flag = 0
49 | self.last_frame_size = (w_ori, h_ori)
50 |
51 | x1 = random.randint(0, w - self.c_w)
52 | y1 = random.randint(0, h - self.c_h)
53 | self.last_crop_left = (x1,y1)
54 |
55 | if self.pair_flag % 2 == 1:
56 | if self.check_dim:
57 | x1 = max(0, int(self.last_crop_left[0] + (self.last_cw_ch[0]-self.c_w)))
58 | y1 = max(0, int(self.last_crop_left[1] + (self.last_cw_ch[1]-self.c_h)))
59 | else: # for pre_training on other dataset
60 | x1 = random.randint(0, w - self.c_w)
61 | y1 = random.randint(0, h - self.c_h)
62 | self.crop_left = (x1, y1)
63 |
64 | img, target = self.resize_and_crop(img, target, self.crop_left,crop_size=(self.c_h,self.c_w))
65 | img, target = self.scale_to_setting(img,target)
66 |
67 | self.flip_flag = round(random.random())
68 | img, target = self.horizontal_flip(img, target, self.flip_flag)
69 | self.pair_flag += 1
70 |
71 | # assert np.array(img).sum()>0
72 | return img, target
73 |
74 |
75 | def collate_fn(batch):
76 | batch = list(filter(lambda x: x is not None, batch))
77 | # return torch.utils.data.dataloader.default_collate(batch)
78 | # if len(batch) == 0:
79 | # import pdb;pdb.set_trace()
80 | return tuple(zip(*batch))
81 |
82 | def createTrainData(datasetname, Dataset, cfg_data):
83 | img_transform = standard_transforms.Compose([
84 | standard_transforms.ToTensor(),
85 | standard_transforms.Normalize(*cfg_data.MEAN_STD)
86 | ])
87 |
88 | main_transform = train_pair_transform(cfg_data)
89 | train_set =Dataset(cfg_data.TRAIN_LST,
90 | cfg_data.DATA_PATH,
91 | main_transform=main_transform,
92 | img_transform=img_transform,
93 | train=True,
94 | datasetname=datasetname)
95 |
96 | train_sampler = samplers.CategoriesSampler(train_set.labels, frame_intervals=cfg_data.TRAIN_FRAME_INTERVALS,
97 | n_per=cfg_data.TRAIN_BATCH_SIZE)
98 | train_loader = DataLoader(train_set, batch_sampler=train_sampler, num_workers=8, collate_fn=collate_fn, pin_memory=True)
99 | print('dataset is {}, images num is {}'.format(datasetname, train_set.__len__()))
100 |
101 | return train_loader
102 | def createValData(datasetname, Dataset, cfg_data):
103 |
104 |
105 | img_transform = standard_transforms.Compose([
106 | standard_transforms.ToTensor(),
107 | standard_transforms.Normalize(*cfg_data.MEAN_STD)
108 | ])
109 |
110 | val_loader = []
111 | with open(os.path.join( cfg_data.DATA_PATH, cfg_data.VAL_LST), 'r') as txt:
112 | scene_names = txt.readlines()
113 | for scene in scene_names:
114 | sub_val_dataset = Dataset([scene.strip()],
115 | cfg_data.DATA_PATH,
116 | main_transform=None,
117 | img_transform= img_transform ,
118 | train=False,
119 | datasetname=datasetname)
120 | sub_val_loader = DataLoader(sub_val_dataset, batch_size=cfg_data.VAL_BATCH_SIZE, num_workers=4,collate_fn=collate_fn,pin_memory=False )
121 | val_loader.append(sub_val_loader)
122 |
123 | return val_loader
124 | def createRestore(mean_std):
125 | return standard_transforms.Compose([
126 | own_transforms.DeNormalize(*mean_std),
127 | standard_transforms.ToPILImage()
128 | ])
129 |
130 | def loading_data(datasetname,val_interval):
131 | datasetname = datasetname.upper()
132 | cfg_data = getattr(setting, datasetname).cfg_data
133 |
134 | Dataset = dataset.Dataset
135 | train_loader = createTrainData(datasetname, Dataset, cfg_data)
136 | restore_transform = createRestore(cfg_data.MEAN_STD)
137 |
138 | Dataset = dataset.TestDataset
139 | val_loader = createValTestData(datasetname, Dataset, cfg_data,val_interval, mode ='val')
140 |
141 |
142 | return train_loader, val_loader, restore_transform
143 |
144 | def createValTestData(datasetname, Dataset, cfg_data,frame_interval,mode ='val'):
145 | img_transform = standard_transforms.Compose([
146 | standard_transforms.ToTensor(),
147 | standard_transforms.Normalize(*cfg_data.MEAN_STD)
148 | ])
149 | if mode == 'val':
150 | with open(os.path.join( cfg_data.DATA_PATH, cfg_data.VAL_LST), 'r') as txt:
151 | scene_names = txt.readlines()
152 | scene_names = [i.strip() for i in scene_names]
153 | data_loader = []
154 | for scene_name in scene_names:
155 | print(scene_name)
156 | sub_dataset = Dataset(scene_name = scene_name,
157 | base_path=cfg_data.DATA_PATH,
158 | main_transform=None,
159 | img_transform=img_transform,
160 | interval=frame_interval,
161 | target=True,
162 | datasetname = datasetname)
163 | sub_loader = DataLoader(sub_dataset, batch_size=cfg_data.VAL_BATCH_SIZE,
164 | collate_fn=collate_fn, num_workers=0, pin_memory=True)
165 | data_loader.append(sub_loader)
166 | return data_loader
167 | elif mode == 'test':
168 | if datasetname=='HT21':
169 | target = False
170 | scene_names = ['test/HT21-11', 'test/HT21-12', 'test/HT21-13', 'test/HT21-14', 'test/HT21-15']
171 | else:
172 | target =True
173 | with open(os.path.join( cfg_data.DATA_PATH, cfg_data.TEST_LST), 'r') as txt:
174 | scene_names = txt.readlines()
175 | scene_names = [i.strip() for i in scene_names]
176 | data_loader = []
177 | for scene_name in scene_names:
178 | print(scene_name)
179 | sub_dataset = Dataset(scene_name=scene_name,
180 | base_path=cfg_data.DATA_PATH,
181 | main_transform=None,
182 | img_transform=img_transform,
183 | interval=frame_interval,
184 | target=target,
185 | datasetname=datasetname)
186 | sub_loader = DataLoader(sub_dataset, batch_size=cfg_data.VAL_BATCH_SIZE,
187 | collate_fn=collate_fn, num_workers=0, pin_memory=True)
188 | data_loader.append(sub_loader)
189 | return data_loader
190 |
191 |
192 | def loading_testset(datasetname, test_interval, mode='test'):
193 |
194 | datasetname = datasetname.upper()
195 | cfg_data = getattr(setting, datasetname).cfg_data
196 |
197 | Dataset = dataset.TestDataset
198 |
199 | test_loader = createValTestData(datasetname, Dataset, cfg_data,test_interval, mode=mode)
200 |
201 | restore_transform = createRestore(cfg_data.MEAN_STD)
202 | return test_loader, restore_transform
--------------------------------------------------------------------------------
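A hedged usage sketch based on the signatures in this file (it assumes the dataset paths in `datasets/setting/*.py` are already configured):

```python
from datasets import loading_data, loading_testset

# train/val loaders for HT21; validation pairs frames that are 50 frames apart
train_loader, val_loader, restore_transform = loading_data('HT21', val_interval=50)

# one DataLoader per test scene; each dataset item is ([img1, img2], targets or None)
test_loader, restore_transform = loading_testset('HT21', test_interval=1, mode='test')
```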
/datasets/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/datasets/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/dataset.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/datasets/__pycache__/dataset.cpython-37.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/samplers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/datasets/__pycache__/samplers.cpython-37.pyc
--------------------------------------------------------------------------------
/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import os.path as osp
5 | import os
6 | from collections import defaultdict
7 | from pathlib import Path
8 |
9 | import numpy as np
10 | import torch
11 | import torch.utils.data as data
12 |
13 | from torchvision.ops.boxes import clip_boxes_to_image
14 | from PIL import Image
15 | import re
16 |
17 | class Dataset(data.Dataset):
18 | """
19 | Dataset class.
20 | """
21 | def __init__(self, txt_path, base_path,main_transform=None,img_transform=None,train=True, datasetname='Empty'):
22 | self.base_path = base_path
23 | self.bboxes = defaultdict(list)
24 | self.imgs_path = []
25 | self.labels = []
26 | self.datasetname = datasetname
27 | if train:
28 | with open(osp.join(base_path, txt_path), 'r') as txt:
29 | scene_names = txt.readlines()
30 | else:
31 | scene_names = txt_path # for val and test
32 |
33 | for i in scene_names:
34 | if datasetname == 'HT21':
35 | img_path, label= HT21_ImgPath_and_Target(base_path,i.strip())
36 | elif datasetname == 'SENSE':
37 | img_path, label = SENSE_ImgPath_and_Target(base_path,i.strip())
38 | else:
39 | raise NotImplementedError
40 | self.imgs_path+=img_path
41 | self.labels +=label
42 |
43 | self.is_train = train
44 | self.main_transforms = main_transform
45 | self.img_transforms = img_transform
46 |
47 | def __len__(self):
48 | return len(self.imgs_path)
49 |
50 | def __getitem__(self, index):
51 |
52 | img = Image.open(self.imgs_path[index])
53 | if img.mode != 'RGB':
54 | img=img.convert('RGB')
55 |
56 | target = self.labels[index].copy()
57 |
58 | if self.main_transforms is not None:
59 | img, target = self.main_transforms(img, target)
60 | if self.img_transforms is not None:
61 | img = self.img_transforms(img)
62 |
63 | return img,target
64 |
65 | def HT21_ImgPath_and_Target(base_path,i):
66 | img_path = []
67 | labels=[]
68 | root = osp.join(base_path, i + '/img1')
69 | img_ids = os.listdir(root)
70 | img_ids.sort()
71 | gts = defaultdict(list)
72 | with open(osp.join(root.replace('img1', 'gt'), 'gt.txt'), 'r') as f:
73 | lines = f.readlines()
74 | for lin in lines:
75 | lin_list = [float(i) for i in lin.rstrip().split(',')]
76 | ind = int(lin_list[0])
77 | gts[ind].append(lin_list)
78 |
79 | for img_id in img_ids:
80 | img_id = img_id.strip()
81 | single_path = osp.join(root, img_id)
82 | annotation = gts[int(img_id.split('.')[0])]
83 | annotation = torch.tensor(annotation,dtype=torch.float32)
84 | box = annotation[:,2:6]
85 | points = box[:,0:2] + box[:,2:4]/2
86 |
87 | sigma = torch.min(box[:,2:4], 1)[0] / 2.
88 | ids = annotation[:,1].long()
89 | img_path.append(single_path)
90 |
91 | labels.append({'scene_name':i,'frame':int(img_id.split('.')[0]), 'person_id':ids, 'points':points,'sigma':sigma})
92 | return img_path, labels
93 |
94 | def SENSE_ImgPath_and_Target(base_path,i):
95 | img_path = []
96 | labels=[]
97 | root = osp.join(base_path, 'video_ori', i )
98 | img_ids = os.listdir(root)
99 | img_ids.sort()
100 | gts = defaultdict(list)
101 | with open(root.replace('video_ori', 'label_list_all')+'.txt', 'r') as f: #label_list_all_rmInvalid
102 | lines = f.readlines()
103 | for lin in lines:
104 | lin_list = [i for i in lin.rstrip().split(' ')]
105 | ind = lin_list[0]
106 | lin_list = [float(i) for i in lin_list[3:] if i != '']
107 | assert len(lin_list) % 7 == 0
108 | gts[ind] = lin_list
109 |
110 | for img_id in img_ids:
111 | img_id = img_id.strip()
112 | single_path = osp.join(root, img_id)
113 | label = gts[img_id]
114 | box_and_point = torch.tensor(label).view(-1, 7).contiguous()
115 |
116 | points = box_and_point[:, 4:6].float()
117 | ids = (box_and_point[:, 6]).long()
118 |
119 | if ids.size(0)>0:
120 | sigma = 0.6*torch.stack([(box_and_point[:,2]-box_and_point[:,0])/2,(box_and_point[:,3]-box_and_point[:,1])/2],1).min(1)[0] #torch.sqrt(((box_and_point[:,2]-box_and_point[:,0])/2)**2 + ((box_and_point[:,3]-box_and_point[:,1])/2)**2)
121 | else:
122 | sigma = torch.tensor([])
123 | img_path.append(single_path)
124 |
125 | labels.append({'scene_name':i,'frame':int(img_id.split('.')[0]), 'person_id':ids, 'points':points, 'sigma':sigma})
126 | return img_path, labels
127 |
128 |
129 | class TestDataset(data.Dataset):
130 | """
131 | Dataset class.
132 | """
133 | def __init__(self,scene_name, base_path, main_transform=None, img_transform=None, interval=1, target=True, datasetname='Empty'):
134 | self.base_path = base_path
135 | self.target = target
136 |
137 | if self.target:
138 | if datasetname == 'HT21':
139 | self.imgs_path, self.label = HT21_ImgPath_and_Target(self.base_path, scene_name)
140 | elif datasetname == 'SENSE':
141 | self.imgs_path, self.label = SENSE_ImgPath_and_Target(self.base_path, scene_name)
142 | else:
143 | raise NotImplementedError
144 | else:
145 | if datasetname == 'HT21':
146 | self.imgs_path = self.generate_imgPath_label(scene_name)
147 | elif datasetname == 'SENSE':
148 | self.imgs_path, self.label = SENSE_ImgPath_and_Target(self.base_path, scene_name)
149 | else:
150 | raise NotImplementedError
151 | self.interval =interval
152 |
153 | self.main_transforms = main_transform
154 | self.img_transforms = img_transform
155 | self.length = len(self.imgs_path)
156 | def __len__(self):
157 | return len(self.imgs_path) - self.interval
158 |
159 |
160 | def __getitem__(self, index):
161 | index1 = index
162 | index2 = index + self.interval
163 | img1 = Image.open(self.imgs_path[index1])
164 | img2 = Image.open(self.imgs_path[index2])
165 |
166 | if img1.mode != 'RGB':
167 | img1 = img1.convert('RGB')
168 | if img2.mode != 'RGB':
169 | img2 = img2.convert('RGB')
170 | if self.img_transforms is not None:
171 | img1 = self.img_transforms(img1)
172 | img2 = self.img_transforms(img2)
173 | if self.target:
174 | target1 = self.label[index1]
175 | target2 = self.label[index2]
176 | return [img1,img2], [target1,target2]
177 |
178 | return [img1,img2], None
179 |
180 | def generate_imgPath_label(self, i):
181 |
182 | img_path = []
183 | root = osp.join(self.base_path, i +'/img1')
184 | img_ids = os.listdir(root)
185 | img_ids.sort(key=self.myc)
186 |
187 |
188 | for img_id in img_ids:
189 | img_id = img_id.strip()
190 | single_path = osp.join(root, img_id)
191 | img_path.append(single_path)
192 |
193 | return img_path
194 |
195 | def myc(self, string):
196 | p = re.compile(r"\d+")
197 | return int(p.findall(string)[0])
--------------------------------------------------------------------------------
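Each element of `labels` built above is a per-frame dict with keys `scene_name`, `frame`, `person_id`, `points`, and `sigma`. A hedged sketch of iterating a `TestDataset` pair (the paths are hypothetical and assume the README's ```Root/dataset/HT21``` layout):

```python
ds = TestDataset(scene_name='train/HT21-01', base_path='../dataset/HT21',
                 main_transform=None, img_transform=None, interval=1,
                 target=True, datasetname='HT21')
(img1, img2), (t1, t2) = ds[0]  # two PIL images one frame apart, plus their labels
print(t1['frame'], t1['person_id'].shape, t1['points'].shape, t1['sigma'].shape)
```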
/datasets/dataset_prepare/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/datasets/dataset_prepare/__init__.py
--------------------------------------------------------------------------------
/datasets/dataset_prepare/functions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | def euclidean_dist( test_matrix, train_matrix):
3 | """
4 | Args:
5 | x: pytorch Variable, with shape [m, d]
6 | y: pytorch Variable, with shape [n, d]
7 | Returns:
8 | dist: pytorch Variable, with shape [m, n]
9 | """
10 | num_test = test_matrix.shape[0]
11 | num_train = train_matrix.shape[0]
12 | dists = np.zeros((num_test, num_train))
13 | d1 = -2 * np.dot(test_matrix, train_matrix.T) # shape (num_test, num_train)
14 | d2 = np.sum(np.square(test_matrix), axis=1, keepdims=True) # shape (num_test, 1)
15 | d3 = np.sum(np.square(train_matrix), axis=1) # shape (num_train, )
16 | dists = np.sqrt(np.maximum(d1 + d2 + d3, 0)) # broadcasting; clamp tiny negatives from float error
17 |
18 | return dists
19 |
20 |
21 | def generate_cycle_mask( height, width):
22 | x, y = np.ogrid[-height:height + 1, -width:width + 1]
23 | # ellipse mask
24 | mask = ((x) ** 2 / (height ** 2) + (y) ** 2 / (width ** 2) <= 1)
25 | mask = mask.astype('uint8')
26 | return mask
27 |
28 |
29 | def average_del_min(data_list):
30 | if len(data_list) == 0:
31 | return 0
32 | if len(data_list) > 2:
33 | data_list.remove(min(data_list))
34 | # data_list.remove(max(data_list))
35 | average_data = float(sum(data_list)) / len(data_list)
36 | return average_data
37 | elif len(data_list) <= 2:
38 | average_data = float(sum(data_list)) / len(data_list)
39 | return average_data
--------------------------------------------------------------------------------
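A quick shape/consistency check for `euclidean_dist` (illustrative only):

```python
import numpy as np

a = np.random.rand(3, 5)
b = np.random.rand(4, 5)
d = euclidean_dist(a, b)
assert d.shape == (3, 4)
# agrees with the direct pairwise definition
assert np.allclose(d[0, 0], np.linalg.norm(a[0] - b[0]))
```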
/datasets/dataset_prepare/scene_label.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import json
4 |
5 | root = r'T:\CVPR2022'
6 | with open('info.json') as f:
7 | info = json.load(f)
8 | cat = ['0~50', '50~100', '100~150', '150~200', '200~400']
9 | # for k, v in a.items():
10 | # data.append(v)
11 | # if v in range(0,50):
12 | # number[0]+=1
13 | # elif v in range(50,100):
14 | # number[1]+=1
15 | # elif v in range(100,150):
16 | # number[2]+=1
17 | # elif v in range(150, 200):
18 | # number[3] += 1
19 | # elif v in range(200, 400):
20 | # number[4] += 1
21 | with open(osp.join(root, 'new_label.txt'),'r') as f:
22 | lines = f.readlines()
23 | new_lines = []
24 | for i in lines:
25 | i = i.rstrip()
26 | scene_name = i.split(' ')[0]
27 | v = info[scene_name]
28 |
29 | if v in range(0,50):
30 | density_label =0
31 | elif v in range(50,100):
32 | density_label = 1
33 | elif v in range(100,150):
34 | density_label = 2
35 | elif v in range(150, 200):
36 | density_label = 3
37 | elif v in range(200, 400):
38 | density_label = 4
39 | new_i = i+' ' +str(density_label)+ '\n'
40 | new_lines.append(new_i)
41 | with open(osp.join(root,'scene_label.txt'), 'w') as f:
42 |
43 | f.writelines(new_lines)
44 |
--------------------------------------------------------------------------------
/datasets/dataset_prepare/train_val_divide.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import random
4 | Root = '/media/E/ht/dataset/HT21'
5 | dataset = 'HT21'
6 |
7 | dst_imgs_path = os.path.join(Root,'images')
8 |
9 | def divide_dataset_frames(val_ratio=0.1): # frame-level split; a scene-level version follows below
10 | test_set = []
11 | val_set= []
12 | train_set=[]
13 | train_path = os.path.join(Root+'/train')
14 | scenes= os.listdir(train_path)
15 |
16 | for i_scene in scenes:
17 | sub_files = os.listdir(os.path.join(train_path, i_scene+'/img1'))
18 | for i in sub_files:
19 | train_set.append(os.path.join('train/'+i_scene+'/img1',i))
20 |
21 |
22 | train_path = os.path.join(Root+'/test')
23 | scenes= os.listdir(train_path)
24 |
25 | for i_scene in scenes:
26 | sub_files = os.listdir(os.path.join(train_path, i_scene+'/img1'))
27 | for i in sub_files:
28 | test_set.append(os.path.join('test/'+i_scene+'/img1',i))
29 |
30 |
31 |
32 | print("test_set_num:", len(train_set), 'train_val_num:',len(test_set))
33 |
34 | # val_set = random.sample(train_set, round(val_ratio * len(train_set)))
35 | print("val_set_num:", len(val_set))
36 | train_set = set(train_set)
37 | val_set = set(val_set)
38 | train_set = train_set - val_set
39 | print("train_set_num:", len(train_set))
40 |
41 | train_set = sorted(train_set)
42 | val_set = sorted(val_set)
43 | test_set = sorted(test_set)
44 |
45 | with open(os.path.join(Root,'train.txt'), "w") as f:
46 | for train_name in train_set:
47 | f.write(train_name+'\n')
48 | f.close()
49 |
50 | with open(os.path.join(Root,'val.txt'), "w") as f:
51 | for valid_name in val_set:
52 | f.write(valid_name+'\n')
53 |
54 | f.close()
55 |
56 | with open(os.path.join(Root,'test.txt'), "w") as f:
57 | for test_name in test_set:
58 | f.write(test_name+'\n')
59 |
60 | f.close()
61 |
62 |
63 | def divide_dataset(val_ratio=0.1):
64 | test_set = []
65 | val_set = []
66 | train_set = []
67 | train_path = os.path.join(Root + '/train')
68 | scenes = os.listdir(train_path)
69 |
70 | for i_scene in scenes:
71 | train_set.append(os.path.join('train/' + i_scene))
72 |
73 | train_path = os.path.join(Root + '/test')
74 | scenes = os.listdir(train_path)
75 |
76 | for i_scene in scenes:
77 | test_set.append(os.path.join('test/' + i_scene ))
78 |
79 | print("test_set_num:", len(train_set), 'train_val_num:', len(test_set))
80 |
81 | # val_set = random.sample(train_set, round(val_ratio * len(train_set)))
82 | print("val_set_num:", len(val_set))
83 | train_set = set(train_set)
84 | val_set = set(val_set)
85 | train_set = train_set - val_set
86 | print("train_set_num:", len(train_set))
87 |
88 | train_set = sorted(train_set)
89 | val_set = sorted(val_set)
90 | test_set = sorted(test_set)
91 |
92 | with open(os.path.join(Root, 'train.txt'), "w") as f:
93 | for train_name in train_set:
94 | f.write(train_name + '\n')
95 | f.close()
96 |
97 | with open(os.path.join(Root, 'val.txt'), "w") as f:
98 | for valid_name in val_set:
99 | f.write(valid_name + '\n')
100 |
101 | f.close()
102 |
103 | with open(os.path.join(Root, 'test.txt'), "w") as f:
104 | for test_name in test_set:
105 | f.write(test_name + '\n')
106 |
107 | f.close()
108 | if __name__ == '__main__':
109 | divide_dataset()
--------------------------------------------------------------------------------
/datasets/dataset_prepare/video_vis.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | from collections import defaultdict
4 | import numpy as np
5 | import os.path as osp
6 | def plot_boxes(cur_frame, head_map, points, ids,body_map={}, text=True):
7 | plotting_im = cur_frame.copy()
8 | for index, t_dim in enumerate(head_map):
9 | (startX, startY, endX, endY) = [int(i) for i in t_dim]
10 | cv2.rectangle(plotting_im, (startX, startY), (endX, endY),
11 | (0, 255, 0), 2)
12 | cur_centroid = tuple([(startX+endX)//2,
13 | (startY+endY)//2])
14 |
15 | # cv2.circle(plotting_im, cur_centroid, 2,
16 | # (255, 0, 0), 2)
17 |
18 | if text:
19 | cv2.putText(plotting_im, str(ids[index]), cur_centroid,
20 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
21 | for index, t_dim in enumerate(points):
22 | X, Y, = [int(i) for i in t_dim]
23 | cv2.circle(plotting_im, (X, Y), 2,
24 | (0, 0, 255), 2)
25 |
26 | for index, (t_id, t_dim) in enumerate(body_map.items()):
27 | (startX, startY, endX, endY) = [int(i) for i in t_dim]
28 | cv2.rectangle(plotting_im, (startX, startY), (endX, endY),
29 | (0, 255, 0), 2)
30 | return plotting_im
31 |
32 | def CroHead():
33 | root = '../../dataset/HT21/train'
34 | sub_scenes = os.listdir(root)
35 | print(sub_scenes)
36 |
37 | for sub_scene in sub_scenes[2:]:
38 | imgs_path = os.path.join(root, sub_scene, 'img1')
39 | imgs_id = os.listdir(imgs_path)
40 | det_path = os.path.join(imgs_path.replace('img1', 'det'), 'det.txt')
41 |
42 | bboxes = defaultdict(list)
43 | with open(det_path, 'r') as f:
44 | lines = f.readlines()
45 | # imgs_path = [i.rstrip().strip("#").lstrip()
46 | # for i in lines if i.startswith('#')]
47 | for lin in lines:
48 | lin_list = [float(i) for i in lin.rstrip().split(',')]
49 | ind = int(lin_list[0])
50 | bboxes[ind].append(lin_list)
51 | f.close()
52 | gts = defaultdict(list)
53 | with open(os.path.join(imgs_path.replace('img1','gt'), 'gt.txt'), 'r') as f:
54 | lines = f.readlines()
55 | for lin in lines:
56 | lin_list = [float(i) for i in lin.rstrip().split(',')]
57 | ind = int(lin_list[0])
58 | gts[ind].append(lin_list)
59 | f.close()
60 | # print(gts)
61 | # print(imgs_id)
62 |
63 | for img_id in imgs_id:
64 | img_path=os.path.join(imgs_path,img_id)
65 | labels = bboxes[int(img_id.split('.')[0])]
66 | labels_point = gts[int(img_id.split('.')[0])]
67 | annotations = np.zeros((0, 4))
68 | points = np.zeros((0, 2))
69 | if len(labels) == 0:
70 | label = [[0, 0, 0, 0, 0]]
71 | ignore_ar = []
72 | for idx, label in enumerate(labels):
73 | annotation = np.zeros((1, 4))
74 | # bbox
75 | annotation[0, 0] = label[2] # x1
76 | annotation[0, 1] = label[3] # y1
77 | annotation[0, 2] = label[4] +label[2] # x2
78 | annotation[0, 3] = label[5] +label[3]# y2
79 | annotations = np.append(annotations, annotation, axis=0)
80 | for idx, label in enumerate(labels_point):
81 | point = np.zeros((1, 2))
82 | # bbox
83 | point[0, 0] = label[2] + label[4]/2# x1
84 | point[0, 1] = label[3] + label[5]/2 # y1
85 | points = np.append(points, point, axis=0)
86 | # print(annotations)
87 | print(len(points))
88 | img = cv2.imread(img_path)
89 | img = plot_boxes(img, {}, points, [], text=False) # no det boxes/ids here; draw points only
90 | # cv2.imshow(img_id, img)
91 | save_path = img_path.replace('img1','vis')
92 | cv2.imwrite(save_path,img)
93 | # cv2.waitKey()
94 |
95 | video_path = 'E:/netdisk/SenseCrowd/video_ori'
96 | label_path = 'E:/netdisk/SenseCrowd/label_list_all_rmInvalid'
97 | import json
98 | import os
99 | from numpy import array
100 | import numpy as np
101 | import pylab as pl
102 | def SensorCrowd():
103 | Info_dict={}
104 | time = 0
105 | for scene in sorted(os.listdir(video_path)[51:]):
106 | print(scene)
107 | gts = defaultdict(list)
108 | with open(os.path.join(label_path,scene+'.txt')) as f:
109 | lines = f.readlines()
110 | for line in lines:
111 | lin_list = [i for i in line.rstrip().split(' ')]
112 | ind = lin_list[0]
113 | lin_list = [float(i) for i in lin_list[3:] if i != '']
114 | assert len(lin_list)%7==0
115 | gts[ind]=lin_list
116 |
117 | root = osp.join(video_path, scene)
118 | img_ids = os.listdir(root)
119 | print(img_ids)
120 | id_list = []
121 | for img_id in img_ids:
122 | if not img_id.endswith("jpg"):
123 | continue
124 | time+=1/5
125 | img_path=osp.join(root, img_id)
126 | label = gts[img_id]
127 | box_and_point = np.array(label).reshape(-1,7)
128 | boxes = box_and_point[:,0:4]
129 | points = box_and_point[:,4:6]
130 | ids = box_and_point[:,6].astype(int) # np.int is deprecated
131 |
132 | id_list.append(ids)
133 |
134 | img = cv2.imread(img_path)
135 | print(img_path)
136 | plot_img = plot_boxes(img, boxes, points, ids)
137 | cv2.imshow(img_id, plot_img)
138 | cv2.waitKey()
139 | all_id = np.concatenate(id_list)
140 | Info_dict.update({scene:len(set(all_id))})
141 |
142 |
143 | print(time)
144 | with open('info.json','w') as f:
145 | json.dump(Info_dict,f)
146 |
147 | # print(Info_dict)
148 |
149 | def SENSE_train_val_test():
150 | import random
151 | random.seed(0)
152 | scenarios = ['1_cut', '']
153 | all_scenarios = []
154 | with open('./info.json','r') as f:
155 | a = json.load(f)
156 | for k, v in a.items():
157 | all_scenarios.append(k)
158 | print(len(all_scenarios))
159 | train_val = random.sample(all_scenarios, int(len(all_scenarios)*0.6))
160 | # print(train_val)
161 | test = list(set(all_scenarios)-set(train_val))
162 |
163 | val = random.sample(train_val, int(0.1*len(all_scenarios)))
164 | # print(val)
165 | train = list(set(train_val)-set(val))
166 | data = ''
167 | with open('./train.txt', 'w') as f:
168 | for i in train: data += i+'\n'
169 | f.write(data)
170 | data = ''
171 | with open('./val.txt', 'w') as f:
172 | for i in val: data += i+'\n'
173 | f.write(data)
174 | data = ''
175 | with open('./test.txt', 'w') as f:
176 | for i in test: data += i+'\n'
177 | f.write(data)
178 |
179 |
180 | print(len(train) +len(val)+len(test))
181 |
182 | def Infor_statistics():
183 | with open('./info.json','r') as f:
184 | a = json.load(f)
185 | data = []
186 | number = np.zeros(5)
187 | cat = ['0~50', '50~100', '100~150', '150~200', '200~400']
188 | for k, v in a.items():
189 | data.append(v)
190 | if v in range(0,50):
191 | number[0]+=1
192 | elif v in range(50,100):
193 | number[1]+=1
194 | elif v in range(100,150):
195 | number[2]+=1
196 | elif v in range(150, 200):
197 | number[3] += 1
198 | elif v in range(200, 400):
199 | number[4] += 1
200 | data = np.array(data)
201 | import pdb
202 | pdb.set_trace()
203 |
204 | print(data, data.sum())
205 | draw_hist(data)
206 |
207 |
208 |
209 | def draw_hist(lenths):
210 | data = lenths
211 |
212 | bins = np.linspace(min(data), 400, 10)
213 | bins = [0,100, 200, 400]
214 | pl.hist(data, bins)
215 |
216 | pl.xlabel('Number of people')
217 |
218 | pl.ylabel('Number of occurences')
219 |
220 | pl.title('Frequency distribution of the number of people in SenseCrowd (634 seq.)')
221 |
222 | pl.show()
223 |
224 |
225 |
226 | if __name__ =='__main__':
227 | SensorCrowd()
228 | Infor_statistics()
229 | # SENSE_train_val_test()
--------------------------------------------------------------------------------
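The SenseCrowd label format implied by the parsing above: each line starts with a frame identifier, two fields that the code skips, then groups of 7 floats per person, sliced in the code as box `[:, 0:4]`, point `[:, 4:6]`, and id `[:, 6]`. A decoding sketch with a made-up line (field meanings beyond the slicing are assumptions):

```python
import numpy as np

line = "000000.jpg 0 0 10 10 50 60 30 35 1 100 20 140 70 120 45 2"
parts = line.split()
frame_id = parts[0]
vals = [float(v) for v in parts[3:]]          # mirrors lin_list[3:] above
box_and_point = np.array(vals).reshape(-1, 7)  # one row per person
boxes = box_and_point[:, 0:4]
points = box_and_point[:, 4:6]
ids = box_and_point[:, 6].astype(int)
print(frame_id, len(ids))  # 000000.jpg 2
```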
/datasets/samplers.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Yaoyao Liu
3 | ## Modified from: https://github.com/Sha-Lab/FEAT
4 | ## Tianjin University
5 | ## liuyaoyao@tju.edu.cn
6 | ## Copyright (c) 2019
7 | ##
8 | ## This source code is licensed under the MIT-style license found in the
9 | ## LICENSE file in the root directory of this source tree
10 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
11 | """ Sampler for dataloader. """
12 | import torch
13 | import numpy as np
14 | import random
15 | class CategoriesSampler():
16 | """The class to generate episodic data"""
17 | def __init__(self, labels, frame_intervals, n_per):
18 | self.frame_intervals = frame_intervals
19 | self.n_sample = len(labels)
20 | self.n_batch = self.n_sample// n_per
21 | self.n_per = n_per
22 | self.scenes = []
23 | self.scene_id = {}
24 | for idx, label in enumerate(labels):
25 | scene_name = label['scene_name']
26 | if scene_name not in self.scene_id.keys():
27 | self.scene_id.update({scene_name:0})
28 | self.scene_id[scene_name]+=1
29 | self.scenes.append(scene_name)
30 |
31 | def __len__(self):
32 | return self.n_batch
33 | def __iter__(self):
34 | for i_batch in range(self.n_batch):
35 | batch = []
36 | frame_a = torch.randperm(self.n_sample )[:self.n_per]
37 | for c in frame_a:
38 | scene_name = self.scenes[c]
39 | # print(c)
40 | tmp_intervals = random.randint(self.frame_intervals[0],
41 | min(self.scene_id[scene_name]//2,self.frame_intervals[1]))
42 | if c
[... remainder of this file is missing from the export ...]
--------------------------------------------------------------------------------
/figures/utils/intro.py:
--------------------------------------------------------------------------------
[... lines 1-94 of this file are missing from the export ...]
95 | unmatched0 = torch.where(dis.min(1)[0] > 0)[0]
96 | unmatched1 = torch.where(dis.min(0)[0] > 0)[0]
97 | match_gt = {'a2b': matched_a2b, 'un_a': unmatched0, 'un_b': unmatched1}
98 | img0, img1 = plot_id(pair_img[0], pair_img[1], pair_target[0]['points'], pair_target[1]['points'], match_gt)
99 | cv2.imwrite('0.png',img0.copy())
100 | cv2.imwrite('30.png', img1.copy())
101 | cv2.imshow('0', img0)
102 | cv2.imshow('1', img1)
103 |
104 | cv2.waitKey()
105 |
106 | if __name__ == '__main__':
107 | plot_intro()
--------------------------------------------------------------------------------
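The truncated `CategoriesSampler.__iter__` in `/datasets/samplers.py` above is building frame-index pairs: each sampled anchor frame is matched with a second frame from the same scene at a random temporal interval. An illustrative stand-in for that pairing idea (not the repo's exact code, which is cut off in this export):

```python
import random
import torch

def sample_frame_pairs(n_sample, n_per, frame_intervals=(1, 10)):
    # pick n_per anchor frames; pair each with a frame tmp_intervals away,
    # falling back to a backward pair near the end of the sequence
    for c in torch.randperm(n_sample)[:n_per].tolist():
        t = random.randint(frame_intervals[0], frame_intervals[1])
        yield (c, c + t) if c + t < n_sample else (c - t, c)

print(list(sample_frame_pairs(100, n_per=4)))
```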
/misc/KPI_pool.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 | from torch.autograd import Variable
5 | from collections import deque
6 |
7 | class Task_KPI_Pool:
8 | def __init__(self,task_setting, maximum_sample):
9 | """
10 | :param task_setting: {'den': ['gt', 'den'], 'match': ['gt', 'den']}
11 | :param maximum_sample: the number of the saved samples
12 | """
13 | self.pool_size = maximum_sample
14 | self.maximum_sample = maximum_sample
15 | assert self.pool_size > 0
16 | self.current_sample = {x: 0 for x in task_setting.keys()}
17 | self.store = task_setting
18 | for key, data in self.store.items():
19 | self.store[key] = {x: deque() for x in data}
20 |
21 | def add(self, save_dict):
22 | """
23 | :param save_dict: {'den': {'gt':torch.tensor(10), 'den':torch.tensor(20)},
24 | 'match': {'gt':torch.tensor(40), 'den':torch.tensor(100)}}
25 | :return: None
26 | """
27 | for task_key, data in save_dict.items():
28 | if self.current_sample[task_key]< self.pool_size:
29 | self.current_sample[task_key] = self.current_sample[task_key] + 1
30 | for data_key, data_val in data.items():
31 | self.store[task_key][data_key].append(data_val)
32 | else:
33 | for data_key, data_val in data.items():
34 | self.store[task_key][data_key].popleft()
35 | self.store[task_key][data_key].append(data_val)
36 |
37 | def return_feature(self,cls_group):
38 | return_features = []
39 | return_labels = []
40 |
41 | return return_features, return_labels
42 |
43 | def query(self):
44 | task_KPI = {}
45 | for task_key in self.store:
46 | data_keys = list(self.store[task_key].keys())
47 |
48 | gt_list = list(self.store[task_key][data_keys[0]])
49 | correct_list = list(self.store[task_key][data_keys[1]])
50 | gt_sum = torch.tensor(gt_list).sum()
51 |
52 | correct_sum = torch.tensor(correct_list).sum()
53 |
54 |
55 | task_KPI.update({task_key:correct_sum/(gt_sum+1e-8)})
56 |
57 | return task_KPI
58 |
59 | if __name__ == '__main__':
60 | import random
61 |
62 | index = np.random.randint(0, 3, size=30)
63 | # index = random.sample(range(0, 54), 54)
64 | feature = torch.rand(30,3).cuda()
65 | target = torch.Tensor(index).cuda().long()
66 | pred = torch.randn(30,3).cuda()
67 | task = {'den': ['gt', 'den'], 'match': ['gt', 'den']}
68 | save_dict0 = {'den': {'gt':torch.tensor(10), 'den':torch.tensor(20)}, 'match': {'gt':torch.tensor(40), 'den':torch.tensor(100)}}
69 | save_dict1 = {'den': {'gt':torch.tensor(20.6), 'den':torch.tensor(30.8)}, 'match': {'gt':torch.tensor(50), 'den':torch.tensor(120.4)}}
70 | print(task.keys())
71 | pool = Task_KPI_Pool(task,100)
72 | pool.add(save_dict0)
73 | pool.add(save_dict1)
74 |
75 | print(pool.query())
76 |
77 | import pdb
78 |
79 | pdb.set_trace()
--------------------------------------------------------------------------------
/misc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__init__.py
--------------------------------------------------------------------------------
/misc/__pycache__/KPI_pool.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/KPI_pool.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/__pycache__/dot_ops.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/dot_ops.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/__pycache__/get_bbox.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/get_bbox.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/__pycache__/inflation.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/inflation.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/__pycache__/layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/layer.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/__pycache__/nms.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/nms.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/__pycache__/transforms.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/transforms.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/misc/cal_mean.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from PIL import Image
3 | import numpy as np
4 | import os
5 |
6 | def make_parser():
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument('--trainDataPath', type=str, default='/media/D/GJY/Dataset/VisDroneCC_1080P_mod16/img',
9 | help='absolute path to your data path')
10 | return parser
11 |
12 | if __name__ == '__main__':
13 | args = make_parser().parse_args()
14 |
15 | imgs_list = []
16 |
17 | for i_img, img_name in enumerate(os.listdir(args.trainDataPath)):
18 | if i_img % 100 == 0:
19 | print( i_img )
20 | img = Image.open(os.path.join(args.trainDataPath, img_name))
21 | if img.mode == 'L':
22 | img = img.convert('RGB')
23 |
24 | img = np.array(img.resize((1024,768),Image.BILINEAR))
25 |
26 | imgs_list.append(img)
27 |
28 | imgs = np.array(imgs_list).astype(np.float32)/255.
29 | red = imgs[:,:,:,0]
30 | green = imgs[:,:,:,1]
31 | blue = imgs[:,:,:,2]
32 |
33 |
34 | print("means: [{}, {}, {}]".format(np.mean(red),np.mean(green),np.mean(blue)))
35 | print("stdevs: [{}, {}, {}]".format(np.std(red),np.std(green),np.std(blue)))
36 |
--------------------------------------------------------------------------------
/misc/dot_ops.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | from torch.autograd import Variable
7 | import math
8 |
9 | class Gaussian(nn.Module):
10 | def __init__(self, in_channels, sigmalist, kernel_size=64, stride=1, padding=0, froze=True):
11 | super(Gaussian, self).__init__()
12 | out_channels = len(sigmalist) * in_channels
13 | # gaussian kernel
14 | mu = kernel_size // 2
15 | gaussFuncTemp = lambda x: (lambda sigma: math.exp(-(x - mu) ** 2 / float(2 * sigma ** 2)))
16 | gaussFuncs = [gaussFuncTemp(x) for x in range(kernel_size)]
17 | windows = []
18 | for sigma in sigmalist:
19 | gauss = torch.Tensor([gaussFunc(sigma) for gaussFunc in gaussFuncs])
20 | gauss /= gauss.sum()
21 | _1D_window = gauss.unsqueeze(1)
22 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
23 | window = Variable(_2D_window.expand(in_channels, 1, kernel_size, kernel_size).contiguous())
24 | windows.append(window)
25 | kernels = torch.stack(windows)
26 | kernels = kernels.permute(1, 0, 2, 3, 4)
27 | weight = kernels.reshape(out_channels, in_channels, kernel_size, kernel_size)
28 |
29 | self.gkernel = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, groups=in_channels, bias=False)
30 | self.gkernel.weight = torch.nn.Parameter(weight)
31 |
32 | if froze: self.frozePara()
33 |
34 | def forward(self, dotmaps):
35 | gaussianmaps = self.gkernel(dotmaps)
36 | return gaussianmaps
37 |
38 | def frozePara(self):
39 | for para in self.parameters():
40 | para.requires_grad = False
41 |
42 |
43 | class SumPool2d(nn.Module):
44 | def __init__(self, kernel_size):
45 | super(SumPool2d, self).__init__()
46 | self.avgpool = nn.AvgPool2d(kernel_size, stride=1, padding=kernel_size // 2)
47 | if not isinstance(kernel_size, int):
48 | self.area = kernel_size[0] * kernel_size[1]
49 | else:
50 | self.area = kernel_size * kernel_size # fix: `self.kernel_size` was never defined
51 |
52 | def forward(self, dotmap):
53 | return self.avgpool(dotmap) * self.area
--------------------------------------------------------------------------------
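A quick check that `SumPool2d` returns windowed sums rather than averages (illustrative):

```python
import torch

pool = SumPool2d(3)
dots = torch.zeros(1, 1, 8, 8)
dots[0, 0, 4, 4] = 1.0
out = pool(dots)
print(out[0, 0, 4, 4])  # tensor(1.) -- the 3x3 window around (4,4) sums the dot
```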
/misc/evaluation_code.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import sys
5 | import math
6 |
7 | #MAE = lambda
8 |
9 | errorcode = 'WA'
10 |
11 | class AverageMeter(object):
12 | """Computes and stores the average and current value"""
13 |
14 | def __init__(self):
15 | self.reset()
16 |
17 | def reset(self):
18 | self.maeSum = 0
19 | self.mseSum = 0
20 | self.naeSum = 0
21 | self.count = 0
22 | self.naecount = 0
23 |
24 | def update(self, mae, mse, nae):
25 |
26 | self.maeSum += mae
27 | self.mseSum += mse
28 | if nae >= 0:
29 | self.naeSum += nae
30 | self.naecount += 1
31 | self.count += 1
32 |
33 | def output(self):
34 | if self.count > 0:
35 | mae = self.maeSum / self.count
36 | mse = math.sqrt(self.mseSum / self.count)
37 | else:
38 | mae, mse = -1, -1
39 | nae = self.naeSum / self.naecount if self.naecount > 0 else -1
40 | return mae, mse, nae
41 |
42 | def dictout(self):
43 | mae, mse, nae = self.output()
44 | return dict(
45 | mae = mae,
46 | mse = mse,
47 | nae = nae
48 | )
49 |
50 |
51 | def readoutput(outtxt):
52 | output = {}
53 | with open(outtxt) as f:
54 | for line in f.readlines():
55 | line = line.strip().split(' ')
56 | if len(line) == 2:
57 | idx, score = int(line[0]), float(line[1])
58 | output[idx] = score
59 | return output
60 |
61 | def readtarget(tartxt):
62 | target = {}
63 | with open(tartxt) as f:
64 | for line in f.readlines():
65 | line = line.strip().split(' ')
66 | if len(line) == 4:
67 | idx, illum, level = map(int, line[:3])
68 | score = float(line[3])
69 | target[idx] = dict(
70 | illum = illum,
71 | level = level,
72 | gt_count = score
73 | )
74 | return target
75 |
76 | def judge(outtxt, tartxt):
77 | output = readoutput(outtxt)
78 | target = readtarget(tartxt)
79 | for key in target.keys():
80 | if key in output:
81 | target[key]["pd_count"] = output[key]
82 | else:
83 | return errorcode
84 |
85 | totalJudger = AverageMeter()
86 | levelJudger = [AverageMeter() for _ in range(5)]
87 | illumJudger = [AverageMeter() for _ in range(4)]
88 |
89 | for _, score in target.items():
90 | # get data
91 | illum = score['illum']
92 | level = score['level']
93 | gt_count = score['gt_count']
94 | pd_count = score['pd_count']
95 |
96 | # process
97 | mae = abs(pd_count - gt_count)
98 | mse = mae ** 2
99 | nae = mae / gt_count if gt_count > 0 else -1
100 |
101 | # save
102 | totalJudger.update(mae, mse, nae)
103 | levelJudger[level].update(mae, mse, nae)
104 | illumJudger[illum].update(mae, mse, nae)
105 |
106 | outputdict = {
107 | 'overall': totalJudger.dictout(),
108 | 'levels': [judger.dictout() for judger in levelJudger],
109 | 'illums': [judger.dictout() for judger in illumJudger],
110 | }
111 | outputdict['mmae'] = dict(
112 | mmae_level = sum(result['mae'] for result in outputdict['levels']) / len(outputdict['levels']),
113 | mmae_illum = sum(result['mae'] for result in outputdict['illums']) / len(outputdict['illums'])
114 | )
115 |
116 | return outputdict
117 |
118 |
119 | if __name__ == '__main__':
120 |     if len(sys.argv) != 3:
121 |         print(errorcode)
122 |         sys.exit(1)
123 |     print(judge(sys.argv[1], sys.argv[2]))
--------------------------------------------------------------------------------
/misc/get_bbox.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn.functional as F
4 | from .nms import *
5 | import torch.nn as nn
6 | import cv2
7 | import pdb
8 | def local_maximum(sub_pre,sub_bin, scale_factor=1.):
9 | sub_pre = torch.from_numpy(sub_pre).unsqueeze(0).unsqueeze(0)
10 | max_value = torch.max(sub_pre)
11 |
12 | # kernel = [[1/9., 1/9., 1/9.], [1/9., 1/9., 1/9.], [1/9., 1/9.,1/9.]]
13 | # kernel = torch.FloatTensor(kernel).unsqueeze(0).unsqueeze(0)
14 | # weight = nn.Parameter(data=kernel, requires_grad=False)
15 | # sub_pre = F.conv2d(sub_pre, weight, stride=1,padding=1)
16 |
17 | keep = nn.functional.max_pool2d(sub_pre, (3, 3), stride=1, padding=1)
18 | keep = (keep == sub_pre).float()
19 | sub_pre = keep * sub_pre
20 |
21 | sub_pre[sub_pre < 0.5 * max_value] = 0
22 | sub_pre[sub_pre > 0] = 1
23 | count = int(torch.sum(sub_pre).item())
24 |
25 | kpoint = sub_pre.data.squeeze(0).squeeze(0).cpu().numpy()
26 |
27 | points = np.array(list(zip(np.nonzero(kpoint)[1], np.nonzero(kpoint)[0]))).astype(np.float32)
28 | # distance_map = cv2.distanceTransform(sub_bin, cv2.DIST_L1,3)
29 |
30 | boxes = np.zeros((len(points), 5)).astype(np.float32)
31 | for i in range(len(points)):
32 | x, y = points[i]
33 | length = scale_factor # max(distance_map[int(y), int(x)], scale_factor)
34 | boxes[i] = [x - length, y - length, 2*length, 2*length, 4*length*length]
35 | pre_data = {'num': count, 'points': points, 'boxes': boxes}
36 | return pre_data
37 |
38 | def Noise_box_detection(recs):
39 | maintain_list = []
40 | recs[:, 2] = recs[:, 0] + recs[:, 2]
41 | recs[:, 3] = recs[:, 1] + recs[:, 3]
42 | length = len(recs)
43 |
44 | for i in range(length):
45 | if i < length - 1:
46 | j = i + 1
47 | index = (recs[i][0] >= recs[j:][:, 0]) & (recs[i][1] >= recs[j:][:, 1]) \
48 | & (recs[i][2] <= recs[j:][:, 2]) & (recs[i][3] <= recs[j:][:, 3])
49 |             index = np.where(index)[0]
50 | if index.size > 0:
51 | continue
52 | else:
53 | maintain_list.append(i)
54 | else:
55 | maintain_list.append(i)
56 | return maintain_list
57 |
58 | def get_boxInfo_from_Binar_map(pred_map, threshold=0.3, min_area=4, scale_factor=1., polish=False):
59 | # import pdb
60 | # pdb.set_trace()
61 | a = torch.ones_like(pred_map)
62 | b = torch.zeros_like(pred_map)
63 | Binar_map = torch.where(pred_map >= threshold, a, b).cpu().numpy()
64 |
65 | Binar_map = Binar_map.squeeze().astype(np.uint8)
66 | pred_map = pred_map.squeeze()
67 |     cnt, labels, stats, centroids = cv2.connectedComponentsWithStats(Binar_map, connectivity=4)  # centroid (w, h)
68 |
69 | boxes = stats[1:, :].astype(np.float32)
70 | points = centroids[1:, :].astype(np.float32)
71 | index = (boxes[:, 4] >= min_area)
72 | boxes = boxes[index]
73 | points = points[index]
74 |
75 | order = np.argsort(boxes[:, 4])
76 | points = points[order]
77 | boxes = boxes[order]
78 |
79 | maintain_list = Noise_box_detection(boxes.copy())
80 | boxes = boxes[maintain_list]
81 | points = points[maintain_list]
82 |
83 | if polish:
84 | boxes_app = []
85 | points_app = []
86 | for id in range(len(boxes)):
87 | w_s, h_s, w, h, area = boxes[id]
88 | sub_pre = pred_map[int(h_s):int(h_s) + int(h), int(w_s):int(w_s) + int(w)].copy()
89 | sub_bin = Binar_map[int(h_s):int(h_s) + int(h), int(w_s):int(w_s) + int(w)].copy()
90 | iou = boxes[id, 4] / (w * h)
91 |             ratio = h / w
92 |             if area > 20:
93 |                 if ratio > 2 or ratio < 0.5 or iou < 0.75:
94 | pred_data = local_maximum(sub_pre,sub_bin,scale_factor)
95 | if pred_data['num'] >= 1:
96 | pred_data['boxes'][:, 0] += w_s
97 | pred_data['boxes'][:, 1] += h_s
98 | pred_data['points'][:, 0] += int(w_s)
99 | pred_data['points'][:, 1] += int(h_s)
100 | boxes[id, :] = pred_data['boxes'][0, :]
101 | points[id, :] = pred_data['points'][0, :]
102 |
103 | for k in range(1, pred_data['num']):
104 | boxes_app.append(pred_data['boxes'][k, :])
105 | points_app.append(pred_data['points'][k, :])
106 |
107 | # print('original:{}, add_boxes:{}, final_boxes:{}'.format(len(boxes), len(boxes_app), len(boxes) + len(boxes_app)))
108 |
109 | if len(boxes_app) > 0:
110 | boxes = np.concatenate((boxes, np.array(boxes_app)))
111 | points = np.concatenate((points, np.array(points_app).astype(np.int32)))
112 | new_boxes = np.zeros((len(points), 4)).astype(np.float32)
113 | scores = np.zeros((len(points), 1)).astype(np.float32)
114 |
115 | # for i in range(len(boxes)):
116 | # x_s, y_s, w, h, area = boxes[i]
117 | # x, y = points[i]
118 | # # _scale = scale_map[y_s:y_s + h, x_s:x_s + w]
119 | # # _mask = Binar_map[y_s:y_s + h, x_s:x_s + w]
120 | # _pred = pred_map[int(y_s):int(y_s) + int(h), int(x_s):int(x_s) + int(w)]
121 | # score =pred_map[int(y),int(x)]
122 | # sigma = np.sqrt(w ** 2 + h ** 2)
123 | # sin = h / sigma
124 | # cos = w / sigma
125 | #
126 | # scale = max( scale_map[int(y),int(x)], sigma / 2) #if index.sum()>0 else sigma / 2 #_scale[index].max()
127 | #
128 | # de_h, de_w = scale * sin, scale * cos
129 | # new_x_s, new_x_e = x - de_w, x + de_w
130 | # new_y_s, new_y_e = y - de_h, y + de_h
131 | # new_boxes[i] = [new_x_s, new_y_s, new_x_e, new_y_e]
132 | # scores[i] = score
133 |
134 | batch_id = np.zeros((len(points), 1)).astype(np.float32)
135 | boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
136 | boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
137 | boxes = boxes[:, :4]
138 | boxes = np.hstack((batch_id,boxes / scale_factor))
139 | boxes = torch.from_numpy(boxes)
140 | # boxes = np.hstack((boxes/scale_factor,scores))
141 | # new_boxes = np.hstack((new_boxes/scale_factor, scores))
142 |
143 | if polish:
144 | keep = nms(new_boxes,thresh=0.3)
145 | points = points[keep]
146 | boxes = boxes[keep]
147 | new_boxes = new_boxes[keep]
148 | pred_data = {'num': len(points), 'points': points/scale_factor, 'rois': boxes, 'new_boxes': new_boxes}
149 | return pred_data
150 |
151 | def multiscale_nms(pred_data):
152 |
153 | base_boxes = pred_data[1]['boxes']
154 | base_points = pred_data[1]['points']
155 | base_new_boxes = pred_data[1]['new_boxes']
156 |
157 | for scale in pred_data.keys():
158 | if scale == 1:
159 | continue
160 | boxes = pred_data[scale]['boxes']
161 | points = pred_data[scale]['points']
162 | new_boxes = pred_data[scale]['new_boxes']
163 |
164 | base_boxes= np.concatenate((base_boxes, boxes))
165 | base_points= np.concatenate((base_points, points))
166 | base_new_boxes = np.concatenate((base_new_boxes, new_boxes))
167 |
168 | # order = np.argsort((base_new_boxes[:, 3]-base_new_boxes[:, 1])*(base_new_boxes[:, 2]-base_new_boxes[:, 0]))
169 | # base_points = base_points[order]
170 | # base_boxes = base_boxes[order]
171 | # base_new_boxes = base_new_boxes[order]
172 | # #
173 | # keep = Noise_box_detection(base_new_boxes.copy())
174 | # base_points = base_points[keep]
175 | # base_boxes = base_boxes[keep]
176 | # base_new_boxes = base_new_boxes[keep]
177 |
178 | keep = nms(base_new_boxes,thresh=0.2)
179 | base_points = base_points[keep]
180 | base_boxes = base_boxes[keep]
181 | base_new_boxes = base_new_boxes[keep]
182 |
183 | pred_data = {'num': len(base_points), 'points': base_points , 'rois': base_boxes, 'new_boxes': base_new_boxes}
184 | return pred_data
185 |
--------------------------------------------------------------------------------
/misc/inflation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import torch
4 | import pdb
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | import math
9 | import numpy
10 |
11 | class inflation(nn.Module):
12 | def __init__(self,K=15,stride=1,padding=None):
13 | super(inflation,self).__init__()
14 | weight = numpy.zeros((K,K))
15 | t = (K-1)/2
16 | for i in range(K):
17 | for j in range(K):
18 | if abs(i-t)+abs(j-t)<=t:
19 | weight[i,j] = 1
20 | if padding is None:
21 | padding = K//2
22 | self.ikernel = nn.Conv2d(1,1,K,stride=stride,padding=padding,bias=False)
23 | self.ikernel.weight = torch.nn.Parameter(torch.from_numpy(weight.reshape(1,1,K,K).astype(numpy.float32)))
24 | for para in self.parameters():
25 | para.requires_grad = False
26 |
27 | def forward(self,x):
28 | x = x.unsqueeze(0)
29 | x = x.unsqueeze(0)
30 | x = self.ikernel(x)
31 | return x.squeeze()
32 |
33 | class Expend(torch.nn.Module):
34 | def __init__(self):
35 | super(Expend, self).__init__()
36 | self.ex = torch.nn.AvgPool2d(15,stride=1,padding=7)
37 | for para in self.parameters():
38 | para.requires_grad = False
39 |
40 | def forward(self, x):
41 | x = x.unsqueeze(0)
42 | x = self.ex(x)
43 | return x.squeeze()
--------------------------------------------------------------------------------
/misc/layer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from .dot_ops import Gaussian, SumPool2d
5 | import scipy.spatial
6 | import scipy.ndimage
7 | import numpy as np
8 | import torch.nn.functional as F
9 | import cv2 as cv
10 | class Point2Mask(object):
11 | def __init__(self, max_kernel_size=7):
12 |
13 | self.max_kernel_size = max_kernel_size
14 | def __call__(self, target, pre_map):
15 | b,c,h,w = pre_map.size()
16 | mask_map = torch.zeros_like(pre_map)
17 | for idx, sub_target in enumerate(target):
18 | points = sub_target["points"]
19 | # import pdb
20 | # pdb.set_trace()
21 | count = points.shape[0]
22 | if count==0:
23 | continue
24 | elif count==1:
25 | pt = points[0].astype(np.int32)
26 | kernel_size = self.max_kernel_size
27 | up = max(pt[1] - kernel_size, 0)
28 | down = min(pt[1] + kernel_size + 1, h)
29 | left = max(pt[0] - kernel_size, 0)
30 | right = min(pt[0] + kernel_size + 1, w)
31 |
32 | mask_map[idx, 0, up:down + 1, left:right + 1] = 1
33 | else:
34 | leafsize = 2048
35 | tree = scipy.spatial.KDTree(points.copy(), leafsize=leafsize)
36 | distances, locations = tree.query(points, k=2)
37 | for i, pt in enumerate(points):
38 |                 if pt[0] >= w or pt[1] >= h:
39 | continue
40 | pt = pt.astype(np.int32)
41 | kernel_size = (distances[i][1]) * 0.25
42 | kernel_size = min(self.max_kernel_size, int(kernel_size + 0.5))
43 | up = max(pt[1] - kernel_size,0)
44 | down = min(pt[1] + kernel_size+1,h)
45 | left = max(pt[0] - kernel_size,0)
46 | right = min(pt[0] + kernel_size+1,w)
47 | mask_map[idx,0, up:down+1, left:right+1]=1
48 |
49 | # density_nn[np.where(pnt_density > 0)] = distances[i][1]
50 | # mask_map += pnt_density
51 | # density_std[np.where(pnt_density > 0)] = sigma
52 | # mask_map = mask_map.astype(np.uint8) * 255
53 | # cv.imwrite('../dataset/mask_vis/mask_vis.png', mask_map[0][0].cpu().numpy(), [cv.IMWRITE_PNG_BILEVEL, 1])
54 | # import pdb
55 | # pdb.set_trace()
56 | # print(mask_map.sum())
57 | return mask_map
58 | class Gaussianlayer(nn.Module):
59 | def __init__(self, sigma=None, kernel_size=15):
60 | super(Gaussianlayer, self).__init__()
61 |         if sigma is None:
62 | sigma = [4]
63 | self.gaussian = Gaussian(1, sigma, kernel_size=kernel_size, padding=kernel_size//2, froze=True)
64 |
65 | def forward(self, dotmaps):
66 | denmaps = self.gaussian(dotmaps)
67 | return denmaps
68 |
69 |
70 | class Conv2d(nn.Module):
71 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, NL='relu', same_padding=False, bn=False, dilation=1):
72 | super(Conv2d, self).__init__()
73 | padding = int((kernel_size - 1) // 2) if same_padding else 0
74 | self.conv = []
75 | if dilation==1:
76 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=padding, dilation=dilation)
77 | else:
78 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=dilation, dilation=dilation)
79 | self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0, affine=True) if bn else None
80 | if NL == 'relu' :
81 | self.relu = nn.ReLU(inplace=True)
82 | elif NL == 'prelu':
83 | self.relu = nn.PReLU()
84 | else:
85 | self.relu = None
86 |
87 | def forward(self, x):
88 | x = self.conv(x)
89 | if self.bn is not None:
90 | x = self.bn(x)
91 | if self.relu is not None:
92 | x = self.relu(x)
93 | return x
94 |
95 |
96 | class FC(nn.Module):
97 | def __init__(self, in_features, out_features, NL='relu'):
98 | super(FC, self).__init__()
99 | self.fc = nn.Linear(in_features, out_features)
100 | if NL == 'relu' :
101 | self.relu = nn.ReLU(inplace=True)
102 | elif NL == 'prelu':
103 | self.relu = nn.PReLU()
104 | else:
105 | self.relu = None
106 |
107 | def forward(self, x):
108 | x = self.fc(x)
109 | if self.relu is not None:
110 | x = self.relu(x)
111 | return x
112 |
113 | class convDU(nn.Module):
114 |
115 | def __init__(self,
116 | in_out_channels=2048,
117 | kernel_size=(9,1)
118 | ):
119 | super(convDU, self).__init__()
120 | self.conv = nn.Sequential(
121 | nn.Conv2d(in_out_channels, in_out_channels, kernel_size, stride=1, padding=((kernel_size[0]-1)//2,(kernel_size[1]-1)//2)),
122 | nn.ReLU(inplace=True)
123 | )
124 |
125 | def forward(self, fea):
126 | n, c, h, w = fea.size()
127 |
128 | fea_stack = []
129 | for i in range(h):
130 |             i_fea = fea.select(2, i).reshape(n, c, 1, w)
131 | if i == 0:
132 | fea_stack.append(i_fea)
133 | continue
134 | fea_stack.append(self.conv(fea_stack[i-1])+i_fea)
135 | # pdb.set_trace()
136 | # fea[:,i,:,:] = self.conv(fea[:,i-1,:,:].expand(n,1,h,w))+fea[:,i,:,:].expand(n,1,h,w)
137 |
138 |
139 | for i in range(h):
140 | pos = h-i-1
141 | if pos == h-1:
142 | continue
143 | fea_stack[pos] = self.conv(fea_stack[pos+1])+fea_stack[pos]
144 | # pdb.set_trace()
145 | fea = torch.cat(fea_stack, 2)
146 | return fea
147 |
148 | class convLR(nn.Module):
149 |
150 | def __init__(self,
151 | in_out_channels=2048,
152 | kernel_size=(1,9)
153 | ):
154 | super(convLR, self).__init__()
155 | self.conv = nn.Sequential(
156 | nn.Conv2d(in_out_channels, in_out_channels, kernel_size, stride=1, padding=((kernel_size[0]-1)//2,(kernel_size[1]-1)//2)),
157 | nn.ReLU(inplace=True)
158 | )
159 |
160 | def forward(self, fea):
161 | n, c, h, w = fea.size()
162 |
163 | fea_stack = []
164 | for i in range(w):
165 |             i_fea = fea.select(3, i).reshape(n, c, h, 1)
166 | if i == 0:
167 | fea_stack.append(i_fea)
168 | continue
169 | fea_stack.append(self.conv(fea_stack[i-1])+i_fea)
170 |
171 | for i in range(w):
172 | pos = w-i-1
173 | if pos == w-1:
174 | continue
175 | fea_stack[pos] = self.conv(fea_stack[pos+1])+fea_stack[pos]
176 |
177 |
178 | fea = torch.cat(fea_stack, 3)
179 | return fea
--------------------------------------------------------------------------------
/misc/modelsummary.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import os
13 | import logging
14 | from collections import namedtuple
15 |
16 | import torch
17 | import torch.nn as nn
18 |
19 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False):
20 | """
21 | :param model:
22 | :param input_tensors:
23 | :param item_length:
24 | :return:
25 | """
26 |
27 | summary = []
28 |
29 | ModuleDetails = namedtuple(
30 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"])
31 | hooks = []
32 | layer_instances = {}
33 |
34 | def add_hooks(module):
35 |
36 | def hook(module, input, output):
37 | class_name = str(module.__class__.__name__)
38 |
39 | instance_index = 1
40 | if class_name not in layer_instances:
41 | layer_instances[class_name] = instance_index
42 | else:
43 | instance_index = layer_instances[class_name] + 1
44 | layer_instances[class_name] = instance_index
45 |
46 | layer_name = class_name + "_" + str(instance_index)
47 |
48 | params = 0
49 |
50 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \
51 | class_name.find("Linear") != -1:
52 | for param_ in module.parameters():
53 | params += param_.view(-1).size(0)
54 |
55 | flops = "Not Available"
56 | if class_name.find("Conv") != -1 and hasattr(module, "weight"):
57 | flops = (
58 | torch.prod(
59 | torch.LongTensor(list(module.weight.data.size()))) *
60 | torch.prod(
61 | torch.LongTensor(list(output.size())[2:]))).item()
62 | elif isinstance(module, nn.Linear):
63 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \
64 | * input[0].size(1)).item()
65 |
66 | if isinstance(input[0], list):
67 | input = input[0]
68 | if isinstance(output, list):
69 | output = output[0]
70 |
71 | summary.append(
72 | ModuleDetails(
73 | name=layer_name,
74 | input_size=list(input[0].size()),
75 | output_size=list(output.size()),
76 | num_parameters=params,
77 | multiply_adds=flops)
78 | )
79 |
80 | if not isinstance(module, nn.ModuleList) \
81 | and not isinstance(module, nn.Sequential) \
82 | and module != model:
83 | hooks.append(module.register_forward_hook(hook))
84 |
85 | model.eval()
86 | model.apply(add_hooks)
87 |
88 | space_len = item_length
89 |
90 | model(*input_tensors)
91 | for hook in hooks:
92 | hook.remove()
93 |
94 | details = ''
95 | if verbose:
96 | details = "Model Summary" + \
97 | os.linesep + \
98 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format(
99 | ' ' * (space_len - len("Name")),
100 | ' ' * (space_len - len("Input Size")),
101 | ' ' * (space_len - len("Output Size")),
102 | ' ' * (space_len - len("Parameters")),
103 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \
104 | + os.linesep + '-' * space_len * 5 + os.linesep
105 |
106 | params_sum = 0
107 | flops_sum = 0
108 | for layer in summary:
109 | params_sum += layer.num_parameters
110 | if layer.multiply_adds != "Not Available":
111 | flops_sum += layer.multiply_adds
112 | if verbose:
113 | details += "{}{}{}{}{}{}{}{}{}{}".format(
114 | layer.name,
115 | ' ' * (space_len - len(layer.name)),
116 | layer.input_size,
117 | ' ' * (space_len - len(str(layer.input_size))),
118 | layer.output_size,
119 | ' ' * (space_len - len(str(layer.output_size))),
120 | layer.num_parameters,
121 | ' ' * (space_len - len(str(layer.num_parameters))),
122 | layer.multiply_adds,
123 | ' ' * (space_len - len(str(layer.multiply_adds)))) \
124 | + os.linesep + '-' * space_len * 5 + os.linesep
125 |
126 | details += os.linesep \
127 | + "Total Parameters: {:,}".format(params_sum) \
128 | + os.linesep + '-' * space_len * 5 + os.linesep
129 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \
130 | + os.linesep + '-' * space_len * 5 + os.linesep
131 | details += "Number of Layers" + os.linesep
132 | for layer in layer_instances:
133 | details += "{} : {} layers ".format(layer, layer_instances[layer])
134 |
135 | return details
--------------------------------------------------------------------------------
/misc/nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | '''
8 | nms.py: CPU implementation of non-maximum suppression, modified from Ross's code.
9 | Authors : svp
10 |
11 | Modified from https://github.com/rbgirshick/fast-rcnn/blob/master/lib/utils/nms.py
12 | to accommodate a corner case which handles one box lying completely inside another.
13 | '''
14 | import numpy as np
15 |
16 |
17 | def is_square(inter, areas):
18 | truth_val = np.logical_not((np.logical_and((np.sqrt(areas) ** 2 == areas), (np.sqrt(inter) ** 2 == inter))))
19 | return np.float32(truth_val)
20 |
21 |
22 | def nms(dets, thresh):
23 | x1 = dets[:, 0]
24 | y1 = dets[:, 1]
25 | x2 = dets[:, 2]
26 | y2 = dets[:, 3]
27 | scores = dets[:, 4]
28 |
29 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
30 |
31 | order = scores.argsort()[::-1]
32 |
33 | keep = []
34 | while order.size > 0:
35 | i = order[0]
36 | keep.append(i)
37 | xx1 = np.maximum(x1[i], x1[order[1:]])
38 | yy1 = np.maximum(y1[i], y1[order[1:]])
39 | xx2 = np.minimum(x2[i], x2[order[1:]])
40 | yy2 = np.minimum(y2[i], y2[order[1:]])
41 |
42 | w = np.maximum(0.0, xx2 - xx1 + 1)
43 | h = np.maximum(0.0, yy2 - yy1 + 1)
44 | inter = w * h
45 |
46 | remove_index_1 = np.where(areas[i] == inter) # i is included by others
47 |         remove_index_2 = np.where(areas[order[1:]] == inter)  # i includes others
48 |
49 | ovr = 1 / 3 * inter / (areas[i] + areas[order[1:]] - inter) \
50 | + 1 / 3 * inter / areas[i] \
51 | + 1 / 3 * inter / areas[order[1:]]
52 |
53 | # ovr = inter / (areas[i] + areas[order[1:]] - inter)* np.maximum (areas[order[1:]]/areas[i], areas[i]/areas[order[1:]])
54 |
55 | ovr[remove_index_1] = 1.0
56 | ovr[remove_index_2] = 1.0
57 | inds = np.where(ovr <= thresh)[0] # get the index(a series)
58 | order = order[inds + 1]
59 |
60 | return keep
61 |
62 |
63 | if __name__ == '__main__':
64 | a = np.array([[1, 2, 4, 5, 0.9], [1, 2, 3, 4, 0.99], [8, 2, 9, 4, 0.99]])
65 | keep = nms(a, 0.2)
66 | print(keep)
67 | np.where(np.array([78, 3, 4, 54, 3, ]) > 10)
--------------------------------------------------------------------------------
/model/MatchTool/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/MatchTool/__init__.py
--------------------------------------------------------------------------------
/model/MatchTool/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/MatchTool/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/model/MatchTool/__pycache__/compute_metric.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/MatchTool/__pycache__/compute_metric.cpython-37.pyc
--------------------------------------------------------------------------------
/model/MatchTool/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/MatchTool/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/model/MatchTool/compute_metric.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import numpy as np
4 | from scipy import spatial as ss
5 | import pdb
6 |
7 | import cv2
8 | from .utils import hungarian,read_pred_and_gt,AverageMeter,AverageCategoryMeter
9 |
10 | # gt_file = 'val_gt_loc.txt'
11 | # pred_file = 'TinyFaces_loc_0.8_0.3.txt'
12 |
13 | flagError = False
14 | id_std = [i for i in range(3110,3610,1)]
15 | id_std[59] = 3098
16 | num_classes = 6
17 |
18 |
19 | def compute_metrics(dist_matrix,match_matrix,pred_num,sigma):
20 | for i_pred_p in range(pred_num):
21 | pred_dist = dist_matrix[i_pred_p,:]
22 | match_matrix[i_pred_p,:] = pred_dist<=sigma
23 |
24 | tp, assign = hungarian(match_matrix)
25 | fn_gt_index = np.array(np.where(assign.sum(0)==0))[0]
26 |
27 | fp_pred_index = np.array(np.where(assign.sum(1)==0))[0]
28 |
29 | # tp_pred_index = np.array(np.where(assign.sum(1)==1))[0]
30 | # tp_gt_index = np.array(np.where(assign.sum(0)==1))[0]
31 |
32 | tp_pred_index, tp_gt_index = np.where(assign==1)
33 |
34 | tp = tp_pred_index.shape[0]
35 | fp = fp_pred_index.shape[0]
36 | fn = fn_gt_index.shape[0]
37 |
38 | #
39 | # import pdb
40 | # pdb.set_trace()
41 | return tp,fp,fn,tp_pred_index,fp_pred_index ,tp_gt_index, fn_gt_index
42 |
43 |
44 |
45 |
46 | def associate_pred2gt_point(pred_data, gt_data):
47 | # import pdb
48 | # pdb.set_trace()
49 | pred_p = pred_data['points'].cpu().numpy()
50 | gt_p = gt_data['points'].cpu().numpy()
51 | gt_sigma = gt_data['sigma'].cpu().numpy()
52 | if gt_p.shape[0]>0:
53 | gt_data = {'num':gt_p.shape[0], 'points':gt_p,'sigma':gt_sigma}
54 | else:
55 | gt_data = {'num':0, 'points':[],'sigma':[]}
56 |
57 | tp_l,fp_l,fn_l = [0,0,0]
58 | tp_pred_index,tp_gt_index = [],[]
59 | if gt_data['num'] ==0 and pred_p.shape[0] !=0:
60 | fp_pred_index = np.array(range(pred_p.shape[0]))
61 | fp_l = fp_pred_index.shape[0]
62 |
63 | if pred_p.shape[0] ==0 and gt_data['num'] !=0:
64 | gt_p = gt_data['points']
65 | fn_gt_index = np.array(range(gt_p.shape[0]))
66 | fn_l = fn_gt_index.shape[0]
67 |
68 |
69 | if gt_data['num'] !=0 and pred_p.shape[0] !=0:
70 | gt_p = gt_data['points']
71 | sigma = gt_data['sigma']
72 |
73 | # dist
74 | dist_matrix = ss.distance_matrix(pred_p,gt_p,p=2)
75 | match_matrix = np.zeros(dist_matrix.shape,dtype=bool)
76 |
77 | # sigma_s and sigma_l
78 | tp_l,fp_l,fn_l,tp_pred_index,fp_pred_index ,tp_gt_index, fn_gt_index = compute_metrics(dist_matrix,match_matrix,pred_p.shape[0],sigma)
79 | return tp_pred_index,tp_gt_index
80 |
81 |
82 |
83 |
84 | def associate_pred2gt_point_vis(pred_data, gt_data, gt_diff_idx):
85 | # import pdb
86 | # pdb.set_trace()
87 | pred_p = pred_data.cpu().numpy()
88 | gt_p = gt_data['points'].cpu().numpy()[gt_diff_idx]
89 | gt_sigma = gt_data['sigma'].cpu().numpy()[gt_diff_idx]
90 | if gt_p.shape[0]>0:
91 | gt_data = {'num':gt_p.shape[0], 'points':gt_p,'sigma':gt_sigma}
92 | else:
93 | gt_data = {'num':0, 'points':[],'sigma':[]}
94 |
95 | tp_l,fp_l,fn_l = [0,0,0]
96 | tp_pred_index,tp_gt_index,fp_pred_index,fn_gt_index = [],[],[],[]
97 | if gt_data['num'] ==0 and pred_p.shape[0] !=0:
98 | fp_pred_index = np.array(range(pred_p.shape[0]))
99 | fp_l = fp_pred_index.shape[0]
100 | fn_gt_index = np.array([])
101 | if pred_p.shape[0] ==0 and gt_data['num'] !=0:
102 | gt_p = gt_data['points']
103 | fn_gt_index = np.array(range(gt_p.shape[0]))
104 | fn_l = fn_gt_index.shape[0]
105 | fp_pred_index = np.array([])
106 |
107 | if gt_data['num'] !=0 and pred_p.shape[0] !=0:
108 | gt_p = gt_data['points']
109 | sigma = gt_data['sigma']
110 |
111 | # dist
112 | dist_matrix = ss.distance_matrix(pred_p,gt_p,p=2)
113 | match_matrix = np.zeros(dist_matrix.shape,dtype=bool)
114 |
115 | # sigma_s and sigma_l
116 | tp_l,fp_l,fn_l,tp_pred_index,fp_pred_index ,tp_gt_index, fn_gt_index = compute_metrics(dist_matrix,match_matrix,pred_p.shape[0],sigma)
117 | return tp_pred_index,fp_pred_index ,tp_gt_index, fn_gt_index
118 |
119 | if __name__ == '__main__':
120 |     pass  # eval_metrics() is not defined in this module; use the functions above as a library
121 |
--------------------------------------------------------------------------------
/model/MatchTool/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import numpy as np
4 | import time
5 | import random
6 | import shutil
7 | import cv2
8 | from PIL import Image
9 |
10 | import torch
11 | from torch import nn
12 | import torch.nn.functional as F
13 | import torchvision.utils as vutils
14 | import torchvision.transforms as standard_transforms
15 |
16 | import sys
17 | sys.setrecursionlimit(100000) # set the recursion depth
18 | # Hungarian method for bipartite graph
19 | def hungarian(matrixTF):
20 | # matrix to adjacent matrix
21 | edges = np.argwhere(matrixTF)
22 | lnum, rnum = matrixTF.shape
23 | graph = [[] for _ in range(lnum)]
24 | for edge in edges:
25 | graph[edge[0]].append(edge[1])
26 |
27 |     # depth-first search
28 | match = [-1 for _ in range(rnum)]
29 | vis = [-1 for _ in range(rnum)]
30 | def dfs(u):
31 | for v in graph[u]:
32 | if vis[v]: continue
33 | vis[v] = True
34 | if match[v] == -1 or dfs(match[v]):
35 | match[v] = u
36 | return True
37 | return False
38 |
39 | # for loop
40 | ans = 0
41 | for a in range(lnum):
42 | for i in range(rnum): vis[i] = False
43 | if dfs(a): ans += 1
44 |
45 | # assignment matrix
46 | assign = np.zeros((lnum, rnum), dtype=bool)
47 | for i, m in enumerate(match):
48 | if m >= 0:
49 | assign[m, i] = True
50 |
51 | return ans, assign
52 |
53 | def read_pred_and_gt(pred_file,gt_file):
54 | # read pred
55 | pred_data = {}
56 | with open(pred_file) as f:
57 |
58 | id_read = []
59 | for line in f.readlines():
60 | line = line.strip().split(' ')
61 |
62 | # check1
63 | if len(line) <2 or len(line) % 2 !=0 or (len(line)-2)/2 != int(line[1]):
64 | flagError = True
65 | sys.exit(1)
66 |
67 | line_data = [int(i) for i in line]
68 | idx, num = [line_data[0], line_data[1]]
69 | id_read.append(idx)
70 |
71 | points = []
72 | if num>0:
73 | points = np.array(line_data[2:]).reshape(((len(line)-2)//2,2))
74 | pred_data[idx] = {'num': num, 'points':points}
75 | else:
76 | pred_data[idx] = {'num': num, 'points':[]}
77 |
78 | # read gt
79 | gt_data = {}
80 | with open(gt_file) as f:
81 | for line in f.readlines():
82 | line = line.strip().split(' ')
83 | line_data = [int(i) for i in line]
84 | idx, num = [line_data[0], line_data[1]]
85 | points_r = []
86 | if num>0:
87 | points_r = np.array(line_data[2:]).reshape(((len(line)-2)//5,5))
88 | gt_data[idx] = {'num': num, 'points':points_r[:,0:2], 'sigma': points_r[:,2:4], 'level':points_r[:,4]}
89 | else:
90 | gt_data[idx] = {'num': 0, 'points':[], 'sigma':[], 'level':[]}
91 |
92 | return pred_data, gt_data
93 |
94 | class AverageMeter(object):
95 | """Computes and stores the average and current value"""
96 |
97 | def __init__(self):
98 | self.reset()
99 |
100 | def reset(self):
101 | self.cur_val = 0
102 | self.avg = 0
103 | self.sum = 0
104 | self.count = 0
105 |
106 | def update(self, cur_val, cur_count=1):
107 | self.cur_val = cur_val
108 | self.sum += cur_val
109 | self.count += cur_count
110 | self.avg = self.sum / self.count
111 |
112 | class AverageCategoryMeter(object):
113 | """Computes and stores the average and current value"""
114 |
115 | def __init__(self,num_class):
116 | self.num_class = num_class
117 | self.reset()
118 |
119 | def reset(self):
120 | self.cur_val = np.zeros(self.num_class)
121 | self.sum = np.zeros(self.num_class)
122 |
123 |
124 | def update(self, cur_val):
125 | self.cur_val = cur_val
126 | self.sum += cur_val
127 |
128 | class MultiAverageMeter(object):
129 | """Computes and stores the average and current value"""
130 |
131 | def __init__(self,num_class):
132 | self.num_class = num_class
133 | self.reset()
134 |
135 | def reset(self):
136 | self.cur_val = np.zeros(self.num_class)
137 | self.sum = np.zeros(self.num_class)
138 |
139 |
140 | def update(self, cur_val,id):
141 | self.cur_val[id] = cur_val
142 | self.sum[id] += cur_val
143 |
144 |
145 | if __name__ =="__main__":
146 | a = MultiAverageMeter(100)
147 | a.update(10,6)
148 | print(a.cur_val)
149 | print(a.sum)
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | .vim-template*
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 | .pytest_cache/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 | db.sqlite3
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # Environments
87 | .env
88 | .venv
89 | env/
90 | venv/
91 | ENV/
92 | env.bak/
93 | venv.bak/
94 |
95 | # Spyder project settings
96 | .spyderproject
97 | .spyproject
98 |
99 | # Rope project settings
100 | .ropeproject
101 |
102 | # mkdocs documentation
103 | /site
104 |
105 | # mypy
106 | .mypy_cache/
107 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Jiayuan Mao
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/README.md:
--------------------------------------------------------------------------------
1 | # PreciseRoIPooling
2 | This repo implements the **Precise RoI Pooling** (PrRoI Pooling), proposed in the paper **Acquisition of Localization Confidence for Accurate Object Detection** published at ECCV 2018 (Oral Presentation).
3 |
4 | **Acquisition of Localization Confidence for Accurate Object Detection**
5 |
6 | _Borui Jiang*, Ruixuan Luo*, Jiayuan Mao*, Tete Xiao, Yuning Jiang_ (* indicates equal contribution.)
7 |
8 | https://arxiv.org/abs/1807.11590
9 |
10 | **Caution**: To install the library, please `git clone` the repository instead of downloading the zip file, since source files inside the folders `./pytorch/prroi_pool/src/` and `tensorflow/prroi_pool/src/kernels/external` are symbolic links. Downloading the repository as a zip file will break these links. There are also reports that some Windows git clients break the symbolic links. See [issues/58](https://github.com/vacancy/PreciseRoIPooling/issues/58).
11 |
12 | ## Brief
13 |
14 | In short, Precise RoI Pooling is an integration-based (bilinear interpolation) average pooling method for RoI Pooling; a compact formulation is given after the list below. It avoids any quantization and has a continuous gradient on bounding box coordinates. It is:
15 |
16 | - different from the original RoI Pooling proposed in [Fast R-CNN](https://arxiv.org/abs/1504.08083). PrRoI Pooling uses average pooling instead of max pooling for each bin and has a continuous gradient on bounding box coordinates. That is, one can take the derivatives of some loss function w.r.t the coordinates of each RoI and optimize the RoI coordinates.
17 | - different from the RoI Align proposed in [Mask R-CNN](https://arxiv.org/abs/1703.06870). PrRoI Pooling uses a full integration-based average pooling instead of sampling a constant number of points. This makes the gradient w.r.t. the coordinates continuous.
18 |
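For reference, the behavior these two bullets describe can be written compactly. With `f(x, y)` denoting the bilinear interpolation of the discrete feature map at continuous coordinates, PrRoI Pooling evaluates the exact integral average over each bin `(x1, y1, x2, y2)` (this is the formulation from the paper):

```
\mathrm{PrPool}(bin, \mathcal{F}) =
    \frac{\int_{y_1}^{y_2} \int_{x_1}^{x_2} f(x, y)\, dx\, dy}
         {(x_2 - x_1) \times (y_2 - y_1)}
```

Because the integral has a closed form over the piecewise-bilinear surface, no sampling points are involved, and the output is differentiable with respect to `x1, y1, x2, y2` as well as the features.
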
19 | For a better comparison, we illustrate RoI Pooling, RoI Align and PrRoI Pooling in the following figure. More details, including the gradient computation, can be found in our paper.
20 |
21 | ![RoI Pooling vs. RoI Align vs. PrRoI Pooling](_assets/prroi_visualization.png)
22 |
23 | ## Implementation
24 |
25 | PrRoI Pooling was originally implemented by [Tete Xiao](http://tetexiao.com/) based on MegBrain, an (internal) deep learning framework built by Megvii Inc. It was later adapted into open-source deep learning frameworks; PyTorch and TensorFlow implementations are provided below. Contributions (pull requests) adapting it to other frameworks are more than welcome.
26 |
27 | ## Usage (PyTorch 1.0)
28 |
29 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 1.0+ and only supports CUDA (CPU mode is not implemented).
30 | Since we use the PyTorch JIT for C++/CUDA code compilation, no separate build step is needed; to use the module in your code, simply do:
31 |
32 | ```
33 | from prroi_pool import PrRoIPool2D
34 |
35 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
36 | roi_features = avg_pool(features, rois)
37 |
38 | # for those who want to use the "functional"
39 |
40 | from prroi_pool.functional import prroi_pool2d
41 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
42 | ```
43 |
44 | ## Usage (PyTorch 0.4)
45 |
46 | **!!! Please first check out the branch pytorch0.4.**
47 |
48 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 0.4 and only supports CUDA (CPU mode is not implemented).
49 | To use the PrRoI Pooling module, first go to `pytorch/prroi_pool` and execute `./travis.sh` to compile the essential components (you may need `nvcc` for this step). To use the module in your code, simply do:
50 |
51 | ```
52 | from prroi_pool import PrRoIPool2D
53 |
54 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
55 | roi_features = avg_pool(features, rois)
56 |
57 | # for those who want to use the "functional"
58 |
59 | from prroi_pool.functional import prroi_pool2d
60 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
61 | ```
62 |
63 | Here,
64 |
65 | - RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor.
66 | - `spatial_scale` is multiplied to the RoIs. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`.
67 | - The coordinates for RoI follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`. A minimal construction sketch follows below.
68 |
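Putting these conventions together, a minimal sketch (the tensor shapes and box coordinates here are illustrative, not taken from this repository):

```
import torch
from prroi_pool import PrRoIPool2D

# Feature maps assumed down-sampled by 16 w.r.t. the input image -> spatial_scale = 1/16.
avg_pool = PrRoIPool2D(7, 7, spatial_scale=1.0 / 16)

features = torch.rand(2, 256, 32, 32).cuda()   # (batch, channels, height, width)
rois = torch.tensor([
    [0., 0.,  0.,  64., 64.],   # batch_index 0, a 64x64 box in input-image coordinates
    [1., 32., 32., 96., 96.],   # batch_index 1; [L, R) convention, so (x1, y1) is exclusive
]).cuda()                       # float tensor of shape (m, 5)

roi_features = avg_pool(features, rois)        # -> (2, 256, 7, 7); CUDA only
```
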
69 | ## Usage (TensorFlow)
70 | In the directory `tensorflow/`, we provide a TensorFlow-based implementation of PrRoI Pooling. It is tested with TensorFlow 2.2 and only supports CUDA (CPU mode is not implemented).
71 | To use the PrRoI Pooling module, first compile the essential components by following the step-by-step instructions below (you may need `nvcc` for this step).
72 | 
73 | Once built, the module can be imported in your code as shown at the end of this section.
74 | ### Requirements
75 | * CUDA compiler (NVCC)
76 | * TensorFlow-GPU 2.x
77 | * CMake
78 | * Microsoft Visual C++ Build Tools (for Windows users)
79 | ### Step-by-step instructions
80 | #### For Ubuntu Users
81 | ##### CMake Configuration
82 |
83 | ```
84 | mkdir tensorflow/prroi_pool/build
85 | cd tensorflow/prroi_pool/build
86 | cmake -DCMAKE_BUILD_TYPE="Release" ..
87 | ```
88 | ##### Build & Test PrRoI Pooling module
89 | ```
90 | make
91 | ```
92 | #### For Windows Users
93 |
94 | ##### MSVC Configuration
95 | ```
96 | ${MSVC_INSTALL_PATH}\VC\Auxiliary\Build\vcvars64.bat
97 | ```
98 | ##### CMake Configuration
99 |
100 | ```
101 | mkdir tensorflow/prroi_pool/build
102 | cd tensorflow/prroi_pool/build
103 | cmake -DCMAKE_BUILD_TYPE="Release" -G "NMake Makefiles" ..
104 | ```
105 | ##### Build & Test Custom ops
106 | ```
107 | nmake BUILD=release
108 | ```
109 |
110 | To use the module in your code, simply do:
111 | ```
112 | from prroi_pool import PreciseRoIPooling
113 |
114 | avg_pool = PreciseRoIPooling(window_height, window_width, spatial_scale, data_format)
115 | roi_features = avg_pool([features, rois])
116 |
117 | ```
118 |
119 | Here,
120 |
121 | - RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor.
122 | - `spatial_scale` is multiplied to the RoIs. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`.
123 | - The coordinates for RoI follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`.
124 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/_assets/prroi_visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/_assets/prroi_visualization.png
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/pytorch/prroi_pool/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | /_prroi_pooling
3 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/pytorch/prroi_pool/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : __init__.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | from .prroi_pool import *
13 |
14 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/pytorch/prroi_pool/functional.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : functional.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | import torch
13 | import torch.autograd as ag
14 |
15 | __all__ = ['prroi_pool2d']
16 |
17 |
18 | _prroi_pooling = None
19 |
20 |
21 | def _import_prroi_pooling():
22 | global _prroi_pooling
23 |
24 | if _prroi_pooling is None:
25 | try:
26 | from os.path import join as pjoin, dirname
27 | from torch.utils.cpp_extension import load as load_extension
28 | root_dir = pjoin(dirname(__file__), 'src')
29 |
30 | _prroi_pooling = load_extension(
31 | '_prroi_pooling',
32 | [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')],
33 | verbose=True
34 | )
35 | except ImportError:
36 |             raise ImportError('Cannot compile the Precise RoI Pooling library.')
37 |
38 | return _prroi_pooling
39 |
40 |
41 | class PrRoIPool2DFunction(ag.Function):
42 | @staticmethod
43 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale):
44 | _prroi_pooling = _import_prroi_pooling()
45 |
46 | assert 'FloatTensor' in features.type() and 'FloatTensor' in rois.type(), \
47 | 'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features.type(), rois.type())
48 |
49 | pooled_height = int(pooled_height)
50 | pooled_width = int(pooled_width)
51 | spatial_scale = float(spatial_scale)
52 |
53 | features = features.contiguous()
54 | rois = rois.contiguous()
55 | params = (pooled_height, pooled_width, spatial_scale)
56 |
57 | if features.is_cuda:
58 | output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params)
59 | ctx.params = params
60 | # everything here is contiguous.
61 | ctx.save_for_backward(features, rois, output)
62 | else:
63 |             raise NotImplementedError('Precise RoI Pooling only supports GPU (CUDA) implementations.')
64 |
65 | return output
66 |
67 | @staticmethod
68 | def backward(ctx, grad_output):
69 | _prroi_pooling = _import_prroi_pooling()
70 |
71 | features, rois, output = ctx.saved_tensors
72 | grad_input = grad_coor = None
73 |
74 | if features.requires_grad:
75 | grad_output = grad_output.contiguous()
76 | grad_input = _prroi_pooling.prroi_pooling_backward_cuda(features, rois, output, grad_output, *ctx.params)
77 | if rois.requires_grad:
78 | grad_output = grad_output.contiguous()
79 | grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(features, rois, output, grad_output, *ctx.params)
80 |
81 | return grad_input, grad_coor, None, None, None
82 |
83 |
84 | prroi_pool2d = PrRoIPool2DFunction.apply
85 |
86 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/pytorch/prroi_pool/prroi_pool.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : prroi_pool.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | import torch.nn as nn
13 |
14 | from .functional import prroi_pool2d
15 |
16 | __all__ = ['PrRoIPool2D']
17 |
18 |
19 | class PrRoIPool2D(nn.Module):
20 | def __init__(self, pooled_height, pooled_width, spatial_scale):
21 | super().__init__()
22 |
23 | self.pooled_height = int(pooled_height)
24 | self.pooled_width = int(pooled_width)
25 | self.spatial_scale = float(spatial_scale)
26 |
27 | def forward(self, features, rois):
28 | return prroi_pool2d(features, rois, self.pooled_height, self.pooled_width, self.spatial_scale)
29 |
30 | def extra_repr(self):
31 | return 'kernel_size=({pooled_height}, {pooled_width}), spatial_scale={spatial_scale}'.format(**self.__dict__)
32 |
33 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.c:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu.c
3 | * Author : Jiayuan Mao, Tete Xiao
4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
5 | * Date : 07/13/2018
6 | *
7 | * Distributed under terms of the MIT license.
8 | * Copyright (c) 2017 Megvii Technology Limited.
9 | */
10 |
11 | #include <math.h>
12 | #include <torch/extension.h>
13 | 
14 | #include <THC/THC.h>
15 | #include <THC/THCDeviceUtils.cuh>
16 | 
17 | #include <pybind11/pybind11.h>
18 |
19 | #include "prroi_pooling_gpu_impl.cuh"
20 |
21 |
22 | at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) {
23 | int nr_rois = rois.size(0);
24 | int nr_channels = features.size(1);
25 | int height = features.size(2);
26 | int width = features.size(3);
27 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
28 | auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options());
29 |
30 | if (output.numel() == 0) {
31 | THCudaCheck(cudaGetLastError());
32 | return output;
33 | }
34 |
35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
36 | PrRoIPoolingForwardGpu(
37 | stream, features.data(), rois.data(), output.data(),
38 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
39 | top_count
40 | );
41 |
42 | THCudaCheck(cudaGetLastError());
43 | return output;
44 | }
45 |
46 | at::Tensor prroi_pooling_backward_cuda(
47 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff,
48 | int pooled_height, int pooled_width, float spatial_scale) {
49 |
50 | auto features_diff = at::zeros_like(features);
51 |
52 | int nr_rois = rois.size(0);
53 | int batch_size = features.size(0);
54 | int nr_channels = features.size(1);
55 | int height = features.size(2);
56 | int width = features.size(3);
57 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
58 | int bottom_count = batch_size * nr_channels * height * width;
59 |
60 | if (output.numel() == 0) {
61 | THCudaCheck(cudaGetLastError());
62 | return features_diff;
63 | }
64 |
65 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
66 | PrRoIPoolingBackwardGpu(
67 | stream,
68 | features.data(), rois.data(), output.data(), output_diff.data(),
69 | features_diff.data(),
70 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
71 | top_count, bottom_count
72 | );
73 |
74 | THCudaCheck(cudaGetLastError());
75 | return features_diff;
76 | }
77 |
78 | at::Tensor prroi_pooling_coor_backward_cuda(
79 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff,
80 | int pooled_height, int pooled_width, float spatial_scale) {
81 |
82 | auto coor_diff = at::zeros_like(rois);
83 |
84 | int nr_rois = rois.size(0);
85 | int nr_channels = features.size(1);
86 | int height = features.size(2);
87 | int width = features.size(3);
88 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
89 | int bottom_count = nr_rois * 5;
90 |
91 | if (output.numel() == 0) {
92 | THCudaCheck(cudaGetLastError());
93 | return coor_diff;
94 | }
95 |
96 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
97 | PrRoIPoolingCoorBackwardGpu(
98 | stream,
99 | features.data(), rois.data(), output.data(), output_diff.data(),
100 | coor_diff.data(),
101 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
102 | top_count, bottom_count
103 | );
104 |
105 | THCudaCheck(cudaGetLastError());
106 | return coor_diff;
107 | }
108 |
109 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
110 | m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward");
111 | m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, "PRRoIPooling_backward");
112 | m.def("prroi_pooling_coor_backward_cuda", &prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor");
113 | }
114 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.h:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu.h
3 | * Author : Jiayuan Mao, Tete Xiao
4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
5 | * Date : 07/13/2018
6 | *
7 | * Distributed under terms of the MIT license.
8 | * Copyright (c) 2017 Megvii Technology Limited.
9 | */
10 |
11 | int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale);
12 |
13 | int prroi_pooling_backward_cuda(
14 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff,
15 | int pooled_height, int pooled_width, float spatial_scale
16 | );
17 |
18 | int prroi_pooling_coor_backward_cuda(
19 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff,
20 |     int pooled_height, int pooled_width, float spatial_scale
21 | );
22 |
23 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu_impl.cuh
3 | * Author : Tete Xiao, Jiayuan Mao
4 | * Email : jasonhsiao97@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | * Copyright (c) 2017 Megvii Technology Limited.
8 | */
9 |
10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH
11 | #define PRROI_POOLING_GPU_IMPL_CUH
12 |
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 |
17 | #define F_DEVPTR_IN const float *
18 | #define F_DEVPTR_OUT float *
19 |
20 | void PrRoIPoolingForwardGpu(
21 | cudaStream_t stream,
22 | F_DEVPTR_IN bottom_data,
23 | F_DEVPTR_IN bottom_rois,
24 | F_DEVPTR_OUT top_data,
25 | const int channels_, const int height_, const int width_,
26 | const int pooled_height_, const int pooled_width_,
27 | const float spatial_scale_,
28 | const int top_count);
29 |
30 | void PrRoIPoolingBackwardGpu(
31 | cudaStream_t stream,
32 | F_DEVPTR_IN bottom_data,
33 | F_DEVPTR_IN bottom_rois,
34 | F_DEVPTR_IN top_data,
35 | F_DEVPTR_IN top_diff,
36 | F_DEVPTR_OUT bottom_diff,
37 | const int channels_, const int height_, const int width_,
38 | const int pooled_height_, const int pooled_width_,
39 | const float spatial_scale_,
40 | const int top_count, const int bottom_count);
41 |
42 | void PrRoIPoolingCoorBackwardGpu(
43 | cudaStream_t stream,
44 | F_DEVPTR_IN bottom_data,
45 | F_DEVPTR_IN bottom_rois,
46 | F_DEVPTR_IN top_data,
47 | F_DEVPTR_IN top_diff,
48 | F_DEVPTR_OUT bottom_diff,
49 | const int channels_, const int height_, const int width_,
50 | const int pooled_height_, const int pooled_width_,
51 | const float spatial_scale_,
52 | const int top_count, const int bottom_count);
53 |
54 | #ifdef __cplusplus
55 | } /* !extern "C" */
56 | #endif
57 |
58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */
59 |
60 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/pytorch/tests/test_prroi_pooling2d.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : test_prroi_pooling2d.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 18/02/2018
6 | #
7 | # This file is part of Jacinle.
8 |
9 | import unittest
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 |
15 | from jactorch.utils.unittest import TorchTestCase
16 |
17 | from prroi_pool import PrRoIPool2D
18 |
19 |
20 | class TestPrRoIPool2D(TorchTestCase):
21 | def test_forward(self):
22 | pool = PrRoIPool2D(7, 7, spatial_scale=0.5)
23 | features = torch.rand((4, 16, 24, 32)).cuda()
24 | rois = torch.tensor([
25 | [0, 0, 0, 14, 14],
26 | [1, 14, 14, 28, 28],
27 | ]).float().cuda()
28 |
29 | out = pool(features, rois)
30 | out_gold = F.avg_pool2d(features, kernel_size=2, stride=1)
31 |
32 | self.assertTensorClose(out, torch.stack((
33 | out_gold[0, :, :7, :7],
34 | out_gold[1, :, 7:14, 7:14],
35 | ), dim=0))
36 |
37 | def test_backward_shapeonly(self):
38 | pool = PrRoIPool2D(2, 2, spatial_scale=0.5)
39 |
40 | features = torch.rand((4, 2, 24, 32)).cuda()
41 | rois = torch.tensor([
42 | [0, 0, 0, 4, 4],
43 | [1, 14, 14, 18, 18],
44 | ]).float().cuda()
45 | features.requires_grad = rois.requires_grad = True
46 | out = pool(features, rois)
47 |
48 | loss = out.sum()
49 | loss.backward()
50 |
51 | self.assertTupleEqual(features.size(), features.grad.size())
52 | self.assertTupleEqual(rois.size(), rois.grad.size())
53 |
54 |
55 | if __name__ == '__main__':
56 | unittest.main()
57 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/src/prroi_pooling_gpu_impl.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu_impl.cuh
3 | * Author : Tete Xiao, Jiayuan Mao
4 | * Email : jasonhsiao97@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | * Copyright (c) 2017 Megvii Technology Limited.
8 | */
9 |
10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH
11 | #define PRROI_POOLING_GPU_IMPL_CUH
12 |
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 |
17 | #define F_DEVPTR_IN const float *
18 | #define F_DEVPTR_OUT float *
19 |
20 | void PrRoIPoolingForwardGpu(
21 | cudaStream_t stream,
22 | F_DEVPTR_IN bottom_data,
23 | F_DEVPTR_IN bottom_rois,
24 | F_DEVPTR_OUT top_data,
25 | const int channels_, const int height_, const int width_,
26 | const int pooled_height_, const int pooled_width_,
27 | const float spatial_scale_,
28 | const int top_count);
29 |
30 | void PrRoIPoolingBackwardGpu(
31 | cudaStream_t stream,
32 | F_DEVPTR_IN bottom_data,
33 | F_DEVPTR_IN bottom_rois,
34 | F_DEVPTR_IN top_data,
35 | F_DEVPTR_IN top_diff,
36 | F_DEVPTR_OUT bottom_diff,
37 | const int channels_, const int height_, const int width_,
38 | const int pooled_height_, const int pooled_width_,
39 | const float spatial_scale_,
40 | const int top_count, const int bottom_count);
41 |
42 | void PrRoIPoolingCoorBackwardGpu(
43 | cudaStream_t stream,
44 | F_DEVPTR_IN bottom_data,
45 | F_DEVPTR_IN bottom_rois,
46 | F_DEVPTR_IN top_data,
47 | F_DEVPTR_IN top_diff,
48 | F_DEVPTR_OUT bottom_diff,
49 | const int channels_, const int height_, const int width_,
50 | const int pooled_height_, const int pooled_width_,
51 | const float spatial_scale_,
52 | const int top_count, const int bottom_count);
53 |
54 | #ifdef __cplusplus
55 | } /* !extern "C" */
56 | #endif
57 |
58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */
59 |
60 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/prroi_pool/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # File : CMakeLists.txt
2 | # Author : Kanghee Lee
3 | # Email : lerohiso@gmail.com
4 | # Date : 09/25/2020
5 | #
6 | # This file is part of PreciseRoIPooling.
7 | # Distributed under terms of the MIT license.
8 |
9 | CMAKE_MINIMUM_REQUIRED(VERSION 3.17 FATAL_ERROR)
10 |
11 | PROJECT(precise_roi_pooling)
12 | FIND_PACKAGE(CUDA)
13 | FIND_PACKAGE(PythonInterp 3)
14 |
15 | if (MSVC)
16 | SET(GPU_LIB ${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build/precise_roi_pooling_cuda.lib)
17 | elseif (UNIX)
18 | SET(GPU_LIB ${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build/precise_roi_pooling_cuda.so)
19 | endif()
20 |
21 | if (NOT EXISTS ${GPU_LIB})
22 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build_cuda.py" RESULTS_VARIABLE RET_CODE)
23 | if (NOT "${RET_CODE}" STREQUAL "0")
24 | MESSAGE(FATAL_ERROR "Failed to compile CUDA code")
25 | endif ()
26 | endif ()
27 |
28 | if (NOT DEFINED TF_PATH)
29 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_include(), end='', flush=True)" OUTPUT_VARIABLE TF_INC)
30 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_lib(), end='', flush=True)" OUTPUT_VARIABLE TF_LIB)
31 | MESSAGE(STATUS "TF_INC: " ${TF_INC})
32 | MESSAGE(STATUS "TF_LIB: " ${TF_LIB})
33 | SET(TF_PATH 1)
34 | endif ()
35 |
36 | if (NOT DEFINED TF_FLAGS)
37 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()), end='', flush=True)" OUTPUT_VARIABLE TF_CFLAGS)
38 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()), end='', flush=True)" OUTPUT_VARIABLE TF_LFLAGS)
39 | MESSAGE(STATUS "TF_CFLAGS: " ${TF_CFLAGS})
40 | MESSAGE(STATUS "TF_LFLAGS: " ${TF_LFLAGS})
41 | SET(TF_FLAGS 1)
42 | endif ()
43 |
44 | INCLUDE_DIRECTORIES(${TF_INC})
45 | LINK_DIRECTORIES(${TF_LIB})
46 | INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
47 |
48 | LIST(APPEND CMAKE_CXX_FLAGS "${TF_CFLAGS} ${TF_LFLAGS} -O2 -D GOOGLE_CUDA=1 -std=c++11 -shared")
49 | if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
50 | LIST(APPEND CMAKE_CXX_FLAGS " -lcudart -DNOMINMAX")
51 | endif ()
52 |
53 | MESSAGE(STATUS "CMAKE_CXX_COMPILER_ID: " ${CMAKE_CXX_COMPILER_ID})
54 | MESSAGE(STATUS "CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS})
55 |
56 | ADD_LIBRARY(precise_roi_pooling SHARED src/kernels/precise_roi_pooling.h
57 | src/kernels/precise_roi_pooling_kernels.cc
58 | src/ops/precise_roi_pooling_ops.cc)
59 | TARGET_COMPILE_FEATURES(precise_roi_pooling PUBLIC cxx_std_11)
60 | SET_TARGET_PROPERTIES(precise_roi_pooling PROPERTIES
61 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/module/"
62 | LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/module/"
63 | )
64 |
65 | ADD_LIBRARY(precise_roi_pooling_gpu SHARED IMPORTED)
66 | if (MSVC)
67 | SET_TARGET_PROPERTIES(precise_roi_pooling_gpu PROPERTIES IMPORTED_IMPLIB ${GPU_LIB})
68 | elseif (UNIX)
69 | SET_TARGET_PROPERTIES(precise_roi_pooling_gpu PROPERTIES IMPORTED_LOCATION ${GPU_LIB})
70 | endif()
71 |
72 | ADD_LIBRARY(tensorflow_internal SHARED IMPORTED)
73 | if (MSVC)
74 | SET_TARGET_PROPERTIES(tensorflow_internal PROPERTIES
75 | IMPORTED_IMPLIB ${TF_LIB}/python/_pywrap_tensorflow_internal.lib)
76 | elseif (UNIX)
77 | SET_TARGET_PROPERTIES(tensorflow_internal PROPERTIES
78 | IMPORTED_LOCATION ${TF_LIB}/python/_pywrap_tensorflow_internal.so)
79 | endif()
80 |
81 | TARGET_LINK_LIBRARIES(precise_roi_pooling tensorflow_internal
82 | precise_roi_pooling_gpu
83 | ${CUDA_LIBRARIES})
84 |
85 | ADD_CUSTOM_TARGET(precise_roi_pooling_test ALL
86 | COMMAND ${CMAKE_COMMAND} -E env
87 | "PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/../"
88 | ${PYTHON_EXECUTABLE} tests/precise_roi_pooling_ops_test.py
89 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../")
90 |
91 | ADD_DEPENDENCIES(precise_roi_pooling_test precise_roi_pooling)
92 |
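
Note: a sketch of driving this build from Python instead of invoking cmake by hand (assumes CMake >= 3.17, nvcc, and an importable TensorFlow in the active environment; run from tensorflow/prroi_pool/):

import subprocess

# Configure and build; CMakeLists.txt itself calls build_cuda.py first
# if the CUDA library under src/kernels/build/ is missing.
subprocess.run(['cmake', '-S', '.', '-B', 'cmake-build'], check=True)
subprocess.run(['cmake', '--build', 'cmake-build'], check=True)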
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/prroi_pool/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : __init__.py
4 | # Author : Kanghee Lee
5 | # Email : lerohiso@gmail.com
6 | # Date : 09/25/2020
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 |
11 | from .precise_roi_pooling_ops import *
12 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/prroi_pool/precise_roi_pooling_ops.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : precise_roi_pooling_ops.py
4 | # Author : Kanghee Lee
5 | # Email : lerohiso@gmail.com
6 | # Date : 09/25/2020
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 |
11 | from __future__ import absolute_import
12 | from __future__ import division
13 | from __future__ import print_function
14 |
15 | import os
16 | import platform
17 | import tensorflow as tf
18 |
19 | __all__ = ['PreciseRoIPooling']
20 |
21 | os_type = platform.system()
22 | if os_type == 'Windows':
23 | MODULE_NAME = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'module/precise_roi_pooling.dll')
24 | elif os_type == 'Linux':
25 | MODULE_NAME = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'module/libprecise_roi_pooling.so')
26 |
27 | _precise_roi_pooling_ops = tf.load_op_library(MODULE_NAME)
28 |
29 | def _precise_roi_pooling(features,
30 | rois,
31 | pooled_height,
32 | pooled_width,
33 | spatial_scale,
34 | data_format,
35 | name=None):
36 | with tf.name_scope(name or "precise_roi_pooling"):
37 | op_call = _precise_roi_pooling_ops.precise_ro_i_pooling
38 |
39 | if data_format == 'channels_last':
40 | inputs = tf.transpose(features, [0, 3, 1, 2])
41 | elif data_format == "channels_first":
42 | inputs = features
43 | else:
44 | raise ValueError('`data_format` must be either `channels_last` or `channels_first`')
45 |
46 | outputs = op_call(inputs,
47 | rois,
48 | pooled_height=pooled_height,
49 | pooled_width=pooled_width,
50 | spatial_scale=spatial_scale,
51 | data_format='NCHW')
52 |
53 | if data_format == 'channels_last':
54 | return tf.transpose(outputs, [0, 2, 3, 1])
55 |
56 | return outputs
57 |
58 | class PreciseRoIPooling(tf.keras.layers.Layer):
59 | def __init__(self,
60 | pooled_height: int,
61 | pooled_width: int,
62 | spatial_scale: float,
63 | data_format: str = 'channels_first',
64 | **kwargs):
65 | self.pooled_height = pooled_height
66 | self.pooled_width = pooled_width
67 | self.spatial_scale = spatial_scale
68 |
69 | if data_format != 'channels_last' and data_format != 'channels_first':
70 | raise ValueError('`data_format` must be either `channels_last` or '
71 | '`channels_first`, instead got %s' % data_format)
72 |
73 | self.data_format = data_format
74 |
75 | super().__init__(**kwargs)
76 |
77 | def build(self, input_shape):
78 | if not isinstance(input_shape, list):
79 | raise ValueError('Input must be a list of two Tensors to process')
80 | super().build(input_shape)
81 |
82 | def call(self, inputs):
83 | if not isinstance(inputs, list):
84 | raise ValueError('Input must be a list of two Tensors to process')
85 |
86 | features = tf.convert_to_tensor(inputs[0])
87 | rois = tf.convert_to_tensor(inputs[1])
88 |
89 | return _precise_roi_pooling(features,
90 | rois,
91 | pooled_height=self.pooled_height,
92 | pooled_width=self.pooled_width,
93 | spatial_scale=self.spatial_scale,
94 | data_format=self.data_format)
95 |
96 | def compute_output_shape(self, input_shape):
97 | assert isinstance(input_shape, list)
98 |
99 | # Input validation
100 | if len(input_shape) != 2:
101 | raise ValueError('Input must be a list of two shapes')
102 |
103 | number_of_rois = input_shape[1][0]
104 |
105 | if self.data_format == 'channels_first':
106 | number_of_channels = input_shape[0][1]
107 | return [(number_of_rois, number_of_channels, self.pooled_height, self.pooled_width)]
108 |
109 | elif self.data_format == 'channels_last':
110 | number_of_channels = input_shape[0][3]
111 | return [(number_of_rois, self.pooled_height, self.pooled_width, number_of_channels)]
112 | else:
113 | raise ValueError(
114 | '`data_format` must be either `channels_last` or `channels_first`'
115 | )
116 |
117 | def get_config(self):
118 | config = {
119 | 'pooled_height': self.pooled_height,
120 | 'pooled_width': self.pooled_width,
121 | 'spatial_scale': self.spatial_scale,
122 | 'data_format': self.data_format,
123 | }
124 |
125 | base_config = super().get_config()
126 | return {**base_config, **config}
127 |
128 | @tf.RegisterGradient('PreciseRoIPooling')
129 | def _precise_roi_pooling_grad(op, grad_output):
130 | pooled_height = op.get_attr('pooled_height')
131 | pooled_width = op.get_attr('pooled_width')
132 | spatial_scale = op.get_attr('spatial_scale')
133 | data_format = op.get_attr('data_format')
134 |
135 | features = tf.convert_to_tensor(op.inputs[0], name='features')
136 | rois = tf.convert_to_tensor(op.inputs[1], name='rois')
137 | pooled_features = tf.convert_to_tensor(op.outputs[0], name='pooled_features')
138 | grad_output = tf.convert_to_tensor(grad_output, name='grad_output')
139 |
140 | op_call = _precise_roi_pooling_ops.precise_ro_i_pooling_grad
141 | grads = op_call(features,
142 | rois,
143 | pooled_features,
144 | grad_output,
145 | pooled_height=pooled_height,
146 | pooled_width=pooled_width,
147 | spatial_scale=spatial_scale,
148 | data_format=data_format)
149 |
150 | features_gradient = tf.convert_to_tensor(grads[0], name='features_gradient')
151 | rois_gradient = tf.convert_to_tensor(grads[1], name='rois_gradient')
152 | return [features_gradient, rois_gradient]
153 |
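
Note: a minimal usage sketch for the layer above (assumes the compiled op is present under module/ and a GPU is available). ROIs use the same [batch_index, x0, y0, x1, y1] layout as the PyTorch version:

import tensorflow as tf
from prroi_pool import PreciseRoIPooling

pool = PreciseRoIPooling(7, 7, spatial_scale=0.5, data_format='channels_first')
features = tf.random.uniform([4, 16, 24, 32])               # NCHW
rois = tf.constant([[0, 0, 0, 14, 14],
                    [1, 14, 14, 28, 28]], dtype=tf.float32)
pooled = pool([features, rois])                             # shape (2, 16, 7, 7)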
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/build_cuda.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : build_cuda.py
4 | # Author : Kanghee Lee
5 | # Email : lerohiso@gmail.com
6 | # Date : 09/25/2020
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 |
11 | import os
12 | import platform
13 | import shutil
14 | import subprocess
15 |
16 | import tensorflow as tf
17 |
18 | CUDA_SRCS = []
19 | CUDA_OUTPUT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'build')
20 |
21 | if not os.path.isdir(CUDA_OUTPUT_DIR):
22 | os.makedirs(CUDA_OUTPUT_DIR)
23 |
24 | for file in os.listdir(os.path.dirname(os.path.realpath(__file__))):
25 | if file.endswith('.cu.cc'):
26 | CUDA_SRCS.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), file))
27 |
28 | CUDA_COMPILER = shutil.which('nvcc')
29 | if CUDA_COMPILER is None:
30 | raise ValueError('CUDA Compiler Not Found')
31 |
32 | TF_CFLAGS = ' '.join(tf.sysconfig.get_compile_flags())
33 | TF_LFLAGS = ' '.join(tf.sysconfig.get_link_flags())
34 |
35 | CUDA_NVCC_FLAGS = TF_CFLAGS + ' ' + TF_LFLAGS + ' -D GOOGLE_CUDA=1 -x cu --expt-relaxed-constexpr'
36 |
37 | os_type = platform.system()
38 | if os_type == 'Windows':
39 | CUDA_NVCC_FLAGS += ' -Xcompiler -MD -cudart=shared -D_WINSOCKAPI_'
40 | CUDA_OUTPUT_FILENAME = 'precise_roi_pooling_cuda.lib'
41 | elif os_type == 'Linux':
42 | CUDA_NVCC_FLAGS += ' -Xcompiler -fPIC -DNDEBUG'
43 | CUDA_OUTPUT_FILENAME = 'precise_roi_pooling_cuda.so'
44 |
45 | COMMAND = CUDA_COMPILER
46 | COMMAND += ' -c -o ' + os.path.join(CUDA_OUTPUT_DIR, CUDA_OUTPUT_FILENAME)
47 | COMMAND += ' ' + ' '.join(CUDA_SRCS)
48 | COMMAND += ' ' + CUDA_NVCC_FLAGS
49 |
50 | process = subprocess.Popen(COMMAND, shell=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
51 | process_output = process.communicate()[0]
52 | print(process_output.decode())
53 |
54 | if process.returncode != 0:
55 | raise ValueError('CUDA compilation failed')
56 |
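
Note: a dry-run sketch that reconstructs (without executing) the Linux nvcc command this script assembles; the flag order below is illustrative, not captured output:

import shutil
import tensorflow as tf

flags = ' '.join(tf.sysconfig.get_compile_flags() + tf.sysconfig.get_link_flags())
cmd = (f"{shutil.which('nvcc')} -c -o build/precise_roi_pooling_cuda.so "
       f"precise_roi_pooling_kernels.cu.cc {flags} "
       "-D GOOGLE_CUDA=1 -x cu --expt-relaxed-constexpr -Xcompiler -fPIC -DNDEBUG")
print(cmd)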
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/external/prroi_pooling_gpu_impl.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu_impl.cuh
3 | * Author : Tete Xiao, Jiayuan Mao
4 | * Email : jasonhsiao97@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | * Copyright (c) 2017 Megvii Technology Limited.
8 | */
9 |
10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH
11 | #define PRROI_POOLING_GPU_IMPL_CUH
12 |
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 |
17 | #define F_DEVPTR_IN const float *
18 | #define F_DEVPTR_OUT float *
19 |
20 | void PrRoIPoolingForwardGpu(
21 | cudaStream_t stream,
22 | F_DEVPTR_IN bottom_data,
23 | F_DEVPTR_IN bottom_rois,
24 | F_DEVPTR_OUT top_data,
25 | const int channels_, const int height_, const int width_,
26 | const int pooled_height_, const int pooled_width_,
27 | const float spatial_scale_,
28 | const int top_count);
29 |
30 | void PrRoIPoolingBackwardGpu(
31 | cudaStream_t stream,
32 | F_DEVPTR_IN bottom_data,
33 | F_DEVPTR_IN bottom_rois,
34 | F_DEVPTR_IN top_data,
35 | F_DEVPTR_IN top_diff,
36 | F_DEVPTR_OUT bottom_diff,
37 | const int channels_, const int height_, const int width_,
38 | const int pooled_height_, const int pooled_width_,
39 | const float spatial_scale_,
40 | const int top_count, const int bottom_count);
41 |
42 | void PrRoIPoolingCoorBackwardGpu(
43 | cudaStream_t stream,
44 | F_DEVPTR_IN bottom_data,
45 | F_DEVPTR_IN bottom_rois,
46 | F_DEVPTR_IN top_data,
47 | F_DEVPTR_IN top_diff,
48 | F_DEVPTR_OUT bottom_diff,
49 | const int channels_, const int height_, const int width_,
50 | const int pooled_height_, const int pooled_width_,
51 | const float spatial_scale_,
52 | const int top_count, const int bottom_count);
53 |
54 | #ifdef __cplusplus
55 | } /* !extern "C" */
56 | #endif
57 |
58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */
59 |
60 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/precise_roi_pooling.h:
--------------------------------------------------------------------------------
1 | /*
2 | * File : precise_roi_pooling.h
3 | * Author : Kanghee Lee
4 | * Email : lerohiso@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | */
8 |
9 | #ifndef KERNEL_PRECISE_ROI_POOLING_H_
10 | #define KERNEL_PRECISE_ROI_POOLING_H_
11 |
12 | #include "tensorflow/core/framework/op_kernel.h"
13 | #include "tensorflow/core/util/tensor_format.h"
14 |
15 | namespace tensorflow {
16 |
17 | namespace functor {
18 |
19 | template <typename Device, typename T>
20 | struct PreciseRoIPoolingFunctor {
21 | Status operator()(OpKernelContext* context,
22 | const Tensor& features,
23 | const Tensor& rois,
24 | Tensor* pooled_features,
25 | int pooled_height,
26 | int pooled_width,
27 | float spatial_scale,
28 | TensorFormat data_format);
29 | };
30 |
31 | template <typename Device, typename T>
32 | struct PreciseRoIPoolingGradFunctor {
33 | Status operator()(OpKernelContext* context,
34 | const Tensor& features,
35 | const Tensor& rois,
36 | const Tensor& pooled_features,
37 | const Tensor& pooled_features_diff,
38 | Tensor* features_gradient,
39 | Tensor* rois_gradient,
40 | int pooled_height,
41 | int pooled_width,
42 | float spatial_scale,
43 | TensorFormat data_format);
44 | };
45 |
46 | } // namespace functor
47 |
48 | } // namespace tensorflow
49 |
50 | #endif // KERNEL_PRECISE_ROI_POOLING_H_
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/precise_roi_pooling_kernels.cu.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * File : precise_roi_pooling_kernels.cu.cc
3 | * Author : Kanghee Lee
4 | * Email : lerohiso@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | */
8 |
9 | #if GOOGLE_CUDA
10 | #define EIGEN_USE_GPU
11 |
12 | #include "precise_roi_pooling.h"
13 | #include "external/prroi_pooling_gpu_impl.cu"
14 | #include "tensorflow/core/util/gpu_kernel_helper.h"
15 |
16 | namespace tensorflow {
17 | namespace functor {
18 |
19 | typedef Eigen::GpuDevice GPUDevice;
20 |
21 | // Define the GPU implementation that launches the CUDA kernel.
22 | template <typename T>
23 | struct PreciseRoIPoolingFunctor<GPUDevice, T> {
24 | Status operator()(OpKernelContext *context,
25 | const Tensor& features,
26 | const Tensor& rois,
27 | Tensor* pooled_features,
28 | int pooled_height,
29 | int pooled_width,
30 | float spatial_scale,
31 | TensorFormat data_format) {
32 | const int32 batch_size = GetTensorDim(features, data_format, 'N');
33 | const int32 number_of_channels = GetTensorDim(features, data_format, 'C');
34 | const int32 features_height = GetTensorDim(features, data_format, 'H');
35 | const int32 features_width = GetTensorDim(features, data_format, 'W');
36 |
37 | const int32 number_of_rois = rois.dim_size(0);
38 |
39 | const int top_count = number_of_rois * number_of_channels * pooled_height * pooled_width;
40 | const GPUDevice &d = context->eigen_gpu_device();
41 |
42 | PrRoIPoolingForwardGpu(d.stream(),
43 | features.flat<T>().data(),
44 | rois.flat<T>().data(),
45 | pooled_features->flat<T>().data(),
46 | number_of_channels,
47 | features_height,
48 | features_width,
49 | pooled_height,
50 | pooled_width,
51 | spatial_scale,
52 | top_count);
53 |
54 | return Status::OK();
55 | }
56 | };
57 |
58 | template <typename T>
59 | struct PreciseRoIPoolingGradFunctor<GPUDevice, T> {
60 | Status operator()(OpKernelContext* context,
61 | const Tensor& features,
62 | const Tensor& rois,
63 | const Tensor& pooled_features,
64 | const Tensor& pooled_features_diff,
65 | Tensor* features_gradient,
66 | Tensor* rois_gradient,
67 | int pooled_height,
68 | int pooled_width,
69 | float spatial_scale,
70 | TensorFormat data_format) {
71 | const int32 batch_size = GetTensorDim(features, data_format, 'N');
72 | const int32 number_of_channels = GetTensorDim(features, data_format, 'C');
73 | const int32 features_height = GetTensorDim(features, data_format, 'H');
74 | const int32 features_width = GetTensorDim(features, data_format, 'W');
75 |
76 | const int32 number_of_rois = rois.dim_size(0);
77 |
78 | const int top_count = number_of_rois * number_of_channels * pooled_height * pooled_width;
79 | const GPUDevice &d = context->eigen_gpu_device();
80 |
81 | const int features_gradient_size = batch_size * number_of_channels * features_height * features_width;
82 | const int rois_gradient_size = number_of_rois * 5;
83 |
84 | PrRoIPoolingBackwardGpu(d.stream(),
85 | features.flat<T>().data(),
86 | rois.flat<T>().data(),
87 | pooled_features.flat<T>().data(),
88 | pooled_features_diff.flat<T>().data(),
89 | features_gradient->flat<T>().data(),
90 | number_of_channels,
91 | features_height,
92 | features_width,
93 | pooled_height,
94 | pooled_width,
95 | spatial_scale,
96 | top_count,
97 | features_gradient_size);
98 |
99 | PrRoIPoolingCoorBackwardGpu(d.stream(),
100 | features.flat<T>().data(),
101 | rois.flat<T>().data(),
102 | pooled_features.flat<T>().data(),
103 | pooled_features_diff.flat<T>().data(),
104 | rois_gradient->flat<T>().data(),
105 | number_of_channels,
106 | features_height,
107 | features_width,
108 | pooled_height,
109 | pooled_width,
110 | spatial_scale,
111 | top_count,
112 | rois_gradient_size);
113 |
114 | return Status::OK();
115 | }
116 | };
117 |
118 | // Explicitly instantiate functors for the types of OpKernels registered.
119 | template struct PreciseRoIPoolingFunctor<GPUDevice, float>;
120 | template struct PreciseRoIPoolingGradFunctor<GPUDevice, float>;
121 |
122 | } // end namespace functor
123 |
124 | } // end namespace tensorflow
125 |
126 | #endif // GOOGLE_CUDA
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/prroi_pool/src/ops/precise_roi_pooling_ops.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * File : precise_roi_pooling_ops.cc
3 | * Author : Kanghee Lee
4 | * Email : lerohiso@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | */
8 |
9 | #include "tensorflow/core/framework/op.h"
10 | #include "tensorflow/core/framework/shape_inference.h"
11 |
12 | namespace tensorflow {
13 |
14 | using ::tensorflow::shape_inference::InferenceContext;
15 | using ::tensorflow::shape_inference::ShapeHandle;
16 |
17 | REGISTER_OP("PreciseRoIPooling")
18 | .Input("features: T")
19 | .Input("rois: T")
20 | .Output("pooled_features: T")
21 | .Attr("pooled_height: int")
22 | .Attr("pooled_width: int")
23 | .Attr("spatial_scale: float")
24 | .Attr("data_format: {'NCHW'} = 'NCHW'")
25 | .Attr("T: realnumbertype")
26 | .SetShapeFn([](InferenceContext* c) {
27 | ShapeHandle features, rois;
28 |
29 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &features));
30 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &rois));
31 |
32 | // get input shapes
33 | int32 number_of_rois, number_of_channels;
34 | number_of_rois = c->Value(c->Dim(rois, 0));
35 | string data_format;
36 | Status s = c->GetAttr("data_format", &data_format);
37 | if (s.ok() && data_format == "NCHW") {
38 | number_of_channels = c->Value(c->Dim(features, 1));
39 | }
40 | else {
41 | number_of_channels = c->Value(c->Dim(features, 3));
42 | }
43 |
44 | int32 pooled_height;
45 | int32 pooled_width;
46 |
47 | TF_RETURN_IF_ERROR(c->GetAttr("pooled_height", &pooled_height));
48 | TF_RETURN_IF_ERROR(c->GetAttr("pooled_width", &pooled_width));
49 |
50 | // Note, the output is always NCHW (even when input is NHWC)
51 | c->set_output(0, c->MakeShape({number_of_rois, number_of_channels, pooled_height, pooled_width}));
52 | return Status::OK();
53 | })
54 | .Doc(R"doc(PreciseRoIPooling op.)doc");
55 |
56 | REGISTER_OP("PreciseRoIPoolingGrad")
57 | .Input("features: T")
58 | .Input("rois: T")
59 | .Input("pooled_features: T")
60 | .Input("pooled_features_diff: T")
61 | .Output("features_gradient: T")
62 | .Output("rois_gradient: T")
63 | .Attr("pooled_height: int")
64 | .Attr("pooled_width: int")
65 | .Attr("spatial_scale: float")
66 | .Attr("data_format: {'NCHW'} = 'NCHW'")
67 | .Attr("T: realnumbertype")
68 | .SetShapeFn([](InferenceContext* c) {
69 | ShapeHandle features, rois;
70 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &features));
71 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &rois));
72 | c->set_output(0, features);
73 | c->set_output(1, rois);
74 | return Status::OK();
75 | })
76 | .Doc(R"doc(PreciseRoIPoolingGrad op.)doc");
77 |
78 | } // namespace tensorflow
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/tests/precise_roi_pooling_ops_test.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : precise_roi_pooling_ops_test.py
4 | # Author : Kanghee Lee
5 | # Email : lerohiso@gmail.com
6 | # Date : 09/25/2020
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 |
11 | from __future__ import absolute_import
12 | from __future__ import division
13 | from __future__ import print_function
14 |
15 | import os
16 | import numpy as np
17 | import tensorflow as tf
18 |
19 | from tensorflow.python.framework import ops
20 | from tensorflow.python.platform import test
21 | from tensorflow.python.framework import test_util
22 | from prroi_pool import PreciseRoIPooling
23 |
24 |
25 | class PreciseRoIPoolingTest(test.TestCase):
26 | @test_util.run_gpu_only
27 | def test_forward(self):
28 | with self.test_session():
29 | with ops.device("/gpu:0"):
30 | pooled_width = 7
31 | pooled_height = 7
32 | spatial_scale = 0.5
33 | data_format = 'channels_first'
34 | pool = PreciseRoIPooling(pooled_height,
35 | pooled_width,
36 | spatial_scale=spatial_scale,
37 | data_format=data_format)
38 | features = tf.random.uniform([4, 16, 24, 32], dtype=tf.float32)
39 | rois = tf.constant([[0, 0, 0, 14, 14], [1, 14, 14, 28, 28]], dtype=tf.float32)
40 | operation_outputs = pool([features, rois])
41 | real_outputs = tf.keras.layers.AveragePooling2D(data_format=data_format, strides=1)(features)
42 | real_outputs = tf.stack([real_outputs[0, :, :7, :7], real_outputs[1, :, 7:14, 7:14]], axis=0)
43 | self.assertAllClose(operation_outputs, real_outputs)
44 |
45 | @test_util.run_gpu_only
46 | def test_backward(self):
47 | with self.test_session():
48 | with ops.device("/gpu:0"):
49 | pooled_width = 2
50 | pooled_height = 2
51 | spatial_scale = 0.5
52 | data_format = 'channels_first'
53 | base_directory = os.path.dirname(os.path.realpath(__file__))
54 |
55 | # binaries from pytorch prroi_pool module
56 | features = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/features.npy'))
57 | rois = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/rois.npy'))
58 |
59 | real_outputs = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/real_outputs.npy'))
60 | real_gradients0 = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/gradients0.npy'))
61 | real_gradients1 = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/gradients1.npy'))
62 | features = tf.convert_to_tensor(features)
63 | rois = tf.convert_to_tensor(rois)
64 | with tf.GradientTape() as tape:
65 | tape.watch([features, rois])
66 | outputs = PreciseRoIPooling(pooled_height=pooled_height,
67 | pooled_width=pooled_width,
68 | spatial_scale=spatial_scale,
69 | data_format=data_format)([features, rois])
70 | loss = tf.reduce_sum(outputs)
71 |
72 | gradients = tape.gradient(loss, [features, rois])
73 |
74 | self.assertAllClose(outputs, real_outputs)
75 | self.assertAllClose(gradients[0], real_gradients0)
76 | self.assertAllClose(gradients[1], real_gradients1)
77 |
78 |
79 | if __name__ == '__main__':
80 | test.main()
81 |
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/features.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/features.npy
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients0.npy
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients1.npy
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/real_outputs.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/real_outputs.npy
--------------------------------------------------------------------------------
/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/rois.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/rois.npy
--------------------------------------------------------------------------------
/model/VGG/VGG16_FPN.py:
--------------------------------------------------------------------------------
1 | from torchvision import models
2 | import sys
3 | import torch.nn.functional as F
4 | from misc.utils import *
5 | from misc.layer import *
6 | from torchsummary import summary
7 | from model.necks import FPN
8 | from .conv import ResBlock
9 |
10 | BatchNorm2d = nn.BatchNorm2d
11 | BN_MOMENTUM = 0.01
12 |
13 | class VGG16_FPN(nn.Module):
14 | def __init__(self, pretrained=True):
15 | super(VGG16_FPN, self).__init__()
16 |
17 | vgg = models.vgg16_bn(pretrained=pretrained)
18 | features = list(vgg.features.children())
19 |
20 | self.layer1 = nn.Sequential(*features[0:23])
21 | self.layer2 = nn.Sequential(*features[23:33])
22 | self.layer3 = nn.Sequential(*features[33:43])
23 |
24 | in_channels = [256,512,512]
25 | self.neck = FPN(in_channels,192,len(in_channels))
26 | self.neck2f = FPN(in_channels, 128, len(in_channels))
27 | self.loc_head = nn.Sequential(
28 | nn.Dropout2d(0.2),
29 | ResBlock(in_dim=576, out_dim=256, dilation=0, norm="bn"),
30 | ResBlock(in_dim=256, out_dim=128, dilation=0, norm="bn"),
31 |
32 | nn.ConvTranspose2d(128, 64, 2, stride=2, padding=0, output_padding=0, bias=False),
33 | nn.BatchNorm2d(64, momentum=BN_MOMENTUM),
34 | nn.ReLU(inplace=True),
35 |
36 | nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),
37 | nn.BatchNorm2d(32, momentum=BN_MOMENTUM),
38 | nn.ReLU(inplace=True),
39 |
40 | nn.ConvTranspose2d(32, 16, 2, stride=2, padding=0, output_padding=0, bias=False),
41 | nn.BatchNorm2d(16, momentum=BN_MOMENTUM),
42 | nn.ReLU(inplace=True),
43 |
44 | nn.Conv2d(16, 1, kernel_size=1, stride=1, padding=0),
45 | nn.ReLU(inplace=True)
46 | )
47 | self.feature_head = nn.Sequential(
48 | nn.Dropout2d(0.2),
49 | ResBlock(in_dim=384, out_dim=384, dilation=0, norm="bn"),
50 | ResBlock(in_dim=384, out_dim=256, dilation=0, norm="bn"),
51 |
52 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
53 | BatchNorm2d(256, momentum=BN_MOMENTUM),
54 | nn.ReLU(inplace=True),
55 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
56 | )
57 | def forward(self, x):
58 | f_list = []
59 | x = self.layer1(x)
60 | f_list.append(x)
61 | x2 = self.layer2(x)
62 | f_list.append(x2)
63 | x = self.layer3(x2)
64 | f_list.append(x)
65 |
66 |
67 | f = self.neck(f_list)
68 | f = torch.cat([f[0], F.interpolate(f[1], scale_factor=2, mode='bilinear', align_corners=True),
69 | F.interpolate(f[2], scale_factor=4, mode='bilinear', align_corners=True)], dim=1)
70 |
71 | x = self.loc_head(f)
72 |
73 | f = self.neck2f(f_list)
74 | f = torch.cat([f[0], F.interpolate(f[1], scale_factor=2, mode='bilinear', align_corners=True),
75 | F.interpolate(f[2], scale_factor=4, mode='bilinear', align_corners=True)], dim=1)
76 | feature = self.feature_head(f)
77 | return feature, x
78 |
79 |
80 |
81 |
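
Note: a quick shape check for the two heads (a sketch; run from the repository root, with pretrained=False to avoid the VGG16-BN download). layer1/2/3 stride the input by 4/8/16, both FPN branches are fused at stride 4, and the localization head deconvolves twice back to full resolution:

import torch
from model.VGG.VGG16_FPN import VGG16_FPN

net = VGG16_FPN(pretrained=False).eval()
x = torch.rand(1, 3, 256, 256)
feature, den = net(x)
print(feature.shape)  # torch.Size([1, 256, 64, 64])  - matching features at stride 4
print(den.shape)      # torch.Size([1, 1, 256, 256])  - density map at full resolution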
--------------------------------------------------------------------------------
/model/VGG/conv.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 | conv_cfg = {
5 | 'Conv': nn.Conv2d,
6 | # TODO: octave conv
7 | }
8 |
9 |
10 | class BasicDeconv(nn.Module):
11 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, activate=None):
12 | super(BasicDeconv, self).__init__()
13 | bias = False if activate == 'bn' else True
14 | self.tconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=0, bias=bias)
15 | if activate == 'bn':
16 | self.bn = nn.BatchNorm2d(out_channels)
17 | elif activate == 'in':
18 | self.bn = nn.InstanceNorm2d(out_channels)
19 | else:
20 | self.bn = None
21 | def forward(self, x):
22 | x = self.tconv(x)
23 | if self.bn is not None:
24 | x = self.bn(x)
25 | return F.relu(x, inplace=True)
26 |
27 |
28 | class BasicConv(nn.Module):
29 | def __init__(self, in_channels, out_channels,kernel_size,stride=1, padding=0,dilation=1, norm=None, relu =False):
30 | super(BasicConv, self).__init__()
31 | self.relu = relu
32 | bias = True if norm is None else False
33 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,stride=stride,
34 | padding=padding,dilation=dilation, bias=bias)
35 | if norm == 'bn':
36 | self.norm = nn.BatchNorm2d(out_channels,eps=1e-05, momentum=0.01)
37 | elif norm == 'in':
38 | self.norm = nn.InstanceNorm2d(out_channels)
39 | elif norm is None:
40 | self.norm = None
41 |
42 |
43 | def forward(self, x):
44 | x = self.conv(x)
45 | x = self.norm(x) if self.norm is not None else x
46 | x = F.relu(x, inplace=True) if self.relu else x
47 | return x
48 |
49 | class ResBlock(nn.Module):
50 | def __init__(self, in_dim,out_dim, dilation=1, norm="bn"):
51 | super(ResBlock, self).__init__()
52 | padding = dilation+1
53 | model = []
54 | medium_dim = in_dim//4
55 | model.append(BasicConv(in_dim, medium_dim, 1, 1, 0, norm = norm, relu =True))
56 | model.append(BasicConv(medium_dim, medium_dim, 3, 1, padding = padding, dilation=dilation+1, norm=norm, relu =True))
57 | model.append(BasicConv(medium_dim, out_dim, 1, 1, 0, norm=norm, relu =False))
58 | self.model = nn.Sequential(*model)
59 | if in_dim !=out_dim:
60 | self.downsample = BasicConv(in_dim, out_dim, 1, 1, 0, norm=norm, relu =False)
61 | else:
62 | self.downsample =None
63 | self.relu = nn.ReLU(inplace=True)
64 | def forward(self, x):
65 | residual = x
66 | out = self.model(x)
67 | if self.downsample is not None:
68 |
69 | out += self.downsample(residual)
70 | else:
71 | out += residual
72 | out = self.relu(out)
73 | return out
74 | def build_conv_layer(cfg, *args, **kwargs):
75 | """ Build convolution layer
76 |
77 | Args:
78 | cfg (None or dict): cfg should contain:
79 | type (str): identify conv layer type.
80 | layer args: args needed to instantiate a conv layer.
81 |
82 | Returns:
83 | layer (nn.Module): created conv layer
84 | """
85 | if cfg is None:
86 | cfg_ = dict(type='Conv')
87 | else:
88 | assert isinstance(cfg, dict) and 'type' in cfg
89 | cfg_ = cfg.copy()
90 |
91 | layer_type = cfg_.pop('type')
92 | if layer_type not in conv_cfg:
93 | raise KeyError('Unrecognized norm type {}'.format(layer_type))
94 | else:
95 | conv_layer = conv_cfg[layer_type]
96 |
97 | layer = conv_layer(*args, **kwargs, **cfg_)
98 |
99 | return layer
100 |
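
Note: a small usage sketch (assumes the repository root is on PYTHONPATH). build_conv_layer currently only knows the plain 'Conv' type, and ResBlock is a 1x1 -> 3x3 -> 1x1 bottleneck with an optional 1x1 projection on the skip path:

import torch
from model.VGG.conv import build_conv_layer, ResBlock

conv = build_conv_layer(None, 16, 32, kernel_size=3, padding=1)  # falls back to nn.Conv2d
block = ResBlock(in_dim=64, out_dim=128, dilation=0, norm='bn')
print(conv(torch.rand(1, 16, 32, 32)).shape)   # torch.Size([1, 32, 32, 32])
print(block(torch.rand(1, 64, 32, 32)).shape)  # torch.Size([1, 128, 32, 32])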
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__init__.py
--------------------------------------------------------------------------------
/model/__pycache__/VIC.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__pycache__/VIC.cpython-37.pyc
--------------------------------------------------------------------------------
/model/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/model/__pycache__/optimal_transport_layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__pycache__/optimal_transport_layer.cpython-37.pyc
--------------------------------------------------------------------------------
/model/__pycache__/points_from_den.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__pycache__/points_from_den.cpython-37.pyc
--------------------------------------------------------------------------------
/model/necks/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .fpn import FPN
3 |
4 |
5 | __all__ = ['FPN']
6 |
--------------------------------------------------------------------------------
/model/necks/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/necks/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/model/necks/__pycache__/fpn.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/necks/__pycache__/fpn.cpython-37.pyc
--------------------------------------------------------------------------------
/model/necks/fpn.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 |
5 |
6 | class FPN(nn.Module):
7 | """
8 | Feature Pyramid Network.
9 |
10 | This is an implementation of - Feature Pyramid Networks for Object
11 | Detection (https://arxiv.org/abs/1612.03144)
12 |
13 | Args:
14 | in_channels (List[int]):
15 | number of input channels per scale
16 |
17 | out_channels (int):
18 | number of output channels (used at each scale)
19 |
20 | num_outs (int):
21 | number of output scales
22 |
23 | start_level (int):
24 | index of the first input scale to use as an output scale
25 |
26 | end_level (int, default=-1):
27 | index of the last input scale to use as an output scale
28 |
29 | Example:
30 | >>> import torch
31 | >>> in_channels = [2, 3, 5, 7]
32 | >>> scales = [340, 170, 84, 43]
33 | >>> inputs = [torch.rand(1, c, s, s)
34 | ... for c, s in zip(in_channels, scales)]
35 | >>> self = FPN(in_channels, 11, len(in_channels)).eval()
36 | >>> outputs = self.forward(inputs)
37 | >>> for i in range(len(outputs)):
38 | ... print('outputs[{}].shape = {!r}'.format(i, outputs[i].shape))
39 | outputs[0].shape = torch.Size([1, 11, 340, 340])
40 | outputs[1].shape = torch.Size([1, 11, 170, 170])
41 | outputs[2].shape = torch.Size([1, 11, 84, 84])
42 | outputs[3].shape = torch.Size([1, 11, 43, 43])
43 | """
44 |
45 | def __init__(self,in_channels,out_channels,num_outs,start_level=0,end_level=-1,bn=True):
46 | super(FPN, self).__init__()
47 | assert isinstance(in_channels, list)
48 | self.in_channels = in_channels
49 | self.out_channels = out_channels
50 | self.num_ins = len(in_channels)
51 | self.num_outs = num_outs
52 |
53 | self.fp16_enabled = False
54 |
55 | if end_level == -1:
56 | self.backbone_end_level = self.num_ins
57 | assert num_outs >= self.num_ins - start_level
58 | else:
59 | # if end_level < inputs, no extra level is allowed
60 | self.backbone_end_level = end_level
61 | assert end_level <= len(in_channels)
62 | assert num_outs == end_level - start_level
63 | self.start_level = start_level
64 | self.end_level = end_level
65 |
66 | self.lateral_convs = nn.ModuleList()
67 | self.fpn_convs = nn.ModuleList()
68 |
69 | for i in range(self.start_level, self.backbone_end_level):
70 | l_conv = Conv2d(in_channels[i], out_channels, 1, bn=bn, bias=not bn, same_padding=True)
71 |
72 | fpn_conv = Conv2d(out_channels, out_channels, 3, bn=bn, bias=not bn, same_padding=True)
73 |
74 | self.lateral_convs.append(l_conv)
75 | self.fpn_convs.append(fpn_conv)
76 |
77 | # add extra conv layers (e.g., RetinaNet)
78 | self.init_weights()
79 | # default init_weights for conv(msra) and norm in ConvModule
80 | def init_weights(self):
81 | for m in self.modules():
82 | if isinstance(m, nn.Conv2d):
83 | nn.init.xavier_uniform_(m.weight)
84 |
85 |
86 | def forward(self, inputs):
87 |
88 | assert len(inputs) == len(self.in_channels)
89 |
90 | # build laterals
91 | laterals = [lateral_conv(inputs[i + self.start_level]) for i, lateral_conv in enumerate(self.lateral_convs)]
92 |
93 | # build top-down path
94 | used_backbone_levels = len(laterals)
95 | for i in range(used_backbone_levels - 1, 0, -1):
96 | prev_shape = laterals[i - 1].shape[2:]
97 | laterals[i - 1] += F.interpolate(laterals[i], size=prev_shape, mode='nearest')
98 |
99 | # build outputs
100 | # part 1: from original levels
101 | outs = [ self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) ]
102 |
103 |
104 | return tuple(outs)
105 |
106 |
107 |
108 | class Conv2d(nn.Module):
109 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, NL='relu', same_padding=False, bn=True, bias=True):
110 | super(Conv2d, self).__init__()
111 | padding = int((kernel_size - 1) // 2) if same_padding else 0
112 |
113 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=padding, bias=bias)
114 |
115 | self.bn = nn.BatchNorm2d(out_channels) if bn else None
116 | if NL == 'relu' :
117 | self.relu = nn.ReLU(inplace=False)
118 | elif NL == 'prelu':
119 | self.relu = nn.PReLU()
120 | else:
121 | self.relu = None
122 |
123 | def forward(self, x):
124 | x = self.conv(x)
125 | if self.bn is not None:
126 | x = self.bn(x)
127 | if self.relu is not None:
128 | x = self.relu(x)
129 | return x
--------------------------------------------------------------------------------
/model/optimal_transport_layer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | class Optimal_Transport_Layer(nn.Module):
5 | def __init__(self, config):
6 | super(Optimal_Transport_Layer, self).__init__()
7 | self.iters = config['sinkhorn_iterations']
8 | self.feature_dim = config['feature_dim']
9 | self.matched_threshold = config['matched_threshold']
10 | self.bin_score = torch.nn.Parameter(torch.tensor(1.),requires_grad=True)
11 | self.register_parameter('bin_score', self.bin_score)
12 | @property
13 | def loss(self):
14 | return self.matching_loss, self.hard_pair_loss
15 | def forward(self, mdesc0, mdesc1, match_gt=None, ignore=False):
16 | # Compute matching descriptor distance.
17 | sim_matrix = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1)
18 |
19 | scores = sim_matrix / self.feature_dim ** .5
20 |
21 | # Run the optimal transport.
22 | scores = log_optimal_transport(
23 | scores, self.bin_score,
24 | iters=self.iters)
25 |
26 | # Get the matches with score above "match_threshold".
27 | max0 = scores[:, :-1, :-1].max(2) # for each point in a, its best match in b (returns b indices)
28 | max1 = scores[:, :-1, :-1].max(1) # for each point in b, its best match in a (returns a indices)
29 | indices0, indices1 = max0.indices, max1.indices
30 |
31 | mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0)
32 | mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1)
33 | zero = scores.new_tensor(0)
34 | mscores0 = torch.where(mutual0, max0.values.exp(), zero)
35 | mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero)
36 |
37 | valid0 = mutual0 & (mscores0 > self.matched_threshold)
38 | valid1 = mutual1 & valid0.gather(1, indices1)
39 | indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1))
40 | indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1))
41 |
42 |
43 | scores = scores.squeeze(0).exp()
44 |
45 | if match_gt is not None:
46 | matched_mask = torch.zeros_like(scores)
47 |
48 | matched_mask[match_gt['a2b'][:, 0], match_gt['a2b'][:, 1]] = 1
49 | if not ignore: matched_mask[match_gt['un_a'], -1] = 1
50 | if not ignore: matched_mask[-1, match_gt['un_b']] = 1
51 |
52 | self.matching_loss = -torch.log(scores[matched_mask == 1])
53 |
54 | top2_mask = matched_mask[:-1, :-1]
55 | scores_ = scores[:-1, :-1]* (1 - top2_mask)
56 | self.hard_pair_loss = -(torch.log(1- torch.cat([scores_.max(1)[0], scores_.max(0)[0]])))
57 |
58 | return scores, indices0.squeeze(0), indices1.squeeze(0), mscores0.squeeze(0), mscores1.squeeze(0)
59 |
60 | def log_sinkhorn_iterations(Z, log_mu, log_nu, iters: int):
61 | """ Perform Sinkhorn Normalization in Log-space for stability"""
62 |
63 | log_u, log_v = torch.zeros_like(log_mu), torch.zeros_like(log_nu) # initialized with the u,v=1, the log(u)=0, log(v)=0
64 | for _ in range(iters):
65 | log_u = log_mu - torch.logsumexp(Z + log_v.unsqueeze(1), dim=2)
66 | log_v = log_nu - torch.logsumexp(Z + log_u.unsqueeze(2), dim=1)
67 |
68 | return Z + log_u.unsqueeze(2) + log_v.unsqueeze(1)
69 |
70 |
71 | def log_optimal_transport(scores, alpha, iters: int):
72 | """ Perform Differentiable Optimal Transport in Log-space for stability"""
73 | b, m, n = scores.shape
74 | one = scores.new_tensor(1)
75 | ms, ns = (m*one).to(scores), (n*one).to(scores)
76 |
77 | bins0 = alpha.expand(b, m, 1)
78 | bins1 = alpha.expand(b, 1, n)
79 | alpha = alpha.expand(b, 1, 1)
80 |
81 | couplings = torch.cat([torch.cat([scores, bins0], -1),
82 | torch.cat([bins1, alpha], -1)], 1)
83 |
84 | norm = - (ms + ns).log() # normalization in the Log-space (log(1/(m+n)))
85 | log_mu = torch.cat([norm.expand(m), ns.log()[None] + norm])
86 | log_nu = torch.cat([norm.expand(n), ms.log()[None] + norm])
87 | log_mu, log_nu = log_mu[None].expand(b, -1), log_nu[None].expand(b, -1)
88 |
89 | Z = log_sinkhorn_iterations(couplings, log_mu, log_nu, iters)
90 | score = Z - norm # multiply probabilities by M+N
91 | return score
92 |
93 |
94 | def arange_like(x, dim: int):
95 | return x.new_ones(x.shape[dim]).cumsum(0) - 1 # traceable in 1.1
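
Note: the layer solves a SuperGlue-style transport problem in which the score matrix is augmented with a learnable dustbin row/column (bin_score) so unmatched points have somewhere to go. A minimal sketch of the core routine on random scores (import path assumed from this file's location):

import torch
from model.optimal_transport_layer import log_optimal_transport

m, n = 3, 4
scores = torch.randn(1, m, n)   # descriptor similarities / sqrt(d)
alpha = torch.tensor(1.0)       # dustbin score (learnable in the layer above)
Z = log_optimal_transport(scores, alpha, iters=100)
print(Z.shape)                  # torch.Size([1, 4, 5]): one extra dustbin row/column
# After the renormalization by `norm`, each of the first m rows of exp(Z)
# sums to ~1, with the dustbin column absorbing unmatched mass.
print(Z.exp()[0, :m].sum(dim=1))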
--------------------------------------------------------------------------------
/model/points_from_den.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | class get_ROI_and_MatchInfo(object):
6 | def __init__(self, train_size, radius=8, feature_scale=0.125):
7 | self.h = train_size[0]
8 | self.w = train_size[1]
9 | self.radius = radius
10 | self.feature_scale = feature_scale
11 | def __call__(self,target_a, target_b,noise=None, shape =None):
12 | gt_a, gt_b = target_a['points'], target_b['points']
13 | if shape is not None:
14 | self.h = shape[0]
15 | self.w = shape[1]
16 | if noise == 'ab':
17 | gt_a, gt_b = gt_a + torch.randn(gt_a.size()).to(gt_a)*2, gt_b + torch.randn(gt_b.size()).to(gt_b)*2
18 | elif noise == 'a':
19 | gt_a = gt_a + torch.randn(gt_a.size()).to(gt_a)
20 | elif noise == 'b':
21 | gt_b = gt_b + torch.randn(gt_b.size()).to(gt_b)
22 |
23 |
24 | roi_a = torch.zeros(gt_a.size(0), 5).to(gt_a)
25 | roi_b = torch.zeros(gt_b.size(0), 5).to(gt_b)
26 | roi_a[:, 0] = 0
27 | roi_a[:, 1] = torch.clamp(gt_a[:, 0] - self.radius,min=0)
28 | roi_a[:, 2] = torch.clamp(gt_a[:, 1] - self.radius, min=0)
29 | roi_a[:, 3] = torch.clamp(gt_a[:, 0] + self.radius, max=self.w)
30 | roi_a[:, 4] = torch.clamp(gt_a[:, 1] + self.radius, max=self.h)
31 |
32 | roi_b[:, 0] = 1
33 | roi_b[:, 1] = torch.clamp(gt_b[:, 0] - self.radius, min=0)
34 | roi_b[:, 2] = torch.clamp(gt_b[:, 1] - self.radius, min=0)
35 | roi_b[:, 3] = torch.clamp(gt_b[:, 0] + self.radius, max=self.w)
36 | roi_b[:, 4] = torch.clamp(gt_b[:, 1] + self.radius, max=self.h)
37 |
38 | pois = torch.cat([roi_a, roi_b], dim=0)
39 |
40 | # =================== match person ids between the points of two adjacent frames ===================
41 |
42 | a_ids = target_a['person_id']
43 | b_ids = target_b['person_id']
44 |
45 | dis = a_ids.unsqueeze(1).expand(-1,len(b_ids)) - b_ids.unsqueeze(0).expand(len(a_ids),-1)
46 | dis = dis.abs()
47 | matched_a, matched_b = torch.where(dis==0)
48 | matched_a2b = torch.stack([matched_a,matched_b],1)
49 | unmatched0 = torch.where(dis.min(1)[0]>0)[0]
50 | unmatched1 = torch.where(dis.min(0)[0]>0)[0]
51 |
52 | match_gt={'a2b': matched_a2b, 'un_a':unmatched0, 'un_b':unmatched1}
53 |
54 | return match_gt, pois
55 |
56 |
57 | def local_maximum_points(sub_pre, gaussian_maximum, radius=8.):
58 | sub_pre = sub_pre.detach()
59 | _,_,h,w = sub_pre.size()
60 | kernel = torch.ones(3,3)/9.
61 | kernel =kernel.unsqueeze(0).unsqueeze(0).cuda()
62 | weight = nn.Parameter(data=kernel, requires_grad=False)
63 | sub_pre = F.conv2d(sub_pre, weight, stride=1, padding=1)
64 |
65 | keep = F.max_pool2d(sub_pre, (5, 5), stride=2, padding=2)
66 | keep = F.interpolate(keep, scale_factor=2)
67 | keep = (keep == sub_pre).float()
68 | sub_pre = keep * sub_pre
69 |
70 | sub_pre[sub_pre < 0.25 * gaussian_maximum] = 0
71 | sub_pre[sub_pre > 0] = 1
72 | count = int(torch.sum(sub_pre).item())
73 |
74 | points = torch.nonzero(sub_pre)[:,[0,1,3,2]].float() # b,c,h,w->b,c,w,h
75 | rois = torch.zeros((points.size(0), 5)).float().to(sub_pre)
76 | rois[:, 0] = points[:, 0]
77 | rois[:, 1] = torch.clamp(points[:, 2] - radius, min=0)
78 | rois[:, 2] = torch.clamp(points[:, 3] - radius, min=0)
79 | rois[:, 3] = torch.clamp(points[:, 2] + radius, max=w)
80 | rois[:, 4] = torch.clamp(points[:, 3] + radius, max=h)
81 |
82 | pre_data = {'num': count, 'points': points, 'rois': rois}
83 | return pre_data
84 |
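
Note: a synthetic check of the peak extractor above (a sketch; requires a CUDA device because the smoothing kernel is created with .cuda(), and gaussian_maximum should be the peak value used when rendering the ground-truth density):

import torch
from model.points_from_den import local_maximum_points

ys = torch.arange(64.).view(-1, 1)
xs = torch.arange(64.).view(1, -1)
# One Gaussian blob with peak ~1.0 at (x=20, y=10).
den = torch.exp(-((xs - 20.) ** 2 + (ys - 10.) ** 2) / (2 * 4.0 ** 2))[None, None].cuda()
pre = local_maximum_points(den, gaussian_maximum=1.0, radius=8.)
print(pre['num'])     # 1
print(pre['points'])  # rows are (batch, channel, x, y) -> [[0., 0., 20., 10.]]
print(pre['rois'])    # clamped [batch, x0, y0, x1, y1] boxes of side ~2*radius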
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torchsummary==1.5.1
2 | tqdm==4.48.2
3 | scipy==1.6.2
4 | matplotlib==3.5.1
5 | tensorboardX
6 | tensorboard
7 | tensorflow
8 | easydict
9 | pandas
10 | numpy
11 | opencv-python
12 | pyyaml
--------------------------------------------------------------------------------
/results/Tracking_HT21_metric.py:
--------------------------------------------------------------------------------
1 | import os
2 | from collections import defaultdict
3 | import os.path as osp
4 | from train import compute_metrics_all_scenes
5 | import numpy as np
6 | import torch
7 | from PIL import Image
8 | def tracking_to_crowdflow():
9 | method = 'HT21_10'  # alternatives: 'HeadHunter_result', 'fairmot_head', 'PHDTT'
10 |
11 | # Root = os.path.join('D:\Crowd_tracking/HeadHunter',method)
12 | Root = os.path.join('/media/E/ht/HeadHunter--T-master/results', method)
13 | # gt_root = 'D:\Crowd_tracking/dataset/HT21/train'
14 | scenes = sorted(os.listdir(Root))
15 | print(scenes)
16 | scenes_pred_dict = []
17 | scenes_gt_dict = []
18 | all_sum = []
19 |
20 |
21 | for _, i in enumerate(scenes,0):
22 | # if _>0:
23 | # break
24 | pred = defaultdict(list)
25 | gts = defaultdict(list)
26 |
27 | path = os.path.join(Root,i)
28 | id_list = []
29 | with open(path, 'r') as f:
30 | lines = f.readlines()
31 | for vi, line in enumerate(lines, 0):
32 | line = line.strip().split(',')
33 | img_id = int(line[0])
34 | tmp_id = int(line[1])
35 | pred[img_id].append(tmp_id)
36 | id_list.append(tmp_id)
37 |
38 | # with open(osp.join(gt_root, i.split('.')[0], 'gt', 'gt.txt'), 'r') as f:
39 | # lines = f.readlines()
40 | # for lin in lines:
41 | # lin_list = [float(i) for i in lin.rstrip().split(',')]
42 | # ind = int(lin_list[0])
43 | # gts[ind].append(int(lin_list[1]))
44 | # print(id_list)
45 | id = set(id_list)
46 | all_sum.append(len(id))
47 | print(all_sum, sum(all_sum[:5]), sum(all_sum[5:]))
48 |
49 |
50 | gt_pre_flow_cnt = torch.cat([torch.tensor([[133., 737., 734., 1040., 321.]]), torch.tensor(all_sum, dtype=torch.float32)[None]]).transpose(0, 1)
51 | print(gt_pre_flow_cnt)
52 | time = torch.tensor([585.,2080.,1000.,1050.,1008.])
53 | MAE = torch.mean(torch.abs(gt_pre_flow_cnt[:,0] - gt_pre_flow_cnt[:,1]))
54 | MSE = torch.mean((gt_pre_flow_cnt[:, 0] - gt_pre_flow_cnt[:, 1])**2).sqrt()
55 | WRAE = torch.sum(torch.abs(gt_pre_flow_cnt[:,0] - gt_pre_flow_cnt[:,1])/gt_pre_flow_cnt[:,0]*(time/(time.sum()+1e-10)))*100
56 | print(MAE, MSE, WRAE)
57 |
58 |
59 | # pred_dict = {'id': i, 'time': len(lines), 'first_frame': 0, 'inflow': [], 'outflow': []}
60 | # gt_dict = {'id': i, 'time': len(lines), 'first_frame': 0, 'inflow': [], 'outflow': []}
61 | #
62 | # interval = 75
63 | # img_num =len(gts.keys())
64 | # print(img_num)
65 | # for img_id, ids in gts.items():
66 | # if img_id>img_num-interval:
67 | # break
68 | #
69 | # img_id_b = img_id+interval
70 | #
71 | # pre_ids,pre_ids_b = pred[img_id],pred[img_id_b]
72 | # gt_ids,gt_ids_b = ids, gts[img_id_b]
73 | #
74 | # if img_id == 1:
75 | # pred_dict['first_frame'] = len(pre_ids)
76 | # gt_dict['first_frame'] = len(gt_ids)
77 | # # import pdb
78 | # # pdb.set_trace()
79 | #
80 | # # if (img_id-1) % interval ==0 or img_num== 0:
81 | # pre_inflow =set(pre_ids_b)-set(pre_ids)
82 | # pre_outflow = set(pre_ids)-set(pre_ids_b)
83 | #
84 | # gt_inflow = set(gt_ids_b)-set(gt_ids)
85 | # gt_outflow = set(gt_ids)-set(gt_ids_b)
86 | # pred_dict['inflow'].append(len(pre_inflow))
87 | # pred_dict['outflow'].append(len(pre_outflow))
88 | # gt_dict['inflow'].append(len(gt_inflow))
89 | # gt_dict['outflow'].append(len(gt_outflow))
90 | # # print(pred_dict, gt_dict)
91 | # scenes_pred_dict.append(pred_dict)
92 | # scenes_gt_dict.append(gt_dict)
93 | # MAE, MSE, WRAE, MIAE, MOAE, cnt_result = compute_metrics_all_scenes(scenes_pred_dict, scenes_gt_dict, interval)
94 | # print(MAE, MSE, WRAE, MIAE, MOAE, cnt_result)
95 |
96 |
97 | def id_counting():
98 | Root = 'D:/Crowd_tracking/dataset/HT21/train'
99 | scenes = os.listdir(Root)
100 | all_sum = []
101 | for i in scenes:
102 | path = os.path.join(Root,i,'gt/gt.txt')
103 | id_list = []
104 | with open(path, 'r') as f:
105 | lines = f.readlines()
106 | for line in lines:
107 | id_list.append(int(line.strip().split(',')[1]))
108 | id = set(id_list)
109 | all_sum.append(len(id))
110 | print(all_sum, sum(all_sum[:4]), sum(all_sum[4:]))
111 |
112 | if __name__ == '__main__':
113 |     import torch
114 |     # PHDTT
115 |     # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [380., 4530., 5528., 1531., 1648.]]).transpose(0, 1)
116 |     # HeadHunter (currently active)
117 |     gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [307., 2145., 2556., 1531., 888.]]).transpose(0, 1)
118 |     #
119 |     # LOI
120 |     # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [72.4, 493.1, 275.3, 409.2, 189.8]]).transpose(0, 1)
121 |     # Hungarian s=10
122 |     # gt_pre_flow_cnt = torch.tensor([[ 129.,  133.],
123 |     #                                 [ 421.,  737.],
124 |     #                                 [ 332.,  734.],
125 |     #                                 [ 331., 1040.],
126 |     #                                 [ 185.,  321.]])
127 |     #
128 |     #
129 |     # Hungarian s=12
130 |     # gt_pre_flow_cnt = torch.tensor([[ 188.,  133.],
131 |     #                                 [ 779.,  737.],
132 |     #                                 [1069.,  734.],
133 |     #                                 [ 772., 1040.],
134 |     #                                 [ 324.,  321.]])
135 |     # Hungarian s=15
136 |     # gt_pre_flow_cnt = torch.tensor([[ 298.,  133.],
137 |     #                                 [1833.,  737.],
138 |     #                                 [1921.,  734.],
139 |     #                                 [1641., 1040.],
140 |     #                                 [ 752.,  321.]])
141 |     #
142 |     # Tracking
143 |     # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [284., 1364., 1435., 1975., 539.]]).transpose(0, 1)
144 |
145 |     # SSIC sampling
146 |     # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [432.6235237121582, 4244.325263977051, 2307.327682495117, 2219.3844146728516, 1355.9616165161133]]).transpose(0, 1)
147 |     # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [83.13096618652344, 216.19476318359375, 224.47157287597656, 174.38177490234375, 118.87664794921875]]).transpose(0, 1)
148 |     #
149 |     time = torch.tensor([585., 2080., 1000., 1050., 1008.])
150 |     MAE = torch.mean(torch.abs(gt_pre_flow_cnt[:, 0] - gt_pre_flow_cnt[:, 1]))
151 |     MSE = torch.mean((gt_pre_flow_cnt[:, 0] - gt_pre_flow_cnt[:, 1]) ** 2).sqrt()
152 |     WRAE = torch.sum(torch.abs(gt_pre_flow_cnt[:, 0] - gt_pre_flow_cnt[:, 1]) / gt_pre_flow_cnt[:, 0] * (time / (time.sum() + 1e-10))) * 100
153 |
154 |     print(MAE, MSE, WRAE)
155 |
156 |     tracking_to_crowdflow()
157 |
--------------------------------------------------------------------------------
/vision/engine.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import math
5 | import sys
6 | import time
7 | from collections import defaultdict
8 |
9 | import brambox
10 | import pandas as pd
11 | import numpy as np
12 | import torch
13 | import torchvision.models.detection.mask_rcnn
14 |
15 | from head_detection.vision import utils
16 | from brambox.stat._matchboxes import match_det, match_anno
17 | from brambox.stat import coordinates, mr_fppi, ap, pr, threshold, fscore, peak, lamr
18 |
19 |
20 | def check_empty_target(targets):
21 |     for tar in targets:
22 |         if len(tar['boxes']) < 1:
23 |             return True
24 |     return False
25 |
26 |
27 | def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
28 |     model.train()
29 |     metric_logger = utils.MetricLogger(delimiter=" ")
30 |     metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
31 |     header = 'Epoch: [{}]'.format(epoch)
32 |
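    # Editor's note: for epoch 0 only, utils.warmup_lr_scheduler (as in the
    # torchvision detection reference code) ramps the learning rate linearly
    # from lr * warmup_factor up to the base lr over the first `warmup_iters`
    # iterations, which stabilises early detection-loss spikes.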
33 |     lr_scheduler = None
34 |     if epoch == 0:
35 |         warmup_factor = 1. / 1000
36 |         warmup_iters = min(1000, len(data_loader) - 1)
37 |
38 |         lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
39 |
40 |     for images, targets in metric_logger.log_every(data_loader, print_freq, header):
41 |         if check_empty_target(targets):
42 |             continue
43 |         images = list(image.to(device) for image in images)
44 |         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
45 |         loss_dict = model(images, targets)
46 |
47 |         losses = sum(loss for loss in loss_dict.values())
48 |
49 |         # reduce losses over all GPUs for logging purposes
50 |         loss_dict_reduced = utils.reduce_dict(loss_dict)
51 |         losses_reduced = sum(loss for loss in loss_dict_reduced.values())
52 |
53 |         loss_value = losses_reduced.item()
54 |
55 |         if not math.isfinite(loss_value):
56 |             print("Loss is {}, stopping training".format(loss_value))
57 |             print(loss_dict_reduced)
58 |             sys.exit(1)
59 |
60 |         optimizer.zero_grad()
61 |         losses.backward()
62 |         optimizer.step()
63 |
64 |         if lr_scheduler is not None:
65 |             lr_scheduler.step()
66 |
67 |         metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
68 |         metric_logger.update(lr=optimizer.param_groups[0]["lr"])
69 |
70 |     return metric_logger
71 |
72 |
73 | def _get_iou_types(model):
74 |     model_without_ddp = model
75 |     if isinstance(model, torch.nn.parallel.DistributedDataParallel):
76 |         model_without_ddp = model.module
77 |     iou_types = ["bbox"]
78 |     if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
79 |         iou_types.append("segm")
80 |     if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
81 |         iou_types.append("keypoints")
82 |     return iou_types
83 |
84 |
85 | def safe_div(x, y):
86 |     if y == 0:
87 |         return 0
88 |     return x / y
89 |
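# Editor's note on the two CLEAR-style detection metrics below: get_moda
# computes MODA = 1 - mean over frames of (misses + false positives) / #GT,
# using brambox's match_det to label each detection TP/FP; get_modp averages
# the per-frame mean of the match criterion ('criteria', IoU or the pdollar
# overlap when ignore regions are present), i.e. localisation precision.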
90 | def get_moda(det, anno, threshold=0.2, ignore=None):
91 |     if ignore is None:
92 |         ignore = anno.ignore.any()
93 |
94 |     dets_per_frame = anno.groupby('image').filter(lambda x: any(x['ignore'] == 0))
95 |     dets_per_frame = dets_per_frame.groupby('image').size().to_dict()  # frames with a non-ignored annotation (currently unused)
96 |     # Other params for finding matched annotations
97 |     crit = coordinates.pdollar if ignore else coordinates.iou
98 |     label = len({*det.class_label.unique(), *anno.class_label.unique()}) > 1
99 |     matched_dets = match_det(det, anno, threshold, criteria=crit,
100 |                              class_label=label, ignore=2 if ignore else 0)
101 |     fp_per_im = matched_dets[matched_dets.fp == True].groupby('image').size().to_dict()
102 |     tp_per_im = matched_dets[matched_dets.tp == True].groupby('image').size().to_dict()
103 |     valid_anno = anno[anno.ignore == False].groupby('image').size().to_dict()
104 |     # Use .get() below: a frame may have no TP or no FP detections at all.
105 |
106 |     moda_ = []
107 |     for k, _ in valid_anno.items():
108 |         n_gt = valid_anno[k]
109 |         miss = n_gt - tp_per_im.get(k, 0)
110 |         fp = fp_per_im.get(k, 0)
111 |         moda_.append(safe_div((miss + fp), n_gt))
112 |     return 1 - np.mean(moda_)
113 |
114 |
115 | def get_modp(det, anno, threshold=0.2, ignore=None):
116 |     if ignore is None:
117 |         ignore = anno.ignore.any()
118 |     # Compute TP/FP
119 |     if not {'tp', 'fp'}.issubset(det.columns):
120 |         crit = coordinates.pdollar if ignore else coordinates.iou
121 |         label = len({*det.class_label.unique(), *anno.class_label.unique()}) > 1
122 |         det = match_anno(det, anno, threshold, criteria=crit, class_label=label, ignore=2 if ignore else 0)
123 |     elif not det.confidence.is_monotonic_decreasing:
124 |         det = det.sort_values('confidence', ascending=False)
125 |     modp = det.groupby('image')['criteria'].mean().mean()
126 |     return modp
127 |
128 | @torch.no_grad()
129 | def evaluate(model, data_loader, out_path=None, benchmark=None):
130 |     """
131 |     Evaluate the model on the test set, reporting AP, log-average miss rate (LAMR), F1, MODA and MODP.
132 |     """
133 |     n_threads = torch.get_num_threads()
134 |     torch.set_num_threads(1)
135 |     device = torch.device('cuda')
136 |     cpu_device = torch.device("cpu")
137 |     model.eval()
138 |     metric_logger = utils.MetricLogger(delimiter=" ")
139 |     header = 'Valid:'
140 |
141 |     # Brambox eval related
142 |     pred_dict = defaultdict(list)
143 |     gt_dict = defaultdict(list)
144 |     results = {}
145 |     for i, (images, targets) in enumerate(metric_logger.log_every(data_loader, 100, header)):
146 |         images = list(img.to(device) for img in images)
147 |
148 |         torch.cuda.synchronize()
149 |         model_time = time.time()
150 |         outputs = model(images)
151 |
152 |         outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
153 |         model_time = time.time() - model_time
154 |         evaluator_time = time.time()
155 |         # Prediction lists
156 |         pred_boxes = [p['boxes'].numpy() for p in outputs]
157 |         pred_scores = [p['scores'].numpy() for p in outputs]
158 |
159 |         # GT list
160 |         gt_boxes = [gt['boxes'].numpy() for gt in targets]
161 |
162 |         # ignore flags are used in our benchmark and the CrowdHuman benchmark
163 |         ignore_ar = [gt['ignore'] for gt in targets]
164 |         # Sanity check: targets and predictions must have the same batch size
165 |         assert len(gt_boxes) == len(pred_boxes)
166 |         for j in range(len(gt_boxes)):
167 |             im_name = str(targets[j]['image_id']) + '.jpg'
168 |             # write to results dict in MOT format
169 |             results[targets[j]['image_id'].item()] = {'boxes': pred_boxes[j],
170 |                                                       'scores': pred_scores[j]}
171 |             for _, (p_b, p_s) in enumerate(zip(pred_boxes[j], pred_scores[j])):
172 |                 pred_dict['image'].append(im_name)
173 |                 pred_dict['class_label'].append('head')
174 |                 pred_dict['id'].append(0)
175 |                 pred_dict['x_top_left'].append(p_b[0])
176 |                 pred_dict['y_top_left'].append(p_b[1])
177 |                 pred_dict['width'].append(p_b[2] - p_b[0])
178 |                 pred_dict['height'].append(p_b[3] - p_b[1])
179 |                 pred_dict['confidence'].append(p_s)
180 |
181 |             for _, (gt_b, ignore_val) in enumerate(zip(gt_boxes[j], ignore_ar[j])):
182 |                 gt_dict['image'].append(im_name)
183 |                 gt_dict['class_label'].append('head')
184 |                 gt_dict['id'].append(0)
185 |                 gt_dict['x_top_left'].append(gt_b[0])
186 |                 gt_dict['y_top_left'].append(gt_b[1])
187 |                 gt_dict['width'].append(gt_b[2] - gt_b[0])
188 |                 gt_dict['height'].append(gt_b[3] - gt_b[1])
189 |                 gt_dict['ignore'].append(ignore_val)
190 |
191 |         evaluator_time = time.time() - evaluator_time
192 |         metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
193 |
194 |     # Save results in MOT format if out_path is provided
195 |     if out_path is not None:
196 |         data_loader.dataset.write_results_files(results, out_path)
197 |     # gather the stats from all processes
198 |     pred_df = pd.DataFrame(pred_dict)
199 |     gt_df = pd.DataFrame(gt_dict)
200 |     pred_df['image'] = pred_df['image'].astype('category')
201 |     gt_df['image'] = gt_df['image'].astype('category')
202 |     pr_ = pr(pred_df, gt_df, ignore=True)
203 |     ap_ = ap(pr_)
204 |     mr_fppi_ = mr_fppi(pred_df, gt_df, threshold=0.5, ignore=True)
205 |     lamr_ = lamr(mr_fppi_)
206 |     f1_ = fscore(pr_)
207 |     f1_ = f1_.fillna(0)
208 |     threshold_ = peak(f1_)  # PR point with the highest F1 score
209 |
210 |     moda = get_moda(pred_df, gt_df, threshold=0.2, ignore=True)
211 |     modp = get_modp(pred_df, gt_df, threshold=0.2, ignore=True)
212 |
213 |     result_dict = {'AP': ap_, 'MMR': lamr_,
214 |                    'f1': threshold_.f1, 'r': pr_['recall'].values[-1],
215 |                    'moda': moda, 'modp': modp}
216 |
217 |     metric_logger.synchronize_between_processes()
218 |
219 |     torch.set_num_threads(n_threads)
220 |     return result_dict
221 |
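# Editor's sketch of the intended call pattern (names such as
# get_detection_model, optimizer and the loaders are hypothetical):
#
#   device = torch.device('cuda')
#   model = get_detection_model(num_classes=2).to(device)
#   for epoch in range(num_epochs):
#       train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=100)
#       stats = evaluate(model, val_loader, out_path=None)
#       print(stats['AP'], stats['MMR'], stats['moda'], stats['modp'])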
--------------------------------------------------------------------------------
/vision/transform.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | from torchvision.transforms import functional as F
4 |
5 |
6 | def _flip_coco_person_keypoints(kps, width):
7 |     flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
8 |     flipped_data = kps[:, flip_inds]
9 |     flipped_data[..., 0] = width - flipped_data[..., 0]
10 |     # Maintain COCO convention that if visibility == 0, then x, y = 0
11 |     inds = flipped_data[..., 2] == 0
12 |     flipped_data[inds] = 0
13 |     return flipped_data
14 |
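# Editor's note: `flip_inds` swaps the left/right members of each COCO
# keypoint pair (eyes, ears, shoulders, elbows, wrists, hips, knees, ankles)
# so that a horizontally mirrored person keeps anatomically correct labels;
# index 0 (the nose) maps to itself.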
15 |
16 | class Compose(object):
17 |     def __init__(self, transforms):
18 |         self.transforms = transforms
19 |
20 |     def __call__(self, image, target):
21 |         for t in self.transforms:
22 |             image, target = t(image, target)
23 |         return image, target
24 |
25 |
26 | class RandomHorizontalFlip(object):
27 |     def __init__(self, prob):
28 |         self.prob = prob
29 |
30 |     def __call__(self, image, target):
31 |         if random.random() < self.prob:
32 |             height, width = image.shape[-2:]
33 |             image = image.flip(-1)
34 |             bbox = target["boxes"]
35 |             bbox[:, [0, 2]] = width - bbox[:, [2, 0]]  # mirror x1/x2 and swap them to keep x1 < x2
36 |             target["boxes"] = bbox
37 |             if "masks" in target:
38 |                 target["masks"] = target["masks"].flip(-1)
39 |             if "keypoints" in target:
40 |                 keypoints = target["keypoints"]
41 |                 keypoints = _flip_coco_person_keypoints(keypoints, width)
42 |                 target["keypoints"] = keypoints
43 |         return image, target
44 |
45 |
46 | class ToTensor(object):
47 |     def __call__(self, image, target):
48 |         image = F.to_tensor(image)
49 |         return image, target
50 |
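# Editor's sketch of the assumed usage (mirroring torchvision's detection
# reference; `pil_image`, `boxes` and `labels` are placeholders). ToTensor
# must come first because RandomHorizontalFlip reads image.shape[-2:] and
# calls .flip(-1) on a CHW tensor:
#
#   transforms = Compose([ToTensor(), RandomHorizontalFlip(0.5)])
#   image, target = transforms(pil_image, {"boxes": boxes, "labels": labels})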
--------------------------------------------------------------------------------