├── .idea
│   ├── DRNet.iml
│   ├── encodings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── workspace.xml
├── README.md
├── __pycache__
│   ├── config.cpython-37.pyc
│   └── train.cpython-37.pyc
├── config.py
├── datasets
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   ├── dataset.cpython-37.pyc
│   │   └── samplers.cpython-37.pyc
│   ├── dataset.py
│   ├── dataset_prepare
│   │   ├── __init__.py
│   │   ├── functions.py
│   │   ├── info.json
│   │   ├── scene_label.py
│   │   ├── train_val_divide.py
│   │   └── video_vis.py
│   ├── samplers.py
│   └── setting
│       ├── HT21.py
│       ├── SENSE.py
│       ├── __init__.py
│       └── __pycache__
│           ├── HT21.cpython-37.pyc
│           ├── SENSE.cpython-37.pyc
│           └── __init__.cpython-37.pyc
├── demo_code
│   ├── image2video.py
│   ├── test_CroHD.py
│   ├── test_beijng.py
│   └── video2img.py
├── figures
│   ├── demo_screen1.png
│   ├── framework1.png
│   └── utils
│       ├── 0.png
│       ├── 30.png
│       ├── __init__.py
│       ├── frame_figure
│       │   ├── 112500_112501_matches.png
│       │   ├── 112500_112501_matches_vis.png
│       │   ├── 112501_b_vis_.jpg
│       │   ├── 112501_vis_.jpg
│       │   ├── assign.png
│       │   ├── assign_P.npy
│       │   ├── cost.png
│       │   ├── cost_c.npy
│       │   ├── cost_c_.npy
│       │   ├── hot_map.py
│       │   └── id.npy
│       ├── info.json
│       └── intro.py
├── misc
│   ├── KPI_pool.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── KPI_pool.cpython-37.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── dot_ops.cpython-37.pyc
│   │   ├── get_bbox.cpython-37.pyc
│   │   ├── inflation.cpython-37.pyc
│   │   ├── layer.cpython-37.pyc
│   │   ├── nms.cpython-37.pyc
│   │   ├── transforms.cpython-37.pyc
│   │   └── utils.cpython-37.pyc
│   ├── cal_mean.py
│   ├── dot_ops.py
│   ├── evaluation_code.py
│   ├── get_bbox.py
│   ├── inflation.py
│   ├── layer.py
│   ├── modelsummary.py
│   ├── nms.py
│   ├── post_process.py
│   ├── transforms.py
│   └── utils.py
├── model
│   ├── MatchTool
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── compute_metric.cpython-37.pyc
│   │   │   └── utils.cpython-37.pyc
│   │   ├── compute_metric.py
│   │   └── utils.py
│   ├── PreciseRoIPooling
│   │   ├── .gitignore
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── _assets
│   │   │   └── prroi_visualization.png
│   │   ├── pytorch
│   │   │   ├── prroi_pool
│   │   │   │   ├── .gitignore
│   │   │   │   ├── __init__.py
│   │   │   │   ├── functional.py
│   │   │   │   ├── prroi_pool.py
│   │   │   │   └── src
│   │   │   │       ├── prroi_pooling_gpu.c
│   │   │   │       ├── prroi_pooling_gpu.h
│   │   │   │       ├── prroi_pooling_gpu_impl.cu
│   │   │   │       └── prroi_pooling_gpu_impl.cuh
│   │   │   └── tests
│   │   │       └── test_prroi_pooling2d.py
│   │   ├── src
│   │   │   ├── prroi_pooling_gpu_impl.cu
│   │   │   └── prroi_pooling_gpu_impl.cuh
│   │   └── tensorflow
│   │       ├── prroi_pool
│   │       │   ├── CMakeLists.txt
│   │       │   ├── __init__.py
│   │       │   ├── precise_roi_pooling_ops.py
│   │       │   └── src
│   │       │       ├── kernels
│   │       │       │   ├── build_cuda.py
│   │       │       │   ├── external
│   │       │       │   │   ├── prroi_pooling_gpu_impl.cu
│   │       │       │   │   └── prroi_pooling_gpu_impl.cuh
│   │       │       │   ├── precise_roi_pooling.h
│   │       │       │   ├── precise_roi_pooling_kernels.cc
│   │       │       │   └── precise_roi_pooling_kernels.cu.cc
│   │       │       └── ops
│   │       │           └── precise_roi_pooling_ops.cc
│   │       └── tests
│   │           ├── precise_roi_pooling_ops_test.py
│   │           └── test_binaries
│   │               └── 2_2_0.5
│   │                   ├── features.npy
│   │                   ├── gradients0.npy
│   │                   ├── gradients1.npy
│   │                   ├── real_outputs.npy
│   │                   └── rois.npy
│   ├── VGG
│   │   ├── VGG16_FPN.py
│   │   └── conv.py
│   ├── VIC.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── VIC.cpython-37.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── optimal_transport_layer.cpython-37.pyc
│   │   └── points_from_den.cpython-37.pyc
│   ├── necks
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   └── fpn.cpython-37.pyc
│   │   └── fpn.py
│   ├── optimal_transport_layer.py
│   └── points_from_den.py
├── requirements.txt
├── results
│   ├── LOI_SENSE_metric.py
│   ├── Tracking_HT21_metric.py
│   └── sense_result_CFM
├── test_HT21.py
├── test_SENSE.py
├── train.py
└── vision
    ├── engine.py
    ├── transform.py
    └── utils.py
/.idea/DRNet.iml:
--------------------------------------------------------------------------------
[PyCharm project file; XML content not preserved in this dump]
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
[PyCharm encoding settings; XML content not preserved in this dump]
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
[PyCharm interpreter settings (Python); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
[PyCharm module list; XML content not preserved in this dump]
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
[PyCharm workspace state; XML content not preserved in this dump]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DRNet for Video Individual Counting (CVPR 2022)
## Introduction
This is the official PyTorch implementation of the paper [**DR.VIC: Decomposition and Reasoning for Video Individual Counting**](https://arxiv.org/abs/2203.12335). Unlike single-image counting methods, it counts the total number of pedestrians in a video sequence, with each person counted only once even if they appear in multiple frames. DRNet decomposes this new task into estimating the initial crowd number in the first frame and integrating the differential crowd numbers over a set of subsequent image pairs (namely, each current frame and its preceding frame).
![framework](./figures/framework1.png)

# Catalog
- [x] Testing code (2022.3.19)
- [x] PyTorch pretrained models (2022.3.19)
- [x] Training code
  - [x] HT21
  - [x] SenseCrowd (2022.9.30)

# Getting started

## Preparation

- Clone this repo into the directory ```Root/DRNet```.
- Install dependencies. We use Python 3.7 and PyTorch >= 1.6.0: http://pytorch.org.

```bash
conda create -n DRNet python=3.7
conda activate DRNet
conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch
cd ${DRNet}
pip install -r requirements.txt
```

- [PreciseRoIPooling](https://github.com/vacancy/PreciseRoIPooling) is used for extracting the feature descriptors.

  Note: the PreciseRoIPooling [1] module is included in this repo, but you may run into some problems with it when running the code:

  1. If you are prompted to install ninja, the following commands will help:
  ```bash
  wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
  sudo unzip ninja-linux.zip -d /usr/local/bin/
  sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force
  ```
  2. If you encounter errors when compiling PreciseRoIPooling, look through the original repo's [issues](https://github.com/vacancy/PreciseRoIPooling/issues) for help. One solution to the most common errors can be found in this [blog](https://blog.csdn.net/weixin_42098198/article/details/124756432?spm=1001.2014.3001.5502).
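  3. After installation, a quick forward pass can confirm that the module builds and runs. This is a minimal sketch (not part of the repo) that assumes a CUDA device and that the bundled ```model/PreciseRoIPooling/pytorch``` directory is on your `PYTHONPATH`:
  ```python
  import torch
  from prroi_pool import PrRoIPool2D

  # pooled height, pooled width, and spatial scale of the feature map w.r.t. the image
  avg_pool = PrRoIPool2D(7, 7, spatial_scale=0.5)
  features = torch.rand(1, 16, 24, 32).cuda()
  # each ROI is (batch_index, x1, y1, x2, y2) in image coordinates
  rois = torch.tensor([[0., 0., 0., 14., 14.]]).cuda()
  print(avg_pool(features, rois).shape)  # expected: torch.Size([1, 16, 7, 7])
  ```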
- Datasets
  - **HT21** dataset: Download the CroHD dataset from this [link](https://motchallenge.net/data/Head_Tracking_21/). Unzip ```HT21.zip``` and place ```HT21``` in the folder ```Root/dataset/```.
  - **SenseCrowd** dataset: Download the dataset from [Baidu disk](https://pan.baidu.com/s/1OYBSPxgwvRMrr6UTStq7ZQ?pwd=64xm) or from the original dataset [link](https://github.com/HopLee6/VSCrowd-Dataset).
  - Download the lists of the `train/val/test` sets at [link1](https://1drv.ms/u/s!AgKz_E1uf260nWeqa86-o9FMIqMt?e=0scDuw) or [link2](https://pan.baidu.com/s/13X3-egn0fYSd6NUTxB4cuw?pwd=ew8f), and place them in the corresponding dataset folders.

## Training
Check the following parameters in ```config.py``` before training:
* Use `__C.DATASET = 'HT21'` to set the dataset (default: `HT21`).
* Use `__C.GPU_ID = '0'` to set the GPU.
* Use `__C.MAX_EPOCH = 20` to set the number of training epochs (default: 20).
* Use `__C.EXP_PATH = os.path.join('./exp', __C.DATASET)` to set the directory for saving the code, weights, and resume point.

Check the other parameters (`TRAIN_BATCH_SIZE`, `TRAIN_SIZE`, etc.) in ```Root/DRNet/datasets/setting``` in case your GPU memory cannot accommodate the default settings.
- Run ```python train.py```.

Tip: training takes **~10 hours** on the HT21 dataset with **one TITAN RTX (24 GB memory)**.

## Testing
To reproduce the reported performance, download the pretrained models from [OneDrive](https://1drv.ms/u/s!AgKz_E1uf260nWeqa86-o9FMIqMt?e=0scDuw) or [Baidu disk](https://pan.baidu.com/s/13X3-egn0fYSd6NUTxB4cuw?pwd=ew8f), then place the ```pretrained_models``` folder in ```Root/DRNet/model/```.
- For HT21:
  - Run ```python test_HT21.py```.
- For SenseCrowd:
  - Run ```python test_SENSE.py```. The output file (```*_SENSE_cnt.py```) will then be generated.

## Performance
Results on HT21 and SenseCrowd.

- HT21 dataset

| Method | CroHD11~CroHD15 (predicted counts) | MAE/MSE/MRAE(%) |
|--------|------------------------------------|-----------------|
| Paper: VGG+FPN [2,3] | 164.6/1075.5/752.8/784.5/382.3 | 141.1/192.3/27.4 |
| This repo's reproduction: VGG+FPN [2,3] | 138.4/1017.5/623.9/659.8/348.5 | 160.7/217.3/25.1 |

- SenseCrowd dataset

| Method | MAE/MSE/MRAE(%) | MIAE/MOAE | D0~D4 (for MAE) |
|--------|-----------------|-----------|-----------------|
| Paper: VGG+FPN [2,3] | 12.3/24.7/12.7 | 1.98/2.01 | 4.1/8.0/23.3/50.0/77.0 |
| This repo's reproduction: VGG+FPN [2,3] | 11.7/24.6/11.7 | 1.99/1.88 | 3.6/6.8/22.4/42.6/85.2 |

# Video Demo
Please visit [bilibili](https://www.bilibili.com/video/BV1cY411H7hr/) or [YouTube]() to watch the video demonstration.
![demo](./figures/demo_screen1.png)
# References
1. Acquisition of Localization Confidence for Accurate Object Detection, ECCV, 2018.
2. Very Deep Convolutional Networks for Large-Scale Image Recognition, arXiv, 2014.
3. Feature Pyramid Networks for Object Detection, CVPR, 2017.
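To make the decomposition described in the Introduction concrete, the overall counting scheme can be summarized as pseudocode. This is a schematic sketch only; `count_first_frame` and `count_inflow` are placeholders for the model's density-based count and pair-wise inflow estimate, not functions of this repo:

```python
def video_individual_count(frames, interval):
    # total = initial crowd number + accumulated inflows over frame pairs
    total = count_first_frame(frames[0])
    for t in range(interval, len(frames), interval):
        # add only pedestrians present in frames[t] but absent in frames[t - interval]
        total += count_inflow(frames[t - interval], frames[t])
    return total
```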
# Citation
If you find this project useful for your research, please cite:
```
@inproceedings{han2022drvic,
  title={DR.VIC: Decomposition and Reasoning for Video Individual Counting},
  author={Han, Tao and Bai, Lei and Gao, Junyu and Wang, Qi and Ouyang, Wanli},
  booktitle={CVPR},
  year={2022}
}
```

# Acknowledgement
The released PyTorch training script borrows some code from the [C^3 Framework](https://github.com/gjy3035/C-3-Framework) and [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork) repositories. If you find these repos helpful for your research, please consider citing them as well.
--------------------------------------------------------------------------------
/__pycache__/config.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/__pycache__/config.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/train.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/__pycache__/train.cpython-37.pyc
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import os 2 | from easydict import EasyDict as edict 3 | import time 4 | import torch 5 | 6 | # init 7 | __C = edict() 8 | cfg = __C 9 | 10 | #------------------------------TRAIN------------------------ 11 | __C.SEED = 3035 # random seed, for reproducibility 12 | __C.DATASET = 'HT21' # dataset selection: HT21, SENSE 13 | __C.NET = 'VGG16_FPN' # 'VGG16_FPN' 14 | 15 | __C.RESUME = False # continue training 16 | __C.RESUME_PATH = './exp/SENSE/11-23_04-55_SENSE_Res50_FPN_5e-05/latest_state.pth' 17 | __C.GPU_ID = '0' # single GPU: '0'; multiple GPUs: '0,1' 18 | 19 | __C.sinkhorn_iterations = 100 20 | __C.FEATURE_DIM = 256 21 | __C.ROI_RADIUS = 4.
22 | if __C.DATASET == 'SENSE': 23 | __C.VAL_INTERVALS = 15 24 | else: 25 | __C.VAL_INTERVALS = 50 26 | # learning rate settings 27 | __C.LR_Base = 5e-5 # learning rate 28 | __C.LR_Thre = 1e-2 29 | 30 | __C.LR_DECAY = 0.95 31 | __C.WEIGHT_DECAY = 1e-5 # decay rate 32 | # when the training epoch exceeds this, the learning rate will begin to decay 33 | 34 | __C.MAX_EPOCH = 20 35 | 36 | # print 37 | __C.PRINT_FREQ = 20 38 | 39 | now = time.strftime("%m-%d_%H-%M", time.localtime()) 40 | 41 | __C.EXP_NAME = now \ 42 | + '_' + __C.DATASET \ 43 | + '_' + __C.NET \ 44 | + '_' + str(__C.LR_Base) 45 | 46 | __C.VAL_VIS_PATH = './exp/'+__C.DATASET+'_val' 47 | __C.EXP_PATH = os.path.join('./exp', __C.DATASET) # the path of logs, checkpoints, and current codes 48 | if not os.path.exists(__C.EXP_PATH): 49 | os.makedirs(__C.EXP_PATH) 50 | #------------------------------VAL------------------------ 51 | 52 | if __C.DATASET == 'HT21': 53 | __C.VAL_FREQ = 1 # Before __C.VAL_DENSE_START epochs, the validation freq is set as __C.VAL_FREQ 54 | __C.VAL_DENSE_START = 2 55 | else: 56 | __C.VAL_FREQ = 1 57 | __C.VAL_DENSE_START = 0 58 | #------------------------------VIS------------------------ 59 | 60 | #================================================================================ 61 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import torch 5 | import torch.nn.functional as F 6 | from importlib import import_module 7 | import misc.transforms as own_transforms 8 | from misc.transforms import check_image 9 | import torchvision.transforms as standard_transforms 10 | from . import dataset 11 | from . import setting 12 | from . 
import samplers 13 | from torch.utils.data import DataLoader 14 | from torch.utils.data import RandomSampler 15 | from config import cfg 16 | 17 | import random 18 | class train_pair_transform(object): 19 | def __init__(self,cfg_data, check_dim = True): 20 | self.cfg_data = cfg_data 21 | self.pair_flag = 0 22 | self.scale_factor = 1 23 | self.last_cw_ch =(0,0) 24 | self.crop_left = (0,0) 25 | self.last_crop_left = (0, 0) 26 | self.rate_range = (0.8,1.2) 27 | self.resize_and_crop= own_transforms.RandomCrop( cfg_data.TRAIN_SIZE) 28 | self.scale_to_setting = own_transforms.ScaleByRateWithMin(cfg_data.TRAIN_SIZE[1], cfg_data.TRAIN_SIZE[0]) 29 | 30 | self.flip_flag = 0 31 | self.horizontal_flip = own_transforms.RandomHorizontallyFlip() 32 | 33 | self.last_frame_size = (0,0) 34 | 35 | self.check_dim = check_dim 36 | def __call__(self,img,target): 37 | import numpy as np 38 | w_ori, h_ori = img.size 39 | if self.pair_flag == 1 and self.check_dim: # make sure the two frames of a pair have the same shape 40 | assert self.last_frame_size == (w_ori, h_ori) 41 | # self.last_frame_size = (w_ori, h_ori) 42 | self.scale_factor = random.uniform(self.rate_range[0], self.rate_range[1]) 43 | self.c_h,self.c_w = int(self.cfg_data.TRAIN_SIZE[0]/self.scale_factor), int(self.cfg_data.TRAIN_SIZE[1]/self.scale_factor) 44 | img, target = check_image(img, target, (self.c_h,self.c_w)) # make sure the image is larger than the crop size we need 45 | w, h = img.size 46 | if self.pair_flag % 2 == 0: 47 | self.last_cw_ch = (self.c_w,self.c_h) 48 | self.pair_flag = 0 49 | self.last_frame_size = (w_ori, h_ori) 50 | 51 | x1 = random.randint(0, w - self.c_w) 52 | y1 = random.randint(0, h - self.c_h) 53 | self.last_crop_left = (x1,y1) 54 | 55 | if self.pair_flag % 2 == 1: 56 | if self.check_dim: 57 | x1 = max(0, int(self.last_crop_left[0] + (self.last_cw_ch[0]-self.c_w))) 58 | y1 = max(0, int(self.last_crop_left[1] + (self.last_cw_ch[1]-self.c_h))) 59 | else: # for pre-training on other datasets 60 | x1 = random.randint(0, w - self.c_w) 61 | y1 = random.randint(0, h - self.c_h) 62 | self.crop_left = (x1, y1) 63 | 64 | img, target = self.resize_and_crop(img, target, self.crop_left,crop_size=(self.c_h,self.c_w)) 65 | img, target = self.scale_to_setting(img,target) 66 | 67 | self.flip_flag = round(random.random()) 68 | img, target = self.horizontal_flip(img, target, self.flip_flag) 69 | self.pair_flag += 1 70 | 71 | # assert np.array(img).sum()>0 72 | return img, target 73 | 74 | 75 | def collate_fn(batch): 76 | batch = list(filter(lambda x: x is not None, batch)) 77 | # return torch.utils.data.dataloader.default_collate(batch) 78 | # if len(batch) == 0: 79 | # import pdb;pdb.set_trace() 80 | return tuple(zip(*batch)) 81 | 82 | def createTrainData(datasetname, Dataset, cfg_data): 83 | img_transform = standard_transforms.Compose([ 84 | standard_transforms.ToTensor(), 85 | standard_transforms.Normalize(*cfg_data.MEAN_STD) 86 | ]) 87 | 88 | main_transform = train_pair_transform(cfg_data) 89 | train_set =Dataset(cfg_data.TRAIN_LST, 90 | cfg_data.DATA_PATH, 91 | main_transform=main_transform, 92 | img_transform=img_transform, 93 | train=True, 94 | datasetname=datasetname) 95 | 96 | train_sampler = samplers.CategoriesSampler(train_set.labels, frame_intervals=cfg_data.TRAIN_FRAME_INTERVALS, 97 | n_per=cfg_data.TRAIN_BATCH_SIZE) 98 | train_loader = DataLoader(train_set, batch_sampler=train_sampler, num_workers=8, collate_fn=collate_fn, pin_memory=True) 99 | print('dataset is {}, images num is {}'.format(datasetname, train_set.__len__())) 100 | 101 | return 
train_loader 102 | def createValData(datasetname, Dataset, cfg_data): 103 | 104 | 105 | img_transform = standard_transforms.Compose([ 106 | standard_transforms.ToTensor(), 107 | standard_transforms.Normalize(*cfg_data.MEAN_STD) 108 | ]) 109 | 110 | val_loader = [] 111 | with open(os.path.join( cfg_data.DATA_PATH, cfg_data.VAL_LST), 'r') as txt: 112 | scene_names = txt.readlines() 113 | for scene in scene_names: 114 | sub_val_dataset = Dataset([scene.strip()], 115 | cfg_data.DATA_PATH, 116 | main_transform=None, 117 | img_transform= img_transform , 118 | train=False, 119 | datasetname=datasetname) 120 | sub_val_loader = DataLoader(sub_val_dataset, batch_size=cfg_data.VAL_BATCH_SIZE, num_workers=4,collate_fn=collate_fn,pin_memory=False ) 121 | val_loader.append(sub_val_loader) 122 | 123 | return val_loader 124 | def createRestore(mean_std): 125 | return standard_transforms.Compose([ 126 | own_transforms.DeNormalize(*mean_std), 127 | standard_transforms.ToPILImage() 128 | ]) 129 | 130 | def loading_data(datasetname,val_interval): 131 | datasetname = datasetname.upper() 132 | cfg_data = getattr(setting, datasetname).cfg_data 133 | 134 | Dataset = dataset.Dataset 135 | train_loader = createTrainData(datasetname, Dataset, cfg_data) 136 | restore_transform = createRestore(cfg_data.MEAN_STD) 137 | 138 | Dataset = dataset.TestDataset 139 | val_loader = createValTestData(datasetname, Dataset, cfg_data,val_interval, mode ='val') 140 | 141 | 142 | return train_loader, val_loader, restore_transform 143 | 144 | def createValTestData(datasetname, Dataset, cfg_data,frame_interval,mode ='val'): 145 | img_transform = standard_transforms.Compose([ 146 | standard_transforms.ToTensor(), 147 | standard_transforms.Normalize(*cfg_data.MEAN_STD) 148 | ]) 149 | if mode == 'val': 150 | with open(os.path.join( cfg_data.DATA_PATH, cfg_data.VAL_LST), 'r') as txt: 151 | scene_names = txt.readlines() 152 | scene_names = [i.strip() for i in scene_names] 153 | data_loader = [] 154 | for scene_name in scene_names: 155 | print(scene_name) 156 | sub_dataset = Dataset(scene_name = scene_name, 157 | base_path=cfg_data.DATA_PATH, 158 | main_transform=None, 159 | img_transform=img_transform, 160 | interval=frame_interval, 161 | target=True, 162 | datasetname = datasetname) 163 | sub_loader = DataLoader(sub_dataset, batch_size=cfg_data.VAL_BATCH_SIZE, 164 | collate_fn=collate_fn, num_workers=0, pin_memory=True) 165 | data_loader.append(sub_loader) 166 | return data_loader 167 | elif mode == 'test': 168 | if datasetname=='HT21': 169 | target = False 170 | scene_names = ['test/HT21-11', 'test/HT21-12', 'test/HT21-13', 'test/HT21-14', 'test/HT21-15'] 171 | else: 172 | target =True 173 | with open(os.path.join( cfg_data.DATA_PATH, cfg_data.TEST_LST), 'r') as txt: 174 | scene_names = txt.readlines() 175 | scene_names = [i.strip() for i in scene_names] 176 | data_loader = [] 177 | for scene_name in scene_names: 178 | print(scene_name) 179 | sub_dataset = Dataset(scene_name=scene_name, 180 | base_path=cfg_data.DATA_PATH, 181 | main_transform=None, 182 | img_transform=img_transform, 183 | interval=frame_interval, 184 | target=target, 185 | datasetname=datasetname) 186 | sub_loader = DataLoader(sub_dataset, batch_size=cfg_data.VAL_BATCH_SIZE, 187 | collate_fn=collate_fn, num_workers=0, pin_memory=True) 188 | data_loader.append(sub_loader) 189 | return data_loader 190 | 191 | 192 | def loading_testset(datasetname, test_interval, mode='test'): 193 | 194 | datasetname = datasetname.upper() 195 | cfg_data = getattr(setting, 
datasetname).cfg_data 196 | 197 | Dataset = dataset.TestDataset 198 | 199 | test_loader = createValTestData(datasetname, Dataset, cfg_data,test_interval, mode=mode) 200 | 201 | restore_transform = createRestore(cfg_data.MEAN_STD) 202 | return test_loader, restore_transform -------------------------------------------------------------------------------- /datasets/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/datasets/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /datasets/__pycache__/dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/datasets/__pycache__/dataset.cpython-37.pyc -------------------------------------------------------------------------------- /datasets/__pycache__/samplers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/datasets/__pycache__/samplers.cpython-37.pyc -------------------------------------------------------------------------------- /datasets/dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os.path as osp 5 | import os 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | import numpy as np 10 | import torch 11 | import torch.utils.data as data 12 | 13 | from torchvision.ops.boxes import clip_boxes_to_image 14 | from PIL import Image 15 | import re 16 | 17 | class Dataset(data.Dataset): 18 | """ 19 | Dataset class. 
20 | """ 21 | def __init__(self, txt_path, base_path,main_transform=None,img_transform=None,train=True, datasetname='Empty'): 22 | self.base_path = base_path 23 | self.bboxes = defaultdict(list) 24 | self.imgs_path = [] 25 | self.labels = [] 26 | self.datasetname = datasetname 27 | if train: 28 | with open(osp.join(base_path, txt_path), 'r') as txt: 29 | scene_names = txt.readlines() 30 | else: 31 | scene_names = txt_path # for val and test 32 | 33 | for i in scene_names: 34 | if datasetname == 'HT21': 35 | img_path, label= HT21_ImgPath_and_Target(base_path,i.strip()) 36 | elif datasetname == 'SENSE': 37 | img_path, label = SENSE_ImgPath_and_Target(base_path,i.strip()) 38 | else: 39 | raise NotImplementedError 40 | self.imgs_path+=img_path 41 | self.labels +=label 42 | 43 | self.is_train = train 44 | self.main_transforms = main_transform 45 | self.img_transforms = img_transform 46 | 47 | def __len__(self): 48 | return len(self.imgs_path) 49 | 50 | def __getitem__(self, index): 51 | 52 | img = Image.open(self.imgs_path[index]) 53 | if img.mode is not 'RGB': 54 | img=img.convert('RGB') 55 | 56 | target = self.labels[index].copy() 57 | 58 | if self.main_transforms is not None: 59 | img, target = self.main_transforms(img, target) 60 | if self.img_transforms is not None: 61 | img = self.img_transforms(img) 62 | 63 | return img,target 64 | 65 | def HT21_ImgPath_and_Target(base_path,i): 66 | img_path = [] 67 | labels=[] 68 | root = osp.join(base_path, i + '/img1') 69 | img_ids = os.listdir(root) 70 | img_ids.sort() 71 | gts = defaultdict(list) 72 | with open(osp.join(root.replace('img1', 'gt'), 'gt.txt'), 'r') as f: 73 | lines = f.readlines() 74 | for lin in lines: 75 | lin_list = [float(i) for i in lin.rstrip().split(',')] 76 | ind = int(lin_list[0]) 77 | gts[ind].append(lin_list) 78 | 79 | for img_id in img_ids: 80 | img_id = img_id.strip() 81 | single_path = osp.join(root, img_id) 82 | annotation = gts[int(img_id.split('.')[0])] 83 | annotation = torch.tensor(annotation,dtype=torch.float32) 84 | box = annotation[:,2:6] 85 | points = box[:,0:2] + box[:,2:4]/2 86 | 87 | sigma = torch.min(box[:,2:4], 1)[0] / 2. 
88 | ids = annotation[:,1].long() 89 | img_path.append(single_path) 90 | 91 | labels.append({'scene_name':i,'frame':int(img_id.split('.')[0]), 'person_id':ids, 'points':points,'sigma':sigma}) 92 | return img_path, labels 93 | 94 | def SENSE_ImgPath_and_Target(base_path,i): 95 | img_path = [] 96 | labels=[] 97 | root = osp.join(base_path, 'video_ori', i ) 98 | img_ids = os.listdir(root) 99 | img_ids.sort() 100 | gts = defaultdict(list) 101 | with open(root.replace('video_ori', 'label_list_all')+'.txt', 'r') as f: #label_list_all_rmInvalid 102 | lines = f.readlines() 103 | for lin in lines: 104 | lin_list = [i for i in lin.rstrip().split(' ')] 105 | ind = lin_list[0] 106 | lin_list = [float(i) for i in lin_list[3:] if i != ''] 107 | assert len(lin_list) % 7 == 0 108 | gts[ind] = lin_list 109 | 110 | for img_id in img_ids: 111 | img_id = img_id.strip() 112 | single_path = osp.join(root, img_id) 113 | label = gts[img_id] 114 | box_and_point = torch.tensor(label).view(-1, 7).contiguous() 115 | 116 | points = box_and_point[:, 4:6].float() 117 | ids = (box_and_point[:, 6]).long() 118 | 119 | if ids.size(0)>0: 120 | sigma = 0.6*torch.stack([(box_and_point[:,2]-box_and_point[:,0])/2,(box_and_point[:,3]-box_and_point[:,1])/2],1).min(1)[0] #torch.sqrt(((box_and_point[:,2]-box_and_point[:,0])/2)**2 + ((box_and_point[:,3]-box_and_point[:,1])/2)**2) 121 | else: 122 | sigma = torch.tensor([]) 123 | img_path.append(single_path) 124 | 125 | labels.append({'scene_name':i,'frame':int(img_id.split('.')[0]), 'person_id':ids, 'points':points, 'sigma':sigma}) 126 | return img_path, labels 127 | 128 | 129 | class TestDataset(data.Dataset): 130 | """ 131 | Test-time dataset class: returns pairs of frames separated by `interval`. 132 | """ 133 | def __init__(self,scene_name, base_path, main_transform=None, img_transform=None, interval=1, target=True, datasetname='Empty'): 134 | self.base_path = base_path 135 | self.target = target 136 | 137 | if self.target: 138 | if datasetname == 'HT21': 139 | self.imgs_path, self.label = HT21_ImgPath_and_Target(self.base_path, scene_name) 140 | elif datasetname == 'SENSE': 141 | self.imgs_path, self.label = SENSE_ImgPath_and_Target(self.base_path, scene_name) 142 | else: 143 | raise NotImplementedError 144 | else: 145 | if datasetname == 'HT21': 146 | self.imgs_path = self.generate_imgPath_label(scene_name) 147 | elif datasetname == 'SENSE': 148 | self.imgs_path, self.label = SENSE_ImgPath_and_Target(self.base_path, scene_name) 149 | else: 150 | raise NotImplementedError 151 | self.interval = interval 152 | 153 | self.main_transforms = main_transform 154 | self.img_transforms = img_transform 155 | self.length = len(self.imgs_path) 156 | def __len__(self): 157 | return len(self.imgs_path) - self.interval 158 | 159 | 160 | def __getitem__(self, index): 161 | index1 = index 162 | index2 = index + self.interval 163 | img1 = Image.open(self.imgs_path[index1]) 164 | img2 = Image.open(self.imgs_path[index2]) 165 | 166 | if img1.mode != 'RGB': 167 | img1=img1.convert('RGB') 168 | if img2.mode != 'RGB': 169 | img2 = img2.convert('RGB') 170 | if self.img_transforms is not None: 171 | img1 = self.img_transforms(img1) 172 | img2 = self.img_transforms(img2) 173 | if self.target: 174 | target1 = self.label[index1] 175 | target2 = self.label[index2] 176 | return [img1,img2], [target1,target2] 177 | 178 | return [img1,img2], None 179 | 180 | def generate_imgPath_label(self, i): 181 | 182 | img_path = [] 183 | root = osp.join(self.base_path, i +'/img1') 184 | img_ids = os.listdir(root) 185 | img_ids.sort(key=self.myc) 186 | 187 | 188 | for img_id 
in img_ids: 189 | img_id = img_id.strip() 190 | single_path = osp.join(root, img_id) 191 | img_path.append(single_path) 192 | 193 | return img_path 194 | 195 | def myc(self, string): 196 | p = re.compile(r"\d+") 197 | return int(p.findall(string)[0]) -------------------------------------------------------------------------------- /datasets/dataset_prepare/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/datasets/dataset_prepare/__init__.py -------------------------------------------------------------------------------- /datasets/dataset_prepare/functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | def euclidean_dist( test_matrix, train_matrix): 3 | """ 4 | Args: 5 | test_matrix: numpy array, with shape [m, d] 6 | train_matrix: numpy array, with shape [n, d] 7 | Returns: 8 | dists: numpy array, with shape [m, n] 9 | """ 10 | num_test = test_matrix.shape[0] 11 | num_train = train_matrix.shape[0] 12 | dists = np.zeros((num_test, num_train)) 13 | d1 = -2 * np.dot(test_matrix, train_matrix.T) # shape (num_test, num_train) 14 | d2 = np.sum(np.square(test_matrix), axis=1, keepdims=True) # shape (num_test, 1) 15 | d3 = np.sum(np.square(train_matrix), axis=1) # shape (num_train, ) 16 | dists = np.sqrt(d1 + d2 + d3) # broadcasting 17 | 18 | return dists 19 | 20 | 21 | def generate_cycle_mask( height, width): 22 | x, y = np.ogrid[-height:height + 1, -width:width + 1] 23 | # ellipse mask 24 | mask = ((x) ** 2 / (height ** 2) + (y) ** 2 / (width ** 2) <= 1) 25 | mask = mask.astype('uint8') 26 | return mask 27 | 28 | 29 | def average_del_min(data_list): 30 | if len(data_list) == 0: 31 | return 0 32 | if len(data_list) > 2: 33 | data_list.remove(min(data_list)) 34 | # data_list.remove(max(data_list)) 35 | average_data = float(sum(data_list)) / len(data_list) 36 | return average_data 37 | elif len(data_list) <= 2: 38 | average_data = float(sum(data_list)) / len(data_list) 39 | return average_data -------------------------------------------------------------------------------- /datasets/dataset_prepare/scene_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import json 4 | 5 | root = r'T:\CVPR2022' 6 | with open('info.json') as f: 7 | info = json.load(f) 8 | cat = ['0~50', '50~100', '100~150', '150~200', '200~400'] 9 | # for k, v in a.items(): 10 | # data.append(v) 11 | # if v in range(0,50): 12 | # number[0]+=1 13 | # elif v in range(50,100): 14 | # number[1]+=1 15 | # elif v in range(100,150): 16 | # number[2]+=1 17 | # elif v in range(150, 200): 18 | # number[3] += 1 19 | # elif v in range(200, 400): 20 | # number[4] += 1 21 | with open(osp.join(root, 'new_label.txt'),'r') as f: 22 | lines = f.readlines() 23 | new_lines = [] 24 | for i in lines: 25 | i = i.rstrip() 26 | scene_name = i.split(' ')[0] 27 | v = info[scene_name] 28 | 29 | if v in range(0,50): 30 | density_label = 0 31 | elif v in range(50,100): 32 | density_label = 1 33 | elif v in range(100,150): 34 | density_label = 2 35 | elif v in range(150, 200): 36 | density_label = 3 37 | elif v in range(200, 400): 38 | density_label = 4 39 | new_i = i+' ' +str(density_label)+ '\n' 40 | new_lines.append(new_i) 41 | with open(osp.join(root,'scene_label.txt'), 'w') as f: 42 | 43 | f.writelines(new_lines) 44 | 
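# Aside: a compact equivalent of the density-binning chain above. `density_label`
# is a hypothetical helper (not part of the repo) using the same bin edges as `cat`.
import bisect

BIN_EDGES = [50, 100, 150, 200]  # bins: 0~50, 50~100, 100~150, 150~200, 200~400

def density_label(count):
    return bisect.bisect_right(BIN_EDGES, count)

assert [density_label(c) for c in (10, 50, 120, 199, 350)] == [0, 1, 2, 3, 4]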
-------------------------------------------------------------------------------- /datasets/dataset_prepare/train_val_divide.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import random 4 | Root = '/media/E/ht/dataset/HT21' 5 | dataset = 'HT21' 6 | 7 | dst_imgs_path = os.path.join(Root,'images') 8 | 9 | def divide_dataset(val_ratio=0.1): 10 | test_set = [] 11 | val_set= [] 12 | train_set=[] 13 | train_path = os.path.join(Root+'/train') 14 | scenes= os.listdir(train_path) 15 | 16 | for i_scene in scenes: 17 | sub_files = os.listdir(os.path.join(train_path, i_scene+'/img1')) 18 | for i in sub_files: 19 | train_set.append(os.path.join('train/'+i_scene+'/img1',i)) 20 | 21 | 22 | train_path = os.path.join(Root+'/test') 23 | scenes= os.listdir(train_path) 24 | 25 | for i_scene in scenes: 26 | sub_files = os.listdir(os.path.join(train_path, i_scene+'/img1')) 27 | for i in sub_files: 28 | test_set.append(os.path.join('test/'+i_scene+'/img1',i)) 29 | 30 | 31 | 32 | print("train_set_num:", len(train_set), 'test_set_num:', len(test_set)) 33 | 34 | # val_set = random.sample(train_set, round(val_ratio * len(train_val))) 35 | print("val_set_num:", len(val_set)) 36 | train_set = set(train_set) 37 | val_set = set(val_set) 38 | train_set = train_set - val_set 39 | print("train_set_num:", len(train_set)) 40 | 41 | train_set = sorted(train_set) 42 | val_set = sorted(val_set) 43 | test_set = sorted(test_set) 44 | 45 | with open(os.path.join(Root,'train.txt'), "w") as f: 46 | for train_name in train_set: 47 | f.write(train_name+'\n') 48 | f.close() 49 | 50 | with open(os.path.join(Root,'val.txt'), "w") as f: 51 | for valid_name in val_set: 52 | f.write(valid_name+'\n') 53 | 54 | f.close() 55 | 56 | with open(os.path.join(Root,'test.txt'), "w") as f: 57 | for test_name in test_set: 58 | f.write(test_name+'\n') 59 | 60 | f.close() 61 | 62 | 63 | # NOTE: this second, scene-level definition shadows the frame-level one above; only it is called in __main__. 64 | def divide_dataset(val_ratio=0.1): 65 | test_set = [] 66 | val_set = [] 67 | train_set = [] 68 | train_path = os.path.join(Root + '/train') 69 | scenes = os.listdir(train_path) 70 | 71 | for i_scene in scenes: 72 | train_set.append(os.path.join('train/' + i_scene)) 73 | 74 | train_path = os.path.join(Root + '/test') 75 | scenes = os.listdir(train_path) 76 | 77 | for i_scene in scenes: 78 | test_set.append(os.path.join('test/' + i_scene )) 79 | 80 | print("train_set_num:", len(train_set), 'test_set_num:', len(test_set)) 81 | 82 | # val_set = random.sample(train_set, round(val_ratio * len(train_val))) 83 | print("val_set_num:", len(val_set)) 84 | train_set = set(train_set) 85 | val_set = set(val_set) 86 | train_set = train_set - val_set 87 | print("train_set_num:", len(train_set)) 88 | 89 | train_set = sorted(train_set) 90 | val_set = sorted(val_set) 91 | test_set = sorted(test_set) 92 | 93 | with open(os.path.join(Root, 'train.txt'), "w") as f: 94 | for train_name in train_set: 95 | f.write(train_name + '\n') 96 | f.close() 97 | 98 | with open(os.path.join(Root, 'val.txt'), "w") as f: 99 | for valid_name in val_set: 100 | f.write(valid_name + '\n') 101 | 102 | f.close() 103 | 104 | with open(os.path.join(Root, 'test.txt'), "w") as f: 105 | for test_name in test_set: 106 | f.write(test_name + '\n') 107 | 108 | f.close() 109 | if __name__ == '__main__': 110 | divide_dataset() -------------------------------------------------------------------------------- /datasets/dataset_prepare/video_vis.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | from 
collections import defaultdict 4 | import numpy as np 5 | import os.path as osp 6 | def plot_boxes(cur_frame, head_map, points, ids=None, body_map=None, text=True): 7 | plotting_im = cur_frame.copy() 8 | if body_map is None: body_map = {} 9 | for index, t_dim in enumerate(head_map): 10 | (startX, startY, endX, endY) = [int(i) for i in t_dim] 11 | cv2.rectangle(plotting_im, (startX, startY), (endX, endY), 12 | (0, 255, 0), 2) 13 | cur_centroid = tuple([(startX+endX)//2, 14 | (startY+endY)//2]) 15 | 16 | # cv2.circle(plotting_im, cur_centroid, 2, 17 | # (255, 0, 0), 2) 18 | 19 | if text and ids is not None: 20 | cv2.putText(plotting_im, str(ids[index]), cur_centroid, 21 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2) 22 | for index, t_dim in enumerate(points): 23 | X, Y = [int(i) for i in t_dim] 24 | cv2.circle(plotting_im, (X, Y), 2, 25 | (0, 0, 255), 2) 26 | 27 | for index, (t_id, t_dim) in enumerate(body_map.items()): 28 | (startX, startY, endX, endY) = [int(i) for i in t_dim] 29 | cv2.rectangle(plotting_im, (startX, startY), (endX, endY), 30 | (0, 255, 0), 2) 31 | return plotting_im 32 | 33 | def CroHead(): 34 | root = '../../dataset/HT21/train' 35 | sub_scenes = os.listdir(root) 36 | print(sub_scenes) 37 | 38 | for sub_scene in sub_scenes[2:]: 39 | imgs_path = os.path.join(root, sub_scene, 'img1') 40 | imgs_id = os.listdir(imgs_path) 41 | det_path = os.path.join(imgs_path.replace('img1', 'det'), 'det.txt') 42 | 43 | bboxes = defaultdict(list) 44 | with open(det_path, 'r') as f: 45 | lines = f.readlines() 46 | # imgs_path = [i.rstrip().strip("#").lstrip() 47 | # for i in lines if i.startswith('#')] 48 | for lin in lines: 49 | lin_list = [float(i) for i in lin.rstrip().split(',')] 50 | ind = int(lin_list[0]) 51 | bboxes[ind].append(lin_list) 52 | f.close() 53 | gts = defaultdict(list) 54 | with open(os.path.join(imgs_path.replace('img1','gt'), 'gt.txt'), 'r') as f: 55 | lines = f.readlines() 56 | for lin in lines: 57 | lin_list = [float(i) for i in lin.rstrip().split(',')] 58 | ind = int(lin_list[0]) 59 | gts[ind].append(lin_list) 60 | f.close() 61 | # print(gts) 62 | # print(imgs_id) 63 | 64 | for img_id in imgs_id: 65 | img_path=os.path.join(imgs_path,img_id) 66 | labels = bboxes[int(img_id.split('.')[0])] 67 | labels_point = gts[int(img_id.split('.')[0])] 68 | annotations = np.zeros((0, 4)) 69 | points = np.zeros((0, 2)) 70 | if len(labels) == 0: 71 | label = [[0, 0, 0, 0, 0]] 72 | ignore_ar = [] 73 | for idx, label in enumerate(labels): 74 | annotation = np.zeros((1, 4)) 75 | # bbox 76 | annotation[0, 0] = label[2] # x1 77 | annotation[0, 1] = label[3] # y1 78 | annotation[0, 2] = label[4] + label[2] # x2 79 | annotation[0, 3] = label[5] + label[3] # y2 80 | annotations = np.append(annotations, annotation, axis=0) 81 | for idx, label in enumerate(labels_point): 82 | point = np.zeros((1, 2)) 83 | # head center point 84 | point[0, 0] = label[2] + label[4]/2 # x 85 | point[0, 1] = label[3] + label[5]/2 # y 86 | points = np.append(points, point, axis=0) 87 | # print(annotations) 88 | print(len(points)) 89 | img = cv2.imread(img_path) 90 | img = plot_boxes(img,{},points) 91 | # cv2.imshow(img_id, img) 92 | save_path = img_path.replace('img1','vis') 93 | cv2.imwrite(save_path,img) 94 | # cv2.waitKey() 95 | 96 | video_path = 'E:/netdisk\SenseCrowd/video_ori' 97 | label_path = 'E:/netdisk\SenseCrowd/label_list_all_rmInvalid' 98 | import json 99 | import os 100 | from numpy import array 101 | import numpy as np 102 | import pylab as pl 103 | def SenseCrowd(): 104 | Info_dict={} 105 | time = 0 106 | for scene in sorted(os.listdir(video_path))[51:]: # sort first so the slice is deterministic 107 | print(scene)
108 | gts = defaultdict(list) 109 | with open(os.path.join(label_path,scene+'.txt')) as f: 110 | lines = f.readlines() 111 | for line in lines: 112 | lin_list = [i for i in line.rstrip().split(' ')] 113 | ind = lin_list[0] 114 | lin_list = [float(i) for i in lin_list[3:] if i != ''] 115 | assert len(lin_list)%7==0 116 | gts[ind]=lin_list 117 | 118 | root = osp.join(video_path, scene) 119 | img_ids = os.listdir(root) 120 | print(img_ids) 121 | id_list = [] 122 | for img_id in img_ids: 123 | if not img_id.endswith("jpg"): 124 | continue 125 | time+=1/5 126 | img_path=osp.join(root, img_id) 127 | label = gts[img_id] 128 | box_and_point = np.array(label).reshape(-1,7) 129 | boxes = box_and_point[:,0:4] 130 | points = box_and_point[:,4:6] 131 | ids = box_and_point[:,6].astype(int) 132 | 133 | id_list.append(ids) 134 | 135 | img = cv2.imread(img_path) 136 | print(img_path) 137 | plot_img = plot_boxes(img, boxes, points, ids) 138 | cv2.imshow(img_id, plot_img) 139 | cv2.waitKey() 140 | all_id = np.concatenate(id_list) 141 | Info_dict.update({scene:len(set(all_id))}) 142 | 143 | 144 | print(time) 145 | with open('info.json','w') as f: 146 | json.dump(Info_dict,f) 147 | 148 | # print(Info_dict) 149 | 150 | def SENSE_train_val_test(): 151 | import random 152 | random.seed(0) 153 | scenarios = ['1_cut', ''] 154 | all_scenarios = [] 155 | with open('./info.json','r') as f: 156 | a = json.load(f) 157 | for k, v in a.items(): 158 | all_scenarios.append(k) 159 | print(len(all_scenarios)) 160 | train_val = random.sample(all_scenarios, int(len(all_scenarios)*0.6)) 161 | # print(train_val) 162 | test = list(set(all_scenarios)-set(train_val)) 163 | 164 | val = random.sample(train_val, int(0.1*len(all_scenarios))) 165 | # print(val) 166 | train = list(set(train_val)-set(val)) 167 | data = '' 168 | with open('./train.txt', 'w') as f: 169 | for i in train: data += i+'\n' 170 | f.write(data) 171 | data = '' 172 | with open('./val.txt', 'w') as f: 173 | for i in val: data += i+'\n' 174 | f.write(data) 175 | data = '' 176 | with open('./test.txt', 'w') as f: 177 | for i in test: data += i+'\n' 178 | f.write(data) 179 | 180 | 181 | print(len(train) +len(val)+len(test)) 182 | 183 | def Infor_statistics(): 184 | with open('./info.json','r') as f: 185 | a = json.load(f) 186 | data = [] 187 | number = np.zeros(5) 188 | cat = ['0~50', '50~100', '100~150', '150~200', '200~400'] 189 | for k, v in a.items(): 190 | data.append(v) 191 | if v in range(0,50): 192 | number[0]+=1 193 | elif v in range(50,100): 194 | number[1]+=1 195 | elif v in range(100,150): 196 | number[2]+=1 197 | elif v in range(150, 200): 198 | number[3] += 1 199 | elif v in range(200, 400): 200 | number[4] += 1 201 | data = np.array(data) 202 | 203 | print(data, data.sum()) 204 | draw_hist(data) 205 | 206 | 207 | 208 | def draw_hist(lengths): 209 | data = lengths 210 | 211 | bins = np.linspace(min(data), 400, 10) 212 | bins = [0,100, 200, 400] 213 | pl.hist(data, bins) 214 | 215 | pl.xlabel('Number of people') 216 | 217 | pl.ylabel('Number of occurrences') 218 | 219 | pl.title('Frequency distribution of the number of people in SenseCrowd (634 seq)') 220 | 221 | pl.show() 222 | 223 | 224 | 225 | if __name__ =='__main__': 226 | SenseCrowd() 227 | Infor_statistics() 228 | # SENSE_train_val_test() -------------------------------------------------------------------------------- /datasets/samplers.py: -------------------------------------------------------------------------------- 1 | 
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Yaoyao Liu 3 | ## Modified from: https://github.com/Sha-Lab/FEAT 4 | ## Tianjin University 5 | ## liuyaoyao@tju.edu.cn 6 | ## Copyright (c) 2019 7 | ## 8 | ## This source code is licensed under the MIT-style license found in the 9 | ## LICENSE file in the root directory of this source tree 10 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 11 | """ Sampler for dataloader. """ 12 | import torch 13 | import numpy as np 14 | import random 15 | class CategoriesSampler(): 16 | """The class to generate episodic data""" 17 | def __init__(self, labels, frame_intervals, n_per): 18 | self.frame_intervals = frame_intervals 19 | self.n_sample = len(labels) 20 | self.n_batch = self.n_sample// n_per 21 | self.n_per = n_per 22 | self.scenes = [] 23 | self.scene_id = {} 24 | for idx, label in enumerate(labels): 25 | scene_name = label['scene_name'] 26 | if scene_name not in self.scene_id.keys(): 27 | self.scene_id.update({scene_name:0}) 28 | self.scene_id[scene_name]+=1 29 | self.scenes.append(scene_name) 30 | 31 | def __len__(self): 32 | return self.n_batch 33 | def __iter__(self): 34 | for i_batch in range(self.n_batch): 35 | batch = [] 36 | frame_a = torch.randperm(self.n_sample )[:self.n_per] 37 | for c in frame_a: 38 | scene_name = self.scenes[c] 39 | # print(c) 40 | tmp_intervals = random.randint(self.frame_intervals[0], 41 | min(self.scene_id[scene_name]//2,self.frame_intervals[1])) 42 | if c 0)[0] 96 | unmatched1 = torch.where(dis.min(0)[0] > 0)[0] 97 | match_gt = {'a2b': matched_a2b, 'un_a': unmatched0, 'un_b': unmatched1} 98 | img0, img1 = plot_id(pair_img[0], pair_img[1], pair_target[0]['points'], pair_target[1]['points'], match_gt) 99 | cv2.imwrite('0.png',img0.copy()) 100 | cv2.imwrite('30.png', img1.copy()) 101 | cv2.imshow('0', img0) 102 | cv2.imshow('1', img1) 103 | 104 | cv2.waitKey() 105 | 106 | if __name__ == '__main__': 107 | plot_intro() -------------------------------------------------------------------------------- /misc/KPI_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | from collections import deque 6 | 7 | class Task_KPI_Pool: 8 | def __init__(self,task_setting, maximum_sample): 9 | """ 10 | :param task_setting: {'den': ['gt', 'den'], 'match': ['gt', 'den']} 11 | :param maximum_sample: the number of the saved samples 12 | """ 13 | self.pool_size = maximum_sample 14 | self.maximum_sample = maximum_sample 15 | assert self.pool_size > 0 16 | self.current_sample = {x: 0 for x in task_setting.keys()} 17 | self.store = task_setting 18 | for key, data in self.store.items(): 19 | self.store[key] = {x: deque() for x in data} 20 | 21 | def add(self, save_dict): 22 | """ 23 | :param save_dict: {'den': {'gt':torch.tensor(10), 'den':torch.tensor(20)}, 24 | 'match': {'gt':torch.tensor(40), 'den':torch.tensor(100)}} 25 | :return: None 26 | """ 27 | for task_key, data in save_dict.items(): 28 | if self.current_sample[task_key]< self.pool_size: 29 | self.current_sample[task_key] = self.current_sample[task_key] + 1 30 | for data_key, data_val in data.items(): 31 | self.store[task_key][data_key].append(data_val) 32 | else: 33 | for data_key, data_val in data.items(): 34 | self.store[task_key][data_key].popleft() 35 | self.store[task_key][data_key].append(data_val) 36 | 37 | def return_feature(self,cls_group): 38 | return_features = [] 
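# NOTE: return_feature() is effectively a stub here: it ignores cls_group and always returns empty lists.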
39 | return_labels = [] 40 | 41 | return return_features, return_labels 42 | 43 | def query(self): 44 | task_KPI = {} 45 | for task_key in self.store: 46 | data_keys = list(self.store[task_key].keys()) 47 | 48 | gt_list = list(self.store[task_key][data_keys[0]]) 49 | correct_list = list(self.store[task_key][data_keys[1]]) 50 | gt_sum = torch.tensor(gt_list).sum() 51 | 52 | correct_sum = torch.tensor(correct_list).sum() 53 | 54 | 55 | task_KPI.update({task_key:correct_sum/(gt_sum+1e-8)}) 56 | 57 | return task_KPI 58 | 59 | if __name__ == '__main__': 60 | import random 61 | 62 | index = np.random.randint(0, 3, size=30) 63 | # index = random.sample(range(0, 54), 54) 64 | feature = torch.rand(30,3).cuda() 65 | target = torch.Tensor(index).cuda().long() 66 | pred = torch.randn(30,3).cuda() 67 | task = {'den': ['gt', 'den'], 'match': ['gt', 'den']} 68 | save_dict0 = {'den': {'gt':torch.tensor(10), 'den':torch.tensor(20)}, 'match': {'gt':torch.tensor(40), 'den':torch.tensor(100)}} 69 | save_dict1 = {'den': {'gt':torch.tensor(20.6), 'den':torch.tensor(30.8)}, 'match': {'gt':torch.tensor(50), 'den':torch.tensor(120.4)}} 70 | print(task.keys()) 71 | pool = Task_KPI_Pool(task,100) 72 | pool.add(save_dict0) 73 | pool.add(save_dict1) 74 | 75 | print(pool.query()) 76 | 77 | import pdb 78 | 79 | pdb.set_trace() -------------------------------------------------------------------------------- /misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__init__.py -------------------------------------------------------------------------------- /misc/__pycache__/KPI_pool.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/KPI_pool.cpython-37.pyc -------------------------------------------------------------------------------- /misc/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /misc/__pycache__/dot_ops.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/dot_ops.cpython-37.pyc -------------------------------------------------------------------------------- /misc/__pycache__/get_bbox.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/get_bbox.cpython-37.pyc -------------------------------------------------------------------------------- /misc/__pycache__/inflation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/inflation.cpython-37.pyc -------------------------------------------------------------------------------- /misc/__pycache__/layer.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/layer.cpython-37.pyc -------------------------------------------------------------------------------- /misc/__pycache__/nms.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/nms.cpython-37.pyc -------------------------------------------------------------------------------- /misc/__pycache__/transforms.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/transforms.cpython-37.pyc -------------------------------------------------------------------------------- /misc/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/misc/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /misc/cal_mean.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from PIL import Image 3 | import numpy as np 4 | import os 5 | 6 | def make_parser(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--trainDataPath', type=str, default='/media/D/GJY/Dataset/VisDroneCC_1080P_mod16/img', 9 | help='absolute path to your data path') 10 | return parser 11 | 12 | if __name__ == '__main__': 13 | args = make_parser().parse_args() 14 | 15 | imgs_list = [] 16 | 17 | for i_img, img_name in enumerate(os.listdir(args.trainDataPath)): 18 | if i_img % 100 == 0: 19 | print( i_img ) 20 | img = Image.open(os.path.join(args.trainDataPath, img_name)) 21 | if img.mode == 'L': 22 | img = img.convert('RGB') 23 | 24 | img = np.array(img.resize((1024,768),Image.BILINEAR)) 25 | 26 | imgs_list.append(img) 27 | 28 | imgs = np.array(imgs_list).astype(np.float32)/255. 29 | red = imgs[:,:,:,0] 30 | green = imgs[:,:,:,1] 31 | blue = imgs[:,:,:,2] 32 | 33 | 34 | print("means: [{}, {}, {}]".format(np.mean(red),np.mean(green),np.mean(blue))) 35 | print("stdevs: [{}, {}, {}]".format(np.std(red),np.std(green),np.std(blue))) 36 | -------------------------------------------------------------------------------- /misc/dot_ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import torch 4 | import torch. 
nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import math 8 | 9 | class Gaussian(nn.Module): 10 | def __init__(self, in_channels, sigmalist, kernel_size=64, stride=1, padding=0, froze=True): 11 | super(Gaussian, self).__init__() 12 | out_channels = len(sigmalist) * in_channels 13 | # gaussian kernel 14 | mu = kernel_size // 2 15 | gaussFuncTemp = lambda x: (lambda sigma: math.exp(-(x - mu) ** 2 / float(2 * sigma ** 2))) 16 | gaussFuncs = [gaussFuncTemp(x) for x in range(kernel_size)] 17 | windows = [] 18 | for sigma in sigmalist: 19 | gauss = torch.Tensor([gaussFunc(sigma) for gaussFunc in gaussFuncs]) 20 | gauss /= gauss.sum() 21 | _1D_window = gauss.unsqueeze(1) 22 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 23 | window = Variable(_2D_window.expand(in_channels, 1, kernel_size, kernel_size).contiguous()) 24 | windows.append(window) 25 | kernels = torch.stack(windows) 26 | kernels = kernels.permute(1, 0, 2, 3, 4) 27 | weight = kernels.reshape(out_channels, 1, kernel_size, kernel_size) # grouped conv (groups=in_channels) expects weight shape (out_channels, 1, k, k) 28 | 29 | self.gkernel = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, groups=in_channels, bias=False) 30 | self.gkernel.weight = torch.nn.Parameter(weight) 31 | 32 | if froze: self.frozePara() 33 | 34 | def forward(self, dotmaps): 35 | gaussianmaps = self.gkernel(dotmaps) 36 | return gaussianmaps 37 | 38 | def frozePara(self): 39 | for para in self.parameters(): 40 | para.requires_grad = False 41 | 42 | 43 | class SumPool2d(nn.Module): 44 | def __init__(self, kernel_size): 45 | super(SumPool2d, self).__init__() 46 | self.avgpool = nn.AvgPool2d(kernel_size, stride=1, padding=kernel_size // 2) # note: the padding computation assumes an int kernel_size 47 | if not isinstance(kernel_size, int): 48 | self.area = kernel_size[0] * kernel_size[1] 49 | else: 50 | self.area = kernel_size * kernel_size 51 | 52 | def forward(self, dotmap): 53 | return self.avgpool(dotmap) * self.area -------------------------------------------------------------------------------- /misc/evaluation_code.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import sys 5 | import math 6 | 7 | #MAE = lambda 8 | 9 | errorcode = 'WA' 10 | 11 | class AverageMeter(object): 12 | """Computes and stores the average and current value""" 13 | 14 | def __init__(self): 15 | self.reset() 16 | 17 | def reset(self): 18 | self.maeSum = 0 19 | self.mseSum = 0 20 | self.naeSum = 0 21 | self.count = 0 22 | self.naecount = 0 23 | 24 | def update(self, mae, mse, nae): 25 | 26 | self.maeSum += mae 27 | self.mseSum += mse 28 | if nae >= 0: 29 | self.naeSum += nae 30 | self.naecount += 1 31 | self.count += 1 32 | 33 | def output(self): 34 | if self.count > 0: 35 | mae = self.maeSum / self.count 36 | mse = math.sqrt(self.mseSum / self.count) 37 | else: 38 | mae, mse = -1, -1 39 | nae = self.naeSum / self.naecount if self.naecount > 0 else -1 40 | return mae, mse, nae 41 | 42 | def dictout(self): 43 | mae, mse, nae = self.output() 44 | return dict( 45 | mae = mae, 46 | mse = mse, 47 | nae = nae 48 | ) 49 | 50 | 51 | def readoutput(outtxt): 52 | output = {} 53 | with open(outtxt) as f: 54 | for line in f.readlines(): 55 | line = line.strip().split(' ') 56 | if len(line) == 2: 57 | idx, score = int(line[0]), float(line[1]) 58 | output[idx] = score 59 | return output 60 | 61 | def readtarget(tartxt): 62 | target = {} 63 | with open(tartxt) as f: 64 | for line in f.readlines(): 65 | line = line.strip().split(' ') 66 | if 
len(line) == 4: 67 | idx, illum, level = map(int, line[:3]) 68 | score = float(line[3]) 69 | target[idx] = dict( 70 | illum = illum, 71 | level = level, 72 | gt_count = score 73 | ) 74 | return target 75 | 76 | def judge(outtxt, tartxt): 77 | output = readoutput(outtxt) 78 | target = readtarget(tartxt) 79 | for key in target.keys(): 80 | if key in output: 81 | target[key]["pd_count"] = output[key] 82 | else: 83 | return errorcode 84 | 85 | totalJudger = AverageMeter() 86 | levelJudger = [AverageMeter() for _ in range(5)] 87 | illumJudger = [AverageMeter() for _ in range(4)] 88 | 89 | for _, score in target.items(): 90 | # get data 91 | illum = score['illum'] 92 | level = score['level'] 93 | gt_count = score['gt_count'] 94 | pd_count = score['pd_count'] 95 | 96 | # process 97 | mae = abs(pd_count - gt_count) 98 | mse = mae ** 2 99 | nae = mae / gt_count if gt_count > 0 else -1 100 | 101 | # save 102 | totalJudger.update(mae, mse, nae) 103 | levelJudger[level].update(mae, mse, nae) 104 | illumJudger[illum].update(mae, mse, nae) 105 | 106 | outputdict = { 107 | 'overall': totalJudger.dictout(), 108 | 'levels': [judger.dictout() for judger in levelJudger], 109 | 'illums': [judger.dictout() for judger in illumJudger], 110 | } 111 | outputdict['mmae'] = dict( 112 | mmae_level = sum(result['mae'] for result in outputdict['levels']) / len(outputdict['levels']), 113 | mmae_illum = sum(result['mae'] for result in outputdict['illums']) / len(outputdict['illums']) 114 | ) 115 | 116 | return outputdict 117 | 118 | 119 | if __name__ == '__main__': 120 | target = {} 121 | if len(sys.argv) != 3: 122 | print(errorcode) 123 | print(judge(sys.argv[1], sys.argv[2])) -------------------------------------------------------------------------------- /misc/get_bbox.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from .nms import * 5 | import torch.nn as nn 6 | import cv2 7 | import pdb 8 | def local_maximum(sub_pre,sub_bin, scale_factor=1.): 9 | sub_pre = torch.from_numpy(sub_pre).unsqueeze(0).unsqueeze(0) 10 | max_value = torch.max(sub_pre) 11 | 12 | # kernel = [[1/9., 1/9., 1/9.], [1/9., 1/9., 1/9.], [1/9., 1/9.,1/9.]] 13 | # kernel = torch.FloatTensor(kernel).unsqueeze(0).unsqueeze(0) 14 | # weight = nn.Parameter(data=kernel, requires_grad=False) 15 | # sub_pre = F.conv2d(sub_pre, weight, stride=1,padding=1) 16 | 17 | keep = nn.functional.max_pool2d(sub_pre, (3, 3), stride=1, padding=1) 18 | keep = (keep == sub_pre).float() 19 | sub_pre = keep * sub_pre 20 | 21 | sub_pre[sub_pre < 0.5 * max_value] = 0 22 | sub_pre[sub_pre > 0] = 1 23 | count = int(torch.sum(sub_pre).item()) 24 | 25 | kpoint = sub_pre.data.squeeze(0).squeeze(0).cpu().numpy() 26 | 27 | points = np.array(list(zip(np.nonzero(kpoint)[1], np.nonzero(kpoint)[0]))).astype(np.float32) 28 | # distance_map = cv2.distanceTransform(sub_bin, cv2.DIST_L1,3) 29 | 30 | boxes = np.zeros((len(points), 5)).astype(np.float32) 31 | for i in range(len(points)): 32 | x, y = points[i] 33 | length = scale_factor # max(distance_map[int(y), int(x)], scale_factor) 34 | boxes[i] = [x - length, y - length, 2*length, 2*length, 4*length*length] 35 | pre_data = {'num': count, 'points': points, 'boxes': boxes} 36 | return pre_data 37 | 38 | def Noise_box_detection(recs): 39 | maintain_list = [] 40 | recs[:, 2] = recs[:, 0] + recs[:, 2] 41 | recs[:, 3] = recs[:, 1] + recs[:, 3] 42 | length = len(recs) 43 | 44 | for i in range(length): 45 | if i < length - 1: 46 | j 
= i + 1 47 | index = (recs[i][0] >= recs[j:][:, 0]) & (recs[i][1] >= recs[j:][:, 1]) \ 48 | & (recs[i][2] <= recs[j:][:, 2]) & (recs[i][3] <= recs[j:][:, 3]) 49 | index = np.where(index == True)[0] 50 | if index.size > 0: 51 | continue 52 | else: 53 | maintain_list.append(i) 54 | else: 55 | maintain_list.append(i) 56 | return maintain_list 57 | 58 | def get_boxInfo_from_Binar_map(pred_map , threshold = 0.3, min_area=4,scale_factor = 1., polish =False): 59 | # import pdb 60 | # pdb.set_trace() 61 | a = torch.ones_like(pred_map) 62 | b = torch.zeros_like(pred_map) 63 | Binar_map = torch.where(pred_map >= threshold, a, b).cpu().numpy() 64 | 65 | Binar_map = Binar_map.squeeze().astype(np.uint8) 66 | pred_map = pred_map.squeeze() 67 | cnt, labels, stats, centroids = cv2.connectedComponentsWithStats(Binar_map, connectivity=4) # centriod (w,h) 68 | 69 | boxes = stats[1:, :].astype(np.float32) 70 | points = centroids[1:, :].astype(np.float32) 71 | index = (boxes[:, 4] >= min_area) 72 | boxes = boxes[index] 73 | points = points[index] 74 | 75 | order = np.argsort(boxes[:, 4]) 76 | points = points[order] 77 | boxes = boxes[order] 78 | 79 | maintain_list = Noise_box_detection(boxes.copy()) 80 | boxes = boxes[maintain_list] 81 | points = points[maintain_list] 82 | 83 | if polish: 84 | boxes_app = [] 85 | points_app = [] 86 | for id in range(len(boxes)): 87 | w_s, h_s, w, h, area = boxes[id] 88 | sub_pre = pred_map[int(h_s):int(h_s) + int(h), int(w_s):int(w_s) + int(w)].copy() 89 | sub_bin = Binar_map[int(h_s):int(h_s) + int(h), int(w_s):int(w_s) + int(w)].copy() 90 | iou = boxes[id, 4] / (w * h) 91 | ration = h / w 92 | if area>20: 93 | if ration > 2 or ration < 0.5 or iou < 0.75: 94 | pred_data = local_maximum(sub_pre,sub_bin,scale_factor) 95 | if pred_data['num'] >= 1: 96 | pred_data['boxes'][:, 0] += w_s 97 | pred_data['boxes'][:, 1] += h_s 98 | pred_data['points'][:, 0] += int(w_s) 99 | pred_data['points'][:, 1] += int(h_s) 100 | boxes[id, :] = pred_data['boxes'][0, :] 101 | points[id, :] = pred_data['points'][0, :] 102 | 103 | for k in range(1, pred_data['num']): 104 | boxes_app.append(pred_data['boxes'][k, :]) 105 | points_app.append(pred_data['points'][k, :]) 106 | 107 | # print('original:{}, add_boxes:{}, final_boxes:{}'.format(len(boxes), len(boxes_app), len(boxes) + len(boxes_app))) 108 | 109 | if len(boxes_app) > 0: 110 | boxes = np.concatenate((boxes, np.array(boxes_app))) 111 | points = np.concatenate((points, np.array(points_app).astype(np.int32))) 112 | new_boxes = np.zeros((len(points), 4)).astype(np.float32) 113 | scores = np.zeros((len(points), 1)).astype(np.float32) 114 | 115 | # for i in range(len(boxes)): 116 | # x_s, y_s, w, h, area = boxes[i] 117 | # x, y = points[i] 118 | # # _scale = scale_map[y_s:y_s + h, x_s:x_s + w] 119 | # # _mask = Binar_map[y_s:y_s + h, x_s:x_s + w] 120 | # _pred = pred_map[int(y_s):int(y_s) + int(h), int(x_s):int(x_s) + int(w)] 121 | # score =pred_map[int(y),int(x)] 122 | # sigma = np.sqrt(w ** 2 + h ** 2) 123 | # sin = h / sigma 124 | # cos = w / sigma 125 | # 126 | # scale = max( scale_map[int(y),int(x)], sigma / 2) #if index.sum()>0 else sigma / 2 #_scale[index].max() 127 | # 128 | # de_h, de_w = scale * sin, scale * cos 129 | # new_x_s, new_x_e = x - de_w, x + de_w 130 | # new_y_s, new_y_e = y - de_h, y + de_h 131 | # new_boxes[i] = [new_x_s, new_y_s, new_x_e, new_y_e] 132 | # scores[i] = score 133 | 134 | batch_id = np.zeros((len(points), 1)).astype(np.float32) 135 | boxes[:, 2] = boxes[:, 0] + boxes[:, 2] 136 | boxes[:, 3] = boxes[:, 1] + boxes[:, 
3] 137 | boxes = boxes[:, :4] 138 | boxes = np.hstack((batch_id,boxes / scale_factor)) 139 | boxes = torch.from_numpy(boxes) 140 | # boxes = np.hstack((boxes/scale_factor,scores)) 141 | # new_boxes = np.hstack((new_boxes/scale_factor, scores)) 142 | 143 | if polish: 144 | keep = nms(new_boxes,thresh=0.3) 145 | points = points[keep] 146 | boxes = boxes[keep] 147 | new_boxes = new_boxes[keep] 148 | pred_data = {'num': len(points), 'points': points/scale_factor, 'rois': boxes, 'new_boxes': new_boxes} 149 | return pred_data 150 | 151 | def multiscale_nms(pred_data): 152 | 153 | base_boxes = pred_data[1]['boxes'] 154 | base_points = pred_data[1]['points'] 155 | base_new_boxes = pred_data[1]['new_boxes'] 156 | 157 | for scale in pred_data.keys(): 158 | if scale == 1: 159 | continue 160 | boxes = pred_data[scale]['boxes'] 161 | points = pred_data[scale]['points'] 162 | new_boxes = pred_data[scale]['new_boxes'] 163 | 164 | base_boxes= np.concatenate((base_boxes, boxes)) 165 | base_points= np.concatenate((base_points, points)) 166 | base_new_boxes = np.concatenate((base_new_boxes, new_boxes)) 167 | 168 | # order = np.argsort((base_new_boxes[:, 3]-base_new_boxes[:, 1])*(base_new_boxes[:, 2]-base_new_boxes[:, 0])) 169 | # base_points = base_points[order] 170 | # base_boxes = base_boxes[order] 171 | # base_new_boxes = base_new_boxes[order] 172 | # # 173 | # keep = Noise_box_detection(base_new_boxes.copy()) 174 | # base_points = base_points[keep] 175 | # base_boxes = base_boxes[keep] 176 | # base_new_boxes = base_new_boxes[keep] 177 | 178 | keep = nms(base_new_boxes,thresh=0.2) 179 | base_points = base_points[keep] 180 | base_boxes = base_boxes[keep] 181 | base_new_boxes = base_new_boxes[keep] 182 | 183 | pred_data = {'num': len(base_points), 'points': base_points , 'rois': base_boxes, 'new_boxes': base_new_boxes} 184 | return pred_data 185 | -------------------------------------------------------------------------------- /misc/inflation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import torch 4 | import pdb 5 | import torch. 
nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | import math 9 | import numpy 10 | 11 | class inflation(nn.Module): 12 | def __init__(self,K=15,stride=1,padding=None): 13 | super(inflation,self).__init__() 14 | weight = numpy.zeros((K,K)) 15 | t = (K-1)/2 16 | for i in range(K): 17 | for j in range(K): 18 | if abs(i-t)+abs(j-t)<=t: 19 | weight[i,j] = 1 20 | if padding is None: 21 | padding = K//2 22 | self.ikernel = nn.Conv2d(1,1,K,stride=stride,padding=padding,bias=False) 23 | self.ikernel.weight = torch.nn.Parameter(torch.from_numpy(weight.reshape(1,1,K,K).astype(numpy.float32))) 24 | for para in self.parameters(): 25 | para.requires_grad = False 26 | 27 | def forward(self,x): 28 | x = x.unsqueeze(0) 29 | x = x.unsqueeze(0) 30 | x = self.ikernel(x) 31 | return x.squeeze() 32 | 33 | class Expend(torch.nn.Module): 34 | def __init__(self): 35 | super(Expend, self).__init__() 36 | self.ex = torch.nn.AvgPool2d(15,stride=1,padding=7) 37 | for para in self.parameters(): 38 | para.requires_grad = False 39 | 40 | def forward(self, x): 41 | x = x.unsqueeze(0) 42 | x = self.ex(x) 43 | return x.squeeze() -------------------------------------------------------------------------------- /misc/layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .dot_ops import Gaussian, SumPool2d 5 | import scipy.spatial 6 | import scipy.ndimage 7 | import numpy as np 8 | import torch.nn.functional as F 9 | import cv2 as cv 10 | class Point2Mask(object): 11 | def __init__(self, max_kernel_size=7): 12 | 13 | self.max_kernel_size = max_kernel_size 14 | def __call__(self, target, pre_map): 15 | b,c,h,w = pre_map.size() 16 | mask_map = torch.zeros_like(pre_map) 17 | for idx, sub_target in enumerate(target): 18 | points = sub_target["points"] 19 | # import pdb 20 | # pdb.set_trace() 21 | count = points.shape[0] 22 | if count==0: 23 | continue 24 | elif count==1: 25 | pt = points[0].astype(np.int32) 26 | kernel_size = self.max_kernel_size 27 | up = max(pt[1] - kernel_size, 0) 28 | down = min(pt[1] + kernel_size + 1, h) 29 | left = max(pt[0] - kernel_size, 0) 30 | right = min(pt[0] + kernel_size + 1, w) 31 | 32 | mask_map[idx, 0, up:down + 1, left:right + 1] = 1 33 | else: 34 | leafsize = 2048 35 | tree = scipy.spatial.KDTree(points.copy(), leafsize=leafsize) 36 | distances, locations = tree.query(points, k=2) 37 | for i, pt in enumerate(points): 38 | if pt[0] >= w or pt[1] > h: 39 | continue 40 | pt = pt.astype(np.int32) 41 | kernel_size = (distances[i][1]) * 0.25 42 | kernel_size = min(self.max_kernel_size, int(kernel_size + 0.5)) 43 | up = max(pt[1] - kernel_size,0) 44 | down = min(pt[1] + kernel_size+1,h) 45 | left = max(pt[0] - kernel_size,0) 46 | right = min(pt[0] + kernel_size+1,w) 47 | mask_map[idx,0, up:down+1, left:right+1]=1 48 | 49 | # density_nn[np.where(pnt_density > 0)] = distances[i][1] 50 | # mask_map += pnt_density 51 | # density_std[np.where(pnt_density > 0)] = sigma 52 | # mask_map = mask_map.astype(np.uint8) * 255 53 | # cv.imwrite('../dataset/mask_vis/mask_vis.png', mask_map[0][0].cpu().numpy(), [cv.IMWRITE_PNG_BILEVEL, 1]) 54 | # import pdb 55 | # pdb.set_trace() 56 | # print(mask_map.sum()) 57 | return mask_map 58 | class Gaussianlayer(nn.Module): 59 | def __init__(self, sigma=None, kernel_size=15): 60 | super(Gaussianlayer, self).__init__() 61 | if sigma == None: 62 | sigma = [4] 63 | self.gaussian = Gaussian(1, sigma, kernel_size=kernel_size, padding=kernel_size//2, 
froze=True) 64 | 65 | def forward(self, dotmaps): 66 | denmaps = self.gaussian(dotmaps) 67 | return denmaps 68 | 69 | 70 | class Conv2d(nn.Module): 71 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, NL='relu', same_padding=False, bn=False, dilation=1): 72 | super(Conv2d, self).__init__() 73 | padding = int((kernel_size - 1) // 2) if same_padding else 0 74 | self.conv = [] 75 | if dilation==1: 76 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=padding, dilation=dilation) 77 | else: 78 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=dilation, dilation=dilation) 79 | self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0, affine=True) if bn else None 80 | if NL == 'relu' : 81 | self.relu = nn.ReLU(inplace=True) 82 | elif NL == 'prelu': 83 | self.relu = nn.PReLU() 84 | else: 85 | self.relu = None 86 | 87 | def forward(self, x): 88 | x = self.conv(x) 89 | if self.bn is not None: 90 | x = self.bn(x) 91 | if self.relu is not None: 92 | x = self.relu(x) 93 | return x 94 | 95 | 96 | class FC(nn.Module): 97 | def __init__(self, in_features, out_features, NL='relu'): 98 | super(FC, self).__init__() 99 | self.fc = nn.Linear(in_features, out_features) 100 | if NL == 'relu' : 101 | self.relu = nn.ReLU(inplace=True) 102 | elif NL == 'prelu': 103 | self.relu = nn.PReLU() 104 | else: 105 | self.relu = None 106 | 107 | def forward(self, x): 108 | x = self.fc(x) 109 | if self.relu is not None: 110 | x = self.relu(x) 111 | return x 112 | 113 | class convDU(nn.Module): 114 | 115 | def __init__(self, 116 | in_out_channels=2048, 117 | kernel_size=(9,1) 118 | ): 119 | super(convDU, self).__init__() 120 | self.conv = nn.Sequential( 121 | nn.Conv2d(in_out_channels, in_out_channels, kernel_size, stride=1, padding=((kernel_size[0]-1)//2,(kernel_size[1]-1)//2)), 122 | nn.ReLU(inplace=True) 123 | ) 124 | 125 | def forward(self, fea): 126 | n, c, h, w = fea.size() 127 | 128 | fea_stack = [] 129 | for i in range(h): 130 | i_fea = fea.select(2, i).resize(n,c,1,w) 131 | if i == 0: 132 | fea_stack.append(i_fea) 133 | continue 134 | fea_stack.append(self.conv(fea_stack[i-1])+i_fea) 135 | # pdb.set_trace() 136 | # fea[:,i,:,:] = self.conv(fea[:,i-1,:,:].expand(n,1,h,w))+fea[:,i,:,:].expand(n,1,h,w) 137 | 138 | 139 | for i in range(h): 140 | pos = h-i-1 141 | if pos == h-1: 142 | continue 143 | fea_stack[pos] = self.conv(fea_stack[pos+1])+fea_stack[pos] 144 | # pdb.set_trace() 145 | fea = torch.cat(fea_stack, 2) 146 | return fea 147 | 148 | class convLR(nn.Module): 149 | 150 | def __init__(self, 151 | in_out_channels=2048, 152 | kernel_size=(1,9) 153 | ): 154 | super(convLR, self).__init__() 155 | self.conv = nn.Sequential( 156 | nn.Conv2d(in_out_channels, in_out_channels, kernel_size, stride=1, padding=((kernel_size[0]-1)//2,(kernel_size[1]-1)//2)), 157 | nn.ReLU(inplace=True) 158 | ) 159 | 160 | def forward(self, fea): 161 | n, c, h, w = fea.size() 162 | 163 | fea_stack = [] 164 | for i in range(w): 165 | i_fea = fea.select(3, i).resize(n,c,h,1) 166 | if i == 0: 167 | fea_stack.append(i_fea) 168 | continue 169 | fea_stack.append(self.conv(fea_stack[i-1])+i_fea) 170 | 171 | for i in range(w): 172 | pos = w-i-1 173 | if pos == w-1: 174 | continue 175 | fea_stack[pos] = self.conv(fea_stack[pos+1])+fea_stack[pos] 176 | 177 | 178 | fea = torch.cat(fea_stack, 3) 179 | return fea -------------------------------------------------------------------------------- /misc/modelsummary.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import logging 14 | from collections import namedtuple 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False): 20 | """ 21 | :param model: 22 | :param input_tensors: 23 | :param item_length: 24 | :return: 25 | """ 26 | 27 | summary = [] 28 | 29 | ModuleDetails = namedtuple( 30 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 31 | hooks = [] 32 | layer_instances = {} 33 | 34 | def add_hooks(module): 35 | 36 | def hook(module, input, output): 37 | class_name = str(module.__class__.__name__) 38 | 39 | instance_index = 1 40 | if class_name not in layer_instances: 41 | layer_instances[class_name] = instance_index 42 | else: 43 | instance_index = layer_instances[class_name] + 1 44 | layer_instances[class_name] = instance_index 45 | 46 | layer_name = class_name + "_" + str(instance_index) 47 | 48 | params = 0 49 | 50 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 51 | class_name.find("Linear") != -1: 52 | for param_ in module.parameters(): 53 | params += param_.view(-1).size(0) 54 | 55 | flops = "Not Available" 56 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 57 | flops = ( 58 | torch.prod( 59 | torch.LongTensor(list(module.weight.data.size()))) * 60 | torch.prod( 61 | torch.LongTensor(list(output.size())[2:]))).item() 62 | elif isinstance(module, nn.Linear): 63 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 64 | * input[0].size(1)).item() 65 | 66 | if isinstance(input[0], list): 67 | input = input[0] 68 | if isinstance(output, list): 69 | output = output[0] 70 | 71 | summary.append( 72 | ModuleDetails( 73 | name=layer_name, 74 | input_size=list(input[0].size()), 75 | output_size=list(output.size()), 76 | num_parameters=params, 77 | multiply_adds=flops) 78 | ) 79 | 80 | if not isinstance(module, nn.ModuleList) \ 81 | and not isinstance(module, nn.Sequential) \ 82 | and module != model: 83 | hooks.append(module.register_forward_hook(hook)) 84 | 85 | model.eval() 86 | model.apply(add_hooks) 87 | 88 | space_len = item_length 89 | 90 | model(*input_tensors) 91 | for hook in hooks: 92 | hook.remove() 93 | 94 | details = '' 95 | if verbose: 96 | details = "Model Summary" + \ 97 | os.linesep + \ 98 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 99 | ' ' * (space_len - len("Name")), 100 | ' ' * (space_len - len("Input Size")), 101 | ' ' * (space_len - len("Output Size")), 102 | ' ' * (space_len - len("Parameters")), 103 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 104 | + os.linesep + '-' * space_len * 5 + os.linesep 105 | 106 | params_sum = 0 107 | flops_sum = 0 108 | for layer in summary: 109 | params_sum += layer.num_parameters 110 | if layer.multiply_adds != "Not Available": 111 | flops_sum += layer.multiply_adds 112 | if verbose: 113 | details += "{}{}{}{}{}{}{}{}{}{}".format( 114 | layer.name, 115 | ' ' * (space_len - 
len(layer.name)), 116 | layer.input_size, 117 | ' ' * (space_len - len(str(layer.input_size))), 118 | layer.output_size, 119 | ' ' * (space_len - len(str(layer.output_size))), 120 | layer.num_parameters, 121 | ' ' * (space_len - len(str(layer.num_parameters))), 122 | layer.multiply_adds, 123 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 124 | + os.linesep + '-' * space_len * 5 + os.linesep 125 | 126 | details += os.linesep \ 127 | + "Total Parameters: {:,}".format(params_sum) \ 128 | + os.linesep + '-' * space_len * 5 + os.linesep 129 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 130 | + os.linesep + '-' * space_len * 5 + os.linesep 131 | details += "Number of Layers" + os.linesep 132 | for layer in layer_instances: 133 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 134 | 135 | return details -------------------------------------------------------------------------------- /misc/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | ''' 8 | nms.py: CPU implementation of non-maximal suppression, modified from Ross Girshick's code. 9 | Authors : svp 10 | 11 | Modified from https://github.com/rbgirshick/fast-rcnn/blob/master/lib/utils/nms.py 12 | to accommodate a corner case which handles one box lying completely inside another. 13 | ''' 14 | import numpy as np 15 | 16 | 17 | def is_square(inter, areas): 18 | truth_val = np.logical_not((np.logical_and((np.sqrt(areas) ** 2 == areas), (np.sqrt(inter) ** 2 == inter)))) 19 | return np.float32(truth_val) 20 | 21 | 22 | def nms(dets, thresh): 23 | x1 = dets[:, 0] 24 | y1 = dets[:, 1] 25 | x2 = dets[:, 2] 26 | y2 = dets[:, 3] 27 | scores = dets[:, 4] 28 | 29 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 30 | 31 | order = scores.argsort()[::-1] 32 | 33 | keep = [] 34 | while order.size > 0: 35 | i = order[0] 36 | keep.append(i) 37 | xx1 = np.maximum(x1[i], x1[order[1:]]) 38 | yy1 = np.maximum(y1[i], y1[order[1:]]) 39 | xx2 = np.minimum(x2[i], x2[order[1:]]) 40 | yy2 = np.minimum(y2[i], y2[order[1:]]) 41 | 42 | w = np.maximum(0.0, xx2 - xx1 + 1) 43 | h = np.maximum(0.0, yy2 - yy1 + 1) 44 | inter = w * h 45 | 46 | remove_index_1 = np.where(areas[i] == inter) # box i lies completely inside another box 47 | remove_index_2 = np.where(areas[order[1:]] == inter) # box i completely contains another box 48 | 49 | ovr = 1 / 3 * inter / (areas[i] + areas[order[1:]] - inter) \ 50 | + 1 / 3 * inter / areas[i] \ 51 | + 1 / 3 * inter / areas[order[1:]] 52 | 53 | # ovr = inter / (areas[i] + areas[order[1:]] - inter)* np.maximum (areas[order[1:]]/areas[i], areas[i]/areas[order[1:]]) 54 | 55 | ovr[remove_index_1] = 1.0 56 | ovr[remove_index_2] = 1.0 57 | inds = np.where(ovr <= thresh)[0] # indices of the boxes kept for the next round 58 | order = order[inds + 1] 59 | 60 | return keep 61 | 62 | 63 | if __name__ == '__main__': 64 | a = np.array([[1, 2, 4, 5, 0.9], [1, 2, 3, 4, 0.99], [8, 2, 9, 4, 0.99]]) 65 | keep = nms(a, 0.2) 66 | print(keep) 67 | np.where(np.array([78, 3, 4, 54, 3, ]) > 10) -------------------------------------------------------------------------------- /model/MatchTool/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/MatchTool/__init__.py -------------------------------------------------------------------------------- /model/MatchTool/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/MatchTool/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /model/MatchTool/__pycache__/compute_metric.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/MatchTool/__pycache__/compute_metric.cpython-37.pyc -------------------------------------------------------------------------------- /model/MatchTool/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/MatchTool/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /model/MatchTool/compute_metric.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | from scipy import spatial as ss 5 | import pdb 6 | 7 | import cv2 8 | from .utils import hungarian,read_pred_and_gt,AverageMeter,AverageCategoryMeter 9 | 10 | # gt_file = 'val_gt_loc.txt' 11 | # pred_file = 'TinyFaces_loc_0.8_0.3.txt' 12 | 13 | flagError = False 14 | id_std = [i for i in range(3110,3610,1)] 15 | id_std[59] = 3098 16 | num_classes = 6 17 | 18 | 19 | def compute_metrics(dist_matrix,match_matrix,pred_num,sigma): 20 | for i_pred_p in range(pred_num): 21 | pred_dist = dist_matrix[i_pred_p,:] 22 | match_matrix[i_pred_p,:] = pred_dist<=sigma 23 | 24 | tp, assign = hungarian(match_matrix) 25 | fn_gt_index = np.array(np.where(assign.sum(0)==0))[0] 26 | 27 | fp_pred_index = np.array(np.where(assign.sum(1)==0))[0] 28 | 29 | # tp_pred_index = np.array(np.where(assign.sum(1)==1))[0] 30 | # tp_gt_index = np.array(np.where(assign.sum(0)==1))[0] 31 | 32 | tp_pred_index, tp_gt_index = np.where(assign==1) 33 | 34 | tp = tp_pred_index.shape[0] 35 | fp = fp_pred_index.shape[0] 36 | fn = fn_gt_index.shape[0] 37 | 38 | # 39 | # import pdb 40 | # pdb.set_trace() 41 | return tp,fp,fn,tp_pred_index,fp_pred_index ,tp_gt_index, fn_gt_index 42 | 43 | 44 | 45 | 46 | def associate_pred2gt_point(pred_data, gt_data): 47 | # import pdb 48 | # pdb.set_trace() 49 | pred_p = pred_data['points'].cpu().numpy() 50 | gt_p = gt_data['points'].cpu().numpy() 51 | gt_sigma = gt_data['sigma'].cpu().numpy() 52 | if gt_p.shape[0]>0: 53 | gt_data = {'num':gt_p.shape[0], 'points':gt_p,'sigma':gt_sigma} 54 | else: 55 | gt_data = {'num':0, 'points':[],'sigma':[]} 56 | 57 | tp_l,fp_l,fn_l = [0,0,0] 58 | tp_pred_index,tp_gt_index = [],[] 59 | if gt_data['num'] ==0 and pred_p.shape[0] !=0: 60 | fp_pred_index = np.array(range(pred_p.shape[0])) 61 | fp_l = fp_pred_index.shape[0] 62 | 63 | if pred_p.shape[0] ==0 and gt_data['num'] !=0: 64 | gt_p = gt_data['points'] 65 | fn_gt_index = np.array(range(gt_p.shape[0])) 66 | fn_l = fn_gt_index.shape[0] 67 | 68 | 69 | if gt_data['num'] !=0 and pred_p.shape[0] !=0: 70 | gt_p = gt_data['points'] 71 | sigma = gt_data['sigma'] 72 | 73 | # dist 74 | 
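# Pairwise L2 distances between predicted and ground-truth points; a
# prediction is a candidate match for a GT point only when their distance
# falls within that point's sigma (see compute_metrics above), and matches
# are then resolved by the Hungarian routine in utils.py.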
dist_matrix = ss.distance_matrix(pred_p,gt_p,p=2) 75 | match_matrix = np.zeros(dist_matrix.shape,dtype=bool) 76 | 77 | # sigma_s and sigma_l 78 | tp_l,fp_l,fn_l,tp_pred_index,fp_pred_index ,tp_gt_index, fn_gt_index = compute_metrics(dist_matrix,match_matrix,pred_p.shape[0],sigma) 79 | return tp_pred_index,tp_gt_index 80 | 81 | 82 | 83 | 84 | def associate_pred2gt_point_vis(pred_data, gt_data, gt_diff_idx): 85 | # import pdb 86 | # pdb.set_trace() 87 | pred_p = pred_data.cpu().numpy() 88 | gt_p = gt_data['points'].cpu().numpy()[gt_diff_idx] 89 | gt_sigma = gt_data['sigma'].cpu().numpy()[gt_diff_idx] 90 | if gt_p.shape[0]>0: 91 | gt_data = {'num':gt_p.shape[0], 'points':gt_p,'sigma':gt_sigma} 92 | else: 93 | gt_data = {'num':0, 'points':[],'sigma':[]} 94 | 95 | tp_l,fp_l,fn_l = [0,0,0] 96 | tp_pred_index,tp_gt_index,fp_pred_index,fn_gt_index = [],[],[],[] 97 | if gt_data['num'] ==0 and pred_p.shape[0] !=0: 98 | fp_pred_index = np.array(range(pred_p.shape[0])) 99 | fp_l = fp_pred_index.shape[0] 100 | fn_gt_index = np.array([]) 101 | if pred_p.shape[0] ==0 and gt_data['num'] !=0: 102 | gt_p = gt_data['points'] 103 | fn_gt_index = np.array(range(gt_p.shape[0])) 104 | fn_l = fn_gt_index.shape[0] 105 | fp_pred_index = np.array([]) 106 | 107 | if gt_data['num'] !=0 and pred_p.shape[0] !=0: 108 | gt_p = gt_data['points'] 109 | sigma = gt_data['sigma'] 110 | 111 | # dist 112 | dist_matrix = ss.distance_matrix(pred_p,gt_p,p=2) 113 | match_matrix = np.zeros(dist_matrix.shape,dtype=bool) 114 | 115 | # sigma_s and sigma_l 116 | tp_l,fp_l,fn_l,tp_pred_index,fp_pred_index ,tp_gt_index, fn_gt_index = compute_metrics(dist_matrix,match_matrix,pred_p.shape[0],sigma) 117 | return tp_pred_index,fp_pred_index ,tp_gt_index, fn_gt_index 118 | 119 | if __name__ == '__main__': 120 | eval_metrics() 121 | -------------------------------------------------------------------------------- /model/MatchTool/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | import time 5 | import random 6 | import shutil 7 | import cv2 8 | from PIL import Image 9 | 10 | import torch 11 | from torch import nn 12 | import torch.nn.functional as F 13 | import torchvision.utils as vutils 14 | import torchvision.transforms as standard_transforms 15 | 16 | import sys 17 | sys.setrecursionlimit(100000) # set the recursion depth 18 | # Hungarian method for bipartite graph 19 | def hungarian(matrixTF): 20 | # matrix to adjacent matrix 21 | edges = np.argwhere(matrixTF) 22 | lnum, rnum = matrixTF.shape 23 | graph = [[] for _ in range(lnum)] 24 | for edge in edges: 25 | graph[edge[0]].append(edge[1]) 26 | 27 | # deep first search 28 | match = [-1 for _ in range(rnum)] 29 | vis = [-1 for _ in range(rnum)] 30 | def dfs(u): 31 | for v in graph[u]: 32 | if vis[v]: continue 33 | vis[v] = True 34 | if match[v] == -1 or dfs(match[v]): 35 | match[v] = u 36 | return True 37 | return False 38 | 39 | # for loop 40 | ans = 0 41 | for a in range(lnum): 42 | for i in range(rnum): vis[i] = False 43 | if dfs(a): ans += 1 44 | 45 | # assignment matrix 46 | assign = np.zeros((lnum, rnum), dtype=bool) 47 | for i, m in enumerate(match): 48 | if m >= 0: 49 | assign[m, i] = True 50 | 51 | return ans, assign 52 | 53 | def read_pred_and_gt(pred_file,gt_file): 54 | # read pred 55 | pred_data = {} 56 | with open(pred_file) as f: 57 | 58 | id_read = [] 59 | for line in f.readlines(): 60 | line = line.strip().split(' ') 61 | 62 | # check1 63 | if len(line) <2 or len(line) % 2 !=0 or 
(len(line)-2)/2 != int(line[1]): 64 | flagError = True 65 | sys.exit(1) 66 | 67 | line_data = [int(i) for i in line] 68 | idx, num = [line_data[0], line_data[1]] 69 | id_read.append(idx) 70 | 71 | points = [] 72 | if num>0: 73 | points = np.array(line_data[2:]).reshape(((len(line)-2)//2,2)) 74 | pred_data[idx] = {'num': num, 'points':points} 75 | else: 76 | pred_data[idx] = {'num': num, 'points':[]} 77 | 78 | # read gt 79 | gt_data = {} 80 | with open(gt_file) as f: 81 | for line in f.readlines(): 82 | line = line.strip().split(' ') 83 | line_data = [int(i) for i in line] 84 | idx, num = [line_data[0], line_data[1]] 85 | points_r = [] 86 | if num>0: 87 | points_r = np.array(line_data[2:]).reshape(((len(line)-2)//5,5)) 88 | gt_data[idx] = {'num': num, 'points':points_r[:,0:2], 'sigma': points_r[:,2:4], 'level':points_r[:,4]} 89 | else: 90 | gt_data[idx] = {'num': 0, 'points':[], 'sigma':[], 'level':[]} 91 | 92 | return pred_data, gt_data 93 | 94 | class AverageMeter(object): 95 | """Computes and stores the average and current value""" 96 | 97 | def __init__(self): 98 | self.reset() 99 | 100 | def reset(self): 101 | self.cur_val = 0 102 | self.avg = 0 103 | self.sum = 0 104 | self.count = 0 105 | 106 | def update(self, cur_val, cur_count=1): 107 | self.cur_val = cur_val 108 | self.sum += cur_val 109 | self.count += cur_count 110 | self.avg = self.sum / self.count 111 | 112 | class AverageCategoryMeter(object): 113 | """Computes and stores the average and current value""" 114 | 115 | def __init__(self,num_class): 116 | self.num_class = num_class 117 | self.reset() 118 | 119 | def reset(self): 120 | self.cur_val = np.zeros(self.num_class) 121 | self.sum = np.zeros(self.num_class) 122 | 123 | 124 | def update(self, cur_val): 125 | self.cur_val = cur_val 126 | self.sum += cur_val 127 | 128 | class MultiAverageMeter(object): 129 | """Computes and stores the average and current value""" 130 | 131 | def __init__(self,num_class): 132 | self.num_class = num_class 133 | self.reset() 134 | 135 | def reset(self): 136 | self.cur_val = np.zeros(self.num_class) 137 | self.sum = np.zeros(self.num_class) 138 | 139 | 140 | def update(self, cur_val,id): 141 | self.cur_val[id] = cur_val 142 | self.sum[id] += cur_val 143 | 144 | 145 | if __name__ =="__main__": 146 | a = MultiAverageMeter(100) 147 | a.update(10,6) 148 | print(a.cur_val) 149 | print(a.sum) -------------------------------------------------------------------------------- /model/PreciseRoIPooling/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .vim-template* 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jiayuan Mao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/README.md: -------------------------------------------------------------------------------- 1 | # PreciseRoIPooling 2 | This repo implements the **Precise RoI Pooling** (PrRoI Pooling), proposed in the paper **Acquisition of Localization Confidence for Accurate Object Detection** published at ECCV 2018 (Oral Presentation). 3 | 4 | **Acquisition of Localization Confidence for Accurate Object Detection** 5 | 6 | _Borui Jiang*, Ruixuan Luo*, Jiayuan Mao*, Tete Xiao, Yuning Jiang_ (* indicates equal contribution.) 7 | 8 | https://arxiv.org/abs/1807.11590 9 | 10 | **Caution**: To install the library, please `git clone` the repository instead of downloading the zip file, since source files inside the folders `./pytorch/prroi_pool/src/` and `tensorflow/prroi_pool/src/kernels/external` are symbolically linked. Downloading the repository as a zip file will break these symbolic links.
Also, there are reports indicating that Git on Windows breaks these symbolic links. See [issues/58](https://github.com/vacancy/PreciseRoIPooling/issues/58). 11 | 12 | ## Brief 13 | 14 | In short, Precise RoI Pooling is an integration-based (bilinear interpolation) average pooling method for RoI Pooling. It avoids any quantization and has a continuous gradient on bounding box coordinates. It is: 15 | 16 | - different from the original RoI Pooling proposed in [Fast R-CNN](https://arxiv.org/abs/1504.08083). PrRoI Pooling uses average pooling instead of max pooling for each bin and has a continuous gradient on bounding box coordinates. That is, one can take the derivatives of some loss function w.r.t. the coordinates of each RoI and optimize the RoI coordinates. 17 | - different from the RoI Align proposed in [Mask R-CNN](https://arxiv.org/abs/1703.06870). PrRoI Pooling uses a full integration-based average pooling instead of sampling a constant number of points. This makes the gradient w.r.t. the coordinates continuous. 18 | 19 | For a better illustration, we compare RoI Pooling, RoI Align and PrRoI Pooling in the following figure. More details, including the gradient computation, can be found in our paper. 20 | 21 |
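As a quick illustration of that property (our own sketch, not part of the original release; it assumes a CUDA device and that the PyTorch extension described below compiles successfully), the pooled output can be backpropagated into the RoI tensor itself:

```
# Illustrative only: gradients w.r.t. RoI coordinates through PrRoI Pooling.
import torch
from prroi_pool import PrRoIPool2D

pool = PrRoIPool2D(7, 7, spatial_scale=1.0)
features = torch.rand(1, 8, 32, 32).cuda()
# one RoI in (batch_index, x0, y0, x1, y1) format; the .cuda() copy is a
# leaf tensor here, so it can be marked as requiring gradients
rois = torch.tensor([[0.0, 4.0, 4.0, 20.0, 20.0]]).cuda().requires_grad_()

out = pool(features, rois)  # shape (1, 8, 7, 7)
out.sum().backward()
print(rois.grad)            # non-zero gradients w.r.t. x0, y0, x1, y1
```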
22 | 23 | ## Implementation 24 | 25 | PrRoI Pooling was originally implemented by [Tete Xiao](http://tetexiao.com/) based on MegBrain, an (internal) deep learning framework built by Megvii Inc. It was later adapted into open-source deep learning frameworks. Currently, we only support PyTorch. Unfortunately, we don't have any specific plan for the adaptation into other frameworks such as TensorFlow, but any contributions (pull requests) will be more than welcome. 26 | 27 | ## Usage (PyTorch 1.0) 28 | 29 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 1.0+ and only supports CUDA (CPU mode is not implemented). 30 | Since we use PyTorch JIT for cxx/cuda code compilation, to use the module in your code, simply do: 31 | 32 | ``` 33 | from prroi_pool import PrRoIPool2D 34 | 35 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale) 36 | roi_features = avg_pool(features, rois) 37 | 38 | # for those who want to use the "functional" 39 | 40 | from prroi_pool.functional import prroi_pool2d 41 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale) 42 | ``` 43 | 44 | ## Usage (PyTorch 0.4) 45 | 46 | **!!! Please first check out the branch pytorch0.4.** 47 | 48 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 0.4 and only supports CUDA (CPU mode is not implemented). 49 | To use the PrRoI Pooling module, first go to `pytorch/prroi_pool` and execute `./travis.sh` to compile the essential components (you may need `nvcc` for this step). To use the module in your code, simply do: 50 | 51 | ``` 52 | from prroi_pool import PrRoIPool2D 53 | 54 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale) 55 | roi_features = avg_pool(features, rois) 56 | 57 | # for those who want to use the "functional" 58 | 59 | from prroi_pool.functional import prroi_pool2d 60 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale) 61 | ``` 62 | 63 | Here, 64 | 65 | - RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor. 66 | - `spatial_scale` is multiplied with the RoI coordinates. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`. 67 | - The RoI coordinates follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`. 68 | 69 | ## Usage (TensorFlow) 70 | In the directory `tensorflow/`, we provide a TensorFlow-based implementation of PrRoI Pooling. It is tested with TensorFlow 2.2 and only supports CUDA (CPU mode is not implemented). 71 | To compile the essential components, follow the step-by-step instructions below (you may need `nvcc` for this step). 72 | 73 | 74 | ### Requirements 75 | * CUDA compiler (NVCC) 76 | * Tensorflow-GPU 2.x 77 | * CMake 78 | * Microsoft Visual C++ Build Tools (for Windows users) 79 | ### Step-by-step instructions 80 | #### For Ubuntu Users 81 | ##### CMake Configuration 82 | 83 | ``` 84 | mkdir tensorflow/prroi_pool/build 85 | cd tensorflow/prroi_pool/build 86 | cmake -DCMAKE_BUILD_TYPE="Release" ..
87 | ``` 88 | #### Build & Test PrRoI Pooling module 89 | ``` 90 | make 91 | ``` 92 | #### For Windows Users 93 | 94 | ##### MSVC Configuration 95 | ``` 96 | ${MSVC_INSTALL_PATH}\VC\Auxiliary\Build\vcvars64.bat 97 | ``` 98 | ##### CMake Configuration 99 | 100 | ``` 101 | mkdir tensorflow/prroi_pool/build 102 | cd tensorflow/prroi_pool/build 103 | cmake -DCMAKE_BUILD_TYPE="Release" -G "NMake Makefiles" .. 104 | ``` 105 | ##### Build & Test Custom Ops 106 | ``` 107 | nmake BUILD=release 108 | ``` 109 | 110 | To use the module in your code, simply do: 111 | ``` 112 | from prroi_pool import PreciseRoIPooling 113 | 114 | avg_pool = PreciseRoIPooling(window_height, window_width, spatial_scale, data_format) 115 | roi_features = avg_pool([features, rois]) 116 | 117 | ``` 118 | 119 | Here, 120 | 121 | - RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor. 122 | - `spatial_scale` is multiplied with the RoI coordinates. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`. 123 | - The RoI coordinates follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`. 124 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/_assets/prroi_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/_assets/prroi_visualization.png -------------------------------------------------------------------------------- /model/PreciseRoIPooling/pytorch/prroi_pool/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | /_prroi_pooling 3 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/pytorch/prroi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : __init__.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | from .prroi_pool import * 13 | 14 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/pytorch/prroi_pool/functional.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : functional.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited.
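# NOTE: the CUDA sources under src/ are JIT-compiled on first use via
# torch.utils.cpp_extension.load (see _import_prroi_pooling below), so the
# first call in a fresh environment needs nvcc on the PATH.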
11 | 12 | import torch 13 | import torch.autograd as ag 14 | 15 | __all__ = ['prroi_pool2d'] 16 | 17 | 18 | _prroi_pooling = None 19 | 20 | 21 | def _import_prroi_pooling(): 22 | global _prroi_pooling 23 | 24 | if _prroi_pooling is None: 25 | try: 26 | from os.path import join as pjoin, dirname 27 | from torch.utils.cpp_extension import load as load_extension 28 | root_dir = pjoin(dirname(__file__), 'src') 29 | 30 | _prroi_pooling = load_extension( 31 | '_prroi_pooling', 32 | [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')], 33 | verbose=True 34 | ) 35 | except ImportError: 36 | raise ImportError('Cannot compile Precise RoI Pooling library.') 37 | 38 | return _prroi_pooling 39 | 40 | 41 | class PrRoIPool2DFunction(ag.Function): 42 | @staticmethod 43 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale): 44 | _prroi_pooling = _import_prroi_pooling() 45 | 46 | assert 'FloatTensor' in features.type() and 'FloatTensor' in rois.type(), \ 47 | 'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features.type(), rois.type()) 48 | 49 | pooled_height = int(pooled_height) 50 | pooled_width = int(pooled_width) 51 | spatial_scale = float(spatial_scale) 52 | 53 | features = features.contiguous() 54 | rois = rois.contiguous() 55 | params = (pooled_height, pooled_width, spatial_scale) 56 | 57 | if features.is_cuda: 58 | output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params) 59 | ctx.params = params 60 | # everything here is contiguous. 61 | ctx.save_for_backward(features, rois, output) 62 | else: 63 | raise NotImplementedError('Precise RoI Pooling only supports GPU (cuda) implementations.') 64 | 65 | return output 66 | 67 | @staticmethod 68 | def backward(ctx, grad_output): 69 | _prroi_pooling = _import_prroi_pooling() 70 | 71 | features, rois, output = ctx.saved_tensors 72 | grad_input = grad_coor = None 73 | 74 | if features.requires_grad: 75 | grad_output = grad_output.contiguous() 76 | grad_input = _prroi_pooling.prroi_pooling_backward_cuda(features, rois, output, grad_output, *ctx.params) 77 | if rois.requires_grad: 78 | grad_output = grad_output.contiguous() 79 | grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(features, rois, output, grad_output, *ctx.params) 80 | 81 | return grad_input, grad_coor, None, None, None 82 | 83 | 84 | prroi_pool2d = PrRoIPool2DFunction.apply 85 | 86 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/pytorch/prroi_pool/prroi_pool.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : prroi_pool.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited.
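# PrRoIPool2D wraps the functional prroi_pool2d as an nn.Module. RoIs are
# float tensors of shape (m, 5) in (batch_index, x0, y0, x1, y1) format,
# given in input-image coordinates and scaled by spatial_scale internally.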
11 | 12 | import torch.nn as nn 13 | 14 | from .functional import prroi_pool2d 15 | 16 | __all__ = ['PrRoIPool2D'] 17 | 18 | 19 | class PrRoIPool2D(nn.Module): 20 | def __init__(self, pooled_height, pooled_width, spatial_scale): 21 | super().__init__() 22 | 23 | self.pooled_height = int(pooled_height) 24 | self.pooled_width = int(pooled_width) 25 | self.spatial_scale = float(spatial_scale) 26 | 27 | def forward(self, features, rois): 28 | return prroi_pool2d(features, rois, self.pooled_height, self.pooled_width, self.spatial_scale) 29 | 30 | def extra_repr(self): 31 | return 'kernel_size=({pooled_height}, {pooled_width}), spatial_scale={spatial_scale}'.format(**self.__dict__) 32 | 33 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.c 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 9 | */ 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | #include "prroi_pooling_gpu_impl.cuh" 20 | 21 | 22 | at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) { 23 | int nr_rois = rois.size(0); 24 | int nr_channels = features.size(1); 25 | int height = features.size(2); 26 | int width = features.size(3); 27 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 28 | auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options()); 29 | 30 | if (output.numel() == 0) { 31 | THCudaCheck(cudaGetLastError()); 32 | return output; 33 | } 34 | 35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 36 | PrRoIPoolingForwardGpu( 37 | stream, features.data(), rois.data(), output.data(), 38 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 39 | top_count 40 | ); 41 | 42 | THCudaCheck(cudaGetLastError()); 43 | return output; 44 | } 45 | 46 | at::Tensor prroi_pooling_backward_cuda( 47 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 48 | int pooled_height, int pooled_width, float spatial_scale) { 49 | 50 | auto features_diff = at::zeros_like(features); 51 | 52 | int nr_rois = rois.size(0); 53 | int batch_size = features.size(0); 54 | int nr_channels = features.size(1); 55 | int height = features.size(2); 56 | int width = features.size(3); 57 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 58 | int bottom_count = batch_size * nr_channels * height * width; 59 | 60 | if (output.numel() == 0) { 61 | THCudaCheck(cudaGetLastError()); 62 | return features_diff; 63 | } 64 | 65 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 66 | PrRoIPoolingBackwardGpu( 67 | stream, 68 | features.data(), rois.data(), output.data(), output_diff.data(), 69 | features_diff.data(), 70 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 71 | top_count, bottom_count 72 | ); 73 | 74 | THCudaCheck(cudaGetLastError()); 75 | return features_diff; 76 | } 77 | 78 | at::Tensor prroi_pooling_coor_backward_cuda( 79 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 80 | int 
pooled_height, int pooled_width, float spatial_scale) { 81 | 82 | auto coor_diff = at::zeros_like(rois); 83 | 84 | int nr_rois = rois.size(0); 85 | int nr_channels = features.size(1); 86 | int height = features.size(2); 87 | int width = features.size(3); 88 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 89 | int bottom_count = nr_rois * 5; 90 | 91 | if (output.numel() == 0) { 92 | THCudaCheck(cudaGetLastError()); 93 | return coor_diff; 94 | } 95 | 96 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 97 | PrRoIPoolingCoorBackwardGpu( 98 | stream, 99 | features.data(), rois.data(), output.data(), output_diff.data(), 100 | coor_diff.data(), 101 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 102 | top_count, bottom_count 103 | ); 104 | 105 | THCudaCheck(cudaGetLastError()); 106 | return coor_diff; 107 | } 108 | 109 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 110 | m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward"); 111 | m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, "PRRoIPooling_backward"); 112 | m.def("prroi_pooling_coor_backward_cuda", &prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor"); 113 | } 114 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.h 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 9 | */ 10 | 11 | int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale); 12 | 13 | int prroi_pooling_backward_cuda( 14 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 15 | int pooled_height, int pooled_width, float spatial_scale 16 | ); 17 | 18 | int prroi_pooling_coor_backward_cuda( 19 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 20 | int pooled_height, int pooled_width, float spatial_scal 21 | ); 22 | 23 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 
8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/pytorch/tests/test_prroi_pooling2d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : test_prroi_pooling2d.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 18/02/2018 6 | # 7 | # This file is part of Jacinle. 
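# The forward test below checks PrRoI Pooling against an equivalent
# avg_pool2d result on integer-aligned boxes; the backward test only checks
# gradient shapes for both the features and the RoI coordinates.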
8 | 9 | import unittest 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | from jactorch.utils.unittest import TorchTestCase 16 | 17 | from prroi_pool import PrRoIPool2D 18 | 19 | 20 | class TestPrRoIPool2D(TorchTestCase): 21 | def test_forward(self): 22 | pool = PrRoIPool2D(7, 7, spatial_scale=0.5) 23 | features = torch.rand((4, 16, 24, 32)).cuda() 24 | rois = torch.tensor([ 25 | [0, 0, 0, 14, 14], 26 | [1, 14, 14, 28, 28], 27 | ]).float().cuda() 28 | 29 | out = pool(features, rois) 30 | out_gold = F.avg_pool2d(features, kernel_size=2, stride=1) 31 | 32 | self.assertTensorClose(out, torch.stack(( 33 | out_gold[0, :, :7, :7], 34 | out_gold[1, :, 7:14, 7:14], 35 | ), dim=0)) 36 | 37 | def test_backward_shapeonly(self): 38 | pool = PrRoIPool2D(2, 2, spatial_scale=0.5) 39 | 40 | features = torch.rand((4, 2, 24, 32)).cuda() 41 | rois = torch.tensor([ 42 | [0, 0, 0, 4, 4], 43 | [1, 14, 14, 18, 18], 44 | ]).float().cuda() 45 | features.requires_grad = rois.requires_grad = True 46 | out = pool(features, rois) 47 | 48 | loss = out.sum() 49 | loss.backward() 50 | 51 | self.assertTupleEqual(features.size(), features.grad.size()) 52 | self.assertTupleEqual(rois.size(), rois.grad.size()) 53 | 54 | 55 | if __name__ == '__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/prroi_pool/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # File : CMakeLists.txt 2 | # Author : Kanghee 
Lee 3 | # Email : lerohiso@gmail.com 4 | # Date : 09/25/2020 5 | # 6 | # This file is part of PreciseRoIPooling. 7 | # Distributed under terms of the MIT license. 8 | 9 | CMAKE_MINIMUM_REQUIRED(VERSION 3.17 FATAL_ERROR) 10 | 11 | PROJECT(precise_roi_pooling) 12 | FIND_PACKAGE(CUDA) 13 | FIND_PACKAGE(PythonInterp 3) 14 | 15 | if (MSVC) 16 | SET(GPU_LIB ${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build/precise_roi_pooling_cuda.lib) 17 | elseif (UNIX) 18 | SET(GPU_LIB ${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build/precise_roi_pooling_cuda.so) 19 | endif() 20 | 21 | if (NOT EXISTS ${GPU_LIB}) 22 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build_cuda.py" RESULTS_VARIABLE RET_CODE) 23 | if (NOT "${RET_CODE}" STREQUAL "0") 24 | MESSAGE(FATAL_ERROR "Failed to compile the CUDA code") 25 | endif () 26 | endif () 27 | 28 | if (NOT DEFINED TF_PATH) 29 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_include(), end='', flush=True)" OUTPUT_VARIABLE TF_INC) 30 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_lib(), end='', flush=True)" OUTPUT_VARIABLE TF_LIB) 31 | MESSAGE(STATUS "TF_INC: " ${TF_INC}) 32 | MESSAGE(STATUS "TF_LIB: " ${TF_LIB}) 33 | SET(TF_PATH 1) 34 | endif () 35 | 36 | if (NOT DEFINED TF_FLAGS) 37 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()), end='', flush=True)" OUTPUT_VARIABLE TF_CFLAGS) 38 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()), end='', flush=True)" OUTPUT_VARIABLE TF_LFLAGS) 39 | MESSAGE(STATUS "TF_CFLAGS: " ${TF_CFLAGS}) 40 | MESSAGE(STATUS "TF_LFLAGS: " ${TF_LFLAGS}) 41 | SET(TF_FLAGS 1) 42 | endif () 43 | 44 | INCLUDE_DIRECTORIES(${TF_INC}) 45 | LINK_DIRECTORIES(${TF_LIB}) 46 | INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) 47 | 48 | LIST(APPEND CMAKE_CXX_FLAGS "${TF_CFLAGS} ${TF_LFLAGS} -O2 -D GOOGLE_CUDA=1 -std=c++11 -shared") 49 | if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") 50 | LIST(APPEND CMAKE_CXX_FLAGS " -lcudart -DNOMINMAX") 51 | endif () 52 | 53 | MESSAGE(STATUS "CMAKE_CXX_COMPILER_ID: " ${CMAKE_CXX_COMPILER_ID}) 54 | MESSAGE(STATUS "CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS}) 55 | 56 | ADD_LIBRARY(precise_roi_pooling SHARED src/kernels/precise_roi_pooling.h 57 | src/kernels/precise_roi_pooling_kernels.cc 58 | src/ops/precise_roi_pooling_ops.cc) 59 | TARGET_COMPILE_FEATURES(precise_roi_pooling PUBLIC cxx_std_11) 60 | SET_TARGET_PROPERTIES(precise_roi_pooling PROPERTIES 61 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/module/" 62 | LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/module/" 63 | ) 64 | 65 | ADD_LIBRARY(precise_roi_pooling_gpu SHARED IMPORTED) 66 | if (MSVC) 67 | SET_TARGET_PROPERTIES(precise_roi_pooling_gpu PROPERTIES IMPORTED_IMPLIB ${GPU_LIB}) 68 | elseif (UNIX) 69 | SET_TARGET_PROPERTIES(precise_roi_pooling_gpu PROPERTIES IMPORTED_LOCATION ${GPU_LIB}) 70 | endif() 71 | 72 | ADD_LIBRARY(tensorflow_internal SHARED IMPORTED) 73 | if (MSVC) 74 | SET_TARGET_PROPERTIES(tensorflow_internal PROPERTIES 75 | IMPORTED_IMPLIB ${TF_LIB}/python/_pywrap_tensorflow_internal.lib) 76 | elseif (UNIX) 77 | SET_TARGET_PROPERTIES(tensorflow_internal PROPERTIES 78 | IMPORTED_LOCATION
${TF_LIB}/python/_pywrap_tensorflow_internal.so) 79 | endif() 80 | 81 | TARGET_LINK_LIBRARIES(precise_roi_pooling tensorflow_internal 82 | precise_roi_pooling_gpu 83 | ${CUDA_LIBRARIES}) 84 | 85 | ADD_CUSTOM_TARGET(precise_roi_pooling_test ALL 86 | COMMAND ${CMAKE_COMMAND} -E env 87 | "PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/../" 88 | ${PYTHON_EXECUTABLE} tests/precise_roi_pooling_ops_test.py 89 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../") 90 | 91 | ADD_DEPENDENCIES(precise_roi_pooling_test precise_roi_pooling) 92 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/prroi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : __init__.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | 11 | from .precise_roi_pooling_ops import * 12 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/prroi_pool/precise_roi_pooling_ops.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : precise_roi_pooling_ops.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import os 16 | import platform 17 | import tensorflow as tf 18 | 19 | __all__ = ['PreciseRoIPooling'] 20 | 21 | os_type = platform.system() 22 | if os_type == 'Windows': 23 | MODULE_NAME = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'module/precise_roi_pooling.dll') 24 | elif os_type == 'Linux': 25 | MODULE_NAME = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'module/libprecise_roi_pooling.so') 26 | else: raise OSError('unsupported os type: %s' % os_type)  # only Windows/Linux binaries are built 27 | _precise_roi_pooling_ops = tf.load_op_library(MODULE_NAME) 28 | 29 | def _precise_roi_pooling(features, 30 | rois, 31 | pooled_height, 32 | pooled_width, 33 | spatial_scale, 34 | data_format, 35 | name=None): 36 | with tf.name_scope(name or "precise_roi_pooling"): 37 | op_call = _precise_roi_pooling_ops.precise_ro_i_pooling 38 | 39 | if data_format == 'channels_last': 40 | inputs = tf.transpose(features, [0, 3, 1, 2]) 41 | elif data_format == "channels_first": 42 | inputs = features 43 | else: 44 | raise ValueError('`data_format` must be either `channels_last` or `channels_first`') 45 | 46 | outputs = op_call(inputs, 47 | rois, 48 | pooled_height=pooled_height, 49 | pooled_width=pooled_width, 50 | spatial_scale=spatial_scale, 51 | data_format='NCHW') 52 | 53 | if data_format == 'channels_last': 54 | return tf.transpose(outputs, [0, 2, 3, 1]) 55 | 56 | return outputs 57 | 58 | class PreciseRoIPooling(tf.keras.layers.Layer): 59 | def __init__(self, 60 | pooled_height: int, 61 | pooled_width: int, 62 | spatial_scale: float, 63 | data_format: str = 'channels_first', 64 | **kwargs): 65 | self.pooled_height = pooled_height 66 | self.pooled_width = pooled_width 67 | self.spatial_scale = spatial_scale 68 | 69 | if data_format != 'channels_last' and data_format != 'channels_first': 70 | raise ValueError('`data_format` must be either `channels_last` or ' 71 |
'`channels_first`, instead got %s' % data_format) 72 | 73 | self.data_format = data_format 74 | 75 | super().__init__(**kwargs) 76 | 77 | def build(self, input_shape): 78 | if not isinstance(input_shape, list): 79 | raise ValueError('Input must be a list of two Tensors to process') 80 | super().build(input_shape) 81 | 82 | def call(self, inputs): 83 | if not isinstance(inputs, list): 84 | raise ValueError('Input must be a list of two Tensors to process') 85 | 86 | features = tf.convert_to_tensor(inputs[0]) 87 | rois = tf.convert_to_tensor(inputs[1]) 88 | 89 | return _precise_roi_pooling(features, 90 | rois, 91 | pooled_height=self.pooled_height, 92 | pooled_width=self.pooled_width, 93 | spatial_scale=self.spatial_scale, 94 | data_format=self.data_format) 95 | 96 | def compute_output_shape(self, input_shape): 97 | assert isinstance(input_shape, list) 98 | 99 | # Input validation 100 | if len(input_shape) != 2: 101 | raise ValueError('Input must be a list of two shapes') 102 | 103 | number_of_rois = input_shape[1][0] 104 | 105 | if self.data_format == 'channels_first': 106 | number_of_channels = input_shape[0][1] 107 | return [(number_of_rois, number_of_channels, self.pooled_height, self.pooled_width)] 108 | 109 | elif self.data_format == 'channels_last': 110 | number_of_channels = input_shape[0][3] 111 | return [(number_of_rois, self.pooled_height, self.pooled_width, number_of_channels)] 112 | else: 113 | raise ValueError( 114 | '`data_format` must be either `channels_last` or `channels_first`' 115 | ) 116 | 117 | def get_config(self): 118 | config = { 119 | 'pooled_height': self.pooled_height, 120 | 'pooled_width': self.pooled_width, 121 | 'spatial_scale': self.spatial_scale, 122 | 'data_format': self.data_format, 123 | } 124 | 125 | base_config = super().get_config() 126 | return {**base_config, **config} 127 | 128 | @tf.RegisterGradient('PreciseRoIPooling') 129 | def _precise_roi_pooling_grad(op, grad_output): 130 | pooled_height = op.get_attr('pooled_height') 131 | pooled_width = op.get_attr('pooled_width') 132 | spatial_scale = op.get_attr('spatial_scale') 133 | data_format = op.get_attr('data_format') 134 | 135 | features = tf.convert_to_tensor(op.inputs[0], name='features') 136 | rois = tf.convert_to_tensor(op.inputs[1], name='rois') 137 | pooled_features = tf.convert_to_tensor(op.outputs[0], name='pooled_features') 138 | grad_output = tf.convert_to_tensor(grad_output, name='grad_output') 139 | 140 | op_call = _precise_roi_pooling_ops.precise_ro_i_pooling_grad 141 | grads = op_call(features, 142 | rois, 143 | pooled_features, 144 | grad_output, 145 | pooled_height=pooled_height, 146 | pooled_width=pooled_width, 147 | spatial_scale=spatial_scale, 148 | data_format=data_format) 149 | 150 | features_gradient = tf.convert_to_tensor(grads[0], name='features_gradient') 151 | rois_gradient = tf.convert_to_tensor(grads[1], name='rois_gradient') 152 | return [features_gradient, rois_gradient] 153 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/build_cuda.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : build_cuda.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 
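#
# Invoked by CMakeLists.txt before the op library is built: locates nvcc,
# compiles every .cu.cc kernel next to this script with TensorFlow's
# compile/link flags, and writes the resulting CUDA library into ./build,
# from which the CMake project imports it.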
10 | 11 | import os 12 | import platform 13 | import shutil 14 | import subprocess 15 | 16 | import tensorflow as tf 17 | 18 | CUDA_SRCS = [] 19 | CUDA_OUTPUT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'build') 20 | 21 | if not os.path.isdir(CUDA_OUTPUT_DIR): 22 | os.makedirs(CUDA_OUTPUT_DIR) 23 | 24 | for file in os.listdir(os.path.dirname(os.path.realpath(__file__))): 25 | if file.endswith('.cu.cc'): 26 | CUDA_SRCS.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), file)) 27 | 28 | CUDA_COMPILER = shutil.which('nvcc') 29 | if CUDA_COMPILER is None: 30 | raise ValueError('CUDA compiler (nvcc) not found') 31 | 32 | TF_CFLAGS = ' '.join(tf.sysconfig.get_compile_flags()) 33 | TF_LFLAGS = ' '.join(tf.sysconfig.get_link_flags()) 34 | 35 | CUDA_NVCC_FLAGS = TF_CFLAGS + ' ' + TF_LFLAGS + ' -D GOOGLE_CUDA=1 -x cu --expt-relaxed-constexpr' 36 | 37 | os_type = platform.system() 38 | if os_type == 'Windows': 39 | CUDA_NVCC_FLAGS += ' -Xcompiler -MD -cudart=shared -D_WINSOCKAPI_' 40 | CUDA_OUTPUT_FILENAME = 'precise_roi_pooling_cuda.lib' 41 | elif os_type == 'Linux': 42 | CUDA_NVCC_FLAGS += ' -Xcompiler -fPIC -DNDEBUG' 43 | CUDA_OUTPUT_FILENAME = 'precise_roi_pooling_cuda.so' 44 | 45 | COMMAND = CUDA_COMPILER 46 | COMMAND += ' -c -o ' + os.path.join(CUDA_OUTPUT_DIR, CUDA_OUTPUT_FILENAME) 47 | COMMAND += ' ' + ' '.join(CUDA_SRCS) 48 | COMMAND += ' ' + CUDA_NVCC_FLAGS 49 | 50 | process = subprocess.Popen(COMMAND, shell=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE) 51 | process_output = process.communicate()[0] 52 | print(process_output.decode()) 53 | 54 | if process.returncode != 0: 55 | raise ValueError('CUDA compilation failed') 56 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/external/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited.
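 *
 * Declares the C-linkage launchers for the PrRoI pooling CUDA kernels
 * (forward pass, gradient w.r.t. the input features, gradient w.r.t. the
 * RoI coordinates). The TensorFlow kernels in this directory call into
 * these launchers; the PyTorch port in this repo bundles the same
 * implementation.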
8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/precise_roi_pooling.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File : precise_roi_pooling.h 3 | * Author : Kanghee Lee 4 | * Email : lerohiso@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | */ 8 | 9 | #ifndef KERNEL_PRECISE_ROI_POOLING_H_ 10 | #define KERNEL_PRECISE_ROI_POOLING_H_ 11 | 12 | #include "tensorflow/core/framework/op_kernel.h" 13 | #include "tensorflow/core/util/tensor_format.h" 14 | 15 | namespace tensorflow { 16 | 17 | namespace functor { 18 | 19 | template <typename Device, typename T> 20 | struct PreciseRoIPoolingFunctor { 21 | Status operator()(OpKernelContext* context, 22 | const Tensor& features, 23 | const Tensor& rois, 24 | Tensor* pooled_features, 25 | int pooled_height, 26 | int pooled_width, 27 | float spatial_scale, 28 | TensorFormat data_format); 29 | }; 30 | 31 | template <typename Device, typename T> 32 | struct PreciseRoIPoolingGradFunctor { 33 | Status operator()(OpKernelContext* context, 34 | const Tensor& features, 35 | const Tensor& rois, 36 | const Tensor& pooled_features, 37 | const Tensor& pooled_features_diff, 38 | Tensor* features_gradient, 39 | Tensor* rois_gradient, 40 | int pooled_height, 41 | int pooled_width, 42 | float spatial_scale, 43 | TensorFormat data_format); 44 | }; 45 | 46 | } // namespace functor 47 | 48 | } // namespace tensorflow 49 | 50 | #endif // KERNEL_PRECISE_ROI_POOLING_H_ -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/precise_roi_pooling_kernels.cu.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * File : precise_roi_pooling_kernels.cu.cc 3 | * Author : Kanghee Lee 4 | * Email : lerohiso@gmail.com 5 | * 6 | * Distributed under terms of the MIT license.
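 *
 * GPU implementations of the functors declared in precise_roi_pooling.h:
 * thin wrappers that forward the flattened tensor buffers to the CUDA
 * launchers included from external/prroi_pooling_gpu_impl.cu.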
7 | */ 8 | 9 | #if GOOGLE_CUDA 10 | #define EIGEN_USE_GPU 11 | 12 | #include "precise_roi_pooling.h" 13 | #include "external/prroi_pooling_gpu_impl.cu" 14 | #include "tensorflow/core/util/gpu_kernel_helper.h" 15 | 16 | namespace tensorflow { 17 | namespace functor { 18 | 19 | typedef Eigen::GpuDevice GPUDevice; 20 | 21 | // Define the GPU implementation that launches the CUDA kernel. 22 | template <typename T> 23 | struct PreciseRoIPoolingFunctor<GPUDevice, T> { 24 | Status operator()(OpKernelContext *context, 25 | const Tensor& features, 26 | const Tensor& rois, 27 | Tensor* pooled_features, 28 | int pooled_height, 29 | int pooled_width, 30 | float spatial_scale, 31 | TensorFormat data_format) { 32 | const int32 batch_size = GetTensorDim(features, data_format, 'N'); 33 | const int32 number_of_channels = GetTensorDim(features, data_format, 'C'); 34 | const int32 features_height = GetTensorDim(features, data_format, 'H'); 35 | const int32 features_width = GetTensorDim(features, data_format, 'W'); 36 | 37 | const int32 number_of_rois = rois.dim_size(0); 38 | 39 | const int top_count = number_of_rois * number_of_channels * pooled_height * pooled_width; 40 | const GPUDevice &d = context->eigen_gpu_device(); 41 | 42 | PrRoIPoolingForwardGpu(d.stream(), 43 | features.flat<T>().data(), 44 | rois.flat<T>().data(), 45 | pooled_features->flat<T>().data(), 46 | number_of_channels, 47 | features_height, 48 | features_width, 49 | pooled_height, 50 | pooled_width, 51 | spatial_scale, 52 | top_count); 53 | 54 | return Status::OK(); 55 | } 56 | }; 57 | 58 | template <typename T> 59 | struct PreciseRoIPoolingGradFunctor<GPUDevice, T> { 60 | Status operator()(OpKernelContext* context, 61 | const Tensor& features, 62 | const Tensor& rois, 63 | const Tensor& pooled_features, 64 | const Tensor& pooled_features_diff, 65 | Tensor* features_gradient, 66 | Tensor* rois_gradient, 67 | int pooled_height, 68 | int pooled_width, 69 | float spatial_scale, 70 | TensorFormat data_format) { 71 | const int32 batch_size = GetTensorDim(features, data_format, 'N'); 72 | const int32 number_of_channels = GetTensorDim(features, data_format, 'C'); 73 | const int32 features_height = GetTensorDim(features, data_format, 'H'); 74 | const int32 features_width = GetTensorDim(features, data_format, 'W'); 75 | 76 | const int32 number_of_rois = rois.dim_size(0); 77 | 78 | const int top_count = number_of_rois * number_of_channels * pooled_height * pooled_width; 79 | const GPUDevice &d = context->eigen_gpu_device(); 80 | 81 | const int features_gradient_size = batch_size * number_of_channels * features_height * features_width; 82 | const int rois_gradient_size = number_of_rois * 5; 83 | 84 | PrRoIPoolingBackwardGpu(d.stream(), 85 | features.flat<T>().data(), 86 | rois.flat<T>().data(), 87 | pooled_features.flat<T>().data(), 88 | pooled_features_diff.flat<T>().data(), 89 | features_gradient->flat<T>().data(), 90 | number_of_channels, 91 | features_height, 92 | features_width, 93 | pooled_height, 94 | pooled_width, 95 | spatial_scale, 96 | top_count, 97 | features_gradient_size); 98 | 99 | PrRoIPoolingCoorBackwardGpu(d.stream(), 100 | features.flat<T>().data(), 101 | rois.flat<T>().data(), 102 | pooled_features.flat<T>().data(), 103 | pooled_features_diff.flat<T>().data(), 104 | rois_gradient->flat<T>().data(), 105 | number_of_channels, 106 | features_height, 107 | features_width, 108 | pooled_height, 109 | pooled_width, 110 | spatial_scale, 111 | top_count, 112 | rois_gradient_size); 113 | 114 | return Status::OK(); 115 | } 116 | }; 117 | 118 | // Explicitly instantiate functors for the types of OpKernels registered.
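// Only the <GPUDevice, float> pair is instantiated here, since the launchers
// declared in external/prroi_pooling_gpu_impl.cuh operate on raw float pointers.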
119 | template struct PreciseRoIPoolingFunctor<GPUDevice, float>; 120 | template struct PreciseRoIPoolingGradFunctor<GPUDevice, float>; 121 | 122 | } // end namespace functor 123 | 124 | } // end namespace tensorflow 125 | 126 | #endif // GOOGLE_CUDA -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/prroi_pool/src/ops/precise_roi_pooling_ops.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * File : precise_roi_pooling_ops.cc 3 | * Author : Kanghee Lee 4 | * Email : lerohiso@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | */ 8 | 9 | #include "tensorflow/core/framework/op.h" 10 | #include "tensorflow/core/framework/shape_inference.h" 11 | 12 | namespace tensorflow { 13 | 14 | using ::tensorflow::shape_inference::InferenceContext; 15 | using ::tensorflow::shape_inference::ShapeHandle; 16 | 17 | REGISTER_OP("PreciseRoIPooling") 18 | .Input("features: T") 19 | .Input("rois: T") 20 | .Output("pooled_features: T") 21 | .Attr("pooled_height: int") 22 | .Attr("pooled_width: int") 23 | .Attr("spatial_scale: float") 24 | .Attr("data_format: {'NCHW'} = 'NCHW'") 25 | .Attr("T: realnumbertype") 26 | .SetShapeFn([](InferenceContext* c) { 27 | ShapeHandle features, rois; 28 | 29 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &features)); 30 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &rois)); 31 | 32 | // get input shapes 33 | int32 number_of_rois, number_of_channels; 34 | number_of_rois = c->Value(c->Dim(rois, 0)); 35 | string data_format; 36 | Status s = c->GetAttr("data_format", &data_format); 37 | if (s.ok() && data_format == "NCHW") { 38 | number_of_channels = c->Value(c->Dim(features, 1)); 39 | } 40 | else { 41 | number_of_channels = c->Value(c->Dim(features, 3)); 42 | } 43 | 44 | int32 pooled_height; 45 | int32 pooled_width; 46 | 47 | TF_RETURN_IF_ERROR(c->GetAttr("pooled_height", &pooled_height)); 48 | TF_RETURN_IF_ERROR(c->GetAttr("pooled_width", &pooled_width)); 49 | 50 | // Note, the output is always NCHW (even when input is NHWC) 51 | c->set_output(0, c->MakeShape({number_of_rois, number_of_channels, pooled_height, pooled_width})); 52 | return Status::OK(); 53 | }) 54 | .Doc(R"doc(PreciseRoIPooling op.)doc"); 55 | 56 | REGISTER_OP("PreciseRoIPoolingGrad") 57 | .Input("features: T") 58 | .Input("rois: T") 59 | .Input("pooled_features: T") 60 | .Input("pooled_features_diff: T") 61 | .Output("features_gradient: T") 62 | .Output("rois_gradient: T") 63 | .Attr("pooled_height: int") 64 | .Attr("pooled_width: int") 65 | .Attr("spatial_scale: float") 66 | .Attr("data_format: {'NCHW'} = 'NCHW'") 67 | .Attr("T: realnumbertype") 68 | .SetShapeFn([](InferenceContext* c) { 69 | ShapeHandle features, rois; 70 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &features)); 71 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &rois)); 72 | c->set_output(0, features); 73 | c->set_output(1, rois); 74 | return Status::OK(); 75 | }) 76 | .Doc(R"doc(PreciseRoIPoolingGrad op.)doc"); 77 | 78 | } // namespace tensorflow -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/tests/precise_roi_pooling_ops_test.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : precise_roi_pooling_ops_test.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license. 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import os 16 | import numpy as np 17 | import tensorflow as tf 18 | 19 | from tensorflow.python.framework import ops 20 | from tensorflow.python.platform import test 21 | from tensorflow.python.framework import test_util 22 | from prroi_pool import PreciseRoIPooling 23 | 24 | 25 | class PreciseRoIPoolingTest(test.TestCase): 26 | @test_util.run_gpu_only 27 | def test_forward(self): 28 | with self.test_session(): 29 | with ops.device("/gpu:0"): 30 | pooled_width = 7 31 | pooled_height = 7 32 | spatial_scale = 0.5 33 | data_format = 'channels_first' 34 | pool = PreciseRoIPooling(pooled_height, 35 | pooled_width, 36 | spatial_scale=spatial_scale, 37 | data_format=data_format) 38 | features = tf.random.uniform([4, 16, 24, 32], dtype=tf.float32) 39 | rois = tf.constant([[0, 0, 0, 14, 14], [1, 14, 14, 28, 28]], dtype=tf.float32) 40 | operation_outputs = pool([features, rois]) 41 | real_outputs = tf.keras.layers.AveragePooling2D(data_format=data_format, strides=1)(features) 42 | real_outputs = tf.stack([real_outputs[0, :, :7, :7], real_outputs[1, :, 7:14, 7:14]], axis=0) 43 | self.assertAllClose(operation_outputs, real_outputs) 44 | 45 | @test_util.run_gpu_only 46 | def test_backward(self): 47 | with self.test_session(): 48 | with ops.device("/gpu:0"): 49 | pooled_width = 2 50 | pooled_height = 2 51 | spatial_scale = 0.5 52 | data_format = 'channels_first' 53 | base_directory = os.path.dirname(os.path.realpath(__file__)) 54 | 55 | # binaries from pytorch prroi_pool module 56 | features = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/features.npy')) 57 | rois = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/rois.npy')) 58 | 59 | real_outputs = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/real_outputs.npy')) 60 | real_gradients0 = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/gradients0.npy')) 61 | real_gradients1 = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/gradients1.npy')) 62 | features = tf.convert_to_tensor(features) 63 | rois = tf.convert_to_tensor(rois) 64 | with tf.GradientTape() as tape: 65 | tape.watch([features, rois]) 66 | outputs = PreciseRoIPooling(pooled_height=pooled_height, 67 | pooled_width=pooled_width, 68 | spatial_scale=spatial_scale, 69 | data_format=data_format)([features, rois]) 70 | loss = tf.reduce_sum(outputs) 71 | 72 | gradients = tape.gradient(loss, [features, rois]) 73 | 74 | self.assertAllClose(outputs, real_outputs) 75 | self.assertAllClose(gradients[0], real_gradients0) 76 | self.assertAllClose(gradients[1], real_gradients1) 77 | 78 | 79 | if __name__ == '__main__': 80 | test.main() 81 | -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/features.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/features.npy -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients0.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients0.npy -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients1.npy -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/real_outputs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/real_outputs.npy -------------------------------------------------------------------------------- /model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/rois.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/rois.npy -------------------------------------------------------------------------------- /model/VGG/VGG16_FPN.py: -------------------------------------------------------------------------------- 1 | from torchvision import models 2 | import sys 3 | import torch.nn.functional as F 4 | from misc.utils import * 5 | from misc.layer import * 6 | from torchsummary import summary 7 | from model.necks import FPN 8 | from .conv import ResBlock 9 | 10 | BatchNorm2d = nn.BatchNorm2d 11 | BN_MOMENTUM = 0.01 12 | 13 | class VGG16_FPN(nn.Module): 14 | def __init__(self, pretrained=True): 15 | super(VGG16_FPN, self).__init__() 16 | 17 | vgg = models.vgg16_bn(pretrained=pretrained) 18 | features = list(vgg.features.children()) 19 | 20 | self.layer1 = nn.Sequential(*features[0:23]) 21 | self.layer2 = nn.Sequential(*features[23:33]) 22 | self.layer3 = nn.Sequential(*features[33:43]) 23 | 24 | in_channels = [256,512,512] 25 | self.neck = FPN(in_channels,192,len(in_channels)) 26 | self.neck2f = FPN(in_channels, 128, len(in_channels)) 27 | self.loc_head = nn.Sequential( 28 | nn.Dropout2d(0.2), 29 | ResBlock(in_dim=576, out_dim=256, dilation=0, norm="bn"), 30 | ResBlock(in_dim=256, out_dim=128, dilation=0, norm="bn"), 31 | 32 | nn.ConvTranspose2d(128, 64, 2, stride=2, padding=0, output_padding=0, bias=False), 33 | nn.BatchNorm2d(64, momentum=BN_MOMENTUM), 34 | nn.ReLU(inplace=True), 35 | 36 | nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1), 37 | nn.BatchNorm2d(32, momentum=BN_MOMENTUM), 38 | nn.ReLU(inplace=True), 39 | 40 | nn.ConvTranspose2d(32, 16, 2, stride=2, padding=0, output_padding=0, bias=False), 41 | nn.BatchNorm2d(16, momentum=BN_MOMENTUM), 42 | nn.ReLU(inplace=True), 43 | 44 | nn.Conv2d(16, 1, kernel_size=1, stride=1, padding=0), 45 | nn.ReLU(inplace=True) 46 | ) 47 | self.feature_head = nn.Sequential( 48 | nn.Dropout2d(0.2), 49 | ResBlock(in_dim=384, out_dim=384, dilation=0, norm="bn"), 50 | ResBlock(in_dim=384, out_dim=256, dilation=0, norm="bn"), 51 | 52 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False), 53 | BatchNorm2d(256, momentum=BN_MOMENTUM), 54 | nn.ReLU(inplace=True), 55 | nn.Conv2d(256, 256, 
kernel_size=3, stride=1, padding=1) 56 | ) 57 | def forward(self, x): 58 | f_list = [] 59 | x = self.layer1(x) 60 | f_list.append(x) 61 | x2 = self.layer2(x) 62 | f_list.append(x2) 63 | x = self.layer3(x2) 64 | f_list.append(x) 65 | 66 | 67 | f = self.neck(f_list) 68 | f = torch.cat([f[0], F.interpolate(f[1],scale_factor=2,mode='bilinear',align_corners=True), 69 | F.interpolate(f[2],scale_factor=4, mode='bilinear',align_corners=True)], dim=1) 70 | 71 | x = self.loc_head(f) 72 | 73 | f = self.neck2f(f_list) 74 | f = torch.cat([f[0], F.interpolate(f[1],scale_factor=2,mode='bilinear',align_corners=True), 75 | F.interpolate(f[2],scale_factor=4, mode='bilinear',align_corners=True)], dim=1) 76 | feature = self.feature_head(f) 77 | return feature, x 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /model/VGG/conv.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | conv_cfg = { 5 | 'Conv': nn.Conv2d, 6 | # TODO: octave conv 7 | } 8 | 9 | 10 | class BasicDeconv(nn.Module): 11 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, activate=None): 12 | super(BasicDeconv, self).__init__() 13 | bias = False if activate == 'bn' else True 14 | self.tconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=0, bias=bias) 15 | if activate == 'bn': 16 | self.bn = nn.BatchNorm2d(out_channels) 17 | elif activate == 'in': 18 | self.bn = nn.InstanceNorm2d(out_channels) 19 | elif activate is None: 20 | self.bn = None 21 | def forward(self, x): 22 | # pdb.set_trace() 23 | x = self.tconv(x) 24 | x = self.bn(x) if self.bn is not None else x 25 | return F.relu(x, inplace=True) 26 | 27 | 28 | class BasicConv(nn.Module): 29 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, norm=None, relu=False): 30 | super(BasicConv, self).__init__() 31 | self.relu = relu 32 | bias = True if norm is None else False 33 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 34 | padding=padding, dilation=dilation, bias=bias) 35 | if norm == 'bn': 36 | self.norm = nn.BatchNorm2d(out_channels, eps=1e-05, momentum=0.01) 37 | elif norm == 'in': 38 | self.norm = nn.InstanceNorm2d(out_channels) 39 | elif norm is None: 40 | self.norm = None 41 | 42 | 43 | def forward(self, x): 44 | x = self.conv(x) 45 | x = self.norm(x) if self.norm is not None else x 46 | x = F.relu(x, inplace=True) if self.relu else x 47 | return x 48 | 49 | class ResBlock(nn.Module): 50 | def __init__(self, in_dim, out_dim, dilation=1, norm="bn"): 51 | super(ResBlock, self).__init__() 52 | padding = dilation+1 53 | model = [] 54 | medium_dim = in_dim//4 55 | model.append(BasicConv(in_dim, medium_dim, 1, 1, 0, norm=norm, relu=True)) 56 | model.append(BasicConv(medium_dim, medium_dim, 3, 1, padding=padding, dilation=dilation+1, norm=norm, relu=True)) 57 | model.append(BasicConv(medium_dim, out_dim, 1, 1, 0, norm=norm, relu=False)) 58 | self.model = nn.Sequential(*model) 59 | if in_dim != out_dim: 60 | self.downsample = BasicConv(in_dim, out_dim, 1, 1, 0, norm=norm, relu=False) 61 | else: 62 | self.downsample = None 63 | self.relu = nn.ReLU(inplace=True) 64 | def forward(self, x): 65 | residual = x 66 | out = self.model(x) 67 | if self.downsample is not None: 68 | 69 | out += self.downsample(residual) 70 | else: 71 | out += residual 72 | out = self.relu(out) 73 | return out 74 | def build_conv_layer(cfg, *args,
**kwargs): 75 | """ Build convolution layer 76 | 77 | Args: 78 | cfg (None or dict): cfg should contain: 79 | type (str): identify conv layer type. 80 | layer args: args needed to instantiate a conv layer. 81 | 82 | Returns: 83 | layer (nn.Module): created conv layer 84 | """ 85 | if cfg is None: 86 | cfg_ = dict(type='Conv') 87 | else: 88 | assert isinstance(cfg, dict) and 'type' in cfg 89 | cfg_ = cfg.copy() 90 | 91 | layer_type = cfg_.pop('type') 92 | if layer_type not in conv_cfg: 93 | raise KeyError('Unrecognized conv layer type {}'.format(layer_type)) 94 | else: 95 | conv_layer = conv_cfg[layer_type] 96 | 97 | layer = conv_layer(*args, **kwargs, **cfg_) 98 | 99 | return layer 100 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__init__.py -------------------------------------------------------------------------------- /model/__pycache__/VIC.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__pycache__/VIC.cpython-37.pyc -------------------------------------------------------------------------------- /model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /model/__pycache__/optimal_transport_layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__pycache__/optimal_transport_layer.cpython-37.pyc -------------------------------------------------------------------------------- /model/__pycache__/points_from_den.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/__pycache__/points_from_den.cpython-37.pyc -------------------------------------------------------------------------------- /model/necks/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .fpn import FPN 3 | 4 | 5 | __all__ = ['FPN'] 6 | -------------------------------------------------------------------------------- /model/necks/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/necks/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /model/necks/__pycache__/fpn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taohan10200/DRNet/3e812181285f4bb3906f50e439ff0956a6ff2bb0/model/necks/__pycache__/fpn.cpython-37.pyc -------------------------------------------------------------------------------- /model/necks/fpn.py: -------------------------------------------------------------------------------- 1
| import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | 6 | class FPN(nn.Module): 7 | """ 8 | Feature Pyramid Network. 9 | 10 | This is an implementation of - Feature Pyramid Networks for Object 11 | Detection (https://arxiv.org/abs/1612.03144) 12 | 13 | Args: 14 | in_channels (List[int]): 15 | number of input channels per scale 16 | 17 | out_channels (int): 18 | number of output channels (used at each scale) 19 | 20 | num_outs (int): 21 | number of output scales 22 | 23 | start_level (int): 24 | index of the first input scale to use as an output scale 25 | 26 | end_level (int, default=-1): 27 | index of the last input scale to use as an output scale 28 | 29 | Example: 30 | >>> import torch 31 | >>> in_channels = [2, 3, 5, 7] 32 | >>> scales = [340, 170, 84, 43] 33 | >>> inputs = [torch.rand(1, c, s, s) 34 | ... for c, s in zip(in_channels, scales)] 35 | >>> self = FPN(in_channels, 11, len(in_channels)).eval() 36 | >>> outputs = self.forward(inputs) 37 | >>> for i in range(len(outputs)): 38 | ... print('outputs[{}].shape = {!r}'.format(i, outputs[i].shape)) 39 | outputs[0].shape = torch.Size([1, 11, 340, 340]) 40 | outputs[1].shape = torch.Size([1, 11, 170, 170]) 41 | outputs[2].shape = torch.Size([1, 11, 84, 84]) 42 | outputs[3].shape = torch.Size([1, 11, 43, 43]) 43 | """ 44 | 45 | def __init__(self,in_channels,out_channels,num_outs,start_level=0,end_level=-1,bn=True): 46 | super(FPN, self).__init__() 47 | assert isinstance(in_channels, list) 48 | self.in_channels = in_channels 49 | self.out_channels = out_channels 50 | self.num_ins = len(in_channels) 51 | self.num_outs = num_outs 52 | 53 | self.fp16_enabled = False 54 | 55 | if end_level == -1: 56 | self.backbone_end_level = self.num_ins 57 | assert num_outs >= self.num_ins - start_level 58 | else: 59 | # if end_level < inputs, no extra level is allowed 60 | self.backbone_end_level = end_level 61 | assert end_level <= len(in_channels) 62 | assert num_outs == end_level - start_level 63 | self.start_level = start_level 64 | self.end_level = end_level 65 | 66 | self.lateral_convs = nn.ModuleList() 67 | self.fpn_convs = nn.ModuleList() 68 | 69 | for i in range(self.start_level, self.backbone_end_level): 70 | l_conv = Conv2d( in_channels[i], out_channels,1,bn=bn, bias=not bn,same_padding=True) 71 | 72 | fpn_conv = Conv2d( out_channels, out_channels,3,bn=bn, bias=not bn,same_padding=True) 73 | 74 | self.lateral_convs.append(l_conv) 75 | self.fpn_convs.append(fpn_conv) 76 | 77 | # add extra conv layers (e.g., RetinaNet) 78 | self.init_weights() 79 | # default init_weights for conv(msra) and norm in ConvModule 80 | def init_weights(self): 81 | for m in self.modules(): 82 | if isinstance(m, nn.Conv2d): 83 | nn.init.xavier_uniform_(m.weight) 84 | 85 | 86 | def forward(self, inputs): 87 | 88 | assert len(inputs) == len(self.in_channels) 89 | 90 | # build laterals 91 | laterals = [lateral_conv(inputs[i + self.start_level]) for i, lateral_conv in enumerate(self.lateral_convs)] 92 | 93 | # build top-down path 94 | used_backbone_levels = len(laterals) 95 | for i in range(used_backbone_levels - 1, 0, -1): 96 | prev_shape = laterals[i - 1].shape[2:] 97 | laterals[i - 1] += F.interpolate(laterals[i], size=prev_shape, mode='nearest') 98 | 99 | # build outputs 100 | # part 1: from original levels 101 | outs = [ self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) ] 102 | 103 | 104 | return tuple(outs) 105 | 106 | 107 | 108 | class Conv2d(nn.Module): 109 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 
NL='relu', same_padding=False, bn=True, bias=True): 110 | super(Conv2d, self).__init__() 111 | padding = int((kernel_size - 1) // 2) if same_padding else 0 112 | 113 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=padding, bias=bias) 114 | 115 | self.bn = nn.BatchNorm2d(out_channels) if bn else None 116 | if NL == 'relu' : 117 | self.relu = nn.ReLU(inplace=False) 118 | elif NL == 'prelu': 119 | self.relu = nn.PReLU() 120 | else: 121 | self.relu = None 122 | 123 | def forward(self, x): 124 | x = self.conv(x) 125 | if self.bn is not None: 126 | x = self.bn(x) 127 | if self.relu is not None: 128 | x = self.relu(x) 129 | return x -------------------------------------------------------------------------------- /model/optimal_transport_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Optimal_Transport_Layer(nn.Module): 5 | def __init__(self, config): 6 | super(Optimal_Transport_Layer, self).__init__() 7 | self.iters =config['sinkhorn_iterations'] 8 | self.feature_dim = config['feature_dim'] 9 | self.matched_threshold = config['matched_threshold'] 10 | self.bin_score = torch.nn.Parameter(torch.tensor(1.),requires_grad=True) 11 | self.register_parameter('bin_score', self.bin_score) 12 | @property 13 | def loss(self): 14 | return self.matching_loss, self.hard_pair_loss 15 | def forward(self,mdesc0, mdesc1, match_gt=None, ignore =False): 16 | # Compute matching descriptor distance. 17 | sim_matrix = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1) 18 | 19 | scores = sim_matrix / self.feature_dim ** .5 20 | 21 | # Run the optimal transport. 22 | scores = log_optimal_transport( 23 | scores, self.bin_score, 24 | iters=self.iters) 25 | 26 | # Get the matches with score above "match_threshold". 
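# The extra (last) row and column of `scores` form the learned dustbin
# (`bin_score`): points assigned there are treated as unmatched. Below, the
# dustbin is sliced off, row-/column-wise maxima are taken, and a pair is kept
# only if it is a mutual best match whose probability exceeds
# `matched_threshold`; all other indices are set to -1 (unmatched).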
27 | max0 = scores[:, :-1, :-1].max(2) # for each point in a, its best match in b (returns b's index) 28 | max1 = scores[:, :-1, :-1].max(1) # for each point in b, its best match in a (returns a's index) 29 | indices0, indices1 = max0.indices, max1.indices 30 | 31 | mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0) 32 | mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1) 33 | zero = scores.new_tensor(0) 34 | mscores0 = torch.where(mutual0, max0.values.exp(), zero) 35 | mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero) 36 | 37 | valid0 = mutual0 & (mscores0 > self.matched_threshold) 38 | valid1 = mutual1 & valid0.gather(1, indices1) 39 | indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1)) 40 | indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1)) 41 | 42 | 43 | scores = scores.squeeze(0).exp() 44 | 45 | if match_gt is not None: 46 | matched_mask = torch.zeros(scores.size()).long().to(scores) 47 | 48 | matched_mask[match_gt['a2b'][:, 0], match_gt['a2b'][:, 1]] = 1 49 | if not ignore: matched_mask[match_gt['un_a'], -1] = 1 50 | if not ignore: matched_mask[-1, match_gt['un_b']] = 1 51 | 52 | self.matching_loss = -torch.log(scores[matched_mask == 1]) 53 | 54 | top2_mask = matched_mask[:-1, :-1] 55 | scores_ = scores[:-1, :-1] * (1 - top2_mask) 56 | self.hard_pair_loss = -(torch.log(1 - torch.cat([scores_.max(1)[0], scores_.max(0)[0]]))) 57 | 58 | return scores, indices0.squeeze(0), indices1.squeeze(0), mscores0.squeeze(0), mscores1.squeeze(0) 59 | 60 | def log_sinkhorn_iterations(Z, log_mu, log_nu, iters: int): 61 | """ Perform Sinkhorn Normalization in Log-space for stability""" 62 | 63 | log_u, log_v = torch.zeros_like(log_mu), torch.zeros_like(log_nu) # initialized with u = v = 1, i.e. log(u) = log(v) = 0 64 | for _ in range(iters): 65 | log_u = log_mu - torch.logsumexp(Z + log_v.unsqueeze(1), dim=2) 66 | log_v = log_nu - torch.logsumexp(Z + log_u.unsqueeze(2), dim=1) 67 | 68 | return Z + log_u.unsqueeze(2) + log_v.unsqueeze(1) 69 | 70 | 71 | def log_optimal_transport(scores, alpha, iters: int): 72 | """ Perform Differentiable Optimal Transport in Log-space for stability""" 73 | b, m, n = scores.shape 74 | one = scores.new_tensor(1) 75 | ms, ns = (m*one).to(scores), (n*one).to(scores) 76 | 77 | bins0 = alpha.expand(b, m, 1) 78 | bins1 = alpha.expand(b, 1, n) 79 | alpha = alpha.expand(b, 1, 1) 80 | 81 | couplings = torch.cat([torch.cat([scores, bins0], -1), 82 | torch.cat([bins1, alpha], -1)], 1) 83 | 84 | norm = - (ms + ns).log() # normalization in the Log-space (log(1/(m+n))) 85 | log_mu = torch.cat([norm.expand(m), ns.log()[None] + norm]) 86 | log_nu = torch.cat([norm.expand(n), ms.log()[None] + norm]) 87 | log_mu, log_nu = log_mu[None].expand(b, -1), log_nu[None].expand(b, -1) 88 | 89 | Z = log_sinkhorn_iterations(couplings, log_mu, log_nu, iters) 90 | score = Z - norm # multiply probabilities by M+N 91 | return score 92 | 93 | 94 | def arange_like(x, dim: int): 95 | return x.new_ones(x.shape[dim]).cumsum(0) - 1 # traceable in 1.1 -------------------------------------------------------------------------------- /model/points_from_den.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class get_ROI_and_MatchInfo(object): 6 | def __init__(self, train_size, radius=8, feature_scale=0.125): 7 | self.h = train_size[0] 8 | self.w = train_size[1] 9 | self.radius = radius 10 | self.feature_scale =
feature_scale 11 | def __call__(self, target_a, target_b, noise=None, shape=None): 12 | gt_a, gt_b = target_a['points'], target_b['points'] 13 | if shape is not None: 14 | self.h = shape[0] 15 | self.w = shape[1] 16 | if noise == 'ab': 17 | gt_a, gt_b = gt_a + torch.randn(gt_a.size()).to(gt_a)*2, gt_b + torch.randn(gt_b.size()).to(gt_b)*2 18 | elif noise == 'a': 19 | gt_a = gt_a + torch.randn(gt_a.size()).to(gt_a) 20 | elif noise == 'b': 21 | gt_b = gt_b + torch.randn(gt_b.size()).to(gt_b) 22 | 23 | 24 | roi_a = torch.zeros(gt_a.size(0), 5).to(gt_a) 25 | roi_b = torch.zeros(gt_b.size(0), 5).to(gt_b) 26 | roi_a[:, 0] = 0 27 | roi_a[:, 1] = torch.clamp(gt_a[:, 0] - self.radius,min=0) 28 | roi_a[:, 2] = torch.clamp(gt_a[:, 1] - self.radius, min=0) 29 | roi_a[:, 3] = torch.clamp(gt_a[:, 0] + self.radius, max=self.w) 30 | roi_a[:, 4] = torch.clamp(gt_a[:, 1] + self.radius, max=self.h) 31 | 32 | roi_b[:, 0] = 1 33 | roi_b[:, 1] = torch.clamp(gt_b[:, 0] - self.radius, min=0) 34 | roi_b[:, 2] = torch.clamp(gt_b[:, 1] - self.radius, min=0) 35 | roi_b[:, 3] = torch.clamp(gt_b[:, 0] + self.radius, max=self.w) 36 | roi_b[:, 4] = torch.clamp(gt_b[:, 1] + self.radius, max=self.h) 37 | 38 | pois = torch.cat([roi_a, roi_b], dim=0) 39 | 40 | # =================== match the ids of the predicted points in two adjacent frames =================== 41 | 42 | a_ids = target_a['person_id'] 43 | b_ids = target_b['person_id'] 44 | 45 | dis = a_ids.unsqueeze(1).expand(-1,len(b_ids)) - b_ids.unsqueeze(0).expand(len(a_ids),-1) 46 | dis = dis.abs() 47 | matched_a, matched_b = torch.where(dis==0) 48 | matched_a2b = torch.stack([matched_a,matched_b],1) 49 | unmatched0 = torch.where(dis.min(1)[0]>0)[0] 50 | unmatched1 = torch.where(dis.min(0)[0]>0)[0] 51 | 52 | match_gt={'a2b': matched_a2b, 'un_a':unmatched0, 'un_b':unmatched1} 53 | 54 | return match_gt, pois 55 | 56 | 57 | def local_maximum_points(sub_pre, gaussian_maximum, radius=8.): 58 | sub_pre = sub_pre.detach() 59 | _,_,h,w = sub_pre.size() 60 | kernel = torch.ones(3,3)/9.
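# Local-maximum extraction from the predicted density map: the 3x3 averaging
# kernel above smooths the map, a strided 5x5 max-pool followed by 2x
# upsampling keeps only pixels that equal their neighbourhood maximum, and
# responses below 0.25 of the Gaussian peak value are discarded before the
# surviving points are expanded into fixed-radius RoIs.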
61 | kernel = kernel.unsqueeze(0).unsqueeze(0).cuda() 62 | weight = nn.Parameter(data=kernel, requires_grad=False) 63 | sub_pre = F.conv2d(sub_pre, weight, stride=1, padding=1) 64 | 65 | keep = F.max_pool2d(sub_pre, (5, 5), stride=2, padding=2) 66 | keep = F.interpolate(keep, scale_factor=2) 67 | keep = (keep == sub_pre).float() 68 | sub_pre = keep * sub_pre 69 | 70 | sub_pre[sub_pre < 0.25*gaussian_maximum] = 0 71 | sub_pre[sub_pre > 0] = 1 72 | count = int(torch.sum(sub_pre).item()) 73 | 74 | points = torch.nonzero(sub_pre)[:,[0,1,3,2]].float() # b,c,h,w->b,c,w,h 75 | rois = torch.zeros((points.size(0), 5)).float().to(sub_pre) 76 | rois[:, 0] = points[:, 0] 77 | rois[:, 1] = torch.clamp(points[:, 2] - radius, min=0) 78 | rois[:, 2] = torch.clamp(points[:, 3] - radius, min=0) 79 | rois[:, 3] = torch.clamp(points[:, 2] + radius, max=w) 80 | rois[:, 4] = torch.clamp(points[:, 3] + radius, max=h) 81 | 82 | pre_data = {'num': count, 'points': points, 'rois': rois} 83 | return pre_data 84 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torchsummary==1.5.1 2 | tqdm==4.48.2 3 | scipy==1.6.2 4 | matplotlib==3.5.1 5 | tensorboardX 6 | tensorboard 7 | tensorflow 8 | easydict 9 | pandas 10 | numpy 11 | opencv-python 12 | pyyaml -------------------------------------------------------------------------------- /results/Tracking_HT21_metric.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import defaultdict 3 | import os.path as osp 4 | from train import compute_metrics_all_scenes 5 | import torch 6 | import cv2 7 | from PIL import Image 8 | def tracking_to_crowdflow(): 9 | method = 'HT21_10'  # 'HeadHunter_result', 'fairmot_head', 'PHDTT' 10 | 11 | # Root = os.path.join('D:\Crowd_tracking/HeadHunter',method) 12 | Root = os.path.join('/media/E/ht/HeadHunter--T-master/results', method) 13 | # gt_root = 'D:\Crowd_tracking/dataset/HT21/train' 14 | scenes = sorted(os.listdir(Root)) 15 | print(scenes) 16 | scenes_pred_dict = [] 17 | scenes_gt_dict = [] 18 | all_sum = [] 19 | 20 | 21 | for _, i in enumerate(scenes,0): 22 | # if _>0: 23 | # break 24 | pred = defaultdict(list) 25 | gts = defaultdict(list) 26 | 27 | path = os.path.join(Root,i) 28 | id_list = [] 29 | with open(path, 'r') as f: 30 | lines = f.readlines() 31 | for vi, line in enumerate(lines, 0): 32 | line = line.strip().split(',') 33 | img_id = int(line[0]) 34 | tmp_id = int(line[1]) 35 | pred[img_id].append(tmp_id) 36 | id_list.append(tmp_id) 37 | 38 | # with open(osp.join(gt_root, i.split('.')[0], 'gt', 'gt.txt'), 'r') as f: 39 | # lines = f.readlines() 40 | # for lin in lines: 41 | # lin_list = [float(i) for i in lin.rstrip().split(',')] 42 | # ind = int(lin_list[0]) 43 | # gts[ind].append(int(lin_list[1])) 44 | # print(id_list) 45 | id = set(id_list) 46 | all_sum.append(len(id)) 47 | print(all_sum, sum(all_sum[:5]), sum(all_sum[5:])) 48 | 49 | 50 | gt_pre_flow_cnt = torch.cat([torch.tensor([[133., 737., 734., 1040., 321.]]), torch.tensor(all_sum)[None]]).transpose(0, 1) 51 | print(gt_pre_flow_cnt) 52 | time = torch.tensor([585.,2080.,1000.,1050.,1008.]) 53 | MAE = torch.mean(torch.abs(gt_pre_flow_cnt[:,0] - gt_pre_flow_cnt[:,1])) 54 | MSE = torch.mean((gt_pre_flow_cnt[:, 0] - gt_pre_flow_cnt[:, 1])**2).sqrt() 55 | WRAE = torch.sum(torch.abs(gt_pre_flow_cnt[:,0] -
gt_pre_flow_cnt[:,1])/gt_pre_flow_cnt[:,0]*(time/(time.sum()+1e-10)))*100 56 | print(MAE, MSE, WRAE) 57 | 58 | 59 | # pred_dict = {'id': i, 'time': len(lines), 'first_frame': 0, 'inflow': [], 'outflow': []} 60 | # gt_dict = {'id': i, 'time': len(lines), 'first_frame': 0, 'inflow': [], 'outflow': []} 61 | # 62 | # interval = 75 63 | # img_num =len(gts.keys()) 64 | # print(img_num) 65 | # for img_id, ids in gts.items(): 66 | # if img_id>img_num-interval: 67 | # break 68 | # 69 | # img_id_b = img_id+interval 70 | # 71 | # pre_ids,pre_ids_b = pred[img_id],pred[img_id_b] 72 | # gt_ids,gt_ids_b = ids, gts[img_id_b] 73 | # 74 | # if img_id == 1: 75 | # pred_dict['first_frame'] = len(pre_ids) 76 | # gt_dict['first_frame'] = len(gt_ids) 77 | # # import pdb 78 | # # pdb.set_trace() 79 | # 80 | # # if (img_id-1) % interval ==0 or img_num== 0: 81 | # pre_inflow =set(pre_ids_b)-set(pre_ids) 82 | # pre_outflow = set(pre_ids)-set(pre_ids_b) 83 | # 84 | # gt_inflow = set(gt_ids_b)-set(gt_ids) 85 | # gt_outflow = set(gt_ids)-set(gt_ids_b) 86 | # pred_dict['inflow'].append(len(pre_inflow)) 87 | # pred_dict['outflow'].append(len(pre_outflow)) 88 | # gt_dict['inflow'].append(len(gt_inflow)) 89 | # gt_dict['outflow'].append(len(gt_outflow)) 90 | # # print(pred_dict, gt_dict) 91 | # scenes_pred_dict.append(pred_dict) 92 | # scenes_gt_dict.append(gt_dict) 93 | # MAE, MSE, WRAE, MIAE, MOAE, cnt_result = compute_metrics_all_scenes(scenes_pred_dict, scenes_gt_dict, interval) 94 | # print(MAE, MSE, WRAE, MIAE, MOAE, cnt_result) 95 | 96 | 97 | def id_counting(): 98 | Root = 'D:/Crowd_tracking/dataset/HT21/train' 99 | scenes = os.listdir(Root) 100 | all_sum = [] 101 | for i in scenes: 102 | path = os.path.join(Root,i,'gt/gt.txt') 103 | id_list = [] 104 | with open(path, 'r') as f: 105 | lines = f.readlines() 106 | for line in lines: 107 | id_list.append(int(line.strip().split(',')[1])) 108 | id = set(id_list) 109 | all_sum.append(len(id)) 110 | print(all_sum, sum(all_sum[:4]), sum(all_sum[4:])) 111 | 112 | if __name__ == '__main__': 113 | import torch 114 | #PHDTT 115 | # gt_pre_flow_cnt = torch.tensor([[133.,737.,734.,1040.,321.],[380.,4530.,5528.,1531.,1648.]]).transpose(0,1) 116 | # #HeadHunter 117 | gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [307., 2145., 2556., 1531., 888.,]]).transpose(0, 1) 118 | # 119 | # #LOI 120 | # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.],[72.4 ,493.1 ,275.3 ,409.2,189.8]]).transpose(0, 1) 121 | # # Hungarian s=10 122 | # gt_pre_flow_cnt = torch.tensor([[ 129., 133.], 123 | # [ 421., 737.], 124 | # [ 332., 734.], 125 | # [ 331., 1040.], 126 | # [ 185., 321.]]) 127 | # 128 | # 129 | # # Hungarian s=12 130 | # gt_pre_flow_cnt = torch.tensor([[ 188., 133.], 131 | # [ 779., 737.], 132 | # [1069., 734.], 133 | # [ 772., 1040.], 134 | # [ 324., 321.]]) 135 | # # Hungarian s=15 136 | # gt_pre_flow_cnt = torch.tensor([[ 298., 133.], 137 | # [1833., 737.], 138 | # [1921., 734.], 139 | # [1641., 1040.], 140 | # [ 752., 321.]]) 141 | # 142 | # #Tracking 143 | # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [284., 1364., 1435., 1975., 539., ]]).transpose(0, 1) 144 | 145 | ## SSIC sampling 146 | # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.], [432.6235237121582, 4244.325263977051, 2307.327682495117, 2219.3844146728516, 1355.9616165161133]]).transpose(0, 1) 147 | # gt_pre_flow_cnt = torch.tensor([[133., 737., 734., 1040., 321.],[83.13096618652344, 216.19476318359375, 224.47157287597656, 174.38177490234375, 
118.87664794921875]]).transpose(0,1) 148 | # 149 | time = torch.tensor([585.,2080.,1000.,1050.,1008.]) 150 | MAE = torch.mean(torch.abs(gt_pre_flow_cnt[:,0] - gt_pre_flow_cnt[:,1])) 151 | MSE = torch.mean((gt_pre_flow_cnt[:, 0] - gt_pre_flow_cnt[:, 1])**2).sqrt() 152 | WRAE = torch.sum(torch.abs(gt_pre_flow_cnt[:,0] - gt_pre_flow_cnt[:,1])/gt_pre_flow_cnt[:,0]*(time/(time.sum()+1e-10)))*100 153 | 154 | print(MAE, MSE, WRAE) 155 | 156 | tracking_to_crowdflow() 157 | -------------------------------------------------------------------------------- /vision/engine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import math 5 | import sys 6 | import time 7 | from collections import defaultdict 8 | 9 | import brambox 10 | import pandas as pd 11 | import numpy as np 12 | import torch 13 | import torchvision.models.detection.mask_rcnn 14 | 15 | from head_detection.vision import utils 16 | from brambox.stat._matchboxes import match_det, match_anno 17 | from brambox.stat import coordinates, mr_fppi, ap, pr, threshold, fscore, peak, lamr 18 | 19 | 20 | def check_empty_target(targets): 21 | for tar in targets: 22 | if len(tar['boxes']) < 1: 23 | return True 24 | return False 25 | 26 | 27 | def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): 28 | model.train() 29 | metric_logger = utils.MetricLogger(delimiter=" ") 30 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 31 | header = 'Epoch: [{}]'.format(epoch) 32 | 33 | lr_scheduler = None 34 | if epoch == 0: 35 | warmup_factor = 1. / 1000 36 | warmup_iters = min(1000, len(data_loader) - 1) 37 | 38 | lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) 39 | 40 | for images, targets in metric_logger.log_every(data_loader, print_freq, header): 41 | if check_empty_target(targets): 42 | continue 43 | images = list(image.to(device) for image in images) 44 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 45 | loss_dict = model(images, targets) 46 | 47 | losses = sum(loss for loss in loss_dict.values()) 48 | 49 | # reduce losses over all GPUs for logging purposes 50 | loss_dict_reduced = utils.reduce_dict(loss_dict) 51 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 52 | 53 | loss_value = losses_reduced.item() 54 | 55 | if not math.isfinite(loss_value): 56 | print("Loss is {}, stopping training".format(loss_value)) 57 | print(loss_dict_reduced) 58 | sys.exit(1) 59 | 60 | optimizer.zero_grad() 61 | losses.backward() 62 | optimizer.step() 63 | 64 | if lr_scheduler is not None: 65 | lr_scheduler.step() 66 | 67 | metric_logger.update(loss=losses_reduced, **loss_dict_reduced) 68 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 69 | 70 | return metric_logger 71 | 72 | 73 | def _get_iou_types(model): 74 | model_without_ddp = model 75 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 76 | model_without_ddp = model.module 77 | iou_types = ["bbox"] 78 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 79 | iou_types.append("segm") 80 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 81 | iou_types.append("keypoints") 82 | return iou_types 83 | 84 | 85 | def safe_div(x,y): 86 | if y == 0: 87 | return 0 88 | return x / y 89 | 90 | def get_moda(det, anno, threshold=0.2, ignore=None): 91 | if ignore is None: 92 | ignore = anno.ignore.any() 93 | 94 | dets_per_frame = 
anno.groupby('image').filter(lambda x: any(x['ignore'] == 0)) 95 | dets_per_frame = dets_per_frame.groupby('image').size().to_dict() 96 | # Other param for finding matched anno 97 | crit = coordinates.pdollar if ignore else coordinates.iou 98 | label = len({*det.class_label.unique(), *anno.class_label.unique()}) > 1 99 | matched_dets = match_det(det, anno, threshold, criteria=crit, 100 | class_label=label, ignore=2 if ignore else 0) 101 | fp_per_im = matched_dets[matched_dets.fp==True].groupby('image').size().to_dict() 102 | tp_per_im = matched_dets[matched_dets.tp==True].groupby('image').size().to_dict() 103 | valid_anno = anno[anno.ignore == False].groupby('image').size().to_dict() 104 | assert valid_anno.keys() == tp_per_im.keys() 105 | 106 | moda_ = [] 107 | for k, _ in valid_anno.items(): 108 | n_gt = valid_anno[k] 109 | miss = n_gt-tp_per_im[k] 110 | fp = fp_per_im[k] 111 | moda_.append(safe_div((miss+fp), n_gt)) 112 | return 1 - np.mean(moda_) 113 | 114 | 115 | def get_modp(det, anno, threshold=0.2, ignore=None): 116 | if ignore is None: 117 | ignore = anno.ignore.any() 118 | # Compute TP/FP 119 | if not {'tp', 'fp'}.issubset(det.columns): 120 | crit = coordinates.pdollar if ignore else coordinates.iou 121 | label = len({*det.class_label.unique(), *anno.class_label.unique()}) > 1 122 | det = match_anno(det, anno, threshold, criteria=crit, class_label=label, ignore=2 if ignore else 0) 123 | elif not det.confidence.is_monotonic_decreasing: 124 | det = det.sort_values('confidence', ascending=False) 125 | modp = det.groupby('image')['criteria'].mean().mean() 126 | return modp 127 | 128 | @torch.no_grad() 129 | def evaluate(model, data_loader, out_path=None, benchmark=None): 130 | """ 131 | Evaluates a model over the testing set, using AP, log-average miss rate and F1-score 132 | """ 133 | n_threads = torch.get_num_threads() 134 | torch.set_num_threads(1) 135 | device = torch.device('cuda') 136 | cpu_device = torch.device("cpu") 137 | model.eval() 138 | metric_logger = utils.MetricLogger(delimiter=" ") 139 | header = 'Valid:' 140 | 141 | # Brambox eval related 142 | pred_dict = defaultdict(list) 143 | gt_dict = defaultdict(list) 144 | results = {} 145 | for i, (images, targets) in enumerate(metric_logger.log_every(data_loader, 100, header)): 146 | images = list(img.to(device) for img in images) 147 | 148 | torch.cuda.synchronize() 149 | model_time = time.time() 150 | outputs = model(images) 151 | 152 | outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] 153 | model_time = time.time() - model_time 154 | evaluator_time = time.time() 155 | # Pred lists 156 | pred_boxes = [p['boxes'].numpy() for p in outputs] 157 | pred_scores = [p['scores'].numpy() for p in outputs] 158 | 159 | # GT List 160 | gt_boxes = [gt['boxes'].numpy() for gt in targets] 161 | 162 | # ignore variables are used in our benchmark and the CrowdHuman benchmark 163 | ignore_ar = [gt['ignore'] for gt in targets] 164 | # Make sure targets and predictions have the same batch size 165 | assert len(gt_boxes) == len(pred_boxes) 166 | for j in range(len(gt_boxes)): 167 | im_name = str(targets[j]['image_id']) + '.jpg' 168 | # write to results dict for MOT format 169 | results[targets[j]['image_id'].item()] = {'boxes': pred_boxes[j], 170 | 'scores': pred_scores[j]} 171 | for _, (p_b, p_s) in enumerate(zip(pred_boxes[j], pred_scores[j])): 172 | pred_dict['image'].append(im_name) 173 | pred_dict['class_label'].append('head') 174 | pred_dict['id'].append(0) 175 | pred_dict['x_top_left'].append(p_b[0]) 176 | pred_dict['y_top_left'].append(p_b[1]) 177
| pred_dict['width'].append(p_b[2] - p_b[0]) 178 | pred_dict['height'].append(p_b[3] - p_b[1]) 179 | pred_dict['confidence'].append(p_s) 180 | 181 | for _, (gt_b, ignore_val) in enumerate(zip(gt_boxes[j], ignore_ar[j])): 182 | gt_dict['image'].append(im_name) 183 | gt_dict['class_label'].append('head') 184 | gt_dict['id'].append(0) 185 | gt_dict['x_top_left'].append(gt_b[0]) 186 | gt_dict['y_top_left'].append(gt_b[1]) 187 | gt_dict['width'].append(gt_b[2] - gt_b[0]) 188 | gt_dict['height'].append(gt_b[3] - gt_b[1]) 189 | gt_dict['ignore'].append(ignore_val) 190 | 191 | evaluator_time = time.time() - evaluator_time 192 | metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) 193 | 194 | # Save results in MOT format if out_path is provided 195 | if out_path is not None: 196 | data_loader.dataset.write_results_files(results, out_path) 197 | # gather the stats from all processes 198 | pred_df = pd.DataFrame(pred_dict) 199 | gt_df = pd.DataFrame(gt_dict) 200 | pred_df['image'] = pred_df['image'].astype('category') 201 | gt_df['image'] = gt_df['image'].astype('category') 202 | pr_ = pr(pred_df, gt_df, ignore=True) 203 | ap_ = ap(pr_) 204 | mr_fppi_ = mr_fppi(pred_df, gt_df, threshold=0.5, ignore=True) 205 | lamr_ = lamr(mr_fppi_) 206 | f1_ = fscore(pr_) 207 | f1_ = f1_.fillna(0) 208 | threshold_ = peak(f1_) 209 | 210 | moda = get_moda(pred_df, gt_df, threshold=0.2, ignore=True) 211 | modp = get_modp(pred_df, gt_df, threshold=0.2, ignore=True) 212 | 213 | result_dict = {'AP' : ap_, 'MMR' : lamr_, 214 | 'f1' : threshold_.f1, 'r':pr_['recall'].values[-1], 215 | 'moda' : moda, 'modp' : modp} 216 | 217 | metric_logger.synchronize_between_processes() 218 | 219 | torch.set_num_threads(n_threads) 220 | return result_dict 221 | -------------------------------------------------------------------------------- /vision/transform.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from torchvision.transforms import functional as F 4 | 5 | 6 | def _flip_coco_person_keypoints(kps, width): 7 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 8 | flipped_data = kps[:, flip_inds] 9 | flipped_data[..., 0] = width - flipped_data[..., 0] 10 | # Maintain COCO convention that if visibility == 0, then x, y = 0 11 | inds = flipped_data[..., 2] == 0 12 | flipped_data[inds] = 0 13 | return flipped_data 14 | 15 | 16 | class Compose(object): 17 | def __init__(self, transforms): 18 | self.transforms = transforms 19 | 20 | def __call__(self, image, target): 21 | for t in self.transforms: 22 | image, target = t(image, target) 23 | return image, target 24 | 25 | 26 | class RandomHorizontalFlip(object): 27 | def __init__(self, prob): 28 | self.prob = prob 29 | 30 | def __call__(self, image, target): 31 | if random.random() < self.prob: 32 | height, width = image.shape[-2:] 33 | image = image.flip(-1) 34 | bbox = target["boxes"] 35 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] 36 | target["boxes"] = bbox 37 | if "masks" in target: 38 | target["masks"] = target["masks"].flip(-1) 39 | if "keypoints" in target: 40 | keypoints = target["keypoints"] 41 | keypoints = _flip_coco_person_keypoints(keypoints, width) 42 | target["keypoints"] = keypoints 43 | return image, target 44 | 45 | 46 | class ToTensor(object): 47 | def __call__(self, image, target): 48 | image = F.to_tensor(image) 49 | return image, target 50 | --------------------------------------------------------------------------------