├── .gitattributes
├── LICENSE
├── README.md
├── batch_sampler.py
├── config
│   ├── __pycache__
│   │   ├── config.cpython-38.pyc
│   │   └── config.cpython-39.pyc
│   └── config.py
├── dataloadR
│   ├── batch_sampler.py
│   └── datasetsv2.py
├── datasetsv2.py
├── demo.py
├── evalR
│   ├── __pycache__
│   │   ├── eval.cpython-38.pyc
│   │   ├── evaluator.cpython-38.pyc
│   │   ├── evaluator1.cpython-38.pyc
│   │   ├── evaluator2.cpython-38.pyc
│   │   ├── evaluatorGGHL.cpython-38.pyc
│   │   ├── evaluatorGGHLv2.cpython-38.pyc
│   │   ├── evaluatorGGHLv2.cpython-39.pyc
│   │   ├── evaluatorGGHLv2_mask.cpython-38.pyc
│   │   ├── evaluatorGGHLv2_mask.cpython-39.pyc
│   │   ├── evaluatorGGHLv2plot.cpython-39.pyc
│   │   ├── evaluator_ABGH.cpython-38.pyc
│   │   ├── evaluator_Center.cpython-38.pyc
│   │   ├── evaluator_demo.cpython-39.pyc
│   │   ├── evaluator_new.cpython-38.pyc
│   │   ├── evaluatorfast.cpython-38.pyc
│   │   ├── voc_eval.cpython-36.pyc
│   │   ├── voc_eval.cpython-37.pyc
│   │   ├── voc_eval.cpython-38.pyc
│   │   └── voc_eval.cpython-39.pyc
│   ├── eval.py
│   ├── evaluatorGGHL.py
│   ├── evaluatorTS.py
│   ├── evaluatorTSplot.py
│   ├── evaluator_demo.py
│   └── voc_eval.py
├── lib
│   └── DCNv2
│       ├── dcn_v2.py
│       ├── dcn_v2_amp.py
│       ├── dcn_v2_onnx.py
│       ├── make.sh
│       ├── setup.py
│       ├── src
│       │   ├── cpu
│       │   │   ├── dcn_v2_cpu.cpp
│       │   │   ├── dcn_v2_im2col_cpu.cpp
│       │   │   ├── dcn_v2_im2col_cpu.h
│       │   │   ├── dcn_v2_psroi_pooling_cpu.cpp
│       │   │   └── vision.h
│       │   ├── cuda
│       │   │   ├── dcn_v2_cuda.cu
│       │   │   ├── dcn_v2_im2col_cuda.cu
│       │   │   ├── dcn_v2_im2col_cuda.h
│       │   │   ├── dcn_v2_psroi_pooling_cuda.cu
│       │   │   └── vision.h
│       │   ├── dcn_v2.h
│       │   └── vision.cpp
│       ├── testcpu.py
│       └── testcuda.py
├── model
│   ├── TSConv.py
│   ├── __pycache__
│   │   ├── GGHL4.cpython-38.pyc
│   │   ├── GGHL4.cpython-39.pyc
│   │   ├── GGHL6.cpython-39.pyc
│   │   ├── GGHL6single.cpython-39.pyc
│   │   ├── GGHL8.cpython-39.pyc
│   │   ├── GGHLv2.cpython-38.pyc
│   │   ├── GGHLv2.cpython-39.pyc
│   │   ├── double3090.cpython-38.pyc
│   │   └── double3090.cpython-39.pyc
│   ├── backbones
│   │   ├── __pycache__
│   │   │   ├── darknet53.cpython-38.pyc
│   │   │   ├── darknet53.cpython-39.pyc
│   │   │   ├── model_resnet.cpython-39.pyc
│   │   │   └── resnet.cpython-39.pyc
│   │   ├── darknet53.py
│   │   ├── mobilenetv2.py
│   │   ├── model_resnet.py
│   │   └── resnet.py
│   ├── head
│   │   ├── __pycache__
│   │   │   ├── head10.cpython-39.pyc
│   │   │   ├── head10single.cpython-39.pyc
│   │   │   ├── head11.cpython-39.pyc
│   │   │   ├── head3.cpython-38.pyc
│   │   │   ├── head4.cpython-38.pyc
│   │   │   ├── head4.cpython-39.pyc
│   │   │   ├── head5.cpython-38.pyc
│   │   │   ├── head5.cpython-39.pyc
│   │   │   ├── head6.cpython-39.pyc
│   │   │   ├── head7.cpython-39.pyc
│   │   │   ├── head9.cpython-39.pyc
│   │   │   ├── head_GGHLv2_x3.cpython-38.pyc
│   │   │   ├── head_ori.cpython-38.pyc
│   │   │   ├── headv2.cpython-38.pyc
│   │   │   ├── headv21.cpython-38.pyc
│   │   │   └── headv21.cpython-39.pyc
│   │   └── head.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── __init__.cpython-39.pyc
│   │   │   ├── activations.cpython-38.pyc
│   │   │   ├── activations.cpython-39.pyc
│   │   │   ├── conv_blocks.cpython-38.pyc
│   │   │   ├── conv_blocks.cpython-39.pyc
│   │   │   ├── convolutions.cpython-38.pyc
│   │   │   ├── convolutions.cpython-39.pyc
│   │   │   ├── msr_blocks.cpython-38.pyc
│   │   │   ├── msr_blocks.cpython-39.pyc
│   │   │   ├── multiscale_fusion_blocks.cpython-38.pyc
│   │   │   ├── multiscale_fusion_blocks.cpython-39.pyc
│   │   │   ├── np_attention_blocks.cpython-38.pyc
│   │   │   └── np_attention_blocks.cpython-39.pyc
│   │   ├── activations.py
│   │   ├── attention_blocks.py
│   │   ├── conv_blocks.py
│   │   ├── convolutions.py
│   │   ├── msr_blocks.py
│   │   ├── multiscale_fusion_blocks.py
│   │   └── np_attention_blocks.py
│   ├── loss
│   │   ├── __pycache__
│   │   │   ├── loss4.cpython-38.pyc
│   │   │   ├── loss4.cpython-39.pyc
│   │   │   ├── loss6.cpython-39.pyc
│   │   │   ├── lossv2.cpython-38.pyc
│   │   │   ├── lossv2.cpython-39.pyc
│   │   │   ├── lossv2single.cpython-39.pyc
│   │   │   ├── lossv8.cpython-39.pyc
│   │   │   ├── lossv88.cpython-39.pyc
│   │   │   ├── lossv9.cpython-39.pyc
│   │   │   ├── seesaw_loss.cpython-38.pyc
│   │   │   └── seesaw_loss.cpython-39.pyc
│   │   └── loss.py
│   └── neck
│       ├── __pycache__
│       │   ├── neckv2.cpython-38.pyc
│       │   ├── neckv2.cpython-39.pyc
│       │   └── neckv8.cpython-39.pyc
│       └── neck.py
├── predictionR
│   └── lr.png
├── requirements.txt
├── test.py
├── train_dist.sh
├── trainv2.py
└── utils
    ├── __pycache__
    │   ├── cosine_lr_scheduler.cpython-38.pyc
    │   ├── cosine_lr_scheduler.cpython-39.pyc
    │   ├── gpu.cpython-38.pyc
    │   ├── gpu.cpython-39.pyc
    │   ├── log.cpython-38.pyc
    │   ├── log.cpython-39.pyc
    │   ├── utils_basic.cpython-38.pyc
    │   ├── utils_basic.cpython-39.pyc
    │   ├── utils_coco.cpython-38.pyc
    │   ├── utils_coco.cpython-39.pyc
    │   ├── visualize.cpython-38.pyc
    │   └── visualize.cpython-39.pyc
    ├── cosine_lr_scheduler.py
    ├── gpu.py
    ├── log.py
    ├── mics.py
    ├── num_of_works_set.py
    ├── utils_basic.py
    ├── utils_coco.py
    └── visualize.py

--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## The final update will arrive next month; after that it is goodbye for good. This GitHub repo will no longer be updated, but you can still reach me by e-mail with questions about the code.

# Updated~~
# TS-Conv: Task-wise Sampling Convolutions for Arbitrary-Oriented Object Detection in Aerial Images

[badges: Version | GPLv3.0 License | Visitor | E-mail]

## This is the implementation of TS-Conv 👋👋👋
[[Arxiv](https://arxiv.org/abs/2209.02200)]
#### 👹👹👹 Barring surprises, this is my last work before graduation, and probably my last in academia. It is a bit thin, so please bear with me.
![image](https://user-images.githubusercontent.com/33946139/194797485-a38a73d1-a7fd-4c90-92f7-131d76afc2ce.png)

### Please give a ⭐️ if this project helped you. If you use it, please consider citing:
```Arxiv
@ARTICLE{9709203,
  author={Huang, Zhanchao and Li, Wei and Xia, Xiang-Gen and Wang, Hao and Tao, Ran},
  journal={arXiv},
  title={Task-wise Sampling Convolutions for Arbitrary-Oriented Object Detection in Aerial Images},
  year={2022},
  volume={},
  number={},
  pages={1-16},
  doi={10.48550/arXiv.2209.02200}}
```
### 🤡🤡🤡 Cloning without starring is just freeloading

## 0. Something Important 🦞 🦀 🦑

* #### 🎃🎃🎃 The usage of the TS-Conv repository is the same as that of the ancestral [GGHL](https://github.com/Shank2358/GGHL) repository. If you have any questions, please see the issues there. An MMRotate version is also being written. TS-Conv will keep being updated for a while; what is online now is the code of the main model. The key parts are the head, the DCN, and the label assignment in the dataloader, while the rest is much like GGHL. Visualization and the remaining features and experiments are being updated as fast as I can.

* #### 💖💖💖 The GGHL deployment version [GGHL-Deployment](https://github.com/Crescent-Ao/GGHL-Deployment) is now online, welcome to try it~~ Thanks to [Crescent-Ao](https://github.com/Crescent-Ao) and [haohaolalahao](https://github.com/haohaolalahao) for their contributions to the GGHL repository, and to [Crescent-Ao](https://github.com/Crescent-Ao) for completing the GGHL deployment version. The related repositories will continue to be updated, so stay tuned.

* #### 😺😺😺 Please also check out MGAR: Multi-Grained Angle Representation for Remote Sensing Object Detection, completed by [haohaolalahao](https://github.com/haohaolalahao) in cooperation with me, which has been accepted by [IEEE TGRS](https://ieeexplore.ieee.org/document/9912396) ([Arxiv](https://arxiv.org/abs/2209.02884)). Citations are appreciated:
```IEEE TGRS
@ARTICLE{9912396,
  author={Wang, Hao and Huang, Zhanchao and Chen, Zhengchao and Song, Ying and Li, Wei},
  journal={IEEE Transactions on Geoscience and Remote Sensing},
  title={Multi-Grained Angle Representation for Remote Sensing Object Detection},
  year={2022},
  volume={},
  number={},
  pages={1-1},
  doi={10.1109/TGRS.2022.3212592}}
```

## 🌈 1.Environments
Linux (Ubuntu 18.04, GCC>=5.4) & Windows (Win10)
CUDA > 11.1, cuDNN > 8.0.4

First, install CUDA, cuDNN, and PyTorch.
Second, install the dependent libraries in [requirements.txt](https://github.com/Shank2358/GGHL/blob/main/requirements.txt).

```bash
conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
pip install -r requirements.txt
```

## 🌟 2.Installation
1. git clone this repository

2. Polygon NMS
The poly_nms in this version is implemented with the shapely and numpy libraries so that it works across systems and environments without extra dependencies, at the cost of slower detection in dense-object scenes. If you need more speed, compile and use the poly_iou library (C++ implementation) in datasets_tools/DOTA_devkit; the compilation method is described in detail in [DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit).

```bash
cd datasets_tools/DOTA_devkit
sudo apt-get install swig
swig -c++ -python polyiou.i
python setup.py build_ext --inplace
```
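For reference, the pure-Python fallback boils down to a shapely-based polygon IoU. The sketch below is illustrative only: `poly_iou` and its flat 8-value argument layout are my assumptions, not the repository's exact API.

```python
# Minimal sketch of a shapely-based quadrilateral IoU (assumed helper,
# not the repository's actual poly_nms implementation).
import numpy as np
from shapely.geometry import Polygon

def poly_iou(p, q):
    """IoU of two quadrilaterals given as flat [x1,y1,...,x4,y4] arrays."""
    a = Polygon(np.asarray(p, dtype=np.float64).reshape(4, 2))
    b = Polygon(np.asarray(q, dtype=np.float64).reshape(4, 2))
    if not (a.is_valid and b.is_valid):
        return 0.0
    inter = a.intersection(b).area
    union = a.area + b.area - inter
    return inter / union if union > 0 else 0.0
```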
## 🎃 3.Datasets

1. [DOTA dataset](https://captain-whu.github.io/DOTA/dataset.html) and its [devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit)

#### (1) Training Format
You need to write a script that converts the annotations into the train.txt file required by this repository and put it in the ./dataR folder.
For the specific format of the train.txt file, see the example in the /dataR folder.

```txt
image_path xmin,ymin,xmax,ymax,class_id,x1,y1,x2,y2,x3,y3,x4,y4,area_ratio,angle[0,180) xmin,ymin,xmax,ymax,class_id,x1,y1,x2,y2,x3,y3,x4,y4,area_ratio,angle[0,180)...
```
The calculation method of the angle is explained in [Issues #1](https://github.com/Shank2358/GGHL/issues/1) and our paper.
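To make the conversion concrete, here is a hedged sketch of serializing one object into such a record. `to_record`, its inputs, and the area_ratio definition used here (rotated-box area over its axis-aligned hull area) are illustrative assumptions; the authoritative definitions are in the paper and the /dataR examples.

```python
# Sketch only: field order follows the format string above; the
# area_ratio definition is an assumption, see the paper for the real one.
def to_record(cls_id, quad, angle):
    """quad: [x1,y1,x2,y2,x3,y3,x4,y4] of the rotated box, angle in [0,180)."""
    xs, ys = quad[0::2], quad[1::2]
    xmin, xmax, ymin, ymax = min(xs), max(xs), min(ys), max(ys)
    # shoelace formula for the rotated quadrilateral's area
    quad_area = 0.5 * abs(sum(xs[i] * ys[(i + 1) % 4] - xs[(i + 1) % 4] * ys[i]
                              for i in range(4)))
    hull_area = max((xmax - xmin) * (ymax - ymin), 1e-6)
    fields = [xmin, ymin, xmax, ymax, cls_id, *quad, quad_area / hull_area, angle]
    return ",".join(str(round(f, 2)) for f in fields)

# one line per image: image_path followed by space-separated records
# line = image_path + " " + " ".join(to_record(c, q, a) for c, q, a in objects)
```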
#### (2) Validation & Testing Format
The same as the Pascal VOC format.

#### (3) DataSets Files Structure
```
cfg.DATA_PATH = "/opt/datasets/DOTA/"
├── ...
├── JPEGImages
|   ├── 000001.png
|   ├── 000002.png
|   └── ...
├── Annotations (DOTA Dataset Format)
|   ├── 000001.txt (class_idx x1 y1 x2 y2 x3 y3 x4 y4)
|   ├── 000002.txt
|   └── ...
├── ImageSets
|   └── test.txt (one testing filename per line)
|       000001
|       000002
|       ...
```
There is a DOTA2Train.py file in the datasets_tools folder that can be used to generate training- and test-format labels.
First, use [DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit), the official toolkit of the DOTA dataset, to split the images and labels. Then run DOTA2Train.py to convert them to the format required by GGHL. For the use of DOTA_devkit, please refer to the tutorial in the official repository.

## 🌠🌠🌠 4.Usage Example
#### (1) Training
```bash
sh train_dist.sh
```

#### (2) Testing
```bash
python test.py
```
## 📝 License
Copyright © 2021 [Shank2358](https://github.com/Shank2358).
This project is licensed under the [GNU General Public License v3.0](https://github.com/Shank2358/GGHL/blob/main/LICENSE).

## 🤐 To be continued
--------------------------------------------------------------------------------
/batch_sampler.py:
--------------------------------------------------------------------------------
from torch.utils.data import Sampler, RandomSampler, SequentialSampler
import numpy as np
import config.config as cfg


class BatchSampler(object):
    """Yields batches of [index, img_size] pairs and re-draws the image size
    every `multiscale_step` batches for multi-scale training."""

    def __init__(
        self, sampler, batch_size, drop_last, multiscale_step=None, img_sizes=None
    ):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        if not isinstance(drop_last, bool):
            raise ValueError(
                "drop_last should be a boolean value, but got "
                "drop_last={}".format(drop_last)
            )
        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        if multiscale_step is not None and multiscale_step < 1:
            raise ValueError(
                "multiscale_step should be > 0, but got "
                "multiscale_step={}".format(multiscale_step)
            )
        if multiscale_step is not None and img_sizes is None:
            raise ValueError(
                "img_sizes must be a list, but got img_sizes={} ".format(img_sizes)
            )

        self.multiscale_step = multiscale_step
        self.img_sizes = img_sizes

    def __iter__(self):
        num_batch = 0
        batch = []
        size = cfg.TRAIN["TRAIN_IMG_SIZE"]

        for idx in iter(self.sampler):
            batch.append([idx, size])
            if len(batch) == self.batch_size:
                yield batch
                num_batch += 1
                batch = []
                # pick a new input size for the next `multiscale_step` batches
                if self.multiscale_step and num_batch % self.multiscale_step == 0:
                    size = np.random.choice(self.img_sizes)
        if len(batch) > 0 and not self.drop_last:
            yield batch

    def __len__(self):
        if self.drop_last:
            return len(self.sampler) // self.batch_size
        else:
            return (len(self.sampler) + self.batch_size - 1) // self.batch_size
--------------------------------------------------------------------------------
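A minimal sketch of plugging BatchSampler into a DataLoader. The dataset placeholder and the assumption that the candidate sizes are the MULTI_TRAIN_RANGE entries times the maximum stride (32) are mine; the repository's trainv2.py is the authoritative entry point.

```python
# Hedged usage sketch for the BatchSampler above; values are assumptions.
from torch.utils.data import DataLoader, RandomSampler
import config.config as cfg
from batch_sampler import BatchSampler

train_dataset = ...  # must accept the [index, img_size] pairs yielded per batch
img_sizes = [32 * s for s in range(*cfg.TRAIN["MULTI_TRAIN_RANGE"])]  # e.g. 736..864
batch_sampler = BatchSampler(
    RandomSampler(train_dataset),
    batch_size=cfg.TRAIN["BATCH_SIZE"],
    drop_last=True,
    multiscale_step=1,  # re-draw the input size after every batch
    img_sizes=img_sizes,
)
loader = DataLoader(train_dataset, batch_sampler=batch_sampler,
                    num_workers=cfg.TRAIN["NUMBER_WORKERS"])
```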
#"trainSKU110KR11"#'ssdd'#"train_HRSC2016"#"trainSKU110KR11"#"train_DOTAxv1.5" 19 | MODEL = {"STRIDES":[8, 16, 32], "SCALES_PER_LAYER": 3} 20 | 21 | TRAIN = { 22 | "Transformer_SIZE": 896, 23 | "EVAL_TYPE": 'VOC', 24 | "TRAIN_IMG_SIZE": 960, 25 | "TRAIN_IMG_NUM": 79780, 26 | "AUGMENT": True, 27 | "MULTI_SCALE_TRAIN": True, 28 | "MULTI_TRAIN_RANGE": [23, 28, 1],#[26, 31, 1] 29 | "BATCH_SIZE": 24, 30 | "IOU_THRESHOLD_LOSS": 0.6, 31 | "EPOCHS": 36, 32 | "NUMBER_WORKERS": 8, 33 | "MOMENTUM": 0.9, 34 | "WEIGHT_DECAY": 0.0005, 35 | "LR_INIT": 5e-4, 36 | "LR_END": 1e-6, 37 | "WARMUP_EPOCHS": 5, 38 | "IOU_TYPE": 'GIOU' 39 | } 40 | 41 | TEST = { 42 | "EVAL_TYPE": 'VOC', 43 | "EVAL_JSON": 'test.json', 44 | "EVAL_NAME": 'test', 45 | "NUM_VIS_IMG": 0, 46 | "TEST_IMG_SIZE": 800, 47 | "BATCH_SIZE": 4, 48 | "NUMBER_WORKERS": 16, 49 | "CONF_THRESH": 0.06, 50 | "NMS_THRESH": 0.4, 51 | "IOU_THRESHOLD": 0.5, 52 | "NMS_METHODS": 'NMS', 53 | "MULTI_SCALE_TEST": False, 54 | "MULTI_TEST_RANGE": [832, 992, 32], 55 | "FLIP_TEST": False 56 | } 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /dataloadR/batch_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Sampler, RandomSampler, SequentialSampler 2 | import numpy as np 3 | import config.config as cfg 4 | 5 | 6 | class BatchSampler(object): 7 | def __init__( 8 | self, sampler, batch_size, drop_last, multiscale_step=None, img_sizes=None 9 | ): 10 | if not isinstance(sampler, Sampler): 11 | raise ValueError( 12 | "sampler should be an instance of " 13 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 14 | ) 15 | if not isinstance(drop_last, bool): 16 | raise ValueError( 17 | "drop_last should be a boolean value, but got " 18 | "drop_last={}".format(drop_last) 19 | ) 20 | self.sampler = sampler 21 | self.batch_size = batch_size 22 | self.drop_last = drop_last 23 | if multiscale_step is not None and multiscale_step < 1: 24 | raise ValueError( 25 | "multiscale_step should be > 0, but got " 26 | "multiscale_step={}".format(multiscale_step) 27 | ) 28 | if multiscale_step is not None and img_sizes is None: 29 | raise ValueError( 30 | "img_sizes must a list, but got img_sizes={} ".format(img_sizes) 31 | ) 32 | 33 | self.multiscale_step = multiscale_step 34 | self.img_sizes = img_sizes 35 | 36 | def __iter__(self): 37 | 38 | num_batch = 0 39 | batch = [] 40 | size = cfg.TRAIN["TRAIN_IMG_SIZE"] 41 | 42 | for idx in iter(self.sampler): 43 | batch.append([idx, size]) 44 | if len(batch) == self.batch_size: 45 | yield batch 46 | num_batch += 1 47 | batch = [] 48 | if self.multiscale_step and num_batch % self.multiscale_step == 0: 49 | size = np.random.choice(self.img_sizes) 50 | if len(batch) > 0 and not self.drop_last: 51 | yield batch 52 | 53 | def __len__(self): 54 | if self.drop_last: 55 | return len(self.sampler) // self.batch_size 56 | else: 57 | return (len(self.sampler) + self.batch_size - 1) // self.batch_size 58 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import utils.gpu as gpu 2 | from model.TSConv import GGHL 3 | from tensorboardX import SummaryWriter 4 | from evalR.evaluator_demo import Evaluator 5 | import argparse 6 | import os 7 | import config.config as cfg 8 | import time 9 | import logging 10 | from utils.utils_coco import * 11 | from utils.log import Logger 12 | from 
/dataloadR/batch_sampler.py:
--------------------------------------------------------------------------------
# Verbatim duplicate of /batch_sampler.py at the repository root
# (same BatchSampler class, with the same img_sizes message fix).
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
import utils.gpu as gpu
from model.TSConv import GGHL
from tensorboardX import SummaryWriter
from evalR.evaluator_demo import Evaluator
import argparse
import os
import torch
import config.config as cfg
import time
import logging
from utils.utils_coco import *
from utils.log import Logger
from torch.cuda import amp
from copy import deepcopy

class Tester(object):
    def __init__(self, weight_path=None, gpu_id=0, visiual=None, eval=False):
        self.img_size = cfg.TEST["TEST_IMG_SIZE"]
        self.__num_class = cfg.DATA["NUM"]
        self.__conf_threshold = cfg.TEST["CONF_THRESH"]
        self.__nms_threshold = cfg.TEST["NMS_THRESH"]
        self.__device = gpu.select_device(gpu_id, force_cpu=False)
        self.__multi_scale_test = cfg.TEST["MULTI_SCALE_TEST"]
        self.__flip_test = cfg.TEST["FLIP_TEST"]
        self.__classes = cfg.DATA["CLASSES"]

        self.__visiual = visiual
        self.__eval = eval
        self.__model = GGHL().eval().to(self.__device)  # Single GPU

        self.__load_model_weights(weight_path)

    def __load_model_weights(self, weight_path):
        print("loading weight file from : {}".format(weight_path))
        weight = os.path.join(weight_path)
        chkpt = torch.load(weight, map_location=self.__device)
        self.__model.load_state_dict(chkpt)  # ['model']
        del chkpt

    def test(self):
        img_id = '00001'  # id of the image to test
        with torch.no_grad():
            Evaluator(self.__model).Test_single_img(img_id)


if __name__ == "__main__":
    global logger
    parser = argparse.ArgumentParser()
    parser.add_argument('--weight_path', type=str, default='/home/hzc/v2/weight/5.15/best.pt', help='weight file path')
    parser.add_argument('--log_val_path', type=str, default='log/', help='validation log path')
    parser.add_argument('--visiual', type=str, default=None, help='test data path or None')
    parser.add_argument('--eval', action='store_true', default=True, help='eval flag')
    parser.add_argument('--gpu_id', type=int, default=0, help='gpu id')
    parser.add_argument('--log_path', type=str, default='log/', help='log path')
    opt = parser.parse_args()
    writer = SummaryWriter(logdir=opt.log_path + '/event')
    logger = Logger(log_file_name=opt.log_val_path + '/log_coco_test.txt', log_level=logging.DEBUG,
                    logger_name='GGHL').get_log()

    Tester(weight_path=opt.weight_path, gpu_id=opt.gpu_id, eval=opt.eval, visiual=opt.visiual).test()
--------------------------------------------------------------------------------
/evalR/evaluator_demo.py:
--------------------------------------------------------------------------------
import shutil
import time
from tqdm import tqdm
import torch.nn.functional as F
from dataloadR.augmentations import *
from evalR import voc_eval
from utils.utils_basic import *
from utils.visualize import *
import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool  # thread pool
from collections import defaultdict

current_milli_time = lambda: int(round(time.time() * 1000))


class Evaluator(object):
    def __init__(self, model, visiual=True):
        self.classes = cfg.DATA["CLASSES"]
        self.classes_num = cfg.DATA["NUM"]
        self.pred_result_path = os.path.join(cfg.PROJECT_PATH, 'predictionR')  # where prediction results are saved
        self.val_data_path = cfg.DATA_PATH
        self.strides = cfg.MODEL["STRIDES"]
        self.conf_thresh = cfg.TEST["CONF_THRESH"]
        self.nms_thresh = cfg.TEST["NMS_THRESH"]
        self.val_shape = cfg.TEST["TEST_IMG_SIZE"]
        self.__visiual = visiual
        self.__visual_imgs = cfg.TEST["NUM_VIS_IMG"]
        self.model = model
        self.device = next(model.parameters()).device
        self.inference_time = 0.
        self.iouthresh_test = cfg.TEST["IOU_THRESHOLD"]
        self.multi_test = cfg.TEST["MULTI_SCALE_TEST"]
        self.flip_test = cfg.TEST["FLIP_TEST"]
    def Test_single_img(self, img_id):
        img_path = os.path.join(self.val_data_path, 'JPEGImages', img_id + '.png')  # path of the test image
        img = cv2.imread(img_path)
        bboxes_prd = self.get_bbox(img, self.multi_test, self.flip_test)
        # per-class BGR drawing colors (same values as the original if/elif chain)
        colors = {0: (64, 0, 0), 1: (255, 0, 0), 2: (0, 255, 255), 3: (0, 0, 255),
                  4: (0, 255, 0), 5: (255, 0, 0), 6: (0, 128, 255), 7: (0, 0, 128),
                  8: (0, 128, 0), 9: (128, 0, 0), 10: (128, 128, 0), 11: (0, 128, 128),
                  12: (128, 128, 0), 13: (0, 255, 128), 14: (255, 128, 255)}
        for bbox in bboxes_prd:
            x1, y1, x2, y2, x3, y3, x4, y4 = bbox[:8]
            score = bbox[8]
            class_ind = int(bbox[9])
            class_name = self.classes[class_ind]
            score = '%.4f' % score
            points = np.array([[int(x1), int(y1)], [int(x2), int(y2)],
                               [int(x3), int(y3)], [int(x4), int(y4)]])
            color = colors.get(class_ind, (0, 0, 0))
            cv2.polylines(img, [points], 1, color, 2)
            font = cv2.FONT_HERSHEY_SIMPLEX
            img = cv2.putText(img, class_name + ' ' + score[:4], (int(float(x1)), int(float(y1))),
                              font, 0.3, (255, 255, 255), 1)
        store_path = os.path.join(self.pred_result_path, 'imgs', img_id + '.png')  # where the visualization is written
        # note: the JPEG quality flag is ignored when writing .png files
        cv2.imwrite(store_path, img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    def get_bbox(self, img, multi_test=False, flip_test=False):
        if multi_test:
            test_input_sizes = range(cfg.TEST["MULTI_TEST_RANGE"][0], cfg.TEST["MULTI_TEST_RANGE"][1],
                                     cfg.TEST["MULTI_TEST_RANGE"][2])
            bboxes_list = []
            for test_input_size in test_input_sizes:
                valid_scale = (0, np.inf)
                bboxes_list.append(self.__predict(img, test_input_size, valid_scale))
                if flip_test:
                    bboxes_flip = self.__predict(img[:, ::-1], test_input_size, valid_scale)
                    bboxes_flip[:, [0, 2]] = img.shape[1] - bboxes_flip[:, [2, 0]]
                    bboxes_list.append(bboxes_flip)
            bboxes = np.row_stack(bboxes_list)
        else:
            bboxes = self.__predict(img, self.val_shape, (0, np.inf))
        bboxes = self.non_max_suppression_4points(bboxes, self.conf_thresh, self.nms_thresh, multi_label=False)
        return bboxes[0].cpu().numpy()

    def __predict(self, img, test_shape, valid_scale):
        org_img = np.copy(img)
        org_h, org_w, _ = org_img.shape
        img = self.__get_img_tensor(img, test_shape).to(self.device)
        self.model.eval()
        with torch.no_grad():
            start_time = current_milli_time()
            _, _, _, p_d = self.model(img)
            self.inference_time += (current_milli_time() - start_time)

        pred_bbox = p_d.squeeze()
        bboxes = self.__convert_pred(pred_bbox, test_shape, (org_h, org_w), valid_scale)
        return bboxes

    def __get_img_tensor(self, img, test_shape):
        img = Resize((test_shape, test_shape), correct_box=False)(img, None).transpose(2, 0, 1)
        return torch.from_numpy(img[np.newaxis, ...]).float()
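    # Editor's annotation (not original code): __convert_pred below rebuilds
    # the oriented quadrilateral from an axis-aligned box (xywh), four side
    # ratios s1..s4 in [0, 1], and a "rectangular-ness" score r, in the
    # GGHL style as I read it:
    #   P1 = (xmin + s1 * w, ymin),  P2 = (xmax, ymin + s2 * h),
    #   P3 = (xmax - s3 * w, ymax),  P4 = (xmin, ymax - s4 * h).
    # When r > 0.9 the ratios are zeroed, so the quadrilateral degenerates
    # to the axis-aligned rectangle itself.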
    def __convert_pred(self, pred_bbox, test_input_size, org_img_shape, valid_scale):
        pred_xyxy = xywh2xyxy(pred_bbox[:, :4])  # xywh -> xyxy
        pred_conf = pred_bbox[:, 13]
        pred_prob = pred_bbox[:, 14:]
        org_h, org_w = org_img_shape
        resize_ratio = min(1.0 * test_input_size / org_w, 1.0 * test_input_size / org_h)
        dw = (test_input_size - resize_ratio * org_w) / 2
        dh = (test_input_size - resize_ratio * org_h) / 2
        pred_xyxy[:, 0::2] = 1.0 * (pred_xyxy[:, 0::2] - dw) / resize_ratio
        pred_xyxy[:, 1::2] = 1.0 * (pred_xyxy[:, 1::2] - dh) / resize_ratio
        pred_s = pred_bbox[:, 4:8]
        pred_r = pred_bbox[:, 8:9]
        zero = torch.zeros_like(pred_s)
        pred_s = torch.where(pred_r > 0.9, zero, pred_s)
        # (2) clip the parts of the predicted bboxes that fall outside the image
        device = pred_bbox.device
        pred_xyxy = torch.cat(
            [torch.maximum(pred_xyxy[:, :2], torch.tensor([0, 0]).to(device)),
             torch.minimum(pred_xyxy[:, 2:], torch.tensor([org_w - 1, org_h - 1]).to(device))], dim=-1)

        invalid_mask = torch.logical_or((pred_xyxy[:, 0] > pred_xyxy[:, 2]), (pred_xyxy[:, 1] > pred_xyxy[:, 3]))
        pred_xyxy[invalid_mask] = 0
        pred_s[invalid_mask] = 0
        # (4) drop bboxes outside the valid scale range
        bboxes_scale = torch.sqrt((pred_xyxy[..., 2:3] - pred_xyxy[..., 0:1]) * (pred_xyxy[..., 3:4] - pred_xyxy[..., 1:2]))
        scale_mask = torch.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])).squeeze(-1)
        # (5) drop bboxes whose score is below conf_thresh
        classes = torch.argmax(pred_prob, dim=-1)
        scores = pred_conf * pred_prob[torch.arange(len(pred_xyxy)), classes]
        score_mask = scores > self.conf_thresh
        mask = torch.logical_and(scale_mask, score_mask)
        pred_xyxy = pred_xyxy[mask]
        pred_s = pred_s[mask]
        pred_conf = pred_conf[mask]
        # classes = classes[mask]
        pred_prob = pred_prob[mask]
        # rebuild the four corners by sliding along the sides of the axis-aligned box
        x1 = pred_s[:, 0:1] * (pred_xyxy[:, 2:3] - pred_xyxy[:, 0:1]) + pred_xyxy[:, 0:1]
        y1 = pred_xyxy[:, 1:2]
        x2 = pred_xyxy[:, 2:3]
        y2 = pred_s[:, 1:2] * (pred_xyxy[:, 3:4] - pred_xyxy[:, 1:2]) + pred_xyxy[:, 1:2]
        x3 = pred_xyxy[:, 2:3] - pred_s[:, 2:3] * (pred_xyxy[:, 2:3] - pred_xyxy[:, 0:1])
        y3 = pred_xyxy[:, 3:4]
        x4 = pred_xyxy[:, 0:1]
        y4 = pred_xyxy[:, 3:4] - pred_s[:, 3:4] * (pred_xyxy[:, 3:4] - pred_xyxy[:, 1:2])
        coor4points = torch.cat([x1, y1, x2, y2, x3, y3, x4, y4], dim=-1)

        bboxes = torch.cat([coor4points, pred_conf.unsqueeze(-1), pred_prob], dim=-1)
        bs = cfg.TEST["NUMBER_WORKERS"]  # note: NUMBER_WORKERS is reused here as the batch dimension of the reshape
        bboxes = bboxes.view(bs, -1, bboxes.shape[1])
        return bboxes
    def non_max_suppression_4points(self, prediction, conf_thres=0.2, iou_thres=0.45, merge=False, classes=None,
                                    multi_label=False, agnostic=False, without_iouthres=False):
        nc = prediction[0].shape[1] - 9
        xc = prediction[..., 8] > conf_thres  # candidates

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_det = 500  # maximum number of detections per image
        time_limit = 10.0  # seconds to quit after
        redundant = True  # require redundant detections
        multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

        t = time.time()
        # output: one (num_kept, 10) tensor per image
        output = [torch.zeros((0, 10), device=prediction.device)] * prediction.shape[0]
        for xi, x in enumerate(prediction):  # image index, image inference
            # Apply constraints
            x = x[xc[xi]]  # x -> (num_confthres_boxes, no)
            # If none remain, process the next image
            if not x.shape[0]:
                continue
            # Compute conf
            x[:, 9:] *= x[:, 8:9]  # conf = obj_conf * cls_conf
            box = x[:, :8]
            if multi_label:
                i, j = (x[:, 9:] > conf_thres).nonzero(as_tuple=False).T
                # concatenate column-wise: x -> (num_confthres_boxes, [x1,y1,...,x4,y4] + [conf] + [classid])
                x = torch.cat((box[i], x[i, j + 9, None], j[:, None].float()), 1)
            else:  # best class only
                conf, j = x[:, 9:].max(1, keepdim=True)
                x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

            if without_iouthres:  # skip the IoU-threshold NMS
                output[xi] = x
                continue
            # Filter by class
            if classes:
                x = x[(x[:, 9:] == torch.tensor(classes, device=x.device)).any(1)]  # keep rows matching any requested class
            # If none remain, process the next image
            n = x.shape[0]  # number of boxes
            if not n:
                continue
            # Sort by confidence
            c = x[:, 9:] * (0 if agnostic else max_wh)  # per-class coordinate offsets keep NMS class-aware
            boxes_4points, scores = x[:, :8] + c, x[:, 8]
            i = np.array(py_cpu_nms_poly_fast(np.double(boxes_4points.cpu().numpy()), scores.cpu().numpy(), iou_thres))
            if i.shape[0] > max_det:  # limit detections
                i = i[:max_det]
            temp = x[i].clone()
            output[xi] = temp  # boxes kept by polygon NMS: (num_conf_nms, [x1..y4, conf, classid])
            if (time.time() - t) > time_limit:
                break  # time limit exceeded
        return output
--------------------------------------------------------------------------------
/evalR/voc_eval.py:
--------------------------------------------------------------------------------
import xml.etree.ElementTree as ET
import os
import pickle
import numpy as np
from utils.utils_basic import *


def parse_rec(filename):
    """Parse a PASCAL VOC xml file."""
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(bbox.find('xmin').text),
                              int(bbox.find('ymin').text),
                              int(bbox.find('xmax').text),
                              int(bbox.find('ymax').text)]
        objects.append(obj_struct)

    return objects


def parse_poly(filename):
    """
    :param filename: ground truth file to parse
    :return: all instances in a picture
    """
    objects = []
    with open(filename, 'r') as f:
        while True:
            line = f.readline()
            if line:
                splitlines = line.strip().split(' ')
                object_struct = {}
                if (len(splitlines) < 9):
                    continue
                classes = cfg.DATA["CLASSES"]
                object_struct['name'] = classes[int(splitlines[0])]
                if (len(splitlines) == 9):
                    object_struct['difficult'] = 0
                elif (len(splitlines) == 10):
                    object_struct['difficult'] = int(splitlines[9])
                object_struct['bbox'] = [float(splitlines[1]),
                                         float(splitlines[2]),
                                         float(splitlines[3]),
                                         float(splitlines[4]),
                                         float(splitlines[5]),
                                         float(splitlines[6]),
                                         float(splitlines[7]),
                                         float(splitlines[8])]
                objects.append(object_struct)
            else:
                break
    return objects
def voc_ap(rec, prec, use_07_metric=False):
    """ap = voc_ap(rec, prec, [use_07_metric])
    Compute VOC AP given precision and recall.
    If use_07_metric is true, uses the
    VOC 07 11-point method (default: False).
    """
    if use_07_metric:
        # 11-point metric
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
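
# A small worked example of voc_ap (editor's annotation, not original code):
# with two ground truths and three detections whose cumulated TP/FP give
#   rec  = [0.5, 0.5, 1.0]
#   prec = [1.0, 0.5, 0.6667]
# the precision envelope over mrec = [0, 0.5, 0.5, 1.0, 1.0] becomes
# [1.0, 1.0, 0.6667, 0.6667, 0], so
#   ap = 0.5 * 1.0 + 0.5 * 0.6667 = 0.8333.
# Equivalently:
#   voc_ap(np.array([0.5, 0.5, 1.0]), np.array([1.0, 0.5, 2/3]))  # -> 0.8333...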
def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_poly(annopath.format(imagename))
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(i + 1, len(imagenames)))
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            pickle.dump(recs, f)
    else:
        # load
        with open(cachefile, 'rb') as f:
            recs = pickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(bool)  # np.bool is deprecated in recent numpy
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # cheap prefilter: overlaps of the axis-aligned hulls first
            BBGT_xmin = np.min(BBGT[:, 0::2], axis=1)
            BBGT_ymin = np.min(BBGT[:, 1::2], axis=1)
            BBGT_xmax = np.max(BBGT[:, 0::2], axis=1)
            BBGT_ymax = np.max(BBGT[:, 1::2], axis=1)
            bb_xmin = np.min(bb[0::2])
            bb_ymin = np.min(bb[1::2])
            bb_xmax = np.max(bb[0::2])
            bb_ymax = np.max(bb[1::2])

            ixmin = np.maximum(BBGT_xmin, bb_xmin)
            iymin = np.maximum(BBGT_ymin, bb_ymin)
            ixmax = np.minimum(BBGT_xmax, bb_xmax)
            iymax = np.minimum(BBGT_ymax, bb_ymax)
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb_xmax - bb_xmin + 1.) * (bb_ymax - bb_ymin + 1.) +
                   (BBGT_xmax - BBGT_xmin + 1.) *
                   (BBGT_ymax - BBGT_ymin + 1.) - inters)

            overlaps = inters / uni

            # exact polygon IoU only for GTs whose hulls overlap the detection
            BBGT_keep_mask = overlaps > 0
            BBGT_keep = BBGT[BBGT_keep_mask, :]
            BBGT_keep_index = np.where(overlaps > 0)[0]

            def calcoverlaps(BBGT_keep, bb):
                overlaps = []
                for index, GT in enumerate(BBGT_keep):
                    overlap = polygen_iou_xy4_numpy_eval(BBGT_keep[index], bb)
                    overlaps.append(overlap)
                return overlaps

            if len(BBGT_keep) > 0:
                overlaps = calcoverlaps(BBGT_keep, bb)
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)
                jmax = BBGT_keep_index[jmax]

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    fp[d] = 1.
        else:
            fp[d] = 1.
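A hedged sketch of driving voc_eval for one class. The path templates are illustrative placeholders; the repository's eval scripts define the real ones, and the detection files are expected to contain `image_id confidence x1 y1 ... x4 y4` per line (as the parsing above implies).

```python
# Illustrative only; all paths below are assumptions.
rec, prec, ap = voc_eval(
    detpath='predictionR/voc/Task1_{:s}.txt',          # per-class detection files
    annopath='/opt/datasets/DOTA/Annotations/{:s}.txt',
    imagesetfile='/opt/datasets/DOTA/ImageSets/test.txt',
    classname='plane',
    cachedir='predictionR/cache',
    ovthresh=0.5,
    use_07_metric=False)
print('plane AP = {:.4f}'.format(ap))
```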

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
--------------------------------------------------------------------------------
/lib/DCNv2/make.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
rm *.so
rm -r build/
python setup.py build develop
--------------------------------------------------------------------------------
/lib/DCNv2/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import glob
import os

import torch
from setuptools import find_packages, setup
from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension

requirements = ["torch", "torchvision"]


def get_extensions():
    this_dir = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(this_dir, "src")

    main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
    source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
    source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
    os.environ["CC"] = "g++-10"  # hard-coded host compiler; adjust to your toolchain
    sources = main_file + source_cpu
    extension = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []

    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        sources += source_cuda
        define_macros += [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]
    else:
        # raise NotImplementedError('Cuda is not available')
        pass

    sources = [os.path.join(extensions_dir, s) for s in sources]
    include_dirs = [extensions_dir]
    ext_modules = [
        extension(
            "_ext",
            sources,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]
    return ext_modules


setup(
    name="DCNv2",
    version="0.1",
    author="charlesshang",
    url="https://github.com/charlesshang/DCNv2",
    description="deformable convolutional networks",
    packages=find_packages(exclude=("configs", "tests")),
    # install_requires=requirements,
    ext_modules=get_extensions(),
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
--------------------------------------------------------------------------------
/lib/DCNv2/src/cpu/dcn_v2_im2col_cpu.h:
--------------------------------------------------------------------------------

/*!
 ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
 *
 * COPYRIGHT
 *
 * All contributions by the University of California:
 * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
 * All rights reserved.
 *
 * All other contributions:
 * Copyright (c) 2014-2017, the respective contributors
 * All rights reserved.
 *
 * Caffe uses a shared copyright model: each contributor holds copyright over
 * their contributions to Caffe.
 * The project versioning records all such
 * contribution and copyright details. If a contributor wants to further mark
 * their specific copyright on a particular contribution, they should indicate
 * their copyright solely in the commit message of the change when it is
 * committed.
 *
 * LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * CONTRIBUTION AGREEMENT
 *
 * By contributing to the BVLC/caffe repository through pull-request, comment,
 * or otherwise, the contributor releases their content to the
 * license and copyright terms herein.
 *
 ***************** END Caffe Copyright Notice and Disclaimer ********************
 *
 * Copyright (c) 2018 Microsoft
 * Licensed under The MIT License [see LICENSE for details]
 * \file modulated_deformable_im2col.h
 * \brief Function definitions of converting an image to
 * column matrix based on kernel, padding, dilation, and offset.
 * These functions are mainly used in deformable convolution operators.
 * \ref: https://arxiv.org/abs/1811.11168
 * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
 */

/***************** Adapted by Charles Shang *********************/
// modified from the CUDA version for CPU use by Daniel K. Suhendro

#ifndef DCN_V2_IM2COL_CPU
#define DCN_V2_IM2COL_CPU

#ifdef __cplusplus
extern "C"
{
#endif

    void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask,
                                         const int batch_size, const int channels, const int height_im, const int width_im,
                                         const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
                                         const int pad_h, const int pad_w, const int stride_h, const int stride_w,
                                         const int dilation_h, const int dilation_w,
                                         const int deformable_group, float *data_col);

    void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask,
                                         const int batch_size, const int channels, const int height_im, const int width_im,
                                         const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
                                         const int pad_h, const int pad_w, const int stride_h, const int stride_w,
                                         const int dilation_h, const int dilation_w,
                                         const int deformable_group, float *grad_im);

    void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
                                               const int batch_size, const int channels, const int height_im, const int width_im,
                                               const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
                                               const int pad_h, const int pad_w, const int stride_h, const int stride_w,
                                               const int dilation_h, const int dilation_w,
                                               const int deformable_group,
                                               float *grad_offset, float *grad_mask);

#ifdef __cplusplus
}
#endif

#endif
--------------------------------------------------------------------------------
/lib/DCNv2/src/cpu/vision.h:
--------------------------------------------------------------------------------
#pragma once
#include <torch/extension.h>

at::Tensor
dcn_v2_cpu_forward(const at::Tensor &input,
                   const at::Tensor &weight,
                   const at::Tensor &bias,
                   const at::Tensor &offset,
                   const at::Tensor &mask,
                   const int kernel_h,
                   const int kernel_w,
                   const int stride_h,
                   const int stride_w,
                   const int pad_h,
                   const int pad_w,
                   const int dilation_h,
                   const int dilation_w,
                   const int deformable_group);

std::vector<at::Tensor>
dcn_v2_cpu_backward(const at::Tensor &input,
                    const at::Tensor &weight,
                    const at::Tensor &bias,
                    const at::Tensor &offset,
                    const at::Tensor &mask,
                    const at::Tensor &grad_output,
                    int kernel_h, int kernel_w,
                    int stride_h, int stride_w,
                    int pad_h, int pad_w,
                    int dilation_h, int dilation_w,
                    int deformable_group);


std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
                                 const at::Tensor &bbox,
                                 const at::Tensor &trans,
                                 const int no_trans,
                                 const float spatial_scale,
                                 const int output_dim,
                                 const int group_size,
                                 const int pooled_size,
                                 const int part_size,
                                 const int sample_per_part,
                                 const float trans_std);

std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
                                  const at::Tensor &input,
                                  const at::Tensor &bbox,
                                  const at::Tensor &trans,
                                  const at::Tensor &top_count,
                                  const int no_trans,
                                  const float spatial_scale,
                                  const int output_dim,
                                  const int group_size,
                                  const int pooled_size,
                                  const int part_size,
                                  const int sample_per_part,
                                  const float trans_std);
--------------------------------------------------------------------------------
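The headers above and the CUDA sources below are tied together on the Python side by lib/DCNv2/dcn_v2.py. A minimal hedged usage sketch follows, assuming this fork keeps the upstream charlesshang/DCNv2 interface in which `DCN` predicts its own offsets and modulation masks:

```python
# Hedged sketch: `DCN` and its signature follow the upstream DCNv2
# interface; this fork's lib/DCNv2/dcn_v2.py is assumed to match.
import torch
from dcn_v2 import DCN

dcn = DCN(64, 64, kernel_size=(3, 3), stride=1,
          padding=1, deformable_groups=2).cuda()
x = torch.randn(2, 64, 128, 128, device='cuda')
out = dcn(x)      # offsets and modulation masks are predicted internally
print(out.shape)  # torch.Size([2, 64, 128, 128])
```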
--------------------------------------------------------------------------------
/lib/DCNv2/src/cuda/dcn_v2_cuda.cu:
--------------------------------------------------------------------------------
#include <vector>
#include "dcn_v2_im2col_cuda.h"

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>

#include <c10/cuda/CUDAGuard.h>
// THCState *state = at::globalContext().lazyInitCUDA();

// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu

// [batch gemm]
// https://github.com/pytorch/pytorch/blob/master/aten/src/THC/generic/THCTensorMathBlas.cu


at::Tensor
dcn_v2_cuda_forward(const at::Tensor &input,
                    const at::Tensor &weight,
                    const at::Tensor &bias,
                    const at::Tensor &offset,
                    const at::Tensor &mask,
                    const int kernel_h,
                    const int kernel_w,
                    const int stride_h,
                    const int stride_w,
                    const int pad_h,
                    const int pad_w,
                    const int dilation_h,
                    const int dilation_w,
                    const int deformable_group)
{
    using scalar_t = float;
    // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));
    AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
    AT_ASSERTM(weight.is_cuda(), "weight must be a CUDA tensor");
    AT_ASSERTM(bias.is_cuda(), "bias must be a CUDA tensor");
    AT_ASSERTM(offset.is_cuda(), "offset must be a CUDA tensor");
    AT_ASSERTM(mask.is_cuda(), "mask must be a CUDA tensor");

    const at::cuda::OptionalCUDAGuard device_guard(device_of(input));

    const int batch = input.size(0);
    const int channels = input.size(1);
    const int height = input.size(2);
    const int width = input.size(3);

    const int channels_out = weight.size(0);
    const int channels_kernel = weight.size(1);
    const int kernel_h_ = weight.size(2);
    const int kernel_w_ = weight.size(3);

    AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
               "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", kernel_h_, kernel_w_, kernel_h, kernel_w);

    AT_ASSERTM(channels == channels_kernel,
               "Input shape and kernel channels wont match: (%d vs %d).", channels, channels_kernel);

    const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
    const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

    auto ones = at::ones({batch, bias.sizes()[0], height_out, width_out}, input.options());
    auto columns = at::empty({batch, channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
    auto output = at::zeros({batch, channels_out, height_out, width_out}, input.options());

    // Add biases to output tensor (torch implementation)
    auto ones_T = at::transpose(ones.contiguous(), 3, 1);
    ones_T = at::mul(ones_T, bias.contiguous());
    ones_T = at::transpose(ones_T, 3, 1);
    output = at::add(output, ones_T);

    modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),
                                     input.data_ptr<scalar_t>(),
                                     offset.data_ptr<scalar_t>(),
                                     mask.data_ptr<scalar_t>(),
                                     batch, channels, height, width,
                                     height_out, width_out, kernel_h, kernel_w,
                                     pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
                                     deformable_group,
                                     columns.data_ptr<scalar_t>());

    // Multiply the flattened weights with the sampled columns and add to output (torch implementation)
    auto weight_flat = weight.view({channels_out, channels * kernel_h * kernel_w});
    auto product = at::matmul(weight_flat, columns);
    output = at::add(output, product.view({batch, channels_out, height_out, width_out}));

    return output;
}
columns.data_ptr<scalar_t>()); 86 | 87 | // Scale columns and add to output 88 | // torch implementation 89 | auto weight_flat = weight.view({channels_out, channels * kernel_h * kernel_w}); 90 | auto product = at::matmul(weight_flat, columns); 91 | output = at::add(output, product.view({batch, channels_out, height_out, width_out})); 92 | 93 | return output; 94 | } 95 | 96 | std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input, 97 | const at::Tensor &weight, 98 | const at::Tensor &bias, 99 | const at::Tensor &offset, 100 | const at::Tensor &mask, 101 | const at::Tensor &grad_output, 102 | int kernel_h, int kernel_w, 103 | int stride_h, int stride_w, 104 | int pad_h, int pad_w, 105 | int dilation_h, int dilation_w, 106 | int deformable_group) 107 | { 108 | 109 | TORCH_CHECK_ARG(input.is_contiguous(), 1, "input tensor has to be contiguous"); 110 | TORCH_CHECK_ARG(weight.is_contiguous(), 2, "weight tensor has to be contiguous"); 111 | 112 | AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor"); 113 | AT_ASSERTM(weight.is_cuda(), "weight must be a CUDA tensor"); 114 | AT_ASSERTM(bias.is_cuda(), "bias must be a CUDA tensor"); 115 | AT_ASSERTM(offset.is_cuda(), "offset must be a CUDA tensor"); 116 | AT_ASSERTM(mask.is_cuda(), "mask must be a CUDA tensor"); 117 | 118 | const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); 119 | const int batch = input.size(0); 120 | const int channels = input.size(1); 121 | const int height = input.size(2); 122 | const int width = input.size(3); 123 | 124 | const int channels_out = weight.size(0); 125 | const int channels_kernel = weight.size(1); 126 | const int kernel_h_ = weight.size(2); 127 | const int kernel_w_ = weight.size(3); 128 | 129 | AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w, 130 | "Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h, kernel_w, kernel_h_, kernel_w_); 131 | 132 | AT_ASSERTM(channels == channels_kernel, 133 | "Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel); 134 | 135 | const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; 136 | const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; 137 | 138 | auto ones = at::ones({height_out, width_out}, input.options()); 139 | auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options()); 140 | auto output = at::empty({batch, channels_out, height_out, width_out}, input.options()); 141 | 142 | auto grad_input = at::zeros_like(input); 143 | auto grad_weight = at::zeros_like(weight); 144 | auto grad_bias = at::zeros_like(bias); 145 | auto grad_offset = at::zeros_like(offset); 146 | auto grad_mask = at::zeros_like(mask); 147 | 148 | using scalar_t = float; 149 | 150 | for (int b = 0; b < batch; b++) 151 | { 152 | auto input_n = input.select(0, b); 153 | auto offset_n = offset.select(0, b); 154 | auto mask_n = mask.select(0, b); 155 | auto grad_output_n = grad_output.select(0, b); 156 | auto grad_input_n = grad_input.select(0, b); 157 | auto grad_offset_n = grad_offset.select(0, b); 158 | auto grad_mask_n = grad_mask.select(0, b); 159 | 160 | // Torch implementation 161 | auto weight_flat = weight.view({channels_out, channels*kernel_h*kernel_w}); 162 | weight_flat = at::transpose(weight_flat, 1, 0); 163 | 164 | auto grad_output_n_flat = grad_output_n.view({channels_out, height_out*width_out}); 165 | columns = at::matmul(weight_flat, grad_output_n_flat); 166 | 167 | // gradient w.r.t.
input coordinate data 168 | modulated_deformable_col2im_coord_cuda(c10::cuda::getCurrentCUDAStream(), 169 | columns.data_ptr<scalar_t>(), 170 | input_n.data_ptr<scalar_t>(), 171 | offset_n.data_ptr<scalar_t>(), 172 | mask_n.data_ptr<scalar_t>(), 173 | 1, channels, height, width, 174 | height_out, width_out, kernel_h, kernel_w, 175 | pad_h, pad_w, stride_h, stride_w, 176 | dilation_h, dilation_w, deformable_group, 177 | grad_offset_n.data_ptr<scalar_t>(), 178 | grad_mask_n.data_ptr<scalar_t>()); 179 | // gradient w.r.t. input data 180 | modulated_deformable_col2im_cuda(c10::cuda::getCurrentCUDAStream(), 181 | columns.data_ptr<scalar_t>(), 182 | offset_n.data_ptr<scalar_t>(), 183 | mask_n.data_ptr<scalar_t>(), 184 | 1, channels, height, width, 185 | height_out, width_out, kernel_h, kernel_w, 186 | pad_h, pad_w, stride_h, stride_w, 187 | dilation_h, dilation_w, deformable_group, 188 | grad_input_n.data_ptr<scalar_t>()); 189 | 190 | // gradient w.r.t. weight, dWeight should accumulate across the batch and group 191 | modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(), 192 | input_n.data_ptr<scalar_t>(), 193 | offset_n.data_ptr<scalar_t>(), 194 | mask_n.data_ptr<scalar_t>(), 195 | 1, channels, height, width, 196 | height_out, width_out, kernel_h, kernel_w, 197 | pad_h, pad_w, stride_h, stride_w, 198 | dilation_h, dilation_w, deformable_group, 199 | columns.data_ptr<scalar_t>()); 200 | 201 | 202 | // Torch implementation 203 | auto product = at::matmul(grad_output_n_flat, at::transpose(columns, 1, 0)); 204 | grad_weight = at::add(grad_weight, product.view({channels_out, channels, kernel_h, kernel_w})); 205 | 206 | // Torch implementation 207 | auto ones_flat = ones.view({height_out*width_out}); 208 | product = at::matmul(grad_output_n_flat, ones_flat); 209 | grad_bias = at::add(grad_bias, product); 210 | 211 | } 212 | 213 | return { 214 | grad_input, grad_offset, grad_mask, grad_weight, grad_bias 215 | }; 216 | } 217 | -------------------------------------------------------------------------------- /lib/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution.
32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | 64 | #ifndef DCN_V2_IM2COL_CUDA 65 | #define DCN_V2_IM2COL_CUDA 66 | 67 | #ifdef __cplusplus 68 | extern "C" 69 | { 70 | #endif 71 | 72 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 73 | const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 81 | const float *data_col, const float *data_offset, const float *data_mask, 82 | const int batch_size, const int channels, const int height_im, const int width_im, 83 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 85 | const int dilation_h, const int dilation_w, 86 | const int deformable_group, float *grad_im); 87 | 88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 90 | const int batch_size, const int channels, const int height_im, const int width_im, 91 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 93 | const int dilation_h, const int dilation_w, 94 | const int deformable_group, 95 | float *grad_offset, float *grad_mask); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif
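The declarations above fix the shape contract that dcn_v2_cuda.cu relies on. As a quick reference, here is a minimal sketch (not part of the repository; `dcn_output_size` is an illustrative helper) of the bookkeeping that dcn_v2_cuda_forward performs before handing buffers to modulated_deformable_im2col_cuda:

```python
def dcn_output_size(in_size, kernel, stride, pad, dilation):
    # Same formula as height_out / width_out in dcn_v2_cuda.cu.
    return (in_size + 2 * pad - (dilation * (kernel - 1) + 1)) // stride + 1

kernel_h = kernel_w = 3
deformable_group = 1
h_out = dcn_output_size(32, kernel_h, stride=1, pad=1, dilation=1)  # -> 32

# Channel layouts expected by the kernels declared above:
offset_channels = deformable_group * 2 * kernel_h * kernel_w  # one (dy, dx) per sampling tap
mask_channels = deformable_group * 1 * kernel_h * kernel_w    # one scalar per sampling tap
# data_col is filled with shape (channels * kernel_h * kernel_w, h_out * w_out).
```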
-------------------------------------------------------------------------------- /lib/DCNv2/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | #include <vector> 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector<at::Tensor> 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple<at::Tensor, at::Tensor> 35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple<at::Tensor, at::Tensor> 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std);
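Once the extension is compiled (see make.sh / setup.py), the entry points declared here are routed through dcn_v2.h below and bound to Python in vision.cpp. A hedged usage sketch follows; the compiled module name `_ext` is an assumption taken from common DCNv2 setup.py files, and in practice callers normally go through the autograd wrappers in dcn_v2.py rather than the raw binding:

```python
import torch
import _ext  # compiled extension; the module name is an assumption, check setup.py

N, C, H, W, kH, kW, groups = 1, 64, 32, 32, 3, 3, 1
x = torch.randn(N, C, H, W, device="cuda")
weight = torch.randn(C, C, kH, kW, device="cuda")
bias = torch.zeros(C, device="cuda")
offset = torch.zeros(N, groups * 2 * kH * kW, H, W, device="cuda")  # zero offsets = regular grid
mask = torch.ones(N, groups * 1 * kH * kW, H, W, device="cuda")     # all-ones mask = no modulation

out = _ext.dcn_v2_forward(x, weight, bias, offset, mask,
                          kH, kW,   # kernel_h, kernel_w
                          1, 1,     # stride_h, stride_w
                          1, 1,     # pad_h, pad_w
                          1, 1,     # dilation_h, dilation_w
                          groups)   # deformable_group
assert out.shape == (N, C, H, W)
```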
-------------------------------------------------------------------------------- /lib/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | at::Tensor 10 | dcn_v2_forward(const at::Tensor &input, 11 | const at::Tensor &weight, 12 | const at::Tensor &bias, 13 | const at::Tensor &offset, 14 | const at::Tensor &mask, 15 | const int kernel_h, 16 | const int kernel_w, 17 | const int stride_h, 18 | const int stride_w, 19 | const int pad_h, 20 | const int pad_w, 21 | const int dilation_h, 22 | const int dilation_w, 23 | const int deformable_group) 24 | { 25 | if (input.type().is_cuda()) 26 | { 27 | #ifdef WITH_CUDA 28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask, 29 | kernel_h, kernel_w, 30 | stride_h, stride_w, 31 | pad_h, pad_w, 32 | dilation_h, dilation_w, 33 | deformable_group); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | else{ 39 | return dcn_v2_cpu_forward(input, weight, bias, offset, mask, 40 | kernel_h, kernel_w, 41 | stride_h, stride_w, 42 | pad_h, pad_w, 43 | dilation_h, dilation_w, 44 | deformable_group); 45 | } 46 | } 47 | 48 | std::vector<at::Tensor> 49 | dcn_v2_backward(const at::Tensor &input, 50 | const at::Tensor &weight, 51 | const at::Tensor &bias, 52 | const at::Tensor &offset, 53 | const at::Tensor &mask, 54 | const at::Tensor &grad_output, 55 | int kernel_h, int kernel_w, 56 | int stride_h, int stride_w, 57 | int pad_h, int pad_w, 58 | int dilation_h, int dilation_w, 59 | int deformable_group) 60 | { 61 | if (input.type().is_cuda()) 62 | { 63 | #ifdef WITH_CUDA 64 | return dcn_v2_cuda_backward(input, 65 | weight, 66 | bias, 67 | offset, 68 | mask, 69 | grad_output, 70 | kernel_h, kernel_w, 71 | stride_h, stride_w, 72 | pad_h, pad_w, 73 | dilation_h, dilation_w, 74 | deformable_group); 75 | #else 76 | AT_ERROR("Not compiled with GPU support"); 77 | #endif 78 | } 79 | else{ 80 | return dcn_v2_cpu_backward(input, 81 | weight, 82 | bias, 83 | offset, 84 | mask, 85 | grad_output, 86 | kernel_h, kernel_w, 87 | stride_h, stride_w, 88 | pad_h, pad_w, 89 | dilation_h, dilation_w, 90 | deformable_group); 91 | } 92 | } 93 | 94 | std::tuple<at::Tensor, at::Tensor> 95 | dcn_v2_psroi_pooling_forward(const at::Tensor &input, 96 | const at::Tensor &bbox, 97 | const at::Tensor &trans, 98 | const int no_trans, 99 | const float spatial_scale, 100 | const int output_dim, 101 | const int group_size, 102 | const int pooled_size, 103 | const int part_size, 104 | const int sample_per_part, 105 | const float trans_std) 106 | { 107 | if (input.type().is_cuda()) 108 | { 109 | #ifdef WITH_CUDA 110 | return dcn_v2_psroi_pooling_cuda_forward(input, 111 | bbox, 112 | trans, 113 | no_trans, 114 | spatial_scale, 115 | output_dim, 116 | group_size, 117 | pooled_size, 118 | part_size, 119 | sample_per_part, 120 | trans_std); 121 | #else 122 | AT_ERROR("Not compiled with GPU support"); 123 | #endif 124 | } 125 | else{ 126 | return dcn_v2_psroi_pooling_cpu_forward(input, 127 | bbox, 128 | trans, 129 | no_trans, 130 | spatial_scale, 131 | output_dim, 132 | group_size, 133 | pooled_size, 134 | part_size, 135 | sample_per_part, 136 | trans_std); 137 | } 138 | } 139 | 140 | std::tuple<at::Tensor, at::Tensor> 141 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad, 142 | const at::Tensor &input, 143 | const at::Tensor &bbox, 144 | const at::Tensor &trans, 145 | const at::Tensor &top_count, 146 | const int no_trans, 147 | const float spatial_scale, 148 | const int output_dim, 149 | const int group_size, 150 | const int pooled_size, 151 | const int part_size, 152 | const int sample_per_part, 153 | const float trans_std) 154 | { 155 | if (input.type().is_cuda()) 156 | { 157 | #ifdef WITH_CUDA 158 | return dcn_v2_psroi_pooling_cuda_backward(out_grad, 159 | input, 160 | bbox, 161 | trans, 162 | top_count, 163 | no_trans, 164 | spatial_scale, 165 | output_dim, 166 | group_size, 167 | pooled_size, 168 | part_size, 169 | sample_per_part, 170 | trans_std); 171 | #else 172 | AT_ERROR("Not compiled with GPU support"); 173 | #endif 174 | } 175 | else{ 176 | return dcn_v2_psroi_pooling_cpu_backward(out_grad, 177 | input, 178 | bbox, 179 | trans, 180 | top_count, 181 | no_trans, 182 | spatial_scale, 183 | output_dim, 184 | group_size, 185 | pooled_size, 186 | part_size, 187 | sample_per_part, 188 | trans_std); 189 | } 190 | } -------------------------------------------------------------------------------- /lib/DCNv2/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | -------------------------------------------------------------------------------- /lib/DCNv2/testcpu.py: -------------------------------------------------------------------------------- 1 |
#!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN 12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling 13 | 14 | deformable_groups = 1 15 | N, inC, inH, inW = 2, 2, 4, 4 16 | outC = 2 17 | kH, kW = 3, 3 18 | 19 | 20 | def conv_identify(weight, bias): 21 | weight.data.zero_() 22 | bias.data.zero_() 23 | o, i, h, w = weight.shape 24 | y = h//2 25 | x = w//2 26 | for p in range(i): 27 | for q in range(o): 28 | if p == q: 29 | weight.data[q, p, y, x] = 1.0 30 | 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True) 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True) 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups) 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW) 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | print(input) 67 | print(output) 68 | 69 | def check_gradient_dconv(): 70 | 71 | input = torch.rand(N, inC, inH, inW) * 0.01 72 | input.requires_grad = True 73 | 74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW) * 2 75 | # offset.data.zero_() 76 | # offset.data -= 0.5 77 | offset.requires_grad = True 78 | 79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW) 80 | # mask.data.zero_() 81 | mask.requires_grad = True 82 | mask = torch.sigmoid(mask) 83 | 84 | weight = torch.randn(outC, inC, kH, kW) 85 | weight.requires_grad = True 86 | 87 | bias = torch.rand(outC) 88 | bias.requires_grad = True 89 | 90 | stride = 1 91 | padding = 1 92 | dilation = 1 93 | 94 | print('check_gradient_dconv: ', 95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias, 96 | stride, padding, dilation, deformable_groups), 97 | eps=1e-3, atol=1e-4, rtol=1e-2)) 98 | 99 | 100 | def check_pooling_zero_offset(): 101 | 102 | input = torch.randn(2, 16, 64, 64).zero_() 103 | input[0, :, 16:26, 16:26] = 1. 104 | input[1, :, 10:20, 20:30] = 2. 
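# Each ROI row below is (batch_index, x1, y1, x2, y2) in input-image coordinates;
# with spatial_scale=1/4, [0, 65, 65, 103, 103] lands on the block of ones in
# sample 0 and [1, 81, 41, 119, 79] on the block of twos in sample 1, so the
# printed per-ROI means should come out near 1 and 2.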
105 | rois = torch.tensor([ 106 | [0, 65, 65, 103, 103], 107 | [1, 81, 41, 119, 79], 108 | ]).float() 109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 110 | pooled_size=7, 111 | output_dim=16, 112 | no_trans=True, 113 | group_size=1, 114 | trans_std=0.0) 115 | 116 | out = pooling(input, rois, input.new()) 117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() 118 | for i in range(rois.shape[0])]) 119 | print(s) 120 | 121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 122 | pooled_size=7, 123 | output_dim=16, 124 | no_trans=False, 125 | group_size=1, 126 | trans_std=0.0) 127 | offset = torch.randn(20, 2, 7, 7).zero_() 128 | dout = dpooling(input, rois, offset) 129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() 130 | for i in range(rois.shape[0])]) 131 | print(s) 132 | 133 | 134 | def check_gradient_dpooling(): 135 | input = torch.randn(2, 3, 5, 5) * 0.01 136 | N = 4 137 | batch_inds = torch.randint(2, (N, 1)).float() 138 | x = torch.rand((N, 1)).float() * 15 139 | y = torch.rand((N, 1)).float() * 15 140 | w = torch.rand((N, 1)).float() * 10 141 | h = torch.rand((N, 1)).float() * 10 142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 143 | offset = torch.randn(N, 2, 3, 3) 144 | input.requires_grad = True 145 | offset.requires_grad = True 146 | 147 | spatial_scale = 1.0 / 4 148 | pooled_size = 3 149 | output_dim = 3 150 | no_trans = 0 151 | group_size = 1 152 | trans_std = 0.0 153 | sample_per_part = 4 154 | part_size = pooled_size 155 | 156 | print('check_gradient_dpooling:', 157 | gradcheck(dcn_v2_pooling, (input, rois, offset, 158 | spatial_scale, 159 | pooled_size, 160 | output_dim, 161 | no_trans, 162 | group_size, 163 | part_size, 164 | sample_per_part, 165 | trans_std), 166 | eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | input = torch.randn(2, 64, 128, 128) 171 | # wrap all things (offset and mask) in DCN 172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, 173 | padding=1, deformable_groups=2) 174 | # print(dcn.weight.shape, input.shape) 175 | output = dcn(input) 176 | target = output.new(*output.size()) 177 | target.data.uniform_(-0.01, 0.01) 178 | error = (target - output).mean() 179 | error.backward() 180 | print(output.shape) 181 | 182 | 183 | def example_dpooling(): 184 | input = torch.randn(2, 32, 64, 64) 185 | batch_inds = torch.randint(2, (20, 1)).float() 186 | x = torch.randint(256, (20, 1)).float() 187 | y = torch.randint(256, (20, 1)).float() 188 | w = torch.randint(64, (20, 1)).float() 189 | h = torch.randint(64, (20, 1)).float() 190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 191 | offset = torch.randn(20, 2, 7, 7) 192 | input.requires_grad = True 193 | offset.requires_grad = True 194 | 195 | # normal roi_align 196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 197 | pooled_size=7, 198 | output_dim=32, 199 | no_trans=True, 200 | group_size=1, 201 | trans_std=0.1) 202 | 203 | # deformable pooling 204 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 205 | pooled_size=7, 206 | output_dim=32, 207 | no_trans=False, 208 | group_size=1, 209 | trans_std=0.1) 210 | 211 | out = pooling(input, rois, offset) 212 | dout = dpooling(input, rois, offset) 213 | print(out.shape) 214 | print(dout.shape) 215 | 216 | target_out = out.new(*out.size()) 217 | target_out.data.uniform_(-0.01, 0.01) 218 | target_dout = dout.new(*dout.size()) 219 | target_dout.data.uniform_(-0.01, 0.01) 220 | e = (target_out - out).mean() 221 | e.backward() 222 | e = (target_dout - dout).mean() 223 | e.backward() 224 | 225 | 226 | def example_mdpooling():
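    # DCNPooling (modulated deformable pooling, "V2") differs from DCNv2Pooling above:
    # it predicts its own offsets from the pooled features through an internal FC
    # branch (sized by deform_fc_dim), so it is called with (input, rois) only.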
227 | input = torch.randn(2, 32, 64, 64) 228 | input.requires_grad = True 229 | batch_inds = torch.randint(2, (20, 1)).float() 230 | x = torch.randint(256, (20, 1)).float() 231 | y = torch.randint(256, (20, 1)).float() 232 | w = torch.randint(64, (20, 1)).float() 233 | h = torch.randint(64, (20, 1)).float() 234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 235 | 236 | # modulated deformable pooling (V2) 237 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 238 | pooled_size=7, 239 | output_dim=32, 240 | no_trans=False, 241 | group_size=1, 242 | trans_std=0.1, 243 | deform_fc_dim=1024) 244 | 245 | dout = dpooling(input, rois) 246 | target = dout.new(*dout.size()) 247 | target.data.uniform_(-0.1, 0.1) 248 | error = (target - dout).mean() 249 | error.backward() 250 | print(dout.shape) 251 | 252 | 253 | if __name__ == '__main__': 254 | 255 | example_dconv() 256 | example_dpooling() 257 | example_mdpooling() 258 | 259 | check_pooling_zero_offset() 260 | # zero offset check 261 | if inC == outC: 262 | check_zero_offset() 263 | 264 | check_gradient_dpooling() 265 | check_gradient_dconv() 266 | # """ 267 | # ****** Note: the "backward is not reentrant" error may not be a serious problem, 268 | # ****** since the max error is less than 1e-7. 269 | # ****** Still looking for what triggers this problem. 270 | # """ 271 | -------------------------------------------------------------------------------- /lib/DCNv2/testcuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN 12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling 13 | 14 | deformable_groups = 1 15 | N, inC, inH, inW = 2, 2, 4, 4 16 | outC = 2 17 | kH, kW = 3, 3 18 | 19 | 20 | def conv_identify(weight, bias): 21 | weight.data.zero_() 22 | bias.data.zero_() 23 | o, i, h, w = weight.shape 24 | y = h//2 25 | x = w//2 26 | for p in range(i): 27 | for q in range(o): 28 | if p == q: 29 | weight.data[q, p, y, x] = 1.0 30 | 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True).cuda() 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True).cuda() 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups).cuda() 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW).cuda() 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | print(input) 67 | print(output) 68 | 69 | def check_gradient_dconv(): 70 | 71 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01 72 | input.requires_grad = True 73 | 74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2 75 | # offset.data.zero_() 76 | #
offset.data -= 0.5 77 | offset.requires_grad = True 78 | 79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda() 80 | # mask.data.zero_() 81 | mask.requires_grad = True 82 | mask = torch.sigmoid(mask) 83 | 84 | weight = torch.randn(outC, inC, kH, kW).cuda() 85 | weight.requires_grad = True 86 | 87 | bias = torch.rand(outC).cuda() 88 | bias.requires_grad = True 89 | 90 | stride = 1 91 | padding = 1 92 | dilation = 1 93 | 94 | print('check_gradient_dconv: ', 95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias, 96 | stride, padding, dilation, deformable_groups), 97 | eps=1e-3, atol=1e-4, rtol=1e-2)) 98 | 99 | 100 | def check_pooling_zero_offset(): 101 | 102 | input = torch.randn(2, 16, 64, 64).cuda().zero_() 103 | input[0, :, 16:26, 16:26] = 1. 104 | input[1, :, 10:20, 20:30] = 2. 105 | rois = torch.tensor([ 106 | [0, 65, 65, 103, 103], 107 | [1, 81, 41, 119, 79], 108 | ]).cuda().float() 109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 110 | pooled_size=7, 111 | output_dim=16, 112 | no_trans=True, 113 | group_size=1, 114 | trans_std=0.0).cuda() 115 | 116 | out = pooling(input, rois, input.new()) 117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() 118 | for i in range(rois.shape[0])]) 119 | print(s) 120 | 121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 122 | pooled_size=7, 123 | output_dim=16, 124 | no_trans=False, 125 | group_size=1, 126 | trans_std=0.0).cuda() 127 | offset = torch.randn(20, 2, 7, 7).cuda().zero_() 128 | dout = dpooling(input, rois, offset) 129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() 130 | for i in range(rois.shape[0])]) 131 | print(s) 132 | 133 | 134 | def check_gradient_dpooling(): 135 | input = torch.randn(2, 3, 5, 5).cuda() * 0.01 136 | N = 4 137 | batch_inds = torch.randint(2, (N, 1)).cuda().float() 138 | x = torch.rand((N, 1)).cuda().float() * 15 139 | y = torch.rand((N, 1)).cuda().float() * 15 140 | w = torch.rand((N, 1)).cuda().float() * 10 141 | h = torch.rand((N, 1)).cuda().float() * 10 142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 143 | offset = torch.randn(N, 2, 3, 3).cuda() 144 | input.requires_grad = True 145 | offset.requires_grad = True 146 | 147 | spatial_scale = 1.0 / 4 148 | pooled_size = 3 149 | output_dim = 3 150 | no_trans = 0 151 | group_size = 1 152 | trans_std = 0.0 153 | sample_per_part = 4 154 | part_size = pooled_size 155 | 156 | print('check_gradient_dpooling:', 157 | gradcheck(dcn_v2_pooling, (input, rois, offset, 158 | spatial_scale, 159 | pooled_size, 160 | output_dim, 161 | no_trans, 162 | group_size, 163 | part_size, 164 | sample_per_part, 165 | trans_std), 166 | eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | input = torch.randn(2, 64, 128, 128).cuda() 171 | # wrap all things (offset and mask) in DCN 172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, 173 | padding=1, deformable_groups=2).cuda() 174 | # print(dcn.weight.shape, input.shape) 175 | output = dcn(input) 176 | target = output.new(*output.size()) 177 | target.data.uniform_(-0.01, 0.01) 178 | error = (target - output).mean() 179 | error.backward() 180 | print(output.shape) 181 | 182 | 183 | def example_dpooling(): 184 | input = torch.randn(2, 32, 64, 64).cuda() 185 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 186 | x = torch.randint(256, (20, 1)).cuda().float() 187 | y = torch.randint(256, (20, 1)).cuda().float() 188 | w = torch.randint(64, (20, 1)).cuda().float() 189 | h = torch.randint(64, (20, 1)).cuda().float() 190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
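# The pooling offset carries one (dy, dx) pair per output cell of each ROI,
# i.e. shape (num_rois, 2, pooled_size, pooled_size); it is ignored by the
# no_trans=True branch below and consumed by the deformable one.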
191 | offset = torch.randn(20, 2, 7, 7).cuda() 192 | input.requires_grad = True 193 | offset.requires_grad = True 194 | 195 | # normal roi_align 196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 197 | pooled_size=7, 198 | output_dim=32, 199 | no_trans=True, 200 | group_size=1, 201 | trans_std=0.1).cuda() 202 | 203 | # deformable pooling 204 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 205 | pooled_size=7, 206 | output_dim=32, 207 | no_trans=False, 208 | group_size=1, 209 | trans_std=0.1).cuda() 210 | 211 | out = pooling(input, rois, offset) 212 | dout = dpooling(input, rois, offset) 213 | print(out.shape) 214 | print(dout.shape) 215 | 216 | target_out = out.new(*out.size()) 217 | target_out.data.uniform_(-0.01, 0.01) 218 | target_dout = dout.new(*dout.size()) 219 | target_dout.data.uniform_(-0.01, 0.01) 220 | e = (target_out - out).mean() 221 | e.backward() 222 | e = (target_dout - dout).mean() 223 | e.backward() 224 | 225 | 226 | def example_mdpooling(): 227 | input = torch.randn(2, 32, 64, 64).cuda() 228 | input.requires_grad = True 229 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 230 | x = torch.randint(256, (20, 1)).cuda().float() 231 | y = torch.randint(256, (20, 1)).cuda().float() 232 | w = torch.randint(64, (20, 1)).cuda().float() 233 | h = torch.randint(64, (20, 1)).cuda().float() 234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 235 | 236 | # modulated deformable pooling (V2) 237 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 238 | pooled_size=7, 239 | output_dim=32, 240 | no_trans=False, 241 | group_size=1, 242 | trans_std=0.1, 243 | deform_fc_dim=1024).cuda() 244 | 245 | dout = dpooling(input, rois) 246 | target = dout.new(*dout.size()) 247 | target.data.uniform_(-0.1, 0.1) 248 | error = (target - dout).mean() 249 | error.backward() 250 | print(dout.shape) 251 | 252 | 253 | if __name__ == '__main__': 254 | 255 | example_dconv() 256 | # example_dpooling() 257 | # example_mdpooling() 258 | 259 | # check_pooling_zero_offset() 260 | # zero offset check 261 | # if inC == outC: 262 | # check_zero_offset() 263 | 264 | # check_gradient_dpooling() 265 | # check_gradient_dconv() 266 | # """ 267 | # ****** Note: the "backward is not reentrant" error may not be a serious problem, 268 | # ****** since the max error is less than 1e-7. 269 | # ****** Still looking for what triggers this problem. 270 | # """ 271 | -------------------------------------------------------------------------------- /model/TSConv.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("..") 4 | import torch.nn as nn 5 | from collections import OrderedDict 6 | from model.backbones.resnet import Resnet101 7 | from model.backbones.darknet53 import Darknet53 8 | from model.neck.neck import Neck 9 | from model.head.head import Head1, Head2 10 | from model.layers.convolutions import Convolutional 11 | from utils.utils_basic import * 12 | 13 | class GGHL(nn.Module): 14 | def __init__(self, init_weights=True, inputsize=int(cfg.TRAIN["TRAIN_IMG_SIZE"]), weight_path=None): 15 | super(GGHL, self).__init__() 16 | self.__strides = torch.FloatTensor(cfg.MODEL["STRIDES"]) 17 | self.__nC = cfg.DATA["NUM"] 18 | self.__out_channel = self.__nC + 4 + 5 + 1 19 | self.__backnone = Darknet53()  # or Resnet101() 20 | # self.__backnone = PVT2(weight_path=weight_path) 21 | # self.__fpn = Neck(fileters_in=[512, 320, 128, 64], fileters_out=self.__out_channel) 22 | self.__fpn = Neck(fileters_in=[1024, 512, 256, 128], fileters_in_ratio=1)
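        # Three detection scales (strides from cfg.MODEL["STRIDES"]): at each scale,
        # Head1 learns task-wise sampling offsets/masks and Head2 consumes the
        # resampled features to produce the final localization/classification maps.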
23 | self.__head1_s = Head1(filters_in=128, stride=self.__strides[0]) 24 | self.__head1_m = Head1(filters_in=256, stride=self.__strides[1]) 25 | self.__head1_l = Head1(filters_in=512, stride=self.__strides[2]) 26 | 27 | self.__head2_s = Head2(filters_in=128, nC=self.__nC, stride=self.__strides[0]) 28 | self.__head2_m = Head2(filters_in=256, nC=self.__nC, stride=self.__strides[1]) 29 | self.__head2_l = Head2(filters_in=512, nC=self.__nC, stride=self.__strides[2]) 30 | 31 | if init_weights: 32 | self.__init_weights() 33 | 34 | def forward(self, x): 35 | out = [] 36 | x_8, x_16, x_32 = self.__backnone(x) 37 | loc2, cls2, loc1, cls1, loc0, cls0 = self.__fpn(x_32, x_16, x_8) 38 | x_s, x_s_de, offsets_loc_s, offsets_cls_s, mask_loc_s, mask_cls_s, w_c8_s, coor_dc_s = self.__head1_s(loc2) 39 | x_m, x_m_de, offsets_loc_m, offsets_cls_m, mask_loc_m, mask_cls_m, w_c8_m, coor_dc_m = self.__head1_m(loc1) 40 | x_l, x_l_de, offsets_loc_l, offsets_cls_l, mask_loc_l, mask_cls_l, w_c8_l, coor_dc_l = self.__head1_l(loc0) 41 | 42 | out_s, out_s_de = self.__head2_s(x_s_de, loc2, cls2, offsets_loc_s, offsets_cls_s, mask_loc_s, mask_cls_s, w_c8_s) 43 | out_m, out_m_de = self.__head2_m(x_m_de, loc1, cls1, offsets_loc_m, offsets_cls_m, mask_loc_m, mask_cls_m, w_c8_m) 44 | out_l, out_l_de = self.__head2_l(x_l_de, loc0, cls0, offsets_loc_l, offsets_cls_l, mask_loc_l, mask_cls_l, w_c8_l) 45 | 46 | out.append((x_s, x_s_de, out_s, out_s_de, coor_dc_s)) 47 | out.append((x_m, x_m_de, out_m, out_m_de, coor_dc_m)) 48 | out.append((x_l, x_l_de, out_l, out_l_de, coor_dc_l)) 49 | 50 | if self.training: 51 | p1, p1_d, p2, p2_d, _ = list(zip(*out)) 52 | return p1, p1_d, p2, p2_d 53 | else: 54 | p1, p1_d, p2, p2_d, offsets_d = list(zip(*out)) 55 | return p1, p1_d, p2, torch.cat(p2_d, 0), torch.cat(offsets_d, 0) 56 | 57 | def __init_weights(self): 58 | "Note: nn.Conv2d and nn.BatchNorm2d are initialized in a uniform way across the model" 59 | for m in self.modules(): 60 | if isinstance(m, nn.Conv2d): 61 | torch.nn.init.normal_(m.weight.data, 0.0, 0.01) 62 | if m.bias is not None: 63 | m.bias.data.zero_() 64 | # print("initing {}".format(m)) 65 | elif isinstance(m, nn.BatchNorm2d): 66 | torch.nn.init.constant_(m.weight.data, 1.0) 67 | torch.nn.init.constant_(m.bias.data, 0.0) 68 | # print("initing {}".format(m)) 69 | elif isinstance(m, nn.Linear): 70 | m.weight.data.normal_(0, 0.01) 71 | if m.bias is not None: 72 | m.bias.data.zero_() 73 | # print("initing {}".format(m)) 74 | 75 | def load_resnet101_weights(self, weight_file='/home/hzc/v2/weight/resnet101-cd907fc2.pth'): 76 | model_list = self.__backnone.state_dict().keys() 77 | # print(model_list) 78 | weight = torch.load(weight_file) 79 | # print(weight.keys()) 80 | new_weight = OrderedDict() 81 | # zip stops at the shortest of its arguments, so extra checkpoint keys are skipped 82 | for model_key, weight_key, weight_value in zip(model_list, weight.keys(), weight.values()): 83 | if model_key[9:] == weight_key: 84 | new_weight[model_key] = weight_value 85 | self.__backnone.load_state_dict(new_weight) 86 | 87 | def load_darknet_weights(self, weight_file, cutoff=52): 88 | "https://github.com/ultralytics/yolov3/blob/master/models.py" 89 | print("load darknet weights : ", weight_file) 90 | with open(weight_file, 'rb') as f: 91 | _ = np.fromfile(f, dtype=np.int32, count=5) 92 | weights = np.fromfile(f, dtype=np.float32) 93 | count = 0 94 | ptr = 0 95 | for m in self.modules(): 96 | if isinstance(m, Convolutional): 97 | # only init the backbone conv weights 98 | if count == cutoff: 99 | break 100 | count += 1 101 | conv_layer = m._Convolutional__conv 102 | if m.norm == "bn": 103 | # Load BN
bias, weights, running mean and running variance 104 | bn_layer = m._Convolutional__norm 105 | num_b = bn_layer.bias.numel() # Number of biases 106 | # Bias 107 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias.data) 108 | bn_layer.bias.data.copy_(bn_b) 109 | ptr += num_b 110 | # Weight 111 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight.data) 112 | bn_layer.weight.data.copy_(bn_w) 113 | ptr += num_b 114 | # Running Mean 115 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) 116 | bn_layer.running_mean.data.copy_(bn_rm) 117 | ptr += num_b 118 | # Running Var 119 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) 120 | bn_layer.running_var.data.copy_(bn_rv) 121 | ptr += num_b 122 | # print("loading weight {}".format(bn_layer)) 123 | elif m.norm == "gn": 124 | # Load GN bias, weights 125 | bn_layer = m._Convolutional__norm 126 | num_b = bn_layer.bias.numel() # Number of biases 127 | # Bias 128 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias.data) 129 | bn_layer.bias.data.copy_(bn_b) 130 | ptr += num_b 131 | # Weight 132 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight.data) 133 | bn_layer.weight.data.copy_(bn_w) 134 | ptr += num_b 135 | else: 136 | # Load conv. bias 137 | num_b = conv_layer.bias.numel() 138 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias.data) 139 | conv_layer.bias.data.copy_(conv_b) 140 | ptr += num_b 141 | # Load conv. weights 142 | num_w = conv_layer.weight.numel() 143 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight.data) 144 | conv_layer.weight.data.copy_(conv_w) 145 | ptr += num_w 146 | # print("loading weight {}".format(conv_layer)) 147 | print("loading weight number: {}".format(count)) 148 | 149 | 150 | -------------------------------------------------------------------------------- /model/__pycache__/GGHL4.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL4.cpython-38.pyc -------------------------------------------------------------------------------- /model/__pycache__/GGHL4.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL4.cpython-39.pyc -------------------------------------------------------------------------------- /model/__pycache__/GGHL6.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL6.cpython-39.pyc -------------------------------------------------------------------------------- /model/__pycache__/GGHL6single.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL6single.cpython-39.pyc -------------------------------------------------------------------------------- /model/__pycache__/GGHL8.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL8.cpython-39.pyc -------------------------------------------------------------------------------- /model/__pycache__/GGHLv2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHLv2.cpython-38.pyc -------------------------------------------------------------------------------- /model/__pycache__/GGHLv2.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHLv2.cpython-39.pyc -------------------------------------------------------------------------------- /model/__pycache__/double3090.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/double3090.cpython-38.pyc -------------------------------------------------------------------------------- /model/__pycache__/double3090.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/double3090.cpython-39.pyc -------------------------------------------------------------------------------- /model/backbones/__pycache__/darknet53.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/backbones/__pycache__/darknet53.cpython-38.pyc -------------------------------------------------------------------------------- /model/backbones/__pycache__/darknet53.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/backbones/__pycache__/darknet53.cpython-39.pyc -------------------------------------------------------------------------------- /model/backbones/__pycache__/model_resnet.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/backbones/__pycache__/model_resnet.cpython-39.pyc -------------------------------------------------------------------------------- /model/backbones/__pycache__/resnet.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/backbones/__pycache__/resnet.cpython-39.pyc -------------------------------------------------------------------------------- /model/backbones/darknet53.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from ..layers.convolutions import Convolutional 6 | from ..layers.conv_blocks import Residual_block 7 | 8 | class Darknet53(nn.Module): 9 | 10 | def __init__(self, pre_weight=None): 11 | super(Darknet53, self).__init__() 12 | self.__conv = Convolutional(filters_in=3, filters_out=32, kernel_size=3, 
stride=1, pad=1, norm='bn', 13 | activate='leaky') 14 | 15 | self.__conv_5_0 = Convolutional(filters_in=32, filters_out=64, kernel_size=3, stride=2, pad=1, norm='bn',activate='leaky') 16 | self.__rb_5_0 = Residual_block(filters_in=64, filters_out=64, filters_medium=32) 17 | 18 | self.__conv_5_1 = Convolutional(filters_in=64, filters_out=128, kernel_size=3, stride=2, pad=1, norm='bn', 19 | activate='leaky') 20 | self.__rb_5_1_0 = Residual_block(filters_in=128, filters_out=128, filters_medium=64) 21 | self.__rb_5_1_1 = Residual_block(filters_in=128, filters_out=128, filters_medium=64) 22 | 23 | self.__conv_5_2 = Convolutional(filters_in=128, filters_out=256, kernel_size=3, stride=2, pad=1, norm='bn', 24 | activate='leaky') 25 | self.__rb_5_2_0 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) 26 | self.__rb_5_2_1 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) 27 | self.__rb_5_2_2 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) 28 | self.__rb_5_2_3 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) 29 | self.__rb_5_2_4 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) 30 | self.__rb_5_2_5 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) 31 | self.__rb_5_2_6 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) 32 | self.__rb_5_2_7 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) 33 | 34 | self.__conv_5_3 = Convolutional(filters_in=256, filters_out=512, kernel_size=3, stride=2, pad=1, norm='bn', 35 | activate='leaky') 36 | self.__rb_5_3_0 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) 37 | self.__rb_5_3_1 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) 38 | self.__rb_5_3_2 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) 39 | self.__rb_5_3_3 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) 40 | self.__rb_5_3_4 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) 41 | self.__rb_5_3_5 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) 42 | self.__rb_5_3_6 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) 43 | self.__rb_5_3_7 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) 44 | 45 | 46 | self.__conv_5_4 = Convolutional(filters_in=512, filters_out=1024, kernel_size=3, stride=2, pad=1, norm='bn', 47 | activate='leaky') 48 | self.__rb_5_4_0 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) 49 | self.__rb_5_4_1 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) 50 | self.__rb_5_4_2 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) 51 | self.__rb_5_4_3 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) 52 | 53 | #self.__focus = Focus(c1=32, c2=64, k=3, p=1) 54 | 55 | def forward(self, x): 56 | x = self.__conv(x) 57 | x0_0 = self.__conv_5_0(x) 58 | 59 | #x0_0 = self.__focus(x) 60 | x0_1 = self.__rb_5_0(x0_0) 61 | 62 | x1_0 = self.__conv_5_1(x0_1) 63 | x1_1 = self.__rb_5_1_0(x1_0) 64 | x1_2 = self.__rb_5_1_1(x1_1) 65 | 66 | x2_0 = self.__conv_5_2(x1_2) 67 | x2_1 = self.__rb_5_2_0(x2_0) 68 | x2_2 = self.__rb_5_2_1(x2_1) 69 | x2_3 = self.__rb_5_2_2(x2_2) 70 | x2_4 = self.__rb_5_2_3(x2_3) 71 | x2_5 = self.__rb_5_2_4(x2_4) 72 | x2_6 = self.__rb_5_2_5(x2_5) 73 | x2_7 = self.__rb_5_2_6(x2_6) 74 | x2_8 = self.__rb_5_2_7(x2_7) 75 | 76 | x3_0 = self.__conv_5_3(x2_8) 77 | x3_1 = self.__rb_5_3_0(x3_0) 78 | 
x3_2 = self.__rb_5_3_1(x3_1) 79 | x3_3 = self.__rb_5_3_2(x3_2) 80 | x3_4 = self.__rb_5_3_3(x3_3) 81 | x3_5 = self.__rb_5_3_4(x3_4) 82 | x3_6 = self.__rb_5_3_5(x3_5) 83 | x3_7 = self.__rb_5_3_6(x3_6) 84 | x3_8 = self.__rb_5_3_7(x3_7) 85 | 86 | x4_0 = self.__conv_5_4(x3_8) 87 | x4_1 = self.__rb_5_4_0(x4_0) 88 | x4_2 = self.__rb_5_4_1(x4_1) 89 | x4_3 = self.__rb_5_4_2(x4_2) 90 | x4_4 = self.__rb_5_4_3(x4_3) 91 | 92 | return x2_8, x3_8, x4_4 93 | -------------------------------------------------------------------------------- /model/backbones/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reference : https://github.com/d-li14/mobilenetv2.pytorch/blob/master/models/imagenet/mobilenetv2.py 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import math 7 | 8 | __all__ = ['mobilenetv2'] 9 | 10 | def _make_divisible(v, divisor, min_value=None): 11 | """ 12 | This function is taken from the original tf repo. 13 | It ensures that all layers have a channel number that is divisible by 8 14 | It can be seen here: 15 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 16 | :param v: 17 | :param divisor: 18 | :param min_value: 19 | :return: 20 | """ 21 | if min_value is None: 22 | min_value = divisor 23 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 24 | # Make sure that round down does not go down by more than 10%. 25 | if new_v < 0.9 * v: 26 | new_v += divisor 27 | return new_v 28 | 29 | def conv_3x3_bn(inp, oup, stride): 30 | return nn.Sequential( 31 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 32 | nn.BatchNorm2d(oup), 33 | nn.ReLU6(inplace=True), 34 | ) 35 | 36 | 37 | def conv_1x1_bn(inp, oup): 38 | return nn.Sequential( 39 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(oup), 41 | nn.ReLU6(inplace=True) 42 | ) 43 | 44 | class InvertedResidual(nn.Module): 45 | def __init__(self, inp, oup, stride, expand_ratio): 46 | super(InvertedResidual, self).__init__() 47 | assert stride in [1, 2] 48 | 49 | hidden_dim = round(inp * expand_ratio) 50 | self.identity = stride == 1 and inp == oup 51 | 52 | if expand_ratio == 1: 53 | self.conv = nn.Sequential( 54 | # dw 55 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 56 | nn.BatchNorm2d(hidden_dim), 57 | nn.ReLU6(inplace=True), 58 | # pw-linear 59 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 60 | nn.BatchNorm2d(oup), 61 | ) 62 | else: 63 | self.conv = nn.Sequential( 64 | # pw 65 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 66 | nn.BatchNorm2d(hidden_dim), 67 | nn.ReLU6(inplace=True), 68 | # dw 69 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 70 | nn.BatchNorm2d(hidden_dim), 71 | nn.ReLU6(inplace=True), 72 | # pw-linear 73 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 74 | nn.BatchNorm2d(oup), 75 | ) 76 | 77 | def forward(self, x): 78 | conv = self.conv(x) 79 | if self.identity: 80 | return x + conv 81 | else: 82 | return conv 83 | 84 | class _MobileNetV2(nn.Module): 85 | def __init__(self, num_classes=1000, width_mult=1.): 86 | super(_MobileNetV2, self).__init__() 87 | # setting of inverted residual blocks 88 | self.cfgs = [ 89 | # t, c, n, s 90 | [1, 16, 1, 1], 91 | [6, 24, 2, 2], 92 | [6, 32, 3, 2], 93 | [6, 64, 4, 2], 94 | [6, 96, 3, 1], 95 | [6, 160, 3, 2], 96 | [6, 320, 1, 1], 97 | ] 98 | # building first layer 99 | input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8) 100 | layers = [conv_3x3_bn(3, 
input_channel, 2)] 101 | # building inverted residual blocks 102 | block = InvertedResidual 103 | for t, c, n, s in self.cfgs: 104 | output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) 105 | for i in range(n): 106 | layers.append(block(input_channel, output_channel, s if i == 0 else 1, t)) 107 | input_channel = output_channel 108 | self.features = nn.Sequential(*layers) 109 | # building last several layers 110 | output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280 111 | self.conv = conv_1x1_bn(input_channel, output_channel) 112 | self._initialize_weights() 113 | 114 | def forward(self, x): 115 | x = self.features(x) 116 | x = self.conv(x) 117 | return x 118 | 119 | def _initialize_weights(self): 120 | print("**" * 10, "Initing MobilenetV2 weights", "**" * 10) 121 | 122 | for m in self.modules(): 123 | if isinstance(m, nn.Conv2d): 124 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 125 | m.weight.data.normal_(0, math.sqrt(2. / n)) 126 | if m.bias is not None: 127 | m.bias.data.zero_() 128 | print("initing {}".format(m)) 129 | 130 | elif isinstance(m, nn.BatchNorm2d): 131 | m.weight.data.fill_(1) 132 | m.bias.data.zero_() 133 | print("initing {}".format(m)) 134 | 135 | elif isinstance(m, nn.Linear): 136 | m.weight.data.normal_(0, 0.01) 137 | if m.bias is not None: 138 | m.bias.data.zero_() 139 | print("initing {}".format(m)) 140 | 141 | class FeatureExtractor(nn.Module): 142 | def __init__(self, submodule, extracted_layers): 143 | super(FeatureExtractor, self).__init__() 144 | self.submodule = submodule 145 | self.extracted_layers = extracted_layers 146 | 147 | def forward(self, x): 148 | outputs = [] 149 | for name, module in self.submodule._modules.items(): 150 | if name == "features": 151 | for f_name, f_module in module._modules.items(): 152 | x = f_module(x) 153 | if f_name in self.extracted_layers: 154 | outputs.append(x) 155 | if name == "conv": 156 | x = module(x) 157 | if name in self.extracted_layers: 158 | outputs.append(x) 159 | return outputs 160 | 161 | class MobilenetV2(nn.Module): 162 | def __init__(self, extract_list, weight_path=None, width_mult=1.): 163 | super(MobilenetV2, self).__init__() 164 | 165 | self.__submodule = _MobileNetV2(width_mult=width_mult) 166 | if weight_path: 167 | print("*"*40, "\nLoading weight of MobilenetV2 : {}".format(weight_path)) 168 | pretrained_dict = torch.load(weight_path) 169 | model_dict = self.__submodule.state_dict() 170 | pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict} 171 | model_dict.update(pretrained_dict) 172 | self.__submodule.load_state_dict(model_dict) 173 | del pretrained_dict 174 | print("Loaded weight of MobilenetV2 : {}".format(weight_path)) 175 | self.__extractor = FeatureExtractor(self.__submodule, extract_list) 176 | 177 | def forward(self, x): 178 | return self.__extractor(x) 179 | -------------------------------------------------------------------------------- /model/backbones/resnet.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | from torch import nn 5 | from torchvision.models._utils import IntermediateLayerGetter 6 | 7 | from . import model_resnet 8 | 9 | 10 | class FrozenBatchNorm2d(torch.nn.Module): 11 | """ 12 | BatchNorm2d where the batch statistics and the affine parameters are fixed.
13 | Copy-paste from torchvision.misc.ops with added eps before rsqrt, 14 | without which any other models than torchvision.models.resnet[18,34,50,101] 15 | produce nans. 16 | """ 17 | 18 | def __init__(self, n): 19 | super(FrozenBatchNorm2d, self).__init__() 20 | self.register_buffer("weight", torch.ones(n)) 21 | self.register_buffer("bias", torch.zeros(n)) 22 | self.register_buffer("running_mean", torch.zeros(n)) 23 | self.register_buffer("running_var", torch.ones(n)) 24 | 25 | def _load_from_state_dict( 26 | self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs 27 | ): 28 | num_batches_tracked_key = prefix + "num_batches_tracked" 29 | if num_batches_tracked_key in state_dict: 30 | del state_dict[num_batches_tracked_key] 31 | 32 | super(FrozenBatchNorm2d, self)._load_from_state_dict( 33 | state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs 34 | ) 35 | 36 | def forward(self, x): 37 | # move reshapes to the beginning 38 | # to make it fuser-friendly 39 | w = self.weight.reshape(1, -1, 1, 1) 40 | b = self.bias.reshape(1, -1, 1, 1) 41 | rv = self.running_var.reshape(1, -1, 1, 1) 42 | rm = self.running_mean.reshape(1, -1, 1, 1) 43 | eps = 1e-5 44 | scale = w * (rv + eps).rsqrt() 45 | bias = b - rm * scale 46 | return x * scale + bias 47 | 48 | 49 | class BackboneBase(nn.Module): 50 | def __init__(self, backbone: nn.Module, num_channels: int): 51 | """Wrap a backbone and expose only the intermediate layers 52 | needed downstream 53 | 54 | Parameters 55 | ---------- 56 | backbone : nn.Module 57 | the backbone network 58 | num_channels : int 59 | the number of channels in the output feature map. 60 | 61 | """ 62 | super().__init__() 63 | for name, parameter in backbone.named_parameters(): 64 | # print(name, parameter.shape) 65 | if "layer2" not in name and "layer3" not in name and "layer4" not in name: 66 | parameter.requires_grad_(False) 67 | 68 | # return_layers = {"layer2": "3", "layer3": "5", "layer4": "2"} 69 | return_layers = {"layer2": "3", "layer3": "22", "layer4": "2"} 70 | 71 | self.backbone = IntermediateLayerGetter(backbone, return_layers=return_layers) 72 | self.num_channels = num_channels 73 | 74 | def forward(self, x): 75 | xs = self.backbone(x) 76 | 77 | fmp_list = [] 78 | for name, fmp in xs.items(): 79 | fmp_list.append(fmp) 80 | 81 | return fmp_list[0], fmp_list[1], fmp_list[2] 82 | 83 | 84 | class Backbone(BackboneBase): 85 | """ResNet backbone with frozen BatchNorm.""" 86 | 87 | def __init__(self, name: str, pretrained: bool, dilation: bool, norm_type: str): 88 | if norm_type == "BN": 89 | norm_layer = nn.BatchNorm2d 90 | elif norm_type == "FrozeBN": 91 | norm_layer = FrozenBatchNorm2d 92 | # NOTE: get the backbone network 93 | backbone = getattr(model_resnet, name)( 94 | replace_stride_with_dilation=[False, False, dilation], pretrained=pretrained, norm_layer=norm_layer 95 | ) 96 | num_channels = 512 if name in ("resnet18", "resnet34") else 2048 97 | super().__init__(backbone, num_channels) 98 | 99 | 100 | # def build_resnet(model_name="resnet18", pretrained=False, norm_type="BN"): 101 | # if model_name in ["resnet18", "resnet34", "resnet50", "resnet101", "resnext101_32x8d"]: 102 | # backbone = Backbone(model_name, pretrained, dilation=False, norm_type=norm_type) 103 | # elif model_name in ["resnet50-d", "resnet101-d"]: 104 | # backbone = Backbone(model_name[:-2], pretrained, dilation=True, norm_type=norm_type) 105 | 106 | # # return backbone, backbone.num_channels 107

# Factory returning a ResNet-50 backbone.
def Resnet50(pretrained=False, norm_type="BN"):
    """Build a `Backbone` wrapping resnet50 (no dilation).

    Parameters
    ----------
    pretrained : bool, optional
        Whether to load pretrained weights.
    norm_type : str, optional
        "BN" or "FrozeBN".

    Returns
    -------
    Backbone
        A resnet50 feature-pyramid backbone.
    """

    return Backbone("resnet50", pretrained, dilation=False, norm_type=norm_type)

# Factory returning a ResNet-101 backbone.
def Resnet101(pretrained=False, norm_type="BN"):
    """Build a `Backbone` wrapping resnet101 (no dilation).

    Parameters
    ----------
    pretrained : bool, optional
        Whether to load pretrained weights.
    norm_type : str, optional
        "BN" or "FrozeBN".

    Returns
    -------
    Backbone
        A resnet101 feature-pyramid backbone.
    """

    return Backbone("resnet101", pretrained, dilation=False, norm_type=norm_type)

if __name__ == "__main__":
    model = Resnet101(pretrained=False, norm_type="BN")
    model_list = model.state_dict().keys()
    # print(model_list)
    weight = torch.load(
        "D:\\Github\\v2\\weight\\resnet101-cd907fc2.pth"
    )
    # print(weight.keys())
    new_weight = OrderedDict()
    # zip stops at the shortest iterable
    for model_key, weight_key, weight_value in zip(model_list, weight.keys(), weight.values()):
        # strip the "backbone." prefix (9 characters) before matching checkpoint keys
        if model_key[9:] == weight_key:
            new_weight[model_key] = weight_value
    model.load_state_dict(new_weight, strict=False)  # partial load: only matched backbone keys


    # print('hello world')
    # print(type(feat_dim))
    # x = torch.randn(3, 3, 800, 800)
    # x_s, x_m, x_l = model(x)
    # print(x_s.size())
    # print(x_m.size())
    # print(x_l.size())
    # from rich import print
    # model_keys = list(model.state_dict().keys())
    # print(model_keys)
--------------------------------------------------------------------------------
/model/head/__pycache__/head10.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head10.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head10single.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head10single.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head11.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head11.cpython-39.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head3.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head3.cpython-38.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head4.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head4.cpython-38.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head4.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head4.cpython-39.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head5.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head5.cpython-38.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head5.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head5.cpython-39.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head6.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head6.cpython-39.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head7.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head7.cpython-39.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head9.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head9.cpython-39.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head_GGHLv2_x3.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head_GGHLv2_x3.cpython-38.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/head_ori.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head_ori.cpython-38.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/headv2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/headv2.cpython-38.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/headv21.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/headv21.cpython-38.pyc -------------------------------------------------------------------------------- /model/head/__pycache__/headv21.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/headv21.cpython-39.pyc -------------------------------------------------------------------------------- /model/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__init__.py -------------------------------------------------------------------------------- /model/layers/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /model/layers/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /model/layers/__pycache__/activations.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/activations.cpython-38.pyc -------------------------------------------------------------------------------- /model/layers/__pycache__/activations.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/activations.cpython-39.pyc -------------------------------------------------------------------------------- /model/layers/__pycache__/conv_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/conv_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /model/layers/__pycache__/conv_blocks.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/conv_blocks.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/convolutions.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/convolutions.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/convolutions.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/convolutions.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/msr_blocks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/msr_blocks.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/msr_blocks.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/msr_blocks.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/multiscale_fusion_blocks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/multiscale_fusion_blocks.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/multiscale_fusion_blocks.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/multiscale_fusion_blocks.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/np_attention_blocks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/np_attention_blocks.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/np_attention_blocks.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/np_attention_blocks.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/activations.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F

class Swish(nn.Module):
    @staticmethod
    def forward(x):
        return x * torch.sigmoid(x)

class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
    @staticmethod
    def forward(x):
        # return x * F.hardsigmoid(x)  # for torchscript and CoreML
        return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX

class MemoryEfficientSwish(nn.Module):
    class F(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x):
            ctx.save_for_backward(x)
            return x * torch.sigmoid(x)
        @staticmethod
        def backward(ctx, grad_output):
            x = ctx.saved_tensors[0]
            sx = torch.sigmoid(x)
            return grad_output * (sx * (1 + x * (1 - sx)))
    def forward(self, x):
        return self.F.apply(x)

class Mish(nn.Module):
    @staticmethod
    def forward(x):
        return x * F.softplus(x).tanh()

class MemoryEfficientMish(nn.Module):
    class F(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x):
            ctx.save_for_backward(x)
            return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
        @staticmethod
        def backward(ctx, grad_output):
            x = ctx.saved_tensors[0]
            sx = torch.sigmoid(x)
            fx = F.softplus(x).tanh()
            return grad_output * (fx + x * sx * (1 - fx * fx))
    def forward(self, x):
        return self.F.apply(x)

class FReLU(nn.Module):
    def __init__(self, c1, k=3):  # ch_in, kernel
        super().__init__()
        # pad with k // 2 so the depthwise conv keeps the spatial size for any odd k
        self.conv = nn.Conv2d(c1, c1, k, 1, k // 2, groups=c1)
        self.bn = nn.BatchNorm2d(c1)

    def forward(self, x):
        return torch.max(x, self.bn(self.conv(x)))
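# Quick sanity sketch (illustrative): every activation above is element-wise
# except FReLU, which needs the channel count up front.
#
#   x = torch.randn(2, 16, 8, 8)
#   for act in (Swish(), Hardswish(), Mish(), MemoryEfficientSwish(), MemoryEfficientMish()):
#       assert act(x).shape == x.shape
#   assert FReLU(16)(x).shape == x.shape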
--------------------------------------------------------------------------------
/model/layers/attention_blocks.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch.nn.functional as F
import torch

class hsigmoid(nn.Module):
    def forward(self, x):
        # hard sigmoid: relu6(x + 3) / 6 (the original mistakenly called the nn.ReLU6 constructor here)
        out = F.relu6(x + 3, inplace=True) / 6
        return out

class SELayer(nn.Module):
    """SENet channel attention"""
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            #nn.Conv2d(in_channels=channel, out_channels=channel // reduction, kernel_size=1, stride=1, padding=0, bias=False),
            #nn.BatchNorm2d(channel // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            #nn.Conv2d(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False),
            #nn.BatchNorm2d(channel),
            nn.Sigmoid(),
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

class NonLocalBlock(nn.Module):
    """Non-local Network"""
    def __init__(self, channel):
        super(NonLocalBlock, self).__init__()
        self.inter_channel = channel // 2
        self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
        self.softmax = nn.Softmax(dim=1)
        self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)

    def forward(self, x):
        # [N, C, H, W]
        b, c, h, w = x.size()
        # [N, C/2, H * W] -- the 1x1 convs halve the channels, so reshape with inter_channel rather than c
        x_phi = self.conv_phi(x).view(b, self.inter_channel, -1)
        # [N, H * W, C/2]
        x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
        x_g = self.conv_g(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
        # [N, H * W, H * W]
        mul_theta_phi = torch.matmul(x_theta, x_phi)
        mul_theta_phi = self.softmax(mul_theta_phi)
        # [N, H * W, C/2]
        mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g)
        # [N, C/2, H, W]
        mul_theta_phi_g = mul_theta_phi_g.permute(0, 2, 1).contiguous().view(b, self.inter_channel, h, w)
        # [N, C, H, W]
        mask = self.conv_mask(mul_theta_phi_g)
        out = mask + x
        return out

class ContextBlock(nn.Module):
    """GCNet"""
    def __init__(self, inplanes, ratio, pooling_type='att',
                 fusion_types=('channel_add', )):
        super(ContextBlock, self).__init__()
        valid_fusion_types = ['channel_add', 'channel_mul']
        assert pooling_type in ['avg', 'att']
        assert isinstance(fusion_types, (list, tuple))
        assert all([f in valid_fusion_types for f in fusion_types])
        assert len(fusion_types) > 0, 'at least one fusion should be used'

        self.inplanes = inplanes
        self.ratio = ratio
        self.planes = int(inplanes * ratio)
        self.pooling_type = pooling_type
        self.fusion_types = fusion_types

        if pooling_type == 'att':
            self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1)
            self.softmax = nn.Softmax(dim=2)
        else:
            self.avg_pool = nn.AdaptiveAvgPool2d(1)
        if 'channel_add' in fusion_types:
            self.channel_add_conv = nn.Sequential(
                nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),  # yapf: disable
                nn.Conv2d(self.planes, self.inplanes, kernel_size=1))
        else:
            self.channel_add_conv = None
        if 'channel_mul' in fusion_types:
            self.channel_mul_conv = nn.Sequential(
                nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),  # yapf: disable
                nn.Conv2d(self.planes, self.inplanes, kernel_size=1))
        else:
            self.channel_mul_conv = None

    def spatial_pool(self, x):
        batch, channel, height, width = x.size()
        if self.pooling_type == 'att':
            input_x = x
            # [N, C, H * W]
            input_x = input_x.view(batch, channel, height * width)
            # [N, 1, C, H * W]
            input_x = input_x.unsqueeze(1)
            # [N, 1, H, W]
            context_mask = self.conv_mask(x)
            # [N, 1, H * W]
            context_mask = context_mask.view(batch, 1, height * width)
            # [N, 1, H * W]
            context_mask = self.softmax(context_mask)
            # [N, 1, H * W, 1]
            context_mask = context_mask.unsqueeze(-1)
            # [N, 1, C, 1]
            context = torch.matmul(input_x, context_mask)
            # [N, C, 1, 1]
            context = context.view(batch, channel, 1, 1)
        else:
            # [N, C, 1, 1]
            context = self.avg_pool(x)
        return context

    def forward(self, x):
        # [N, C, 1, 1]
        context = self.spatial_pool(x)
        out = x
        if self.channel_mul_conv is not None:
            # [N, C, 1, 1]
            channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
            out = out * channel_mul_term
        if self.channel_add_conv is not None:
            # [N, C, 1, 1]
            channel_add_term = self.channel_add_conv(context)
            out = out + channel_add_term
        return out
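# Usage sketch (illustrative): each block re-weights or augments features
# without changing their shape, so they can be dropped into any neck or head.
#
#   x = torch.randn(2, 64, 32, 32)
#   assert SELayer(64)(x).shape == x.shape
#   assert NonLocalBlock(64)(x).shape == x.shape
#   assert ContextBlock(inplanes=64, ratio=0.25)(x).shape == x.shape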
class SpatialCGNL(nn.Module):
    """Spatial CGNL block with a dot-product kernel for image classification."""
    def __init__(self, inplanes, planes, use_scale=False, groups=1):
        # groups must be a positive int: it is also used as num_groups for nn.GroupNorm below
        self.use_scale = use_scale
        self.groups = groups
        super(SpatialCGNL, self).__init__()
        self.t = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
        self.p = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
        self.g = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
        self.z = nn.Conv2d(planes, inplanes, kernel_size=1, stride=1, groups=self.groups, bias=False)
        self.gn = nn.GroupNorm(num_groups=self.groups, num_channels=inplanes)

    def kernel(self, t, p, g, b, c, h, w):
        t = t.view(b, 1, c * h * w)
        p = p.view(b, 1, c * h * w)
        g = g.view(b, c * h * w, 1)
        att = torch.bmm(p, g)
        if self.use_scale:
            att = att.div((c * h * w) ** 0.5)

        x = torch.bmm(att, t)
        x = x.view(b, c, h, w)
        return x

    def forward(self, x):
        residual = x
        t = self.t(x)
        p = self.p(x)
        g = self.g(x)
        b, c, h, w = t.size()
        if self.groups and self.groups > 1:
            _c = int(c / self.groups)
            ts = torch.split(t, split_size_or_sections=_c, dim=1)
            ps = torch.split(p, split_size_or_sections=_c, dim=1)
            gs = torch.split(g, split_size_or_sections=_c, dim=1)
            _t_sequences = []
            for i in range(self.groups):
                _x = self.kernel(ts[i], ps[i], gs[i],
                                 b, _c, h, w)
                _t_sequences.append(_x)
            x = torch.cat(_t_sequences, dim=1)
        else:
            x = self.kernel(t, p, g,
                            b, c, h, w)
        x = self.z(x)
        x = self.gn(x) + residual
        return x
--------------------------------------------------------------------------------
/model/layers/conv_blocks.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
from ..layers.convolutions import Convolutional

class Residual_block(nn.Module):
    def __init__(self, filters_in, filters_out, filters_medium, norm="bn", activate="leaky"):
        super(Residual_block, self).__init__()
        self.__conv1 = Convolutional(filters_in=filters_in, filters_out=filters_medium, kernel_size=1,
                                     stride=1, pad=0, norm=norm, activate=activate)
        self.__conv2 = Convolutional(filters_in=filters_medium, filters_out=filters_out, kernel_size=3,
                                     stride=1, pad=1, norm=norm, activate=activate)

    def forward(self, x):
        r = self.__conv1(x)
        r = self.__conv2(r)
        out = x + r
        return out

class CSP_stage(nn.Module):
    def __init__(self, filters_in, n=1, activate="Swish"):
        super(CSP_stage, self).__init__()
        c_ = filters_in // 2  # hidden channels
        self.conv1 = Convolutional(filters_in=filters_in, filters_out=c_, kernel_size=1, stride=1, pad=0, norm="bn", activate=activate)
        self.conv2 = Convolutional(filters_in=filters_in, filters_out=c_, kernel_size=1, stride=1, pad=0, norm="bn", activate=activate)
        self.res_blocks = nn.Sequential(*[Residual_block(filters_in=c_, filters_out=c_, filters_medium=c_, norm="bn", activate=activate) for _ in range(n)])
        self.conv3 = Convolutional(filters_in=2 * c_, filters_out=filters_in, kernel_size=1, stride=1, pad=0, norm="bn", activate=activate)

    def forward(self, x):
        y1 = self.conv1(x)
        y2 = self.conv2(x)
        y2 = self.res_blocks(y2)
        return self.conv3(torch.cat([y2, y1], dim=1))
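# Usage sketch (illustrative): a CSP stage splits the channels in half, runs
# the residual stack on one half, and fuses back to the input width.
#
#   stage = CSP_stage(filters_in=128, n=2)
#   y = stage(torch.randn(1, 128, 64, 64))   # -> [1, 128, 64, 64]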
class Residual_block_CSP(nn.Module):
    def __init__(self, filters_in):
        super(Residual_block_CSP, self).__init__()
        self.__conv1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=1,
                                     stride=1, pad=0, norm="bn", activate="leaky")
        self.__conv2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
                                     stride=1, pad=1, norm="bn", activate="leaky")

    def forward(self, x):
        r = self.__conv1(x)
        r = self.__conv2(r)
        out = x + r
        return out


class InvertedResidual_block(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual_block, self).__init__()
        self.__stride = stride
        hidden_dim = int(inp * expand_ratio)
        self.use_res_connect = self.__stride == 1 and inp == oup
        if expand_ratio == 1:
            self.__conv = nn.Sequential(
                Convolutional(filters_in=hidden_dim, filters_out=hidden_dim, kernel_size=3,
                              stride=self.__stride, pad=1, groups=hidden_dim, norm="bn", activate="relu6"),
                Convolutional(filters_in=hidden_dim, filters_out=oup, kernel_size=1,
                              stride=1, pad=0, norm="bn")
            )
        else:
            self.__conv = nn.Sequential(
                Convolutional(filters_in=inp, filters_out=hidden_dim, kernel_size=1,
                              stride=1, pad=0, norm="bn", activate="relu6"),
                Convolutional(filters_in=hidden_dim, filters_out=hidden_dim, kernel_size=3,
                              stride=self.__stride, pad=1, groups=hidden_dim, norm="bn", activate="relu6"),
                Convolutional(filters_in=hidden_dim, filters_out=oup, kernel_size=1,
                              stride=1, pad=0, norm="bn")
            )

    def forward(self, x):
        if self.use_res_connect:
            return x + self.__conv(x)
        else:
            return self.__conv(x)
--------------------------------------------------------------------------------
/model/layers/convolutions.py:
--------------------------------------------------------------------------------
from .activations import *
try:
    from dcn_v2 import DCN  # needed by Deformable_Convolutional; build lib/DCNv2 first
except ImportError:
    DCN = None  # keep the module importable when the CUDA extension is absent
#from modelR.layers.deform_conv_v2 import DeformConv2d
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

norm_name = {"bn": nn.BatchNorm2d}
activate_name = {
    "relu": nn.ReLU,
    "leaky": nn.LeakyReLU,
    "relu6": nn.ReLU6,
    "Mish": Mish,
    "Swish": Swish,
    "MEMish": MemoryEfficientMish,
    "MESwish": MemoryEfficientSwish,
    "FReLu": FReLU,
    "SiLU": nn.SiLU  # listed so the assert below accepts activate="SiLU"
}

class Convolutional(nn.Module):
    def __init__(self, filters_in, filters_out, kernel_size, stride, pad, groups=1, dila=1, norm=None, activate=None):
        super(Convolutional, self).__init__()
        self.norm = norm
        self.activate = activate
        self.__conv = nn.Conv2d(in_channels=filters_in, out_channels=filters_out, kernel_size=kernel_size,
                                stride=stride, padding=pad, bias=not norm, groups=groups, dilation=dila)
        if norm:
            assert norm in norm_name.keys()
            if norm == "bn":
                self.__norm = norm_name[norm](num_features=filters_out)
        if activate:
            assert activate in activate_name.keys()
            if activate == "leaky":
                self.__activate = nn.SiLU()  # NOTE: "leaky" is currently remapped to SiLU
                # self.__activate = activate_name[activate](negative_slope=0.1, inplace=True)
            if activate == "relu":
                self.__activate = activate_name[activate](inplace=True)
            if activate == "relu6":
                self.__activate = activate_name[activate](inplace=True)
            if activate == "Mish":
                self.__activate = Mish()
            if activate == "Swish":
                self.__activate = Swish()
            if activate == "MEMish":
                self.__activate = MemoryEfficientMish()
            if activate == "MESwish":
                self.__activate = MemoryEfficientSwish()
            if activate == "FReLu":
                self.__activate = FReLU(filters_out)  # FReLU needs the channel count
            if activate == "SiLU":
                self.__activate = nn.SiLU()

    def forward(self, x):
        x = self.__conv(x)
        if self.norm:
            x = self.__norm(x)
        if self.activate:
            x = self.__activate(x)
        return x
activate == "MESwish": 48 | self.__activate = MemoryEfficientSwish() 49 | if activate == "FReLu": 50 | self.__activate = FReLU() 51 | if activate == "SiLU": 52 | self.__activate = nn.SiLU() 53 | 54 | def forward(self, x): 55 | x = self.__conv(x) 56 | if self.norm: 57 | x = self.__norm(x) 58 | if self.activate: 59 | x = self.__activate(x) 60 | return x 61 | 62 | 63 | class Deformable_Convolutional(nn.Module): 64 | def __init__(self, filters_in, filters_out, kernel_size, stride, pad, groups=1, norm=None, activate=None): 65 | super(Deformable_Convolutional, self).__init__() 66 | self.norm = norm 67 | self.activate = activate 68 | self.__dcn = DCN(filters_in, filters_out, kernel_size=kernel_size, stride=stride, padding=pad, deformable_groups=groups) 69 | if norm: 70 | assert norm in norm_name.keys() 71 | if norm == "bn": 72 | self.__norm = norm_name[norm](num_features=filters_out) 73 | if activate: 74 | assert activate in activate_name.keys() 75 | if activate == "leaky": 76 | self.__activate = activate_name[activate](negative_slope=0.1, inplace=True) 77 | if activate == "relu": 78 | self.__activate = activate_name[activate](inplace=True) 79 | if activate == "relu6": 80 | self.__activate = activate_name[activate](inplace=True) 81 | if activate == "Mish": 82 | self.__activate = Mish() 83 | if activate == "Swish": 84 | self.__activate = Swish() 85 | if activate == "MEMish": 86 | self.__activate = MemoryEfficientMish() 87 | if activate == "MESwish": 88 | self.__activate = MemoryEfficientSwish() 89 | if activate == "FReLu": 90 | self.__activate = FReLU() 91 | 92 | def forward(self, x): 93 | x = self.__dcn(x) 94 | if self.norm: 95 | x = self.__norm(x) 96 | if self.activate: 97 | x = self.__activate(x) 98 | return x 99 | 100 | class route_func(nn.Module): 101 | r"""CondConv: Conditionally Parameterized Convolutions for Efficient Inference 102 | https://papers.nips.cc/paper/8412-condconv-conditionally-parameterized-convolutions-for-efficient-inference.pdf 103 | Args: 104 | c_in (int): Number of channels in the input image 105 | num_experts (int): Number of experts for mixture. 

class route_func(nn.Module):
    r"""CondConv: Conditionally Parameterized Convolutions for Efficient Inference
    https://papers.nips.cc/paper/8412-condconv-conditionally-parameterized-convolutions-for-efficient-inference.pdf
    Args:
        c_in (int): Number of channels in the input image
        num_experts (int): Number of experts for mixture. Default: 1
    """

    def __init__(self, c_in, num_experts):
        super(route_func, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc = nn.Linear(c_in, num_experts)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.sigmoid(x)
        return x

class CondConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, bias=True,
                 num_experts=1):
        super(CondConv2d, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.num_experts = num_experts

        self.weight = nn.Parameter(torch.Tensor(num_experts, out_channels, in_channels // groups, kernel_size, kernel_size))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(num_experts, out_channels))
        else:
            self.register_parameter('bias', None)

        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, x, routing_weight):
        b, c_in, h, w = x.size()
        k, c_out, c_in, kh, kw = self.weight.size()
        # fold the batch into the channel dimension so a single grouped conv can
        # apply a different expert mixture to every sample
        x = x.contiguous().view(1, -1, h, w)
        weight = self.weight.contiguous().view(k, -1)

        combined_weight = torch.mm(routing_weight, weight).view(-1, c_in, kh, kw)

        if self.bias is not None:
            combined_bias = torch.mm(routing_weight, self.bias).view(-1)
            output = F.conv2d(
                x, weight=combined_weight, bias=combined_bias, stride=self.stride, padding=self.padding,
                dilation=self.dilation, groups=self.groups * b)
        else:
            output = F.conv2d(
                x, weight=combined_weight, bias=None, stride=self.stride, padding=self.padding,
                dilation=self.dilation, groups=self.groups * b)

        output = output.view(b, c_out, output.size(-2), output.size(-1))
        return output
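# Usage sketch (illustrative): the router emits one weight per expert and per
# sample; CondConv2d mixes the expert kernels with those weights before the
# single grouped convolution above.
#
#   router = route_func(c_in=64, num_experts=4)
#   conv = CondConv2d(64, 128, kernel_size=3, padding=1, num_experts=4)
#   x = torch.randn(2, 64, 32, 32)
#   y = conv(x, router(x))   # -> [2, 128, 32, 32]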
class Cond_Convolutional(nn.Module):
    def __init__(self, filters_in, filters_out, kernel_size, stride=1, pad=0, dila=1, groups=1, bias=True, num_experts=1, norm=None, activate=None):

        super(Cond_Convolutional, self).__init__()
        self.norm = norm
        self.activate = activate
        self.__conv = CondConv2d(in_channels=filters_in, out_channels=filters_out, kernel_size=kernel_size,
                                 stride=stride, padding=pad, dilation=dila, groups=groups, bias=bias, num_experts=num_experts)
        self.__routef = route_func(filters_in, num_experts)
        if norm:
            assert norm in norm_name.keys()
            if norm == "bn":
                self.__norm = norm_name[norm](num_features=filters_out)
        if activate:
            assert activate in activate_name.keys()
            if activate == "leaky":
                self.__activate = activate_name[activate](negative_slope=0.1, inplace=True)
            if activate == "relu":
                self.__activate = activate_name[activate](inplace=True)
            if activate == "relu6":
                self.__activate = activate_name[activate](inplace=True)
            if activate == "Mish":
                self.__activate = Mish()
            if activate == "Swish":
                self.__activate = Swish()
            if activate == "MEMish":
                self.__activate = MemoryEfficientMish()
            if activate == "MESwish":
                self.__activate = MemoryEfficientSwish()
            if activate == "FReLu":
                self.__activate = FReLU(filters_out)

    def forward(self, x):
        routef = self.__routef(x)
        x = self.__conv(x, routef)
        if self.norm:
            x = self.__norm(x)
        if self.activate:
            x = self.__activate(x)
        return x
--------------------------------------------------------------------------------
/model/layers/msr_blocks.py:
--------------------------------------------------------------------------------
from dropblock import DropBlock2D, LinearScheduler
from ..layers.convolutions import *

class MSR_L(nn.Module):
    def __init__(self, filters_in):
        super(MSR_L, self).__init__()
        self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3,
                                   stride=1, pad=1, norm="bn", activate="leaky")
        self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1,
                                   stride=1, pad=0, norm="bn", activate="leaky")
        self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
                                   stride=1, pad=2, dila=2, norm="bn", activate="leaky")
        self.__dw2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
                                   stride=1, pad=4, dila=4, norm="bn", activate="leaky")
        self.__dw3 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
                                   stride=1, pad=6, dila=6, norm="bn", activate="leaky")
        self.__pw1 = Convolutional(filters_in=filters_in*4, filters_out=filters_in, kernel_size=1,
                                   stride=1, pad=0, norm="bn", activate="Mish")
        self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0.,
                                      stop_value=0.1, nr_steps=5)

    def forward(self, x):
        dw0 = self.__dw0(x)
        dw0 = self.__drop(dw0)
        pw0 = self.__pw0(dw0)
        dw1 = self.__dw1(pw0)
        dw2 = self.__dw2(pw0) + dw1
        dw3 = self.__dw3(pw0) + dw2
        cat = torch.cat((pw0, dw1, dw2, dw3), 1)
        pw1 = self.__pw1(cat)
        return pw1

class MSR_M(nn.Module):
    def __init__(self, filters_in):
        super(MSR_M, self).__init__()
        self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3,
                                   stride=1, pad=1, norm="bn", activate="leaky")
        self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1,
                                   stride=1, pad=0, norm="bn", activate="leaky")
        self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
                                   stride=1, pad=2, dila=2, norm="bn", activate="leaky")
        self.__dw2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
                                   stride=1, pad=4, dila=4, norm="bn", activate="leaky")
        self.__pw1 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1,
                                   stride=1, pad=0, norm="bn", activate="Mish")
        self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0.,
                                      stop_value=0.1, nr_steps=5)

    def forward(self, x):
        dw0 = self.__dw0(x)
        dw0 = self.__drop(dw0)
        pw0 = self.__pw0(dw0)
        dw1 = self.__dw1(pw0)
        dw2 = self.__dw2(pw0) + dw1
        cat = torch.cat((dw1, dw2), 1)
        pw1 = self.__pw1(cat)
        return pw1
activate="leaky") 64 | self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, 65 | stride=1, pad=0, norm="bn", activate="leaky") 66 | self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, 67 | stride=1, pad=1, dila=1, norm="bn", activate="leaky") 68 | self.__dw2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, 69 | stride=1, pad=2, dila=2, norm="bn", activate="leaky") 70 | self.__pw1 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, 71 | stride=1, pad=0, norm="bn", activate="Mish") 72 | self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., 73 | stop_value=0.1, nr_steps=5) 74 | 75 | def forward(self, x): 76 | dw0 = self.__dw0(x) 77 | dw0 = self.__drop(dw0) 78 | pw0 = self.__pw0(dw0) 79 | dw1 = self.__dw1(pw0) 80 | dw2 = self.__dw2(pw0)+dw1 81 | cat = torch.cat((dw1, dw2),1) 82 | pw1 = self.__pw1(cat) 83 | return pw1 84 | -------------------------------------------------------------------------------- /model/layers/multiscale_fusion_blocks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from ..layers.convolutions import Convolutional, Deformable_Convolutional 5 | 6 | class SPP(nn.Module): 7 | def __init__(self, depth=512): 8 | super(SPP,self).__init__() 9 | self.__maxpool5 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) 10 | self.__maxpool9 = nn.MaxPool2d(kernel_size=9, stride=1, padding=4) 11 | self.__maxpool13 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6) 12 | self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1) 13 | 14 | def forward(self, x): 15 | maxpool5 = self.__maxpool5(x) 16 | maxpool9 = self.__maxpool9(x) 17 | maxpool13 = self.__maxpool13(x) 18 | cat_maxpool = torch.cat([x, maxpool5, maxpool9, maxpool13], dim=1) 19 | SPP = self.__outconv(cat_maxpool) 20 | return SPP 21 | 22 | class SPPF(nn.Module): 23 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 24 | def __init__(self, depth=512): 25 | super(SPPF, self).__init__() 26 | self.__maxpool5 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) 27 | self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1) 28 | def forward(self, x): 29 | maxpool5 = self.__maxpool5(x) 30 | maxpool9 = self.__maxpool5(maxpool5) 31 | cat_maxpool = torch.cat([x, maxpool5, maxpool9, self.__maxpool5(maxpool9)], dim=1) 32 | SPPF = self.__outconv(cat_maxpool) 33 | return SPPF 34 | 35 | class ASPP(nn.Module): 36 | def __init__(self, in_channel=1280, depth=512): 37 | super(ASPP,self).__init__() 38 | self.__dilaconv1 = nn.Conv2d(in_channel, depth, 1, 1) 39 | self.__dilaconv5 = nn.Conv2d(in_channel, depth, 3, 1, padding=2, dilation=2) 40 | self.__dilaconv9 = nn.Conv2d(in_channel, depth, 3, 1, padding=4, dilation=4) 41 | self.__dilaconv13 = nn.Conv2d(in_channel, depth, 3, 1, padding=6, dilation=6) 42 | self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1) 43 | 44 | def forward(self, x): 45 | dilaconv1 = self.__dilaconv1(x) 46 | dilaconv5 = self.__dilaconv5(x) 47 | dilaconv9 = self.__dilaconv9(x) 48 | dilaconv13 = self.__dilaconv13(x) 49 | cat_dilaconv = torch.cat([dilaconv1, dilaconv5, dilaconv9, dilaconv13], dim=1) 50 | ASPP = self.__outconv(cat_dilaconv) 51 | return ASPP 52 | 53 | class ASFF(nn.Module): 54 | def __init__(self, level, vis=False): 55 | super(ASFF, self).__init__() 56 | self.level = level 57 | self.dim = [512,256,128] 58 | self.inter_dim = 
class ASPP(nn.Module):
    def __init__(self, in_channel=1280, depth=512):
        super(ASPP, self).__init__()
        self.__dilaconv1 = nn.Conv2d(in_channel, depth, 1, 1)
        self.__dilaconv5 = nn.Conv2d(in_channel, depth, 3, 1, padding=2, dilation=2)
        self.__dilaconv9 = nn.Conv2d(in_channel, depth, 3, 1, padding=4, dilation=4)
        self.__dilaconv13 = nn.Conv2d(in_channel, depth, 3, 1, padding=6, dilation=6)
        self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1)

    def forward(self, x):
        dilaconv1 = self.__dilaconv1(x)
        dilaconv5 = self.__dilaconv5(x)
        dilaconv9 = self.__dilaconv9(x)
        dilaconv13 = self.__dilaconv13(x)
        cat_dilaconv = torch.cat([dilaconv1, dilaconv5, dilaconv9, dilaconv13], dim=1)
        ASPP = self.__outconv(cat_dilaconv)
        return ASPP

class ASFF(nn.Module):
    def __init__(self, level, vis=False):
        super(ASFF, self).__init__()
        self.level = level
        self.dim = [512, 256, 128]
        self.inter_dim = self.dim[self.level]
        if level == 0:
            self.stride_level_1 = Convolutional(256, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6')
            self.stride_level_2 = Convolutional(128, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6')
            self.expand = Convolutional(self.inter_dim, 1024, 3, 1, pad=1, norm='bn', activate='relu6')
        elif level == 1:
            self.compress_level_0 = Convolutional(512, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6')
            self.stride_level_2 = Convolutional(128, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6')
            self.expand = Convolutional(self.inter_dim, 512, 3, 1, pad=1, norm='bn', activate='relu6')
        elif level == 2:
            self.compress_level_0 = Convolutional(512, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6')
            self.compress_level_1 = Convolutional(256, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6')
            self.expand = Convolutional(self.inter_dim, 256, 3, 1, pad=1, norm='bn', activate='relu6')
        compress_c = 16
        self.weight_level_0 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6')
        self.weight_level_1 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6')
        self.weight_level_2 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6')
        self.weight_levels = nn.Conv2d(compress_c * 3, 3, kernel_size=1, stride=1, padding=0)
        self.vis = vis

    def forward(self, x_level_0, x_level_1, x_level_2):
        if self.level == 0:
            level_0_resized = x_level_0
            level_1_resized = self.stride_level_1(x_level_1)
            level_2_downsampled_inter = F.max_pool2d(x_level_2, 3, stride=2, padding=1)
            level_2_resized = self.stride_level_2(level_2_downsampled_inter)
        elif self.level == 1:
            level_0_compressed = self.compress_level_0(x_level_0)
            level_0_resized = F.interpolate(level_0_compressed, scale_factor=2, mode='nearest')
            level_1_resized = x_level_1
            level_2_resized = self.stride_level_2(x_level_2)
        elif self.level == 2:
            level_0_compressed = self.compress_level_0(x_level_0)
            level_0_resized = F.interpolate(level_0_compressed, scale_factor=4, mode='nearest')
            level_1_compressed = self.compress_level_1(x_level_1)
            level_1_resized = F.interpolate(level_1_compressed, scale_factor=2, mode='nearest')
            level_2_resized = x_level_2

        level_0_weight_v = self.weight_level_0(level_0_resized)
        level_1_weight_v = self.weight_level_1(level_1_resized)
        level_2_weight_v = self.weight_level_2(level_2_resized)
        levels_weight_v = torch.cat((level_0_weight_v, level_1_weight_v, level_2_weight_v), 1)
        levels_weight = self.weight_levels(levels_weight_v)
        levels_weight = F.softmax(levels_weight, dim=1)

        fused_out_reduced = level_0_resized * levels_weight[:, 0:1, :, :] + \
                            level_1_resized * levels_weight[:, 1:2, :, :] + \
                            level_2_resized * levels_weight[:, 2:, :, :]

        out = self.expand(fused_out_reduced)

        if self.vis:
            return out, levels_weight, fused_out_reduced.sum(dim=1)
        else:
            return out
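# Usage sketch (illustrative): ASFF adaptively fuses three pyramid levels into
# the resolution of the chosen level, e.g. level=1 keeps the middle scale.
#
#   asff = ASFF(level=1)
#   out = asff(torch.randn(1, 512, 16, 16),   # level 0 (coarsest)
#              torch.randn(1, 256, 32, 32),   # level 1
#              torch.randn(1, 128, 64, 64))   # level 2 (finest)
#   # out: [1, 512, 32, 32]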
class FeatureAdaption(nn.Module):
    def __init__(self, in_ch, out_ch, n_anchors):
        super(FeatureAdaption, self).__init__()
        self.sep = False
        # one (dw, dh) pair per anchor -> 2*9 offsets per anchor for the 3x3 deformable kernel
        self.conv_offset = nn.Conv2d(in_channels=2*n_anchors, out_channels=2*9*n_anchors, groups=n_anchors, kernel_size=1, stride=1, padding=0)
        self.dconv = Deformable_Convolutional(filters_in=in_ch, filters_out=out_ch,
                                              kernel_size=3, stride=1, pad=1, groups=n_anchors)

    def forward(self, input, wh_pred):
        # NOTE: Deformable_Convolutional.forward takes a single input and lets DCN
        # predict its own offsets, so the explicit offset passed here requires an
        # offset-taking DCN variant (e.g. DCNv2 from lib/DCNv2).
        wh_pred_new = wh_pred.detach()
        offset = self.conv_offset(wh_pred_new)
        out = self.dconv(input, offset)
        return out

class Features_Fusion(nn.Module):
    def __init__(self, in_channels, out_channels, r=16):
        super(Features_Fusion, self).__init__()
        self.out_channels = out_channels
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_fc1 = Convolutional(in_channels, in_channels // r, kernel_size=1, stride=1, pad=0, norm='bn', activate='leaky')
        self.conv_fc2 = nn.Conv2d(in_channels // r, out_channels * 2, kernel_size=1, padding=0, bias=False)
        self.softmax = nn.Softmax(dim=2)


    def forward(self, x1, x2):
        batch_size = x1.size(0)
        x_mix = torch.add(x1, x2)  # element-wise sum produces the mixed feature U
        x_avg = self.avg_pool(x_mix)
        x_fcout = self.conv_fc2(self.conv_fc1(x_avg))  # squeeze then expand; first half of the channels is a, second half is b
        x_reshape = x_fcout.reshape(batch_size, self.out_channels, 2, -1)  # reshape so the two branch logits sit on their own axis
        x_softmax = self.softmax(x_reshape)  # softmax across the two branches at each channel
        w1 = x_softmax[:, :, 0:1, :]  # split the tensor into the two branch weights
        w2 = x_softmax[:, :, 1:2, :]
        out = x1 * w1 + x2 * w2  # weighted element-wise sum of the two features
        return out
--------------------------------------------------------------------------------
/model/loss/__pycache__/loss4.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/loss4.cpython-38.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/loss4.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/loss4.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/loss6.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/loss6.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv2.cpython-38.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv2.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv2.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv2single.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv2single.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv8.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv8.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv88.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv88.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv9.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv9.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/seesaw_loss.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/seesaw_loss.cpython-38.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/seesaw_loss.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/seesaw_loss.cpython-39.pyc
--------------------------------------------------------------------------------
/model/neck/__pycache__/neckv2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/neck/__pycache__/neckv2.cpython-38.pyc
--------------------------------------------------------------------------------
/model/neck/__pycache__/neckv2.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/neck/__pycache__/neckv2.cpython-39.pyc
--------------------------------------------------------------------------------
/model/neck/__pycache__/neckv8.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/neck/__pycache__/neckv8.cpython-39.pyc
--------------------------------------------------------------------------------
/predictionR/lr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/predictionR/lr.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
Cython==0.29.24
dropblock==0.3.0
einops==0.3.2
imgaug==0.4.0
matplotlib==3.4.3
opencv-python==4.5.3.56
pycocotools==2.0.2
scikit-image==0.18.3
scikit-learn==1.0
scipy==1.7.1
Shapely==1.7.1
tensorboardX==2.4
thop==0.0.31.post2005241907
timm==0.4.12
tqdm==4.62.3
prefetch_generator==1.0.1
rich
tensorboard
nvitop
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import os
import argparse
import time
import logging
import torch  # needed for torch.load and torch.no_grad below
import config.config as cfg
import utils.gpu as gpu
from utils.log import Logger
from model.TSConv import GGHL
from evalR.evaluatorTSplot import Evaluator
from tensorboardX import SummaryWriter

class Tester(object):
    def __init__(self, weight_path=None, gpu_id=0, eval=False):
        self.img_size = cfg.TEST["TEST_IMG_SIZE"]
        self.__num_class = cfg.DATA["NUM"]
        self.__device = gpu.select_device(gpu_id, force_cpu=False)
        self.__eval = eval
        self.__model = GGHL().to(self.__device)
        self.__load_model_weights(weight_path)

    def __load_model_weights(self, weight_path):
        print("loading weight file from : {}".format(weight_path))
        weight = os.path.join(weight_path)
        chkpt = torch.load(weight, map_location=self.__device)
        self.__model.load_state_dict(chkpt['model'])  # ['model']
        del chkpt

    def test(self):
        global logger
        logger.info("***********Start Evaluation****************")
        mAP = 0
        if self.__eval and cfg.TEST["EVAL_TYPE"] == 'VOC':
            with torch.no_grad():
                start = time.time()
                APs, _, _, inference_time = Evaluator(self.__model).APs_voc()
                end = time.time()
                logger.info("Test cost time:{:.4f}s".format(end - start))
            for i in APs:
                print("{} --> AP : {}".format(i, APs[i]))
                mAP += APs[i]
            mAP = mAP / self.__num_class
            logger.info('mAP:{}'.format(mAP))
            logger.info("inference time: {:.2f} ms".format(inference_time))
            writer.add_scalar('test/VOCmAP', mAP)

if __name__ == "__main__":
    global logger
    parser = argparse.ArgumentParser()
    parser.add_argument('--weight_path', type=str, default='./weight/best.pt', help='weight file path')
    parser.add_argument('--log_val_path', type=str, default='log/', help='validation log path')
    parser.add_argument('--eval', action='store_true', default=True, help='eval flag')
    parser.add_argument('--gpu_id', type=int, default=1, help='gpu id')
    parser.add_argument('--log_path', type=str, default='log/', help='log path')
    opt = parser.parse_args()
    writer = SummaryWriter(logdir=opt.log_path + '/event')
    logger = Logger(log_file_name=opt.log_val_path + '/log_coco_test.txt', log_level=logging.DEBUG,
                    logger_name='GGHL').get_log()
    Tester(weight_path=opt.weight_path, gpu_id=opt.gpu_id, eval=opt.eval).test()
--------------------------------------------------------------------------------
/train_dist.sh:
--------------------------------------------------------------------------------
MKL_NUM_THREADS=16 OMP_NUM_THREADS=16 torchrun \
    --standalone \
    --nnodes=1 \
    --nproc_per_node=2 \
    trainv2.py
--------------------------------------------------------------------------------
/utils/__pycache__/cosine_lr_scheduler.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/cosine_lr_scheduler.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/cosine_lr_scheduler.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/cosine_lr_scheduler.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/gpu.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/gpu.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/gpu.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/gpu.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/log.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/log.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/log.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/log.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils_basic.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/utils_basic.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils_basic.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/utils_basic.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils_coco.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/utils_coco.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils_coco.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/utils_coco.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/visualize.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/visualize.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/visualize.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/utils/__pycache__/visualize.cpython-39.pyc 
--------------------------------------------------------------------------------
/utils/cosine_lr_scheduler.py:
--------------------------------------------------------------------------------
import numpy as np

class CosineDecayLR(object):
    def __init__(self, optimizer, T_max, lr_init, lr_min=0., warmup=0):
        """
        A cosine decay scheduler over steps (not epochs).
        :param optimizer: e.g. optim.SGD
        :param T_max: max steps, where steps = epochs * batches
        :param lr_init: the initial (maximum) learning rate
        :param lr_min: the final learning rate at the end of the decay
        :param warmup: number of warmup steps at the beginning of training,
                       during which the lr increases smoothly from 0 to
                       lr_init; 0 disables warmup.
        """
        super(CosineDecayLR, self).__init__()
        self.__optimizer = optimizer
        self.__T_max = T_max
        self.__lr_min = lr_min
        self.__lr_max = lr_init
        self.__warmup = warmup

    def step(self, t):
        if self.__warmup and t < self.__warmup:
            lr = self.__lr_max / self.__warmup * t
        else:
            T_max = self.__T_max - self.__warmup
            t = t - self.__warmup
            lr = self.__lr_min + 0.5 * (self.__lr_max - self.__lr_min) * (1 + np.cos(t / T_max * np.pi))
        for param_group in self.__optimizer.param_groups:
            param_group["lr"] = lr


if __name__ == '__main__':
    import matplotlib.pyplot as plt
    import math
    from model.TSConv import GGHL  # the repository has no modelR package; TSConv defines GGHL
    import torch.optim as optim
    import config.config as cfg

    net = GGHL()

    optimizer = optim.SGD(net.parameters(), cfg.TRAIN["LR_INIT"], cfg.TRAIN["MOMENTUM"],
                          weight_decay=cfg.TRAIN["WEIGHT_DECAY"])
    # optimizer = optim.Adam(net.parameters(), lr=cfg.TRAIN["LR_INIT"])

    scheduler = CosineDecayLR(optimizer,
                              math.ceil(cfg.TRAIN["EPOCHS"] / cfg.TRAIN["BATCH_SIZE"]) * cfg.TRAIN["TRAIN_IMG_NUM"],
                              cfg.TRAIN["LR_INIT"], cfg.TRAIN["LR_END"],
                              cfg.TRAIN["WARMUP_EPOCHS"] / cfg.TRAIN["BATCH_SIZE"] * cfg.TRAIN["TRAIN_IMG_NUM"])

    y = []
    for t in range(math.ceil(cfg.TRAIN["EPOCHS"] / cfg.TRAIN["BATCH_SIZE"])):
        for i in range(cfg.TRAIN["TRAIN_IMG_NUM"]):
            scheduler.step(cfg.TRAIN["TRAIN_IMG_NUM"] * t + i)
            y.append(optimizer.param_groups[0]['lr'])

    print(y)
    plt.figure()
    plt.plot(y, label='CosineDecayLR')
    plt.xlabel('steps')
    plt.ylabel('LR')
    plt.tight_layout()
    plt.savefig("../predictionR/lr.png", dpi=600)
    plt.show()
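
Editor's note: a quick numerical check of the schedule above, with made-up hyper-parameters. The lr ramps linearly from 0 to lr_init over the warmup steps, sits at exactly lr_init when the cosine phase begins, and decays to lr_min at T_max:

import torch
from utils.cosine_lr_scheduler import CosineDecayLR

opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=1e-3)
sched = CosineDecayLR(opt, T_max=10000, lr_init=1e-3, lr_min=1e-6, warmup=500)
for t in (0, 250, 500, 5250, 10000):
    sched.step(t)
    print(t, opt.param_groups[0]['lr'])
# 0 -> 0.0, 250 -> 5e-4, 500 -> 1e-3, 5250 -> ~5e-4, 10000 -> 1e-6
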
22 | """ 23 | if local_rank not in [-1, 0]: 24 | dist.barrier(device_ids=[local_rank]) 25 | yield 26 | if local_rank == 0: 27 | dist.barrier(device_ids=[0]) 28 | 29 | 30 | def select_device(id, force_cpu=False): 31 | cuda = False if force_cpu else torch.cuda.is_available() 32 | cudnn.benchmark = True 33 | device = torch.device('cuda:{}'.format(id) if cuda else 'cpu') 34 | #device = torch.cuda.set_device(0 if cuda else 'cpu') 35 | if not cuda: 36 | print('Using CPU') 37 | if cuda: 38 | #device = torch.cuda.set_device(id) 39 | c = 1024 ** 2 # bytes to MB 40 | ng = torch.cuda.device_count() 41 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 42 | print("Using CUDA device0 _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 43 | (x[0].name, x[0].total_memory / c)) 44 | if ng > 0: 45 | # torch.cuda.set_device(0) # OPTIONAL: Set GPU ID 46 | for i in range(1, ng): 47 | print(" device%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 48 | (i, x[i].name, x[i].total_memory / c)) 49 | 50 | return device 51 | 52 | def select_device_v5(device='', batch_size=None): 53 | # device = 'cpu' or '0' or '0,1,2,3' 54 | device = str(device).strip().lower().replace('cuda:', '') # to string, 'cuda:0' to '0' 55 | cpu = device == 'cpu' 56 | if cpu: 57 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 58 | elif device: # non-cpu device requested 59 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 60 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 61 | 62 | cuda = not cpu and torch.cuda.is_available() 63 | if cuda: 64 | devices = device.split(',') if device else '0' # range(torch.cuda.device_count()) # i.e. 0,1,6,7 65 | n = len(devices) # device count 66 | if n > 1 and batch_size: # check batch_size is divisible by device_count 67 | pass 68 | for i, d in enumerate(devices): 69 | p = torch.cuda.get_device_properties(i) 70 | else: 71 | pass 72 | 73 | return torch.device('cuda:0' if cuda else 'cpu') -------------------------------------------------------------------------------- /utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from rich.logging import RichHandler 3 | from logging.config import dictConfig 4 | 5 | class NewRichHandler(RichHandler): 6 | KEYWORDS = { 7 | 'size', 8 | 'Epoch', 9 | 'Batch', 10 | 'Img', 11 | 'Loss', 12 | 'fg', 13 | 'bg', 14 | 'pos', 15 | 'neg', 16 | 'iou', 17 | 'cls', 18 | 'Loss_S', 19 | 'Loss_R', 20 | 'Loss_L', 21 | 'LR', 22 | '_' 23 | } 24 | 25 | class Logger(object): 26 | def __init__(self,log_file_name,log_level,logger_name): 27 | # firstly, create a logger 28 | self.__logger = logging.getLogger(logger_name) 29 | self.__logger.setLevel(log_level) 30 | # secondly, create a handler 31 | file_handler = logging.FileHandler(log_file_name) 32 | console_handler = NewRichHandler(rich_tracebacks=True, tracebacks_show_locals=True) 33 | # thirdly, define the output form of handler 34 | formatter = logging.Formatter( 35 | '[%(asctime)s]-[%(filename)s line:%(lineno)d]:%(message)s' 36 | ) 37 | rich_formatter = logging.Formatter("%(message)s") 38 | file_handler.setFormatter(formatter) 39 | console_handler.setFormatter(rich_formatter) 40 | # finally, add the Hander to logger 41 | self.__logger.addHandler(file_handler) 42 | self.__logger.addHandler(console_handler) 43 | 44 | def get_log(self): 45 | return self.__logger 46 | 47 | if __name__ == "__main__": 48 | logger = Logger('./log.txt', 
--------------------------------------------------------------------------------
/utils/log.py:
--------------------------------------------------------------------------------
import logging
from rich.logging import RichHandler

class NewRichHandler(RichHandler):
    # keywords that rich highlights in the console output
    KEYWORDS = {
        'size', 'Epoch', 'Batch', 'Img', 'Loss', 'fg', 'bg', 'pos', 'neg',
        'iou', 'cls', 'Loss_S', 'Loss_R', 'Loss_L', 'LR', '_'
    }

class Logger(object):
    def __init__(self, log_file_name, log_level, logger_name):
        # firstly, create a logger
        self.__logger = logging.getLogger(logger_name)
        self.__logger.setLevel(log_level)
        # secondly, create the handlers
        file_handler = logging.FileHandler(log_file_name)
        console_handler = NewRichHandler(rich_tracebacks=True, tracebacks_show_locals=True)
        # thirdly, define the output format of each handler
        formatter = logging.Formatter(
            '[%(asctime)s]-[%(filename)s line:%(lineno)d]:%(message)s'
        )
        rich_formatter = logging.Formatter("%(message)s")
        file_handler.setFormatter(formatter)
        console_handler.setFormatter(rich_formatter)
        # finally, add the handlers to the logger
        self.__logger.addHandler(file_handler)
        self.__logger.addHandler(console_handler)

    def get_log(self):
        return self.__logger

if __name__ == "__main__":
    logger = Logger('./log.txt', logging.DEBUG, 'demo').get_log()
    logger.info('hello')
--------------------------------------------------------------------------------
/utils/num_of_works_set.py:
--------------------------------------------------------------------------------
import time
import torch.utils.data as d
import torchvision
import torchvision.transforms as transforms

if __name__ == '__main__':
    BATCH_SIZE = 100
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])
    train_set = torchvision.datasets.MNIST('./mnist', download=True, train=True, transform=transform)

    # benchmark data loading with different numbers of worker processes
    for num_workers in range(20):
        train_loader = d.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers)
        start = time.time()
        for epoch in range(1):
            for step, (batch_x, batch_y) in enumerate(train_loader):
                pass
        end = time.time()
        print('num_workers is {} and it took {} seconds'.format(num_workers, end - start))
--------------------------------------------------------------------------------
/utils/utils_coco.py:
--------------------------------------------------------------------------------
from __future__ import division
import torch
import numpy as np
import cv2

def nms(bbox, thresh, score=None, limit=None):
    """Suppress bounding boxes according to their IoUs and confidence scores.
    Args:
        bbox (array): Bounding boxes to be transformed. The shape is
            :math:`(R, 4)`. :math:`R` is the number of bounding boxes.
        thresh (float): Threshold of IoUs.
        score (array): An array of confidences whose shape is :math:`(R,)`.
        limit (int): The upper bound of the number of the output bounding
            boxes. If it is not specified, this method selects as many
            bounding boxes as possible.
    Returns:
        array:
            An array with indices of bounding boxes that are selected.
            They are sorted by the scores of bounding boxes in descending
            order. The shape of this array is :math:`(K,)` and its dtype is
            :obj:`numpy.int32`. Note that :math:`K \\leq R`.

    from: https://github.com/chainer/chainercv
    """
    if len(bbox) == 0:
        return np.zeros((0,), dtype=np.int32)

    if score is not None:
        order = score.argsort()[::-1]
        bbox = bbox[order]
    bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1)

    selec = np.zeros(bbox.shape[0], dtype=bool)
    for i, b in enumerate(bbox):
        tl = np.maximum(b[:2], bbox[selec, :2])
        br = np.minimum(b[2:], bbox[selec, 2:])
        area = np.prod(br - tl, axis=1) * (tl < br).all(axis=1)

        iou = area / (bbox_area[i] + bbox_area[selec] - area)
        if (iou >= thresh).any():
            continue

        selec[i] = True
        if limit is not None and np.count_nonzero(selec) >= limit:
            break

    selec = np.where(selec)[0]
    if score is not None:
        selec = order[selec]
    return selec.astype(np.int32)


def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
    """Decode (cx, cy, w, h) predictions into corner boxes, filter them by
    objectness * class confidence, and run per-class NMS. Returns one
    (K, 7) tensor per image with rows (x1, y1, x2, y2, obj_conf, class_conf,
    class_id), or None if nothing survives."""
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1)
        class_pred = class_pred[0]
        conf_mask = (image_pred[:, 4] * class_pred >= conf_thre).squeeze()
        image_pred = image_pred[conf_mask]

        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Get detections with higher confidence scores than the threshold
        ind = (image_pred[:, 5:] * image_pred[:, 4][:, None] >= conf_thre).nonzero()
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((
            image_pred[ind[:, 0], :5],
            image_pred[ind[:, 0], 5 + ind[:, 1]].unsqueeze(1),
            ind[:, 1].float().unsqueeze(1)
        ), 1)
        # Iterate through all predicted classes
        unique_labels = detections[:, -1].cpu().unique()
        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
        for c in unique_labels:
            # Get the detections with the particular class
            detections_class = detections[detections[:, -1] == c]
            nms_in = detections_class.cpu().numpy()
            nms_out_index = nms(
                nms_in[:, :4], nms_thre, score=nms_in[:, 4] * nms_in[:, 5])
            detections_class = detections_class[nms_out_index]
            if output[i] is None:
                output[i] = detections_class
            else:
                output[i] = torch.cat((output[i], detections_class))

    return output

def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
    """Pairwise IoU between two sets of boxes, given either as
    (x1, y1, x2, y2) corners (xyxy=True) or (cx, cy, w, h)."""
    if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
        raise IndexError

    if xyxy:
        # top left
        tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
        # bottom right
        br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
        area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
        area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
    else:
        # top left
        tl = torch.max((bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
                       (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2))
        # bottom right
        br = torch.min((bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
                       (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2))
        area_a = torch.prod(bboxes_a[:, 2:], 1)
        area_b = torch.prod(bboxes_b[:, 2:], 1)
    en = (tl < br).type(tl.type()).prod(dim=2)
    area_i = torch.prod(br - tl, 2) * en  # * ((tl < br).all())
    return area_i / (area_a[:, None] + area_b - area_i)


def label2box(labels, info_img, maxsize, lrflip):
    """Convert pixel-space labels (class, x, y, w, h) with top-left (x, y)
    on the original image into normalized (class, cx, cy, w, h) labels on
    the letterboxed maxsize canvas."""
    h, w, nh, nw, dx, dy = info_img
    x1 = labels[:, 1] / w
    y1 = labels[:, 2] / h
    x2 = (labels[:, 1] + labels[:, 3]) / w
    y2 = (labels[:, 2] + labels[:, 4]) / h
    labels[:, 1] = (((x1 + x2) / 2) * nw + dx) / maxsize
    labels[:, 2] = (((y1 + y2) / 2) * nh + dy) / maxsize
    labels[:, 3] = labels[:, 3] * nw / w / maxsize
    labels[:, 4] = labels[:, 4] * nh / h / maxsize
    if lrflip:
        labels[:, 1] = 1 - labels[:, 1]
    return labels


def box2label(box, info_img):
    """Map a (y1, x1, y2, x2) box on the letterboxed canvas back to the
    coordinate system of the original image."""
    h, w, nh, nw, dx, dy = info_img
    y1, x1, y2, x2 = box
    box_h = ((y2 - y1) / nh) * h
    box_w = ((x2 - x1) / nw) * w
    y1 = ((y1 - dy) / nh) * h
    x1 = ((x1 - dx) / nw) * w
    label = [y1, x1, y1 + box_h, x1 + box_w]
    return label


def preprocess(img, imgsize, jitter, random_placing=False):
    """Letterbox-resize an image to imgsize x imgsize, flipping the channel
    order (BGR -> RGB for cv2-loaded images) and padding with gray 127;
    optionally jitters the aspect ratio and the placement. Returns the
    resized image and info_img = (h, w, nh, nw, dx, dy)."""
    h, w, _ = img.shape
    img = img[:, :, ::-1]
    assert img is not None

    if jitter > 0:
        # add jitter
        dw = jitter * w
        dh = jitter * h
        new_ar = (w + np.random.uniform(low=-dw, high=dw)) \
            / (h + np.random.uniform(low=-dh, high=dh))
    else:
        new_ar = w / h

    if new_ar < 1:
        nh = imgsize
        nw = nh * new_ar
    else:
        nw = imgsize
        nh = nw / new_ar
    nw, nh = int(nw), int(nh)

    if random_placing:
        dx = int(np.random.uniform(imgsize - nw))
        dy = int(np.random.uniform(imgsize - nh))
    else:
        dx = (imgsize - nw) // 2
        dy = (imgsize - nh) // 2

    img = cv2.resize(img, (nw, nh))
    sized = np.ones((imgsize, imgsize, 3), dtype=np.uint8) * 127
    sized[dy:dy + nh, dx:dx + nw, :] = img

    info_img = (h, w, nh, nw, dx, dy)
    return sized, info_img

def rand_scale(s):
    """
    calculate random scaling factor
    Args:
        s (float): range of the random scale.
    Returns:
        random scaling factor (float) whose range is from 1 / s to s.
    """
    scale = np.random.uniform(low=1, high=s)
    if np.random.rand() > 0.5:
        return scale
    return 1 / scale

def random_distort(img, hue, saturation, exposure):
    """
    perform random distortion in the HSV color space.
    Args:
        img (numpy.ndarray): input image whose shape is :math:`(H, W, C)`.
            Values range from 0 to 255.
        hue (float): random distortion parameter.
        saturation (float): random distortion parameter.
        exposure (float): random distortion parameter.
    Returns:
        img (numpy.ndarray)
    """
    dhue = np.random.uniform(low=-hue, high=hue)
    dsat = rand_scale(saturation)
    dexp = rand_scale(exposure)

    img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    img = np.asarray(img, dtype=np.float32) / 255.
    img[:, :, 1] *= dsat
    img[:, :, 2] *= dexp
    H = img[:, :, 0] + dhue

    if dhue > 0:
        H[H > 1.0] -= 1.0
    else:
        H[H < 0.0] += 1.0

    img[:, :, 0] = H
    img = (img * 255).clip(0, 255).astype(np.uint8)
    img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
    img = np.asarray(img, dtype=np.float32)

    return img


def get_coco_label_names():
    """
    COCO label names and correspondence between the model's class index and COCO class index.
    Returns:
        coco_label_names (tuple of str): all the COCO label names including background class.
        coco_class_ids (list of int): index of 80 classes that are used in 'instance' annotations.
        coco_cls_colors (np.ndarray): randomly generated color vectors used for box visualization.
    """
    coco_label_names = ('background',  # class zero
                        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
                        'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign',
                        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                        'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella',
                        'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
                        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass',
                        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
                        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
                        'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk',
                        'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
                        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book',
                        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
                        )
    coco_class_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
                      21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
                      46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67,
                      70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]

    coco_cls_colors = np.random.randint(128, 255, size=(80, 3))

    return coco_label_names, coco_class_ids, coco_cls_colors
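
Editor's note: two quick usage sketches for the helpers above (illustration only, not part of the repository). postprocess() consumes raw head outputs shaped (batch, boxes, 5 + num_classes) laid out as (cx, cy, w, h, obj, class scores...), and preprocess()/box2label() form a letterbox round trip. The inputs are random/zero stand-ins, so the detection list may well be empty at this threshold, and the import paths assume the repository root is on PYTHONPATH:

import numpy as np
import torch
from utils.utils_coco import postprocess, preprocess, box2label

# Per-class NMS over random predictions: each returned entry is either None
# or a (K, 7) tensor of (x1, y1, x2, y2, obj_conf, class_conf, class_id).
preds = torch.rand(1, 100, 5 + 80)
preds[:, :, :4] *= 608  # centers/sizes in pixel space
dets = postprocess(preds, num_classes=80, conf_thre=0.7, nms_thre=0.45)
print([None if d is None else tuple(d.shape) for d in dets])

# Letterbox a 640x480 stand-in image to 608x608, then map a canvas-space
# (y1, x1, y2, x2) box back to original image coordinates.
img = np.zeros((480, 640, 3), dtype=np.uint8)
sized, info_img = preprocess(img, imgsize=608, jitter=0, random_placing=False)
print(sized.shape)  # (608, 608, 3)
print(box2label([100, 100, 300, 300], info_img))
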
--------------------------------------------------------------------------------
/utils/visualize.py:
--------------------------------------------------------------------------------
import collections
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont

_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]

def visualize_boxes(image, boxes, labels, probs, class_labels):
    category_index = {}
    for id_, label_name in enumerate(class_labels):
        category_index[id_] = {"name": label_name}
    image = visualize_boxes_and_labels_on_image_array(image, boxes, labels, probs, category_index)
    return image

def visualize_boxes_and_labels_on_image_array(
        image,
        boxes,
        classes,
        scores,
        category_index,
        instance_masks=None,
        instance_boundaries=None,
        use_normalized_coordinates=False,
        max_boxes_to_draw=200,
        min_score_thresh=.5,
        agnostic_mode=False,
        line_thickness=4,
        groundtruth_box_visualization_color='black',
        skip_scores=False,
        skip_labels=False):

    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    box_to_instance_masks_map = {}
    box_to_instance_boundaries_map = {}
    if not max_boxes_to_draw:
        max_boxes_to_draw = boxes.shape[0]

    sorted_ind = np.argsort(-scores)
    boxes = boxes[sorted_ind]
    scores = scores[sorted_ind]
    classes = classes[sorted_ind]
    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores is None or scores[i] > min_score_thresh:
            box = tuple(boxes[i].tolist())
            if instance_masks is not None:
                box_to_instance_masks_map[box] = instance_masks[i]
            if instance_boundaries is not None:
                box_to_instance_boundaries_map[box] = instance_boundaries[i]
            if scores is None:
                box_to_color_map[box] = groundtruth_box_visualization_color
            else:
                display_str = ''
                if not skip_labels:
                    if not agnostic_mode:
                        if classes[i] in category_index.keys():
                            class_name = category_index[classes[i]]['name']
                        else:
                            class_name = 'N/A'
                        display_str = str(class_name)
                if not skip_scores:
                    if not display_str:
                        display_str = '{}%'.format(int(100 * scores[i]))
                    else:
                        display_str = '{}: {}%'.format(display_str, int(100 * scores[i]))
                box_to_display_str_map[box].append(display_str)
                if agnostic_mode:
                    box_to_color_map[box] = 'DarkOrange'
                else:
                    box_to_color_map[box] = STANDARD_COLORS[
                        classes[i] % len(STANDARD_COLORS)]

    for box, color in box_to_color_map.items():
        xmin, ymin, xmax, ymax = box
        if instance_masks is not None:
            draw_mask_on_image_array(
                image,
                box_to_instance_masks_map[box],
                color=color
            )
        if instance_boundaries is not None:
            draw_mask_on_image_array(
                image,
                box_to_instance_boundaries_map[box],
                color='red',
                alpha=1.0
            )
        draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color=color,
                                         thickness=line_thickness, display_str_list=box_to_display_str_map[box],
                                         use_normalized_coordinates=use_normalized_coordinates)
    return image

def draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color='red',
                                     thickness=4, display_str_list=(), use_normalized_coordinates=True):
    image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
    draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
                               thickness, display_str_list,
                               use_normalized_coordinates)
    np.copyto(image, np.array(image_pil))


def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax, color='red', thickness=4,
                               display_str_list=(), use_normalized_coordinates=True):
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    if use_normalized_coordinates:
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
    else:
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
    draw.line([(left, top), (left, bottom), (right, bottom),
               (right, top), (left, top)], width=thickness, fill=color)
    # The label-text rendering below is intentionally disabled in the repository.
    '''
    try:
        font = ImageFont.truetype('arial.ttf', 24)
    except IOError:
        font = ImageFont.load_default()
    display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = bottom + total_display_str_height

    for display_str in display_str_list[::-1]:
        text_width, text_height = font.getsize(display_str)
        margin = np.ceil(0.05 * text_height)
        draw.rectangle(
            [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                              text_bottom)],
            fill=color)
        draw.text(
            (left + margin, text_bottom - text_height - margin),
            display_str,
            fill='black',
            font=font)
        text_bottom -= text_height - 2 * margin
    '''

def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
    if image.dtype != np.uint8:
        raise ValueError('`image` not of type np.uint8')
    if mask.dtype != np.uint8:
        raise ValueError('`mask` not of type np.uint8')
    if np.any(np.logical_and(mask != 1, mask != 0)):
        raise ValueError('`mask` elements should be in [0, 1]')
    if image.shape[:2] != mask.shape:
        raise ValueError('The image has spatial dimensions %s but the mask has '
                         'dimensions %s' % (image.shape[:2], mask.shape))
    rgb = ImageColor.getrgb(color)
    pil_image = Image.fromarray(image)
    solid_color = np.expand_dims(
        np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
    pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
    pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L')
    pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
    np.copyto(image, np.array(pil_image.convert('RGB')))
--------------------------------------------------------------------------------
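
Editor's note: a minimal smoke test for visualize_boxes() above (illustration only, not part of the repository). Boxes are pixel-space (xmin, ymin, xmax, ymax), labels are integer ids into class_labels ('plane' is an arbitrary stand-in), and the image array is drawn on in place:

import numpy as np
from utils.visualize import visualize_boxes

image = np.zeros((256, 256, 3), dtype=np.uint8)
boxes = np.array([[30.0, 40.0, 200.0, 180.0]])
labels = np.array([0])
probs = np.array([0.95])
image = visualize_boxes(image, boxes, labels, probs, class_labels=['plane'])
# Only the colored box outline is rendered as shipped; the 'plane: 95%'
# caption would appear too if the commented-out text block were re-enabled.
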