├── .gitattributes
├── LICENSE
├── README.md
├── batch_sampler.py
├── config
│   ├── __pycache__
│   │   ├── config.cpython-38.pyc
│   │   └── config.cpython-39.pyc
│   └── config.py
├── dataloadR
│   ├── batch_sampler.py
│   └── datasetsv2.py
├── datasetsv2.py
├── demo.py
├── evalR
│   ├── __pycache__
│   │   ├── eval.cpython-38.pyc
│   │   ├── evaluator.cpython-38.pyc
│   │   ├── evaluator1.cpython-38.pyc
│   │   ├── evaluator2.cpython-38.pyc
│   │   ├── evaluatorGGHL.cpython-38.pyc
│   │   ├── evaluatorGGHLv2.cpython-38.pyc
│   │   ├── evaluatorGGHLv2.cpython-39.pyc
│   │   ├── evaluatorGGHLv2_mask.cpython-38.pyc
│   │   ├── evaluatorGGHLv2_mask.cpython-39.pyc
│   │   ├── evaluatorGGHLv2plot.cpython-39.pyc
│   │   ├── evaluator_ABGH.cpython-38.pyc
│   │   ├── evaluator_Center.cpython-38.pyc
│   │   ├── evaluator_demo.cpython-39.pyc
│   │   ├── evaluator_new.cpython-38.pyc
│   │   ├── evaluatorfast.cpython-38.pyc
│   │   ├── voc_eval.cpython-36.pyc
│   │   ├── voc_eval.cpython-37.pyc
│   │   ├── voc_eval.cpython-38.pyc
│   │   └── voc_eval.cpython-39.pyc
│   ├── eval.py
│   ├── evaluatorGGHL.py
│   ├── evaluatorTS.py
│   ├── evaluatorTSplot.py
│   ├── evaluator_demo.py
│   └── voc_eval.py
├── lib
│   └── DCNv2
│       ├── dcn_v2.py
│       ├── dcn_v2_amp.py
│       ├── dcn_v2_onnx.py
│       ├── make.sh
│       ├── setup.py
│       ├── src
│       │   ├── cpu
│       │   │   ├── dcn_v2_cpu.cpp
│       │   │   ├── dcn_v2_im2col_cpu.cpp
│       │   │   ├── dcn_v2_im2col_cpu.h
│       │   │   ├── dcn_v2_psroi_pooling_cpu.cpp
│       │   │   └── vision.h
│       │   ├── cuda
│       │   │   ├── dcn_v2_cuda.cu
│       │   │   ├── dcn_v2_im2col_cuda.cu
│       │   │   ├── dcn_v2_im2col_cuda.h
│       │   │   ├── dcn_v2_psroi_pooling_cuda.cu
│       │   │   └── vision.h
│       │   ├── dcn_v2.h
│       │   └── vision.cpp
│       ├── testcpu.py
│       └── testcuda.py
├── model
│   ├── TSConv.py
│   ├── __pycache__
│   │   ├── GGHL4.cpython-38.pyc
│   │   ├── GGHL4.cpython-39.pyc
│   │   ├── GGHL6.cpython-39.pyc
│   │   ├── GGHL6single.cpython-39.pyc
│   │   ├── GGHL8.cpython-39.pyc
│   │   ├── GGHLv2.cpython-38.pyc
│   │   ├── GGHLv2.cpython-39.pyc
│   │   ├── double3090.cpython-38.pyc
│   │   └── double3090.cpython-39.pyc
│   ├── backbones
│   │   ├── __pycache__
│   │   │   ├── darknet53.cpython-38.pyc
│   │   │   ├── darknet53.cpython-39.pyc
│   │   │   ├── model_resnet.cpython-39.pyc
│   │   │   └── resnet.cpython-39.pyc
│   │   ├── darknet53.py
│   │   ├── mobilenetv2.py
│   │   ├── model_resnet.py
│   │   └── resnet.py
│   ├── head
│   │   ├── __pycache__
│   │   │   ├── head10.cpython-39.pyc
│   │   │   ├── head10single.cpython-39.pyc
│   │   │   ├── head11.cpython-39.pyc
│   │   │   ├── head3.cpython-38.pyc
│   │   │   ├── head4.cpython-38.pyc
│   │   │   ├── head4.cpython-39.pyc
│   │   │   ├── head5.cpython-38.pyc
│   │   │   ├── head5.cpython-39.pyc
│   │   │   ├── head6.cpython-39.pyc
│   │   │   ├── head7.cpython-39.pyc
│   │   │   ├── head9.cpython-39.pyc
│   │   │   ├── head_GGHLv2_x3.cpython-38.pyc
│   │   │   ├── head_ori.cpython-38.pyc
│   │   │   ├── headv2.cpython-38.pyc
│   │   │   ├── headv21.cpython-38.pyc
│   │   │   └── headv21.cpython-39.pyc
│   │   └── head.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── __init__.cpython-39.pyc
│   │   │   ├── activations.cpython-38.pyc
│   │   │   ├── activations.cpython-39.pyc
│   │   │   ├── conv_blocks.cpython-38.pyc
│   │   │   ├── conv_blocks.cpython-39.pyc
│   │   │   ├── convolutions.cpython-38.pyc
│   │   │   ├── convolutions.cpython-39.pyc
│   │   │   ├── msr_blocks.cpython-38.pyc
│   │   │   ├── msr_blocks.cpython-39.pyc
│   │   │   ├── multiscale_fusion_blocks.cpython-38.pyc
│   │   │   ├── multiscale_fusion_blocks.cpython-39.pyc
│   │   │   ├── np_attention_blocks.cpython-38.pyc
│   │   │   └── np_attention_blocks.cpython-39.pyc
│   │   ├── activations.py
│   │   ├── attention_blocks.py
│   │   ├── conv_blocks.py
│   │   ├── convolutions.py
│   │   ├── msr_blocks.py
│   │   ├── multiscale_fusion_blocks.py
│   │   └── np_attention_blocks.py
│   ├── loss
│   │   ├── __pycache__
│   │   │   ├── loss4.cpython-38.pyc
│   │   │   ├── loss4.cpython-39.pyc
│   │   │   ├── loss6.cpython-39.pyc
│   │   │   ├── lossv2.cpython-38.pyc
│   │   │   ├── lossv2.cpython-39.pyc
│   │   │   ├── lossv2single.cpython-39.pyc
│   │   │   ├── lossv8.cpython-39.pyc
│   │   │   ├── lossv88.cpython-39.pyc
│   │   │   ├── lossv9.cpython-39.pyc
│   │   │   ├── seesaw_loss.cpython-38.pyc
│   │   │   └── seesaw_loss.cpython-39.pyc
│   │   └── loss.py
│   └── neck
│       ├── __pycache__
│       │   ├── neckv2.cpython-38.pyc
│       │   ├── neckv2.cpython-39.pyc
│       │   └── neckv8.cpython-39.pyc
│       └── neck.py
├── predictionR
│   └── lr.png
├── requirements.txt
├── test.py
├── train_dist.sh
├── trainv2.py
└── utils
    ├── __pycache__
    │   ├── cosine_lr_scheduler.cpython-38.pyc
    │   ├── cosine_lr_scheduler.cpython-39.pyc
    │   ├── gpu.cpython-38.pyc
    │   ├── gpu.cpython-39.pyc
    │   ├── log.cpython-38.pyc
    │   ├── log.cpython-39.pyc
    │   ├── utils_basic.cpython-38.pyc
    │   ├── utils_basic.cpython-39.pyc
    │   ├── utils_coco.cpython-38.pyc
    │   ├── utils_coco.cpython-39.pyc
    │   ├── visualize.cpython-38.pyc
    │   └── visualize.cpython-39.pyc
    ├── cosine_lr_scheduler.py
    ├── gpu.py
    ├── log.py
    ├── mics.py
    ├── num_of_works_set.py
    ├── utils_basic.py
    ├── utils_coco.py
    └── visualize.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## There will be one final update next month, and then it is officially goodbye: this repository will no longer be updated on GitHub. If you have questions about the code, you can contact me by email.
2 |
3 | # New update~~
4 | # TS-Conv: Task-wise Sampling Convolutions for Arbitrary-Oriented Object Detection in Aerial Images
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | ## This is the implementation of TS-Conv 👋👋👋
24 | [[Arxiv](https://arxiv.org/abs/2209.02200)]
25 | #### 👹👹👹 Barring surprises, this is my last work before graduation, and probably my last in academia. It is a modest piece of work; thanks for your understanding.
26 | 
27 |
28 | ### Please give a ⭐️ if this project helped you. If you use it, please consider citing:
29 | ```bibtex
30 | @ARTICLE{9709203,
31 | author={Huang, Zhanchao and Li, Wei and Xia, Xiang-Gen and Wang, Hao and Tao, Ran},
32 | journal={arXiv},
33 | title={Task-wise Sampling Convolutions for Arbitrary-Oriented Object Detection in Aerial Images},
34 | year={2022},
35 | volume={},
36 | number={},
37 | pages={1-16},
38 | doi={10.48550/arXiv.2209.02200}}
39 | ```
40 | ### 🤡🤡🤡 Cloning without starring is just bad manners
41 |
42 | ## 0. Something Important 🦞 🦀 🦑
43 |
44 | * #### 🎃🎃🎃 The usage of the TS-Conv repository is the same as that of the ancestral repository [GGHL](https://github.com/Shank2358/GGHL). If you have any questions, please see the issues there.
45 | #### Usage is the same as the ancestral [GGHL](https://github.com/Shank2358/GGHL) repository; if you have questions, check the issues there. An MMRotate version is also being written. TS-Conv will keep being updated for a while: the main model code is done, with the key parts being the head, the DCN, and the label assignment in dataload; the rest is much like GGHL. I am also working hard on visualization and the remaining features and experiments.
46 |
47 | * #### 💖💖💖 Thanks to [Crescent-Ao](https://github.com/Crescent-Ao) and [haohaoolalahao](https://github.com/haohaolalahao) for their contributions to the GGHL repository, and thanks to [Crescent-Ao](https://github.com/Crescent-Ao) for the GGHL deployment version. The related repositories will continue to be updated, so stay tuned.
48 | #### A quick plug: the GGHL deployment version [GGHL-Deployment](https://github.com/Crescent-Ao/GGHL-Deployment) is now live; you are welcome to use it~~ Thanks to my dear junior colleagues [Crescent-Ao](https://github.com/Crescent-Ao) and [haohaolalahao](https://github.com/haohaolalahao) for their contributions to the GGHL repository, and thanks to [Crescent-Ao](https://github.com/Crescent-Ao) for completing the GGHL deployment version. The related repositories will continue to be updated, so stay tuned.
49 |
50 | * #### 😺😺😺 You are also welcome to check out MGAR, completed by [haohaoolalahao](https://github.com/haohaoolalahao) in cooperation with me, which has been accepted by [IEEE TGRS](https://ieeexplore.ieee.org/document/9912396).
51 | #### Another plug: please check out MGAR: Multi-Grained Angle Representation for Remote Sensing Object Detection, a remote sensing object detection work completed by [haohaolalahao](https://github.com/haohaolalahao) together with me. The paper has been formally accepted by [IEEE TGRS](https://ieeexplore.ieee.org/document/9912396) [Arxiv](https://arxiv.org/abs/2209.02884); citations are appreciated:
52 | ```bibtex
53 | @ARTICLE{9912396,
54 | author={Wang, Hao and Huang, Zhanchao and Chen, Zhengchao and Song, Ying and Li, Wei},
55 | journal={IEEE Transactions on Geoscience and Remote Sensing},
56 | title={Multi-Grained Angle Representation for Remote Sensing Object Detection},
57 | year={2022},
58 | volume={},
59 | number={},
60 | pages={1-1},
61 | doi={10.1109/TGRS.2022.3212592}}
62 | ```
63 |
64 | ## 🌈 1.Environments
65 | Linux (Ubuntu 18.04, GCC>=5.4) & Windows (Win10)
66 | CUDA > 11.1, cuDNN > 8.0.4
67 |
68 | First, install CUDA, cuDNN, and PyTorch.
69 | Second, install the dependent libraries in [requirements.txt](https://github.com/Shank2358/GGHL/blob/main/requirements.txt).
70 |
71 | ```bash
72 | conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
73 | pip install -r requirements.txt
74 | ```
75 |
76 | ## 🌟 2.Installation
77 | 1. git clone this repository
78 |
79 | 2. Polygon NMS
80 | The poly_nms in this version is implemented with the shapely and numpy libraries so that it works across different systems and environments without extra dependencies. The trade-off is slower detection in dense-object scenes. If you need more speed, compile and use the poly_iou library (the C++ implementation) in datasets_tools/DOTA_devkit; the compilation method is described in detail in [DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit).
81 |
82 | ```bash
83 | cd datasets_tools/DOTA_devkit
84 | sudo apt-get install swig
85 | swig -c++ -python polyiou.i
86 | python setup.py build_ext --inplace
87 | ```
88 |
89 | ## 🎃 3.Datasets
90 |
91 | 1. [DOTA dataset](https://captain-whu.github.io/DOTA/dataset.html) and its [devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit)
92 |
93 | #### (1) Training Format
94 | You need to write a script to convert the annotations into the train.txt file required by this repository and put it in the ./dataR folder.
95 | For the specific format of the train.txt file, see the example in the ./dataR folder.
96 |
97 | ```txt
98 | image_path xmin,ymin,xmax,ymax,class_id,x1,y1,x2,y2,x3,y3,x4,y4,area_ratio,angle[0,180) xmin,ymin,xmax,ymax,class_id,x1,y1,x2,y2,x3,y3,x4,y4,area_ratio,angle[0,180)...
99 | ```
100 | The calculation method of angle is explained in [Issues #1](https://github.com/Shank2358/GGHL/issues/1) and our paper.
101 |
102 | #### (2) Validation & Testing Format
103 | The same as the Pascal VOC format.
104 |
105 | #### (3) DataSets Files Structure
106 | ```
107 | cfg.DATA_PATH = "/opt/datasets/DOTA/"
108 | ├── ...
109 | ├── JPEGImages
110 | | ├── 000001.png
111 | | ├── 000002.png
112 | | └── ...
113 | ├── Annotations (DOTA Dataset Format)
114 | | ├── 000001.txt (class_idx x1 y1 x2 y2 x3 y3 x4 y4)
115 | | ├── 000002.txt
116 | | └── ...
117 | ├── ImageSets
118 | | └── test.txt (one testing filename per line)
119 | |     000001
120 | |     000002
121 | |     ...
122 | ```
123 | There is a DOTA2Train.py file in the datasets_tools folder that can be used to generate training and test format labels.
124 | First, you need to use [DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit) , the official tools of the DOTA dataset, for image and label splitting. Then, run DOTA2Train.py to convert them to the format required by GGHL. For the use of DOTA_devkit, please refer to the tutorial in the official repository.
125 |
126 | ## 🌠🌠🌠 4.Usage Example
127 | #### (1) Training
128 | ```bash
129 | sh train_dist.sh
130 | ```
131 |
132 | #### (2) Testing
133 | ```bash
134 | python test.py
135 | ```
136 | ## 📝 License
137 | Copyright © 2021 [Shank2358](https://github.com/Shank2358).
138 | This project is [GNU General Public License v3.0](https://github.com/Shank2358/GGHL/blob/main/LICENSE) licensed.
139 |
140 | ## 🤐 To be continued
141 |
--------------------------------------------------------------------------------
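
The train.txt format above packs one image path followed by space-separated object records, each holding 15 comma-separated fields. A minimal parsing sketch (a hypothetical helper, not part of this repo; the field order follows the README):

```python
def parse_train_line(line: str):
    """Parse one train.txt line: image_path, then one token per object of the form
    xmin,ymin,xmax,ymax,class_id,x1,y1,x2,y2,x3,y3,x4,y4,area_ratio,angle."""
    tokens = line.strip().split(' ')
    image_path, objects = tokens[0], []
    for tok in tokens[1:]:
        v = tok.split(',')
        objects.append({
            'hbb': [float(x) for x in v[0:4]],   # horizontal box (xmin, ymin, xmax, ymax)
            'class_id': int(v[4]),
            'obb': [float(x) for x in v[5:13]],  # four oriented corners x1,y1 ... x4,y4
            'area_ratio': float(v[13]),
            'angle': float(v[14]),               # in [0, 180)
        })
    return image_path, objects
```
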
/batch_sampler.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Sampler, RandomSampler, SequentialSampler
2 | import numpy as np
3 | import config.config as cfg
4 |
5 |
6 | class BatchSampler(object):
7 | def __init__(
8 | self, sampler, batch_size, drop_last, multiscale_step=None, img_sizes=None
9 | ):
10 | if not isinstance(sampler, Sampler):
11 | raise ValueError(
12 | "sampler should be an instance of "
13 | "torch.utils.data.Sampler, but got sampler={}".format(sampler)
14 | )
15 | if not isinstance(drop_last, bool):
16 | raise ValueError(
17 | "drop_last should be a boolean value, but got "
18 | "drop_last={}".format(drop_last)
19 | )
20 | self.sampler = sampler
21 | self.batch_size = batch_size
22 | self.drop_last = drop_last
23 | if multiscale_step is not None and multiscale_step < 1:
24 | raise ValueError(
25 | "multiscale_step should be > 0, but got "
26 | "multiscale_step={}".format(multiscale_step)
27 | )
28 | if multiscale_step is not None and img_sizes is None:
29 | raise ValueError(
30 |                 "img_sizes must be a list, but got img_sizes={}".format(img_sizes)
31 | )
32 |
33 | self.multiscale_step = multiscale_step
34 | self.img_sizes = img_sizes
35 |
36 | def __iter__(self):
37 |
38 | num_batch = 0
39 | batch = []
40 | size = cfg.TRAIN["TRAIN_IMG_SIZE"]
41 |
42 | for idx in iter(self.sampler):
43 | batch.append([idx, size])
44 | if len(batch) == self.batch_size:
45 | yield batch
46 | num_batch += 1
47 | batch = []
48 | if self.multiscale_step and num_batch % self.multiscale_step == 0:
49 | size = np.random.choice(self.img_sizes)
50 | if len(batch) > 0 and not self.drop_last:
51 | yield batch
52 |
53 | def __len__(self):
54 | if self.drop_last:
55 | return len(self.sampler) // self.batch_size
56 | else:
57 | return (len(self.sampler) + self.batch_size - 1) // self.batch_size
58 |
--------------------------------------------------------------------------------
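
BatchSampler above pairs every dataset index with a target image size and only changes that size every `multiscale_step` batches, which keeps all samples within one batch at the same resolution. A minimal usage sketch (the dummy dataset, batch size, and size list are assumptions, not repo defaults; run from the repo root so `config.config` resolves):

```python
import torch
from torch.utils.data import DataLoader, Dataset, RandomSampler
from batch_sampler import BatchSampler  # the class defined above

class DummyDataset(Dataset):
    """Stand-in dataset: BatchSampler yields [index, size] pairs, so
    __getitem__ must accept such a pair and return a size-sized sample."""
    def __len__(self):
        return 100
    def __getitem__(self, item):
        idx, size = item
        return torch.zeros(3, size, size)  # placeholder for a resized image

dataset = DummyDataset()
batch_sampler = BatchSampler(RandomSampler(dataset), batch_size=8, drop_last=True,
                             multiscale_step=10,  # draw a new size every 10 batches
                             img_sizes=[736, 768, 800, 832, 864])
loader = DataLoader(dataset, batch_sampler=batch_sampler)
# The first batches use cfg.TRAIN["TRAIN_IMG_SIZE"]; later ones use sizes from img_sizes.
print(next(iter(loader)).shape)  # e.g. torch.Size([8, 3, 960, 960])
```
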
/config/__pycache__/config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/config/__pycache__/config.cpython-38.pyc
--------------------------------------------------------------------------------
/config/__pycache__/config.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/config/__pycache__/config.cpython-39.pyc
--------------------------------------------------------------------------------
/config/config.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | DATA_PATH = "/mnt/datasets/DOTAx/test/"
3 | PROJECT_PATH = "./"
4 |
5 | DATA = {"CLASSES": ['plane',
6 | 'baseball-diamond',
7 | 'bridge',
8 | 'ground-track-field',
9 | 'small-vehicle',
10 | 'large-vehicle',
11 | 'ship',
12 | 'tennis-court',
13 | 'basketball-court',
14 | 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter'],
15 | "NUM": 15}
16 | #,'container-crane', 'airport', 'helipad','container-crane', 'airport', 'helipad'
17 |
18 | DATASET_NAME = "train_DOTAx" #"trainSKU110KR11"#'ssdd'#"train_HRSC2016"#"trainSKU110KR11"#"train_DOTAxv1.5"
19 | MODEL = {"STRIDES":[8, 16, 32], "SCALES_PER_LAYER": 3}
20 |
21 | TRAIN = {
22 | "Transformer_SIZE": 896,
23 | "EVAL_TYPE": 'VOC',
24 | "TRAIN_IMG_SIZE": 960,
25 | "TRAIN_IMG_NUM": 79780,
26 | "AUGMENT": True,
27 | "MULTI_SCALE_TRAIN": True,
28 | "MULTI_TRAIN_RANGE": [23, 28, 1],#[26, 31, 1]
29 | "BATCH_SIZE": 24,
30 | "IOU_THRESHOLD_LOSS": 0.6,
31 | "EPOCHS": 36,
32 | "NUMBER_WORKERS": 8,
33 | "MOMENTUM": 0.9,
34 | "WEIGHT_DECAY": 0.0005,
35 | "LR_INIT": 5e-4,
36 | "LR_END": 1e-6,
37 | "WARMUP_EPOCHS": 5,
38 | "IOU_TYPE": 'GIOU'
39 | }
40 |
41 | TEST = {
42 | "EVAL_TYPE": 'VOC',
43 | "EVAL_JSON": 'test.json',
44 | "EVAL_NAME": 'test',
45 | "NUM_VIS_IMG": 0,
46 | "TEST_IMG_SIZE": 800,
47 | "BATCH_SIZE": 4,
48 | "NUMBER_WORKERS": 16,
49 | "CONF_THRESH": 0.06,
50 | "NMS_THRESH": 0.4,
51 | "IOU_THRESHOLD": 0.5,
52 | "NMS_METHODS": 'NMS',
53 | "MULTI_SCALE_TEST": False,
54 | "MULTI_TEST_RANGE": [832, 992, 32],
55 | "FLIP_TEST": False
56 | }
57 |
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
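
One plausible reading of `MULTI_TRAIN_RANGE` (an assumption; the authoritative use is in the training code, which this dump does not show) is that it enumerates multiples of the 32-pixel maximum stride, so [23, 28, 1] would yield the multi-scale training sizes:

```python
import config.config as cfg

# Hypothetical derivation of the multi-scale size list from MULTI_TRAIN_RANGE,
# assuming each entry is a multiple of the 32-pixel maximum stride.
lo, hi, step = cfg.TRAIN["MULTI_TRAIN_RANGE"]
img_sizes = [s * 32 for s in range(lo, hi, step)]
print(img_sizes)  # [736, 768, 800, 832, 864]
```
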
/dataloadR/batch_sampler.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Sampler, RandomSampler, SequentialSampler
2 | import numpy as np
3 | import config.config as cfg
4 |
5 |
6 | class BatchSampler(object):
7 | def __init__(
8 | self, sampler, batch_size, drop_last, multiscale_step=None, img_sizes=None
9 | ):
10 | if not isinstance(sampler, Sampler):
11 | raise ValueError(
12 | "sampler should be an instance of "
13 | "torch.utils.data.Sampler, but got sampler={}".format(sampler)
14 | )
15 | if not isinstance(drop_last, bool):
16 | raise ValueError(
17 | "drop_last should be a boolean value, but got "
18 | "drop_last={}".format(drop_last)
19 | )
20 | self.sampler = sampler
21 | self.batch_size = batch_size
22 | self.drop_last = drop_last
23 | if multiscale_step is not None and multiscale_step < 1:
24 | raise ValueError(
25 | "multiscale_step should be > 0, but got "
26 | "multiscale_step={}".format(multiscale_step)
27 | )
28 | if multiscale_step is not None and img_sizes is None:
29 | raise ValueError(
30 |                 "img_sizes must be a list, but got img_sizes={}".format(img_sizes)
31 | )
32 |
33 | self.multiscale_step = multiscale_step
34 | self.img_sizes = img_sizes
35 |
36 | def __iter__(self):
37 |
38 | num_batch = 0
39 | batch = []
40 | size = cfg.TRAIN["TRAIN_IMG_SIZE"]
41 |
42 | for idx in iter(self.sampler):
43 | batch.append([idx, size])
44 | if len(batch) == self.batch_size:
45 | yield batch
46 | num_batch += 1
47 | batch = []
48 | if self.multiscale_step and num_batch % self.multiscale_step == 0:
49 | size = np.random.choice(self.img_sizes)
50 | if len(batch) > 0 and not self.drop_last:
51 | yield batch
52 |
53 | def __len__(self):
54 | if self.drop_last:
55 | return len(self.sampler) // self.batch_size
56 | else:
57 | return (len(self.sampler) + self.batch_size - 1) // self.batch_size
58 |
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import utils.gpu as gpu
2 | from model.TSConv import GGHL
3 | from tensorboardX import SummaryWriter
4 | from evalR.evaluator_demo import Evaluator
5 | import argparse
6 | import os
7 | import config.config as cfg
8 | import time
9 | import logging
10 | from utils.utils_coco import *
11 | from utils.log import Logger
12 | from torch.cuda import amp
13 | from copy import deepcopy
14 |
15 | class Tester(object):
16 | def __init__(self, weight_path=None, gpu_id=0, visiual=None, eval=False):
17 | self.img_size = cfg.TEST["TEST_IMG_SIZE"]
18 | self.__num_class = cfg.DATA["NUM"]
19 | self.__conf_threshold = cfg.TEST["CONF_THRESH"]
20 | self.__nms_threshold = cfg.TEST["NMS_THRESH"]
21 | self.__device = gpu.select_device(gpu_id, force_cpu=False)
22 | self.__multi_scale_test = cfg.TEST["MULTI_SCALE_TEST"]
23 | self.__flip_test = cfg.TEST["FLIP_TEST"]
24 | self.__classes = cfg.DATA["CLASSES"]
25 |
26 | self.__visiual = visiual
27 | self.__eval = eval
28 | self.__model = GGHL().eval().to(self.__device) # Single GPU
29 |
30 | self.__load_model_weights(weight_path)
31 |
32 | def __load_model_weights(self, weight_path):
33 | print("loading weight file from : {}".format(weight_path))
34 | weight = os.path.join(weight_path)
35 | chkpt = torch.load(weight, map_location=self.__device)
36 | self.__model.load_state_dict(chkpt) #['model']
37 | del chkpt
38 |
39 | def test(self):
40 |         img_id = '00001'  # id of the image to test
41 | with torch.no_grad():
42 | Evaluator(self.__model).Test_single_img(img_id)
43 |
44 |
45 |
46 | if __name__ == "__main__":
47 | global logger
48 | parser = argparse.ArgumentParser()
49 | parser.add_argument('--weight_path', type=str, default='/home/hzc/v2/weight/5.15/best.pt', help='weight file path')
50 |     parser.add_argument('--log_val_path', type=str, default='log/', help='validation log path')
51 | parser.add_argument('--visiual', type=str, default=None, help='test data path or None')
52 | parser.add_argument('--eval', action='store_true', default=True, help='eval flag')
53 | parser.add_argument('--gpu_id', type=int, default=0, help='gpu id')
54 | parser.add_argument('--log_path', type=str, default='log/', help='log path')
55 | opt = parser.parse_args()
56 | writer = SummaryWriter(logdir=opt.log_path + '/event')
57 | logger = Logger(log_file_name=opt.log_val_path + '/log_coco_test.txt', log_level=logging.DEBUG,
58 | logger_name='GGHL').get_log()
59 |
60 | Tester(weight_path=opt.weight_path, gpu_id=opt.gpu_id, eval=opt.eval, visiual=opt.visiual).test()
--------------------------------------------------------------------------------
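
A hypothetical programmatic use of the Tester above (the weight path is a placeholder), equivalent to running `python demo.py --weight_path ...`; importing demo is safe because its CLI code is guarded by `if __name__ == "__main__"`:

```python
from demo import Tester

# Placeholder path; point this at a real checkpoint.
tester = Tester(weight_path="weight/best.pt", gpu_id=0, eval=True)
tester.test()  # runs Test_single_img on the image id hard-coded in test()
```
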
/evalR/__pycache__/eval.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/eval.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluator.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluator1.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluator1.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluator2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluator2.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluatorGGHL.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluatorGGHL.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluatorGGHLv2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluatorGGHLv2.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluatorGGHLv2.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluatorGGHLv2.cpython-39.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluatorGGHLv2_mask.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluatorGGHLv2_mask.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluatorGGHLv2_mask.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluatorGGHLv2_mask.cpython-39.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluatorGGHLv2plot.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluatorGGHLv2plot.cpython-39.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluator_ABGH.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluator_ABGH.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluator_Center.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluator_Center.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluator_demo.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluator_demo.cpython-39.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluator_new.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluator_new.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/evaluatorfast.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/evaluatorfast.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/voc_eval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/voc_eval.cpython-36.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/voc_eval.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/voc_eval.cpython-37.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/voc_eval.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/voc_eval.cpython-38.pyc
--------------------------------------------------------------------------------
/evalR/__pycache__/voc_eval.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/evalR/__pycache__/voc_eval.cpython-39.pyc
--------------------------------------------------------------------------------
/evalR/evaluator_demo.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import time
3 | from tqdm import tqdm
4 | import torch.nn.functional as F
5 | from dataloadR.augmentations import *
6 | from evalR import voc_eval
7 | from utils.utils_basic import *
8 | from utils.visualize import *
9 | import time
10 | import multiprocessing
11 | from multiprocessing.dummy import Pool as ThreadPool  # thread pool
12 | from collections import defaultdict
13 |
14 | current_milli_time = lambda: int(round(time.time() * 1000))
15 |
16 |
17 | class Evaluator(object):
18 | def __init__(self, model, visiual=True):
19 | self.classes = cfg.DATA["CLASSES"]
20 | self.classes_num = cfg.DATA["NUM"]
21 |         self.pred_result_path = os.path.join(cfg.PROJECT_PATH, 'predictionR')  # save path for prediction results
22 | self.val_data_path = cfg.DATA_PATH
23 | self.strides = cfg.MODEL["STRIDES"]
24 | self.conf_thresh = cfg.TEST["CONF_THRESH"]
25 | self.nms_thresh = cfg.TEST["NMS_THRESH"]
26 | self.val_shape = cfg.TEST["TEST_IMG_SIZE"]
27 | self.__visiual = visiual
28 | self.__visual_imgs = cfg.TEST["NUM_VIS_IMG"]
29 | self.model = model
30 | self.device = next(model.parameters()).device
31 | self.inference_time = 0.
32 | self.iouthresh_test = cfg.TEST["IOU_THRESHOLD"]
33 | self.multi_test = cfg.TEST["MULTI_SCALE_TEST"]
34 | self.flip_test = cfg.TEST["FLIP_TEST"]
35 |
36 | def Test_single_img(self, img_id):
37 |         img_path = os.path.join(self.val_data_path, 'JPEGImages', img_id + '.png')  # path of the test image
38 | img = cv2.imread(img_path)
39 | bboxes_prd = self.get_bbox(img, self.multi_test, self.flip_test)
40 | for bbox in bboxes_prd:
41 | x1 = bbox[0]
42 | y1 = bbox[1]
43 | x2 = bbox[2]
44 | y2 = bbox[3]
45 | x3 = bbox[4]
46 | y3 = bbox[5]
47 | x4 = bbox[6]
48 | y4 = bbox[7]
49 | score = bbox[8]
50 | class_ind = int(bbox[9])
51 | class_name = self.classes[class_ind]
52 | score = '%.4f' % score
53 | ''''''
54 | color = np.zeros(3)
55 | points = np.array(
56 | [[int(x1), int(y1)], [int(x2), int(y2)], [int(x3), int(y3)], [int(x4), int(y4)]])
57 | if int(class_ind) == 0: color = (64, 0, 0)
58 | elif int(class_ind) == 1: color = (255, 0, 0)
59 | elif int(class_ind) == 2: color = (0, 255, 255)
60 | elif int(class_ind) == 3: color = (0, 0, 255)
61 | elif int(class_ind) == 4: color = (0, 255, 0)
62 | elif int(class_ind) == 5: color = (255, 0, 0)
63 | elif int(class_ind) == 6: color = (0, 128, 255)
64 | elif int(class_ind) == 7: color = (0, 0, 128)
65 | elif int(class_ind) == 8: color = (0, 128, 0)
66 | elif int(class_ind) == 9: color = (128, 0, 0)
67 | elif int(class_ind) == 10: color = (128, 128, 0)
68 | elif int(class_ind) == 11: color = (0, 128, 128)
69 | elif int(class_ind) == 12: color = (128, 128, 0)
70 | elif int(class_ind) == 13: color = (0, 255, 128)
71 | elif int(class_ind) == 14: color = (255, 128, 255)
72 | cv2.polylines(img, [points], 1, color, 2)
73 | font = cv2.FONT_HERSHEY_SIMPLEX
74 | img = cv2.putText(img, class_name + ' ' + score[:4], (int(float(x1)), int(float(y1))), font, 0.3, (255, 255, 255), 1)
75 |         store_path = os.path.join(self.pred_result_path, 'imgs', img_id + '.png')  # path to save the visualized result
76 |         cv2.imwrite(store_path, img, [int(cv2.IMWRITE_PNG_COMPRESSION), 0])  # PNG output, so pass a PNG flag
77 |
78 | def get_bbox(self, img, multi_test=False, flip_test=False):
79 | if multi_test:
80 | test_input_sizes = range(cfg.TEST["MULTI_TEST_RANGE"][0], cfg.TEST["MULTI_TEST_RANGE"][1],
81 | cfg.TEST["MULTI_TEST_RANGE"][2])
82 | bboxes_list = []
83 | for test_input_size in test_input_sizes:
84 | valid_scale = (0, np.inf)
85 | bboxes_list.append(self.__predict(img, test_input_size, valid_scale))
86 | if flip_test:
87 | bboxes_flip = self.__predict(img[:, ::-1], test_input_size, valid_scale)
88 | bboxes_flip[:, [0, 2]] = img.shape[1] - bboxes_flip[:, [2, 0]]
89 | bboxes_list.append(bboxes_flip)
90 | bboxes = np.row_stack(bboxes_list)
91 | else:
92 | bboxes = self.__predict(img, self.val_shape, (0, np.inf))
93 | bboxes = self.non_max_suppression_4points(bboxes, self.conf_thresh, self.nms_thresh, multi_label=False)
94 | return bboxes[0].cpu().numpy()
95 |
96 | def __predict(self, img, test_shape, valid_scale):
97 | org_img = np.copy(img)
98 | org_h, org_w, _ = org_img.shape
99 | img = self.__get_img_tensor(img, test_shape).to(self.device)
100 | self.model.eval()
101 | with torch.no_grad():
102 | start_time = current_milli_time()
103 | _, _, _, p_d = self.model(img)
104 | self.inference_time += (current_milli_time() - start_time)
105 |
106 | pred_bbox = p_d.squeeze()
107 | bboxes = self.__convert_pred(pred_bbox, test_shape, (org_h, org_w), valid_scale)
108 | return bboxes
109 |
110 | def __get_img_tensor(self, img, test_shape):
111 | img = Resize((test_shape, test_shape), correct_box=False)(img, None).transpose(2, 0, 1)
112 | return torch.from_numpy(img[np.newaxis, ...]).float()
113 |
114 | def __convert_pred(self, pred_bbox, test_input_size, org_img_shape, valid_scale):
115 | pred_xyxy = xywh2xyxy(pred_bbox[:, :4]) # xywh2xyxy
116 | pred_conf = pred_bbox[:, 13]
117 | pred_prob = pred_bbox[:, 14:]
118 | org_h, org_w = org_img_shape
119 | resize_ratio = min(1.0 * test_input_size / org_w, 1.0 * test_input_size / org_h)
120 | dw = (test_input_size - resize_ratio * org_w) / 2
121 | dh = (test_input_size - resize_ratio * org_h) / 2
122 | pred_xyxy[:, 0::2] = 1.0 * (pred_xyxy[:, 0::2] - dw) / resize_ratio
123 | pred_xyxy[:, 1::2] = 1.0 * (pred_xyxy[:, 1::2] - dh) / resize_ratio
124 | pred_s = pred_bbox[:, 4:8]
125 | pred_r = pred_bbox[:, 8:9]
126 | zero = torch.zeros_like(pred_s)
127 | pred_s= torch.where(pred_r > 0.9, zero, pred_s)
128 |         # (2) clip the parts of the predicted bboxes that fall outside the original image
129 | device = pred_bbox.device
130 | pred_xyxy = torch.cat(
131 | [torch.maximum(pred_xyxy[:, :2], torch.tensor([0, 0]).to(device)),
132 | torch.minimum(pred_xyxy[:, 2:], torch.tensor([org_w - 1, org_h - 1]).to(device))], dim=-1)
133 |
134 | invalid_mask = torch.logical_or((pred_xyxy[:, 0] > pred_xyxy[:, 2]), (pred_xyxy[:, 1] > pred_xyxy[:, 3]))
135 | pred_xyxy[invalid_mask] = 0
136 | pred_s[invalid_mask] = 0
137 |         # (4) drop bboxes whose scale is outside the valid range
138 | bboxes_scale = torch.sqrt((pred_xyxy[..., 2:3] - pred_xyxy[..., 0:1]) * (pred_xyxy[..., 3:4] - pred_xyxy[..., 1:2]))
139 | scale_mask = torch.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])).squeeze(-1)
140 |         # (5) drop bboxes whose score is below the confidence threshold
141 | classes = torch.argmax(pred_prob, dim=-1)
142 | scores = pred_conf * pred_prob[torch.arange(len(pred_xyxy)), classes]
143 | score_mask = scores > self.conf_thresh
144 | mask = torch.logical_and(scale_mask, score_mask)
145 | pred_xyxy = pred_xyxy[mask]
146 | pred_s = pred_s[mask]
147 | pred_conf = pred_conf[mask]
148 | #classes = classes[mask]
149 | pred_prob = pred_prob[mask]
150 | x1 = pred_s[:, 0:1] * (pred_xyxy[:, 2:3] - pred_xyxy[:, 0:1]) + pred_xyxy[:, 0:1]
151 | y1 = pred_xyxy[:, 1:2]
152 | x2 = pred_xyxy[:, 2:3]
153 | y2 = pred_s[:, 1:2] * (pred_xyxy[:, 3:4] - pred_xyxy[:, 1:2]) + pred_xyxy[:, 1:2]
154 | x3 = pred_xyxy[:, 2:3] - pred_s[:, 2:3] * (pred_xyxy[:, 2:3] - pred_xyxy[:, 0:1])
155 | y3 = pred_xyxy[:, 3:4]
156 | x4 = pred_xyxy[:, 0:1]
157 | y4 = pred_xyxy[:, 3:4] - pred_s[:, 3:4] * (pred_xyxy[:, 3:4] - pred_xyxy[:, 1:2])
158 | coor4points = torch.cat([x1, y1, x2, y2, x3, y3, x4, y4], dim=-1)
159 |
160 | bboxes = torch.cat([coor4points, pred_conf.unsqueeze(-1), pred_prob], dim=-1)
161 |         bs = cfg.TEST["NUMBER_WORKERS"]  # used here as the batch dimension for reshaping
162 |         bboxes = bboxes.view(bs, -1, bboxes.shape[1])
163 | return bboxes
164 |
165 | def non_max_suppression_4points(self,
166 | prediction, conf_thres=0.2, iou_thres=0.45, merge=False, classes=None, multi_label=False, agnostic=False,
167 | without_iouthres=False
168 | ):
169 |         nc = prediction[0].shape[1] - 9  # number of classes (layout: 8 corner coords + obj conf + class scores)
170 |         xc = prediction[..., 8] > conf_thres  # candidates above the objectness threshold
171 |
172 | # Settings
173 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
174 | max_det = 500 # maximum number of detections per image
175 | time_limit = 10.0 # seconds to quit after
176 | redundant = True # require redundant detections
177 | multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
178 |
179 | t = time.time()
180 | # output: (batch_size, ?)
181 | output = [torch.zeros((0, 10), device=prediction.device)] * prediction.shape[0]
182 | for xi, x in enumerate(prediction): # image index, image inference
183 | # Apply constraints
184 | x = x[xc[xi]] # x -> (num_confthres_boxes, no)
185 | # If none remain process next image
186 | if not x.shape[0]:
187 | continue
188 | # Compute conf
189 | x[:, 9:] *= x[:, 8:9] # conf = obj_conf * cls_conf
190 | box = x[:, :8]
191 | if multi_label:
192 | i, j = (x[:, 9:] > conf_thres).nonzero(as_tuple=False).T
193 |                 # concatenated column-wise; x: (num_confthres_boxes, [xywhθ]+[conf]+[classid]), θ ∈ [0,179]
194 | x = torch.cat((box[i], x[i, j + 9, None], j[:, None].float()), 1)
195 | else: # best class only
196 | conf, j = x[:, 9:].max(1, keepdim=True)
197 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
198 |
199 |             if without_iouthres:  # skip IoU-based NMS
200 | output[xi] = x
201 | continue
202 |             # Filter by class
203 |             if classes:
204 |                 x = x[(x[:, 9:] == torch.tensor(classes, device=x.device)).any(1)]  # keep rows whose class id matches any requested class
205 | # If none remain process next image
206 | n = x.shape[0] # number of boxes
207 | if not n:
208 | continue
209 | # Sort by confidence
210 | c = x[:, 9:] * (0 if agnostic else max_wh) # classes
211 | boxes_4points, scores = x[:, :8] + c, x[:, 8]
212 | i = np.array(py_cpu_nms_poly_fast(np.double(boxes_4points.cpu().numpy()), scores.cpu().numpy(), iou_thres))
213 | if i.shape[0] > max_det: # limit detections
214 | i = i[:max_det]
215 | temp = x[i].clone()
216 |             # output[xi] = x[i]  # take the boxes kept by the NMS indices; x.size = (num_conf_nms, [xywhθ, conf, classid]), θ ∈ [0,179]
217 | output[xi] = temp
218 | if (time.time() - t) > time_limit:
219 | break # time limit exceeded
220 | return output
--------------------------------------------------------------------------------
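
The corner decoding in `__convert_pred` (lines 150-157 above) follows the GGHL box representation: an oriented box is stored as a horizontal bounding box plus four side ratios, and each ratio places one polygon vertex along one side of that box. A toy numeric check with made-up values:

```python
import numpy as np

# Made-up hbb (xmin, ymin, xmax, ymax) and side ratios (s1, s2, s3, s4):
xmin, ymin, xmax, ymax = 10.0, 20.0, 110.0, 80.0
s1, s2, s3, s4 = 0.3, 0.4, 0.3, 0.4
w, h = xmax - xmin, ymax - ymin

p1 = (xmin + s1 * w, ymin)  # vertex on the top edge
p2 = (xmax, ymin + s2 * h)  # vertex on the right edge
p3 = (xmax - s3 * w, ymax)  # vertex on the bottom edge
p4 = (xmin, ymax - s4 * h)  # vertex on the left edge
print(p1, p2, p3, p4)
# (40.0, 20.0) (110.0, 44.0) (80.0, 80.0) (10.0, 56.0)
```

This also explains lines 126-127: when the rotation indicator `pred_r` exceeds 0.9, the ratios are zeroed, which collapses the polygon back to the horizontal box.
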
/evalR/voc_eval.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | import os
3 | import pickle
4 | import numpy as np
5 | from utils.utils_basic import *
6 |
7 |
8 | def parse_rec(filename):
9 | """ Parse a PASCAL VOC xml file """
10 | tree = ET.parse(filename)
11 | objects = []
12 | for obj in tree.findall('object'):
13 | obj_struct = {}
14 | obj_struct['name'] = obj.find('name').text
15 | obj_struct['pose'] = obj.find('pose').text
16 | obj_struct['truncated'] = int(obj.find('truncated').text)
17 | obj_struct['difficult'] = int(obj.find('difficult').text)
18 | bbox = obj.find('bndbox')
19 | obj_struct['bbox'] = [int(bbox.find('xmin').text),
20 | int(bbox.find('ymin').text),
21 | int(bbox.find('xmax').text),
22 | int(bbox.find('ymax').text)]
23 | objects.append(obj_struct)
24 |
25 | return objects
26 |
27 |
28 | def parse_poly(filename):
29 | """
30 | :param filename: ground truth file to parse
31 | :return: all instances in a picture
32 | """
33 | objects = []
34 | with open(filename, 'r') as f:
35 | while True:
36 | line = f.readline()
37 | if line:
38 | splitlines = line.strip().split(' ')
39 | object_struct = {}
40 | if (len(splitlines) < 9):
41 | continue
42 | classes = cfg.DATA["CLASSES"]
43 | object_struct['name'] = classes[int(splitlines[0])]
44 | # object_struct['name'] = splitlines[0]
45 | if (len(splitlines) == 9):
46 | object_struct['difficult'] = 0
47 | elif (len(splitlines) == 10):
48 | object_struct['difficult'] = int(splitlines[9])
49 | object_struct['bbox'] = [float(splitlines[1]),
50 | float(splitlines[2]),
51 | float(splitlines[3]),
52 | float(splitlines[4]),
53 | float(splitlines[5]),
54 | float(splitlines[6]),
55 | float(splitlines[7]),
56 | float(splitlines[8])]
57 | objects.append(object_struct)
58 | else:
59 | break
60 | return objects
61 |
62 |
63 | def voc_ap(rec, prec, use_07_metric=False):
64 | """ ap = voc_ap(rec, prec, [use_07_metric])
65 | Compute VOC AP given precision and recall.
66 | If use_07_metric is true, uses the
67 | VOC 07 11 point method (default:False).
68 | """
69 | if use_07_metric:
70 | # 11 point metric
71 | ap = 0.
72 | for t in np.arange(0., 1.1, 0.1):
73 | if np.sum(rec >= t) == 0:
74 | p = 0
75 | else:
76 | p = np.max(prec[rec >= t])
77 | ap = ap + p / 11.
78 | else:
79 | # correct AP calculation
80 | # first append sentinel values at the end
81 | mrec = np.concatenate(([0.], rec, [1.]))
82 | mpre = np.concatenate(([0.], prec, [0.]))
83 |
84 | # compute the precision envelope
85 | for i in range(mpre.size - 1, 0, -1):
86 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
87 |
88 | # to calculate area under PR curve, look for points
89 | # where X axis (recall) changes value
90 | i = np.where(mrec[1:] != mrec[:-1])[0]
91 |
92 | # and sum (\Delta recall) * prec
93 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
94 | return ap
95 |
96 |
97 | ''''''
98 |
99 |
100 | def voc_eval(detpath,
101 | annopath,
102 | imagesetfile,
103 | classname,
104 | cachedir,
105 | ovthresh=0.5,
106 | use_07_metric=False):
107 | # first load gt
108 | if not os.path.isdir(cachedir):
109 | os.mkdir(cachedir)
110 | cachefile = os.path.join(cachedir, 'annots.pkl')
111 | # read list of images
112 | with open(imagesetfile, 'r') as f:
113 | lines = f.readlines()
114 | imagenames = [x.strip() for x in lines]
115 |
116 | if not os.path.isfile(cachefile):
117 | # load annots
118 | recs = {}
119 | for i, imagename in enumerate(imagenames):
120 | ####################parse_poly
121 | recs[imagename] = parse_poly(annopath.format(imagename))
122 | if i % 100 == 0:
123 | print('Reading annotation for {:d}/{:d}'.format(
124 | i + 1, len(imagenames)))
125 | # save
126 | print('Saving cached annotations to {:s}'.format(cachefile))
127 | with open(cachefile, 'wb') as f:
128 | pickle.dump(recs, f)
129 | else:
130 | # load
131 | with open(cachefile, 'rb') as f:
132 | recs = pickle.load(f)
133 |
134 | # extract gt objects for this class
135 | class_recs = {}
136 | npos = 0
137 | for imagename in imagenames:
138 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
139 | bbox = np.array([x['bbox'] for x in R])
140 |         difficult = np.array([x['difficult'] for x in R]).astype(bool)  # np.bool is removed in NumPy >= 1.24
141 | det = [False] * len(R)
142 | npos = npos + sum(~difficult)
143 | class_recs[imagename] = {'bbox': bbox,
144 | 'difficult': difficult,
145 | 'det': det}
146 |
147 | # read dets#######################
148 | detfile = detpath.format(classname)
149 | with open(detfile, 'r') as f:
150 | lines = f.readlines()
151 |
152 | splitlines = [x.strip().split(' ') for x in lines]
153 | image_ids = [x[0] for x in splitlines]
154 | confidence = np.array([float(x[1]) for x in splitlines])
155 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
156 |
157 | # sort by confidence
158 | sorted_ind = np.argsort(-confidence)
159 | sorted_scores = np.sort(-confidence)
160 | BB = BB[sorted_ind, :]
161 | image_ids = [image_ids[x] for x in sorted_ind]
162 |
163 | # go down dets and mark TPs and FPs
164 | nd = len(image_ids)
165 | tp = np.zeros(nd)
166 | fp = np.zeros(nd)
167 | for d in range(nd):
168 | R = class_recs[image_ids[d]]
169 | bb = BB[d, :].astype(float)
170 | ovmax = -np.inf
171 | BBGT = R['bbox'].astype(float)
172 |
173 | if BBGT.size > 0:
174 | # compute overlaps
175 | # intersection
176 | BBGT_xmin = np.min(BBGT[:, 0::2], axis=1)
177 | BBGT_ymin = np.min(BBGT[:, 1::2], axis=1)
178 | BBGT_xmax = np.max(BBGT[:, 0::2], axis=1)
179 | BBGT_ymax = np.max(BBGT[:, 1::2], axis=1)
180 | bb_xmin = np.min(bb[0::2])
181 | bb_ymin = np.min(bb[1::2])
182 | bb_xmax = np.max(bb[0::2])
183 | bb_ymax = np.max(bb[1::2])
184 |
185 | ixmin = np.maximum(BBGT_xmin, bb_xmin)
186 | iymin = np.maximum(BBGT_ymin, bb_ymin)
187 | ixmax = np.minimum(BBGT_xmax, bb_xmax)
188 | iymax = np.minimum(BBGT_ymax, bb_ymax)
189 | iw = np.maximum(ixmax - ixmin + 1., 0.)
190 | ih = np.maximum(iymax - iymin + 1., 0.)
191 | inters = iw * ih
192 |
193 | # union
194 | uni = ((bb_xmax - bb_xmin + 1.) * (bb_ymax - bb_ymin + 1.) +
195 | (BBGT_xmax - BBGT_xmin + 1.) *
196 | (BBGT_ymax - BBGT_ymin + 1.) - inters)
197 |
198 | overlaps = inters / uni
199 |
200 | ###############################
201 |
202 | BBGT_keep_mask = overlaps > 0
203 | BBGT_keep = BBGT[BBGT_keep_mask, :]
204 | BBGT_keep_index = np.where(overlaps > 0)[0]
205 |
206 | # pdb.set_trace()
207 | def calcoverlaps(BBGT_keep, bb):
208 | overlaps = []
209 | for index, GT in enumerate(BBGT_keep):
210 | overlap = polygen_iou_xy4_numpy_eval(BBGT_keep[index], bb)
211 | # overlap = polyiou.iou_poly(polyiou.VectorDouble(BBGT_keep[index]), polyiou.VectorDouble(bb))
212 | overlaps.append(overlap)
213 | return overlaps
214 | #############################
215 |
216 | if len(BBGT_keep) > 0:
217 | overlaps = calcoverlaps(BBGT_keep, bb)
218 |
219 | ovmax = np.max(overlaps)
220 | jmax = np.argmax(overlaps)
221 | # pdb.set_trace()
222 | jmax = BBGT_keep_index[jmax]
223 |
224 | if ovmax > ovthresh:
225 | # print(ovmax)
226 | if not R['difficult'][jmax]:
227 | if not R['det'][jmax]:
228 | tp[d] = 1.
229 | R['det'][jmax] = 1
230 | else:
231 | fp[d] = 1.
232 | else:
233 | fp[d] = 1.
234 |
235 | # compute precision recall
236 | # print(tp)
237 | fp = np.cumsum(fp)
238 | tp = np.cumsum(tp)
239 | rec = tp / float(npos)
240 | # avoid divide by zero in case the first detection matches a difficult
241 | # ground truth
242 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
243 | ap = voc_ap(rec, prec, use_07_metric)
244 |
245 | return rec, prec, ap
246 |
--------------------------------------------------------------------------------
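
A quick sanity check of `voc_ap` on made-up precision/recall values: with precision 1.0 up to recall 0.5 and 0.5 afterwards, the area under the interpolated PR curve is 0.5·1.0 + 0.5·0.5 = 0.75:

```python
import numpy as np
from evalR.voc_eval import voc_ap  # run from the repo root

rec = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
prec = np.array([1.0] * 5 + [0.5] * 5)
print(voc_ap(rec, prec, use_07_metric=False))  # 0.75
```
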
/lib/DCNv2/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | rm *.so
3 | rm -r build/
4 | python setup.py build develop
5 |
--------------------------------------------------------------------------------
/lib/DCNv2/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import glob
4 | import os
5 |
6 | import torch
7 | from setuptools import find_packages, setup
8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
9 |
10 | requirements = ["torch", "torchvision"]
11 |
12 |
13 | def get_extensions():
14 | this_dir = os.path.dirname(os.path.abspath(__file__))
15 | extensions_dir = os.path.join(this_dir, "src")
16 |
17 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
18 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
19 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
20 |     os.environ["CC"] = "g++-10"  # pins the host compiler; adjust to match your toolchain
21 | sources = main_file + source_cpu
22 | extension = CppExtension
23 | extra_compile_args = {"cxx": []}
24 | define_macros = []
25 |
26 |
27 | if torch.cuda.is_available() and CUDA_HOME is not None:
28 | extension = CUDAExtension
29 | sources += source_cuda
30 | define_macros += [("WITH_CUDA", None)]
31 | extra_compile_args["nvcc"] = [
32 | "-DCUDA_HAS_FP16=1",
33 | "-D__CUDA_NO_HALF_OPERATORS__",
34 | "-D__CUDA_NO_HALF_CONVERSIONS__",
35 | "-D__CUDA_NO_HALF2_OPERATORS__",
36 | ]
37 | else:
38 | # raise NotImplementedError('Cuda is not available')
39 | pass
40 |
41 | sources = [os.path.join(extensions_dir, s) for s in sources]
42 | include_dirs = [extensions_dir]
43 | ext_modules = [
44 | extension(
45 | "_ext",
46 | sources,
47 | include_dirs=include_dirs,
48 | define_macros=define_macros,
49 | extra_compile_args=extra_compile_args,
50 | )
51 | ]
52 | return ext_modules
53 |
54 |
55 | setup(
56 | name="DCNv2",
57 | version="0.1",
58 | author="charlesshang",
59 | url="https://github.com/charlesshang/DCNv2",
60 | description="deformable convolutional networks",
61 | packages=find_packages(exclude=("configs", "tests")),
62 | # install_requires=requirements,
63 | ext_modules=get_extensions(),
64 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
65 | )
66 |
--------------------------------------------------------------------------------
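
After building (see make.sh), the extension is importable as `_ext`, the module name passed to `extension(...)` in setup.py above. A minimal smoke test (assuming the build ran in-place via `python setup.py build develop`; run it from lib/DCNv2):

```python
import torch
import _ext  # noqa: F401 -- raises ImportError if the DCNv2 build failed

print("DCNv2 extension loaded; CUDA available:", torch.cuda.is_available())
```
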
/lib/DCNv2/src/cpu/dcn_v2_im2col_cpu.h:
--------------------------------------------------------------------------------
1 |
2 | /*!
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
4 | *
5 | * COPYRIGHT
6 | *
7 | * All contributions by the University of California:
8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
9 | * All rights reserved.
10 | *
11 | * All other contributions:
12 | * Copyright (c) 2014-2017, the respective contributors
13 | * All rights reserved.
14 | *
15 | * Caffe uses a shared copyright model: each contributor holds copyright over
16 | * their contributions to Caffe. The project versioning records all such
17 | * contribution and copyright details. If a contributor wants to further mark
18 | * their specific copyright on a particular contribution, they should indicate
19 | * their copyright solely in the commit message of the change when it is
20 | * committed.
21 | *
22 | * LICENSE
23 | *
24 | * Redistribution and use in source and binary forms, with or without
25 | * modification, are permitted provided that the following conditions are met:
26 | *
27 | * 1. Redistributions of source code must retain the above copyright notice, this
28 | * list of conditions and the following disclaimer.
29 | * 2. Redistributions in binary form must reproduce the above copyright notice,
30 | * this list of conditions and the following disclaimer in the documentation
31 | * and/or other materials provided with the distribution.
32 | *
33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 | *
44 | * CONTRIBUTION AGREEMENT
45 | *
46 | * By contributing to the BVLC/caffe repository through pull-request, comment,
47 | * or otherwise, the contributor releases their content to the
48 | * license and copyright terms herein.
49 | *
50 | ***************** END Caffe Copyright Notice and Disclaimer ********************
51 | *
52 | * Copyright (c) 2018 Microsoft
53 | * Licensed under The MIT License [see LICENSE for details]
54 | * \file modulated_deformable_im2col.h
55 | * \brief Function definitions of converting an image to
56 | * column matrix based on kernel, padding, dilation, and offset.
57 | * These functions are mainly used in deformable convolution operators.
58 | * \ref: https://arxiv.org/abs/1811.11168
59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
60 | */
61 |
62 | /***************** Adapted by Charles Shang *********************/
63 | // modified from the CUDA version for CPU use by Daniel K. Suhendro
64 |
65 | #ifndef DCN_V2_IM2COL_CPU
66 | #define DCN_V2_IM2COL_CPU
67 |
68 | #ifdef __cplusplus
69 | extern "C"
70 | {
71 | #endif
72 |
73 | void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask,
74 | const int batch_size, const int channels, const int height_im, const int width_im,
75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
77 | const int dilation_h, const int dilation_w,
78 | const int deformable_group, float *data_col);
79 |
80 | void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask,
81 | const int batch_size, const int channels, const int height_im, const int width_im,
82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
84 | const int dilation_h, const int dilation_w,
85 | const int deformable_group, float *grad_im);
86 |
87 | void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
88 | const int batch_size, const int channels, const int height_im, const int width_im,
89 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
90 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
91 | const int dilation_h, const int dilation_w,
92 | const int deformable_group,
93 | float *grad_offset, float *grad_mask);
94 |
95 | #ifdef __cplusplus
96 | }
97 | #endif
98 |
99 | #endif
--------------------------------------------------------------------------------
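
At the heart of `modulated_deformable_im2col_cpu` is bilinear sampling: each kernel tap reads the input at a fractional location given by the learned offset and is scaled by the learned mask. A back-of-envelope NumPy sketch of that sampling step (illustrative only, not the repo's code):

```python
import numpy as np

def bilinear(im, y, x):
    """Sample a 2-D array at fractional (y, x) with zero padding outside."""
    y0, x0 = int(np.floor(y)), int(np.floor(x))
    y1, x1 = y0 + 1, x0 + 1
    wy, wx = y - y0, x - x0
    def px(r, c):
        h, w = im.shape
        return im[r, c] if 0 <= r < h and 0 <= c < w else 0.0
    return ((1 - wy) * (1 - wx) * px(y0, x0) + (1 - wy) * wx * px(y0, x1)
            + wy * (1 - wx) * px(y1, x0) + wy * wx * px(y1, x1))

im = np.arange(9.0).reshape(3, 3)
print(bilinear(im, 0.5, 0.5))  # 2.0 -- average of the top-left 2x2 neighborhood
```
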
/lib/DCNv2/src/cpu/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor
5 | dcn_v2_cpu_forward(const at::Tensor &input,
6 | const at::Tensor &weight,
7 | const at::Tensor &bias,
8 | const at::Tensor &offset,
9 | const at::Tensor &mask,
10 | const int kernel_h,
11 | const int kernel_w,
12 | const int stride_h,
13 | const int stride_w,
14 | const int pad_h,
15 | const int pad_w,
16 | const int dilation_h,
17 | const int dilation_w,
18 | const int deformable_group);
19 |
20 | std::vector<at::Tensor>
21 | dcn_v2_cpu_backward(const at::Tensor &input,
22 | const at::Tensor &weight,
23 | const at::Tensor &bias,
24 | const at::Tensor &offset,
25 | const at::Tensor &mask,
26 | const at::Tensor &grad_output,
27 | int kernel_h, int kernel_w,
28 | int stride_h, int stride_w,
29 | int pad_h, int pad_w,
30 | int dilation_h, int dilation_w,
31 | int deformable_group);
32 |
33 |
34 | std::tuple<at::Tensor, at::Tensor>
35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
36 | const at::Tensor &bbox,
37 | const at::Tensor &trans,
38 | const int no_trans,
39 | const float spatial_scale,
40 | const int output_dim,
41 | const int group_size,
42 | const int pooled_size,
43 | const int part_size,
44 | const int sample_per_part,
45 | const float trans_std);
46 |
47 | std::tuple<at::Tensor, at::Tensor>
48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
49 | const at::Tensor &input,
50 | const at::Tensor &bbox,
51 | const at::Tensor &trans,
52 | const at::Tensor &top_count,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
--------------------------------------------------------------------------------
/lib/DCNv2/src/cuda/dcn_v2_cuda.cu:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include "dcn_v2_im2col_cuda.h"
3 |
4 | #include <ATen/ATen.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | #include <c10/cuda/CUDAGuard.h>
8 | // THCState *state = at::globalContext().lazyInitCUDA();
9 |
10 | // author: Charles Shang
11 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
12 |
13 | // [batch gemm]
14 | // https://github.com/pytorch/pytorch/blob/master/aten/src/THC/generic/THCTensorMathBlas.cu
15 |
16 |
17 | at::Tensor
18 | dcn_v2_cuda_forward(const at::Tensor &input,
19 | const at::Tensor &weight,
20 | const at::Tensor &bias,
21 | const at::Tensor &offset,
22 | const at::Tensor &mask,
23 | const int kernel_h,
24 | const int kernel_w,
25 | const int stride_h,
26 | const int stride_w,
27 | const int pad_h,
28 | const int pad_w,
29 | const int dilation_h,
30 | const int dilation_w,
31 | const int deformable_group)
32 | {
33 | using scalar_t = float;
34 | // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));
35 | AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
36 | AT_ASSERTM(weight.is_cuda(), "weight must be a CUDA tensor");
37 | AT_ASSERTM(bias.is_cuda(), "bias must be a CUDA tensor");
38 | AT_ASSERTM(offset.is_cuda(), "offset must be a CUDA tensor");
39 | AT_ASSERTM(mask.is_cuda(), "mask must be a CUDA tensor");
40 |
41 | const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
42 |
43 | const int batch = input.size(0);
44 | const int channels = input.size(1);
45 | const int height = input.size(2);
46 | const int width = input.size(3);
47 |
48 | const int channels_out = weight.size(0);
49 | const int channels_kernel = weight.size(1);
50 | const int kernel_h_ = weight.size(2);
51 | const int kernel_w_ = weight.size(3);
52 |
53 | // printf("Kernels: %d %d %d %d\n", kernel_h_, kernel_w_, kernel_w, kernel_h);
54 | // printf("Channels: %d %d\n", channels, channels_kernel);
55 | // printf("Channels: %d %d\n", channels_out, channels_kernel);
56 |
57 |     AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
58 |                "Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h, kernel_w, kernel_h_, kernel_w_);
59 |
60 |     AT_ASSERTM(channels == channels_kernel,
61 |                "Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel);
62 |
63 | const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
64 | const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
65 |
66 | auto ones = at::ones({batch, bias.sizes()[0], height_out, width_out}, input.options());
67 | auto columns = at::empty({batch, channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
68 | auto output = at::zeros({batch, channels_out, height_out, width_out}, input.options());
69 |
70 | // Add biases to output tensor
71 | // torch implementation
72 | auto ones_T = at::transpose(ones.contiguous(), 3, 1);
73 | ones_T = at::mul(ones_T, bias.contiguous());
74 | ones_T = at::transpose(ones_T, 3, 1);
75 | output = at::add(output, ones_T);
76 |
77 | modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),
78 |                                      input.data_ptr<scalar_t>(),
79 |                                      offset.data_ptr<scalar_t>(),
80 |                                      mask.data_ptr<scalar_t>(),
81 | batch, channels, height, width,
82 | height_out, width_out, kernel_h, kernel_w,
83 | pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
84 | deformable_group,
85 |                                      columns.data_ptr<scalar_t>());
86 |
87 | // Scale columns and add to output
88 | // torch implementation
89 | auto weight_flat = weight.view({channels_out, channels * kernel_h * kernel_w});
90 | auto product = at::matmul(weight_flat, columns);
91 | output = at::add(output, product.view({batch, channels_out, height_out, width_out}));
92 |
93 | return output;
94 | }
95 |
96 | std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,
97 | const at::Tensor &weight,
98 | const at::Tensor &bias,
99 | const at::Tensor &offset,
100 | const at::Tensor &mask,
101 | const at::Tensor &grad_output,
102 | int kernel_h, int kernel_w,
103 | int stride_h, int stride_w,
104 | int pad_h, int pad_w,
105 | int dilation_h, int dilation_w,
106 | int deformable_group)
107 | {
108 |
109 | TORCH_CHECK_ARG(input.is_contiguous(), 1, "input tensor has to be contiguous");
110 | TORCH_CHECK_ARG(weight.is_contiguous(), 2, "weight tensor has to be contiguous");
111 |
112 | AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
113 | AT_ASSERTM(weight.is_cuda(), "weight must be a CUDA tensor");
114 | AT_ASSERTM(bias.is_cuda(), "bias must be a CUDA tensor");
115 | AT_ASSERTM(offset.is_cuda(), "offset must be a CUDA tensor");
116 | AT_ASSERTM(mask.is_cuda(), "mask must be a CUDA tensor");
117 |
118 | const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
119 | const int batch = input.size(0);
120 | const int channels = input.size(1);
121 | const int height = input.size(2);
122 | const int width = input.size(3);
123 |
124 | const int channels_out = weight.size(0);
125 | const int channels_kernel = weight.size(1);
126 | const int kernel_h_ = weight.size(2);
127 | const int kernel_w_ = weight.size(3);
128 |
129 | AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
130 |              "Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h, kernel_w, kernel_h_, kernel_w_);
131 |
132 | AT_ASSERTM(channels == channels_kernel,
133 |              "Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel);
134 |
135 | const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
136 | const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
137 |
138 | auto ones = at::ones({height_out, width_out}, input.options());
139 | auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
140 | auto output = at::empty({batch, channels_out, height_out, width_out}, input.options());
141 |
142 | auto grad_input = at::zeros_like(input);
143 | auto grad_weight = at::zeros_like(weight);
144 | auto grad_bias = at::zeros_like(bias);
145 | auto grad_offset = at::zeros_like(offset);
146 | auto grad_mask = at::zeros_like(mask);
147 |
148 | using scalar_t = float;
149 |
150 | for (int b = 0; b < batch; b++)
151 | {
152 | auto input_n = input.select(0, b);
153 | auto offset_n = offset.select(0, b);
154 | auto mask_n = mask.select(0, b);
155 | auto grad_output_n = grad_output.select(0, b);
156 | auto grad_input_n = grad_input.select(0, b);
157 | auto grad_offset_n = grad_offset.select(0, b);
158 | auto grad_mask_n = grad_mask.select(0, b);
159 |
160 | // Torch implementation
161 | auto weight_flat = weight.view({channels_out, channels*kernel_h*kernel_w});
162 | weight_flat = at::transpose(weight_flat, 1, 0);
163 |
164 | auto grad_output_n_flat = grad_output_n.view({channels_out, height_out*width_out});
165 | columns = at::matmul(weight_flat, grad_output_n_flat);
166 |
167 | // gradient w.r.t. input coordinate data
168 | modulated_deformable_col2im_coord_cuda(c10::cuda::getCurrentCUDAStream(),
169 |                                            columns.data_ptr<scalar_t>(),
170 |                                            input_n.data_ptr<scalar_t>(),
171 |                                            offset_n.data_ptr<scalar_t>(),
172 |                                            mask_n.data_ptr<scalar_t>(),
173 | 1, channels, height, width,
174 | height_out, width_out, kernel_h, kernel_w,
175 | pad_h, pad_w, stride_h, stride_w,
176 | dilation_h, dilation_w, deformable_group,
177 |                                            grad_offset_n.data_ptr<scalar_t>(),
178 |                                            grad_mask_n.data_ptr<scalar_t>());
179 | // gradient w.r.t. input data
180 | modulated_deformable_col2im_cuda(c10::cuda::getCurrentCUDAStream(),
181 |                                      columns.data_ptr<scalar_t>(),
182 |                                      offset_n.data_ptr<scalar_t>(),
183 |                                      mask_n.data_ptr<scalar_t>(),
184 | 1, channels, height, width,
185 | height_out, width_out, kernel_h, kernel_w,
186 | pad_h, pad_w, stride_h, stride_w,
187 | dilation_h, dilation_w, deformable_group,
188 |                                      grad_input_n.data_ptr<scalar_t>());
189 |
190 | // gradient w.r.t. weight, dWeight should accumulate across the batch and group
191 | modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),
192 |                                      input_n.data_ptr<scalar_t>(),
193 |                                      offset_n.data_ptr<scalar_t>(),
194 |                                      mask_n.data_ptr<scalar_t>(),
195 | 1, channels, height, width,
196 | height_out, width_out, kernel_h, kernel_w,
197 | pad_h, pad_w, stride_h, stride_w,
198 | dilation_h, dilation_w, deformable_group,
199 |                                      columns.data_ptr<scalar_t>());
200 |
201 |
202 | // Torch implementation
203 | auto product = at::matmul(grad_output_n_flat, at::transpose(columns, 1, 0));
204 | grad_weight = at::add(grad_weight, product.view({channels_out, channels, kernel_h, kernel_w}));
205 |
206 | // Torch implementation
207 | auto ones_flat = ones.view({height_out*width_out});
208 | product = at::matmul(grad_output_n_flat, ones_flat);
209 | grad_bias = at::add(grad_bias, product);
210 |
211 | }
212 |
213 | return {
214 | grad_input, grad_offset, grad_mask, grad_weight, grad_bias
215 | };
216 | }
217 |
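The forward pass above is the classic im2col + GEMM decomposition: the deformable sampling is folded into the im2col step, the bias is broadcast in through the `ones` tensor, and the column matrix is contracted with the flattened weights. A minimal PyTorch sketch of the same decomposition for a plain (non-deformable) convolution, for orientation only — it is not part of the repo:

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 4, 8, 8)   # (batch, in_channels, H, W)
w = torch.randn(6, 4, 3, 3)   # (out_channels, in_channels, kH, kW)
b = torch.randn(6)

# im2col (unfold) -> GEMM with flattened weights -> reshape, mirroring the
# columns / weight_flat / product steps in dcn_v2_cuda_forward above
cols = F.unfold(x, kernel_size=3, padding=1)    # (2, 4*3*3, 8*8)
out = w.view(6, -1) @ cols + b.view(1, 6, 1)    # (2, 6, 8*8)
out = out.view(2, 6, 8, 8)

assert torch.allclose(out, F.conv2d(x, w, b, padding=1), atol=1e-4)
```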
--------------------------------------------------------------------------------
/lib/DCNv2/src/cuda/dcn_v2_im2col_cuda.h:
--------------------------------------------------------------------------------
1 |
2 | /*!
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
4 | *
5 | * COPYRIGHT
6 | *
7 | * All contributions by the University of California:
8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
9 | * All rights reserved.
10 | *
11 | * All other contributions:
12 | * Copyright (c) 2014-2017, the respective contributors
13 | * All rights reserved.
14 | *
15 | * Caffe uses a shared copyright model: each contributor holds copyright over
16 | * their contributions to Caffe. The project versioning records all such
17 | * contribution and copyright details. If a contributor wants to further mark
18 | * their specific copyright on a particular contribution, they should indicate
19 | * their copyright solely in the commit message of the change when it is
20 | * committed.
21 | *
22 | * LICENSE
23 | *
24 | * Redistribution and use in source and binary forms, with or without
25 | * modification, are permitted provided that the following conditions are met:
26 | *
27 | * 1. Redistributions of source code must retain the above copyright notice, this
28 | * list of conditions and the following disclaimer.
29 | * 2. Redistributions in binary form must reproduce the above copyright notice,
30 | * this list of conditions and the following disclaimer in the documentation
31 | * and/or other materials provided with the distribution.
32 | *
33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 | *
44 | * CONTRIBUTION AGREEMENT
45 | *
46 | * By contributing to the BVLC/caffe repository through pull-request, comment,
47 | * or otherwise, the contributor releases their content to the
48 | * license and copyright terms herein.
49 | *
50 | ***************** END Caffe Copyright Notice and Disclaimer ********************
51 | *
52 | * Copyright (c) 2018 Microsoft
53 | * Licensed under The MIT License [see LICENSE for details]
54 | * \file modulated_deformable_im2col.h
55 | * \brief Function definitions of converting an image to
56 | * column matrix based on kernel, padding, dilation, and offset.
57 | * These functions are mainly used in deformable convolution operators.
58 | * \ref: https://arxiv.org/abs/1811.11168
59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
60 | */
61 |
62 | /***************** Adapted by Charles Shang *********************/
63 |
64 | #ifndef DCN_V2_IM2COL_CUDA
65 | #define DCN_V2_IM2COL_CUDA
66 |
67 | #ifdef __cplusplus
68 | extern "C"
69 | {
70 | #endif
71 |
72 | void modulated_deformable_im2col_cuda(cudaStream_t stream,
73 | const float *data_im, const float *data_offset, const float *data_mask,
74 | const int batch_size, const int channels, const int height_im, const int width_im,
75 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
77 | const int dilation_h, const int dilation_w,
78 | const int deformable_group, float *data_col);
79 |
80 | void modulated_deformable_col2im_cuda(cudaStream_t stream,
81 | const float *data_col, const float *data_offset, const float *data_mask,
82 | const int batch_size, const int channels, const int height_im, const int width_im,
83 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
85 | const int dilation_h, const int dilation_w,
86 | const int deformable_group, float *grad_im);
87 |
88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
90 | const int batch_size, const int channels, const int height_im, const int width_im,
91 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
93 | const int dilation_h, const int dilation_w,
94 | const int deformable_group,
95 | float *grad_offset, float *grad_mask);
96 |
97 | #ifdef __cplusplus
98 | }
99 | #endif
100 |
101 | #endif
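Per the DCNv2 paper referenced in the header (https://arxiv.org/abs/1811.11168), each kernel tap samples the input at a learned fractional offset from its regular grid location via bilinear interpolation, and the sample is scaled by a learned mask. A pure-Python sketch of one such sample — illustrative only; the CUDA kernels apply this per channel, tap, and output pixel:

```python
import math
import torch

def bilinear_sample(im, y, x):
    # im: (H, W) tensor; (y, x): fractional sample location; zero padding outside
    H, W = im.shape
    y0, x0 = math.floor(y), math.floor(x)
    val = im.new_zeros(())
    for yy in (y0, y0 + 1):       # the four surrounding integer pixels
        for xx in (x0, x0 + 1):
            if 0 <= yy < H and 0 <= xx < W:
                val += (1 - abs(y - yy)) * (1 - abs(x - xx)) * im[yy, xx]
    return val

# For output pixel (h, w) and kernel tap (i, j), the column entry is then roughly
#   p_y = h * stride_h - pad_h + i * dilation_h + offset_y[i, j, h, w]
#   p_x = w * stride_w - pad_w + j * dilation_w + offset_x[i, j, h, w]
#   col = mask[i, j, h, w] * bilinear_sample(im, p_y, p_x)
```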
--------------------------------------------------------------------------------
/lib/DCNv2/src/cuda/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 | #include <vector>
4 | at::Tensor
5 | dcn_v2_cuda_forward(const at::Tensor &input,
6 | const at::Tensor &weight,
7 | const at::Tensor &bias,
8 | const at::Tensor &offset,
9 | const at::Tensor &mask,
10 | const int kernel_h,
11 | const int kernel_w,
12 | const int stride_h,
13 | const int stride_w,
14 | const int pad_h,
15 | const int pad_w,
16 | const int dilation_h,
17 | const int dilation_w,
18 | const int deformable_group);
19 |
20 | std::vector<at::Tensor>
21 | dcn_v2_cuda_backward(const at::Tensor &input,
22 | const at::Tensor &weight,
23 | const at::Tensor &bias,
24 | const at::Tensor &offset,
25 | const at::Tensor &mask,
26 | const at::Tensor &grad_output,
27 | int kernel_h, int kernel_w,
28 | int stride_h, int stride_w,
29 | int pad_h, int pad_w,
30 | int dilation_h, int dilation_w,
31 | int deformable_group);
32 |
33 |
34 | std::tuple<at::Tensor, at::Tensor>
35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,
36 | const at::Tensor &bbox,
37 | const at::Tensor &trans,
38 | const int no_trans,
39 | const float spatial_scale,
40 | const int output_dim,
41 | const int group_size,
42 | const int pooled_size,
43 | const int part_size,
44 | const int sample_per_part,
45 | const float trans_std);
46 |
47 | std::tuple<at::Tensor, at::Tensor>
48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,
49 | const at::Tensor &input,
50 | const at::Tensor &bbox,
51 | const at::Tensor &trans,
52 | const at::Tensor &top_count,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
--------------------------------------------------------------------------------
/lib/DCNv2/src/dcn_v2.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 | at::Tensor
10 | dcn_v2_forward(const at::Tensor &input,
11 | const at::Tensor &weight,
12 | const at::Tensor &bias,
13 | const at::Tensor &offset,
14 | const at::Tensor &mask,
15 | const int kernel_h,
16 | const int kernel_w,
17 | const int stride_h,
18 | const int stride_w,
19 | const int pad_h,
20 | const int pad_w,
21 | const int dilation_h,
22 | const int dilation_w,
23 | const int deformable_group)
24 | {
25 |     if (input.is_cuda())
26 | {
27 | #ifdef WITH_CUDA
28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask,
29 | kernel_h, kernel_w,
30 | stride_h, stride_w,
31 | pad_h, pad_w,
32 | dilation_h, dilation_w,
33 | deformable_group);
34 | #else
35 | AT_ERROR("Not compiled with GPU support");
36 | #endif
37 | }
38 | else{
39 | return dcn_v2_cpu_forward(input, weight, bias, offset, mask,
40 | kernel_h, kernel_w,
41 | stride_h, stride_w,
42 | pad_h, pad_w,
43 | dilation_h, dilation_w,
44 | deformable_group);
45 | }
46 | }
47 |
48 | std::vector<at::Tensor>
49 | dcn_v2_backward(const at::Tensor &input,
50 | const at::Tensor &weight,
51 | const at::Tensor &bias,
52 | const at::Tensor &offset,
53 | const at::Tensor &mask,
54 | const at::Tensor &grad_output,
55 | int kernel_h, int kernel_w,
56 | int stride_h, int stride_w,
57 | int pad_h, int pad_w,
58 | int dilation_h, int dilation_w,
59 | int deformable_group)
60 | {
61 |     if (input.is_cuda())
62 | {
63 | #ifdef WITH_CUDA
64 | return dcn_v2_cuda_backward(input,
65 | weight,
66 | bias,
67 | offset,
68 | mask,
69 | grad_output,
70 | kernel_h, kernel_w,
71 | stride_h, stride_w,
72 | pad_h, pad_w,
73 | dilation_h, dilation_w,
74 | deformable_group);
75 | #else
76 | AT_ERROR("Not compiled with GPU support");
77 | #endif
78 | }
79 | else{
80 | return dcn_v2_cpu_backward(input,
81 | weight,
82 | bias,
83 | offset,
84 | mask,
85 | grad_output,
86 | kernel_h, kernel_w,
87 | stride_h, stride_w,
88 | pad_h, pad_w,
89 | dilation_h, dilation_w,
90 | deformable_group);
91 | }
92 | }
93 |
94 | std::tuple<at::Tensor, at::Tensor>
95 | dcn_v2_psroi_pooling_forward(const at::Tensor &input,
96 | const at::Tensor &bbox,
97 | const at::Tensor &trans,
98 | const int no_trans,
99 | const float spatial_scale,
100 | const int output_dim,
101 | const int group_size,
102 | const int pooled_size,
103 | const int part_size,
104 | const int sample_per_part,
105 | const float trans_std)
106 | {
107 |     if (input.is_cuda())
108 | {
109 | #ifdef WITH_CUDA
110 | return dcn_v2_psroi_pooling_cuda_forward(input,
111 | bbox,
112 | trans,
113 | no_trans,
114 | spatial_scale,
115 | output_dim,
116 | group_size,
117 | pooled_size,
118 | part_size,
119 | sample_per_part,
120 | trans_std);
121 | #else
122 | AT_ERROR("Not compiled with GPU support");
123 | #endif
124 | }
125 | else{
126 | return dcn_v2_psroi_pooling_cpu_forward(input,
127 | bbox,
128 | trans,
129 | no_trans,
130 | spatial_scale,
131 | output_dim,
132 | group_size,
133 | pooled_size,
134 | part_size,
135 | sample_per_part,
136 | trans_std);
137 | }
138 | }
139 |
140 | std::tuple<at::Tensor, at::Tensor>
141 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad,
142 | const at::Tensor &input,
143 | const at::Tensor &bbox,
144 | const at::Tensor &trans,
145 | const at::Tensor &top_count,
146 | const int no_trans,
147 | const float spatial_scale,
148 | const int output_dim,
149 | const int group_size,
150 | const int pooled_size,
151 | const int part_size,
152 | const int sample_per_part,
153 | const float trans_std)
154 | {
155 |     if (input.is_cuda())
156 | {
157 | #ifdef WITH_CUDA
158 | return dcn_v2_psroi_pooling_cuda_backward(out_grad,
159 | input,
160 | bbox,
161 | trans,
162 | top_count,
163 | no_trans,
164 | spatial_scale,
165 | output_dim,
166 | group_size,
167 | pooled_size,
168 | part_size,
169 | sample_per_part,
170 | trans_std);
171 | #else
172 | AT_ERROR("Not compiled with GPU support");
173 | #endif
174 | }
175 | else{
176 | return dcn_v2_psroi_pooling_cpu_backward(out_grad,
177 | input,
178 | bbox,
179 | trans,
180 | top_count,
181 | no_trans,
182 | spatial_scale,
183 | output_dim,
184 | group_size,
185 | pooled_size,
186 | part_size,
187 | sample_per_part,
188 | trans_std);
189 | }
190 | }
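The dispatchers above take the CUDA branch only when the extension was built with WITH_CUDA defined; otherwise they fall through to the CPU implementations from cpu/vision.h. That macro comes from the build script. A hedged sketch of how a typical DCNv2-style setup.py defines it — the extension name and source globs here are assumptions; the repo's own setup.py is authoritative:

```python
import glob
from torch.utils.cpp_extension import CUDAExtension

ext = CUDAExtension(
    name="_ext",  # exposed to vision.cpp as TORCH_EXTENSION_NAME
    sources=glob.glob("src/*.cpp")
    + glob.glob("src/cpu/*.cpp")
    + glob.glob("src/cuda/*.cu"),
    define_macros=[("WITH_CUDA", None)],  # enables the CUDA branches above
)
```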
--------------------------------------------------------------------------------
/lib/DCNv2/src/vision.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "dcn_v2.h"
3 |
4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward");
6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward");
7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward");
8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward");
9 | }
10 |
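These four bindings are the extension's entire Python surface; the wrapper classes in dcn_v2.py are the intended API on top of them. A hedged sketch of calling the raw forward binding directly — the module name `_ext` is an assumption (it is whatever name setup.py registers as TORCH_EXTENSION_NAME):

```python
import torch
import _ext  # assumed extension name; see setup.py

x      = torch.randn(1, 4, 8, 8)
weight = torch.randn(4, 4, 3, 3)
bias   = torch.zeros(4)
offset = torch.zeros(1, 2 * 3 * 3, 8, 8)    # zero offsets: the regular grid
mask   = torch.full((1, 3 * 3, 8, 8), 0.5)  # sigmoid(0) = 0.5

out = _ext.dcn_v2_forward(x, weight, bias, offset, mask,
                          3, 3,  # kernel_h, kernel_w
                          1, 1,  # stride_h, stride_w
                          1, 1,  # pad_h, pad_w
                          1, 1,  # dilation_h, dilation_w
                          1)     # deformable_group
print(out.shape)  # torch.Size([1, 4, 8, 8])
```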
--------------------------------------------------------------------------------
/lib/DCNv2/testcpu.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import absolute_import
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import time
7 | import torch
8 | import torch.nn as nn
9 | from torch.autograd import gradcheck
10 |
11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN
12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling
13 |
14 | deformable_groups = 1
15 | N, inC, inH, inW = 2, 2, 4, 4
16 | outC = 2
17 | kH, kW = 3, 3
18 |
19 |
20 | def conv_identify(weight, bias):
21 | weight.data.zero_()
22 | bias.data.zero_()
23 | o, i, h, w = weight.shape
24 | y = h//2
25 | x = w//2
26 | for p in range(i):
27 | for q in range(o):
28 | if p == q:
29 | weight.data[q, p, y, x] = 1.0
30 |
31 |
32 | def check_zero_offset():
33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW,
34 | kernel_size=(kH, kW),
35 | stride=(1, 1),
36 | padding=(1, 1),
37 | bias=True)
38 |
39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW,
40 | kernel_size=(kH, kW),
41 | stride=(1, 1),
42 | padding=(1, 1),
43 | bias=True)
44 |
45 | dcn_v2 = DCNv2(inC, outC, (kH, kW),
46 | stride=1, padding=1, dilation=1,
47 | deformable_groups=deformable_groups)
48 |
49 | conv_offset.weight.data.zero_()
50 | conv_offset.bias.data.zero_()
51 | conv_mask.weight.data.zero_()
52 | conv_mask.bias.data.zero_()
53 | conv_identify(dcn_v2.weight, dcn_v2.bias)
54 |
55 | input = torch.randn(N, inC, inH, inW)
56 | offset = conv_offset(input)
57 | mask = conv_mask(input)
58 | mask = torch.sigmoid(mask)
59 | output = dcn_v2(input, offset, mask)
60 |     output *= 2  # conv_mask is zeroed, so mask = sigmoid(0) = 0.5; doubling recovers the input
61 | d = (input - output).abs().max()
62 | if d < 1e-10:
63 | print('Zero offset passed')
64 | else:
65 | print('Zero offset failed')
66 | print(input)
67 | print(output)
68 |
69 | def check_gradient_dconv():
70 |
71 | input = torch.rand(N, inC, inH, inW) * 0.01
72 | input.requires_grad = True
73 |
74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW) * 2
75 | # offset.data.zero_()
76 | # offset.data -= 0.5
77 | offset.requires_grad = True
78 |
79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW)
80 | # mask.data.zero_()
81 | mask.requires_grad = True
82 | mask = torch.sigmoid(mask)
83 |
84 | weight = torch.randn(outC, inC, kH, kW)
85 | weight.requires_grad = True
86 |
87 | bias = torch.rand(outC)
88 | bias.requires_grad = True
89 |
90 | stride = 1
91 | padding = 1
92 | dilation = 1
93 |
94 | print('check_gradient_dconv: ',
95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias,
96 | stride, padding, dilation, deformable_groups),
97 | eps=1e-3, atol=1e-4, rtol=1e-2))
98 |
99 |
100 | def check_pooling_zero_offset():
101 |
102 | input = torch.randn(2, 16, 64, 64).zero_()
103 | input[0, :, 16:26, 16:26] = 1.
104 | input[1, :, 10:20, 20:30] = 2.
105 | rois = torch.tensor([
106 | [0, 65, 65, 103, 103],
107 | [1, 81, 41, 119, 79],
108 | ]).float()
109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
110 | pooled_size=7,
111 | output_dim=16,
112 | no_trans=True,
113 | group_size=1,
114 | trans_std=0.0)
115 |
116 | out = pooling(input, rois, input.new())
117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item()
118 | for i in range(rois.shape[0])])
119 | print(s)
120 |
121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
122 | pooled_size=7,
123 | output_dim=16,
124 | no_trans=False,
125 | group_size=1,
126 | trans_std=0.0)
127 | offset = torch.randn(20, 2, 7, 7).zero_()
128 | dout = dpooling(input, rois, offset)
129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item()
130 | for i in range(rois.shape[0])])
131 | print(s)
132 |
133 |
134 | def check_gradient_dpooling():
135 | input = torch.randn(2, 3, 5, 5) * 0.01
136 | N = 4
137 | batch_inds = torch.randint(2, (N, 1)).float()
138 | x = torch.rand((N, 1)).float() * 15
139 | y = torch.rand((N, 1)).float() * 15
140 | w = torch.rand((N, 1)).float() * 10
141 | h = torch.rand((N, 1)).float() * 10
142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
143 | offset = torch.randn(N, 2, 3, 3)
144 | input.requires_grad = True
145 | offset.requires_grad = True
146 |
147 | spatial_scale = 1.0 / 4
148 | pooled_size = 3
149 | output_dim = 3
150 | no_trans = 0
151 | group_size = 1
152 | trans_std = 0.0
153 | sample_per_part = 4
154 | part_size = pooled_size
155 |
156 | print('check_gradient_dpooling:',
157 | gradcheck(dcn_v2_pooling, (input, rois, offset,
158 | spatial_scale,
159 | pooled_size,
160 | output_dim,
161 | no_trans,
162 | group_size,
163 | part_size,
164 | sample_per_part,
165 | trans_std),
166 | eps=1e-4))
167 |
168 |
169 | def example_dconv():
170 | input = torch.randn(2, 64, 128, 128)
171 | # wrap all things (offset and mask) in DCN
172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1,
173 | padding=1, deformable_groups=2)
174 | # print(dcn.weight.shape, input.shape)
175 | output = dcn(input)
176 |     target = output.new(*output.size())
177 |     target.data.uniform_(-0.01, 0.01)
178 |     error = (target - output).mean()
179 | error.backward()
180 | print(output.shape)
181 |
182 |
183 | def example_dpooling():
184 | input = torch.randn(2, 32, 64, 64)
185 | batch_inds = torch.randint(2, (20, 1)).float()
186 | x = torch.randint(256, (20, 1)).float()
187 | y = torch.randint(256, (20, 1)).float()
188 | w = torch.randint(64, (20, 1)).float()
189 | h = torch.randint(64, (20, 1)).float()
190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
191 | offset = torch.randn(20, 2, 7, 7)
192 | input.requires_grad = True
193 | offset.requires_grad = True
194 |
195 | # normal roi_align
196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
197 | pooled_size=7,
198 | output_dim=32,
199 | no_trans=True,
200 | group_size=1,
201 | trans_std=0.1)
202 |
203 | # deformable pooling
204 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
205 | pooled_size=7,
206 | output_dim=32,
207 | no_trans=False,
208 | group_size=1,
209 | trans_std=0.1)
210 |
211 | out = pooling(input, rois, offset)
212 | dout = dpooling(input, rois, offset)
213 | print(out.shape)
214 | print(dout.shape)
215 |
216 | target_out = out.new(*out.size())
217 | target_out.data.uniform_(-0.01, 0.01)
218 | target_dout = dout.new(*dout.size())
219 | target_dout.data.uniform_(-0.01, 0.01)
220 | e = (target_out - out).mean()
221 | e.backward()
222 | e = (target_dout - dout).mean()
223 | e.backward()
224 |
225 |
226 | def example_mdpooling():
227 | input = torch.randn(2, 32, 64, 64)
228 | input.requires_grad = True
229 | batch_inds = torch.randint(2, (20, 1)).float()
230 | x = torch.randint(256, (20, 1)).float()
231 | y = torch.randint(256, (20, 1)).float()
232 | w = torch.randint(64, (20, 1)).float()
233 | h = torch.randint(64, (20, 1)).float()
234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
235 |
236 |     # modulated deformable pooling (V2)
237 | dpooling = DCNPooling(spatial_scale=1.0 / 4,
238 | pooled_size=7,
239 | output_dim=32,
240 | no_trans=False,
241 | group_size=1,
242 | trans_std=0.1,
243 | deform_fc_dim=1024)
244 |
245 | dout = dpooling(input, rois)
246 | target = dout.new(*dout.size())
247 | target.data.uniform_(-0.1, 0.1)
248 | error = (target - dout).mean()
249 | error.backward()
250 | print(dout.shape)
251 |
252 |
253 | if __name__ == '__main__':
254 |
255 | example_dconv()
256 | example_dpooling()
257 | example_mdpooling()
258 |
259 | check_pooling_zero_offset()
260 | # zero offset check
261 | if inC == outC:
262 | check_zero_offset()
263 |
264 | check_gradient_dpooling()
265 | check_gradient_dconv()
266 |     # """
267 |     # ****** Note: the "backward is not reentrant" error may not be a serious problem,
268 |     # ****** since the max error is less than 1e-7;
269 |     # ****** still looking for what triggers this problem.
270 |     # """
271 |
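An aside on the tolerances used above (general PyTorch practice, not something this file states): gradcheck compares analytic gradients against finite differences and is only really tight in float64, which is why the float32 checks here relax eps/atol/rtol. A stricter check on a toy op would look like:

```python
import torch
from torch.autograd import gradcheck

lin = torch.nn.Linear(3, 2).double()
x = torch.randn(4, 3, dtype=torch.float64, requires_grad=True)
print(gradcheck(lin, (x,)))  # float64 with the default tight eps: True
```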
--------------------------------------------------------------------------------
/lib/DCNv2/testcuda.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import absolute_import
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import time
7 | import torch
8 | import torch.nn as nn
9 | from torch.autograd import gradcheck
10 |
11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN
12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling
13 |
14 | deformable_groups = 1
15 | N, inC, inH, inW = 2, 2, 4, 4
16 | outC = 2
17 | kH, kW = 3, 3
18 |
19 |
20 | def conv_identify(weight, bias):
21 | weight.data.zero_()
22 | bias.data.zero_()
23 | o, i, h, w = weight.shape
24 | y = h//2
25 | x = w//2
26 | for p in range(i):
27 | for q in range(o):
28 | if p == q:
29 | weight.data[q, p, y, x] = 1.0
30 |
31 |
32 | def check_zero_offset():
33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW,
34 | kernel_size=(kH, kW),
35 | stride=(1, 1),
36 | padding=(1, 1),
37 | bias=True).cuda()
38 |
39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW,
40 | kernel_size=(kH, kW),
41 | stride=(1, 1),
42 | padding=(1, 1),
43 | bias=True).cuda()
44 |
45 | dcn_v2 = DCNv2(inC, outC, (kH, kW),
46 | stride=1, padding=1, dilation=1,
47 | deformable_groups=deformable_groups).cuda()
48 |
49 | conv_offset.weight.data.zero_()
50 | conv_offset.bias.data.zero_()
51 | conv_mask.weight.data.zero_()
52 | conv_mask.bias.data.zero_()
53 | conv_identify(dcn_v2.weight, dcn_v2.bias)
54 |
55 | input = torch.randn(N, inC, inH, inW).cuda()
56 | offset = conv_offset(input)
57 | mask = conv_mask(input)
58 | mask = torch.sigmoid(mask)
59 | output = dcn_v2(input, offset, mask)
60 |     output *= 2  # conv_mask is zeroed, so mask = sigmoid(0) = 0.5; doubling recovers the input
61 | d = (input - output).abs().max()
62 | if d < 1e-10:
63 | print('Zero offset passed')
64 | else:
65 | print('Zero offset failed')
66 | print(input)
67 | print(output)
68 |
69 | def check_gradient_dconv():
70 |
71 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01
72 | input.requires_grad = True
73 |
74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2
75 | # offset.data.zero_()
76 | # offset.data -= 0.5
77 | offset.requires_grad = True
78 |
79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda()
80 | # mask.data.zero_()
81 | mask.requires_grad = True
82 | mask = torch.sigmoid(mask)
83 |
84 | weight = torch.randn(outC, inC, kH, kW).cuda()
85 | weight.requires_grad = True
86 |
87 | bias = torch.rand(outC).cuda()
88 | bias.requires_grad = True
89 |
90 | stride = 1
91 | padding = 1
92 | dilation = 1
93 |
94 | print('check_gradient_dconv: ',
95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias,
96 | stride, padding, dilation, deformable_groups),
97 | eps=1e-3, atol=1e-4, rtol=1e-2))
98 |
99 |
100 | def check_pooling_zero_offset():
101 |
102 | input = torch.randn(2, 16, 64, 64).cuda().zero_()
103 | input[0, :, 16:26, 16:26] = 1.
104 | input[1, :, 10:20, 20:30] = 2.
105 | rois = torch.tensor([
106 | [0, 65, 65, 103, 103],
107 | [1, 81, 41, 119, 79],
108 | ]).cuda().float()
109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
110 | pooled_size=7,
111 | output_dim=16,
112 | no_trans=True,
113 | group_size=1,
114 | trans_std=0.0).cuda()
115 |
116 | out = pooling(input, rois, input.new())
117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item()
118 | for i in range(rois.shape[0])])
119 | print(s)
120 |
121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
122 | pooled_size=7,
123 | output_dim=16,
124 | no_trans=False,
125 | group_size=1,
126 | trans_std=0.0).cuda()
127 | offset = torch.randn(20, 2, 7, 7).cuda().zero_()
128 | dout = dpooling(input, rois, offset)
129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item()
130 | for i in range(rois.shape[0])])
131 | print(s)
132 |
133 |
134 | def check_gradient_dpooling():
135 | input = torch.randn(2, 3, 5, 5).cuda() * 0.01
136 | N = 4
137 | batch_inds = torch.randint(2, (N, 1)).cuda().float()
138 | x = torch.rand((N, 1)).cuda().float() * 15
139 | y = torch.rand((N, 1)).cuda().float() * 15
140 | w = torch.rand((N, 1)).cuda().float() * 10
141 | h = torch.rand((N, 1)).cuda().float() * 10
142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
143 | offset = torch.randn(N, 2, 3, 3).cuda()
144 | input.requires_grad = True
145 | offset.requires_grad = True
146 |
147 | spatial_scale = 1.0 / 4
148 | pooled_size = 3
149 | output_dim = 3
150 | no_trans = 0
151 | group_size = 1
152 | trans_std = 0.0
153 | sample_per_part = 4
154 | part_size = pooled_size
155 |
156 | print('check_gradient_dpooling:',
157 | gradcheck(dcn_v2_pooling, (input, rois, offset,
158 | spatial_scale,
159 | pooled_size,
160 | output_dim,
161 | no_trans,
162 | group_size,
163 | part_size,
164 | sample_per_part,
165 | trans_std),
166 | eps=1e-4))
167 |
168 |
169 | def example_dconv():
170 | input = torch.randn(2, 64, 128, 128).cuda()
171 | # wrap all things (offset and mask) in DCN
172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1,
173 | padding=1, deformable_groups=2).cuda()
174 | # print(dcn.weight.shape, input.shape)
175 | output = dcn(input)
176 |     target = output.new(*output.size())
177 |     target.data.uniform_(-0.01, 0.01)
178 |     error = (target - output).mean()
179 | error.backward()
180 | print(output.shape)
181 |
182 |
183 | def example_dpooling():
184 | input = torch.randn(2, 32, 64, 64).cuda()
185 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
186 | x = torch.randint(256, (20, 1)).cuda().float()
187 | y = torch.randint(256, (20, 1)).cuda().float()
188 | w = torch.randint(64, (20, 1)).cuda().float()
189 | h = torch.randint(64, (20, 1)).cuda().float()
190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
191 | offset = torch.randn(20, 2, 7, 7).cuda()
192 | input.requires_grad = True
193 | offset.requires_grad = True
194 |
195 | # normal roi_align
196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
197 | pooled_size=7,
198 | output_dim=32,
199 | no_trans=True,
200 | group_size=1,
201 | trans_std=0.1).cuda()
202 |
203 | # deformable pooling
204 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
205 | pooled_size=7,
206 | output_dim=32,
207 | no_trans=False,
208 | group_size=1,
209 | trans_std=0.1).cuda()
210 |
211 | out = pooling(input, rois, offset)
212 | dout = dpooling(input, rois, offset)
213 | print(out.shape)
214 | print(dout.shape)
215 |
216 | target_out = out.new(*out.size())
217 | target_out.data.uniform_(-0.01, 0.01)
218 | target_dout = dout.new(*dout.size())
219 | target_dout.data.uniform_(-0.01, 0.01)
220 | e = (target_out - out).mean()
221 | e.backward()
222 | e = (target_dout - dout).mean()
223 | e.backward()
224 |
225 |
226 | def example_mdpooling():
227 | input = torch.randn(2, 32, 64, 64).cuda()
228 | input.requires_grad = True
229 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
230 | x = torch.randint(256, (20, 1)).cuda().float()
231 | y = torch.randint(256, (20, 1)).cuda().float()
232 | w = torch.randint(64, (20, 1)).cuda().float()
233 | h = torch.randint(64, (20, 1)).cuda().float()
234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
235 |
236 |     # modulated deformable pooling (V2)
237 | dpooling = DCNPooling(spatial_scale=1.0 / 4,
238 | pooled_size=7,
239 | output_dim=32,
240 | no_trans=False,
241 | group_size=1,
242 | trans_std=0.1,
243 | deform_fc_dim=1024).cuda()
244 |
245 | dout = dpooling(input, rois)
246 | target = dout.new(*dout.size())
247 | target.data.uniform_(-0.1, 0.1)
248 | error = (target - dout).mean()
249 | error.backward()
250 | print(dout.shape)
251 |
252 |
253 | if __name__ == '__main__':
254 |
255 | example_dconv()
256 | # example_dpooling()
257 | # example_mdpooling()
258 |
259 | # check_pooling_zero_offset()
260 | # zero offset check
261 | # if inC == outC:
262 | # check_zero_offset()
263 |
264 | # check_gradient_dpooling()
265 |     # check_gradient_dconv()
266 |     # """
267 |     # ****** Note: the "backward is not reentrant" error may not be a serious problem,
268 |     # ****** since the max error is less than 1e-7;
269 |     # ****** still looking for what triggers this problem.
270 |     # """
271 |
--------------------------------------------------------------------------------
/model/TSConv.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | sys.path.append("..")
4 | import torch.nn as nn
5 | from collections import OrderedDict
6 | from model.backbones.resnet import Resnet101
7 | from model.backbones.darknet53 import Darknet53
8 | from model.neck.neck import Neck
9 | from model.head.head import Head1, Head2
10 | from model.layers.convolutions import Convolutional
11 | from utils.utils_basic import *
12 |
13 | class GGHL(nn.Module):
14 | def __init__(self, init_weights=True, inputsize=int(cfg.TRAIN["TRAIN_IMG_SIZE"]), weight_path=None):
15 | super(GGHL, self).__init__()
16 | self.__strides = torch.FloatTensor(cfg.MODEL["STRIDES"])
17 | self.__nC = cfg.DATA["NUM"]
18 | self.__out_channel = self.__nC + 4 + 5 + 1
19 |         self.__backbone = Darknet53()  # Resnet101()
20 |         # self.__backbone = PVT2(weight_path=weight_path)
21 | # self.__fpn = Neck(fileters_in=[512, 320, 128, 64], fileters_out=self.__out_channel)
22 | self.__fpn = Neck(fileters_in=[1024, 512, 256, 128], fileters_in_ratio=1)
23 | self.__head1_s = Head1(filters_in=128, stride=self.__strides[0])
24 | self.__head1_m = Head1(filters_in=256, stride=self.__strides[1])
25 | self.__head1_l = Head1(filters_in=512, stride=self.__strides[2])
26 |
27 | self.__head2_s = Head2(filters_in=128, nC=self.__nC, stride=self.__strides[0])
28 | self.__head2_m = Head2(filters_in=256, nC=self.__nC, stride=self.__strides[1])
29 | self.__head2_l = Head2(filters_in=512, nC=self.__nC, stride=self.__strides[2])
30 |
31 | if init_weights:
32 | self.__init_weights()
33 |
34 | def forward(self, x):
35 | out = []
36 |         x_8, x_16, x_32 = self.__backbone(x)
37 | loc2, cls2, loc1, cls1, loc0, cls0 = self.__fpn(x_32, x_16, x_8)
38 | x_s, x_s_de, offsets_loc_s, offsets_cls_s, mask_loc_s, mask_cls_s, w_c8_s, coor_dc_s = self.__head1_s(loc2)
39 | x_m, x_m_de, offsets_loc_m, offsets_cls_m, mask_loc_m, mask_cls_m, w_c8_m, coor_dc_m = self.__head1_m(loc1)
40 | x_l, x_l_de, offsets_loc_l, offsets_cls_l, mask_loc_l, mask_cls_l, w_c8_l, coor_dc_l = self.__head1_l(loc0)
41 |
42 | out_s, out_s_de = self.__head2_s(x_s_de, loc2, cls2, offsets_loc_s, offsets_cls_s, mask_loc_s, mask_cls_s, w_c8_s)
43 | out_m, out_m_de = self.__head2_m(x_m_de, loc1, cls1, offsets_loc_m, offsets_cls_m, mask_loc_m, mask_cls_m, w_c8_m)
44 | out_l, out_l_de = self.__head2_l(x_l_de, loc0, cls0, offsets_loc_l, offsets_cls_l, mask_loc_l, mask_cls_l, w_c8_l)
45 |
46 | out.append((x_s, x_s_de, out_s, out_s_de, coor_dc_s))
47 | out.append((x_m, x_m_de, out_m, out_m_de, coor_dc_m))
48 | out.append((x_l, x_l_de, out_l, out_l_de, coor_dc_l))
49 |
50 | if self.training:
51 | p1, p1_d, p2, p2_d, _ = list(zip(*out))
52 | return p1, p1_d, p2, p2_d
53 | else:
54 | p1, p1_d, p2, p2_d, offsets_d = list(zip(*out))
55 | return p1, p1_d, p2, torch.cat(p2_d, 0), torch.cat(offsets_d, 0)
56 |
57 | def __init_weights(self):
58 |         "Note: nn.Conv2d and nn.BatchNorm2d use the same initialization scheme throughout the model"
59 | for m in self.modules():
60 | if isinstance(m, nn.Conv2d):
61 | torch.nn.init.normal_(m.weight.data, 0.0, 0.01)
62 | if m.bias is not None:
63 | m.bias.data.zero_()
64 | # print("initing {}".format(m))
65 | elif isinstance(m, nn.BatchNorm2d):
66 | torch.nn.init.constant_(m.weight.data, 1.0)
67 | torch.nn.init.constant_(m.bias.data, 0.0)
68 | # print("initing {}".format(m))
69 | elif isinstance(m, nn.Linear):
70 | m.weight.data.normal_(0, 0.01)
71 | if m.bias is not None:
72 | m.bias.data.zero_()
73 | # print("initing {}".format(m))
74 |
75 | def load_resnet101_weights(self, weight_file='/home/hzc/v2/weight/resnet101-cd907fc2.pth'):
76 |         model_list = self.__backbone.state_dict().keys()
77 | # print(model_list)
78 | weight = torch.load(weight_file)
79 | # print(weight.keys())
80 | new_weight = OrderedDict()
81 |         # zip stops at the shortest of its input iterables
82 | for model_key, weight_key, weight_value in zip(model_list, weight.keys(), weight.values()):
83 | if model_key[9:] == weight_key:
84 | new_weight[model_key] = weight_value
85 |         self.__backbone.load_state_dict(new_weight)
86 |
87 | def load_darknet_weights(self, weight_file, cutoff=52):
88 | "https://github.com/ultralytics/yolov3/blob/master/models.py"
89 | print("load darknet weights : ", weight_file)
90 | with open(weight_file, 'rb') as f:
91 | _ = np.fromfile(f, dtype=np.int32, count=5)
92 | weights = np.fromfile(f, dtype=np.float32)
93 | count = 0
94 | ptr = 0
95 | for m in self.modules():
96 | if isinstance(m, Convolutional):
97 |                 # only load weights for the backbone's conv layers
98 | if count == cutoff:
99 | break
100 | count += 1
101 | conv_layer = m._Convolutional__conv
102 | if m.norm == "bn":
103 | # Load BN bias, weights, running mean and running variance
104 | bn_layer = m._Convolutional__norm
105 | num_b = bn_layer.bias.numel() # Number of biases
106 | # Bias
107 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias.data)
108 | bn_layer.bias.data.copy_(bn_b)
109 | ptr += num_b
110 | # Weight
111 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight.data)
112 | bn_layer.weight.data.copy_(bn_w)
113 | ptr += num_b
114 | # Running Mean
115 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean)
116 | bn_layer.running_mean.data.copy_(bn_rm)
117 | ptr += num_b
118 | # Running Var
119 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var)
120 | bn_layer.running_var.data.copy_(bn_rv)
121 | ptr += num_b
122 | # print("loading weight {}".format(bn_layer))
123 | elif m.norm == "gn":
124 | # Load GN bias, weights
125 | bn_layer = m._Convolutional__norm
126 | num_b = bn_layer.bias.numel() # Number of biases
127 | # Bias
128 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias.data)
129 | bn_layer.bias.data.copy_(bn_b)
130 | ptr += num_b
131 | # Weight
132 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight.data)
133 | bn_layer.weight.data.copy_(bn_w)
134 | ptr += num_b
135 | else:
136 | # Load conv. bias
137 | num_b = conv_layer.bias.numel()
138 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias.data)
139 | conv_layer.bias.data.copy_(conv_b)
140 | ptr += num_b
141 | # Load conv. weights
142 | num_w = conv_layer.weight.numel()
143 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight.data)
144 | conv_layer.weight.data.copy_(conv_w)
145 | ptr += num_w
146 | # print("loading weight {}".format(conv_layer))
147 | print("loading weight number: {}".format(count))
148 |
149 |
150 |
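A hedged sketch of driving this model end to end — the 800 x 800 input size and the stride set are assumptions; the real values come from config/config.py, which this file reads as cfg:

```python
import torch
from model.TSConv import GGHL

model = GGHL(init_weights=True)
imgs = torch.randn(1, 3, 800, 800)  # TRAIN_IMG_SIZE is configured in cfg

model.train()
p1, p1_d, p2, p2_d = model(imgs)    # per-scale raw and decoded head outputs

model.eval()
with torch.no_grad():
    # decoded predictions and sampling offsets concatenated across scales
    _, _, _, preds, offsets = model(imgs)
```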
--------------------------------------------------------------------------------
/model/__pycache__/GGHL4.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL4.cpython-38.pyc
--------------------------------------------------------------------------------
/model/__pycache__/GGHL4.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL4.cpython-39.pyc
--------------------------------------------------------------------------------
/model/__pycache__/GGHL6.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL6.cpython-39.pyc
--------------------------------------------------------------------------------
/model/__pycache__/GGHL6single.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL6single.cpython-39.pyc
--------------------------------------------------------------------------------
/model/__pycache__/GGHL8.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHL8.cpython-39.pyc
--------------------------------------------------------------------------------
/model/__pycache__/GGHLv2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHLv2.cpython-38.pyc
--------------------------------------------------------------------------------
/model/__pycache__/GGHLv2.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/GGHLv2.cpython-39.pyc
--------------------------------------------------------------------------------
/model/__pycache__/double3090.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/double3090.cpython-38.pyc
--------------------------------------------------------------------------------
/model/__pycache__/double3090.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/__pycache__/double3090.cpython-39.pyc
--------------------------------------------------------------------------------
/model/backbones/__pycache__/darknet53.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/backbones/__pycache__/darknet53.cpython-38.pyc
--------------------------------------------------------------------------------
/model/backbones/__pycache__/darknet53.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/backbones/__pycache__/darknet53.cpython-39.pyc
--------------------------------------------------------------------------------
/model/backbones/__pycache__/model_resnet.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/backbones/__pycache__/model_resnet.cpython-39.pyc
--------------------------------------------------------------------------------
/model/backbones/__pycache__/resnet.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/backbones/__pycache__/resnet.cpython-39.pyc
--------------------------------------------------------------------------------
/model/backbones/darknet53.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from ..layers.convolutions import Convolutional
6 | from ..layers.conv_blocks import Residual_block
7 |
8 | class Darknet53(nn.Module):
9 |
10 | def __init__(self, pre_weight=None):
11 | super(Darknet53, self).__init__()
12 | self.__conv = Convolutional(filters_in=3, filters_out=32, kernel_size=3, stride=1, pad=1, norm='bn',
13 | activate='leaky')
14 |
15 | self.__conv_5_0 = Convolutional(filters_in=32, filters_out=64, kernel_size=3, stride=2, pad=1, norm='bn',activate='leaky')
16 | self.__rb_5_0 = Residual_block(filters_in=64, filters_out=64, filters_medium=32)
17 |
18 | self.__conv_5_1 = Convolutional(filters_in=64, filters_out=128, kernel_size=3, stride=2, pad=1, norm='bn',
19 | activate='leaky')
20 | self.__rb_5_1_0 = Residual_block(filters_in=128, filters_out=128, filters_medium=64)
21 | self.__rb_5_1_1 = Residual_block(filters_in=128, filters_out=128, filters_medium=64)
22 |
23 | self.__conv_5_2 = Convolutional(filters_in=128, filters_out=256, kernel_size=3, stride=2, pad=1, norm='bn',
24 | activate='leaky')
25 | self.__rb_5_2_0 = Residual_block(filters_in=256, filters_out=256, filters_medium=128)
26 | self.__rb_5_2_1 = Residual_block(filters_in=256, filters_out=256, filters_medium=128)
27 | self.__rb_5_2_2 = Residual_block(filters_in=256, filters_out=256, filters_medium=128)
28 | self.__rb_5_2_3 = Residual_block(filters_in=256, filters_out=256, filters_medium=128)
29 | self.__rb_5_2_4 = Residual_block(filters_in=256, filters_out=256, filters_medium=128)
30 | self.__rb_5_2_5 = Residual_block(filters_in=256, filters_out=256, filters_medium=128)
31 | self.__rb_5_2_6 = Residual_block(filters_in=256, filters_out=256, filters_medium=128)
32 | self.__rb_5_2_7 = Residual_block(filters_in=256, filters_out=256, filters_medium=128)
33 |
34 | self.__conv_5_3 = Convolutional(filters_in=256, filters_out=512, kernel_size=3, stride=2, pad=1, norm='bn',
35 | activate='leaky')
36 | self.__rb_5_3_0 = Residual_block(filters_in=512, filters_out=512, filters_medium=256)
37 | self.__rb_5_3_1 = Residual_block(filters_in=512, filters_out=512, filters_medium=256)
38 | self.__rb_5_3_2 = Residual_block(filters_in=512, filters_out=512, filters_medium=256)
39 | self.__rb_5_3_3 = Residual_block(filters_in=512, filters_out=512, filters_medium=256)
40 | self.__rb_5_3_4 = Residual_block(filters_in=512, filters_out=512, filters_medium=256)
41 | self.__rb_5_3_5 = Residual_block(filters_in=512, filters_out=512, filters_medium=256)
42 | self.__rb_5_3_6 = Residual_block(filters_in=512, filters_out=512, filters_medium=256)
43 | self.__rb_5_3_7 = Residual_block(filters_in=512, filters_out=512, filters_medium=256)
44 |
45 |
46 | self.__conv_5_4 = Convolutional(filters_in=512, filters_out=1024, kernel_size=3, stride=2, pad=1, norm='bn',
47 | activate='leaky')
48 | self.__rb_5_4_0 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512)
49 | self.__rb_5_4_1 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512)
50 | self.__rb_5_4_2 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512)
51 | self.__rb_5_4_3 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512)
52 |
53 | #self.__focus = Focus(c1=32, c2=64, k=3, p=1)
54 |
55 | def forward(self, x):
56 | x = self.__conv(x)
57 | x0_0 = self.__conv_5_0(x)
58 |
59 | #x0_0 = self.__focus(x)
60 | x0_1 = self.__rb_5_0(x0_0)
61 |
62 | x1_0 = self.__conv_5_1(x0_1)
63 | x1_1 = self.__rb_5_1_0(x1_0)
64 | x1_2 = self.__rb_5_1_1(x1_1)
65 |
66 | x2_0 = self.__conv_5_2(x1_2)
67 | x2_1 = self.__rb_5_2_0(x2_0)
68 | x2_2 = self.__rb_5_2_1(x2_1)
69 | x2_3 = self.__rb_5_2_2(x2_2)
70 | x2_4 = self.__rb_5_2_3(x2_3)
71 | x2_5 = self.__rb_5_2_4(x2_4)
72 | x2_6 = self.__rb_5_2_5(x2_5)
73 | x2_7 = self.__rb_5_2_6(x2_6)
74 | x2_8 = self.__rb_5_2_7(x2_7)
75 |
76 | x3_0 = self.__conv_5_3(x2_8)
77 | x3_1 = self.__rb_5_3_0(x3_0)
78 | x3_2 = self.__rb_5_3_1(x3_1)
79 | x3_3 = self.__rb_5_3_2(x3_2)
80 | x3_4 = self.__rb_5_3_3(x3_3)
81 | x3_5 = self.__rb_5_3_4(x3_4)
82 | x3_6 = self.__rb_5_3_5(x3_5)
83 | x3_7 = self.__rb_5_3_6(x3_6)
84 | x3_8 = self.__rb_5_3_7(x3_7)
85 |
86 | x4_0 = self.__conv_5_4(x3_8)
87 | x4_1 = self.__rb_5_4_0(x4_0)
88 | x4_2 = self.__rb_5_4_1(x4_1)
89 | x4_3 = self.__rb_5_4_2(x4_2)
90 | x4_4 = self.__rb_5_4_3(x4_3)
91 |
92 | return x2_8, x3_8, x4_4
93 |
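The three returned maps sit at strides 8, 16, and 32 of the input with 256, 512, and 1024 channels; these are the multi-scale features the Neck consumes in TSConv.py. A quick shape check (illustrative; 416 is an arbitrary choice, any input size divisible by 32 works):

```python
import torch
from model.backbones.darknet53 import Darknet53

net = Darknet53().eval()
with torch.no_grad():
    x8, x16, x32 = net(torch.randn(1, 3, 416, 416))
print(x8.shape, x16.shape, x32.shape)
# torch.Size([1, 256, 52, 52]) torch.Size([1, 512, 26, 26]) torch.Size([1, 1024, 13, 13])
```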
--------------------------------------------------------------------------------
/model/backbones/mobilenetv2.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference : https://github.com/d-li14/mobilenetv2.pytorch/blob/master/models/imagenet/mobilenetv2.py
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import math
7 |
8 | __all__ = ['mobilenetv2']
9 |
10 | def _make_divisible(v, divisor, min_value=None):
11 | """
12 | This function is taken from the original tf repo.
13 | It ensures that all layers have a channel number that is divisible by 8
14 | It can be seen here:
15 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
16 | :param v:
17 | :param divisor:
18 | :param min_value:
19 | :return:
20 | """
21 | if min_value is None:
22 | min_value = divisor
23 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
24 | # Make sure that round down does not go down by more than 10%.
25 | if new_v < 0.9 * v:
26 | new_v += divisor
27 | return new_v
28 |
29 | def conv_3x3_bn(inp, oup, stride):
30 | return nn.Sequential(
31 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
32 | nn.BatchNorm2d(oup),
33 | nn.ReLU6(inplace=True),
34 | )
35 |
36 |
37 | def conv_1x1_bn(inp, oup):
38 | return nn.Sequential(
39 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
40 | nn.BatchNorm2d(oup),
41 | nn.ReLU6(inplace=True)
42 | )
43 |
44 | class InvertedResidual(nn.Module):
45 | def __init__(self, inp, oup, stride, expand_ratio):
46 | super(InvertedResidual, self).__init__()
47 | assert stride in [1, 2]
48 |
49 | hidden_dim = round(inp * expand_ratio)
50 | self.identity = stride == 1 and inp == oup
51 |
52 | if expand_ratio == 1:
53 | self.conv = nn.Sequential(
54 | # dw
55 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
56 | nn.BatchNorm2d(hidden_dim),
57 | nn.ReLU6(inplace=True),
58 | # pw-linear
59 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
60 | nn.BatchNorm2d(oup),
61 | )
62 | else:
63 | self.conv = nn.Sequential(
64 | # pw
65 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
66 | nn.BatchNorm2d(hidden_dim),
67 | nn.ReLU6(inplace=True),
68 | # dw
69 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
70 | nn.BatchNorm2d(hidden_dim),
71 | nn.ReLU6(inplace=True),
72 | # pw-linear
73 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
74 | nn.BatchNorm2d(oup),
75 | )
76 |
77 | def forward(self, x):
78 | conv = self.conv(x)
79 | if self.identity:
80 | return x + conv
81 | else:
82 | return conv
83 |
84 | class _MobileNetV2(nn.Module):
85 | def __init__(self, num_classes=1000, width_mult=1.):
86 | super(_MobileNetV2, self).__init__()
87 | # setting of inverted residual blocks
88 | self.cfgs = [
89 | # t, c, n, s
90 | [1, 16, 1, 1],
91 | [6, 24, 2, 2],
92 | [6, 32, 3, 2],
93 | [6, 64, 4, 2],
94 | [6, 96, 3, 1],
95 | [6, 160, 3, 2],
96 | [6, 320, 1, 1],
97 | ]
98 | # building first layer
99 | input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8)
100 | layers = [conv_3x3_bn(3, input_channel, 2)]
101 | # building inverted residual blocks
102 | block = InvertedResidual
103 | for t, c, n, s in self.cfgs:
104 | output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8)
105 | for i in range(n):
106 | layers.append(block(input_channel, output_channel, s if i == 0 else 1, t))
107 | input_channel = output_channel
108 | self.features = nn.Sequential(*layers)
109 | # building last several layers
110 | output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280
111 | self.conv = conv_1x1_bn(input_channel, output_channel)
112 | self._initialize_weights()
113 |
114 | def forward(self, x):
115 | x = self.features(x)
116 | x = self.conv(x)
117 | return x
118 |
119 | def _initialize_weights(self):
120 | print("**" * 10, "Initing MobilenetV2 weights", "**" * 10)
121 |
122 | for m in self.modules():
123 | if isinstance(m, nn.Conv2d):
124 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
125 | m.weight.data.normal_(0, math.sqrt(2. / n))
126 | if m.bias is not None:
127 | m.bias.data.zero_()
128 | print("initing {}".format(m))
129 |
130 | elif isinstance(m, nn.BatchNorm2d):
131 | m.weight.data.fill_(1)
132 | m.bias.data.zero_()
133 | print("initing {}".format(m))
134 |
135 | elif isinstance(m, nn.Linear):
136 | m.weight.data.normal_(0, 0.01)
137 | if m.bias is not None:
138 | m.bias.data.zero_()
139 | print("initing {}".format(m))
140 |
141 | class FeatureExtractor(nn.Module):
142 | def __init__(self, submodule, extracted_layers):
143 | super(FeatureExtractor, self).__init__()
144 | self.submodule = submodule
145 | self.extracted_layers = extracted_layers
146 |
147 | def forward(self, x):
148 | outputs = []
149 | for name, module in self.submodule._modules.items():
150 |             if name == "features":
151 | for f_name, f_module in module._modules.items():
152 | x = f_module(x)
153 | if f_name in self.extracted_layers:
154 | outputs.append(x)
155 |             if name == "conv":
156 | x = module(x)
157 | if name in self.extracted_layers:
158 | outputs.append(x)
159 | return outputs
160 |
161 | class MobilenetV2(nn.Module):
162 | def __init__(self, extract_list, weight_path=None, width_mult=1.):
163 | super(MobilenetV2, self).__init__()
164 |
165 | self.__submodule = _MobileNetV2(width_mult=width_mult)
166 | if weight_path:
167 | print("*"*40, "\nLoading weight of MobilenetV2 : {}".format(weight_path))
168 | pretrained_dict = torch.load(weight_path)
169 | model_dict = self.__submodule.state_dict()
170 | pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict}
171 | model_dict.update(pretrained_dict)
172 | self.__submodule.load_state_dict(model_dict)
173 | del pretrained_dict
174 | print("Loaded weight of MobilenetV2 : {}".format(weight_path))
175 | self.__extractor = FeatureExtractor(self.__submodule, extract_list)
176 |
177 | def forward(self, x):
178 | return self.__extractor(x)
179 |
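A hedged usage sketch of the extractor: extract_list names keys of the `features` nn.Sequential (string indices) plus optionally "conv". The indices below are assumptions chosen to tap stride-8, stride-16, and stride-32 maps at width_mult=1.0:

```python
import torch
from model.backbones.mobilenetv2 import MobilenetV2

net = MobilenetV2(extract_list=["6", "13", "conv"])
with torch.no_grad():
    feats = net(torch.randn(1, 3, 224, 224))
for f in feats:
    print(f.shape)
# torch.Size([1, 32, 28, 28]) torch.Size([1, 96, 14, 14]) torch.Size([1, 1280, 7, 7])
```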
--------------------------------------------------------------------------------
/model/backbones/resnet.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 | from torch import nn
5 | from torchvision.models._utils import IntermediateLayerGetter
6 |
7 | from . import model_resnet
8 |
9 |
10 | class FrozenBatchNorm2d(torch.nn.Module):
11 | """
12 | BatchNorm2d where the batch statistics and the affine parameters are fixed.
13 |     Copy-paste from torchvision.misc.ops with added eps before rsqrt,
14 | without which any other models than torchvision.models.resnet[18,34,50,101]
15 | produce nans.
16 | """
17 |
18 | def __init__(self, n):
19 | super(FrozenBatchNorm2d, self).__init__()
20 | self.register_buffer("weight", torch.ones(n))
21 | self.register_buffer("bias", torch.zeros(n))
22 | self.register_buffer("running_mean", torch.zeros(n))
23 | self.register_buffer("running_var", torch.ones(n))
24 |
25 | def _load_from_state_dict(
26 | self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
27 | ):
28 | num_batches_tracked_key = prefix + "num_batches_tracked"
29 | if num_batches_tracked_key in state_dict:
30 | del state_dict[num_batches_tracked_key]
31 |
32 | super(FrozenBatchNorm2d, self)._load_from_state_dict(
33 | state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
34 | )
35 |
36 | def forward(self, x):
37 | # move reshapes to the beginning
38 | # to make it fuser-friendly
39 | w = self.weight.reshape(1, -1, 1, 1)
40 | b = self.bias.reshape(1, -1, 1, 1)
41 | rv = self.running_var.reshape(1, -1, 1, 1)
42 | rm = self.running_mean.reshape(1, -1, 1, 1)
43 | eps = 1e-5
44 | scale = w * (rv + eps).rsqrt()
45 | bias = b - rm * scale
46 | return x * scale + bias
47 |
48 |
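Since the statistics are frozen, the forward above folds them into a single per-channel affine: y = x * w / sqrt(rv + eps) + (b - rm * w / sqrt(rv + eps)). A quick sketch (not from the repo) checking this against nn.BatchNorm2d in eval mode, which uses the same eps = 1e-5:

import torch
from torch import nn

n = 8
fbn = FrozenBatchNorm2d(n)
fbn.running_mean.normal_()          # give the frozen buffers non-trivial values
fbn.running_var.uniform_(0.5, 2.0)

bn = nn.BatchNorm2d(n).eval()       # eval mode uses running stats, like FrozenBN
bn.weight.data.copy_(fbn.weight)
bn.bias.data.copy_(fbn.bias)
bn.running_mean.copy_(fbn.running_mean)
bn.running_var.copy_(fbn.running_var)

x = torch.randn(2, n, 4, 4)
assert torch.allclose(fbn(x), bn(x), atol=1e-5)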
49 | class BackboneBase(nn.Module):
50 | def __init__(self, backbone: nn.Module, num_channels: int):
51 | """Wraps a backbone so that it returns the intermediate feature maps of
52 | layers 2-4, with everything before layer2 frozen.
53 |
54 | Parameters
55 | ----------
56 | backbone : nn.Module
57 | the backbone network
58 | num_channels : int
59 | the number of channels in the output feature map.
60 |
61 | """
62 | super().__init__()
63 | for name, parameter in backbone.named_parameters():
64 | # print(name, parameter.shape)
65 | if "layer2" not in name and "layer3" not in name and "layer4" not in name:
66 | parameter.requires_grad_(False)
67 |
68 | # return_layers = {"layer2": "3", "layer3": "5", "layer4": "2"}
69 | return_layers = {"layer2": "3", "layer3": "22", "layer4": "2"}
70 |
71 | self.backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)
72 | self.num_channels = num_channels
73 |
74 | def forward(self, x):
75 | xs = self.backbone(x)
76 |
77 | fmp_list = []
78 | for name, fmp in xs.items():
79 | fmp_list.append(fmp)
80 |
81 | return fmp_list[0], fmp_list[1], fmp_list[2]
82 |
83 |
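For reference, IntermediateLayerGetter's return_layers maps child-module names to arbitrary output labels; BackboneBase then discards the labels and returns the three maps in insertion order (layer2, layer3, layer4, i.e. strides 8/16/32). A standalone sketch of that behaviour (the input size is arbitrary):

import torch
import torchvision
from torchvision.models._utils import IntermediateLayerGetter

resnet = torchvision.models.resnet50()
getter = IntermediateLayerGetter(
    resnet, return_layers={"layer2": "3", "layer3": "22", "layer4": "2"}
)
outs = getter(torch.randn(1, 3, 224, 224))
for name, fmp in outs.items():
    print(name, tuple(fmp.shape))
# "3"  (1, 512, 28, 28)   stride 8
# "22" (1, 1024, 14, 14)  stride 16
# "2"  (1, 2048, 7, 7)    stride 32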
84 | class Backbone(BackboneBase):
85 | """ResNet backbone with frozen BatchNorm."""
86 |
87 | def __init__(self, name: str, pretrained: bool, dilation: bool, norm_type: str):
88 | if norm_type == "BN":
89 | norm_layer = nn.BatchNorm2d
90 | elif norm_type == "FrozeBN":
91 | norm_layer = FrozenBatchNorm2d
92 | # NOTE: get the backbone network
93 | backbone = getattr(model_resnet, name)(
94 | replace_stride_with_dilation=[False, False, dilation], pretrained=pretrained, norm_layer=norm_layer
95 | )
96 | num_channels = 512 if name in ("resnet18", "resnet34") else 2048
97 | super().__init__(backbone, num_channels)
98 |
99 |
100 | # def build_resnet(model_name="resnet18", pretrained=False, norm_type="BN"):
101 | # if model_name in ["resnet18", "resnet34", "resnet50", "resnet101", "resnext101_32x8d"]:
102 | # backbone = Backbone(model_name, pretrained, dilation=False, norm_type=norm_type)
103 | # elif model_name in ["resnet50-d", "resnet101-d"]:
104 | # backbone = Backbone(model_name[:-2], pretrained, dilation=True, norm_type=norm_type)
105 |
106 | # # return backbone, backbone.num_channels
107 | # return backbone
108 |
109 |
110 | # Factory for a ResNet-50 backbone.
111 | def Resnet50(pretrained=False, norm_type="BN"):
112 | """`Resnet50` is a function that returns a `Backbone` object built with:
113 |
114 | - `name`: "resnet50"
115 | - `pretrained`: as passed (default: False)
116 | - `dilation`: False
117 | - `norm_type`: as passed (default: "BN")
118 |
119 | The `Backbone` class is defined above in this file.
120 |
121 | Parameters
122 | ----------
123 | pretrained, optional
124 | Whether to load pretrained weights.
125 | norm_type, optional
126 | "BN" or "FrozeBN"
127 |
128 | Returns
129 | -------
130 | A backbone object with the following parameters:
131 | - name: resnet50
132 | - pretrained: as passed
133 | - dilation: False
134 | - norm_type: as passed
135 |
136 | """
137 |
138 | return Backbone("resnet50", pretrained, dilation=False, norm_type=norm_type)
139 |
140 | # Factory for a ResNet-101 backbone.
141 | def Resnet101(pretrained=False, norm_type="BN"):
142 | """`Resnet101` is a function that returns a `Backbone` object built with:
143 |
144 | - `name`: "resnet101"
145 | - `pretrained`: as passed (default: False)
146 | - `dilation`: False
147 | - `norm_type`: as passed (default: "BN")
148 |
149 | The `Backbone` class is defined above in this file.
150 |
151 | Parameters
152 | ----------
153 | pretrained, optional
154 | Whether to load pretrained weights.
155 | norm_type, optional
156 | "BN" or "FrozeBN"
157 |
158 | Returns
159 | -------
160 | A backbone object with the following parameters:
161 | - name: resnet101
162 | - pretrained: as passed
163 | - dilation: False
164 | - norm_type: as passed
165 |
166 | """
167 |
168 | return Backbone("resnet101", pretrained, dilation=False, norm_type=norm_type)
169 |
170 | if __name__ == "__main__":
171 | model = Resnet101(pretrained=False, norm_type="BN")
172 | model_list = model.state_dict().keys()
173 | # print(model_list)
174 | weight = torch.load(
175 | "D:\\Github\\v2\\weight\\resnet101-cd907fc2.pth"
176 | )
177 | # print(weight.keys())
178 | new_weight = OrderedDict()
179 | # zip stops at the shortest of the zipped iterables by default
180 | for model_key, weight_key, weight_value in zip(model_list, weight.keys(), weight.values()):
181 | if model_key[9:] == weight_key:
182 | new_weight[model_key] = weight_value
183 | model.load_state_dict(new_weight)
184 |
185 |
186 | # print('hello world')
187 | # print(type(feat_dim))
188 | # x = torch.randn(3, 3, 800, 800)
189 | # x_s, x_m, x_l = model(x)
190 | # print(x_s.size())
191 | # print(x_m.size())
192 | # print(x_l.size())
193 | # from rich import print
194 | # model_keys = list(model.state_dict().keys())
195 | # print(model_keys)
196 |
--------------------------------------------------------------------------------
/model/head/__pycache__/head10.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head10.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head10single.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head10single.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head11.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head11.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head3.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head3.cpython-38.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head4.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head4.cpython-38.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head4.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head4.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head5.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head5.cpython-38.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head5.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head5.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head6.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head6.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head7.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head7.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head9.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head9.cpython-39.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head_GGHLv2_x3.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head_GGHLv2_x3.cpython-38.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/head_ori.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/head_ori.cpython-38.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/headv2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/headv2.cpython-38.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/headv21.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/headv21.cpython-38.pyc
--------------------------------------------------------------------------------
/model/head/__pycache__/headv21.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/head/__pycache__/headv21.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__init__.py
--------------------------------------------------------------------------------
/model/layers/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/activations.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/activations.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/activations.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/activations.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/conv_blocks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/conv_blocks.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/conv_blocks.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/conv_blocks.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/convolutions.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/convolutions.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/convolutions.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/convolutions.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/msr_blocks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/msr_blocks.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/msr_blocks.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/msr_blocks.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/multiscale_fusion_blocks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/multiscale_fusion_blocks.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/multiscale_fusion_blocks.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/multiscale_fusion_blocks.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/np_attention_blocks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/np_attention_blocks.cpython-38.pyc
--------------------------------------------------------------------------------
/model/layers/__pycache__/np_attention_blocks.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/layers/__pycache__/np_attention_blocks.cpython-39.pyc
--------------------------------------------------------------------------------
/model/layers/activations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | class Swish(nn.Module):
6 | @staticmethod
7 | def forward(x):
8 | return x * torch.sigmoid(x)
9 |
10 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
11 | @staticmethod
12 | def forward(x):
13 | # return x * F.hardsigmoid(x) # for torchscript and CoreML
14 | return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX
15 |
16 | class MemoryEfficientSwish(nn.Module):
17 | class F(torch.autograd.Function):
18 | @staticmethod
19 | def forward(ctx, x):
20 | ctx.save_for_backward(x)
21 | return x * torch.sigmoid(x)
22 | @staticmethod
23 | def backward(ctx, grad_output):
24 | x = ctx.saved_tensors[0]
25 | sx = torch.sigmoid(x)
26 | return grad_output * (sx * (1 + x * (1 - sx)))
27 | def forward(self, x):
28 | return self.F.apply(x)
29 |
30 | class Mish(nn.Module):
31 | @staticmethod
32 | def forward(x):
33 | return x * F.softplus(x).tanh()
34 |
35 | class MemoryEfficientMish(nn.Module):
36 | class F(torch.autograd.Function):
37 | @staticmethod
38 | def forward(ctx, x):
39 | ctx.save_for_backward(x)
40 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
41 | @staticmethod
42 | def backward(ctx, grad_output):
43 | x = ctx.saved_tensors[0]
44 | sx = torch.sigmoid(x)
45 | fx = F.softplus(x).tanh()
46 | return grad_output * (fx + x * sx * (1 - fx * fx))
47 | def forward(self, x):
48 | return self.F.apply(x)
49 |
50 | class FReLU(nn.Module):
51 | def __init__(self, c1, k=3): # ch_in, kernel
52 | super().__init__()
53 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1)
54 | self.bn = nn.BatchNorm2d(c1)
55 |
56 | def forward(self, x):
57 | return torch.max(x, self.bn(self.conv(x)))
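The memory-efficient variants save only x and recompute the activation in backward; their hand-written gradients can be checked against autograd. A sketch (not from the repo), using float64 as torch.autograd.gradcheck expects:

import torch

x = torch.randn(32, dtype=torch.double, requires_grad=True)
assert torch.autograd.gradcheck(MemoryEfficientSwish.F.apply, (x,))
assert torch.autograd.gradcheck(MemoryEfficientMish.F.apply, (x,))
# the forward passes agree with the plain versions
assert torch.allclose(MemoryEfficientSwish()(x), Swish.forward(x))
assert torch.allclose(MemoryEfficientMish()(x), Mish.forward(x))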
--------------------------------------------------------------------------------
/model/layers/attention_blocks.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 |
4 | class hsigmoid(nn.Module):
5 | def forward(self,x):
6 | out = nn.functional.relu6(x + 3, inplace=True) / 6  # hard sigmoid (nn.ReLU6 is a module, not a function)
7 | return out
8 |
9 | class SELayer(nn.Module):
10 | """SENet
11 | """
12 | def __init__(self, channel, reduction=16):
13 | super(SELayer, self).__init__()
14 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
15 | self.fc = nn.Sequential(
16 | nn.Linear(channel, channel // reduction, bias=False),
17 | #nn.Conv2d(in_channels=channel, out_channels=channel // reduction, kernel_size=1, stride=1, padding=0, bias=False),
18 | #nn.BatchNorm2d(channel // reduction),
19 | nn.ReLU(inplace=True),
20 | nn.Linear(channel // reduction, channel, bias=False),
21 | #nn.Conv2d(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False),
22 | #nn.BatchNorm2d(channel),
23 | nn.Sigmoid(),
24 | )
25 |
26 | def forward(self, x):
27 | b, c, _, _ = x.size()
28 | y = self.avg_pool(x).view(b, c)
29 | y = self.fc(y).view(b, c, 1, 1)
30 | return x * y.expand_as(x)
31 |
32 | class NonLocalBlock(nn.Module):
33 | """Non-local Network
34 | """
35 | def __init__(self, channel):
36 | super(NonLocalBlock, self).__init__()
37 | self.inter_channel = channel // 2
38 | self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1,padding=0, bias=False)
39 | self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
40 | self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
41 | self.softmax = nn.Softmax(dim=1)
42 | self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)
43 |
44 | def forward(self, x):
45 | # [N, C, H , W]
46 | b, c, h, w = x.size()
47 | # [N, C/2, H * W]
48 | x_phi = self.conv_phi(x).view(b, self.inter_channel, -1)
49 | # [N, H * W, C/2]
50 | x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
51 | x_g = self.conv_g(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
52 | # [N, H * W, H * W]
53 | mul_theta_phi = torch.matmul(x_theta, x_phi)
54 | mul_theta_phi = self.softmax(mul_theta_phi)
55 | # [N, H * W, C/2]
56 | mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g)
57 | # [N, C/2, H, W]
58 | mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w)
59 | # [N, C, H , W]
60 | mask = self.conv_mask(mul_theta_phi_g)
61 | out = mask + x
62 | return out
63 |
64 | class ContextBlock(nn.Module):
65 | """GCNet
66 | """
67 | def __init__(self,inplanes,ratio,pooling_type='att',
68 | fusion_types=('channel_add', )):
69 | super(ContextBlock, self).__init__()
70 | valid_fusion_types = ['channel_add', 'channel_mul']
71 | assert pooling_type in ['avg', 'att']
72 | assert isinstance(fusion_types, (list, tuple))
73 | assert all([f in valid_fusion_types for f in fusion_types])
74 | assert len(fusion_types) > 0, 'at least one fusion should be used'
75 |
76 | self.inplanes = inplanes
77 | self.ratio = ratio
78 | self.planes = int(inplanes * ratio)
79 | self.pooling_type = pooling_type
80 | self.fusion_types = fusion_types
81 |
82 | if pooling_type == 'att':
83 | self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1)
84 | self.softmax = nn.Softmax(dim=2)
85 | else:
86 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
87 | if 'channel_add' in fusion_types:
88 | self.channel_add_conv = nn.Sequential(
89 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
90 | nn.LayerNorm([self.planes, 1, 1]),
91 | nn.ReLU(inplace=True), # yapf: disable
92 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1))
93 | else:
94 | self.channel_add_conv = None
95 | if 'channel_mul' in fusion_types:
96 | self.channel_mul_conv = nn.Sequential(
97 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
98 | nn.LayerNorm([self.planes, 1, 1]),
99 | nn.ReLU(inplace=True), # yapf: disable
100 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1))
101 | else:
102 | self.channel_mul_conv = None
103 |
104 | def spatial_pool(self, x):
105 | batch, channel, height, width = x.size()
106 | if self.pooling_type == 'att':
107 | input_x = x
108 | # [N, C, H * W]
109 | input_x = input_x.view(batch, channel, height * width)
110 | # [N, 1, C, H * W]
111 | input_x = input_x.unsqueeze(1)
112 | # [N, 1, H, W]
113 | context_mask = self.conv_mask(x)
114 | # [N, 1, H * W]
115 | context_mask = context_mask.view(batch, 1, height * width)
116 | # [N, 1, H * W]
117 | context_mask = self.softmax(context_mask)
118 | # [N, 1, H * W, 1]
119 | context_mask = context_mask.unsqueeze(-1)
120 | # [N, 1, C, 1]
121 | context = torch.matmul(input_x, context_mask)
122 | # [N, C, 1, 1]
123 | context = context.view(batch, channel, 1, 1)
124 | else:
125 | # [N, C, 1, 1]
126 | context = self.avg_pool(x)
127 | return context
128 |
129 | def forward(self, x):
130 | # [N, C, 1, 1]
131 | context = self.spatial_pool(x)
132 | out = x
133 | if self.channel_mul_conv is not None:
134 | # [N, C, 1, 1]
135 | channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
136 | out = out * channel_mul_term
137 | if self.channel_add_conv is not None:
138 | # [N, C, 1, 1]
139 | channel_add_term = self.channel_add_conv(context)
140 | out = out + channel_add_term
141 | return out
142 |
143 | class SpatialCGNL(nn.Module):
144 | """Spatial CGNL block with dot-product kernel for image classification.
145 | """
146 | def __init__(self, inplanes, planes, use_scale=False, groups=None):
147 | self.use_scale = use_scale
148 | self.groups = groups
149 | super(SpatialCGNL, self).__init__()
150 | self.t = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
151 | self.p = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
152 | self.g = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
153 | self.z = nn.Conv2d(planes, inplanes, kernel_size=1, stride=1, groups=self.groups, bias=False)
154 | self.gn = nn.GroupNorm(num_groups=self.groups, num_channels=inplanes)
155 |
156 | def kernel(self, t, p, g, b, c, h, w):
157 | t = t.view(b, 1, c * h * w)
158 | p = p.view(b, 1, c * h * w)
159 | g = g.view(b, c * h * w, 1)
160 | att = torch.bmm(p, g)
161 | if self.use_scale:
162 | att = att.div((c*h*w)**0.5)
163 |
164 | x = torch.bmm(att, t)
165 | x = x.view(b, c, h, w)
166 | return x
167 |
168 | def forward(self, x):
169 | residual = x
170 | t = self.t(x)
171 | p = self.p(x)
172 | g = self.g(x)
173 | b, c, h, w = t.size()
174 | if self.groups and self.groups > 1:
175 | _c = int(c / self.groups)
176 | ts = torch.split(t, split_size_or_sections=_c, dim=1)
177 | ps = torch.split(p, split_size_or_sections=_c, dim=1)
178 | gs = torch.split(g, split_size_or_sections=_c, dim=1)
179 | _t_sequences = []
180 | for i in range(self.groups):
181 | _x = self.kernel(ts[i], ps[i], gs[i],
182 | b, _c, h, w)
183 | _t_sequences.append(_x)
184 | x = torch.cat(_t_sequences, dim=1)
185 | else:
186 | x = self.kernel(t, p, g,
187 | b, c, h, w)
188 | x = self.z(x)
189 | x = self.gn(x) + residual
190 | return x
191 |
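All four blocks here are shape-preserving, mapping (N, C, H, W) to (N, C, H, W), so they can be dropped in between conv stages. A smoke-test sketch (channel count, ratio, and groups chosen arbitrarily):

import torch

x = torch.randn(2, 64, 16, 16)
print(SELayer(64)(x).shape)                    # channel re-weighting
print(NonLocalBlock(64)(x).shape)              # global spatial attention
print(ContextBlock(64, ratio=0.25)(x).shape)   # GCNet-style context
print(SpatialCGNL(64, 32, groups=4)(x).shape)  # grouped dot-product kernel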
--------------------------------------------------------------------------------
/model/layers/conv_blocks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from ..layers.convolutions import Convolutional
4 |
5 | class Residual_block(nn.Module):
6 | def __init__(self, filters_in, filters_out, filters_medium, norm="bn", activate="leaky"):
7 | super(Residual_block, self).__init__()
8 | self.__conv1 = Convolutional(filters_in=filters_in, filters_out=filters_medium, kernel_size=1,
9 | stride=1, pad=0, norm=norm, activate=activate)
10 | self.__conv2 = Convolutional(filters_in=filters_medium, filters_out=filters_out, kernel_size=3,
11 | stride=1, pad=1, norm=norm, activate=activate)
12 |
13 | def forward(self, x):
14 | r = self.__conv1(x)
15 | r = self.__conv2(r)
16 | out = x + r
17 | return out
18 |
19 | class CSP_stage(nn.Module):
20 | def __init__(self, filters_in, n=1, activate="Swish"):
21 | super(CSP_stage, self).__init__()
22 | c_ = filters_in // 2 # hidden channels
23 | self.conv1 = Convolutional(filters_in=filters_in, filters_out=c_, kernel_size=1, stride=1, pad=0, norm="bn", activate=activate)
24 | self.conv2 = Convolutional(filters_in=filters_in, filters_out=c_, kernel_size=1, stride=1, pad=0, norm="bn", activate=activate)
25 | self.res_blocks = nn.Sequential(*[Residual_block(filters_in=c_, filters_out=c_, filters_medium=c_, norm="bn", activate=activate) for _ in range(n)])
26 | self.conv3 = Convolutional(filters_in=2 * c_, filters_out=filters_in, kernel_size=1, stride=1, pad=0, norm="bn", activate=activate)
27 |
28 | def forward(self, x):
29 | y1 = self.conv1(x)
30 | y2 = self.conv2(x)
31 | y2 = self.res_blocks(y2)
32 | return self.conv3(torch.cat([y2, y1], dim=1))
33 |
34 | class Residual_block_CSP(nn.Module):
35 | def __init__(self, filters_in):
36 | super(Residual_block_CSP, self).__init__()
37 | self.__conv1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=1,
38 | stride=1, pad=0, norm="bn", activate="leaky")
39 | self.__conv2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
40 | stride=1, pad=1, norm="bn", activate="leaky")
41 |
42 | def forward(self, x):
43 | r = self.__conv1(x)
44 | r = self.__conv2(r)
45 | out = x + r
46 | return out
47 |
48 |
49 | class InvertedResidual_block(nn.Module):
50 | def __init__(self, inp, oup, stride, expand_ratio):
51 | super(InvertedResidual_block, self).__init__()
52 | self.__stride = stride
53 | hidden_dim = int(inp * expand_ratio)
54 | self.use_res_connect = self.__stride == 1 and inp==oup
55 | if expand_ratio==1:
56 | self.__conv = nn.Sequential(
57 | Convolutional(filters_in=hidden_dim, filters_out=hidden_dim, kernel_size=3,
58 | stride=self.__stride, pad=1, groups=hidden_dim, norm="bn", activate="relu6"),
59 | Convolutional(filters_in=hidden_dim, filters_out=oup, kernel_size=1,
60 | stride=1, pad=0, norm="bn")
61 | )
62 | else:
63 | self.__conv = nn.Sequential(
64 | Convolutional(filters_in=inp, filters_out=hidden_dim, kernel_size=1,
65 | stride=1, pad=0, norm="bn", activate="relu6"),
66 | Convolutional(filters_in=hidden_dim, filters_out=hidden_dim, kernel_size=3,
67 | stride=self.__stride, pad=1, groups=hidden_dim, norm="bn", activate="relu6"),
68 | Convolutional(filters_in=hidden_dim, filters_out=oup, kernel_size=1,
69 | stride=1, pad=0, norm="bn")
70 | )
71 |
72 | def forward(self, x):
73 | if self.use_res_connect:
74 | return x + self.__conv(x)
75 | else:
76 | return self.__conv(x)
77 |
78 |
79 |
80 |
81 |
82 |
83 |
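CSP_stage keeps the tensor shape: conv1 and conv2 each halve the channels, only the conv2 branch runs the residual blocks, and conv3 fuses the concatenation back to filters_in. A minimal shape check (sizes are arbitrary):

import torch

stage = CSP_stage(filters_in=64, n=2)
x = torch.randn(1, 64, 32, 32)
assert stage(x).shape == x.shape  # (1, 64, 32, 32)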
--------------------------------------------------------------------------------
/model/layers/convolutions.py:
--------------------------------------------------------------------------------
1 | from .activations import *
2 | #from dcn_v2 import DCN
3 | #from modelR.layers.deform_conv_v2 import DeformConv2d
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import math
8 | #from dcn_v2 import DCN
9 |
10 | norm_name = {"bn": nn.BatchNorm2d}
11 | activate_name = {
12 | "relu": nn.ReLU,
13 | "leaky": nn.LeakyReLU,
14 | "relu6": nn.ReLU6,
15 | "Mish": Mish,
16 | "Swish": Swish,
17 | "MEMish": MemoryEfficientMish,
18 | "MESwish": MemoryEfficientSwish,
19 | "FReLu": FReLU
20 | }
21 |
22 | class Convolutional(nn.Module):
23 | def __init__(self, filters_in, filters_out, kernel_size, stride, pad, groups=1, dila=1, norm=None, activate=None):
24 | super(Convolutional, self).__init__()
25 | self.norm = norm
26 | self.activate = activate
27 | self.__conv = nn.Conv2d(in_channels=filters_in, out_channels=filters_out, kernel_size=kernel_size,
28 | stride=stride, padding=pad, bias=not norm, groups=groups, dilation=dila)
29 | if norm:
30 | assert norm in norm_name.keys()
31 | if norm == "bn":
32 | self.__norm = norm_name[norm](num_features=filters_out)
33 | if activate:
34 | assert activate in activate_name.keys()
35 | if activate == "leaky":
36 | self.__activate = nn.SiLU()  # note: "leaky" is remapped to SiLU here (was LeakyReLU(0.1))
37 | if activate == "relu":
38 | self.__activate = activate_name[activate](inplace=True)
39 | if activate == "relu6":
40 | self.__activate = activate_name[activate](inplace=True)
41 | if activate == "Mish":
42 | self.__activate = Mish()
43 | if activate == "Swish":
44 | self.__activate = Swish()
45 | if activate == "MEMish":
46 | self.__activate = MemoryEfficientMish()
47 | if activate == "MESwish":
48 | self.__activate = MemoryEfficientSwish()
49 | if activate == "FReLu":
50 | self.__activate = FReLU(filters_out)  # FReLU needs the channel count
51 | if activate == "SiLU":
52 | self.__activate = nn.SiLU()
53 |
54 | def forward(self, x):
55 | x = self.__conv(x)
56 | if self.norm:
57 | x = self.__norm(x)
58 | if self.activate:
59 | x = self.__activate(x)
60 | return x
61 |
62 |
63 | class Deformable_Convolutional(nn.Module):
64 | def __init__(self, filters_in, filters_out, kernel_size, stride, pad, groups=1, norm=None, activate=None):
65 | super(Deformable_Convolutional, self).__init__()
66 | self.norm = norm
67 | self.activate = activate
68 | self.__dcn = DCN(filters_in, filters_out, kernel_size=kernel_size, stride=stride, padding=pad, deformable_groups=groups)
69 | if norm:
70 | assert norm in norm_name.keys()
71 | if norm == "bn":
72 | self.__norm = norm_name[norm](num_features=filters_out)
73 | if activate:
74 | assert activate in activate_name.keys()
75 | if activate == "leaky":
76 | self.__activate = activate_name[activate](negative_slope=0.1, inplace=True)
77 | if activate == "relu":
78 | self.__activate = activate_name[activate](inplace=True)
79 | if activate == "relu6":
80 | self.__activate = activate_name[activate](inplace=True)
81 | if activate == "Mish":
82 | self.__activate = Mish()
83 | if activate == "Swish":
84 | self.__activate = Swish()
85 | if activate == "MEMish":
86 | self.__activate = MemoryEfficientMish()
87 | if activate == "MESwish":
88 | self.__activate = MemoryEfficientSwish()
89 | if activate == "FReLu":
90 | self.__activate = FReLU(filters_out)  # FReLU needs the channel count
91 |
92 | def forward(self, x):
93 | x = self.__dcn(x)
94 | if self.norm:
95 | x = self.__norm(x)
96 | if self.activate:
97 | x = self.__activate(x)
98 | return x
99 |
100 | class route_func(nn.Module):
101 | r"""CondConv: Conditionally Parameterized Convolutions for Efficient Inference
102 | https://papers.nips.cc/paper/8412-condconv-conditionally-parameterized-convolutions-for-efficient-inference.pdf
103 | Args:
104 | c_in (int): Number of channels in the input image
105 | num_experts (int): Number of experts for mixture. Default: 1
106 | """
107 |
108 | def __init__(self, c_in, num_experts):
109 | super(route_func, self).__init__()
110 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)
111 | self.fc = nn.Linear(c_in, num_experts)
112 | self.sigmoid = nn.Sigmoid()
113 |
114 | def forward(self, x):
115 | x = self.avgpool(x)
116 | x = x.view(x.size(0), -1)
117 | x = self.fc(x)
118 | x = self.sigmoid(x)
119 | return x
120 |
121 | class CondConv2d(nn.Module):
122 | def __init__(self, in_channels, out_channels, kernel_size,
123 | stride=1, padding=0, dilation=1, groups=1, bias=True,
124 | num_experts=1):
125 | super(CondConv2d, self).__init__()
126 |
127 | self.in_channels = in_channels
128 | self.out_channels = out_channels
129 | self.kernel_size = kernel_size
130 | self.stride = stride
131 | self.padding = padding
132 | self.dilation = dilation
133 | self.groups = groups
134 | self.num_experts = num_experts
135 |
136 | self.weight = nn.Parameter(torch.Tensor(num_experts, out_channels, in_channels // groups, kernel_size, kernel_size))
137 | if bias:
138 | self.bias = nn.Parameter(torch.Tensor(num_experts, out_channels))
139 | else:
140 | self.register_parameter('bias', None)
141 |
142 | nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
143 | if self.bias is not None:
144 | fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
145 | bound = 1 / math.sqrt(fan_in)
146 | nn.init.uniform_(self.bias, -bound, bound)
147 |
148 | def forward(self, x, routing_weight):
149 | b, c_in, h, w = x.size()
150 | k, c_out, c_in, kh, kw = self.weight.size()
151 | x = x.contiguous().view(1, -1, h, w)
152 | weight = self.weight.contiguous().view(k, -1)
153 |
154 | combined_weight = torch.mm(routing_weight, weight).view(-1, c_in, kh, kw)
155 |
156 | if self.bias is not None:
157 | combined_bias = torch.mm(routing_weight, self.bias).view(-1)
158 | output = F.conv2d(
159 | x, weight=combined_weight, bias=combined_bias, stride=self.stride, padding=self.padding,
160 | dilation=self.dilation, groups=self.groups * b)
161 | else:
162 | output = F.conv2d(
163 | x, weight=combined_weight, bias=None, stride=self.stride, padding=self.padding,
164 | dilation=self.dilation, groups=self.groups * b)
165 |
166 | output = output.view(b, c_out, output.size(-2), output.size(-1))
167 | return output
168 |
169 | class Cond_Convolutional(nn.Module):
170 | def __init__(self, filters_in, filters_out, kernel_size, stride=1, pad=0, dila=1, groups=1, bias=True, num_experts=1, norm=None, activate=None):
171 |
172 | super(Cond_Convolutional, self).__init__()
173 | self.norm = norm
174 | self.activate = activate
175 | self.__conv = CondConv2d(in_channels=filters_in, out_channels=filters_out, kernel_size=kernel_size,
176 | stride=stride, padding=pad, dilation=dila, groups=groups, bias=bias, num_experts=num_experts)
177 | self.__routef = route_func(filters_in, num_experts)
178 | if norm:
179 | assert norm in norm_name.keys()
180 | if norm == "bn":
181 | self.__norm = norm_name[norm](num_features=filters_out)
182 | if activate:
183 | assert activate in activate_name.keys()
184 | if activate == "leaky":
185 | self.__activate = activate_name[activate](negative_slope=0.1, inplace=True)
186 | if activate == "relu":
187 | self.__activate = activate_name[activate](inplace=True)
188 | if activate == "relu6":
189 | self.__activate = activate_name[activate](inplace=True)
190 | if activate == "Mish":
191 | self.__activate = Mish()
192 | if activate == "Swish":
193 | self.__activate = Swish()
194 | if activate == "MEMish":
195 | self.__activate = MemoryEfficientMish()
196 | if activate == "MESwish":
197 | self.__activate = MemoryEfficientSwish()
198 | if activate == "FReLu":
199 | self.__activate = FReLU(filters_out)  # FReLU needs the channel count
200 |
201 | def forward(self, x):
202 | routef = self.__routef(x)
203 | x = self.__conv(x,routef)
204 | if self.norm:
205 | x = self.__norm(x)
206 | if self.activate:
207 | x = self.__activate(x)
208 | return x
209 |
210 |
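In Cond_Convolutional, route_func turns each sample into a soft mixture over the experts, and CondConv2d folds the batch into the group dimension so every sample is convolved with its own mixed kernel. A small sketch (all sizes arbitrary):

import torch

conv = Cond_Convolutional(filters_in=16, filters_out=32, kernel_size=3,
                          pad=1, num_experts=4, norm="bn", activate="relu")
x = torch.randn(8, 16, 20, 20)
y = conv(x)
print(y.shape)  # torch.Size([8, 32, 20, 20]) -- one mixed kernel per sample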
--------------------------------------------------------------------------------
/model/layers/msr_blocks.py:
--------------------------------------------------------------------------------
1 | from dropblock import DropBlock2D, LinearScheduler
2 | from ..layers.convolutions import *
3 |
4 | class MSR_L(nn.Module):
5 | def __init__(self, filters_in):
6 | super(MSR_L, self).__init__()
7 | self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3,
8 | stride=1, pad=1, norm="bn", activate="leaky")
9 | self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1,
10 | stride=1, pad=0, norm="bn", activate="leaky")
11 | self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
12 | stride=1, pad=2, dila=2, norm="bn", activate="leaky")
13 | self.__dw2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
14 | stride=1, pad=4, dila=4, norm="bn", activate="leaky")
15 | self.__dw3 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
16 | stride=1, pad=6, dila=6, norm="bn", activate="leaky")
17 | self.__pw1 = Convolutional(filters_in=filters_in*4, filters_out=filters_in, kernel_size=1,
18 | stride=1, pad=0, norm="bn", activate="Mish")
19 | self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0.,
20 | stop_value=0.1, nr_steps=5)
21 |
22 | def forward(self, x):
23 | dw0 = self.__dw0(x)
24 | dw0 = self.__drop(dw0)
25 | pw0 = self.__pw0(dw0)
26 | dw1 = self.__dw1(pw0)
27 | dw2 = self.__dw2(pw0)+dw1
28 | dw3 = self.__dw3(pw0)+dw2
29 | cat = torch.cat((pw0, dw1, dw2, dw3),1)
30 | pw1 = self.__pw1(cat)
31 | return pw1
32 |
33 | class MSR_M(nn.Module):
34 | def __init__(self, filters_in):
35 | super(MSR_M, self).__init__()
36 | self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3,
37 | stride=1, pad=1, norm="bn", activate="leaky")
38 | self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1,
39 | stride=1, pad=0, norm="bn", activate="leaky")
40 | self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
41 | stride=1, pad=2, dila=2, norm="bn", activate="leaky")
42 | self.__dw2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
43 | stride=1, pad=4, dila=4, norm="bn", activate="leaky")
44 | self.__pw1 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1,
45 | stride=1, pad=0, norm="bn", activate="Mish")
46 | self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0.,
47 | stop_value=0.1, nr_steps=5)
48 |
49 | def forward(self, x):
50 | dw0 = self.__dw0(x)
51 | dw0 = self.__drop(dw0)
52 | pw0 = self.__pw0(dw0)
53 | dw1 = self.__dw1(pw0)
54 | dw2 = self.__dw2(pw0)+dw1
55 | cat = torch.cat((dw1, dw2),1)
56 | pw1 = self.__pw1(cat)
57 | return pw1
58 |
59 | class MSR_S(nn.Module):
60 | def __init__(self, filters_in):
61 | super(MSR_S, self).__init__()
62 | self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3,
63 | stride=1, pad=1, norm="bn", activate="leaky")
64 | self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1,
65 | stride=1, pad=0, norm="bn", activate="leaky")
66 | self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
67 | stride=1, pad=1, dila=1, norm="bn", activate="leaky")
68 | self.__dw2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3,
69 | stride=1, pad=2, dila=2, norm="bn", activate="leaky")
70 | self.__pw1 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1,
71 | stride=1, pad=0, norm="bn", activate="Mish")
72 | self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0.,
73 | stop_value=0.1, nr_steps=5)
74 |
75 | def forward(self, x):
76 | dw0 = self.__dw0(x)
77 | dw0 = self.__drop(dw0)
78 | pw0 = self.__pw0(dw0)
79 | dw1 = self.__dw1(pw0)
80 | dw2 = self.__dw2(pw0)+dw1
81 | cat = torch.cat((dw1, dw2),1)
82 | pw1 = self.__pw1(cat)
83 | return pw1
84 |
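The three MSR variants differ only in branch count and dilation rates (L: 2/4/6, M: 2/4, S: 1/2); all of them preserve the input shape, so they can be swapped per feature level. A quick sketch:

import torch

x = torch.randn(1, 64, 32, 32)
for block in (MSR_S(64), MSR_M(64), MSR_L(64)):
    assert block(x).shape == x.shape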
--------------------------------------------------------------------------------
/model/layers/multiscale_fusion_blocks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from ..layers.convolutions import Convolutional, Deformable_Convolutional
5 |
6 | class SPP(nn.Module):
7 | def __init__(self, depth=512):
8 | super(SPP,self).__init__()
9 | self.__maxpool5 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
10 | self.__maxpool9 = nn.MaxPool2d(kernel_size=9, stride=1, padding=4)
11 | self.__maxpool13 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6)
12 | self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1)
13 |
14 | def forward(self, x):
15 | maxpool5 = self.__maxpool5(x)
16 | maxpool9 = self.__maxpool9(x)
17 | maxpool13 = self.__maxpool13(x)
18 | cat_maxpool = torch.cat([x, maxpool5, maxpool9, maxpool13], dim=1)
19 | SPP = self.__outconv(cat_maxpool)
20 | return SPP
21 |
22 | class SPPF(nn.Module):
23 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
24 | def __init__(self, depth=512):
25 | super(SPPF, self).__init__()
26 | self.__maxpool5 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
27 | self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1)
28 | def forward(self, x):
29 | maxpool5 = self.__maxpool5(x)
30 | maxpool9 = self.__maxpool5(maxpool5)
31 | cat_maxpool = torch.cat([x, maxpool5, maxpool9, self.__maxpool5(maxpool9)], dim=1)
32 | SPPF = self.__outconv(cat_maxpool)
33 | return SPPF
34 |
35 | class ASPP(nn.Module):
36 | def __init__(self, in_channel=1280, depth=512):
37 | super(ASPP,self).__init__()
38 | self.__dilaconv1 = nn.Conv2d(in_channel, depth, 1, 1)
39 | self.__dilaconv5 = nn.Conv2d(in_channel, depth, 3, 1, padding=2, dilation=2)
40 | self.__dilaconv9 = nn.Conv2d(in_channel, depth, 3, 1, padding=4, dilation=4)
41 | self.__dilaconv13 = nn.Conv2d(in_channel, depth, 3, 1, padding=6, dilation=6)
42 | self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1)
43 |
44 | def forward(self, x):
45 | dilaconv1 = self.__dilaconv1(x)
46 | dilaconv5 = self.__dilaconv5(x)
47 | dilaconv9 = self.__dilaconv9(x)
48 | dilaconv13 = self.__dilaconv13(x)
49 | cat_dilaconv = torch.cat([dilaconv1, dilaconv5, dilaconv9, dilaconv13], dim=1)
50 | ASPP = self.__outconv(cat_dilaconv)
51 | return ASPP
52 |
53 | class ASFF(nn.Module):
54 | def __init__(self, level, vis=False):
55 | super(ASFF, self).__init__()
56 | self.level = level
57 | self.dim = [512,256,128]
58 | self.inter_dim = self.dim[self.level]
59 | if level == 0:
60 | self.stride_level_1 = Convolutional(256, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6')
61 | self.stride_level_2 = Convolutional(128, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6')
62 | self.expand = Convolutional(self.inter_dim, 1024, 3, 1, pad=1, norm='bn', activate='relu6')
63 | elif level == 1:
64 | self.compress_level_0 = Convolutional(512, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6')
65 | self.stride_level_2 = Convolutional(128, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6')
66 | self.expand = Convolutional(self.inter_dim, 512, 3, 1, pad=1, norm='bn', activate='relu6')
67 | elif level == 2:
68 | self.compress_level_0 = Convolutional(512, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6')
69 | self.compress_level_1 = Convolutional(256, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6')
70 | self.expand = Convolutional(self.inter_dim, 256, 3, 1, pad=1, norm='bn', activate='relu6')
71 | compress_c = 16
72 | self.weight_level_0 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6')
73 | self.weight_level_1 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6')
74 | self.weight_level_2 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6')
75 | self.weight_levels = nn.Conv2d(compress_c * 3, 3, kernel_size=1, stride=1, padding=0)
76 | self.vis = vis
77 |
78 | def forward(self, x_level_0, x_level_1, x_level_2):
79 | if self.level == 0:
80 | level_0_resized = x_level_0
81 | level_1_resized = self.stride_level_1(x_level_1)
82 | level_2_downsampled_inter = F.max_pool2d(x_level_2, 3, stride=2, padding=1)
83 | level_2_resized = self.stride_level_2(level_2_downsampled_inter)
84 | elif self.level == 1:
85 | level_0_compressed = self.compress_level_0(x_level_0)
86 | level_0_resized = F.interpolate(level_0_compressed, scale_factor=2, mode='nearest')
87 | level_1_resized = x_level_1
88 | level_2_resized = self.stride_level_2(x_level_2)
89 | elif self.level == 2:
90 | level_0_compressed = self.compress_level_0(x_level_0)
91 | level_0_resized = F.interpolate(level_0_compressed, scale_factor=4, mode='nearest')
92 | level_1_compressed = self.compress_level_1(x_level_1)
93 | level_1_resized = F.interpolate(level_1_compressed, scale_factor=2, mode='nearest')
94 | level_2_resized = x_level_2
95 |
96 | level_0_weight_v = self.weight_level_0(level_0_resized)
97 | level_1_weight_v = self.weight_level_1(level_1_resized)
98 | level_2_weight_v = self.weight_level_2(level_2_resized)
99 | levels_weight_v = torch.cat((level_0_weight_v, level_1_weight_v, level_2_weight_v), 1)
100 | levels_weight = self.weight_levels(levels_weight_v)
101 | levels_weight = F.softmax(levels_weight, dim=1)
102 |
103 | fused_out_reduced = level_0_resized * levels_weight[:, 0:1, :, :] + \
104 | level_1_resized * levels_weight[:, 1:2, :, :] + \
105 | level_2_resized * levels_weight[:, 2:, :, :]
106 |
107 | out = self.expand(fused_out_reduced)
108 |
109 | if self.vis:
110 | return out, levels_weight, fused_out_reduced.sum(dim=1)
111 | else:
112 | return out
113 |
114 | class FeatureAdaption(nn.Module):
115 | def __init__(self, in_ch, out_ch, n_anchors):
116 | super(FeatureAdaption, self).__init__()
117 | self.sep=False
118 | self.conv_offset = nn.Conv2d(in_channels=2*n_anchors, out_channels=2*9*n_anchors, groups = n_anchors, kernel_size=1,stride=1,padding=0)
119 | self.dconv = Deformable_Convolutional(filters_in=in_ch, filters_out=out_ch, kernel_size=3, stride=1, pad=1, groups=n_anchors)
120 |
121 | def forward(self, input, wh_pred):
122 | wh_pred_new = wh_pred.detach()
123 | offset = self.conv_offset(wh_pred_new)
124 | out = self.dconv(input, offset)
125 | return out
126 |
127 | class Features_Fusion(nn.Module):
128 | def __init__(self, in_channels, out_channels, r=16):
129 | super(Features_Fusion,self).__init__()
130 | self.out_channels = out_channels
131 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
132 | self.conv_fc1 = Convolutional(in_channels, in_channels // r, kernel_size=1, stride=1, pad=0, norm='bn', activate='leaky')
133 | self.conv_fc2 = nn.Conv2d(in_channels // r, out_channels * 2, kernel_size=1, padding=0, bias=False)
134 | self.softmax = nn.Softmax(dim=2)
135 |
136 |
137 | def forward(self, x1, x2):
138 | batch_size = x1.size(0)
139 | x_mix = torch.add(x1, x2)  # element-wise sum -> the mixed feature U
140 | x_avg = self.avg_pool(x_mix)
141 | x_fcout = self.conv_fc2(self.conv_fc1(x_avg))  # squeeze then expand; first half of the channels is a, second half is b
142 | x_reshape = x_fcout.reshape(batch_size, self.out_channels, 2, -1)  # reshape into the two branch logits
143 | x_softmax = self.softmax(x_reshape)  # softmax across the two branches, per channel
144 | w1 = x_softmax[:, :, 0:1, :]  # split into the two per-channel weight tensors
145 | w2 = x_softmax[:, :, 1:2, :]
146 | out = x1 * w1 + x2 * w2  # element-wise sum of the re-weighted features
147 | return out
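Why SPPF matches SPP: with stride 1, two chained 5x5 max-pools cover a 9x9 window and three cover 13x13, so SPPF's cascade reproduces SPP's branches at lower cost. A sketch verifying the pooling identity (the 1x1 output convs are left out, since their weights are randomly initialised):

import torch
import torch.nn as nn

p5 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
p9 = nn.MaxPool2d(kernel_size=9, stride=1, padding=4)
p13 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6)

x = torch.randn(1, 8, 32, 32)
assert torch.equal(p5(p5(x)), p9(x))
assert torch.equal(p5(p5(p5(x))), p13(x))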
--------------------------------------------------------------------------------
/model/loss/__pycache__/loss4.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/loss4.cpython-38.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/loss4.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/loss4.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/loss6.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/loss6.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv2.cpython-38.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv2.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv2.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv2single.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv2single.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv8.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv8.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv88.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv88.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/lossv9.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/lossv9.cpython-39.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/seesaw_loss.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/seesaw_loss.cpython-38.pyc
--------------------------------------------------------------------------------
/model/loss/__pycache__/seesaw_loss.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/loss/__pycache__/seesaw_loss.cpython-39.pyc
--------------------------------------------------------------------------------
/model/neck/__pycache__/neckv2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/neck/__pycache__/neckv2.cpython-38.pyc
--------------------------------------------------------------------------------
/model/neck/__pycache__/neckv2.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/neck/__pycache__/neckv2.cpython-39.pyc
--------------------------------------------------------------------------------
/model/neck/__pycache__/neckv8.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/model/neck/__pycache__/neckv8.cpython-39.pyc
--------------------------------------------------------------------------------
/predictionR/lr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shank2358/TS-Conv/132d71f41ed0865742ddcf14235dd07881357dc9/predictionR/lr.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Cython==0.29.24
2 | dropblock==0.3.0
3 | einops==0.3.2
4 | imgaug==0.4.0
5 | matplotlib==3.4.3
6 | opencv-python==4.5.3.56
7 | pycocotools==2.0.2
8 | scikit-image==0.18.3
9 | scikit-learn==1.0
10 | scipy==1.7.1
11 | Shapely==1.7.1
12 | tensorboardX==2.4
13 | thop==0.0.31.post2005241907
14 | timm==0.4.12
15 | tqdm==4.62.3
16 | prefetch_generator==1.0.1
17 | rich
18 | tensorboard
19 | nvitop
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch  # needed below for torch.load / torch.no_grad
3 | import argparse, time
4 | import logging
5 | import config.config as cfg
6 | import utils.gpu as gpu
7 | from utils.log import Logger
8 | from model.TSConv import GGHL
9 | from evalR.evaluatorTSplot import Evaluator
10 | from tensorboardX import SummaryWriter
11 |
12 | class Tester(object):
13 | def __init__(self, weight_path=None, gpu_id=0, eval=False):
14 | self.img_size = cfg.TEST["TEST_IMG_SIZE"]
15 | self.__num_class = cfg.DATA["NUM"]
16 | self.__device = gpu.select_device(gpu_id, force_cpu=False)
17 | self.__eval = eval
18 | self.__model = GGHL().to(self.__device)
19 | self.__load_model_weights(weight_path)
20 |
21 | def __load_model_weights(self, weight_path):
22 | print("loading weight file from : {}".format(weight_path))
23 | weight = os.path.join(weight_path)
24 | chkpt = torch.load(weight, map_location=self.__device)
25 | self.__model.load_state_dict(chkpt['model']) #['model']
26 | del chkpt
27 |
28 | def test(self):
29 | global logger
30 | logger.info("***********Start Evaluation****************")
31 | mAP = 0
32 | if self.__eval and cfg.TEST["EVAL_TYPE"] == 'VOC':
33 | with torch.no_grad():
34 | start = time.time()
35 | APs, _, _, inference_time = Evaluator(self.__model).APs_voc()
36 | end = time.time()
37 | logger.info("Test cost time:{:.4f}s".format(end - start))
38 | for i in APs:
39 | print("{} --> AP : {}".format(i, APs[i]))
40 | mAP += APs[i]
41 | mAP = mAP / self.__num_class
42 | logger.info('mAP:{}'.format(mAP))
43 | logger.info("inference time: {:.2f} ms".format(inference_time))
44 | writer.add_scalar('test/VOCmAP', mAP)
45 |
46 | if __name__ == "__main__":
47 | global logger
48 | parser = argparse.ArgumentParser()
49 | parser.add_argument('--weight_path', type=str, default='./weight/best.pt', help='weight file path')
50 | parser.add_argument('--log_val_path', type=str, default='log/', help='weight file path')
51 | parser.add_argument('--eval', action='store_true', default=True, help='eval flag')
52 | parser.add_argument('--gpu_id', type=int, default=1, help='gpu id')
53 | parser.add_argument('--log_path', type=str, default='log/', help='log path')
54 | opt = parser.parse_args()
55 | writer = SummaryWriter(logdir=opt.log_path + '/event')
56 | logger = Logger(log_file_name=opt.log_val_path + '/log_coco_test.txt', log_level=logging.DEBUG,
57 | logger_name='GGHL').get_log()
58 | Tester(weight_path=opt.weight_path, gpu_id=opt.gpu_id, eval=opt.eval).test()
--------------------------------------------------------------------------------
/train_dist.sh:
--------------------------------------------------------------------------------
1 | MKL_NUM_THREADS=16 OMP_NUM_THREADS=16 torchrun \
2 | --standalone \
3 | --nnodes=1 \
4 | --nproc_per_node=2 \
5 | trainv2.py
6 |
--------------------------------------------------------------------------------
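train_dist.sh launches two trainv2.py workers on a single node via torchrun. A minimal sketch of what each spawned worker typically does at startup (illustrative only; trainv2.py itself is not reproduced in this dump):

    import os
    import torch
    import torch.distributed as dist

    # torchrun exports LOCAL_RANK / RANK / WORLD_SIZE for every spawned process
    local_rank = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend="nccl")  # reads rank/world size from the environment
    # ... build the model and wrap it in torch.nn.parallel.DistributedDataParallel ...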
/utils/cosine_lr_scheduler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class CosineDecayLR(object):
4 | def __init__(self, optimizer, T_max, lr_init, lr_min=0., warmup=0):
5 | """
6 | a cosine decay scheduler about steps, not epochs.
7 | :param optimizer: ex. optim.SGD
8 | :param T_max: max steps, and steps=epochs * batches
9 | :param lr_max: lr_max is init lr.
10 | :param warmup: in the training begin, the lr is smoothly increase from 0 to lr_init, which means "warmup",
11 | this means warmup steps, if 0 that means don't use lr warmup.
12 | """
13 | super(CosineDecayLR, self).__init__()
14 | self.__optimizer = optimizer
15 | self.__T_max = T_max
16 | self.__lr_min = lr_min
17 | self.__lr_max = lr_init
18 | self.__warmup = warmup
19 |
20 |
21 | def step(self, t):
22 | if self.__warmup and t < self.__warmup:
23 | lr = self.__lr_max / self.__warmup * t
24 | else:
25 | T_max = self.__T_max - self.__warmup
26 | t = t - self.__warmup
27 | lr = self.__lr_min + 0.5 * (self.__lr_max - self.__lr_min) * (1 + np.cos(t/T_max * np.pi))
28 | for param_group in self.__optimizer.param_groups:
29 | param_group["lr"] = lr
30 |
31 |
32 | if __name__ == '__main__':
33 |     import math
34 |     import matplotlib.pyplot as plt
35 |     import torch.optim as optim
36 |     import config.config as cfg
37 |     from model.TSConv import GGHL
38 |
39 |     net = GGHL()
40 |     optimizer = optim.SGD(net.parameters(), cfg.TRAIN["LR_INIT"], cfg.TRAIN["MOMENTUM"],
41 |                           weight_decay=cfg.TRAIN["WEIGHT_DECAY"])
42 |     # optimizer = optim.Adam(net.parameters(), lr=cfg.TRAIN["LR_INIT"])
43 |
44 |     # total steps = epochs * batches per epoch; warmup is likewise expressed in steps
45 |     batches_per_epoch = math.ceil(cfg.TRAIN["TRAIN_IMG_NUM"] / cfg.TRAIN["BATCH_SIZE"])
46 |     scheduler = CosineDecayLR(optimizer, cfg.TRAIN["EPOCHS"] * batches_per_epoch,
47 |                               cfg.TRAIN["LR_INIT"], cfg.TRAIN["LR_END"],
48 |                               cfg.TRAIN["WARMUP_EPOCHS"] * batches_per_epoch)
49 |
50 |     # record the lr at every step and plot the resulting schedule
51 |     y = []
52 |     for epoch in range(cfg.TRAIN["EPOCHS"]):
53 |         for i in range(batches_per_epoch):
54 |             scheduler.step(batches_per_epoch * epoch + i)
55 |             y.append(optimizer.param_groups[0]['lr'])
56 |
57 |     plt.figure()
58 |     plt.plot(y, label='CosineDecayLR')
59 |     plt.xlabel('steps')
60 |     plt.ylabel('LR')
61 |     plt.tight_layout()
62 |     plt.savefig("../predictionR/lr.png", dpi=600)
63 |     plt.show()
--------------------------------------------------------------------------------
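CosineDecayLR.step(t) implements a linear warmup followed by cosine annealing: for t < warmup, lr = lr_init * t / warmup; afterwards lr = lr_min + 0.5 * (lr_init - lr_min) * (1 + cos(pi * (t - warmup) / (T_max - warmup))). A minimal self-contained usage sketch (the numbers are illustrative, not the project's config):

    import torch
    import torch.optim as optim
    from utils.cosine_lr_scheduler import CosineDecayLR

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = optim.SGD(params, lr=1e-3)
    scheduler = CosineDecayLR(optimizer, T_max=1000, lr_init=1e-3, lr_min=1e-6, warmup=100)

    for step in range(1000):
        scheduler.step(step)  # sets the lr for this global step
        # forward / backward / optimizer.step() would follow here
    # the lr rises linearly to 1e-3 over the first 100 steps, then decays to ~1e-6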
/utils/gpu.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | import torch
3 | import torch.backends.cudnn as cudnn
4 | import torch.distributed as dist
5 | import random
6 | import numpy as np
7 | import os
8 |
9 | def init_seeds(seed=0):
10 |     # initialize RNG seeds; see https://pytorch.org/docs/stable/notes/randomness.html
11 |     # cudnn seed 0 settings are slower and more reproducible, else faster and less reproducible
12 |     random.seed(seed)
13 |     np.random.seed(seed)
14 |     torch.manual_seed(seed)
15 |     torch.cuda.manual_seed_all(seed)  # also seed every CUDA device
16 |     cudnn.benchmark, cudnn.deterministic = (False, True) if seed == 0 else (True, False)
17 |
18 | @contextmanager
19 | def torch_distributed_zero_first(local_rank: int):
20 | """
21 | Decorator to make all processes in distributed training wait for each local_master to do something.
22 | """
23 | if local_rank not in [-1, 0]:
24 | dist.barrier(device_ids=[local_rank])
25 | yield
26 | if local_rank == 0:
27 | dist.barrier(device_ids=[0])
28 |
29 |
30 | def select_device(id, force_cpu=False):
31 | cuda = False if force_cpu else torch.cuda.is_available()
32 | cudnn.benchmark = True
33 | device = torch.device('cuda:{}'.format(id) if cuda else 'cpu')
34 | #device = torch.cuda.set_device(0 if cuda else 'cpu')
35 | if not cuda:
36 | print('Using CPU')
37 | if cuda:
38 | #device = torch.cuda.set_device(id)
39 | c = 1024 ** 2 # bytes to MB
40 | ng = torch.cuda.device_count()
41 | x = [torch.cuda.get_device_properties(i) for i in range(ng)]
42 | print("Using CUDA device0 _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
43 | (x[0].name, x[0].total_memory / c))
44 | if ng > 0:
45 | # torch.cuda.set_device(0) # OPTIONAL: Set GPU ID
46 | for i in range(1, ng):
47 | print(" device%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
48 | (i, x[i].name, x[i].total_memory / c))
49 |
50 | return device
51 |
52 | def select_device_v5(device='', batch_size=None):
53 |     # device = 'cpu' or '0' or '0,1,2,3'
54 |     device = str(device).strip().lower().replace('cuda:', '')  # to string, 'cuda:0' -> '0'
55 |     cpu = device == 'cpu'
56 |     if cpu:
57 |         os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
58 |     elif device:  # non-cpu device requested
59 |         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
60 |         assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'  # check availability
61 |
62 |     cuda = not cpu and torch.cuda.is_available()
63 |     if cuda:
64 |         devices = device.split(',') if device else '0'  # i.e. '0,1,6,7'
65 |         n = len(devices)  # device count
66 |         if n > 1 and batch_size:  # batch_size must be divisible by device_count
67 |             assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
68 |         for i, d in enumerate(devices):
69 |             p = torch.cuda.get_device_properties(i)
70 |             print(f"CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2:.0f}MB)")
71 |     else:
72 |         print('Using CPU')
73 |
74 |     return torch.device('cuda:0' if cuda else 'cpu')
--------------------------------------------------------------------------------
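torch_distributed_zero_first above is the usual guard for one-time work (e.g. building a label cache) that only rank 0 should perform while the other ranks wait at a barrier. A hedged usage sketch (single-process safe, since local_rank = -1 skips both barriers):

    import os
    from utils.gpu import torch_distributed_zero_first

    # -1 means "not distributed"; under torchrun this comes from the LOCAL_RANK env var
    local_rank = int(os.environ.get("LOCAL_RANK", -1))
    with torch_distributed_zero_first(local_rank):
        pass  # rank 0 (or a lone process) does the one-time setup here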
/utils/log.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from rich.logging import RichHandler
3 | from logging.config import dictConfig
4 |
5 | class NewRichHandler(RichHandler):
6 | KEYWORDS = {
7 | 'size',
8 | 'Epoch',
9 | 'Batch',
10 | 'Img',
11 | 'Loss',
12 | 'fg',
13 | 'bg',
14 | 'pos',
15 | 'neg',
16 | 'iou',
17 | 'cls',
18 | 'Loss_S',
19 | 'Loss_R',
20 | 'Loss_L',
21 | 'LR',
22 | '_'
23 | }
24 |
25 | class Logger(object):
26 |     def __init__(self, log_file_name, log_level, logger_name):
27 | # firstly, create a logger
28 | self.__logger = logging.getLogger(logger_name)
29 | self.__logger.setLevel(log_level)
30 | # secondly, create a handler
31 | file_handler = logging.FileHandler(log_file_name)
32 | console_handler = NewRichHandler(rich_tracebacks=True, tracebacks_show_locals=True)
33 | # thirdly, define the output form of handler
34 | formatter = logging.Formatter(
35 | '[%(asctime)s]-[%(filename)s line:%(lineno)d]:%(message)s'
36 | )
37 | rich_formatter = logging.Formatter("%(message)s")
38 | file_handler.setFormatter(formatter)
39 | console_handler.setFormatter(rich_formatter)
40 |         # finally, add the handlers to the logger
41 | self.__logger.addHandler(file_handler)
42 | self.__logger.addHandler(console_handler)
43 |
44 | def get_log(self):
45 | return self.__logger
46 |
47 | if __name__ == "__main__":
48 | logger = Logger('./log.txt', logging.DEBUG, 'demo').get_log()
49 | logger.info('hello')
--------------------------------------------------------------------------------
/utils/num_of_works_set.py:
--------------------------------------------------------------------------------
1 | import time
2 | import torch.utils.data as d
3 | import torchvision
4 | import torchvision.transforms as transforms
5 |
6 | if __name__ == '__main__':
7 | BATCH_SIZE = 100
8 | transform = transforms.Compose([transforms.ToTensor(),
9 | transforms.Normalize((0.5,), (0.5,))])
10 |     train_set = torchvision.datasets.MNIST('./mnist', download=True, train=True, transform=transform)
11 |
12 |     # the DataLoader is rebuilt inside the loop below with a varying num_workers;
13 |     # each full pass over MNIST is timed to find the fastest setting
14 |
15 | for num_workers in range(20):
16 | train_loader = d.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers)
17 | # training ...
18 | start = time.time()
19 | for epoch in range(1):
20 | for step, (batch_x, batch_y) in enumerate(train_loader):
21 | pass
22 | end = time.time()
23 | print('num_workers is {} and it took {} seconds'.format(num_workers, end - start))
--------------------------------------------------------------------------------
/utils/utils_coco.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import torch
3 | import numpy as np
4 | import cv2
5 |
6 | def nms(bbox, thresh, score=None, limit=None):
7 | """Suppress bounding boxes according to their IoUs and confidence scores.
8 | Args:
9 | bbox (array): Bounding boxes to be transformed. The shape is
10 | :math:`(R, 4)`. :math:`R` is the number of bounding boxes.
11 | thresh (float): Threshold of IoUs.
12 | score (array): An array of confidences whose shape is :math:`(R,)`.
13 | limit (int): The upper bound of the number of the output bounding
14 | boxes. If it is not specified, this method selects as many
15 | bounding boxes as possible.
16 | Returns:
17 | array:
18 | An array with indices of bounding boxes that are selected. \
19 | They are sorted by the scores of bounding boxes in descending \
20 | order. \
21 | The shape of this array is :math:`(K,)` and its dtype is\
22 | :obj:`numpy.int32`. Note that :math:`K \\leq R`.
23 |
24 | from: https://github.com/chainer/chainercv
25 | """
26 | if len(bbox) == 0:
27 | return np.zeros((0,), dtype=np.int32)
28 |
29 | if score is not None:
30 | order = score.argsort()[::-1]
31 | bbox = bbox[order]
32 | bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1)
33 |
34 | selec = np.zeros(bbox.shape[0], dtype=bool)
35 | for i, b in enumerate(bbox):
36 | tl = np.maximum(b[:2], bbox[selec, :2])
37 | br = np.minimum(b[2:], bbox[selec, 2:])
38 | area = np.prod(br - tl, axis=1) * (tl < br).all(axis=1)
39 |
40 | iou = area / (bbox_area[i] + bbox_area[selec] - area)
41 | if (iou >= thresh).any():
42 | continue
43 |
44 | selec[i] = True
45 | if limit is not None and np.count_nonzero(selec) >= limit:
46 | break
47 |
48 | selec = np.where(selec)[0]
49 | if score is not None:
50 | selec = order[selec]
51 | return selec.astype(np.int32)
52 |
53 |
54 | def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
55 |     # convert (cx, cy, w, h) predictions to corner form, confidence-filter, then run per-class NMS
56 | box_corner = prediction.new(prediction.shape)
57 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
58 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
59 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
60 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
61 | prediction[:, :, :4] = box_corner[:, :, :4]
62 |
63 | output = [None for _ in range(len(prediction))]
64 | for i, image_pred in enumerate(prediction):
65 | # Filter out confidence scores below threshold
66 | class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1)
67 | class_pred = class_pred[0]
68 | conf_mask = (image_pred[:, 4] * class_pred >= conf_thre).squeeze()
69 | image_pred = image_pred[conf_mask]
70 |
71 | # If none are remaining => process next image
72 | if not image_pred.size(0):
73 | continue
74 | # Get detections with higher confidence scores than the threshold
75 | ind = (image_pred[:, 5:] * image_pred[:, 4][:, None] >= conf_thre).nonzero()
76 | # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
77 | detections = torch.cat((
78 | image_pred[ind[:, 0], :5],
79 | image_pred[ind[:, 0], 5 + ind[:, 1]].unsqueeze(1),
80 | ind[:, 1].float().unsqueeze(1)
81 | ), 1)
82 | # Iterate through all predicted classes
83 | unique_labels = detections[:, -1].cpu().unique()
84 | if prediction.is_cuda:
85 | unique_labels = unique_labels.cuda()
86 | for c in unique_labels:
87 | # Get the detections with the particular class
88 | detections_class = detections[detections[:, -1] == c]
89 | nms_in = detections_class.cpu().numpy()
90 | nms_out_index = nms(
91 | nms_in[:, :4], nms_thre, score=nms_in[:, 4]*nms_in[:, 5])
92 | detections_class = detections_class[nms_out_index]
93 | if output[i] is None:
94 | output[i] = detections_class
95 | else:
96 | output[i] = torch.cat((output[i], detections_class))
97 |
98 | return output
99 |
100 |
101 | def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
102 | if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
103 | raise IndexError
104 |
105 | # top left
106 | if xyxy:
107 | tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
108 | # bottom right
109 | br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
110 | area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
111 | area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
112 | else:
113 | tl = torch.max((bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
114 | (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2))
115 | # bottom right
116 | br = torch.min((bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
117 | (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2))
118 |
119 | area_a = torch.prod(bboxes_a[:, 2:], 1)
120 | area_b = torch.prod(bboxes_b[:, 2:], 1)
121 | en = (tl < br).type(tl.type()).prod(dim=2)
122 | area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all())
123 | return area_i / (area_a[:, None] + area_b - area_i)
124 |
125 |
126 | def label2box(labels, info_img, maxsize, lrflip):
127 |     # convert (cls, x, y, w, h) pixel labels to normalized (cx, cy, w, h) on the letterboxed image
128 | h, w, nh, nw, dx, dy = info_img
129 | x1 = labels[:, 1] / w
130 | y1 = labels[:, 2] / h
131 | x2 = (labels[:, 1] + labels[:, 3]) / w
132 | y2 = (labels[:, 2] + labels[:, 4]) / h
133 | labels[:, 1] = (((x1 + x2) / 2) * nw + dx) / maxsize
134 | labels[:, 2] = (((y1 + y2) / 2) * nh + dy) / maxsize
135 | labels[:, 3] = labels[:, 3] * nw / w / maxsize
136 | labels[:, 4] = labels[:, 4] * nh / h / maxsize
137 | if lrflip:
138 | labels[:, 1] = 1 - labels[:, 1]
139 | return labels
140 |
141 |
142 | def box2label(box, info_img):
143 |     # map a (y1, x1, y2, x2) box on the letterboxed image back to original-image coordinates
144 | h, w, nh, nw, dx, dy = info_img
145 | y1, x1, y2, x2 = box
146 | box_h = ((y2 - y1) / nh) * h
147 | box_w = ((x2 - x1) / nw) * w
148 | y1 = ((y1 - dy) / nh) * h
149 | x1 = ((x1 - dx) / nw) * w
150 | label = [y1, x1, y1 + box_h, x1 + box_w]
151 | return label
152 |
153 |
154 | def preprocess(img, imgsize, jitter, random_placing=False):
155 |     assert img is not None
156 |     h, w, _ = img.shape
157 |     img = img[:, :, ::-1]  # BGR -> RGB
158 |
159 | if jitter > 0:
160 | # add jitter
161 | dw = jitter * w
162 | dh = jitter * h
163 | new_ar = (w + np.random.uniform(low=-dw, high=dw))\
164 | / (h + np.random.uniform(low=-dh, high=dh))
165 | else:
166 | new_ar = w / h
167 |
168 | if new_ar < 1:
169 | nh = imgsize
170 | nw = nh * new_ar
171 | else:
172 | nw = imgsize
173 | nh = nw / new_ar
174 | nw, nh = int(nw), int(nh)
175 |
176 | if random_placing:
177 |         dx = int(np.random.uniform(0, imgsize - nw))
178 |         dy = int(np.random.uniform(0, imgsize - nh))
179 | else:
180 | dx = (imgsize - nw) // 2
181 | dy = (imgsize - nh) // 2
182 |
183 | img = cv2.resize(img, (nw, nh))
184 | sized = np.ones((imgsize, imgsize, 3), dtype=np.uint8) * 127
185 | sized[dy:dy+nh, dx:dx+nw, :] = img
186 |
187 | info_img = (h, w, nh, nw, dx, dy)
188 | return sized, info_img
189 |
190 | def rand_scale(s):
191 | """
192 | calculate random scaling factor
193 | Args:
194 | s (float): range of the random scale.
195 | Returns:
196 | random scaling factor (float) whose range is
197 | from 1 / s to s .
198 | """
199 | scale = np.random.uniform(low=1, high=s)
200 | if np.random.rand() > 0.5:
201 | return scale
202 | return 1 / scale
203 |
204 | def random_distort(img, hue, saturation, exposure):
205 | """
206 | perform random distortion in the HSV color space.
207 | Args:
208 | img (numpy.ndarray): input image whose shape is :math:`(H, W, C)`.
209 | Values range from 0 to 255.
210 | hue (float): random distortion parameter.
211 | saturation (float): random distortion parameter.
212 | exposure (float): random distortion parameter.
213 | Returns:
214 | img (numpy.ndarray)
215 | """
216 | dhue = np.random.uniform(low=-hue, high=hue)
217 | dsat = rand_scale(saturation)
218 | dexp = rand_scale(exposure)
219 |
220 | img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
221 | img = np.asarray(img, dtype=np.float32) / 255.
222 | img[:, :, 1] *= dsat
223 | img[:, :, 2] *= dexp
224 | H = img[:, :, 0] + dhue
225 |
226 | if dhue > 0:
227 | H[H > 1.0] -= 1.0
228 | else:
229 | H[H < 0.0] += 1.0
230 |
231 | img[:, :, 0] = H
232 | img = (img * 255).clip(0, 255).astype(np.uint8)
233 | img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
234 | img = np.asarray(img, dtype=np.float32)
235 |
236 | return img
237 |
238 |
239 | def get_coco_label_names():
240 | """
241 | COCO label names and correspondence between the model's class index and COCO class index.
242 | Returns:
243 | coco_label_names (tuple of str) : all the COCO label names including background class.
244 | coco_class_ids (list of int) : index of 80 classes that are used in 'instance' annotations
245 | coco_cls_colors (np.ndarray) : randomly generated color vectors used for box visualization
246 |
247 | """
248 | coco_label_names = ('background', # class zero
249 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
250 | 'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign',
251 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
252 | 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella',
253 | 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
254 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
255 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass',
256 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
257 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
258 | 'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk',
259 | 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
260 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book',
261 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
262 | )
263 | coco_class_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
264 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
265 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67,
266 | 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
267 |
268 | coco_cls_colors = np.random.randint(128, 255, size=(80, 3))
269 |
270 | return coco_label_names, coco_class_ids, coco_cls_colors
271 |
--------------------------------------------------------------------------------
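A minimal sketch of the greedy, score-ordered behaviour of nms() above: of two heavily overlapping boxes, only the higher-scoring one survives, while a disjoint box is kept:

    import numpy as np
    from utils.utils_coco import nms

    boxes = np.array([[0, 0, 10, 10],      # (x1, y1, x2, y2)
                      [1, 1, 11, 11],      # IoU ~0.68 with the box above
                      [20, 20, 30, 30]], dtype=np.float32)
    scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)
    keep = nms(boxes, thresh=0.5, score=scores)
    print(keep)  # -> [0 2]: index 1 is suppressed by the higher-scoring index 0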
/utils/visualize.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import numpy as np
3 | import PIL.Image as Image
4 | import PIL.ImageColor as ImageColor
5 | import PIL.ImageDraw as ImageDraw
6 | import PIL.ImageFont as ImageFont
7 |
8 | _TITLE_LEFT_MARGIN = 10
9 | _TITLE_TOP_MARGIN = 10
10 | STANDARD_COLORS = [
11 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
12 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
13 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
14 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
15 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
16 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
17 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
18 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
19 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
20 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
21 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
22 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
23 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
24 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
25 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
26 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
27 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
28 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
29 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
30 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
31 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
32 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
33 | 'WhiteSmoke', 'Yellow', 'YellowGreen'
34 | ]
35 |
36 | def visualize_boxes(image, boxes, labels, probs, class_labels):
37 |
38 | category_index = {}
39 | for id_, label_name in enumerate(class_labels):
40 | category_index[id_] = {"name": label_name}
41 | image=visualize_boxes_and_labels_on_image_array(image, boxes, labels, probs, category_index)
42 | return image
43 |
44 | def visualize_boxes_and_labels_on_image_array(
45 | image,
46 | boxes,
47 | classes,
48 | scores,
49 | category_index,
50 | instance_masks=None,
51 | instance_boundaries=None,
52 | use_normalized_coordinates=False,
53 | max_boxes_to_draw=200,
54 | min_score_thresh=.5,
55 | agnostic_mode=False,
56 | line_thickness=4,
57 | groundtruth_box_visualization_color='black',
58 | skip_scores=False,
59 | skip_labels=False):
60 |
61 | box_to_display_str_map = collections.defaultdict(list)
62 | box_to_color_map = collections.defaultdict(str)
63 | box_to_instance_masks_map = {}
64 | box_to_instance_boundaries_map = {}
65 | if not max_boxes_to_draw:
66 | max_boxes_to_draw = boxes.shape[0]
67 |
68 | sorted_ind = np.argsort(-scores)
69 | boxes=boxes[sorted_ind]
70 | scores=scores[sorted_ind]
71 | classes=classes[sorted_ind]
72 | for i in range(min(max_boxes_to_draw, boxes.shape[0])):
73 | if scores is None or scores[i] > min_score_thresh:
74 | box = tuple(boxes[i].tolist())
75 | if instance_masks is not None:
76 | box_to_instance_masks_map[box] = instance_masks[i]
77 | if instance_boundaries is not None:
78 | box_to_instance_boundaries_map[box] = instance_boundaries[i]
79 | if scores is None:
80 | box_to_color_map[box] = groundtruth_box_visualization_color
81 | else:
82 | display_str = ''
83 | if not skip_labels:
84 | if not agnostic_mode:
85 | if classes[i] in category_index.keys():
86 | class_name = category_index[classes[i]]['name']
87 | else:
88 | class_name = 'N/A'
89 | display_str = str(class_name)
90 | if not skip_scores:
91 | if not display_str:
92 | display_str = '{}%'.format(int(100 * scores[i]))
93 | else:
94 | display_str = '{}: {}%'.format(display_str, int(100 * scores[i]))
95 | box_to_display_str_map[box].append(display_str)
96 | if agnostic_mode:
97 | box_to_color_map[box] = 'DarkOrange'
98 | else:
99 | box_to_color_map[box] = STANDARD_COLORS[
100 | classes[i] % len(STANDARD_COLORS)]
101 |
102 | for box, color in box_to_color_map.items():
103 | xmin, ymin, xmax, ymax = box
104 | if instance_masks is not None:
105 | draw_mask_on_image_array(
106 | image,
107 | box_to_instance_masks_map[box],
108 | color=color
109 | )
110 | if instance_boundaries is not None:
111 | draw_mask_on_image_array(
112 | image,
113 | box_to_instance_boundaries_map[box],
114 | color='red',
115 | alpha=1.0
116 | )
117 | draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color=color,
118 | thickness=line_thickness, display_str_list=box_to_display_str_map[box],
119 | use_normalized_coordinates=use_normalized_coordinates)
120 | return image
121 |
122 |
123 | def draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color='red',
124 | thickness=4, display_str_list=(), use_normalized_coordinates=True):
125 |
126 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
127 | draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
128 | thickness, display_str_list,
129 | use_normalized_coordinates)
130 | np.copyto(image, np.array(image_pil))
131 |
132 |
133 | def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax, color='red', thickness=4, display_str_list=(), use_normalized_coordinates=True):
134 | draw = ImageDraw.Draw(image)
135 | im_width, im_height = image.size
136 | if use_normalized_coordinates:
137 | (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
138 | ymin * im_height, ymax * im_height)
139 | else:
140 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
141 | draw.line([(left, top), (left, bottom), (right, bottom),
142 | (right, top), (left, top)], width=thickness, fill=color)
143 | '''
144 | try:
145 | font = ImageFont.truetype('arial.ttf', 24)
146 | except IOError:
147 | font = ImageFont.load_default()
148 | display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
149 | total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
150 |
151 | if top > total_display_str_height:
152 | text_bottom = top
153 | else:
154 | text_bottom = bottom + total_display_str_height
155 |
156 | for display_str in display_str_list[::-1]:
157 | text_width, text_height = font.getsize(display_str)
158 | margin = np.ceil(0.05 * text_height)
159 | draw.rectangle(
160 | [(left, text_bottom - text_height - 2 * margin), (left + text_width,
161 | text_bottom)],
162 | fill=color)
163 | draw.text(
164 | (left + margin, text_bottom - text_height - margin),
165 | display_str,
166 | fill='black',
167 | font=font)
168 | text_bottom -= text_height - 2 * margin
169 | '''
170 |
171 | def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
172 |
173 | if image.dtype != np.uint8:
174 | raise ValueError('`image` not of type np.uint8')
175 | if mask.dtype != np.uint8:
176 | raise ValueError('`mask` not of type np.uint8')
177 | if np.any(np.logical_and(mask != 1, mask != 0)):
178 | raise ValueError('`mask` elements should be in [0, 1]')
179 | if image.shape[:2] != mask.shape:
180 | raise ValueError('The image has spatial dimensions %s but the mask has '
181 | 'dimensions %s' % (image.shape[:2], mask.shape))
182 | rgb = ImageColor.getrgb(color)
183 | pil_image = Image.fromarray(image)
184 | solid_color = np.expand_dims(
185 | np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
186 | pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
187 | pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L')
188 | pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
189 | np.copyto(image, np.array(pil_image.convert('RGB')))
--------------------------------------------------------------------------------
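A minimal usage sketch for visualize_boxes() above, drawing one pixel-coordinate box on a blank image (the class name is illustrative):

    import numpy as np
    from utils.visualize import visualize_boxes

    image = np.zeros((256, 256, 3), dtype=np.uint8)
    boxes = np.array([[30, 30, 120, 120]], dtype=np.float32)  # (xmin, ymin, xmax, ymax) in pixels
    labels = np.array([0])
    probs = np.array([0.95])
    visualize_boxes(image, boxes, labels, probs, class_labels=['plane'])
    # 'image' is modified in place; label/score text rendering is commented out in
    # draw_bounding_box_on_image, so only the box outline is drawn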