├── LICENSE ├── README.md ├── TorchCAM └── README.md ├── args.py ├── autoaug ├── README.md ├── archive.py ├── aug_transforms.py ├── augmentations.py ├── cutmix.py ├── cutout.py ├── data │ ├── cutmix.png │ ├── cutout.png │ ├── dropblock.png │ ├── image.png │ ├── mainfold_mixup.png │ ├── mixup.png │ ├── shakedrop.png │ └── stochdepth.png ├── dropblock │ ├── dropblock.py │ ├── resnet18_dropblock.py │ └── scheduler.py ├── label_smoothing.py ├── mixup.py ├── resnet18_manifold_mixup.py ├── resnet18_shakedrop.py ├── shakedrop.py └── stodepth.py ├── ckpts └── info.txt ├── dataloder.py ├── dataset └── info.txt ├── dataset_loder ├── info.txt ├── scoliosis_dataloder.py └── transformer.py ├── deployment ├── README.md ├── c++_libtorch │ ├── CMakeLists.txt │ ├── SaveTorchscriptModel.py │ └── main.cpp ├── flask_v1 │ ├── client.py │ ├── server.py │ └── transform.py └── flask_v2 │ ├── cfg.py │ ├── models │ ├── __init__.py │ ├── build_model.py │ ├── efficientnet_pytorch │ │ ├── __init__.py │ │ ├── model.py │ │ └── utils.py │ ├── resnext_wsl.py │ └── vision │ │ ├── __init__.py │ │ ├── _utils.py │ │ ├── alexnet.py │ │ ├── densenet.py │ │ ├── detection │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── _utils.cpython-36.pyc │ │ │ ├── _utils.cpython-37.pyc │ │ │ ├── backbone_utils.cpython-36.pyc │ │ │ ├── backbone_utils.cpython-37.pyc │ │ │ ├── faster_rcnn.cpython-36.pyc │ │ │ ├── faster_rcnn.cpython-37.pyc │ │ │ ├── generalized_rcnn.cpython-36.pyc │ │ │ ├── generalized_rcnn.cpython-37.pyc │ │ │ ├── image_list.cpython-36.pyc │ │ │ ├── image_list.cpython-37.pyc │ │ │ ├── keypoint_rcnn.cpython-36.pyc │ │ │ ├── keypoint_rcnn.cpython-37.pyc │ │ │ ├── mask_rcnn.cpython-36.pyc │ │ │ ├── mask_rcnn.cpython-37.pyc │ │ │ ├── roi_heads.cpython-36.pyc │ │ │ ├── roi_heads.cpython-37.pyc │ │ │ ├── rpn.cpython-36.pyc │ │ │ ├── rpn.cpython-37.pyc │ │ │ ├── transform.cpython-36.pyc │ │ │ └── transform.cpython-37.pyc │ │ ├── _utils.py │ │ ├── backbone_utils.py │ │ ├── faster_rcnn.py │ │ ├── generalized_rcnn.py │ │ ├── image_list.py │ │ ├── keypoint_rcnn.py │ │ ├── mask_rcnn.py │ │ ├── roi_heads.py │ │ ├── rpn.py │ │ └── transform.py │ │ ├── googlenet.py │ │ ├── inception.py │ │ ├── mobilenet.py │ │ ├── resnet.py │ │ ├── segmentation │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── _utils.cpython-36.pyc │ │ │ ├── _utils.cpython-37.pyc │ │ │ ├── deeplabv3.cpython-36.pyc │ │ │ ├── deeplabv3.cpython-37.pyc │ │ │ ├── fcn.cpython-36.pyc │ │ │ ├── fcn.cpython-37.pyc │ │ │ ├── segmentation.cpython-36.pyc │ │ │ └── segmentation.cpython-37.pyc │ │ ├── _utils.py │ │ ├── deeplabv3.py │ │ ├── fcn.py │ │ └── segmentation.py │ │ ├── shufflenetv2.py │ │ ├── squeezenet.py │ │ ├── utils.py │ │ └── vgg.py │ ├── redis_db.py │ ├── server.py │ ├── streess_test.py │ └── utils.py ├── eval.py ├── metrics.py ├── models ├── Attention │ ├── A2Atttention.py │ ├── AFT.py │ ├── ANN.py │ ├── BAM.py │ ├── CBAM.py │ ├── CCNet.py │ ├── CoAtNet.py │ ├── CoTAttention.py │ ├── CoordAttention.py │ ├── DaNet.py │ ├── ECAAttention.py │ ├── EMSA.py │ ├── ExternalAttention.py │ ├── GlobalContextBlock.py │ ├── HaloAttention.py │ ├── MBConv.py │ ├── MUSEAttention.py │ ├── MobileViTAttention.py │ ├── NonLocalBlock.py │ ├── OutlookAttention.py │ ├── PSA.py │ ├── ParNetAttention.py │ ├── PolarizedSelfAttention.py │ ├── README.md │ ├── ResidualAttention.py │ ├── S2Attention.py │ ├── SEAttention.py │ ├── SEvariants.py │ ├── SGE.py │ ├── 
SKAttention.py │ ├── SelfAttention.py │ ├── ShuffleAttention.py │ ├── SimplifiedSelfAttention.py │ ├── TripletAttention.py │ ├── UFOAttention.py │ ├── ViP.py │ └── residual_attention_blocks.py ├── ClassicNetwork │ ├── AlexNet.py │ ├── DenseNet.py │ ├── InceptionV1.py │ ├── InceptionV2.py │ ├── InceptionV3.py │ ├── InceptionV4.py │ ├── Non-local_network.py │ ├── PAN.py │ ├── README.md │ ├── ResNeXt.py │ ├── ResNet.py │ ├── VGGNet.py │ ├── blocks │ │ ├── CBAM_blocks.py │ │ ├── DaNet.py │ │ ├── ECA_blocks.py │ │ ├── MFPA.py │ │ ├── NonLocalBlock.py │ │ ├── SE_block.py │ │ ├── SKNet.py │ │ ├── SK_block.py │ │ ├── STN_blocks.py │ │ ├── attention_block.py │ │ ├── conv_bn.py │ │ ├── dense_block.py │ │ ├── dpn_block.py │ │ ├── inception_blocks.py │ │ ├── non_local_concatenation.py │ │ ├── non_local_dot_product.py │ │ ├── non_local_embedded_gaussian.py │ │ ├── non_local_gaussian.py │ │ ├── residual_attention_blocks.py │ │ ├── residual_blocks.py │ │ ├── resnext_block.py │ │ ├── shuffle_block.py │ │ └── softmax_blocks.py │ ├── csp_darknet.py │ ├── csp_densenet.py │ ├── csp_resnext.py │ ├── darknet.py │ ├── dpn.py │ ├── efficientnet.py │ ├── efficientnet │ │ ├── __init__.py │ │ ├── model.py │ │ └── utils.py │ ├── googlenet.py │ ├── mnasnet.py │ └── transformer.py ├── FaceDetectorAndRecognition │ ├── FaceBoxes.py │ ├── LFFD.py │ ├── README.md │ └── VarGFaceNet.py ├── GAN │ ├── LICENSE │ ├── README.md │ ├── aae │ │ └── aae.py │ ├── acgan │ │ └── acgan.py │ ├── began │ │ └── began.py │ ├── bgan │ │ └── bgan.py │ ├── bicyclegan │ │ ├── bicyclegan.py │ │ ├── datasets.py │ │ └── models.py │ ├── ccgan │ │ ├── ccgan.py │ │ ├── datasets.py │ │ └── models.py │ ├── cgan │ │ └── cgan.py │ ├── cluster_gan │ │ └── clustergan.py │ ├── cogan │ │ ├── cogan.py │ │ └── mnistm.py │ ├── context_encoder │ │ ├── context_encoder.py │ │ ├── datasets.py │ │ └── models.py │ ├── cyclegan │ │ ├── cyclegan.py │ │ ├── datasets.py │ │ ├── models.py │ │ └── utils.py │ ├── dcgan │ │ └── dcgan.py │ ├── discogan │ │ ├── datasets.py │ │ ├── discogan.py │ │ └── models.py │ ├── dragan │ │ └── dragan.py │ ├── dualgan │ │ ├── datasets.py │ │ ├── dualgan.py │ │ └── models.py │ ├── ebgan │ │ └── ebgan.py │ ├── esrgan │ │ ├── datasets.py │ │ ├── esrgan.py │ │ ├── models.py │ │ └── test_on_image.py │ ├── gan │ │ └── gan.py │ ├── infogan │ │ └── infogan.py │ ├── lsgan │ │ └── lsgan.py │ ├── munit │ │ ├── datasets.py │ │ ├── models.py │ │ └── munit.py │ ├── pix2pix │ │ ├── datasets.py │ │ ├── models.py │ │ └── pix2pix.py │ ├── pixelda │ │ ├── mnistm.py │ │ └── pixelda.py │ ├── relativistic_gan │ │ └── relativistic_gan.py │ ├── sgan │ │ └── sgan.py │ ├── softmax_gan │ │ └── softmax_gan.py │ ├── srgan │ │ ├── datasets.py │ │ ├── models.py │ │ └── srgan.py │ ├── stargan │ │ ├── datasets.py │ │ ├── models.py │ │ └── stargan.py │ ├── unit │ │ ├── datasets.py │ │ ├── models.py │ │ └── unit.py │ ├── wgan │ │ └── wgan.py │ ├── wgan_div │ │ └── wgan_div.py │ └── wgan_gp │ │ └── wgan_gp.py ├── HumanPoseEstimation │ ├── Hourglass.py │ ├── LPN.py │ ├── README.md │ ├── SimpleBaseline.py │ └── context_block.py ├── InstanceSegmentation │ ├── PolarMask.py │ └── README.md ├── Lightweight │ ├── GhostNet.py │ ├── MixNet.py │ ├── MobileNetV1.py │ ├── MobileNetV2.py │ ├── MobileNetV3.py │ ├── MobileNetXt.py │ ├── README.md │ ├── ShuffleNet.py │ ├── ShuffleNetV2.py │ ├── SqueezeNet.py │ └── Xception.py ├── ObjectDetection │ ├── ASFF.py │ ├── CenterNet.py │ ├── CornerNet.py │ ├── FCOS.py │ ├── FPN.py │ ├── FisheyeMODNet.py │ ├── FoveaBox.py │ ├── README.md │ ├── 
RetinaNet.py │ ├── SSD.py │ ├── VoVNet.py │ ├── VoVNetV2.py │ ├── YOLO.py │ ├── YOLO_Nano.py │ ├── YOLOv2.py │ └── YOLOv3.py ├── Others │ ├── DynamicReLU.py │ └── PyramidalConvolution.py ├── README.md ├── SemanticSegmentation │ ├── DeeplabV3Plus.py │ ├── ENet.py │ ├── FCN.py │ ├── FastSCNN.py │ ├── FisheyeMODNet.py │ ├── ICNet.py │ ├── LEDnet.py │ ├── LRNnet.py │ ├── LWnet.py │ ├── README.md │ ├── SegNet.py │ └── Unet.py ├── Utils │ └── utils.py └── pretrainedmodels │ ├── __init__.py │ ├── datasets │ ├── __init__.py │ ├── utils.py │ └── voc.py │ ├── models │ ├── __init__.py │ ├── bninception.py │ ├── cafferesnet.py │ ├── dpn.py │ ├── fbresnet.py │ ├── fbresnet │ │ ├── resnet152_dump.lua │ │ └── resnet152_load.py │ ├── inceptionresnetv2.py │ ├── inceptionv4.py │ ├── nasnet.py │ ├── nasnet_mobile.py │ ├── pnasnet.py │ ├── polynet.py │ ├── resnext.py │ ├── resnext_features │ │ ├── __init__.py │ │ ├── resnext101_32x4d_features.py │ │ └── resnext101_64x4d_features.py │ ├── senet.py │ ├── torchvision_models.py │ ├── utils.py │ ├── vggm.py │ ├── wideresnet.py │ └── xception.py │ ├── utils.py │ └── version.py ├── pytorch_loss ├── README.md ├── __init__.py ├── affinity_loss.py ├── amsoftmax.py ├── conv_ops.py ├── dice_loss.py ├── dual_focal_loss.py ├── ema.py ├── focal_loss.py ├── focal_loss_old.py ├── frelu.py ├── generalized_iou_loss.py ├── hswish.py ├── info.txt ├── label_smooth.py ├── large_margin_softmax.py ├── lovasz_softmax.py ├── mish.py ├── ohem_loss.py ├── one_hot.py ├── pc_softmax.py ├── soft_dice_loss.py ├── swish.py ├── taylor_softmax.py ├── test.py └── triplet_loss.py ├── results ├── info.txt └── training.png ├── runs └── info.txt ├── tf_to_pytorch ├── README.md ├── convert_tf_to_pt │ ├── download.sh │ ├── load_tf_weights.py │ ├── load_tf_weights_tf1.py │ ├── original_tf │ │ ├── efficientnet_builder.py │ │ ├── efficientnet_model.py │ │ ├── eval_ckpt_main.py │ │ ├── eval_ckpt_main_tf1.py │ │ ├── preprocessing.py │ │ └── utils.py │ ├── rename.sh │ └── run.sh └── pretrained_tensorflow │ └── download.sh ├── train_baseline.py ├── utils ├── __init__.py ├── algorithm_utils.py ├── arg_utils.py ├── data_utils.py ├── devide_dataset.py └── preprocess.py └── visualization.py /args.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class data_config: 4 | model_name = "baseline" 5 | 6 | '''***********- dataset and directory-*************''' 7 | dataset='cifar100' 8 | input_size = 32 9 | num_class = 100 10 | data_path = './dataset' 11 | train_file='' 12 | val_file = '' 13 | test_file = '' 14 | MODEL_PATH = './ckpts/cifar100/resnet/' 15 | if not os.path.exists(MODEL_PATH): 16 | os.makedirs(MODEL_PATH) 17 | 18 | '''***********- Hyper Arguments-*************''' 19 | autoaug = 0 # set to 1 to enable auto-augmentation 20 | gpus=[0,1] #[1,2,3] 21 | WORKERS = 5 22 | tensorboard= False 23 | epochs = 200 24 | batch_size = 128 25 | delta =0.00001 26 | rand_seed=40 # a seed greater than 0 fixes the random state 27 | lr=0.1 28 | warm=1 # warm-up training phase 29 | optimizer = "torch.optim.SGD" 30 | optimizer_parm = {'lr': lr,'momentum':0.9, 'weight_decay':5e-4, 'nesterov':False} 31 | # optimizer = "torch.optim.AdamW" 32 | # optimizer_parm = {'lr': 0.001, 'weight_decay': 0.00001} 33 | # Learning rate: a small learning rate converges slowly but can drive the loss lower. With a sum-of-squared-error cost, the learning rate should be set correspondingly smaller as the amount of data grows. Adam typically uses 0.001 and SGD 0.1; when the batch size is increased by a factor of n, the learning rate is usually scaled up by roughly sqrt(n). 34 | # weight_decay: usually 1e-4 to 1e-5; the larger the value, the stronger the regularization. For a large, complex dataset with a simple model, decrease it; for a small dataset with a more complex model, increase it. 35 | scheduler ="torch.optim.lr_scheduler.MultiStepLR" 36 | scheduler_parm ={'milestones':[60,120,160], 'gamma':0.2} 37 | # 
scheduler = "torch.optim.lr_scheduler.CosineAnnealingLR" 38 | # scheduler_parm = {'T_max': 200, 'eta_min': 1e-4} 39 | # scheduler = "torch.optim.lr_scheduler.StepLR" 40 | # scheduler_parm = {'step_size':1000,'gamma': 0.65} 41 | # scheduler = "torch.optim.lr_scheduler.ReduceLROnPlateau" 42 | # scheduler_parm = {'mode': 'min', 'factor': 0.8,'patience':10, 'verbose':True,'threshold':0.0001, 'threshold_mode':'rel', 'cooldown':2, 'min_lr':0, 'eps':1e-08} 43 | # scheduler = "torch.optim.lr_scheduler.ExponentialLR" 44 | # scheduler_parm = {'gamma': 0.1} 45 | loss_f ='torch.nn.CrossEntropyLoss' 46 | loss_dv = 'torch.nn.KLDivLoss' 47 | loss_fn = 'torch.nn.BCELoss' # loss_fn = 'torch.nn.BCEWithLogitsLoss' # loss_fn='torch.nn.MSELoss' 48 | fn_weight =[3.734438666137167, 1.0, 1.0, 1.0, 3.5203138607843196, 3.664049338245769, 3.734438666137167, 3.6917943287286734, 1.0, 3.7058695139403963, 1.0, 2.193419513003608, 3.720083373160097, 3.6917943287286734, 3.734438666137167, 1.0, 2.6778551377707998] 49 | 50 | 51 | -------------------------------------------------------------------------------- /autoaug/cutout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class Cutout(object): 6 | """Randomly mask out one or more patches from an image. 7 | Args: 8 | n_holes (int): Number of patches to cut out of each image. 9 | length (int): The length (in pixels) of each square patch. 10 | """ 11 | def __init__(self, n_holes=1, length=8): 12 | self.n_holes = n_holes 13 | self.length = length 14 | 15 | def __call__(self, img): 16 | """ 17 | Args: 18 | img (Tensor): Tensor image of size (C, H, W). 19 | Returns: 20 | Tensor: Image with n_holes of dimension length x length cut out of it. 21 | """ 22 | h = img.size(1) 23 | w = img.size(2) 24 | 25 | mask = np.ones((h, w), np.float32) 26 | 27 | for n in range(self.n_holes): 28 | y = np.random.randint(h) 29 | x = np.random.randint(w) 30 | 31 | y1 = np.clip(y - self.length // 2, 0, h) 32 | y2 = np.clip(y + self.length // 2, 0, h) 33 | x1 = np.clip(x - self.length // 2, 0, w) 34 | x2 = np.clip(x + self.length // 2, 0, w) 35 | 36 | mask[y1: y2, x1: x2] = 0. 
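# each hole is a length x length square centred on a random (y, x); np.clip keeps the
# square inside the image bounds, so holes near the border are truncated rather than wrapped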
37 | 38 | mask = torch.from_numpy(mask) 39 | mask = mask.expand_as(img).cuda() 40 | # mask.to(img.device) 41 | img = img * mask 42 | 43 | return img 44 | 45 | # if args.cutout: 46 | # train_transform.transforms.append(Cutout(n_holes=args.n_holes, length=args.length)) 47 | -------------------------------------------------------------------------------- /autoaug/data/cutmix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/autoaug/data/cutmix.png -------------------------------------------------------------------------------- /autoaug/data/cutout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/autoaug/data/cutout.png -------------------------------------------------------------------------------- /autoaug/data/dropblock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/autoaug/data/dropblock.png -------------------------------------------------------------------------------- /autoaug/data/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/autoaug/data/image.png -------------------------------------------------------------------------------- /autoaug/data/mainfold_mixup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/autoaug/data/mainfold_mixup.png -------------------------------------------------------------------------------- /autoaug/data/mixup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/autoaug/data/mixup.png -------------------------------------------------------------------------------- /autoaug/data/shakedrop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/autoaug/data/shakedrop.png -------------------------------------------------------------------------------- /autoaug/data/stochdepth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/autoaug/data/stochdepth.png -------------------------------------------------------------------------------- /autoaug/dropblock/scheduler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import nn 3 | 4 | 5 | class LinearScheduler(nn.Module): 6 | def __init__(self, dropblock, start_value, stop_value, nr_steps): 7 | super(LinearScheduler, self).__init__() 8 | self.dropblock = dropblock 9 | self.i = 0 10 | self.drop_values = np.linspace(start=start_value, stop=stop_value, num=int(nr_steps)) 11 | 12 | def forward(self, x): 13 | return 
self.dropblock(x) 14 | 15 | def step(self): 16 | if self.i < len(self.drop_values): 17 | self.dropblock.drop_prob = self.drop_values[self.i] 18 | 19 | self.i += 1 -------------------------------------------------------------------------------- /autoaug/label_smoothing.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | class LabelSmoothingCrossEntropy(nn.Module): 5 | def __init__(self, eps=0.1, reduction='mean'): 6 | super(LabelSmoothingCrossEntropy, self).__init__() 7 | self.eps = eps 8 | self.reduction = reduction 9 | 10 | def forward(self, output, target): 11 | c = output.size()[-1] 12 | log_preds = F.log_softmax(output, dim=-1) 13 | if self.reduction=='sum': 14 | loss = -log_preds.sum() 15 | else: 16 | loss = -log_preds.sum(dim=-1) 17 | if self.reduction=='mean': 18 | loss = loss.mean() 19 | return loss*self.eps/c + (1-self.eps) * F.nll_loss(log_preds, target, reduction=self.reduction) -------------------------------------------------------------------------------- /autoaug/mixup.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def mixup_data(x, y, alpha=1.0, use_cuda=True): 5 | '''Returns mixed inputs, pairs of targets, and lambda''' 6 | if alpha > 0: 7 | lam = np.random.beta(alpha, alpha) 8 | else: 9 | lam = 1 10 | 11 | batch_size = x.size()[0] 12 | if use_cuda: 13 | index = torch.randperm(batch_size).cuda() 14 | else: 15 | index = torch.randperm(batch_size) 16 | 17 | mixed_x = lam * x + (1 - lam) * x[index, :] 18 | y_a, y_b = y, y[index] 19 | return mixed_x, y_a, y_b, lam 20 | 21 | 22 | def mixup_criterion(criterion, pred, y_a, y_b, lam): 23 | return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b) -------------------------------------------------------------------------------- /autoaug/shakedrop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | 8 | 9 | class ShakeDropFunction(torch.autograd.Function): 10 | 11 | @staticmethod 12 | def forward(ctx, x, training=True, p_drop=0.5, alpha_range=[-1, 1]): 13 | if training: 14 | gate = torch.cuda.FloatTensor([0]).bernoulli_(1 - p_drop) 15 | # print(gate) 16 | ctx.save_for_backward(gate) 17 | 18 | if gate.item() == 0: 19 | alpha = torch.cuda.FloatTensor(x.size(0)).uniform_(*alpha_range) 20 | alpha = alpha.view(alpha.size(0), 1, 1, 1).expand_as(x) 21 | # alpha = Variable(torch.zeros(1)).cuda()  # overriding alpha with zeros would always zero the branch output; keep the sampled alpha 22 | # print(gate) 23 | return alpha * x 24 | # return None 25 | else: 26 | return x 27 | else: 28 | return (1 - p_drop) * x 29 | 30 | @staticmethod 31 | def backward(ctx, grad_output): 32 | gate = ctx.saved_tensors[0] 33 | # print(gate) 34 | if gate.item() == 0: 35 | beta = torch.cuda.FloatTensor(grad_output.size(0)).uniform_(0, 1) 36 | beta = beta.view(beta.size(0), 1, 1, 1).expand_as(grad_output) 37 | beta = Variable(beta) 38 | return beta * grad_output, None, None, None 39 | else: 40 | return grad_output, None, None, None 41 | 42 | 43 | class ShakeDrop(nn.Module): 44 | 45 | def __init__(self, p_drop=0.5, alpha_range=[-1, 1]): 46 | super(ShakeDrop, self).__init__() 47 | self.p_drop = p_drop 48 | self.alpha_range = alpha_range 49 | 50 | 51 | def forward(self, x): 52 | return ShakeDropFunction.apply(x, self.training, self.p_drop, self.alpha_range) 53 | 
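A minimal sketch of how these autoaug utilities are typically wired into a training step (this example is not part of the repository: net, criterion, optimizer and train_loader are placeholder names, and the import paths assume the repository root is on PYTHONPATH). mixup_data mixes each input batch and returns both sets of targets together with the mixing coefficient lam; mixup_criterion recombines the two losses with the same lam; ShakeDrop is instantiated inside a residual block and applied to the residual branch before it is added to the shortcut.

import torch
from autoaug.mixup import mixup_data, mixup_criterion
from autoaug.shakedrop import ShakeDrop

shake_drop = ShakeDrop(p_drop=0.5, alpha_range=[-1, 1])    # inside a block: out = shake_drop(branch(x)) + shortcut

for images, targets in train_loader:                       # placeholder data loader
    images, targets = images.cuda(), targets.cuda()        # the CUDA-only ops in shakedrop.py assume GPU tensors
    mixed_x, y_a, y_b, lam = mixup_data(images, targets, alpha=1.0, use_cuda=True)
    outputs = net(mixed_x)                                  # placeholder model
    loss = mixup_criterion(criterion, outputs, y_a, y_b, lam)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()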
-------------------------------------------------------------------------------- /ckpts/info.txt: -------------------------------------------------------------------------------- 1 | directory to store checkpoints. -------------------------------------------------------------------------------- /dataset/info.txt: -------------------------------------------------------------------------------- 1 | dataset. 2 | -------------------------------------------------------------------------------- /dataset_loder/info.txt: -------------------------------------------------------------------------------- 1 | dataloder for different datasets. -------------------------------------------------------------------------------- /dataset_loder/scoliosis_dataloder.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import math 4 | from PIL import Image 5 | from torch.utils.data import Dataset 6 | class ScoliosisDataset(Dataset): 7 | 8 | def __init__(self, data_dir, transform=None, target_transform=None, train=True): 9 | self.data_dir = data_dir 10 | self.transform = transform 11 | self.target_transform = target_transform 12 | self.train = train 13 | self.image_label = self.get_imageFiles() 14 | 15 | def get_imageFiles(self): 16 | labels_dict = {'Normal': 0, 'Mild': 1, 'Moderate': 2, 'Serious': 3} 17 | if self.train: 18 | image_path = os.path.join(self.data_dir, "train") 19 | else: 20 | image_path = os.path.join(self.data_dir, "test") 21 | labels = sorted(os.listdir(image_path)) 22 | 23 | image_list = [] 24 | for label in labels: 25 | image_lists = os.listdir(os.path.join(image_path, label)) 26 | for image_name in image_lists: 27 | if image_name: 28 | image_list.append((os.path.join(image_path, label, image_name), labels_dict[label])) 29 | return sorted(image_list) 30 | 31 | def normal_sampling(self,mean, label_k, std=1): 32 | return math.exp(-(label_k - mean) ** 2 / (2 * std ** 2)) / (math.sqrt(2 * math.pi) * std) 33 | 34 | def __getitem__(self, item): 35 | image_path, label = self.image_label[item] 36 | image = Image.open(image_path).convert('RGB') 37 | if self.train: 38 | if self.transform is not None: 39 | image = self.transform(image) 40 | else: 41 | if self.target_transform is not None: 42 | image = self.target_transform(image) 43 | labels = [self.normal_sampling(int(label), i) for i in range(4)] 44 | labels = [i if i > 1e-10 else 1e-10 for i in labels] 45 | labels = torch.Tensor(labels) 46 | # label = torch.FloatTensor(label) 47 | # print(labels) 48 | return image,label#labels, 49 | 50 | 51 | 52 | def __len__(self): 53 | return len(self.image_label) 54 | -------------------------------------------------------------------------------- /deployment/c++_libtorch/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0 FATAL_ERROR) 2 | project(c) 3 | 4 | set(CMAKE_PREFIX_PATH 5 | /home/luxiangzhe/libtorch_cpu 6 | /home/luxiangzhe/opencv_3.4.3/build) 7 | 8 | find_package(Torch REQUIRED) 9 | find_package(OpenCV REQUIRED) 10 | 11 | 12 | add_executable(c main.cpp) 13 | target_link_libraries(c ${TORCH_LIBRARIES} ${OpenCV_LIBS}) 14 | set_property(TARGET c PROPERTY CXX_STANDARD 14) 15 | -------------------------------------------------------------------------------- /deployment/c++_libtorch/SaveTorchscriptModel.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # @time :2020/8/28 3 | # @IDE : pycharm 4 | # 
@author :lxztju 5 | # @github : https://github.com/lxztju 6 | # @Email : lxztju@163.com 7 | 8 | 9 | import torch 10 | 11 | 12 | def load_checkpoint(filepath): 13 | checkpoint = torch.load(filepath, map_location='cpu') 14 | model = checkpoint['model'] # extract the network structure 15 | model.load_state_dict(checkpoint['model_state_dict']) # load the network weights 16 | for parameter in model.parameters(): 17 | parameter.requires_grad = False 18 | model.eval() 19 | return model 20 | 21 | model = load_checkpoint('./resnext101_32x8.pth') 22 | 23 | 24 | example = torch.rand(1, 3, 224, 224) 25 | 26 | 27 | traced_script_module = torch.jit.trace(model, example) 28 | 29 | traced_script_module.save('./trace_resnext101_32x8.pt') 30 | output = traced_script_module(torch.ones(1, 3, 224, 224)) 31 | 32 | print(output) 33 | -------------------------------------------------------------------------------- /deployment/flask_v1/client.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # @time :2020.04.05 3 | # @IDE : pycharm 4 | # @author :lxztju 5 | # @github : https://github.com/lxztju 6 | 7 | import requests 8 | import time 9 | 10 | # Initialize the REST API endpoint URL. 11 | REST_API_URL = 'http://127.0.0.1:5000/predict' 12 | 13 | 14 | def predict_result(image_path): 15 | # Read the image and build the request payload. 16 | image = open(image_path, 'rb').read() 17 | payload = {'image': image} 18 | 19 | # Submit the request. 20 | r = requests.post(REST_API_URL, files=payload).json() 21 | 22 | # Ensure the request was successful. 23 | if r['success']: 24 | # Loop over the predictions and display them. 25 | for (i, result) in enumerate(r['predictions']): 26 | print(result) 27 | 28 | # Otherwise, the request failed. 29 | else: 30 | print('Request failed') 31 | 32 | 33 | if __name__ == '__main__': 34 | 35 | 36 | t1 = time.time() 37 | img_path = '../test_images/electronic_100.png' 38 | predict_result(img_path) 39 | t2 = time.time() 40 | print(t2-t1) 41 | -------------------------------------------------------------------------------- /deployment/flask_v1/server.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # @time :2020.04.05 3 | # @IDE : pycharm 4 | # @author :lxztju 5 | # @github : https://github.com/lxztju 6 | 7 | import io 8 | from PIL import Image 9 | from flask import Flask, request, jsonify 10 | 11 | import torch 12 | import json 13 | import time 14 | 15 | from transform import get_test_transform 16 | with open('label.json', 'rb') as f: 17 | label_id_name_dict = json.load(f) 18 | 19 | mean = [0.485, 0.456, 0.406] 20 | std = [0.229, 0.224, 0.225] 21 | input_size = 300 22 | 23 | app = Flask(__name__) 24 | 25 | @app.route("/predict", methods=["POST"]) 26 | def predict(): 27 | # initialize the data dictionary that will be returned from the 28 | # view 29 | data = {"success": False} 30 | # print(data) 31 | 32 | # ensure an image was properly uploaded to our endpoint 33 | if request.method == "POST": 34 | # print("Hello") 35 | if request.files.get("image"): 36 | # print("world") 37 | now = time.strftime("%Y-%m-%d-%H_%M_%S",time.localtime(time.time())) 38 | # read the image in PIL format 39 | 40 | image = request.files["image"].read() 41 | image = Image.open(io.BytesIO(image)).convert('RGB') 42 | image.save(now + '.jpg') 43 | # preprocess the image and prepare it for classification 44 | img = get_test_transform(mean, std, input_size)(image).unsqueeze(0) 45 | 46 | # classify the input image and then initialize the list 47 | # of 
predictions to return to the client 48 | 49 | out = model(img) 50 | # print(out) 51 | pred_label = torch.max(out, 1)[1].item() 52 | # print(pred_label) 53 | data["predictions"] = [] 54 | data["predictions"].append(label_id_name_dict[str(pred_label)]) 55 | 56 | # indicate that the request was a success 57 | data["success"] = True 58 | # print(data["success"]) 59 | 60 | # return the data dictionary as a JSON response 61 | return jsonify(data) 62 | 63 | def load_checkpoint(filepath): 64 | checkpoint = torch.load(filepath) 65 | model = checkpoint['model'] # extract the network structure 66 | model.load_state_dict(checkpoint['model_state_dict']) # load the network weights 67 | for parameter in model.parameters(): 68 | parameter.requires_grad = False 69 | model.eval() 70 | return model 71 | 72 | # if this is the main thread of execution first load the model and 73 | # then start the server 74 | if __name__ == "__main__": 75 | print(("* Loading PyTorch model and starting Flask server... " 76 | "please wait until the server has fully started")) 77 | num_classes=100 78 | checkpoint_path = './epoch_120.pth' 79 | model = load_checkpoint(checkpoint_path) 80 | print('..... Finished loading model! ......') 81 | app.run(host='0.0.0.0', port =5000,debug=True ) 82 | 83 | 84 | -------------------------------------------------------------------------------- /deployment/flask_v2/cfg.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # @time :2020/8/21 3 | # @IDE : pycharm 4 | # @author :lxztju 5 | # @github : https://github.com/lxztju 6 | # @Email : lxztju@163.com 7 | 8 | 9 | ''' 10 | Configuration parameters for the web service 11 | ''' 12 | 13 | # input image size 14 | InputSize = (224, 224) 15 | 16 | # number of input image channels 17 | Channel = 3 18 | 19 | # batch size used when processing queued images 20 | BatchSize = 16 21 | 22 | # data type 23 | ImageType = 'float32' 24 | 25 | # device the model runs on 26 | Device = 'cpu' 27 | 28 | # name of the Redis image queue 29 | ImageQueue = 'image_queue' 30 | 31 | ServeSleep = 0.1 32 | ClientSleep = 0.1 33 | 34 | 35 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | home = os.path.expanduser('~') 3 | ## local storage paths of the pretrained weights 4 | LOCAL_PRETRAINED = { 5 | 'resnext101_32x8d': home + '/weights/resnext101_32x8.pth', 6 | 'resnext101_32x16d': home + '/weights/resnext101_32x16.pth', 7 | 'resnext101_32x48d': home + '/weights/resnext101_32x48.pth', 8 | 'resnext101_32x32d': home + '/weights/resnext101_32x32.pth', 9 | 'resnet50': home +'/weights/resnet50.pth', 10 | 'resnet101': home +'/weights/resnet101.pth', 11 | 'densenet121': home +'/weights/densenet121.pth', 12 | 'densenet169': home +'/weights/densenet169.pth', 13 | 'moblienetv2': home +'/weights/mobilenetv2.pth', 14 | 'efficientnet-b7': home + '/weights/efficientnet-b7.pth', 15 | 'efficientnet-b8': home + '/weights/efficientnet-b8.pth' 16 | } 17 | 18 | model_urls = { 19 | 'resnext101_32x8d': 'https://download.pytorch.org/models/ig_resnext101_32x8-c38310e5.pth', 20 | 'resnext101_32x16d': 'https://download.pytorch.org/models/ig_resnext101_32x16-c6f796b0.pth', 21 | 'resnext101_32x32d': 'https://download.pytorch.org/models/ig_resnext101_32x32-e4b90b00.pth', 22 | 'resnext101_32x48d': 'https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth', 23 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 24 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 25 | 'densenet121': 
'https://download.pytorch.org/models/densenet121-a639ec97.pth', 26 | 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth', 27 | 'moblienetv2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', 28 | 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth', 29 | 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth', 30 | 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth', 31 | 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth', 32 | 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth', 33 | 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth', 34 | 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth', 35 | 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth', 36 | } 37 | from .vision import * 38 | from .resnext_wsl import * 39 | from .efficientnet_pytorch import * 40 | from .build_model import * 41 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/efficientnet_pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.6.3" 2 | from .model import EfficientNet 3 | from .utils import ( 4 | GlobalParams, 5 | BlockArgs, 6 | BlockDecoder, 7 | efficientnet, 8 | get_model_params, 9 | ) 10 | 11 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/__init__.py: -------------------------------------------------------------------------------- 1 | from .alexnet import * 2 | from .resnet import * 3 | from .vgg import * 4 | from .squeezenet import * 5 | from .inception import * 6 | from .densenet import * 7 | from .googlenet import * 8 | from .mobilenet import * 9 | from .shufflenetv2 import * 10 | from . import segmentation 11 | from . import detection 12 | from.resnet import ResNet, Bottleneck 13 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class IntermediateLayerGetter(nn.ModuleDict): 8 | """ 9 | Module wrapper that returns intermediate layers from a model 10 | 11 | It has a strong assumption that the modules have been registered 12 | into the model in the same order as they are used. 13 | This means that one should **not** reuse the same nn.Module 14 | twice in the forward if you want this to work. 15 | 16 | Additionally, it is only able to query submodules that are directly 17 | assigned to the model. So if `model` is passed, `model.feature1` can 18 | be returned, but not `model.feature1.layer2`. 
19 | 20 | Arguments: 21 | model (nn.Module): model on which we will extract the features 22 | return_layers (Dict[name, new_name]): a dict containing the names 23 | of the modules for which the activations will be returned as 24 | the key of the dict, and the value of the dict is the name 25 | of the returned activation (which the user can specify). 26 | 27 | Examples:: 28 | 29 | >>> m = torchvision.models.resnet18(pretrained=True) 30 | >>> # extract layer1 and layer3, giving as names `feat1` and feat2` 31 | >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m, 32 | >>> {'layer1': 'feat1', 'layer3': 'feat2'}) 33 | >>> out = new_m(torch.rand(1, 3, 224, 224)) 34 | >>> print([(k, v.shape) for k, v in out.items()]) 35 | >>> [('feat1', torch.Size([1, 64, 56, 56])), 36 | >>> ('feat2', torch.Size([1, 256, 14, 14]))] 37 | """ 38 | def __init__(self, model, return_layers): 39 | if not set(return_layers).issubset([name for name, _ in model.named_children()]): 40 | raise ValueError("return_layers are not present in model") 41 | 42 | orig_return_layers = return_layers 43 | return_layers = {k: v for k, v in return_layers.items()} 44 | layers = OrderedDict() 45 | for name, module in model.named_children(): 46 | layers[name] = module 47 | if name in return_layers: 48 | del return_layers[name] 49 | if not return_layers: 50 | break 51 | 52 | super(IntermediateLayerGetter, self).__init__(layers) 53 | self.return_layers = orig_return_layers 54 | 55 | def forward(self, x): 56 | out = OrderedDict() 57 | for name, module in self.named_children(): 58 | x = module(x) 59 | if name in self.return_layers: 60 | out_name = self.return_layers[name] 61 | out[out_name] = x 62 | return out 63 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/alexnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .utils import load_state_dict_from_url 3 | 4 | 5 | __all__ = ['AlexNet', 'alexnet'] 6 | 7 | 8 | model_urls = { 9 | 'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth', 10 | } 11 | 12 | 13 | class AlexNet(nn.Module): 14 | 15 | def __init__(self, num_classes=1000): 16 | super(AlexNet, self).__init__() 17 | self.features = nn.Sequential( 18 | nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), 19 | nn.ReLU(inplace=True), 20 | nn.MaxPool2d(kernel_size=3, stride=2), 21 | nn.Conv2d(64, 192, kernel_size=5, padding=2), 22 | nn.ReLU(inplace=True), 23 | nn.MaxPool2d(kernel_size=3, stride=2), 24 | nn.Conv2d(192, 384, kernel_size=3, padding=1), 25 | nn.ReLU(inplace=True), 26 | nn.Conv2d(384, 256, kernel_size=3, padding=1), 27 | nn.ReLU(inplace=True), 28 | nn.Conv2d(256, 256, kernel_size=3, padding=1), 29 | nn.ReLU(inplace=True), 30 | nn.MaxPool2d(kernel_size=3, stride=2), 31 | ) 32 | self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) 33 | self.classifier = nn.Sequential( 34 | nn.Dropout(), 35 | nn.Linear(256 * 6 * 6, 4096), 36 | nn.ReLU(inplace=True), 37 | nn.Dropout(), 38 | nn.Linear(4096, 4096), 39 | nn.ReLU(inplace=True), 40 | nn.Linear(4096, num_classes), 41 | ) 42 | 43 | def forward(self, x): 44 | x = self.features(x) 45 | x = self.avgpool(x) 46 | x = x.view(x.size(0), 256 * 6 * 6) 47 | x = self.classifier(x) 48 | return x 49 | 50 | 51 | def alexnet(pretrained=False, progress=True, **kwargs): 52 | r"""AlexNet model architecture from the 53 | `"One weird trick..." `_ paper. 
54 | 55 | Args: 56 | pretrained (bool): If True, returns a model pre-trained on ImageNet 57 | progress (bool): If True, displays a progress bar of the download to stderr 58 | """ 59 | model = AlexNet(**kwargs) 60 | if pretrained: 61 | state_dict = load_state_dict_from_url(model_urls['alexnet'], 62 | progress=progress) 63 | model.load_state_dict(state_dict) 64 | return model 65 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .faster_rcnn import * 2 | from .mask_rcnn import * 3 | from .keypoint_rcnn import * 4 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/_utils.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/_utils.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/backbone_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/backbone_utils.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/backbone_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/backbone_utils.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/faster_rcnn.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/faster_rcnn.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/faster_rcnn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/faster_rcnn.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/generalized_rcnn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/generalized_rcnn.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/generalized_rcnn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/generalized_rcnn.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/image_list.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/image_list.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/image_list.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/image_list.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/keypoint_rcnn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/keypoint_rcnn.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/keypoint_rcnn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/keypoint_rcnn.cpython-37.pyc -------------------------------------------------------------------------------- 
/deployment/flask_v2/models/vision/detection/__pycache__/mask_rcnn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/mask_rcnn.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/mask_rcnn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/mask_rcnn.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/roi_heads.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/roi_heads.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/roi_heads.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/roi_heads.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/rpn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/rpn.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/rpn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/rpn.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/transform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/transform.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/__pycache__/transform.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/detection/__pycache__/transform.cpython-37.pyc -------------------------------------------------------------------------------- 
/deployment/flask_v2/models/vision/detection/backbone_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from torch import nn 3 | from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool 4 | 5 | from torchvision.ops import misc as misc_nn_ops 6 | from .._utils import IntermediateLayerGetter 7 | from .. import resnet 8 | 9 | 10 | class BackboneWithFPN(nn.Sequential): 11 | """ 12 | Adds a FPN on top of a model. 13 | 14 | Internally, it uses torchvision.models._utils.IntermediateLayerGetter to 15 | extract a submodel that returns the feature maps specified in return_layers. 16 | The same limitations of IntermediatLayerGetter apply here. 17 | 18 | Arguments: 19 | backbone (nn.Module) 20 | return_layers (Dict[name, new_name]): a dict containing the names 21 | of the modules for which the activations will be returned as 22 | the key of the dict, and the value of the dict is the name 23 | of the returned activation (which the user can specify). 24 | in_channels_list (List[int]): number of channels for each feature map 25 | that is returned, in the order they are present in the OrderedDict 26 | out_channels (int): number of channels in the FPN. 27 | 28 | Attributes: 29 | out_channels (int): the number of channels in the FPN 30 | """ 31 | def __init__(self, backbone, return_layers, in_channels_list, out_channels): 32 | body = IntermediateLayerGetter(backbone, return_layers=return_layers) 33 | fpn = FeaturePyramidNetwork( 34 | in_channels_list=in_channels_list, 35 | out_channels=out_channels, 36 | extra_blocks=LastLevelMaxPool(), 37 | ) 38 | super(BackboneWithFPN, self).__init__(OrderedDict( 39 | [("body", body), ("fpn", fpn)])) 40 | self.out_channels = out_channels 41 | 42 | 43 | def resnet_fpn_backbone(backbone_name, pretrained): 44 | backbone = resnet.__dict__[backbone_name]( 45 | pretrained=pretrained, 46 | norm_layer=misc_nn_ops.FrozenBatchNorm2d) 47 | # freeze layers 48 | for name, parameter in backbone.named_parameters(): 49 | if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name: 50 | parameter.requires_grad_(False) 51 | 52 | return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3} 53 | 54 | in_channels_stage2 = 256 55 | in_channels_list = [ 56 | in_channels_stage2, 57 | in_channels_stage2 * 2, 58 | in_channels_stage2 * 4, 59 | in_channels_stage2 * 8, 60 | ] 61 | out_channels = 256 62 | return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels) 63 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | from collections import OrderedDict 7 | import torch 8 | from torch import nn 9 | 10 | 11 | class GeneralizedRCNN(nn.Module): 12 | """ 13 | Main class for Generalized R-CNN. 14 | 15 | Arguments: 16 | backbone (nn.Module): 17 | rpn (nn.Module): 18 | heads (nn.Module): takes the features + the proposals from the RPN and computes 19 | detections / masks from it. 
20 | transform (nn.Module): performs the data transformation from the inputs to feed into 21 | the model 22 | """ 23 | 24 | def __init__(self, backbone, rpn, roi_heads, transform): 25 | super(GeneralizedRCNN, self).__init__() 26 | self.transform = transform 27 | self.backbone = backbone 28 | self.rpn = rpn 29 | self.roi_heads = roi_heads 30 | 31 | def forward(self, images, targets=None): 32 | """ 33 | Arguments: 34 | images (list[Tensor]): images to be processed 35 | targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional) 36 | 37 | Returns: 38 | result (list[BoxList] or dict[Tensor]): the output from the model. 39 | During training, it returns a dict[Tensor] which contains the losses. 40 | During testing, it returns list[BoxList] contains additional fields 41 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 42 | 43 | """ 44 | if self.training and targets is None: 45 | raise ValueError("In training mode, targets should be passed") 46 | original_image_sizes = [img.shape[-2:] for img in images] 47 | images, targets = self.transform(images, targets) 48 | features = self.backbone(images.tensors) 49 | if isinstance(features, torch.Tensor): 50 | features = OrderedDict([(0, features)]) 51 | proposals, proposal_losses = self.rpn(images, features, targets) 52 | detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets) 53 | detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes) 54 | 55 | losses = {} 56 | losses.update(detector_losses) 57 | losses.update(proposal_losses) 58 | 59 | if self.training: 60 | return losses 61 | 62 | return detections 63 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/detection/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 
11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .segmentation import * 2 | from .fcn import * 3 | from .deeplabv3 import * 4 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/_utils.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/_utils.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/deeplabv3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/deeplabv3.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/deeplabv3.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/deeplabv3.cpython-37.pyc 
-------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/fcn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/fcn.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/fcn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/fcn.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/segmentation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/segmentation.cpython-36.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/__pycache__/segmentation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/deployment/flask_v2/models/vision/segmentation/__pycache__/segmentation.cpython-37.pyc -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | 8 | class _SimpleSegmentationModel(nn.Module): 9 | def __init__(self, backbone, classifier, aux_classifier=None): 10 | super(_SimpleSegmentationModel, self).__init__() 11 | self.backbone = backbone 12 | self.classifier = classifier 13 | self.aux_classifier = aux_classifier 14 | 15 | def forward(self, x): 16 | input_shape = x.shape[-2:] 17 | # contract: features is a dict of tensors 18 | features = self.backbone(x) 19 | 20 | result = OrderedDict() 21 | x = features["out"] 22 | x = self.classifier(x) 23 | x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False) 24 | result["out"] = x 25 | 26 | if self.aux_classifier is not None: 27 | x = features["aux"] 28 | x = self.aux_classifier(x) 29 | x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False) 30 | result["aux"] = x 31 | 32 | return result 33 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/segmentation/fcn.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ._utils import _SimpleSegmentationModel 4 | 5 | 6 | __all__ = ["FCN"] 7 | 8 | 9 | class FCN(_SimpleSegmentationModel): 10 | """ 11 | Implements a Fully-Convolutional Network for semantic segmentation. 
12 | 13 | Arguments: 14 | backbone (nn.Module): the network used to compute the features for the model. 15 | The backbone should return an OrderedDict[Tensor], with the key being 16 | "out" for the last feature map used, and "aux" if an auxiliary classifier 17 | is used. 18 | classifier (nn.Module): module that takes the "out" element returned from 19 | the backbone and returns a dense prediction. 20 | aux_classifier (nn.Module, optional): auxiliary classifier used during training 21 | """ 22 | pass 23 | 24 | 25 | class FCNHead(nn.Sequential): 26 | def __init__(self, in_channels, channels): 27 | inter_channels = in_channels // 4 28 | layers = [ 29 | nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), 30 | nn.BatchNorm2d(inter_channels), 31 | nn.ReLU(), 32 | nn.Dropout(0.1), 33 | nn.Conv2d(inter_channels, channels, 1) 34 | ] 35 | 36 | super(FCNHead, self).__init__(*layers) 37 | -------------------------------------------------------------------------------- /deployment/flask_v2/models/vision/utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | from torch.hub import load_state_dict_from_url 3 | except ImportError: 4 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 5 | -------------------------------------------------------------------------------- /deployment/flask_v2/redis_db.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # @time :2020/8/21 3 | # @IDE : pycharm 4 | # @author :lxztju 5 | # @github : https://github.com/lxztju 6 | # @Emial : lxztju@163.com 7 | 8 | import torch 9 | import numpy as np 10 | import time 11 | 12 | import json 13 | from cfg import InputSize, ImageQueue, BatchSize, ImageType, ClientSleep, ServeSleep, Channel 14 | from utils import db, base64_decode_image,decode_predictions 15 | 16 | 17 | 18 | 19 | def classify_process(filepath): 20 | # 导入模型 21 | print("* Loading model...") 22 | model = load_checkpoint(filepath) 23 | print("* Model loaded") 24 | while True: 25 | # 从数据库中创建预测图像队列 26 | queue = db.lrange(ImageQueue, 0, BatchSize - 1) 27 | imageIDs = [] 28 | batch = None 29 | # 遍历队列 30 | for q in queue: 31 | # 获取队列中的图像并反序列化解码 32 | q = json.loads(q.decode("utf-8")) 33 | image = base64_decode_image(q["image"], ImageType, 34 | (1, InputSize[0], InputSize[1], Channel)) 35 | # 检查batch列表是否为空 36 | if batch is None: 37 | batch = image 38 | # 合并batch 39 | else: 40 | batch = np.vstack([batch, image]) 41 | # 更新图像ID 42 | imageIDs.append(q["id"]) 43 | # print(imageIDs) 44 | if len(imageIDs) > 0: 45 | print("* Batch size: {}".format(batch.shape)) 46 | preds = model(torch.from_numpy(batch.transpose([0, 3,1,2]))) 47 | results = decode_predictions(preds) 48 | # 遍历图像ID和预测结果并打印 49 | for (imageID, resultSet) in zip(imageIDs, results): 50 | # initialize the list of output predictions 51 | output = [] 52 | # loop over the results and add them to the list of 53 | # output predictions 54 | print(resultSet) 55 | for label in resultSet: 56 | prob = label.item() 57 | r = {"label": label.item(), "probability": float(prob)} 58 | output.append(r) 59 | # 保存结果到数据库 60 | db.set(imageID, json.dumps(output)) 61 | # 从队列中删除已预测过的图像 62 | db.ltrim(ImageQueue, len(imageIDs), -1) 63 | time.sleep(ServeSleep) 64 | 65 | 66 | 67 | 68 | def load_checkpoint(filepath): 69 | checkpoint = torch.load(filepath, map_location='cpu') 70 | model = checkpoint['model'] # 提取网络结构 71 | model.load_state_dict(checkpoint['model_state_dict']) # 加载网络权重参数 72 | for parameter in 
model.parameters(): 73 | parameter.requires_grad = False 74 | model.eval() 75 | return model 76 | 77 | 78 | if __name__ == '__main__': 79 | filepath = '../c/resnext101_32x8.pth' 80 | classify_process(filepath) -------------------------------------------------------------------------------- /deployment/flask_v2/server.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # @time :2020/8/21 3 | # @IDE : pycharm 4 | # @author :lxztju 5 | # @github : https://github.com/lxztju 6 | # @Emial : lxztju@163.com 7 | 8 | 9 | from PIL import Image 10 | 11 | import io 12 | from flask import Flask, request, jsonify 13 | import time 14 | import json 15 | import uuid 16 | 17 | import sys 18 | sys.path.append('.') 19 | 20 | from cfg import * 21 | from utils import db, image_transform, base64_encode_image 22 | 23 | # 初始化实例 24 | app = Flask(__name__) 25 | 26 | 27 | 28 | @app.route('/predict', methods=['POST']) 29 | def predict(): 30 | 31 | data = {'Success': False} 32 | 33 | if request.files.get('image'): 34 | 35 | now = time.strftime("%Y-%m-%d-%H_%M_%S",time.localtime(time.time())) 36 | 37 | image = request.files['image'].read() 38 | image = Image.open(io.BytesIO(image)) 39 | image = image_transform(InputSize)(image).numpy() 40 | # 将数组以C语言存储顺序存储 41 | image = image.copy(order="C") 42 | # 生成图像ID 43 | k = str(uuid.uuid4()) 44 | d = {"id": k, "image": base64_encode_image(image)} 45 | # print(d) 46 | db.rpush(ImageQueue, json.dumps(d)) 47 | # 运行服务 48 | while True: 49 | # 获取输出结果 50 | output = db.get(k) 51 | # print(output) 52 | if output is not None: 53 | output = output.decode("utf-8") 54 | data["predictions"] = json.loads(output) 55 | db.delete(k) 56 | break 57 | time.sleep(ClientSleep) 58 | data["success"] = True 59 | return jsonify(data) 60 | 61 | if __name__ == '__main__': 62 | 63 | app.run(host='127.0.0.1', port =5000,debug=True ) 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /deployment/flask_v2/streess_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # @time :2020/8/21 3 | # @IDE : pycharm 4 | # @author :lxztju 5 | # @github : https://github.com/lxztju 6 | # @Emial : lxztju@163.com 7 | 8 | 9 | # curl -X POST -F image=@test.jpg 'http://127.0.0.1:5000/predict' 10 | 11 | from threading import Thread 12 | import requests 13 | import time 14 | 15 | # 请求的URL 16 | REST_API_URL = "http://127.0.0.1:5000/predict" 17 | # 测试图片 18 | IMAGE_PATH = "./test.jpg" 19 | 20 | # 并发数 21 | NUM_REQUESTS = 500 22 | # 请求间隔 23 | SLEEP_COUNT = 0.05 24 | def call_predict_endpoint(n): 25 | 26 | # 上传图像 27 | image = open(IMAGE_PATH, "rb").read() 28 | payload = {"image": image} 29 | # 提交请求 30 | r = requests.post(REST_API_URL, files=payload).json() 31 | # 确认请求是否成功 32 | if r["success"]: 33 | print("[INFO] thread {} OK".format(n)) 34 | else: 35 | print("[INFO] thread {} FAILED".format(n)) 36 | # 多线程进行 37 | for i in range(0, NUM_REQUESTS): 38 | # 创建线程来调用api 39 | t = Thread(target=call_predict_endpoint, args=(i,)) 40 | t.daemon = True 41 | t.start() 42 | time.sleep(SLEEP_COUNT) 43 | time.sleep(300) -------------------------------------------------------------------------------- /deployment/flask_v2/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # @time :2020/8/21 3 | # @IDE : pycharm 4 | # @author :lxztju 5 | # @github : https://github.com/lxztju 6 | # @Emial : lxztju@163.com 
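# Editor's note on the helpers defined below: Resize letterboxes the input to the
# target aspect ratio (padding via a crop with negative offsets) before resizing,
# image_transform builds the usual torchvision preprocessing pipeline
# (resize -> center crop -> ToTensor -> Normalize with ImageNet statistics), and
# base64_encode_image / base64_decode_image serialize C-contiguous numpy arrays so
# they can travel through the Redis queue shared by server.py and redis_db.py.
# The round trip is lossless as long as dtype and shape are passed back unchanged,
# e.g. (illustration only):
#   arr = image_transform(InputSize)(img).numpy().copy(order="C")
#   restored = base64_decode_image(base64_encode_image(arr), arr.dtype.name, arr.shape)
#   # np.array_equal(arr, restored) -> True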
7 | 8 | 9 | import numpy as np 10 | from PIL import Image 11 | 12 | from flask import Flask 13 | from cfg import * 14 | import redis 15 | import base64 16 | import sys 17 | 18 | from torchvision import transforms 19 | 20 | 21 | class Resize(object): 22 | def __init__(self, size, interpolation=Image.BILINEAR): 23 | self.size = size 24 | self.interpolation = interpolation 25 | 26 | def __call__(self, img): 27 | # padding 28 | ratio = self.size[0] / self.size[1] 29 | w, h = img.size 30 | if w / h < ratio: 31 | t = int(h * ratio) 32 | w_padding = (t - w) // 2 33 | img = img.crop((-w_padding, 0, w+w_padding, h)) 34 | else: 35 | t = int(w / ratio) 36 | h_padding = (t - h) // 2 37 | img = img.crop((0, -h_padding, w, h+h_padding)) 38 | 39 | img = img.resize(self.size, self.interpolation) 40 | 41 | return img 42 | 43 | mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225] 44 | def image_transform(inputsize): 45 | return transforms.Compose([ 46 | Resize((int(inputsize[0] * (256 / 224)), int(inputsize[1] * (256 / 224)))), 47 | transforms.CenterCrop(inputsize), 48 | transforms.ToTensor(), 49 | transforms.Normalize(mean=mean, std=std), 50 | ]) 51 | 52 | def decode_predictions(predition): 53 | return predition 54 | 55 | 56 | 57 | 58 | # 构建redis数据库 59 | db = redis.StrictRedis(host="127.0.0.1", port=6379, db=0) 60 | 61 | 62 | def base64_encode_image(img): 63 | return base64.b64encode(img).decode("utf-8") 64 | 65 | def base64_decode_image(img, dtype, shape): 66 | # 查看python版本,如果是python3版本进行转换 67 | if sys.version_info.major == 3: 68 | img = bytes(img, encoding="utf-8") 69 | img = np.frombuffer(base64.decodebytes(img), dtype=dtype) 70 | img = img.reshape(shape) 71 | return img 72 | 73 | 74 | 75 | def prepare_image(image, target): 76 | # if the image mode is not RGB, convert it 77 | if image.mode != "RGB": 78 | image = image.convert("RGB") 79 | # resize the input image and preprocess it 80 | image = image.resize(target) 81 | image = np.array(image) 82 | image = np.expand_dims(image, axis=0) 83 | # return the processed image 84 | return image -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import io 2 | import glob 3 | import os 4 | import numpy as np 5 | from shutil import move 6 | from os.path import join 7 | from os import listdir, rmdir 8 | 9 | 10 | # target_folder = '/disks/disk2/lishengyan/dataset/tiny-imagenet-200/val/' 11 | # test_folder = '/disks/disk2/lishengyan/dataset/tiny-imagenet-200/test1/' 12 | # 13 | # os.mkdir(test_folder) 14 | # val_dict = {} 15 | # with open('/disks/disk2/lishengyan/dataset/tiny-imagenet-200/val/val_annotations.txt', 'r') as f: 16 | # for line in f.readlines(): 17 | # split_line = line.split('\t') 18 | # val_dict[split_line[0]] = split_line[1] 19 | # 20 | # paths = glob.glob('/disks/disk2/lishengyan/dataset/tiny-imagenet-200/val/images/*') 21 | # for path in paths: 22 | # file = path.split('/')[-1] 23 | # folder = val_dict[file] 24 | # if not os.path.exists(target_folder + str(folder)): 25 | # os.mkdir(target_folder + str(folder)) 26 | # os.mkdir(target_folder + str(folder) + '/images') 27 | # if not os.path.exists(test_folder + str(folder)): 28 | # os.mkdir(test_folder + str(folder)) 29 | # os.mkdir(test_folder + str(folder) + '/images') 30 | # 31 | # for path in paths: 32 | # file = path.split('/')[-1] 33 | # folder = val_dict[file] 34 | # if len(glob.glob(target_folder + str(folder) + '/images/*')) < 25: 35 | # dest = target_folder + 
str(folder) + '/images/' + str(file) 36 | # else: 37 | # dest = test_folder + str(folder) + '/images/' + str(file) 38 | # move(path, dest) 39 | # 40 | # rmdir('./tiny-imagenet-200/val/images') 41 | 42 | target_folder = '/disks/disk2/data/SCUT-FBP5500/test/' 43 | source_folder = '/disks/disk2/data/SCUT-FBP5500/Images/' 44 | 45 | # os.mkdir(test_folder) 46 | val_dict = {} 47 | with open('/disks/disk2/data/SCUT-FBP5500_v2/train_test_files/split_of_60%training and 40%testing/test.txt', 'r') as f: 48 | imgs = list(map(lambda line: line.strip().split(' '), f)) 49 | 50 | print(len(imgs)) 51 | 52 | 53 | for i in range(len(imgs)): 54 | img_name, label = imgs[i] 55 | label =float(label) 56 | if label<2.5: 57 | label=0 58 | elif label<3.0: 59 | label = 1 60 | elif label < 3.5: 61 | label = 2 62 | else: 63 | label = 3 64 | label = str(label) 65 | img_path=source_folder+img_name 66 | dest_path=target_folder+label+'/'+img_name 67 | try: 68 | move(img_path,dest_path) 69 | except: 70 | pass 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /models/Attention/A2Atttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | from torch.nn import functional as F 6 | 7 | 8 | 9 | class DoubleAttention(nn.Module): 10 | 11 | def __init__(self, in_channels,c_m,c_n,reconstruct = True): 12 | super().__init__() 13 | self.in_channels=in_channels 14 | self.reconstruct = reconstruct 15 | self.c_m=c_m 16 | self.c_n=c_n 17 | self.convA=nn.Conv2d(in_channels,c_m,1) 18 | self.convB=nn.Conv2d(in_channels,c_n,1) 19 | self.convV=nn.Conv2d(in_channels,c_n,1) 20 | if self.reconstruct: 21 | self.conv_reconstruct = nn.Conv2d(c_m, in_channels, kernel_size = 1) 22 | self.init_weights() 23 | 24 | 25 | def init_weights(self): 26 | for m in self.modules(): 27 | if isinstance(m, nn.Conv2d): 28 | init.kaiming_normal_(m.weight, mode='fan_out') 29 | if m.bias is not None: 30 | init.constant_(m.bias, 0) 31 | elif isinstance(m, nn.BatchNorm2d): 32 | init.constant_(m.weight, 1) 33 | init.constant_(m.bias, 0) 34 | elif isinstance(m, nn.Linear): 35 | init.normal_(m.weight, std=0.001) 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | 39 | def forward(self, x): 40 | b, c, h,w=x.shape 41 | assert c==self.in_channels 42 | A=self.convA(x) #b,c_m,h,w 43 | B=self.convB(x) #b,c_n,h,w 44 | V=self.convV(x) #b,c_n,h,w 45 | tmpA=A.view(b,self.c_m,-1) 46 | attention_maps=F.softmax(B.view(b,self.c_n,-1)) 47 | attention_vectors=F.softmax(V.view(b,self.c_n,-1)) 48 | # step 1: feature gating 49 | global_descriptors=torch.bmm(tmpA,attention_maps.permute(0,2,1)) #b.c_m,c_n 50 | # step 2: feature distribution 51 | tmpZ = global_descriptors.matmul(attention_vectors) #b,c_m,h*w 52 | tmpZ=tmpZ.view(b,self.c_m,h,w) #b,c_m,h,w 53 | if self.reconstruct: 54 | tmpZ=self.conv_reconstruct(tmpZ) 55 | 56 | return tmpZ 57 | 58 | 59 | if __name__ == '__main__': 60 | input=torch.randn(50,512,7,7) 61 | a2 = DoubleAttention(512,128,128,True) 62 | output=a2(input) 63 | print(output.shape) 64 | 65 | -------------------------------------------------------------------------------- /models/Attention/AFT.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class AFT_FULL(nn.Module): 9 | 10 | def __init__(self, d_model,n=49,simple=False): 11 | 12 | 
super(AFT_FULL, self).__init__() 13 | self.fc_q = nn.Linear(d_model, d_model) 14 | self.fc_k = nn.Linear(d_model, d_model) 15 | self.fc_v = nn.Linear(d_model,d_model) 16 | if(simple): 17 | self.position_biases=torch.zeros((n,n)) 18 | else: 19 | self.position_biases=nn.Parameter(torch.ones((n,n))) 20 | self.d_model = d_model 21 | self.n=n 22 | self.sigmoid=nn.Sigmoid() 23 | 24 | self.init_weights() 25 | 26 | 27 | def init_weights(self): 28 | for m in self.modules(): 29 | if isinstance(m, nn.Conv2d): 30 | init.kaiming_normal_(m.weight, mode='fan_out') 31 | if m.bias is not None: 32 | init.constant_(m.bias, 0) 33 | elif isinstance(m, nn.BatchNorm2d): 34 | init.constant_(m.weight, 1) 35 | init.constant_(m.bias, 0) 36 | elif isinstance(m, nn.Linear): 37 | init.normal_(m.weight, std=0.001) 38 | if m.bias is not None: 39 | init.constant_(m.bias, 0) 40 | 41 | def forward(self, input): 42 | 43 | bs, n,dim = input.shape 44 | 45 | q = self.fc_q(input) #bs,n,dim 46 | k = self.fc_k(input).view(1,bs,n,dim) #1,bs,n,dim 47 | v = self.fc_v(input).view(1,bs,n,dim) #1,bs,n,dim 48 | 49 | numerator=torch.sum(torch.exp(k+self.position_biases.view(n,1,-1,1))*v,dim=2) #n,bs,dim 50 | denominator=torch.sum(torch.exp(k+self.position_biases.view(n,1,-1,1)),dim=2) #n,bs,dim 51 | 52 | out=(numerator/denominator) #n,bs,dim 53 | out=self.sigmoid(q)*(out.permute(1,0,2)) #bs,n,dim 54 | 55 | return out 56 | 57 | 58 | if __name__ == '__main__': 59 | input=torch.randn(50,49,512) 60 | aft_full = AFT_FULL(d_model=512, n=49) 61 | output=aft_full(input) 62 | print(output.shape) 63 | 64 | -------------------------------------------------------------------------------- /models/Attention/CCNet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | -------------------------------------------------------------------------------- /models/Attention/CoTAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import flatten, nn 4 | from torch.nn import init 5 | from torch.nn.modules.activation import ReLU 6 | from torch.nn.modules.batchnorm import BatchNorm2d 7 | from torch.nn import functional as F 8 | 9 | 10 | 11 | class CoTAttention(nn.Module): 12 | 13 | def __init__(self, dim=512,kernel_size=3): 14 | super().__init__() 15 | self.dim=dim 16 | self.kernel_size=kernel_size 17 | 18 | self.key_embed=nn.Sequential( 19 | nn.Conv2d(dim,dim,kernel_size=kernel_size,padding=kernel_size//2,groups=4,bias=False), 20 | nn.BatchNorm2d(dim), 21 | nn.ReLU() 22 | ) 23 | self.value_embed=nn.Sequential( 24 | nn.Conv2d(dim,dim,1,bias=False), 25 | nn.BatchNorm2d(dim) 26 | ) 27 | 28 | factor=4 29 | self.attention_embed=nn.Sequential( 30 | nn.Conv2d(2*dim,2*dim//factor,1,bias=False), 31 | nn.BatchNorm2d(2*dim//factor), 32 | nn.ReLU(), 33 | nn.Conv2d(2*dim//factor,kernel_size*kernel_size*dim,1) 34 | ) 35 | 36 | 37 | def forward(self, x): 38 | bs,c,h,w=x.shape 39 | k1=self.key_embed(x) #bs,c,h,w 40 | v=self.value_embed(x).view(bs,c,-1) #bs,c,h,w 41 | 42 | y=torch.cat([k1,x],dim=1) #bs,2c,h,w 43 | att=self.attention_embed(y) #bs,c*k*k,h,w 44 | att=att.reshape(bs,c,self.kernel_size*self.kernel_size,h,w) 45 | att=att.mean(2,keepdim=False).view(bs,c,-1) #bs,c,h*w 46 | k2=F.softmax(att,dim=-1)*v 47 | k2=k2.view(bs,c,h,w) 48 | 49 | 50 | return k1+k2 51 | 52 | 53 | if __name__ == '__main__': 54 | input=torch.randn(50,512,7,7) 55 | cot = CoTAttention(dim=512,kernel_size=3) 56 | 
output=cot(input) 57 | print(output.shape) 58 | 59 | -------------------------------------------------------------------------------- /models/Attention/CoordAttention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class h_sigmoid(nn.Module): 6 | def __init__(self, inplace=True): 7 | super(h_sigmoid, self).__init__() 8 | self.relu = nn.ReLU6(inplace=inplace) 9 | 10 | def forward(self, x): 11 | return self.relu(x + 3) / 6 12 | 13 | class h_swish(nn.Module): 14 | def __init__(self, inplace=True): 15 | super(h_swish, self).__init__() 16 | self.sigmoid = h_sigmoid(inplace=inplace) 17 | 18 | def forward(self, x): 19 | return x * self.sigmoid(x) 20 | 21 | class CoordAtt(nn.Module): 22 | def __init__(self, inp, oup, reduction=32): 23 | super(CoordAtt, self).__init__() 24 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 25 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 26 | 27 | mip = max(8, inp // reduction) 28 | 29 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 30 | self.bn1 = nn.BatchNorm2d(mip) 31 | self.act = h_swish() 32 | 33 | self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0) 34 | self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0) 35 | 36 | 37 | def forward(self, x): 38 | identity = x 39 | 40 | n,c,h,w = x.size() 41 | x_h = self.pool_h(x) 42 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 43 | 44 | y = torch.cat([x_h, x_w], dim=2) 45 | y = self.conv1(y) 46 | y = self.bn1(y) 47 | y = self.act(y) 48 | 49 | x_h, x_w = torch.split(y, [h, w], dim=2) 50 | x_w = x_w.permute(0, 1, 3, 2) 51 | 52 | a_h = self.conv_h(x_h).sigmoid() 53 | a_w = self.conv_w(x_w).sigmoid() 54 | 55 | out = identity * a_w * a_h 56 | 57 | return out -------------------------------------------------------------------------------- /models/Attention/ECAAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | from collections import OrderedDict 6 | 7 | 8 | 9 | class ECAAttention(nn.Module): 10 | 11 | def __init__(self, kernel_size=3): 12 | super().__init__() 13 | self.gap=nn.AdaptiveAvgPool2d(1) 14 | self.conv=nn.Conv1d(1,1,kernel_size=kernel_size,padding=(kernel_size-1)//2) 15 | self.sigmoid=nn.Sigmoid() 16 | 17 | def init_weights(self): 18 | for m in self.modules(): 19 | if isinstance(m, nn.Conv2d): 20 | init.kaiming_normal_(m.weight, mode='fan_out') 21 | if m.bias is not None: 22 | init.constant_(m.bias, 0) 23 | elif isinstance(m, nn.BatchNorm2d): 24 | init.constant_(m.weight, 1) 25 | init.constant_(m.bias, 0) 26 | elif isinstance(m, nn.Linear): 27 | init.normal_(m.weight, std=0.001) 28 | if m.bias is not None: 29 | init.constant_(m.bias, 0) 30 | 31 | def forward(self, x): 32 | y=self.gap(x) #bs,c,1,1 33 | y=y.squeeze(-1).permute(0,2,1) #bs,1,c 34 | y=self.conv(y) #bs,1,c 35 | y=self.sigmoid(y) #bs,1,c 36 | y=y.permute(0,2,1).unsqueeze(-1) #bs,c,1,1 37 | return x*y.expand_as(x) 38 | 39 | 40 | 41 | 42 | 43 | 44 | if __name__ == '__main__': 45 | input=torch.randn(50,512,7,7) 46 | eca = ECAAttention(kernel_size=3) 47 | output=eca(input) 48 | print(output.shape) 49 | 50 | -------------------------------------------------------------------------------- /models/Attention/ExternalAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 
from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class ExternalAttention(nn.Module): 9 | 10 | def __init__(self, d_model,S=64): 11 | super().__init__() 12 | self.mk=nn.Linear(d_model,S,bias=False) 13 | self.mv=nn.Linear(S,d_model,bias=False) 14 | self.softmax=nn.Softmax(dim=1) 15 | self.init_weights() 16 | 17 | 18 | def init_weights(self): 19 | for m in self.modules(): 20 | if isinstance(m, nn.Conv2d): 21 | init.kaiming_normal_(m.weight, mode='fan_out') 22 | if m.bias is not None: 23 | init.constant_(m.bias, 0) 24 | elif isinstance(m, nn.BatchNorm2d): 25 | init.constant_(m.weight, 1) 26 | init.constant_(m.bias, 0) 27 | elif isinstance(m, nn.Linear): 28 | init.normal_(m.weight, std=0.001) 29 | if m.bias is not None: 30 | init.constant_(m.bias, 0) 31 | 32 | def forward(self, queries): 33 | attn=self.mk(queries) #bs,n,S 34 | attn=self.softmax(attn) #bs,n,S 35 | attn=attn/torch.sum(attn,dim=2,keepdim=True) #bs,n,S 36 | out=self.mv(attn) #bs,n,d_model 37 | 38 | return out 39 | 40 | 41 | if __name__ == '__main__': 42 | input=torch.randn(50,49,512) 43 | ea = ExternalAttention(d_model=512,S=8) 44 | output=ea(input) 45 | print(output.shape) 46 | 47 | -------------------------------------------------------------------------------- /models/Attention/NonLocalBlock.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | class NonLocalBlock(nn.Module): 7 | def __init__(self, channel): 8 | super(NonLocalBlock, self).__init__() 9 | self.inter_channel = channel // 2 10 | self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1,padding=0, bias=False) 11 | self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False) 12 | self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False) 13 | self.softmax = nn.Softmax(dim=1) 14 | self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False) 15 | 16 | def forward(self, x): 17 | # [N, C, H , W] 18 | b, c, h, w = x.size() 19 | # [N, C/2, H * W] 20 | x_phi = self.conv_phi(x).view(b, c, -1) 21 | # [N, H * W, C/2] 22 | x_theta = self.conv_theta(x).view(b, c, -1).permute(0, 2, 1).contiguous() 23 | x_g = self.conv_g(x).view(b, c, -1).permute(0, 2, 1).contiguous() 24 | # [N, H * W, H * W] 25 | mul_theta_phi = torch.matmul(x_theta, x_phi) 26 | mul_theta_phi = self.softmax(mul_theta_phi) 27 | # [N, H * W, C/2] 28 | mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g) 29 | # [N, C/2, H, W] 30 | mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w) 31 | # [N, C, H , W] 32 | mask = self.conv_mask(mul_theta_phi_g) 33 | out = mask + x 34 | return out 35 | 36 | 37 | if __name__=='__main__': 38 | model = NonLocalBlock(channel=16) 39 | print(model) 40 | 41 | input = torch.randn(1, 16, 64, 64) 42 | out = model(input) 43 | print(out.shape) -------------------------------------------------------------------------------- /models/Attention/OutlookAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | import math 6 | from torch.nn import functional as F 7 | 8 | class OutlookAttention(nn.Module): 9 | 10 | def 
__init__(self,dim,num_heads=1,kernel_size=3,padding=1,stride=1,qkv_bias=False, 11 | attn_drop=0.1): 12 | super().__init__() 13 | self.dim=dim 14 | self.num_heads=num_heads 15 | self.head_dim=dim//num_heads 16 | self.kernel_size=kernel_size 17 | self.padding=padding 18 | self.stride=stride 19 | self.scale=self.head_dim**(-0.5) 20 | 21 | self.v_pj=nn.Linear(dim,dim,bias=qkv_bias) 22 | self.attn=nn.Linear(dim,kernel_size**4*num_heads) 23 | 24 | self.attn_drop=nn.Dropout(attn_drop) 25 | self.proj=nn.Linear(dim,dim) 26 | self.proj_drop=nn.Dropout(attn_drop) 27 | 28 | self.unflod=nn.Unfold(kernel_size,padding,stride) #手动卷积 29 | self.pool=nn.AvgPool2d(kernel_size=stride,stride=stride,ceil_mode=True) 30 | 31 | def forward(self, x) : 32 | B,H,W,C=x.shape 33 | 34 | #映射到新的特征v 35 | v=self.v_pj(x).permute(0,3,1,2) #B,C,H,W 36 | h,w=math.ceil(H/self.stride),math.ceil(W/self.stride) 37 | v=self.unflod(v).reshape(B,self.num_heads,self.head_dim,self.kernel_size*self.kernel_size,h*w).permute(0,1,4,3,2) #B,num_head,H*W,kxk,head_dim 38 | 39 | #生成Attention Map 40 | attn=self.pool(x.permute(0,3,1,2)).permute(0,2,3,1) #B,H,W,C 41 | attn=self.attn(attn).reshape(B,h*w,self.num_heads,self.kernel_size*self.kernel_size \ 42 | ,self.kernel_size*self.kernel_size).permute(0,2,1,3,4) #B,num_head,H*W,kxk,kxk 43 | attn=self.scale*attn 44 | attn=attn.softmax(-1) 45 | attn=self.attn_drop(attn) 46 | 47 | #获取weighted特征 48 | out=(attn @ v).permute(0,1,4,3,2).reshape(B,C*self.kernel_size*self.kernel_size,h*w) #B,dimxkxk,H*W 49 | out=F.fold(out,output_size=(H,W),kernel_size=self.kernel_size, 50 | padding=self.padding,stride=self.stride) #B,C,H,W 51 | out=self.proj(out.permute(0,2,3,1)) #B,H,W,C 52 | out=self.proj_drop(out) 53 | 54 | return out 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | input=torch.randn(50,28,28,512) 60 | outlook = OutlookAttention(dim=512) 61 | output=outlook(input) 62 | print(output.shape) 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /models/Attention/PSA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class PSA(nn.Module): 9 | 10 | def __init__(self, channel=512,reduction=4,S=4): 11 | super().__init__() 12 | self.S=S 13 | 14 | self.convs=[] 15 | for i in range(S): 16 | self.convs.append(nn.Conv2d(channel//S,channel//S,kernel_size=2*(i+1)+1,padding=i+1)) 17 | 18 | self.se_blocks=[] 19 | for i in range(S): 20 | self.se_blocks.append(nn.Sequential( 21 | nn.AdaptiveAvgPool2d(1), 22 | nn.Conv2d(channel//S, channel // (S*reduction),kernel_size=1, bias=False), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(channel // (S*reduction), channel//S,kernel_size=1, bias=False), 25 | nn.Sigmoid() 26 | )) 27 | 28 | self.softmax=nn.Softmax(dim=1) 29 | 30 | 31 | def init_weights(self): 32 | for m in self.modules(): 33 | if isinstance(m, nn.Conv2d): 34 | init.kaiming_normal_(m.weight, mode='fan_out') 35 | if m.bias is not None: 36 | init.constant_(m.bias, 0) 37 | elif isinstance(m, nn.BatchNorm2d): 38 | init.constant_(m.weight, 1) 39 | init.constant_(m.bias, 0) 40 | elif isinstance(m, nn.Linear): 41 | init.normal_(m.weight, std=0.001) 42 | if m.bias is not None: 43 | init.constant_(m.bias, 0) 44 | 45 | def forward(self, x): 46 | b, c, h, w = x.size() 47 | 48 | #Step1:SPC module 49 | SPC_out=x.view(b,self.S,c//self.S,h,w) #bs,s,ci,h,w 50 | for idx,conv in enumerate(self.convs): 51 | 
SPC_out[:,idx,:,:,:]=conv(SPC_out[:,idx,:,:,:]) 52 | 53 | #Step2:SE weight 54 | se_out=[] 55 | for idx,se in enumerate(self.se_blocks): 56 | se_out.append(se(SPC_out[:,idx,:,:,:])) 57 | SE_out=torch.stack(se_out,dim=1) 58 | SE_out=SE_out.expand_as(SPC_out) 59 | 60 | #Step3:Softmax 61 | softmax_out=self.softmax(SE_out) 62 | 63 | #Step4:SPA 64 | PSA_out=SPC_out*softmax_out 65 | PSA_out=PSA_out.view(b,-1,h,w) 66 | 67 | return PSA_out 68 | 69 | 70 | if __name__ == '__main__': 71 | input=torch.randn(50,512,7,7) 72 | psa = PSA(channel=512,reduction=8) 73 | output=psa(input) 74 | a=output.view(-1).sum() 75 | a.backward() 76 | print(output.shape) 77 | 78 | -------------------------------------------------------------------------------- /models/Attention/ParNetAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class ParNetAttention(nn.Module): 9 | 10 | def __init__(self, channel=512): 11 | super().__init__() 12 | self.sse = nn.Sequential( 13 | nn.AdaptiveAvgPool2d(1), 14 | nn.Conv2d(channel,channel,kernel_size=1), 15 | nn.Sigmoid() 16 | ) 17 | 18 | self.conv1x1=nn.Sequential( 19 | nn.Conv2d(channel,channel,kernel_size=1), 20 | nn.BatchNorm2d(channel) 21 | ) 22 | self.conv3x3=nn.Sequential( 23 | nn.Conv2d(channel,channel,kernel_size=3,padding=1), 24 | nn.BatchNorm2d(channel) 25 | ) 26 | self.silu=nn.SiLU() 27 | 28 | 29 | def forward(self, x): 30 | b, c, _, _ = x.size() 31 | x1=self.conv1x1(x) 32 | x2=self.conv3x3(x) 33 | x3=self.sse(x)*x 34 | y=self.silu(x1+x2+x3) 35 | return y 36 | 37 | 38 | if __name__ == '__main__': 39 | input=torch.randn(50,512,7,7) 40 | pna = ParNetAttention(channel=512) 41 | output=pna(input) 42 | print(output.shape) 43 | 44 | -------------------------------------------------------------------------------- /models/Attention/ResidualAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class ResidualAttention(nn.Module): 9 | 10 | def __init__(self, channel=512 , num_class=1000,la=0.2): 11 | super().__init__() 12 | self.la=la 13 | self.fc=nn.Conv2d(in_channels=channel,out_channels=num_class,kernel_size=1,stride=1,bias=False) 14 | 15 | def forward(self, x): 16 | b,c,h,w=x.shape 17 | y_raw=self.fc(x).flatten(2) #b,num_class,hxw 18 | y_avg=torch.mean(y_raw,dim=2) #b,num_class 19 | y_max=torch.max(y_raw,dim=2)[0] #b,num_class 20 | score=y_avg+self.la*y_max 21 | return score 22 | 23 | 24 | 25 | 26 | if __name__ == '__main__': 27 | input=torch.randn(50,512,7,7) 28 | resatt = ResidualAttention(channel=512,num_class=1000,la=0.2) 29 | output=resatt(input) 30 | print(output.shape) 31 | 32 | -------------------------------------------------------------------------------- /models/Attention/S2Attention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | def spatial_shift1(x): 8 | b,w,h,c = x.size() 9 | x[:,1:,:,:c//4] = x[:,:w-1,:,:c//4] 10 | x[:,:w-1,:,c//4:c//2] = x[:,1:,:,c//4:c//2] 11 | x[:,:,1:,c//2:c*3//4] = x[:,:,:h-1,c//2:c*3//4] 12 | x[:,:,:h-1,3*c//4:] = x[:,:,1:,3*c//4:] 13 | return x 14 | 15 | 16 | def spatial_shift2(x): 17 | b,w,h,c = x.size() 18 | x[:,:,1:,:c//4] = x[:,:,:h-1,:c//4] 19 | x[:,:,:h-1,c//4:c//2] = x[:,:,1:,c//4:c//2] 20 | 
x[:,1:,:,c//2:c*3//4] = x[:,:w-1,:,c//2:c*3//4] 21 | x[:,:w-1,:,3*c//4:] = x[:,1:,:,3*c//4:] 22 | return x 23 | 24 | 25 | class SplitAttention(nn.Module): 26 | def __init__(self,channel=512,k=3): 27 | super().__init__() 28 | self.channel=channel 29 | self.k=k 30 | self.mlp1=nn.Linear(channel,channel,bias=False) 31 | self.gelu=nn.GELU() 32 | self.mlp2=nn.Linear(channel,channel*k,bias=False) 33 | self.softmax=nn.Softmax(1) 34 | 35 | def forward(self,x_all): 36 | b,k,h,w,c=x_all.shape 37 | x_all=x_all.reshape(b,k,-1,c) #bs,k,n,c 38 | a=torch.sum(torch.sum(x_all,1),1) #bs,c 39 | hat_a=self.mlp2(self.gelu(self.mlp1(a))) #bs,kc 40 | hat_a=hat_a.reshape(b,self.k,c) #bs,k,c 41 | bar_a=self.softmax(hat_a) #bs,k,c 42 | attention=bar_a.unsqueeze(-2) # #bs,k,1,c 43 | out=attention*x_all # #bs,k,n,c 44 | out=torch.sum(out,1).reshape(b,h,w,c) 45 | return out 46 | 47 | 48 | class S2Attention(nn.Module): 49 | 50 | def __init__(self, channels=512 ): 51 | super().__init__() 52 | self.mlp1 = nn.Linear(channels,channels*3) 53 | self.mlp2 = nn.Linear(channels,channels) 54 | self.split_attention = SplitAttention() 55 | 56 | def forward(self, x): 57 | b,c,w,h = x.size() 58 | x=x.permute(0,2,3,1) 59 | x = self.mlp1(x) 60 | x1 = spatial_shift1(x[:,:,:,:c]) 61 | x2 = spatial_shift2(x[:,:,:,c:c*2]) 62 | x3 = x[:,:,:,c*2:] 63 | x_all=torch.stack([x1,x2,x3],1) 64 | a = self.split_attention(x_all) 65 | x = self.mlp2(a) 66 | x=x.permute(0,3,1,2) 67 | return x 68 | 69 | 70 | 71 | 72 | if __name__ == '__main__': 73 | input=torch.randn(50,512,7,7) 74 | s2att = S2Attention(channels=512) 75 | output=s2att(input) 76 | print(output.shape) 77 | 78 | -------------------------------------------------------------------------------- /models/Attention/SEAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class SEAttention(nn.Module): 9 | def __init__(self, channel,ratio = 16): 10 | super(SEAttention, self).__init__() 11 | self.squeeze = nn.AdaptiveAvgPool2d(1) 12 | self.excitation = nn.Sequential( 13 | nn.Linear(in_features=channel, out_features=channel // ratio), 14 | nn.ReLU(inplace=True), 15 | nn.Linear(in_features=channel // ratio, out_features=channel), 16 | nn.Sigmoid() 17 | ) 18 | def forward(self, x): 19 | b, c, _, _ = x.size() 20 | y = self.squeeze(x).view(b, c) 21 | z = self.excitation(y).view(b, c, 1, 1) 22 | return x * z.expand_as(x) 23 | 24 | 25 | if __name__ == '__main__': 26 | input=torch.randn(50,512,7,7) 27 | se = SEAttention(channel=512,reduction=8) 28 | output=se(input) 29 | print(output.shape) 30 | 31 | -------------------------------------------------------------------------------- /models/Attention/SEvariants.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | class cSE_Module(nn.Module): 7 | def __init__(self, channel,ratio = 16): 8 | super(cSE_Module, self).__init__() 9 | self.squeeze = nn.AdaptiveAvgPool2d(1) 10 | self.excitation = nn.Sequential( 11 | nn.Linear(in_features=channel, out_features=channel // ratio), 12 | nn.ReLU(inplace=True), 13 | nn.Linear(in_features=channel // ratio, out_features=channel), 14 | nn.Sigmoid() 15 | ) 16 | def forward(self, x): 17 | b, c, _, _ = x.size() 18 | y = self.squeeze(x).view(b, c) 19 | z = self.excitation(y).view(b, c, 1, 1) 20 | return x * z.expand_as(x) 21 | 22 | 23 | class sSE_Module(nn.Module): 24 | def 
__init__(self, channel): 25 | super(sSE_Module, self).__init__() 26 | self.spatial_excitation = nn.Sequential( 27 | nn.Conv2d(in_channels=channel, out_channels=1, kernel_size=1,stride=1,padding=0), 28 | nn.Sigmoid() 29 | ) 30 | def forward(self, x): 31 | z = self.spatial_excitation(x) 32 | return x * z.expand_as(x) 33 | 34 | 35 | class scSE_Module(nn.Module): 36 | def __init__(self, channel,ratio = 16): 37 | super(scSE_Module, self).__init__() 38 | self.cSE = cSE_Module(channel,ratio) 39 | self.sSE = sSE_Module(channel) 40 | 41 | def forward(self, x): 42 | return self.cSE(x) + self.sSE(x) 43 | 44 | 45 | if __name__=='__main__': 46 | # model = cSE_Module(channel=16) 47 | # model = sSE_Module(channel=16) 48 | model = scSE_Module(channel=16) 49 | print(model) 50 | 51 | input = torch.randn(1, 16, 64, 64) 52 | out = model(input) 53 | print(out.shape) -------------------------------------------------------------------------------- /models/Attention/SGE.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class SpatialGroupEnhance(nn.Module): 9 | 10 | def __init__(self, groups): 11 | super().__init__() 12 | self.groups=groups 13 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 14 | self.weight=nn.Parameter(torch.zeros(1,groups,1,1)) 15 | self.bias=nn.Parameter(torch.zeros(1,groups,1,1)) 16 | self.sig=nn.Sigmoid() 17 | self.init_weights() 18 | 19 | 20 | def init_weights(self): 21 | for m in self.modules(): 22 | if isinstance(m, nn.Conv2d): 23 | init.kaiming_normal_(m.weight, mode='fan_out') 24 | if m.bias is not None: 25 | init.constant_(m.bias, 0) 26 | elif isinstance(m, nn.BatchNorm2d): 27 | init.constant_(m.weight, 1) 28 | init.constant_(m.bias, 0) 29 | elif isinstance(m, nn.Linear): 30 | init.normal_(m.weight, std=0.001) 31 | if m.bias is not None: 32 | init.constant_(m.bias, 0) 33 | 34 | def forward(self, x): 35 | b, c, h,w=x.shape 36 | x=x.view(b*self.groups,-1,h,w) #bs*g,dim//g,h,w 37 | xn=x*self.avg_pool(x) #bs*g,dim//g,h,w 38 | xn=xn.sum(dim=1,keepdim=True) #bs*g,1,h,w 39 | t=xn.view(b*self.groups,-1) #bs*g,h*w 40 | 41 | t=t-t.mean(dim=1,keepdim=True) #bs*g,h*w 42 | std=t.std(dim=1,keepdim=True)+1e-5 43 | t=t/std #bs*g,h*w 44 | t=t.view(b,self.groups,h,w) #bs,g,h*w 45 | 46 | t=t*self.weight+self.bias #bs,g,h*w 47 | t=t.view(b*self.groups,1,h,w) #bs*g,1,h*w 48 | x=x*self.sig(t) 49 | x=x.view(b,c,h,w) 50 | 51 | return x 52 | 53 | 54 | if __name__ == '__main__': 55 | input=torch.randn(50,512,7,7) 56 | sge = SpatialGroupEnhance(groups=8) 57 | output=sge(input) 58 | print(output.shape) 59 | 60 | -------------------------------------------------------------------------------- /models/Attention/SKAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | from collections import OrderedDict 6 | 7 | 8 | 9 | class SKAttention(nn.Module): 10 | 11 | def __init__(self, channel=512,kernels=[1,3,5,7],reduction=16,group=1,L=32): 12 | super().__init__() 13 | self.d=max(L,channel//reduction) 14 | self.convs=nn.ModuleList([]) 15 | for k in kernels: 16 | self.convs.append( 17 | nn.Sequential(OrderedDict([ 18 | ('conv',nn.Conv2d(channel,channel,kernel_size=k,padding=k//2,groups=group)), 19 | ('bn',nn.BatchNorm2d(channel)), 20 | ('relu',nn.ReLU()) 21 | ])) 22 | ) 23 | self.fc=nn.Linear(channel,self.d) 24 | self.fcs=nn.ModuleList([]) 25 | for i 
in range(len(kernels)): 26 | self.fcs.append(nn.Linear(self.d,channel)) 27 | self.softmax=nn.Softmax(dim=0) 28 | 29 | 30 | 31 | def forward(self, x): 32 | bs, c, _, _ = x.size() 33 | conv_outs=[] 34 | ### split 35 | for conv in self.convs: 36 | conv_outs.append(conv(x)) 37 | feats=torch.stack(conv_outs,0)#k,bs,channel,h,w 38 | 39 | ### fuse 40 | U=sum(conv_outs) #bs,c,h,w 41 | 42 | ### reduction channel 43 | S=U.mean(-1).mean(-1) #bs,c 44 | Z=self.fc(S) #bs,d 45 | 46 | ### calculate attention weight 47 | weights=[] 48 | for fc in self.fcs: 49 | weight=fc(Z) 50 | weights.append(weight.view(bs,c,1,1)) #bs,channel 51 | attention_weughts=torch.stack(weights,0)#k,bs,channel,1,1 52 | attention_weughts=self.softmax(attention_weughts)#k,bs,channel,1,1 53 | 54 | ### fuse 55 | V=(attention_weughts*feats).sum(0) 56 | return V 57 | 58 | 59 | 60 | 61 | 62 | 63 | if __name__ == '__main__': 64 | input=torch.randn(50,512,7,7) 65 | se = SKAttention(channel=512,reduction=8) 66 | output=se(input) 67 | print(output.shape) 68 | 69 | -------------------------------------------------------------------------------- /models/Attention/ShuffleAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | from torch.nn.parameter import Parameter 6 | 7 | 8 | class ShuffleAttention(nn.Module): 9 | 10 | def __init__(self, channel=512,reduction=16,G=8): 11 | super().__init__() 12 | self.G=G 13 | self.channel=channel 14 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 15 | self.gn = nn.GroupNorm(channel // (2 * G), channel // (2 * G)) 16 | self.cweight = Parameter(torch.zeros(1, channel // (2 * G), 1, 1)) 17 | self.cbias = Parameter(torch.ones(1, channel // (2 * G), 1, 1)) 18 | self.sweight = Parameter(torch.zeros(1, channel // (2 * G), 1, 1)) 19 | self.sbias = Parameter(torch.ones(1, channel // (2 * G), 1, 1)) 20 | self.sigmoid=nn.Sigmoid() 21 | 22 | 23 | def init_weights(self): 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | init.kaiming_normal_(m.weight, mode='fan_out') 27 | if m.bias is not None: 28 | init.constant_(m.bias, 0) 29 | elif isinstance(m, nn.BatchNorm2d): 30 | init.constant_(m.weight, 1) 31 | init.constant_(m.bias, 0) 32 | elif isinstance(m, nn.Linear): 33 | init.normal_(m.weight, std=0.001) 34 | if m.bias is not None: 35 | init.constant_(m.bias, 0) 36 | 37 | 38 | @staticmethod 39 | def channel_shuffle(x, groups): 40 | b, c, h, w = x.shape 41 | x = x.reshape(b, groups, -1, h, w) 42 | x = x.permute(0, 2, 1, 3, 4) 43 | 44 | # flatten 45 | x = x.reshape(b, -1, h, w) 46 | 47 | return x 48 | 49 | def forward(self, x): 50 | b, c, h, w = x.size() 51 | #group into subfeatures 52 | x=x.view(b*self.G,-1,h,w) #bs*G,c//G,h,w 53 | 54 | #channel_split 55 | x_0,x_1=x.chunk(2,dim=1) #bs*G,c//(2*G),h,w 56 | 57 | #channel attention 58 | x_channel=self.avg_pool(x_0) #bs*G,c//(2*G),1,1 59 | x_channel=self.cweight*x_channel+self.cbias #bs*G,c//(2*G),1,1 60 | x_channel=x_0*self.sigmoid(x_channel) 61 | 62 | #spatial attention 63 | x_spatial=self.gn(x_1) #bs*G,c//(2*G),h,w 64 | x_spatial=self.sweight*x_spatial+self.sbias #bs*G,c//(2*G),h,w 65 | x_spatial=x_1*self.sigmoid(x_spatial) #bs*G,c//(2*G),h,w 66 | 67 | # concatenate along channel axis 68 | out=torch.cat([x_channel,x_spatial],dim=1) #bs*G,c//G,h,w 69 | out=out.contiguous().view(b,-1,h,w) 70 | 71 | # channel shuffle 72 | out = self.channel_shuffle(out, 2) 73 | return out 74 | 75 | 76 | if __name__ == '__main__': 77 | 
input=torch.randn(50,512,7,7) 78 | se = ShuffleAttention(channel=512,G=8) 79 | output=se(input) 80 | print(output.shape) 81 | 82 | -------------------------------------------------------------------------------- /models/Attention/TripletAttention.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torchvision 6 | 7 | 8 | class ChannelPool(nn.Module): 9 | def forward(self, x): 10 | return torch.cat( (torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 ) 11 | 12 | 13 | class SpatialGate(nn.Module): 14 | def __init__(self): 15 | super(SpatialGate, self).__init__() 16 | 17 | self.channel_pool = ChannelPool() 18 | self.conv = nn.Sequential( 19 | nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3), 20 | nn.BatchNorm2d(1) 21 | ) 22 | self.sigmod = nn.Sigmoid() 23 | 24 | def forward(self, x): 25 | out = self.conv(self.channel_pool(x)) 26 | return out * self.sigmod(out) 27 | 28 | 29 | class TripletAttention(nn.Module): 30 | def __init__(self, spatial=True): 31 | super(TripletAttention, self).__init__() 32 | self.spatial = spatial 33 | self.height_gate = SpatialGate() 34 | self.width_gate = SpatialGate() 35 | if self.spatial: 36 | self.spatial_gate = SpatialGate() 37 | 38 | def forward(self, x): 39 | x_perm1 = x.permute(0, 2, 1, 3).contiguous() 40 | x_out1 = self.height_gate(x_perm1) 41 | x_out1 = x_out1.permute(0, 2, 1, 3).contiguous() 42 | 43 | x_perm2 = x.permute(0, 3, 2, 1).contiguous() 44 | x_out2 = self.width_gate(x_perm2) 45 | x_out2 = x_out2.permute(0, 3, 2, 1).contiguous() 46 | 47 | if self.spatial: 48 | x_out3 = self.spatial_gate(x) 49 | return (1/3) * (x_out1 + x_out2 + x_out3) 50 | else: 51 | return (1/2) * (x_out1 + x_out2) 52 | 53 | 54 | 55 | if __name__=='__main__': 56 | model = TripletAttention() 57 | print(model) 58 | 59 | input = torch.randn(1, 16, 256, 256) 60 | out = model(input) 61 | print(out.shape) -------------------------------------------------------------------------------- /models/Attention/UFOAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.functional import norm 5 | from torch.nn import init 6 | 7 | 8 | def XNorm(x,gamma): 9 | norm_tensor=torch.norm(x,2,-1,True) 10 | return x*gamma/norm_tensor 11 | 12 | 13 | class UFOAttention(nn.Module): 14 | ''' 15 | Scaled dot-product attention 16 | ''' 17 | 18 | def __init__(self, d_model, d_k, d_v, h,dropout=.1): 19 | ''' 20 | :param d_model: Output dimensionality of the model 21 | :param d_k: Dimensionality of queries and keys 22 | :param d_v: Dimensionality of values 23 | :param h: Number of heads 24 | ''' 25 | super(UFOAttention, self).__init__() 26 | self.fc_q = nn.Linear(d_model, h * d_k) 27 | self.fc_k = nn.Linear(d_model, h * d_k) 28 | self.fc_v = nn.Linear(d_model, h * d_v) 29 | self.fc_o = nn.Linear(h * d_v, d_model) 30 | self.dropout=nn.Dropout(dropout) 31 | self.gamma=nn.Parameter(torch.randn((1,h,1,1))) 32 | 33 | self.d_model = d_model 34 | self.d_k = d_k 35 | self.d_v = d_v 36 | self.h = h 37 | 38 | self.init_weights() 39 | 40 | 41 | def init_weights(self): 42 | for m in self.modules(): 43 | if isinstance(m, nn.Conv2d): 44 | init.kaiming_normal_(m.weight, mode='fan_out') 45 | if m.bias is not None: 46 | init.constant_(m.bias, 0) 47 | elif isinstance(m, nn.BatchNorm2d): 48 | init.constant_(m.weight, 1) 49 | init.constant_(m.bias, 0) 50 | elif isinstance(m, 
nn.Linear): 51 | init.normal_(m.weight, std=0.001) 52 | if m.bias is not None: 53 | init.constant_(m.bias, 0) 54 | 55 | def forward(self, queries, keys, values): 56 | b_s, nq = queries.shape[:2] 57 | nk = keys.shape[1] 58 | 59 | q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3) # (b_s, h, nq, d_k) 60 | k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1) # (b_s, h, d_k, nk) 61 | v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3) # (b_s, h, nk, d_v) 62 | 63 | kv=torch.matmul(k, v) #bs,h,c,c 64 | kv_norm=XNorm(kv,self.gamma) #bs,h,c,c 65 | q_norm=XNorm(q,self.gamma) #bs,h,n,c 66 | out=torch.matmul(q_norm,kv_norm).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v) 67 | out = self.fc_o(out) # (b_s, nq, d_model) 68 | 69 | 70 | return out 71 | 72 | 73 | if __name__ == '__main__': 74 | input=torch.randn(50,49,512) 75 | ufo = UFOAttention(d_model=512, d_k=512, d_v=512, h=8) 76 | output=ufo(input,input,input) 77 | print(output.shape) 78 | 79 | -------------------------------------------------------------------------------- /models/Attention/ViP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class MLP(nn.Module): 6 | def __init__(self,in_features,hidden_features,out_features,act_layer=nn.GELU,drop=0.1): 7 | super().__init__() 8 | self.fc1=nn.Linear(in_features,hidden_features) 9 | self.act=act_layer() 10 | self.fc2=nn.Linear(hidden_features,out_features) 11 | self.drop=nn.Dropout(drop) 12 | 13 | def forward(self, x) : 14 | return self.drop(self.fc2(self.drop(self.act(self.fc1(x))))) 15 | 16 | class WeightedPermuteMLP(nn.Module): 17 | def __init__(self,dim,seg_dim=8, qkv_bias=False, proj_drop=0.): 18 | super().__init__() 19 | self.seg_dim=seg_dim 20 | 21 | self.mlp_c=nn.Linear(dim,dim,bias=qkv_bias) 22 | self.mlp_h=nn.Linear(dim,dim,bias=qkv_bias) 23 | self.mlp_w=nn.Linear(dim,dim,bias=qkv_bias) 24 | 25 | self.reweighting=MLP(dim,dim//4,dim*3) 26 | 27 | self.proj=nn.Linear(dim,dim) 28 | self.proj_drop=nn.Dropout(proj_drop) 29 | 30 | def forward(self,x) : 31 | B,H,W,C=x.shape 32 | 33 | c_embed=self.mlp_c(x) 34 | 35 | S=C//self.seg_dim 36 | h_embed=x.reshape(B,H,W,self.seg_dim,S).permute(0,3,2,1,4).reshape(B,self.seg_dim,W,H*S) 37 | h_embed=self.mlp_h(h_embed).reshape(B,self.seg_dim,W,H,S).permute(0,3,2,1,4).reshape(B,H,W,C) 38 | 39 | w_embed=x.reshape(B,H,W,self.seg_dim,S).permute(0,3,1,2,4).reshape(B,self.seg_dim,H,W*S) 40 | w_embed=self.mlp_w(w_embed).reshape(B,self.seg_dim,H,W,S).permute(0,2,3,1,4).reshape(B,H,W,C) 41 | 42 | weight=(c_embed+h_embed+w_embed).permute(0,3,1,2).flatten(2).mean(2) 43 | weight=self.reweighting(weight).reshape(B,C,3).permute(2,0,1).softmax(0).unsqueeze(2).unsqueeze(2) 44 | 45 | x=c_embed*weight[0]+w_embed*weight[1]+h_embed*weight[2] 46 | 47 | x=self.proj_drop(self.proj(x)) 48 | 49 | return x 50 | 51 | 52 | 53 | if __name__ == '__main__': 54 | input=torch.randn(64,8,8,512) 55 | seg_dim=8 56 | vip=WeightedPermuteMLP(512,seg_dim) 57 | out=vip(input) 58 | print(out.shape) 59 | -------------------------------------------------------------------------------- /models/ClassicNetwork/AlexNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class AlexNet(nn.Module): 6 | def __init__(self,num_classes=1000): 7 | super(AlexNet,self).__init__() 8 | self.feature_extraction = nn.Sequential( 9 | 
nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=2,bias=False), 10 | nn.ReLU(inplace=True), 11 | nn.MaxPool2d(kernel_size=3,stride=2,padding=0), 12 | nn.Conv2d(in_channels=96,out_channels=192,kernel_size=5,stride=1,padding=2,bias=False), 13 | nn.ReLU(inplace=True), 14 | nn.MaxPool2d(kernel_size=3,stride=2,padding=0), 15 | nn.Conv2d(in_channels=192,out_channels=384,kernel_size=3,stride=1,padding=1,bias=False), 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=1,bias=False), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(in_channels=256,out_channels=256,kernel_size=3,stride=1,padding=1,bias=False), 20 | nn.ReLU(inplace=True), 21 | nn.MaxPool2d(kernel_size=3, stride=2, padding=0), 22 | ) 23 | self.classifier = nn.Sequential( 24 | nn.Dropout(p=0.5), 25 | nn.Linear(in_features=256*6*6,out_features=4096), 26 | nn.Dropout(p=0.5), 27 | nn.Linear(in_features=4096, out_features=4096), 28 | nn.Linear(in_features=4096, out_features=num_classes), 29 | ) 30 | def forward(self,x): 31 | x = self.feature_extraction(x) 32 | x = x.view(x.size(0),256*6*6) 33 | x = self.classifier(x) 34 | return x 35 | 36 | 37 | if __name__ =='__main__': 38 | # model = torchvision.models.AlexNet() 39 | model = AlexNet() 40 | print(model) 41 | 42 | input = torch.randn(8,3,224,224) 43 | out = model(input) 44 | print(out.shape) 45 | 46 | -------------------------------------------------------------------------------- /models/ClassicNetwork/InceptionV4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class InceptionV4(nn.Module): 6 | def __init__(self): 7 | super(InceptionV4, self).__init__() 8 | 9 | def forward(self): 10 | return out 11 | 12 | if __name__=='__main__': 13 | model = InceptionV4() 14 | print(model) 15 | 16 | input = torch.randn(1, 3, 224, 224) 17 | out = model(input) 18 | print(out.shape) -------------------------------------------------------------------------------- /models/ClassicNetwork/Non-local_network.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | # from blocks.non_local_concatenation import NONLocalBlock2D 3 | # from blocks.non_local_gaussian import NONLocalBlock2D 4 | from blocks.non_local_embedded_gaussian import NONLocalBlock2D 5 | # from blocks.non_local_dot_product import NONLocalBlock2D 6 | 7 | 8 | class Network(nn.Module): 9 | def __init__(self): 10 | super(Network, self).__init__() 11 | 12 | self.conv_1 = nn.Sequential( 13 | nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1), 14 | nn.BatchNorm2d(32), 15 | nn.ReLU(), 16 | nn.MaxPool2d(2), 17 | ) 18 | 19 | self.nl_1 = NONLocalBlock2D(in_channels=32) 20 | self.conv_2 = nn.Sequential( 21 | nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1), 22 | nn.BatchNorm2d(64), 23 | nn.ReLU(), 24 | nn.MaxPool2d(2), 25 | ) 26 | 27 | self.nl_2 = NONLocalBlock2D(in_channels=64) 28 | self.conv_3 = nn.Sequential( 29 | nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1), 30 | nn.BatchNorm2d(128), 31 | nn.ReLU(), 32 | nn.MaxPool2d(2), 33 | ) 34 | 35 | self.fc = nn.Sequential( 36 | nn.Linear(in_features=128*3*3, out_features=256), 37 | nn.ReLU(), 38 | nn.Dropout(0.5), 39 | 40 | nn.Linear(in_features=256, out_features=10) 41 | ) 42 | 43 | def forward(self, x): 44 | batch_size = x.size(0) 45 | 46 | feature_1 = self.conv_1(x) 47 | nl_feature_1 = 
self.nl_1(feature_1) 48 | 49 | feature_2 = self.conv_2(nl_feature_1) 50 | nl_feature_2 = self.nl_2(feature_2) 51 | 52 | output = self.conv_3(nl_feature_2).view(batch_size, -1) 53 | output = self.fc(output) 54 | 55 | return output 56 | 57 | def forward_with_nl_map(self, x): 58 | batch_size = x.size(0) 59 | 60 | feature_1 = self.conv_1(x) 61 | nl_feature_1, nl_map_1 = self.nl_1(feature_1, return_nl_map=True) 62 | 63 | feature_2 = self.conv_2(nl_feature_1) 64 | nl_feature_2, nl_map_2 = self.nl_2(feature_2, return_nl_map=True) 65 | 66 | output = self.conv_3(nl_feature_2).view(batch_size, -1) 67 | output = self.fc(output) 68 | 69 | return output, [nl_map_1, nl_map_2] 70 | 71 | 72 | if __name__ == '__main__': 73 | import torch 74 | 75 | img = torch.randn(3, 1, 28, 28) 76 | net = Network() 77 | print(net) 78 | out = net(img) 79 | print(out.size()) -------------------------------------------------------------------------------- /models/ClassicNetwork/README.md: -------------------------------------------------------------------------------- 1 | # ClassicNetwork 2 | Classical network implemented by pytorch 3 | 4 | 5 | 6 | **AlexNet:** 7 | 8 | - ImageNet Classification with Deep Convolutional Neural Networks, Alex Krizhevsky, 2012 9 | 10 | 11 | 12 | **VGG:** 13 | 14 | - Very Deep Convolutional Networks for Large-Scale Image Recognition,Karen Simonyan,2014 15 | 16 | 17 | 18 | **ResNet:** 19 | 20 | - Deep Residual Learning for Image Recognition, He-Kaiming, 2015 21 | 22 | 23 | 24 | **InceptionV1:** 25 | 26 | - Going deeper with convolutions , Christian Szegedy , 2014 27 | 28 | 29 | 30 | **InceptionV2 and InceptionV3:** 31 | 32 | - Rethinking the Inception Architecture for Computer Vision , Christian Szegedy ,2015 33 | 34 | 35 | 36 | **InceptionV4 and Inception-ResNet:** 37 | 38 | - Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning , Christian Szegedy ,2016 39 | 40 | 41 | 42 | **DenseNet:** 43 | 44 | - Densely Connected Convolutional Networks, 2017 45 | 46 | 47 | 48 | **ResNeXt:** 49 | 50 | - Aggregated Residual Transformations for Deep Neural Networks,2017 51 | 52 | 53 | **CSP-DarkNet:** 54 | 55 | - -------------------------------------------------------------------------------- /models/ClassicNetwork/VGGNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3BNReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | class VGGNet(nn.Module): 13 | def __init__(self, block_nums,num_classes=1000): 14 | super(VGGNet, self).__init__() 15 | 16 | self.stage1 = self._make_layers(in_channels=3, out_channels=64, block_num=block_nums[0]) 17 | self.stage2 = self._make_layers(in_channels=64, out_channels=128, block_num=block_nums[1]) 18 | self.stage3 = self._make_layers(in_channels=128, out_channels=256, block_num=block_nums[2]) 19 | self.stage4 = self._make_layers(in_channels=256, out_channels=512, block_num=block_nums[3]) 20 | self.stage5 = self._make_layers(in_channels=512, out_channels=512, block_num=block_nums[4]) 21 | 22 | self.classifier = nn.Sequential( 23 | nn.Linear(in_features=512*7*7,out_features=4096), 24 | nn.Dropout(p=0.2), 25 | nn.Linear(in_features=4096, out_features=4096), 26 | nn.Dropout(p=0.2), 27 | nn.Linear(in_features=4096, out_features=num_classes) 28 | ) 29 | 30 | 
self._init_params() 31 | 32 | def _make_layers(self, in_channels, out_channels, block_num): 33 | layers = [] 34 | layers.append(Conv3x3BNReLU(in_channels,out_channels)) 35 | for i in range(1,block_num): 36 | layers.append(Conv3x3BNReLU(out_channels,out_channels)) 37 | layers.append(nn.MaxPool2d(kernel_size=2,stride=2, ceil_mode=False)) 38 | return nn.Sequential(*layers) 39 | 40 | def _init_params(self): 41 | for m in self.modules(): 42 | if isinstance(m, nn.Conv2d): 43 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 44 | elif isinstance(m, nn.BatchNorm2d): 45 | nn.init.constant_(m.weight, 1) 46 | nn.init.constant_(m.bias, 0) 47 | 48 | def forward(self, x): 49 | x = self.stage1(x) 50 | x = self.stage2(x) 51 | x = self.stage3(x) 52 | x = self.stage4(x) 53 | x = self.stage5(x) 54 | x = x.view(x.size(0),-1) 55 | out = self.classifier(x) 56 | return out 57 | 58 | def VGG16(): 59 | block_nums = [2, 2, 3, 3, 3] 60 | model = VGGNet(block_nums) 61 | return model 62 | 63 | def VGG19(): 64 | block_nums = [2, 2, 4, 4, 4] 65 | model = VGGNet(block_nums) 66 | return model 67 | 68 | if __name__ == '__main__': 69 | model = VGG16() 70 | print(model) 71 | 72 | input = torch.randn(1,3,224,224) 73 | out = model(input) 74 | print(out.shape) 75 | 76 | -------------------------------------------------------------------------------- /models/ClassicNetwork/blocks/CBAM_blocks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | 6 | 7 | class ChannelAttention(nn.Module): 8 | def __init__(self, in_planes, ratio=16): 9 | super(ChannelAttention, self).__init__() 10 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 11 | self.max_pool = nn.AdaptiveMaxPool2d(1) 12 | self.shared_MLP = nn.Sequential( 13 | nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False), 14 | nn.ReLU(), 15 | nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) 16 | ) 17 | # self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False) 18 | # self.relu1 = nn.ReLU() 19 | # self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) 20 | 21 | self.sigmoid = nn.Sigmoid() 22 | 23 | def forward(self, x): 24 | avg_out =self.shared_MLP(self.avg_pool(x))# self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 25 | max_out =self.shared_MLP(self.max_pool(x))# self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 26 | out = avg_out + max_out 27 | return self.sigmoid(out) 28 | 29 | 30 | class SpatialAttention(nn.Module): 31 | def __init__(self, kernel_size=7): 32 | super(SpatialAttention, self).__init__() 33 | 34 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 35 | padding = 3 if kernel_size == 7 else 1 36 | 37 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 38 | self.sigmoid = nn.Sigmoid() 39 | 40 | def forward(self, x): 41 | avg_out = torch.mean(x, dim=1, keepdim=True) 42 | max_out, _ = torch.max(x, dim=1, keepdim=True) 43 | x = torch.cat([avg_out, max_out], dim=1) 44 | x = self.conv1(x) 45 | return self.sigmoid(x) 46 | 47 | 48 | class CBAM(nn.Module): 49 | def __init__(self, planes): 50 | super(CBAM, self).__init__() 51 | self.ca = ChannelAttention(planes) 52 | self.sa = SpatialAttention() 53 | 54 | def forward(self, x): 55 | x = self.ca(x) * x 56 | x = self.sa(x) * x 57 | return x 58 | 59 | if __name__ == '__main__': 60 | img = torch.randn(16, 32, 20, 20) 61 | net = CBAM(32) 62 | print(net) 63 | out = net(img) 64 | print(out.size()) 
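Editor's note — a short sketch, not from this repository, of the usual way the CBAM module above is wired into a ResNet-style basic block: the attention reweights the residual branch right before it is added back to the identity path. BasicBlockCBAM is a hypothetical name used only for illustration.

import torch
import torch.nn as nn

class BasicBlockCBAM(nn.Module):
    def __init__(self, channels):
        super(BasicBlockCBAM, self).__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.cbam = CBAM(channels)   # channel attention followed by spatial attention
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.cbam(out)         # reweight the residual branch before the skip connection
        return self.relu(out + x)

if __name__ == '__main__':
    block = BasicBlockCBAM(32)
    print(block(torch.randn(2, 32, 20, 20)).shape)   # torch.Size([2, 32, 20, 20])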
--------------------------------------------------------------------------------
/models/ClassicNetwork/blocks/ECA_blocks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """
3 | ECA (Efficient Channel Attention) structure
4 | 
5 | """
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | 
9 | class ECA(nn.Module):
10 |     """Constructs an ECA module.
11 |     Args:
12 |         channel: Number of channels of the input feature map
13 |         k_size: Adaptive selection of kernel size
14 |     """
15 |     def __init__(self, channel, k_size=3):
16 |         super(ECA, self).__init__()
17 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
18 |         self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)
19 |         self.sigmoid = nn.Sigmoid()
20 | 
21 |     def forward(self, x):
22 |         # x: input features with shape [b, c, h, w]
23 |         b, c, h, w = x.size()
24 | 
25 |         # feature descriptor on the global spatial information
26 |         y = self.avg_pool(x)
27 | 
28 |         # Two different branches of ECA module
29 |         y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
30 | 
31 |         # Multi-scale information fusion
32 |         y = self.sigmoid(y)
33 | 
34 |         return x * y.expand_as(x)
--------------------------------------------------------------------------------
/models/ClassicNetwork/blocks/NonLocalBlock.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torchvision
4 | 
5 | 
6 | class NonLocalBlock(nn.Module):
7 |     def __init__(self, channel):
8 |         super(NonLocalBlock, self).__init__()
9 |         self.inter_channel = channel // 2
10 |         self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
11 |         self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
12 |         self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
13 |         self.softmax = nn.Softmax(dim=1)
14 |         self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)
15 | 
16 |     def forward(self, x):
17 |         # [N, C, H , W]
18 |         b, c, h, w = x.size()
19 |         # [N, C/2, H * W]
20 |         x_phi = self.conv_phi(x).view(b, self.inter_channel, -1)
21 |         # [N, H * W, C/2]
22 |         x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
23 |         x_g = self.conv_g(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
24 |         # [N, H * W, H * W]
25 |         mul_theta_phi = torch.matmul(x_theta, x_phi)
26 |         mul_theta_phi = self.softmax(mul_theta_phi)
27 |         # print(mul_theta_phi[0,:,0])
28 |         # [N, H * W, C/2]
29 |         mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g)
30 |         # [N, C/2, H, W]
31 |         mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b, self.inter_channel, h, w)
32 |         # [N, C, H , W]
33 |         mask = self.conv_mask(mul_theta_phi_g)
34 |         out = mask + x
35 |         return out
36 | 
37 | 
38 | if __name__=='__main__':
39 |     model = NonLocalBlock(channel=16)
40 |     print(model)
41 | 
42 |     input = torch.randn(1, 16, 64, 64)
43 |     out = model(input)
44 |     print(out.shape)
--------------------------------------------------------------------------------
/models/ClassicNetwork/blocks/SE_block.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """
3 | SE structure
4 | 
5 | """
6 | 
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | 
10 | 
11 | class SE(nn.Module):
12 | 
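    # Squeeze-and-Excitation gate: global average pooling ("squeeze"), a
    # bottleneck pair of 1x1 convolutions, and a sigmoid ("excitation") whose
    # output re-weights the input channels (see forward below).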
13 | def __init__(self, in_chnls, ratio): 14 | super(SE, self).__init__() 15 | self.squeeze = nn.AdaptiveAvgPool2d((1, 1)) 16 | self.compress = nn.Conv2d(in_chnls, in_chnls // ratio, 1, 1, 0) 17 | self.excitation = nn.Conv2d(in_chnls // ratio, in_chnls, 1, 1, 0) 18 | 19 | def forward(self, x): 20 | out = self.squeeze(x) 21 | out = self.compress(out) 22 | out = F.relu(out) 23 | out = self.excitation(out) 24 | return x*F.sigmoid(out) 25 | -------------------------------------------------------------------------------- /models/ClassicNetwork/blocks/SKNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class SKConv(nn.Module): 6 | def __init__(self, features, WH, M, G, r, stride=1, L=32): 7 | super(SKConv, self).__init__() 8 | d = max(int(features / r), L) 9 | self.M = M 10 | self.features = features 11 | self.convs = nn.ModuleList([]) 12 | for i in range(M): 13 | # 使用不同kernel size的卷积 14 | self.convs.append( 15 | nn.Sequential( 16 | nn.Conv2d(features, 17 | features, 18 | kernel_size=3 + i * 2, 19 | stride=stride, 20 | padding=1 + i, 21 | groups=G), nn.BatchNorm2d(features), 22 | nn.ReLU(inplace=False))) 23 | 24 | self.fc = nn.Linear(features, d) 25 | self.fcs = nn.ModuleList([]) 26 | for i in range(M): 27 | self.fcs.append(nn.Linear(d, features)) 28 | self.softmax = nn.Softmax(dim=1) 29 | 30 | def forward(self, x): 31 | for i, conv in enumerate(self.convs): 32 | fea = conv(x).unsqueeze_(dim=1) 33 | if i == 0: 34 | feas = fea 35 | else: 36 | feas = torch.cat([feas, fea], dim=1) 37 | fea_U = torch.sum(feas, dim=1) 38 | fea_s = fea_U.mean(-1).mean(-1) 39 | fea_z = self.fc(fea_s) 40 | for i, fc in enumerate(self.fcs): 41 | print(i, fea_z.shape) 42 | vector = fc(fea_z).unsqueeze_(dim=1) 43 | print(i, vector.shape) 44 | if i == 0: 45 | attention_vectors = vector 46 | else: 47 | attention_vectors = torch.cat([attention_vectors, vector], 48 | dim=1) 49 | attention_vectors = self.softmax(attention_vectors) 50 | attention_vectors = attention_vectors.unsqueeze(-1).unsqueeze(-1) 51 | fea_v = (feas * attention_vectors).sum(dim=1) 52 | return fea_v 53 | 54 | 55 | if __name__ == "__main__": 56 | t = torch.ones((32, 256, 24, 24)) 57 | sk = SKConv(256, WH=1, M=2, G=1, r=2) 58 | out = sk(t) 59 | print(out.shape) -------------------------------------------------------------------------------- /models/ClassicNetwork/blocks/STN_blocks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | """ 3 | (Spatial Transformer Networks) structure 4 | 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | class STN(nn.Module): 11 | """Constructs a ECA module. 
12 | Args: 13 | inplanes: Number of channels of the input feature map 14 | 15 | """ 16 | def __init__(self): 17 | super(STN, self).__init__() 18 | # Spatial transformer localization-network 19 | self.localization = nn.Sequential( 20 | nn.Conv2d(3, 8, kernel_size=7), 21 | nn.MaxPool2d(2, stride=2), 22 | nn.ReLU(True), 23 | nn.Conv2d(8, 16, kernel_size=5), 24 | nn.MaxPool2d(2, stride=2), 25 | nn.ReLU(True) 26 | ) 27 | # Regressor for the 3 * 2 affine matrix 28 | self.fc_loc = nn.Sequential( 29 | nn.Linear(10 * 3 * 3, 32), 30 | nn.ReLU(True), 31 | nn.Linear(32, 3 * 2) 32 | ) 33 | # Initialize the weights/bias with identity transformation 34 | self.fc_loc[2].weight.data.zero_() 35 | self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)) 36 | 37 | # Spatial transformer network forward function 38 | def forward(self, x): 39 | xs = self.localization(x) 40 | xs = xs.view(x.size()[0], -1) 41 | 42 | theta = self.fc_loc(xs) 43 | theta = theta.view(-1, 2, 3) 44 | 45 | grid = F.affine_grid(theta, x.size()) 46 | x = F.grid_sample(x, grid) 47 | 48 | return x 49 | -------------------------------------------------------------------------------- /models/ClassicNetwork/blocks/conv_bn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:UTF-8 -*- 2 | """ 3 | @Cai Yichao 2020_09_011 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class BN_Conv2d(nn.Module): 12 | """ 13 | BN_CONV, default activation is ReLU 14 | """ 15 | 16 | def __init__(self, in_channels: object, out_channels: object, kernel_size: object, stride: object, padding: object, 17 | dilation=1, groups=1, bias=False, activation=nn.ReLU(inplace=True)) -> object: 18 | super(BN_Conv2d, self).__init__() 19 | layers = [nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 20 | padding=padding, dilation=dilation, groups=groups, bias=bias), 21 | nn.BatchNorm2d(out_channels)] 22 | if activation is not None: 23 | layers.append(activation) 24 | self.seq = nn.Sequential(*layers) 25 | 26 | def forward(self, x): 27 | return self.seq(x) 28 | 29 | 30 | class BN_Conv2d_Leaky(nn.Module): 31 | """ 32 | BN_CONV_LeakyRELU 33 | """ 34 | 35 | def __init__(self, in_channels: object, out_channels: object, kernel_size: object, stride: object, padding: object, 36 | dilation=1, groups=1, bias=False) -> object: 37 | super(BN_Conv2d_Leaky, self).__init__() 38 | self.seq = nn.Sequential( 39 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 40 | padding=padding, dilation=dilation, groups=groups, bias=bias), 41 | nn.BatchNorm2d(out_channels) 42 | ) 43 | 44 | def forward(self, x): 45 | return F.leaky_relu(self.seq(x)) 46 | 47 | 48 | class Mish(nn.Module): 49 | def __init__(self): 50 | super(Mish, self).__init__() 51 | 52 | def forward(self, x): 53 | return x * torch.tanh(F.softplus(x)) 54 | 55 | 56 | class BN_Conv_Mish(nn.Module): 57 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation=1, groups=1, bias=False): 58 | super(BN_Conv_Mish, self).__init__() 59 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation=dilation, 60 | groups=groups, bias=bias) 61 | self.bn = nn.BatchNorm2d(out_channels) 62 | 63 | def forward(self, x): 64 | out = self.bn(self.conv(x)) 65 | return Mish()(out) 66 | -------------------------------------------------------------------------------- /models/ClassicNetwork/blocks/dense_block.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | @Cai Yichao 2020_09_15 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from models.blocks.conv_bn import BN_Conv2d 10 | 11 | 12 | class DenseBlock(nn.Module): 13 | 14 | def __init__(self, input_channels, num_layers, growth_rate): 15 | super(DenseBlock, self).__init__() 16 | self.num_layers = num_layers 17 | self.k0 = input_channels 18 | self.k = growth_rate 19 | self.layers = self.__make_layers() 20 | 21 | def __make_layers(self): 22 | layer_list = [] 23 | for i in range(self.num_layers): 24 | layer_list.append(nn.Sequential( 25 | BN_Conv2d(self.k0 + i * self.k, 4 * self.k, 1, 1, 0), 26 | BN_Conv2d(4 * self.k, self.k, 3, 1, 1) 27 | )) 28 | return layer_list 29 | 30 | def forward(self, x): 31 | feature = self.layers[0](x) 32 | out = torch.cat((x, feature), 1) 33 | for i in range(1, len(self.layers)): 34 | feature = self.layers[i](out) 35 | out = torch.cat((feature, out), 1) 36 | return out 37 | 38 | 39 | class CSP_DenseBlock(nn.Module): 40 | 41 | def __init__(self, in_channels, num_layers, k, part_ratio=0.5): 42 | super(CSP_DenseBlock, self).__init__() 43 | self.part1_chnls = int(in_channels * part_ratio) 44 | self.part2_chnls = in_channels - self.part1_chnls 45 | self.dense = DenseBlock(self.part2_chnls, num_layers, k) 46 | # trans_chnls = self.part2_chnls + k * num_layers 47 | # self.transtion = BN_Conv2d(trans_chnls, trans_chnls, 1, 1, 0) 48 | 49 | def forward(self, x): 50 | part1 = x[:, :self.part1_chnls, :, :] 51 | part2 = x[:, self.part1_chnls:, :, :] 52 | part2 = self.dense(part2) 53 | # part2 = self.transtion(part2) 54 | out = torch.cat((part1, part2), 1) 55 | return out 56 | -------------------------------------------------------------------------------- /models/ClassicNetwork/blocks/dpn_block.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | """ 3 | @Cai Yichao 2020_09_16 4 | """ 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from models.blocks.conv_bn import BN_Conv2d 10 | 11 | 12 | class DPN_Block(nn.Module): 13 | """ 14 | Dual Path block 15 | """ 16 | 17 | def __init__(self, in_chnls, add_chnl, cat_chnl, cardinality, d, stride): 18 | super(DPN_Block, self).__init__() 19 | self.add = add_chnl 20 | self.cat = cat_chnl 21 | self.chnl = cardinality * d 22 | self.conv1 = BN_Conv2d(in_chnls, self.chnl, 1, 1, 0) 23 | self.conv2 = BN_Conv2d(self.chnl, self.chnl, 3, stride, 1, groups=cardinality) 24 | self.conv3 = nn.Conv2d(self.chnl, add_chnl + cat_chnl, 1, 1, 0) 25 | self.bn = nn.BatchNorm2d(add_chnl + cat_chnl) 26 | self.shortcut = nn.Sequential() 27 | if add_chnl != in_chnls: 28 | self.shortcut = nn.Sequential( 29 | nn.Conv2d(in_chnls, add_chnl, 1, stride, 0), 30 | nn.BatchNorm2d(add_chnl) 31 | ) 32 | 33 | def forward(self, x): 34 | out = self.conv1(x) 35 | out = self.conv2(out) 36 | out = self.bn(self.conv3(out)) 37 | add = out[:, :self.add, :, :] + self.shortcut(x) 38 | out = torch.cat((add, out[:, self.add:, :, :]), dim=1) 39 | return F.relu(out) 40 | -------------------------------------------------------------------------------- /models/ClassicNetwork/blocks/resnext_block.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | """ 3 | @Cai Yichao 2020_09_08 4 | """ 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from models.blocks.SE_block import SE 9 | from 
models.blocks.conv_bn import BN_Conv2d 10 | 11 | 12 | class ResNeXt_Block(nn.Module): 13 | """ 14 | ResNeXt block with group convolutions 15 | """ 16 | 17 | def __init__(self, in_chnls, cardinality, group_depth, stride, is_se=False): 18 | super(ResNeXt_Block, self).__init__() 19 | self.is_se = is_se 20 | self.group_chnls = cardinality * group_depth 21 | self.conv1 = BN_Conv2d(in_chnls, self.group_chnls, 1, stride=1, padding=0) 22 | self.conv2 = BN_Conv2d(self.group_chnls, self.group_chnls, 3, stride=stride, padding=1, groups=cardinality) 23 | self.conv3 = nn.Conv2d(self.group_chnls, self.group_chnls * 2, 1, stride=1, padding=0) 24 | self.bn = nn.BatchNorm2d(self.group_chnls * 2) 25 | if self.is_se: 26 | self.se = SE(self.group_chnls * 2, 16) 27 | self.short_cut = nn.Sequential( 28 | nn.Conv2d(in_chnls, self.group_chnls * 2, 1, stride, 0, bias=False), 29 | nn.BatchNorm2d(self.group_chnls * 2) 30 | ) 31 | 32 | def forward(self, x): 33 | out = self.conv1(x) 34 | out = self.conv2(out) 35 | out = self.bn(self.conv3(out)) 36 | if self.is_se: 37 | coefficient = self.se(out) 38 | out *= coefficient 39 | out += self.short_cut(x) 40 | return F.relu(out) 41 | -------------------------------------------------------------------------------- /models/ClassicNetwork/blocks/softmax_blocks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class softmax_layer(nn.Module): 6 | """Constructs a ECA module. 7 | Args: 8 | input: [B,K,F] 9 | output: [B,F] 10 | """ 11 | def __init__(self, dim=512): 12 | super(softmax_layer, self).__init__() 13 | 14 | self.dim = dim 15 | self.w_omega = nn.Parameter(torch.Tensor(self.dim, self.dim)) 16 | self.u_omega = nn.Parameter(torch.Tensor(self.dim, 1)) 17 | 18 | nn.init.uniform_(self.w_omega, -0.1, 0.1) 19 | nn.init.uniform_(self.u_omega, -0.1, 0.1) 20 | 21 | def forward(self, x): 22 | # inputs的形状是[B,K,F] 23 | # Attention过程 24 | u = torch.tanh(torch.matmul(x, self.w_omega)) 25 | # u形状是[B,K,F] 26 | att = torch.matmul(u, self.u_omega) 27 | # att形状是[B,K,1] 28 | att_score = F.softmax(att, dim=1) 29 | # att_score形状仍为[B,K,1] 30 | scored_x = x * att_score 31 | # scored_x形状是(batch_size, seq_len, 2 * num_hiddens) 32 | # Attention过程结束 33 | outs = torch.sum(scored_x, dim=1) 34 | return outs 35 | -------------------------------------------------------------------------------- /models/ClassicNetwork/darknet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | """ 3 | An unofficial implementation of Darknet with pytorch 4 | @Cai Yichao 2020_09_08 5 | """ 6 | 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torchsummary import summary 10 | from models.blocks.conv_bn import BN_Conv2d 11 | from models.blocks.residual_blocks import Dark_block 12 | 13 | 14 | class DarkNet(nn.Module): 15 | 16 | def __init__(self, layers: object, num_classes, is_se=False) -> object: 17 | super(DarkNet, self).__init__() 18 | self.is_se = is_se 19 | filters = [64, 128, 256, 512, 1024] 20 | 21 | self.conv1 = BN_Conv2d(3, 32, 3, 1, 1) 22 | self.redu1 = BN_Conv2d(32, 64, 3, 2, 1) 23 | self.conv2 = self.__make_layers(filters[0], layers[0]) 24 | self.redu2 = BN_Conv2d(filters[0], filters[1], 3, 2, 1) 25 | self.conv3 = self.__make_layers(filters[1], layers[1]) 26 | self.redu3 = BN_Conv2d(filters[1], filters[2], 3, 2, 1) 27 | self.conv4 = self.__make_layers(filters[2], layers[2]) 28 | self.redu4 = BN_Conv2d(filters[2], 
filters[3], 3, 2, 1) 29 | self.conv5 = self.__make_layers(filters[3], layers[3]) 30 | self.redu5 = BN_Conv2d(filters[3], filters[4], 3, 2, 1) 31 | self.conv6 = self.__make_layers(filters[4], layers[4]) 32 | self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) 33 | self.fc = nn.Linear(filters[4], num_classes) 34 | 35 | def __make_layers(self, num_filter, num_layers): 36 | layers = [] 37 | for _ in range(num_layers): 38 | layers.append(Dark_block(num_filter, self.is_se)) 39 | return nn.Sequential(*layers) 40 | 41 | def forward(self, x): 42 | out = self.conv1(x) 43 | out = self.redu1(out) 44 | out = self.conv2(out) 45 | out = self.redu2(out) 46 | out = self.conv3(out) 47 | out = self.redu3(out) 48 | out = self.conv4(out) 49 | out = self.redu4(out) 50 | out = self.conv5(out) 51 | out = self.redu5(out) 52 | out = self.conv6(out) 53 | out = self.global_pool(out) 54 | out = out.view(out.size(0), -1) 55 | out = self.fc(out) 56 | # return F.softmax(out) 57 | return out 58 | 59 | 60 | def darknet_53(num_classes=1000): 61 | return DarkNet([1, 2, 8, 8, 4], num_classes) 62 | 63 | 64 | # def test(): 65 | # net = darknet_53() 66 | # summary(net, (3, 256, 256)) 67 | # 68 | # test() 69 | -------------------------------------------------------------------------------- /models/ClassicNetwork/efficientnet/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.7.0" 2 | from .model import EfficientNet, VALID_MODELS 3 | from .utils import ( 4 | GlobalParams, 5 | BlockArgs, 6 | BlockDecoder, 7 | efficientnet, 8 | get_model_params, 9 | ) 10 | -------------------------------------------------------------------------------- /models/FaceDetectorAndRecognition/README.md: -------------------------------------------------------------------------------- 1 | # FaceDetectorAndRecognition 2 | 3 | 4 | 5 | **FaceBoxes** 6 | 7 | FaceBoxes: A CPU Real-time Face Detector with High Accuracy,2018 8 | 9 | https://arxiv.org/pdf/1708.05234.pdf 10 | 11 | https://liumin.blog.csdn.net/article/details/97698853 12 | 13 | 14 | 15 | **LFFD** 16 | 17 | LFFD: A Light and Fast Face Detector for Edge Devices,2019 18 | 19 | https://arxiv.org/pdf/1904.10633.pdf 20 | 21 | https://liumin.blog.csdn.net/article/details/100181190 -------------------------------------------------------------------------------- /models/GAN/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Erik Linder-Norén 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /models/GAN/bicyclegan/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | from PIL import Image 8 | import torchvision.transforms as transforms 9 | 10 | 11 | class ImageDataset(Dataset): 12 | def __init__(self, root, input_shape, mode="train"): 13 | self.transform = transforms.Compose( 14 | [ 15 | transforms.Resize(input_shape[-2:], Image.BICUBIC), 16 | transforms.ToTensor(), 17 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]), 18 | ] 19 | ) 20 | 21 | self.files = sorted(glob.glob(os.path.join(root, mode) + "/*.*")) 22 | 23 | def __getitem__(self, index): 24 | 25 | img = Image.open(self.files[index % len(self.files)]) 26 | w, h = img.size 27 | img_A = img.crop((0, 0, w / 2, h)) 28 | img_B = img.crop((w / 2, 0, w, h)) 29 | 30 | if np.random.random() < 0.5: 31 | img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], "RGB") 32 | img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], "RGB") 33 | 34 | img_A = self.transform(img_A) 35 | img_B = self.transform(img_B) 36 | 37 | return {"A": img_A, "B": img_B} 38 | 39 | def __len__(self): 40 | return len(self.files) 41 | -------------------------------------------------------------------------------- /models/GAN/ccgan/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | from PIL import Image 8 | import torchvision.transforms as transforms 9 | 10 | class ImageDataset(Dataset): 11 | def __init__(self, root, transforms_x=None, transforms_lr=None, mode='train'): 12 | self.transform_x = transforms.Compose(transforms_x) 13 | self.transform_lr = transforms.Compose(transforms_lr) 14 | 15 | self.files = sorted(glob.glob('%s/*.*' % root)) 16 | 17 | def __getitem__(self, index): 18 | 19 | img = Image.open(self.files[index % len(self.files)]) 20 | 21 | x = self.transform_x(img) 22 | x_lr = self.transform_lr(img) 23 | 24 | return {'x': x, 'x_lr': x_lr} 25 | 26 | def __len__(self): 27 | return len(self.files) 28 | -------------------------------------------------------------------------------- /models/GAN/context_encoder/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | from PIL import Image 8 | import torchvision.transforms as transforms 9 | 10 | 11 | class ImageDataset(Dataset): 12 | def __init__(self, root, transforms_=None, img_size=128, mask_size=64, mode="train"): 13 | self.transform = transforms.Compose(transforms_) 14 | self.img_size = img_size 15 | self.mask_size = mask_size 16 | self.mode = mode 17 | self.files = sorted(glob.glob("%s/*.jpg" % root)) 18 | self.files = self.files[:-4000] if mode == "train" else self.files[-4000:] 19 | 20 | def apply_random_mask(self, img): 21 | """Randomly masks image""" 22 | y1, x1 = np.random.randint(0, self.img_size - self.mask_size, 2) 23 | y2, x2 = y1 + 
self.mask_size, x1 + self.mask_size 24 | masked_part = img[:, y1:y2, x1:x2] 25 | masked_img = img.clone() 26 | masked_img[:, y1:y2, x1:x2] = 1 27 | 28 | return masked_img, masked_part 29 | 30 | def apply_center_mask(self, img): 31 | """Mask center part of image""" 32 | # Get upper-left pixel coordinate 33 | i = (self.img_size - self.mask_size) // 2 34 | masked_img = img.clone() 35 | masked_img[:, i : i + self.mask_size, i : i + self.mask_size] = 1 36 | 37 | return masked_img, i 38 | 39 | def __getitem__(self, index): 40 | 41 | img = Image.open(self.files[index % len(self.files)]) 42 | img = self.transform(img) 43 | if self.mode == "train": 44 | # For training data perform random mask 45 | masked_img, aux = self.apply_random_mask(img) 46 | else: 47 | # For test data mask the center of the image 48 | masked_img, aux = self.apply_center_mask(img) 49 | 50 | return img, masked_img, aux 51 | 52 | def __len__(self): 53 | return len(self.files) 54 | -------------------------------------------------------------------------------- /models/GAN/context_encoder/models.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | 5 | 6 | class Generator(nn.Module): 7 | def __init__(self, channels=3): 8 | super(Generator, self).__init__() 9 | 10 | def downsample(in_feat, out_feat, normalize=True): 11 | layers = [nn.Conv2d(in_feat, out_feat, 4, stride=2, padding=1)] 12 | if normalize: 13 | layers.append(nn.BatchNorm2d(out_feat, 0.8)) 14 | layers.append(nn.LeakyReLU(0.2)) 15 | return layers 16 | 17 | def upsample(in_feat, out_feat, normalize=True): 18 | layers = [nn.ConvTranspose2d(in_feat, out_feat, 4, stride=2, padding=1)] 19 | if normalize: 20 | layers.append(nn.BatchNorm2d(out_feat, 0.8)) 21 | layers.append(nn.ReLU()) 22 | return layers 23 | 24 | self.model = nn.Sequential( 25 | *downsample(channels, 64, normalize=False), 26 | *downsample(64, 64), 27 | *downsample(64, 128), 28 | *downsample(128, 256), 29 | *downsample(256, 512), 30 | nn.Conv2d(512, 4000, 1), 31 | *upsample(4000, 512), 32 | *upsample(512, 256), 33 | *upsample(256, 128), 34 | *upsample(128, 64), 35 | nn.Conv2d(64, channels, 3, 1, 1), 36 | nn.Tanh() 37 | ) 38 | 39 | def forward(self, x): 40 | return self.model(x) 41 | 42 | 43 | class Discriminator(nn.Module): 44 | def __init__(self, channels=3): 45 | super(Discriminator, self).__init__() 46 | 47 | def discriminator_block(in_filters, out_filters, stride, normalize): 48 | """Returns layers of each discriminator block""" 49 | layers = [nn.Conv2d(in_filters, out_filters, 3, stride, 1)] 50 | if normalize: 51 | layers.append(nn.InstanceNorm2d(out_filters)) 52 | layers.append(nn.LeakyReLU(0.2, inplace=True)) 53 | return layers 54 | 55 | layers = [] 56 | in_filters = channels 57 | for out_filters, stride, normalize in [(64, 2, False), (128, 2, True), (256, 2, True), (512, 1, True)]: 58 | layers.extend(discriminator_block(in_filters, out_filters, stride, normalize)) 59 | in_filters = out_filters 60 | 61 | layers.append(nn.Conv2d(out_filters, 1, 3, 1, 1)) 62 | 63 | self.model = nn.Sequential(*layers) 64 | 65 | def forward(self, img): 66 | return self.model(img) 67 | -------------------------------------------------------------------------------- /models/GAN/cyclegan/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | import 
torchvision.transforms as transforms 8 | 9 | 10 | def to_rgb(image): 11 | rgb_image = Image.new("RGB", image.size) 12 | rgb_image.paste(image) 13 | return rgb_image 14 | 15 | 16 | class ImageDataset(Dataset): 17 | def __init__(self, root, transforms_=None, unaligned=False, mode="train"): 18 | self.transform = transforms.Compose(transforms_) 19 | self.unaligned = unaligned 20 | 21 | self.files_A = sorted(glob.glob(os.path.join(root, "%s/A" % mode) + "/*.*")) 22 | self.files_B = sorted(glob.glob(os.path.join(root, "%s/B" % mode) + "/*.*")) 23 | 24 | def __getitem__(self, index): 25 | image_A = Image.open(self.files_A[index % len(self.files_A)]) 26 | 27 | if self.unaligned: 28 | image_B = Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)]) 29 | else: 30 | image_B = Image.open(self.files_B[index % len(self.files_B)]) 31 | 32 | # Convert grayscale images to rgb 33 | if image_A.mode != "RGB": 34 | image_A = to_rgb(image_A) 35 | if image_B.mode != "RGB": 36 | image_B = to_rgb(image_B) 37 | 38 | item_A = self.transform(image_A) 39 | item_B = self.transform(image_B) 40 | return {"A": item_A, "B": item_B} 41 | 42 | def __len__(self): 43 | return max(len(self.files_A), len(self.files_B)) 44 | -------------------------------------------------------------------------------- /models/GAN/cyclegan/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | import datetime 4 | import sys 5 | 6 | from torch.autograd import Variable 7 | import torch 8 | import numpy as np 9 | 10 | from torchvision.utils import save_image 11 | 12 | 13 | class ReplayBuffer: 14 | def __init__(self, max_size=50): 15 | assert max_size > 0, "Empty buffer or trying to create a black hole. Be careful." 16 | self.max_size = max_size 17 | self.data = [] 18 | 19 | def push_and_pop(self, data): 20 | to_return = [] 21 | for element in data.data: 22 | element = torch.unsqueeze(element, 0) 23 | if len(self.data) < self.max_size: 24 | self.data.append(element) 25 | to_return.append(element) 26 | else: 27 | if random.uniform(0, 1) > 0.5: 28 | i = random.randint(0, self.max_size - 1) 29 | to_return.append(self.data[i].clone()) 30 | self.data[i] = element 31 | else: 32 | to_return.append(element) 33 | return Variable(torch.cat(to_return)) 34 | 35 | 36 | class LambdaLR: 37 | def __init__(self, n_epochs, offset, decay_start_epoch): 38 | assert (n_epochs - decay_start_epoch) > 0, "Decay must start before the training session ends!" 
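        # Linear decay schedule: the factor returned by step() stays at 1.0
        # until `decay_start_epoch`, then falls linearly to 0.0 at `n_epochs`;
        # typically passed as the lr_lambda of torch.optim.lr_scheduler.LambdaLR.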
39 | self.n_epochs = n_epochs 40 | self.offset = offset 41 | self.decay_start_epoch = decay_start_epoch 42 | 43 | def step(self, epoch): 44 | return 1.0 - max(0, epoch + self.offset - self.decay_start_epoch) / (self.n_epochs - self.decay_start_epoch) 45 | -------------------------------------------------------------------------------- /models/GAN/discogan/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import torch 4 | 5 | import numpy as np 6 | 7 | from torch.utils.data import Dataset 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | 11 | class ImageDataset(Dataset): 12 | def __init__(self, root, transforms_=None, mode='train'): 13 | self.transform = transforms.Compose(transforms_) 14 | 15 | self.files = sorted(glob.glob(os.path.join(root, mode) + '/*.*')) 16 | 17 | def __getitem__(self, index): 18 | 19 | img = Image.open(self.files[index % len(self.files)]) 20 | w, h = img.size 21 | img_A = img.crop((0, 0, w/2, h)) 22 | img_B = img.crop((w/2, 0, w, h)) 23 | 24 | if np.random.random() < 0.5: 25 | img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], 'RGB') 26 | img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], 'RGB') 27 | 28 | img_A = self.transform(img_A) 29 | img_B = self.transform(img_B) 30 | 31 | return {'A': img_A, 'B': img_B} 32 | 33 | def __len__(self): 34 | return len(self.files) 35 | -------------------------------------------------------------------------------- /models/GAN/dualgan/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | from PIL import Image 8 | import torchvision.transforms as transforms 9 | 10 | 11 | class ImageDataset(Dataset): 12 | def __init__(self, root, transforms_=None, mode="train"): 13 | self.transform = transforms.Compose(transforms_) 14 | 15 | self.files = sorted(glob.glob(os.path.join(root, mode) + "/*.*")) 16 | 17 | def __getitem__(self, index): 18 | 19 | img = Image.open(self.files[index % len(self.files)]) 20 | w, h = img.size 21 | img_A = img.crop((0, 0, w / 2, h)) 22 | img_B = img.crop((w / 2, 0, w, h)) 23 | 24 | if np.random.random() < 0.5: 25 | img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], "RGB") 26 | img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], "RGB") 27 | 28 | img_A = self.transform(img_A) 29 | img_B = self.transform(img_B) 30 | 31 | return {"A": img_A, "B": img_B} 32 | 33 | def __len__(self): 34 | return len(self.files) 35 | -------------------------------------------------------------------------------- /models/GAN/esrgan/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | 11 | # Normalization parameters for pre-trained PyTorch models 12 | mean = np.array([0.485, 0.456, 0.406]) 13 | std = np.array([0.229, 0.224, 0.225]) 14 | 15 | 16 | def denormalize(tensors): 17 | """ Denormalizes image tensors using mean and std """ 18 | for c in range(3): 19 | tensors[:, c].mul_(std[c]).add_(mean[c]) 20 | return torch.clamp(tensors, 0, 255) 21 | 22 | 23 | class ImageDataset(Dataset): 24 | def __init__(self, root, hr_shape): 25 | hr_height, hr_width = hr_shape 26 | # Transforms for low resolution images and high resolution 
images 27 | self.lr_transform = transforms.Compose( 28 | [ 29 | transforms.Resize((hr_height // 4, hr_height // 4), Image.BICUBIC), 30 | transforms.ToTensor(), 31 | transforms.Normalize(mean, std), 32 | ] 33 | ) 34 | self.hr_transform = transforms.Compose( 35 | [ 36 | transforms.Resize((hr_height, hr_height), Image.BICUBIC), 37 | transforms.ToTensor(), 38 | transforms.Normalize(mean, std), 39 | ] 40 | ) 41 | 42 | self.files = sorted(glob.glob(root + "/*.*")) 43 | 44 | def __getitem__(self, index): 45 | img = Image.open(self.files[index % len(self.files)]) 46 | img_lr = self.lr_transform(img) 47 | img_hr = self.hr_transform(img) 48 | 49 | return {"lr": img_lr, "hr": img_hr} 50 | 51 | def __len__(self): 52 | return len(self.files) 53 | -------------------------------------------------------------------------------- /models/GAN/esrgan/test_on_image.py: -------------------------------------------------------------------------------- 1 | from models import GeneratorRRDB 2 | from datasets import denormalize, mean, std 3 | import torch 4 | from torch.autograd import Variable 5 | import argparse 6 | import os 7 | from torchvision import transforms 8 | from torchvision.utils import save_image 9 | from PIL import Image 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--image_path", type=str, required=True, help="Path to image") 13 | parser.add_argument("--checkpoint_model", type=str, required=True, help="Path to checkpoint model") 14 | parser.add_argument("--channels", type=int, default=3, help="Number of image channels") 15 | parser.add_argument("--residual_blocks", type=int, default=23, help="Number of residual blocks in G") 16 | opt = parser.parse_args() 17 | print(opt) 18 | 19 | os.makedirs("images/outputs", exist_ok=True) 20 | 21 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | 23 | # Define model and load model checkpoint 24 | generator = GeneratorRRDB(opt.channels, filters=64, num_res_blocks=opt.residual_blocks).to(device) 25 | generator.load_state_dict(torch.load(opt.checkpoint_model)) 26 | generator.eval() 27 | 28 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)]) 29 | 30 | # Prepare input 31 | image_tensor = Variable(transform(Image.open(opt.image_path))).to(device).unsqueeze(0) 32 | 33 | # Upsample image 34 | with torch.no_grad(): 35 | sr_image = denormalize(generator(image_tensor)).cpu() 36 | 37 | # Save image 38 | fn = opt.image_path.split("/")[-1] 39 | save_image(sr_image, f"images/outputs/sr-{fn}") 40 | -------------------------------------------------------------------------------- /models/GAN/munit/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | from PIL import Image 8 | import torchvision.transforms as transforms 9 | 10 | 11 | class ImageDataset(Dataset): 12 | def __init__(self, root, transforms_=None, mode="train"): 13 | self.transform = transforms.Compose(transforms_) 14 | 15 | self.files = sorted(glob.glob(os.path.join(root, mode) + "/*.*")) 16 | if mode == "train": 17 | self.files.extend(sorted(glob.glob(os.path.join(root, "test") + "/*.*"))) 18 | 19 | def __getitem__(self, index): 20 | 21 | img = Image.open(self.files[index % len(self.files)]) 22 | w, h = img.size 23 | img_A = img.crop((0, 0, w / 2, h)) 24 | img_B = img.crop((w / 2, 0, w, h)) 25 | 26 | if np.random.random() < 0.5: 27 | img_A = 
Image.fromarray(np.array(img_A)[:, ::-1, :], "RGB") 28 | img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], "RGB") 29 | 30 | img_A = self.transform(img_A) 31 | img_B = self.transform(img_B) 32 | 33 | return {"A": img_A, "B": img_B} 34 | 35 | def __len__(self): 36 | return len(self.files) 37 | -------------------------------------------------------------------------------- /models/GAN/pix2pix/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | from PIL import Image 8 | import torchvision.transforms as transforms 9 | 10 | 11 | class ImageDataset(Dataset): 12 | def __init__(self, root, transforms_=None, mode="train"): 13 | self.transform = transforms.Compose(transforms_) 14 | 15 | self.files = sorted(glob.glob(os.path.join(root, mode) + "/*.*")) 16 | if mode == "train": 17 | self.files.extend(sorted(glob.glob(os.path.join(root, "test") + "/*.*"))) 18 | 19 | def __getitem__(self, index): 20 | 21 | img = Image.open(self.files[index % len(self.files)]) 22 | w, h = img.size 23 | img_A = img.crop((0, 0, w / 2, h)) 24 | img_B = img.crop((w / 2, 0, w, h)) 25 | 26 | if np.random.random() < 0.5: 27 | img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], "RGB") 28 | img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], "RGB") 29 | 30 | img_A = self.transform(img_A) 31 | img_B = self.transform(img_B) 32 | 33 | return {"A": img_A, "B": img_B} 34 | 35 | def __len__(self): 36 | return len(self.files) 37 | -------------------------------------------------------------------------------- /models/GAN/srgan/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | 11 | # Normalization parameters for pre-trained PyTorch models 12 | mean = np.array([0.485, 0.456, 0.406]) 13 | std = np.array([0.229, 0.224, 0.225]) 14 | 15 | 16 | class ImageDataset(Dataset): 17 | def __init__(self, root, hr_shape): 18 | hr_height, hr_width = hr_shape 19 | # Transforms for low resolution images and high resolution images 20 | self.lr_transform = transforms.Compose( 21 | [ 22 | transforms.Resize((hr_height // 4, hr_height // 4), Image.BICUBIC), 23 | transforms.ToTensor(), 24 | transforms.Normalize(mean, std), 25 | ] 26 | ) 27 | self.hr_transform = transforms.Compose( 28 | [ 29 | transforms.Resize((hr_height, hr_height), Image.BICUBIC), 30 | transforms.ToTensor(), 31 | transforms.Normalize(mean, std), 32 | ] 33 | ) 34 | 35 | self.files = sorted(glob.glob(root + "/*.*")) 36 | 37 | def __getitem__(self, index): 38 | img = Image.open(self.files[index % len(self.files)]) 39 | img_lr = self.lr_transform(img) 40 | img_hr = self.hr_transform(img) 41 | 42 | return {"lr": img_lr, "hr": img_hr} 43 | 44 | def __len__(self): 45 | return len(self.files) 46 | -------------------------------------------------------------------------------- /models/GAN/stargan/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import numpy as np 5 | import torch 6 | 7 | from torch.utils.data import Dataset 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | 11 | 12 | class CelebADataset(Dataset): 13 | def __init__(self, root, transforms_=None, 
mode="train", attributes=None): 14 | self.transform = transforms.Compose(transforms_) 15 | 16 | self.selected_attrs = attributes 17 | self.files = sorted(glob.glob("%s/*.jpg" % root)) 18 | self.files = self.files[:-2000] if mode == "train" else self.files[-2000:] 19 | self.label_path = glob.glob("%s/*.txt" % root)[0] 20 | self.annotations = self.get_annotations() 21 | 22 | def get_annotations(self): 23 | """Extracts annotations for CelebA""" 24 | annotations = {} 25 | lines = [line.rstrip() for line in open(self.label_path, "r")] 26 | self.label_names = lines[1].split() 27 | for _, line in enumerate(lines[2:]): 28 | filename, *values = line.split() 29 | labels = [] 30 | for attr in self.selected_attrs: 31 | idx = self.label_names.index(attr) 32 | labels.append(1 * (values[idx] == "1")) 33 | annotations[filename] = labels 34 | return annotations 35 | 36 | def __getitem__(self, index): 37 | filepath = self.files[index % len(self.files)] 38 | filename = filepath.split("/")[-1] 39 | img = self.transform(Image.open(filepath)) 40 | label = self.annotations[filename] 41 | label = torch.FloatTensor(np.array(label)) 42 | 43 | return img, label 44 | 45 | def __len__(self): 46 | return len(self.files) 47 | -------------------------------------------------------------------------------- /models/GAN/unit/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | 9 | 10 | class ImageDataset(Dataset): 11 | def __init__(self, root, transforms_=None, unaligned=False, mode="train"): 12 | self.transform = transforms.Compose(transforms_) 13 | self.unaligned = unaligned 14 | 15 | self.files_A = sorted(glob.glob(os.path.join(root, "%s/A" % mode) + "/*.*")) 16 | self.files_B = sorted(glob.glob(os.path.join(root, "%s/B" % mode) + "/*.*")) 17 | 18 | def __getitem__(self, index): 19 | item_A = self.transform(Image.open(self.files_A[index % len(self.files_A)])) 20 | 21 | if self.unaligned: 22 | item_B = self.transform(Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])) 23 | else: 24 | item_B = self.transform(Image.open(self.files_B[index % len(self.files_B)])) 25 | 26 | return {"A": item_A, "B": item_B} 27 | 28 | def __len__(self): 29 | return max(len(self.files_A), len(self.files_B)) 30 | -------------------------------------------------------------------------------- /models/HumanPoseEstimation/README.md: -------------------------------------------------------------------------------- 1 | # HumanPoseEstimation-network 2 | Pytorch implementation of HumanPoseEstimation-network 3 | 4 | 5 | 6 | **StackedHG:** 7 | 8 | Stacked Hourglass Networks for Human Pose Estimation ,2016 9 | 10 | https://arxiv.org/pdf/1603.06937.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/101484455 13 | 14 | 15 | 16 | **Simple Baselines** 17 | 18 | Simple Baselines for Human Pose Estimation and Tracking 19 | 20 | https://arxiv.org/pdf/1804.06208.pdf 21 | 22 | https://liumin.blog.csdn.net/article/details/103447040 23 | 24 | 25 | 26 | **LPN:** 27 | 28 | Simple and Lightweight Human Pose Estimation 29 | 30 | https://arxiv.org/pdf/1911.10346v1.pdf 31 | 32 | https://liumin.blog.csdn.net/article/details/103448034 -------------------------------------------------------------------------------- /models/InstanceSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # 
InstanceSegmentation-network 2 | Pytorch implementation of InstanceSegmentation-network 3 | 4 | 5 | 6 | **PolarMask :** 7 | 8 | PolarMask: Single Shot Instance Segmentation with Polar Representation ,2019 9 | 10 | https://arxiv.org/pdf/1909.13226.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/101975085 13 | 14 | -------------------------------------------------------------------------------- /models/Lightweight/MobileNetV1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def BottleneckV1(in_channels, out_channels, stride): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=in_channels,kernel_size=3,stride=stride,padding=1,groups=in_channels), 8 | nn.BatchNorm2d(in_channels), 9 | nn.ReLU6(inplace=True), 10 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 11 | nn.BatchNorm2d(out_channels), 12 | nn.ReLU6(inplace=True) 13 | ) 14 | 15 | class MobileNetV1(nn.Module): 16 | def __init__(self, num_classes=1000): 17 | super(MobileNetV1, self).__init__() 18 | 19 | self.first_conv = nn.Sequential( 20 | nn.Conv2d(in_channels=3,out_channels=32,kernel_size=3,stride=2,padding=1), 21 | nn.BatchNorm2d(32), 22 | nn.ReLU6(inplace=True), 23 | ) 24 | 25 | self.bottleneck = nn.Sequential( 26 | BottleneckV1(32, 64, stride=1), 27 | BottleneckV1(64, 128, stride=2), 28 | BottleneckV1(128, 128, stride=1), 29 | BottleneckV1(128, 256, stride=2), 30 | BottleneckV1(256, 256, stride=1), 31 | BottleneckV1(256, 512, stride=2), 32 | BottleneckV1(512, 512, stride=1), 33 | BottleneckV1(512, 512, stride=1), 34 | BottleneckV1(512, 512, stride=1), 35 | BottleneckV1(512, 512, stride=1), 36 | BottleneckV1(512, 512, stride=1), 37 | BottleneckV1(512, 1024, stride=2), 38 | BottleneckV1(1024, 1024, stride=1), 39 | ) 40 | 41 | self.avg_pool = nn.AvgPool2d(kernel_size=7,stride=1) 42 | self.linear = nn.Linear(in_features=1024,out_features=num_classes) 43 | self.dropout = nn.Dropout(p=0.2) 44 | self.softmax = nn.Softmax(dim=1) 45 | 46 | self.init_params() 47 | 48 | def init_params(self): 49 | for m in self.modules(): 50 | if isinstance(m, nn.Conv2d): 51 | nn.init.kaiming_normal_(m.weight) 52 | nn.init.constant_(m.bias,0) 53 | elif isinstance(m, nn.Linear) or isinstance(m, nn.BatchNorm2d): 54 | nn.init.constant_(m.weight, 1) 55 | nn.init.constant_(m.bias, 0) 56 | 57 | def forward(self, x): 58 | x = self.first_conv(x) 59 | x = self.bottleneck(x) 60 | x = self.avg_pool(x) 61 | x = x.view(x.size(0),-1) 62 | x = self.dropout(x) 63 | x = self.linear(x) 64 | out = self.softmax(x) 65 | return out 66 | 67 | if __name__=='__main__': 68 | model = MobileNetV1() 69 | print(model) 70 | 71 | input = torch.randn(1, 3, 224, 224) 72 | out = model(input) 73 | print(out.shape) 74 | -------------------------------------------------------------------------------- /models/Lightweight/README.md: -------------------------------------------------------------------------------- 1 | # Lightweight-network 2 | 3 | Lightweight network PyTorch实现 4 | 5 | 6 | 7 | ## MobileNets: 8 | 9 | **MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications** 10 | 11 | 12 | 13 | ## MobileNetV2: 14 | 15 | **MobileNetV2: Inverted Residuals and Linear Bottlenecks** 16 | 17 | 18 | 19 | ## MobileNetV3: 20 | 21 | **Searching for MobileNetV3** 22 | 23 | 24 | ## ShuffleNet: 25 | 26 | **ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices** 27 | 28 | 29 | 
## ShuffleNet V2: 30 | 31 | **ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design** 32 | 33 | 34 | 35 | ## SqueezeNet 36 | 37 | **SqueezeNet:AlexNet-level accuracy with 50x fewer parameters and < 0.5MB Model Size** 38 | 39 | ## Xception 40 | 41 | **Xception: Deep Learning with Depthwise Separable Convolutions** 42 | 43 | 44 | 45 | ## MixNet 46 | 47 | **MixNet: Mixed Depthwise Convolutional Kernels** -------------------------------------------------------------------------------- /models/Lightweight/SqueezeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FireModule(nn.Module): 6 | def __init__(self, in_channels, out_channels, mid_channels=None): 7 | super(FireModule, self).__init__() 8 | mid_channels = out_channels//4 9 | 10 | self.squeeze = nn.Conv2d(in_channels=in_channels,out_channels=mid_channels,kernel_size=1,stride=1) 11 | self.squeeze_relu = nn.ReLU6(inplace=True) 12 | 13 | self.expand3x3 = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=3, stride=1,padding=1) 14 | self.expand3x3_relu = nn.ReLU6(inplace=True) 15 | 16 | self.expand1x1 = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1) 17 | self.expand1x1_relu = nn.ReLU6(inplace=True) 18 | 19 | def forward(self, x): 20 | x = self.squeeze_relu(self.squeeze(x)) 21 | y = self.expand3x3_relu(self.expand3x3(x)) 22 | z = self.expand1x1_relu(self.expand1x1(x)) 23 | out = torch.cat([y, z],dim=1) 24 | return out 25 | 26 | class SqueezeNet(nn.Module): 27 | def __init__(self, num_classes = 1000): 28 | super(SqueezeNet, self).__init__() 29 | 30 | self.bottleneck = nn.Sequential( 31 | nn.Conv2d(in_channels=3, out_channels=96,kernel_size=7,stride=2,padding=3), 32 | nn.BatchNorm2d(96), 33 | nn.ReLU6(inplace=True), 34 | nn.MaxPool2d(kernel_size=3,stride=2), 35 | 36 | FireModule(in_channels=96, out_channels=64), 37 | FireModule(in_channels=128, out_channels=64), 38 | FireModule(in_channels=128, out_channels=128), 39 | nn.MaxPool2d(kernel_size=3,stride=2), 40 | 41 | FireModule(in_channels=256, out_channels=128), 42 | FireModule(in_channels=256, out_channels=192), 43 | FireModule(in_channels=384, out_channels=192), 44 | FireModule(in_channels=384, out_channels=256), 45 | nn.MaxPool2d(kernel_size=3, stride=2), 46 | 47 | FireModule(in_channels=512, out_channels=256), 48 | nn.Dropout(p=0.5), 49 | nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1, stride=1), 50 | nn.ReLU(inplace=True), 51 | nn.AvgPool2d(kernel_size=13, stride=1), 52 | ) 53 | 54 | def forward(self, x): 55 | out = self.bottleneck(x) 56 | return out.view(out.size(1),-1) 57 | 58 | if __name__ == '__main__': 59 | model = SqueezeNet() 60 | print(model) 61 | 62 | input = torch.rand(1,3,224,224) 63 | out = model(input) 64 | print(out.shape) 65 | 66 | -------------------------------------------------------------------------------- /models/ObjectDetection/CenterNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | 7 | 8 | 9 | 10 | if __name__ == '__main__': 11 | model = YOLO() 12 | print(model) 13 | 14 | data = torch.randn(1,3,448,448) 15 | output = model(data) 16 | print(output.shape) -------------------------------------------------------------------------------- /models/ObjectDetection/FPN.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FPN(nn.Module): 6 | def __init__(self): 7 | super(FPN, self).__init__() 8 | resnet = torchvision.models.resnet50() 9 | layers = list(resnet.children()) 10 | 11 | self.layer1 = nn.Sequential(*layers[:5]) 12 | self.layer2 = nn.Sequential(*layers[5]) 13 | self.layer3 = nn.Sequential(*layers[6]) 14 | self.layer4 = nn.Sequential(*layers[7]) 15 | 16 | self.lateral5 = nn.Conv2d(in_channels=2048,out_channels=256,kernel_size=1) 17 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 18 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 19 | self.lateral2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1) 20 | 21 | self.upsample2 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 22 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 23 | self.upsample4 = nn.ConvTranspose2d(in_channels=256,out_channels=256, kernel_size=4, stride=2, padding=1) 24 | 25 | self.smooth2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 26 | self.smooth3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 27 | self.smooth4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 28 | 29 | self.init_params() 30 | 31 | def init_params(self): 32 | for m in self.modules(): 33 | if isinstance(m, nn.Conv2d): 34 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 35 | elif isinstance(m, nn.BatchNorm2d): 36 | nn.init.constant_(m.weight, 1) 37 | nn.init.constant_(m.bias, 0) 38 | 39 | def forward(self, x): 40 | c2 = x = self.layer1(x) 41 | c3 = x = self.layer2(x) 42 | c4 = x = self.layer3(x) 43 | c5 = x = self.layer4(x) 44 | 45 | p5 = self.lateral5(c5) 46 | p4 = self.upsample4(p5)+ self.lateral4(c4) 47 | p3 = self.upsample3(p4)+ self.lateral3(c3) 48 | p2 = self.upsample2(p3)+ self.lateral2(c2) 49 | 50 | p4 = self.smooth4(p4) 51 | p3 = self.smooth3(p3) 52 | p2 = self.smooth4(p2) 53 | return p2,p3,p4,p5 54 | 55 | if __name__ == '__main__': 56 | model = FPN() 57 | print(model) 58 | 59 | input = torch.randn(1, 3, 224, 224) 60 | p2, p3, p4, p5 = model(input) 61 | print(p2.shape) 62 | print(p3.shape) 63 | print(p4.shape) 64 | print(p5.shape) -------------------------------------------------------------------------------- /models/ObjectDetection/README.md: -------------------------------------------------------------------------------- 1 | # ObjectDetection-network 2 | Pytorch implementation of ObjectDetection-network 3 | 4 | 5 | 6 | **SSD:** 7 | 8 | SSD: Single Shot MultiBox Detector,2016 9 | 10 | https://arxiv.org/pdf/1512.02325.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/100530275 13 | 14 | 15 | 16 | **YOLO:** 17 | 18 | You Only Look Once: Unified, Real-Time Object Detection, 2016 19 | 20 | https://arxiv.org/pdf/1506.02640.pdf 21 | 22 | https://liumin.blog.csdn.net/article/details/100904605 23 | 24 | 25 | 26 | **YOLOv2:** 27 | 28 | YOLO9000: Better, Faster, Stronger,2017 29 | 30 | https://arxiv.org/pdf/1804.02767.pdf 31 | 32 | https://liumin.blog.csdn.net/article/details/100904645 33 | 34 | 35 | 36 | **YOLOv3:** 37 | 38 | YOLOv3: An Incremental Improvement, 2018 39 | 40 | https://arxiv.org/pdf/1612.08242.pdf 41 | 42 | https://liumin.blog.csdn.net/article/details/100904663 43 | 44 | 45 | 46 | 
**FCOS:** 47 | 48 | FCOS: Fully Convolutional One-Stage Object Detection, 2019 49 | 50 | https://arxiv.org/pdf/1904.01355.pdf 51 | 52 | https://liumin.blog.csdn.net/article/details/89007219 53 | 54 | 55 | 56 | **FPN:** 57 | 58 | Feature Pyramid Networks for Object Detection, 2017 59 | 60 | https://arxiv.org/pdf/1612.03144v2.pdf 61 | 62 | https://liumin.blog.csdn.net/article/details/100864158 63 | 64 | 65 | 66 | **RetinaNet:** 67 | 68 | https://liumin.blog.csdn.net/article/details/102135318 69 | 70 | https://arxiv.org/pdf/1708.02002.pdf 71 | 72 | https://liumin.blog.csdn.net/article/details/102135318 73 | 74 | 75 | 76 | **Objects as Points:** 77 | 78 | Objects as Points,2019 79 | 80 | https://arxiv.org/pdf/1904.07850v1.pdf 81 | 82 | https://liumin.blog.csdn.net/article/details/100867545 83 | 84 | 85 | 86 | **FSAF:** 87 | 88 | Feature Selective Anchor-Free Module for Single-Shot Object Detection, 2019 89 | 90 | https://arxiv.org/pdf/1903.00621.pdf 91 | 92 | https://liumin.blog.csdn.net/article/details/100942317 93 | 94 | 95 | 96 | **CenterNet** 97 | 98 | CenterNet: Keypoint Triplets for Object Detection, 2019 99 | 100 | https://arxiv.org/pdf/1904.08189.pdf 101 | 102 | https://liumin.blog.csdn.net/article/details/100942259 103 | 104 | 105 | 106 | **FoveaBox** 107 | 108 | FoveaBox: Beyond Anchor-based Object Detector, 2019 109 | 110 | https://arxiv.org/pdf/1904.03797v1.pdf 111 | 112 | https://liumin.blog.csdn.net/article/details/100941880 -------------------------------------------------------------------------------- /models/ObjectDetection/YOLOv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,padding=0): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=1,padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | class Darknet19(nn.Module): 19 | def __init__(self, num_classes=1000): 20 | super(Darknet19, self).__init__() 21 | 22 | self.feature = nn.Sequential( 23 | Conv3x3BNReLU(in_channels=3, out_channels=32), 24 | nn.MaxPool2d(kernel_size=2,stride=2), 25 | Conv3x3BNReLU(in_channels=32, out_channels=64), 26 | nn.MaxPool2d(kernel_size=2, stride=2), 27 | Conv3x3BNReLU(in_channels=64, out_channels=128), 28 | Conv1x1BNReLU(in_channels=128, out_channels=64), 29 | Conv3x3BNReLU(in_channels=64, out_channels=128), 30 | nn.MaxPool2d(kernel_size=2, stride=2), 31 | Conv3x3BNReLU(in_channels=128, out_channels=256), 32 | Conv1x1BNReLU(in_channels=256, out_channels=128), 33 | Conv3x3BNReLU(in_channels=128, out_channels=256), 34 | nn.MaxPool2d(kernel_size=2, stride=2), 35 | Conv3x3BNReLU(in_channels=256, out_channels=512), 36 | Conv1x1BNReLU(in_channels=512, out_channels=256), 37 | Conv3x3BNReLU(in_channels=256, out_channels=512), 38 | Conv1x1BNReLU(in_channels=512, out_channels=256), 39 | Conv3x3BNReLU(in_channels=256, out_channels=512), 40 | nn.MaxPool2d(kernel_size=2, stride=2), 41 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 42 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 43 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 44 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 45 | Conv3x3BNReLU(in_channels=512, 
out_channels=1024), 46 | ) 47 | 48 | self.classifier = nn.Sequential( 49 | Conv1x1BNReLU(in_channels=1024, out_channels=num_classes), 50 | nn.AvgPool2d(kernel_size=7,stride=1), 51 | ) 52 | self.softmax = nn.Softmax(dim=1) 53 | 54 | def forward(self, x): 55 | x = self.feature(x) 56 | x = self.classifier(x) 57 | x = torch.squeeze(x, dim=3).contiguous() 58 | x = torch.squeeze(x, dim=2).contiguous() 59 | out = self.softmax(x) 60 | return out 61 | 62 | if __name__ == '__main__': 63 | model = Darknet19() 64 | print(model) 65 | 66 | input = torch.randn(1,3,224,224) 67 | out = model(input) 68 | print(out.shape) -------------------------------------------------------------------------------- /models/ObjectDetection/YOLOv3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride=1): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=stride,padding=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=1,padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | class Residual(nn.Module): 19 | def __init__(self, nchannels): 20 | super(Residual, self).__init__() 21 | mid_channels = nchannels // 2 22 | self.conv1x1 = Conv1x1BNReLU(in_channels=nchannels, out_channels=mid_channels) 23 | self.conv3x3 = Conv3x3BNReLU(in_channels=mid_channels, out_channels=nchannels) 24 | 25 | def forward(self, x): 26 | out = self.conv3x3(self.conv1x1(x)) 27 | return out + x 28 | 29 | class Darknet19(nn.Module): 30 | def __init__(self, num_classes=1000): 31 | super(Darknet19, self).__init__() 32 | self.first_conv = Conv3x3BNReLU(in_channels=3, out_channels=32) 33 | 34 | self.block1 = self._make_layers(in_channels=32,out_channels=64, block_num=1) 35 | self.block2 = self._make_layers(in_channels=64,out_channels=128, block_num=2) 36 | self.block3 = self._make_layers(in_channels=128,out_channels=256, block_num=8) 37 | self.block4 = self._make_layers(in_channels=256,out_channels=512, block_num=8) 38 | self.block5 = self._make_layers(in_channels=512,out_channels=1024, block_num=4) 39 | 40 | self.avg_pool = nn.AvgPool2d(kernel_size=8,stride=1) 41 | self.linear = nn.Linear(in_features=1024,out_features=num_classes) 42 | self.softmax = nn.Softmax(dim=1) 43 | 44 | def _make_layers(self, in_channels,out_channels, block_num): 45 | _layers = [] 46 | _layers.append(Conv3x3BNReLU(in_channels=in_channels, out_channels=out_channels, stride=2)) 47 | for _ in range(block_num): 48 | _layers.append(Residual(nchannels=out_channels)) 49 | return nn.Sequential(*_layers) 50 | 51 | def forward(self, x): 52 | x = self.first_conv(x) 53 | x = self.block1(x) 54 | x = self.block2(x) 55 | x = self.block3(x) 56 | x = self.block4(x) 57 | x = self.block5(x) 58 | 59 | x = self.avg_pool(x) 60 | x = x.view(x.size(0),-1) 61 | x = self.linear(x) 62 | out = self.softmax(x) 63 | return out 64 | 65 | if __name__ == '__main__': 66 | model = Darknet19() 67 | print(model) 68 | 69 | input = torch.randn(1,3,256,256) 70 | out = model(input) 71 | print(out.shape) -------------------------------------------------------------------------------- /models/Others/PyramidalConvolution.py: -------------------------------------------------------------------------------- 1 | # 
!/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/28 13:40 4 | # @Author : liumin 5 | # @File : PyramidalConvolution.py -------------------------------------------------------------------------------- /models/SemanticSegmentation/FCN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FCN8s(nn.Module): 6 | def __init__(self, num_classes): 7 | super(FCN8s, self).__init__() 8 | vgg = torchvision.models.vgg16() 9 | 10 | features = list(vgg.features.children()) 11 | 12 | self.padd = nn.ZeroPad2d([100,100,100,100]) 13 | 14 | self.pool3 = nn.Sequential(*features[:17]) 15 | self.pool4 = nn.Sequential(*features[17:24]) 16 | self.pool5 = nn.Sequential(*features[24:]) 17 | 18 | self.pool3_conv1x1 = nn.Conv2d(256, num_classes, kernel_size=1) 19 | self.pool4_conv1x1 = nn.Conv2d(512, num_classes, kernel_size=1) 20 | 21 | self.output5 = nn.Sequential( 22 | nn.Conv2d(512, 4096, kernel_size=7), 23 | nn.ReLU(inplace=True), 24 | nn.Dropout(), 25 | nn.Conv2d(4096, 4096, kernel_size=1), 26 | nn.ReLU(inplace=True), 27 | nn.Dropout(), 28 | nn.Conv2d(4096, num_classes, kernel_size=1), 29 | ) 30 | 31 | self.up_pool3_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=16, stride=8) 32 | self.up_pool4_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2) 33 | self.up_pool5_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2) 34 | 35 | def forward(self, x): 36 | _,_, h, w = x.size() 37 | 38 | x = self.padd(x) 39 | pool3 = self.pool3(x) 40 | pool4 = self.pool4(pool3) 41 | pool5 = self.pool5(pool4) 42 | 43 | output5 = self.up_pool5_out(self.output5(pool5)) 44 | 45 | pool4_out = self.pool4_conv1x1(0.01 * pool4) 46 | output4 = self.up_pool4_out(pool4_out[:,:,5:(5 + output5.size()[2]) ,5:(5 + output5.size()[3])]+output5) 47 | 48 | pool3_out = self.pool3_conv1x1(0.0001 * pool3) 49 | output3 = pool3_out[:, :, 9:(9 + output4.size()[2]), 9:(9 + output4.size()[3])] + output4 50 | 51 | out = self.up_pool3_out(output3) 52 | 53 | out = out[:, :, 31: (31 + h), 31: (31 + w)].contiguous() 54 | return out 55 | 56 | 57 | if __name__ == '__main__': 58 | model = FCN8s(num_classes=20) 59 | print(model) 60 | 61 | input = torch.randn(1,3,224,224) 62 | output = model(input) 63 | print(output.shape) 64 | 65 | -------------------------------------------------------------------------------- /models/SemanticSegmentation/LRNnet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/8 15:31 4 | # @Author : liumin 5 | # @File : LRNnet.py -------------------------------------------------------------------------------- /models/SemanticSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # SemanticSegmentation-network 2 | PyTorch implementation of SemanticSegmentation-network 3 | 4 | 5 | 6 | **FCN:** 7 | Fully Convolutional Networks for Semantic Segmentation 8 | 9 | https://arxiv.org/pdf/1411.4038.pdf 10 | 11 | 12 | 13 | **Fast-SCNN:** 14 | 15 | Fast-SCNN: Fast Semantic Segmentation Network 16 | 17 | https://arxiv.org/pdf/1902.04502.pdf 18 | 19 | 20 | 21 | **LEDNet:** 22 | 23 | LEDNet: A Lightweight Encoder-Decoder Network for Real-time Semantic Segmentation 24 | 25 | https://arxiv.org/pdf/1905.02423.pdf 26 | 27 | 28 | 29 | **LRNNet:** 30 | 31 | LRNNet: A Light-Weighted Network with Efficient 
Reduced Non-Local Operation for Real-Time Semantic Segmentation 32 | 33 | https://arxiv.org/pdf/2006.02706.pdf 34 | 35 | 36 | 37 | **FisheyeMODNet:** 38 | 39 | FisheyeMODNet: Moving Object detection on Surround-view Cameras for Autonomous Driving (2019) 40 | 41 | https://arxiv.org/pdf/1908.11789v1.pdf -------------------------------------------------------------------------------- /models/Utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride,padding=1): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | def ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding=1): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | def ConvBN(in_channels,out_channels,kernel_size,stride,padding=1): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding), 28 | nn.BatchNorm2d(out_channels) 29 | ) 30 | 31 | class ResidualBlock(nn.Module): 32 | def __init__(self, in_channels, out_channels): 33 | super(ResidualBlock, self).__init__() 34 | mid_channels = out_channels//2 35 | 36 | self.bottleneck = nn.Sequential( 37 | ConvBNReLU(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, stride=1), 38 | ConvBNReLU(in_channels=mid_channels, out_channels=mid_channels, kernel_size=3, stride=1, padding=1), 39 | ConvBNReLU(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1), 40 | ) 41 | self.shortcut = ConvBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1) 42 | 43 | def forward(self, x): 44 | out = self.bottleneck(x) 45 | return out+self.shortcut(x) -------------------------------------------------------------------------------- /models/pretrainedmodels/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ 2 | 3 | from . import models 4 | from . 
import datasets 5 | 6 | from .models.utils import pretrained_settings 7 | from .models.utils import model_names 8 | 9 | # to support pretrainedmodels.__dict__['nasnetalarge'] 10 | # but depreciated 11 | from .models.fbresnet import fbresnet152 12 | from .models.cafferesnet import cafferesnet101 13 | from .models.bninception import bninception 14 | from .models.resnext import resnext101_32x4d 15 | from .models.resnext import resnext101_64x4d 16 | from .models.inceptionv4 import inceptionv4 17 | from .models.inceptionresnetv2 import inceptionresnetv2 18 | from .models.nasnet import nasnetalarge 19 | from .models.nasnet_mobile import nasnetamobile 20 | from .models.torchvision_models import alexnet 21 | from .models.torchvision_models import densenet121 22 | from .models.torchvision_models import densenet169 23 | from .models.torchvision_models import densenet201 24 | from .models.torchvision_models import densenet161 25 | from .models.torchvision_models import resnet18 26 | from .models.torchvision_models import resnet34 27 | from .models.torchvision_models import resnet50 28 | from .models.torchvision_models import resnet101 29 | from .models.torchvision_models import resnet152 30 | from .models.torchvision_models import inceptionv3 31 | from .models.torchvision_models import squeezenet1_0 32 | from .models.torchvision_models import squeezenet1_1 33 | from .models.torchvision_models import vgg11 34 | from .models.torchvision_models import vgg11_bn 35 | from .models.torchvision_models import vgg13 36 | from .models.torchvision_models import vgg13_bn 37 | from .models.torchvision_models import vgg16 38 | from .models.torchvision_models import vgg16_bn 39 | from .models.torchvision_models import vgg19_bn 40 | from .models.torchvision_models import vgg19 41 | from .models.dpn import dpn68 42 | from .models.dpn import dpn68b 43 | from .models.dpn import dpn92 44 | from .models.dpn import dpn98 45 | from .models.dpn import dpn131 46 | from .models.dpn import dpn107 47 | from .models.xception import xception 48 | from .models.senet import senet154 49 | from .models.senet import se_resnet50 50 | from .models.senet import se_resnet101 51 | from .models.senet import se_resnet152 52 | from .models.senet import se_resnext50_32x4d 53 | from .models.senet import se_resnext101_32x4d 54 | from .models.pnasnet import pnasnet5large 55 | from .models.polynet import polynet 56 | -------------------------------------------------------------------------------- /models/pretrainedmodels/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from .voc import Voc2007Classification -------------------------------------------------------------------------------- /models/pretrainedmodels/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from .fbresnet import fbresnet152 3 | 4 | from .cafferesnet import cafferesnet101 5 | 6 | from .bninception import bninception 7 | 8 | from .resnext import resnext101_32x4d 9 | from .resnext import resnext101_64x4d 10 | 11 | from .inceptionv4 import inceptionv4 12 | 13 | from .inceptionresnetv2 import inceptionresnetv2 14 | 15 | from .nasnet import nasnetalarge 16 | 17 | from .nasnet_mobile import nasnetamobile 18 | 19 | from .torchvision_models import alexnet 20 | from .torchvision_models import densenet121 21 | from .torchvision_models import 
densenet169 22 | from .torchvision_models import densenet201 23 | from .torchvision_models import densenet161 24 | from .torchvision_models import resnet18 25 | from .torchvision_models import resnet34 26 | from .torchvision_models import resnet50 27 | from .torchvision_models import resnet101 28 | from .torchvision_models import resnet152 29 | from .torchvision_models import inceptionv3 30 | from .torchvision_models import squeezenet1_0 31 | from .torchvision_models import squeezenet1_1 32 | from .torchvision_models import vgg11 33 | from .torchvision_models import vgg11_bn 34 | from .torchvision_models import vgg13 35 | from .torchvision_models import vgg13_bn 36 | from .torchvision_models import vgg16 37 | from .torchvision_models import vgg16_bn 38 | from .torchvision_models import vgg19_bn 39 | from .torchvision_models import vgg19 40 | 41 | from .dpn import dpn68 42 | from .dpn import dpn68b 43 | from .dpn import dpn92 44 | from .dpn import dpn98 45 | from .dpn import dpn131 46 | from .dpn import dpn107 47 | 48 | from .xception import xception 49 | 50 | from .senet import senet154 51 | from .senet import se_resnet50 52 | from .senet import se_resnet101 53 | from .senet import se_resnet152 54 | from .senet import se_resnext50_32x4d 55 | from .senet import se_resnext101_32x4d 56 | 57 | from .pnasnet import pnasnet5large 58 | from .polynet import polynet 59 | -------------------------------------------------------------------------------- /models/pretrainedmodels/models/fbresnet/resnet152_dump.lua: -------------------------------------------------------------------------------- 1 | require 'cutorch' 2 | require 'cunn' 3 | require 'cudnn' 4 | require 'image' 5 | vision=require 'torchnet-vision' 6 | 7 | net=vision.models.resnet.load{filename='data/resnet152/net.t7',length=152} 8 | print(net) 9 | 10 | require 'nn' 11 | nn.Module.parameters = function(self) 12 | if self.weight and self.bias and self.running_mean and self.running_var then 13 | return {self.weight, self.bias, self.running_mean, self.running_var}, {self.gradWeight, self.gradBias} 14 | 15 | elseif self.weight and self.bias then 16 | return {self.weight, self.bias}, {self.gradWeight, self.gradBias} 17 | elseif self.weight then 18 | return {self.weight}, {self.gradWeight} 19 | elseif self.bias then 20 | return {self.bias}, {self.gradBias} 21 | else 22 | return 23 | end 24 | end 25 | 26 | netparams, _ = net:parameters() 27 | print(#netparams) 28 | torch.save('data/resnet152/netparams.t7', netparams) 29 | 30 | net=net:cuda() 31 | net:evaluate() 32 | --p, gp = net:getParameters() 33 | input = torch.ones(1,3,224,224) 34 | input[{1,1,1,1}] = -1 35 | input[1] = image.load('data/cat_224.png') 36 | print(input:sum()) 37 | input = input:cuda() 38 | output=net:forward(input) 39 | 40 | for i=1, 11 do 41 | torch.save('data/resnet152/output'..i..'.t7', net:get(i).output:float()) 42 | end 43 | -------------------------------------------------------------------------------- /models/pretrainedmodels/models/resnext_features/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from .resnext101_32x4d_features import resnext101_32x4d_features 3 | from .resnext101_64x4d_features import resnext101_64x4d_features -------------------------------------------------------------------------------- /models/pretrainedmodels/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
print_function, division, absolute_import 2 | from .fbresnet import pretrained_settings as fbresnet_settings 3 | from .bninception import pretrained_settings as bninception_settings 4 | from .resnext import pretrained_settings as resnext_settings 5 | from .inceptionv4 import pretrained_settings as inceptionv4_settings 6 | from .inceptionresnetv2 import pretrained_settings as inceptionresnetv2_settings 7 | from .torchvision_models import pretrained_settings as torchvision_models_settings 8 | from .nasnet_mobile import pretrained_settings as nasnet_mobile_settings 9 | from .nasnet import pretrained_settings as nasnet_settings 10 | from .dpn import pretrained_settings as dpn_settings 11 | from .xception import pretrained_settings as xception_settings 12 | from .senet import pretrained_settings as senet_settings 13 | from .cafferesnet import pretrained_settings as cafferesnet_settings 14 | from .pnasnet import pretrained_settings as pnasnet_settings 15 | from .polynet import pretrained_settings as polynet_settings 16 | 17 | all_settings = [ 18 | fbresnet_settings, 19 | bninception_settings, 20 | resnext_settings, 21 | inceptionv4_settings, 22 | inceptionresnetv2_settings, 23 | torchvision_models_settings, 24 | nasnet_mobile_settings, 25 | nasnet_settings, 26 | dpn_settings, 27 | xception_settings, 28 | senet_settings, 29 | cafferesnet_settings, 30 | pnasnet_settings, 31 | polynet_settings 32 | ] 33 | 34 | model_names = [] 35 | pretrained_settings = {} 36 | for settings in all_settings: 37 | for model_name, model_settings in settings.items(): 38 | pretrained_settings[model_name] = model_settings 39 | model_names.append(model_name) 40 | -------------------------------------------------------------------------------- /models/pretrainedmodels/version.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | __version__ = '0.7.4' 3 | -------------------------------------------------------------------------------- /pytorch_loss/README.md: -------------------------------------------------------------------------------- 1 | # pytorch-loss 2 | 3 | Implementation of label-smooth, amsoftmax, focal-loss, dual-focal-loss, triplet-loss, giou-loss, affinity-loss, pc_softmax_cross_entropy, ohem-loss(softmax based on line hard mining loss), large-margin-softmax(bmvc2019), lovasz-softmax-loss, and dice-loss(both generalized soft dice loss and batch soft dice loss). Maybe this is useful in my future work. 4 | 5 | 6 | Also tried to implement swish, hard-swish(hswish) and mish activation functions. 7 | 8 | Additionally, cuda based one-hot function is added (support label smooth). 9 | 10 | Newly add an "Exponential Moving Average(EMA)" operator. 11 | 12 | Add convolution ops, such as coord-conv2d, and dynamic-conv2d(dy-conv2d). 
13 | 14 | Some operators are implemented with a pytorch cuda extension, so you need to compile it first: 15 | ``` 16 | $ python setup.py install 17 | ``` 18 | 19 | After installing, you can pick up what you need and use the losses or ops like one of these: 20 | ```python 21 | from pytorch_loss import SwishV1, SwishV2, SwishV3 22 | from pytorch_loss import HSwishV1, HSwishV2, HSwishV3 23 | from pytorch_loss import MishV1, MishV2, MishV3 24 | from pytorch_loss import convert_to_one_hot, convert_to_one_hot_cu, OnehotEncoder 25 | from pytorch_loss import EMA 26 | 27 | from pytorch_loss import TripletLoss 28 | from pytorch_loss import SoftDiceLossV1, SoftDiceLossV2, SoftDiceLossV3 29 | from pytorch_loss import PCSoftmaxCrossEntropyV1, PCSoftmaxCrossEntropyV2 30 | from pytorch_loss import LargeMarginSoftmaxV1, LargeMarginSoftmaxV2, LargeMarginSoftmaxV3 31 | from pytorch_loss import LabelSmoothSoftmaxCEV1, LabelSmoothSoftmaxCEV2, LabelSmoothSoftmaxCEV3 32 | from pytorch_loss import generalized_iou_loss 33 | from pytorch_loss import FocalLossV1, FocalLossV2, FocalLossV3 34 | from pytorch_loss import Dual_Focal_loss 35 | from pytorch_loss import GeneralizedSoftDiceLoss, BatchSoftDiceLoss 36 | from pytorch_loss import AMSoftmax 37 | from pytorch_loss import AffinityFieldLoss, AffinityLoss 38 | from pytorch_loss import OhemCELoss, OhemLargeMarginLoss 39 | from pytorch_loss import LovaszSoftmaxV1, LovaszSoftmaxV3 40 | from pytorch_loss import TaylorCrossEntropyLossV1, TaylorCrossEntropyLossV3 41 | 42 | from pytorch_loss import TaylorSoftmaxV1, TaylorSoftmaxV3 43 | from pytorch_loss import LogTaylorSoftmaxV1, LogTaylorSoftmaxV3 44 | 45 | from pytorch_loss import CoordConv2d, DY_Conv2d 46 | ``` 47 | Note that some losses or ops have 3 versions, like `LabelSmoothSoftmaxCEV1`, `LabelSmoothSoftmaxCEV2` and `LabelSmoothSoftmaxCEV3`: here `V1` means an implementation with pure pytorch ops that uses `torch.autograd` for the backward computation, `V2` means an implementation with pure pytorch ops but a self-derived formula for the backward computation, and `V3` means an implementation with a cuda extension. Generally speaking, the `V3` ops are faster and more memory efficient, since I have tried to squeeze everything into one cuda kernel function, which in most cases brings less overhead than a combination of pytorch ops. 
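
As a quick illustration, here is a minimal sketch of driving one of these losses in a training step. It mirrors the `AMSoftmax` self-test shipped in this repo; the feature dimension, class count and batch size below are illustrative assumptions, not required values:

```python
# Minimal usage sketch (illustrative sizes, not values required by the library).
import torch
from pytorch_loss import AMSoftmax

criteria = AMSoftmax(in_feats=1024, n_classes=10, m=0.3, s=15)

embeds = torch.randn(20, 1024)                           # embeddings from a backbone
labels = torch.randint(0, 10, (20,), dtype=torch.long)   # integer class labels

loss = criteria(embeds, labels)   # forward returns the scalar loss
loss.backward()                   # gradients flow into criteria.W (and your backbone in real use)
```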
48 | 49 | 50 | -------------------------------------------------------------------------------- /pytorch_loss/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .swish import SwishV1, SwishV2, SwishV3 3 | from .hswish import HSwishV1, HSwishV2, HSwishV3 4 | from .frelu import FReLU 5 | from .mish import MishV1, MishV2, MishV3 6 | from .one_hot import convert_to_one_hot, convert_to_one_hot_cu, OnehotEncoder 7 | from .ema import EMA 8 | 9 | from .triplet_loss import TripletLoss 10 | from .soft_dice_loss import SoftDiceLossV1, SoftDiceLossV2, SoftDiceLossV3 11 | from .pc_softmax import PCSoftmaxCrossEntropyV1, PCSoftmaxCrossEntropyV2 12 | from .large_margin_softmax import LargeMarginSoftmaxV1, LargeMarginSoftmaxV2, LargeMarginSoftmaxV3 13 | from .label_smooth import LabelSmoothSoftmaxCEV1, LabelSmoothSoftmaxCEV2, LabelSmoothSoftmaxCEV3 14 | from .generalized_iou_loss import generalized_iou_loss 15 | from .focal_loss import FocalLossV1, FocalLossV2, FocalLossV3 16 | from .dual_focal_loss import Dual_Focal_loss 17 | from .dice_loss import GeneralizedSoftDiceLoss, BatchSoftDiceLoss 18 | from .amsoftmax import AMSoftmax 19 | from .affinity_loss import AffinityFieldLoss, AffinityLoss 20 | from .ohem_loss import OhemCELoss, OhemLargeMarginLoss 21 | from .conv_ops import CoordConv2d, DY_Conv2d 22 | from .lovasz_softmax import LovaszSoftmaxV1, LovaszSoftmaxV3 23 | from .taylor_softmax import TaylorSoftmaxV1, TaylorSoftmaxV3, LogTaylorSoftmaxV1, LogTaylorSoftmaxV3, TaylorCrossEntropyLossV1, TaylorCrossEntropyLossV3 24 | -------------------------------------------------------------------------------- /pytorch_loss/amsoftmax.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class AMSoftmax(nn.Module): 10 | 11 | def __init__(self, 12 | in_feats, 13 | n_classes=10, 14 | m=0.3, 15 | s=15): 16 | super(AMSoftmax, self).__init__() 17 | self.m = m 18 | self.s = s 19 | self.in_feats = in_feats 20 | self.W = torch.nn.Parameter(torch.randn(in_feats, n_classes), requires_grad=True) 21 | self.ce = nn.CrossEntropyLoss() 22 | nn.init.xavier_normal_(self.W, gain=1) 23 | 24 | def forward(self, x, lb): 25 | assert x.size()[0] == lb.size()[0] 26 | assert x.size()[1] == self.in_feats 27 | x_norm = torch.norm(x, p=2, dim=1, keepdim=True).clamp(min=1e-9) 28 | x_norm = torch.div(x, x_norm) 29 | w_norm = torch.norm(self.W, p=2, dim=0, keepdim=True).clamp(min=1e-9) 30 | w_norm = torch.div(self.W, w_norm) 31 | costh = torch.mm(x_norm, w_norm) 32 | delt_costh = torch.zeros_like(costh).scatter_(1, lb.unsqueeze(1), self.m) 33 | costh_m = costh - delt_costh 34 | costh_m_s = self.s * costh_m 35 | loss = self.ce(costh_m_s, lb) 36 | return loss 37 | 38 | 39 | if __name__ == '__main__': 40 | criteria = AMSoftmax(1024, 10) 41 | a = torch.randn(20, 1024) 42 | lb = torch.randint(0, 10, (20, ), dtype=torch.long) 43 | loss = criteria(a, lb) 44 | loss.backward() 45 | 46 | print(loss.detach().numpy()) 47 | print(list(criteria.parameters())[0].shape) 48 | print(type(next(criteria.parameters()))) 49 | 50 | 51 | -------------------------------------------------------------------------------- /pytorch_loss/dual_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Dual_Focal_loss(nn.Module): 6 | ''' 7 | This loss is proposed in this paper: https://arxiv.org/abs/1909.11932 8 | It does not work in my projects, hope it will work well in your projects. 9 | Hope you can correct me if there are any mistakes in the implementation. 10 | ''' 11 | 12 | def __init__(self, ignore_lb=255, eps=1e-5, reduction='mean'): 13 | super(Dual_Focal_loss, self).__init__() 14 | self.ignore_lb = ignore_lb 15 | self.eps = eps 16 | self.reduction = reduction 17 | self.mse = nn.MSELoss(reduction='none') 18 | 19 | def forward(self, logits, label): 20 | ignore = label.data.cpu() == self.ignore_lb 21 | n_valid = (ignore == 0).sum() 22 | label = label.clone() 23 | label[ignore] = 0 24 | lb_one_hot = logits.data.clone().zero_().scatter_(1, label.unsqueeze(1), 1).detach() 25 | 26 | pred = torch.softmax(logits, dim=1) 27 | loss = -torch.log(self.eps + 1. - self.mse(pred, lb_one_hot)).sum(dim=1) 28 | loss[ignore] = 0 29 | if self.reduction == 'mean': 30 | loss = loss.sum() / n_valid 31 | elif self.reduction == 'sum': 32 | loss = loss.sum() 33 | elif self.reduction == 'none': 34 | loss = loss 35 | return loss 36 | 37 | -------------------------------------------------------------------------------- /pytorch_loss/frelu.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class FReLU(nn.Module): 8 | 9 | def __init__(self, in_chan): 10 | super(FReLU, self).__init__() 11 | self.conv = nn.Conv2d(in_chan, in_chan, 3, 1, 1, groups=in_chan) 12 | self.bn = nn.BatchNorm2d(in_chan) 13 | nn.init.xavier_normal_(self.conv.weight, gain=1.) 
14 | 15 | def forward(self, x): 16 | branch = self.bn(self.conv(x)) 17 | out = torch.max(x, branch) 18 | return out 19 | 20 | 21 | if __name__ == "__main__": 22 | m = FReLU(32) 23 | inten = torch.randn(4, 32, 224, 224) 24 | out = m(inten) 25 | print(out.size()) 26 | -------------------------------------------------------------------------------- /pytorch_loss/generalized_iou_loss.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import torch 5 | 6 | 7 | def generalized_iou_loss(gt_bboxes, pr_bboxes, reduction='mean'): 8 | """ 9 | gt_bboxes: tensor (-1, 4) xyxy 10 | pr_bboxes: tensor (-1, 4) xyxy 11 | loss proposed in the paper of giou 12 | """ 13 | gt_area = (gt_bboxes[:, 2]-gt_bboxes[:, 0])*(gt_bboxes[:, 3]-gt_bboxes[:, 1]) 14 | pr_area = (pr_bboxes[:, 2]-pr_bboxes[:, 0])*(pr_bboxes[:, 3]-pr_bboxes[:, 1]) 15 | 16 | # iou 17 | lt = torch.max(gt_bboxes[:, :2], pr_bboxes[:, :2]) 18 | rb = torch.min(gt_bboxes[:, 2:], pr_bboxes[:, 2:]) 19 | TO_REMOVE = 1 20 | wh = (rb - lt + TO_REMOVE).clamp(min=0) 21 | inter = wh[:, 0] * wh[:, 1] 22 | union = gt_area + pr_area - inter 23 | iou = inter / union 24 | # enclosure 25 | lt = torch.min(gt_bboxes[:, :2], pr_bboxes[:, :2]) 26 | rb = torch.max(gt_bboxes[:, 2:], pr_bboxes[:, 2:]) 27 | wh = (rb - lt + TO_REMOVE).clamp(min=0) 28 | enclosure = wh[:, 0] * wh[:, 1] 29 | 30 | giou = iou - (enclosure-union)/enclosure 31 | loss = 1. - giou 32 | if reduction == 'mean': 33 | loss = loss.mean() 34 | elif reduction == 'sum': 35 | loss = loss.sum() 36 | elif reduction == 'none': 37 | pass 38 | return loss 39 | 40 | if __name__ == '__main__': 41 | gt_bbox = torch.tensor([[1, 2, 3, 4]], dtype=torch.float32) 42 | pr_bbox = torch.tensor([[2, 3, 4, 5]], dtype=torch.float32) 43 | loss = generalized_iou_loss(gt_bbox, pr_bbox, reduction='none') 44 | print(loss) 45 | 46 | 47 | -------------------------------------------------------------------------------- /pytorch_loss/info.txt: -------------------------------------------------------------------------------- 1 | Common loss functions for deep learning. 
2 | -------------------------------------------------------------------------------- /pytorch_loss/ohem_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | import ohem_cpp 10 | from .large_margin_softmax import LargeMarginSoftmaxV3 11 | 12 | 13 | class OhemCELoss(nn.Module): 14 | 15 | def __init__(self, score_thresh, n_min=None, ignore_index=255): 16 | super(OhemCELoss, self).__init__() 17 | self.score_thresh = score_thresh 18 | self.ignore_lb = ignore_index 19 | self.n_min = n_min 20 | self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_index, reduction='mean') 21 | 22 | def forward(self, logits, labels): 23 | n_min = labels.numel() // 16 if self.n_min is None else self.n_min 24 | labels = ohem_cpp.score_ohem_label(logits.float(), labels, 25 | self.ignore_lb, self.score_thresh, n_min).detach() 26 | loss = self.criteria(logits, labels) 27 | return loss 28 | 29 | 30 | class OhemLargeMarginLoss(nn.Module): 31 | 32 | def __init__(self, score_thresh, n_min=None, ignore_index=255): 33 | super(OhemLargeMarginLoss, self).__init__() 34 | self.score_thresh = score_thresh 35 | self.ignore_lb = ignore_index 36 | self.n_min = n_min 37 | self.criteria = LargeMarginSoftmaxV3( 38 | ignore_index=ignore_index, reduction='mean') 39 | 40 | def forward(self, logits, labels): 41 | n_min = labels.numel() // 16 if self.n_min is None else self.n_min 42 | labels = ohem_cpp.score_ohem_label(logits.float(), labels, 43 | self.ignore_lb, self.score_thresh, n_min).detach() 44 | loss = self.criteria(logits, labels) 45 | return loss 46 | 47 | 48 | if __name__ == '__main__': 49 | criteria1 = OhemLargeMarginLoss(score_thresh=0.7, n_min=16*20*20//16).cuda() 50 | criteria2 = OhemCELoss(score_thresh=0.7, n_min=16*20*20//16).cuda() 51 | net1 = nn.Sequential( 52 | nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), 53 | ) 54 | net1.cuda() 55 | net1.train() 56 | net2 = nn.Sequential( 57 | nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), 58 | ) 59 | net2.cuda() 60 | net2.train() 61 | 62 | with torch.no_grad(): 63 | inten = torch.randn(16, 3, 20, 20).cuda() 64 | lbs = torch.randint(0, 19, [16, 20, 20]).cuda() 65 | lbs[1, 10, 10] = 255 66 | 67 | torch.autograd.set_detect_anomaly(True) 68 | 69 | logits1 = net1(inten) 70 | logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear', align_corners=True) 71 | logits2 = net2(inten) 72 | logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear', align_corners=True) 73 | 74 | loss1 = criteria1(logits1, lbs) 75 | loss2 = criteria2(logits2, lbs.clone()) 76 | loss = loss1 + loss2 77 | loss.backward() 78 | 79 | 80 | -------------------------------------------------------------------------------- /pytorch_loss/test.py: -------------------------------------------------------------------------------- 1 | 2 | ## TODO: test case should cover, n_class from 3 to 256, test ignore index, test speed and memory usage 3 | 4 | from lovasz_softmax import LovaszSoftmaxV1, LovaszSoftmaxV3 5 | from label_smooth import LabelSmoothSoftmaxCEV3 6 | import torchvision 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import numpy as np 11 | import random 12 | torch.manual_seed(15) 13 | random.seed(15) 14 | np.random.seed(15) 15 | torch.backends.cudnn.deterministic = True 16 | 17 | 18 | class Model(nn.Module): 19 | def __init__(self, n_classes): 20 | super(Model, 
self).__init__() 21 | net = torchvision.models.resnet18(pretrained=False) 22 | self.conv1 = net.conv1 23 | self.bn1 = net.bn1 24 | self.maxpool = net.maxpool 25 | self.relu = net.relu 26 | self.layer1 = net.layer1 27 | self.layer2 = net.layer2 28 | self.layer3 = net.layer3 29 | self.layer4 = net.layer4 30 | self.fc = nn.Conv2d(512, n_classes, 3, 1, 1) 31 | def forward(self, x): 32 | feat = self.conv1(x) 33 | feat = self.bn1(feat) 34 | feat = self.relu(feat) 35 | feat = self.maxpool(feat) 36 | feat = self.layer1(feat) 37 | feat = self.layer2(feat) 38 | feat = self.layer3(feat) 39 | feat = self.layer4(feat) 40 | feat = self.fc(feat) 41 | # out = F.interpolate(feat, x.size()[2:], mode='bilinear', align_corners=True) 42 | out = torch.mean(feat, dim=(2, 3)) 43 | return out 44 | 45 | c = 2 46 | net1 = Model(c) 47 | # net2 = Model() 48 | # net2.load_state_dict(net1.state_dict()) 49 | red = 'mean' 50 | # criteria1 = LovaszSoftmaxV1(reduction='sum', ignore_index=255) 51 | # criteria1 = LovaszSoftmaxV3(reduction='sum', ignore_index=255) 52 | criteria1 = LabelSmoothSoftmaxCEV3(reduction='sum', ignore_index=255) 53 | print(criteria1) 54 | 55 | net1.cuda() 56 | # net2.cuda() 57 | net1.train() 58 | # net2.train() 59 | criteria1.cuda() 60 | # criteria2.cuda() 61 | # net1 = net1.half() 62 | 63 | optim1 = torch.optim.SGD(net1.parameters(), lr=1e-2) 64 | # optim2 = torch.optim.SGD(net2.parameters(), lr=1e-2) 65 | 66 | bs, h, w = 2, 1000, 1000 67 | for it in range(1000): 68 | inten = torch.randn(bs, 3, h, w).cuda()#.half() 69 | # lbs = torch.randint(0, c, (bs, h, w)).cuda() 70 | lbs = torch.randint(0, c, (bs, )).cuda() 71 | # lbs[1, 1, 1] = 255 72 | # lbs[0, 3:100, 2:100] = 255 73 | # lbs[1, 4:70, 28:200] = 255 74 | logits1 = net1(inten) 75 | logits1.retain_grad() 76 | loss1 = criteria1(logits1, lbs) 77 | optim1.zero_grad() 78 | loss1.backward() 79 | optim1.step() 80 | with torch.no_grad(): 81 | if (it+1) % 50 == 0: 82 | print('iter: {}, ================='.format(it+1)) 83 | -------------------------------------------------------------------------------- /pytorch_loss/triplet_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class TripletLoss(nn.Module): 10 | ''' 11 | Compute normal triplet loss or soft margin triplet loss given triplets 12 | ''' 13 | def __init__(self, margin=None): 14 | super(TripletLoss, self).__init__() 15 | self.margin = margin 16 | if self.margin is None: # if no margin assigned, use soft-margin 17 | self.Loss = nn.SoftMarginLoss() 18 | else: 19 | self.Loss = nn.TripletMarginLoss(margin=margin, p=2) 20 | 21 | def forward(self, anchor, pos, neg): 22 | if self.margin is None: 23 | num_samples = anchor.shape[0] 24 | y = torch.ones((num_samples, 1)).view(-1) 25 | if anchor.is_cuda: y = y.cuda() 26 | ap_dist = torch.norm(anchor-pos, 2, dim=1).view(-1) 27 | an_dist = torch.norm(anchor-neg, 2, dim=1).view(-1) 28 | loss = self.Loss(an_dist - ap_dist, y) 29 | else: 30 | loss = self.Loss(anchor, pos, neg) 31 | 32 | return loss 33 | 34 | 35 | if __name__ == '__main__': 36 | pass 37 | -------------------------------------------------------------------------------- /results/info.txt: -------------------------------------------------------------------------------- 1 | training results -------------------------------------------------------------------------------- /results/training.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhugeKongan/torch-template-for-deep-learning/82978f454c7f2662d0eb972b5a4a1e5d5961b232/results/training.png -------------------------------------------------------------------------------- /runs/info.txt: -------------------------------------------------------------------------------- 1 | tensorboard --logdir= -------------------------------------------------------------------------------- /tf_to_pytorch/README.md: -------------------------------------------------------------------------------- 1 | ### TensorFlow to PyTorch Conversion 2 | 3 | This directory is used to convert TensorFlow weights to PyTorch. It was hacked together fairly quickly, so the code is not the most beautiful (just a warning!), but it does the job. I will be refactoring it soon. 4 | 5 | I should also emphasize that you do *not* need to run any of this code to load pretrained weights. Simply use `EfficientNet.from_pretrained(...)`. 6 | 7 | That being said, the main script here is `convert_to_tf/load_tf_weights.py`. In order to use it, you should first download the pretrained TensorFlow weights: 8 | ```bash 9 | cd pretrained_tensorflow 10 | ./download.sh efficientnet-b0 11 | cd .. 12 | ``` 13 | Then 14 | ```bash 15 | mkdir -p pretrained_pytorch 16 | cd convert_tf_to_pt 17 | python load_tf_weights.py \ 18 | --model_name efficientnet-b0 \ 19 | --tf_checkpoint ../pretrained_tensorflow/efficientnet-b0/ \ 20 | --output_file ../pretrained_pytorch/efficientnet-b0.pth 21 | ``` 22 | 23 | 26 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkdir original_tf 4 | cd original_tf 5 | touch __init__.py 6 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/efficientnet_builder.py 7 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/efficientnet_model.py 8 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/eval_ckpt_main.py 9 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/utils.py 10 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/preprocessing.py 11 | cd .. 
12 | mkdir -p tmp -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/rename.sh: -------------------------------------------------------------------------------- 1 | for i in 0 1 2 3 4 5 6 7 8 2 | do 3 | X=$(sha256sum efficientnet-b${i}.pth | head -c 8) 4 | mv efficientnet-b${i}.pth efficientnet-b${i}-${X}.pth 5 | done 6 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/run.sh: -------------------------------------------------------------------------------- 1 | python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b0 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b0/ --output_file ../pretrained_pytorch/efficientnet-b0.pth 2 | 3 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b1 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b1/ --output_file ../pretrained_pytorch/efficientnet-b1.pth 4 | 5 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b2 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b2/ --output_file ../pretrained_pytorch/efficientnet-b2.pth 6 | 7 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b3 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b3/ --output_file ../pretrained_pytorch/efficientnet-b3.pth 8 | 9 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b4 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b4/ --output_file ../pretrained_pytorch/efficientnet-b4.pth 10 | 11 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b5 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b5/ --output_file ../pretrained_pytorch/efficientnet-b5.pth 12 | 13 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b6 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b6/ --output_file ../pretrained_pytorch/efficientnet-b6.pth 14 | 15 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b7 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b7/ --output_file ../pretrained_pytorch/efficientnet-b7.pth 16 | 17 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b8 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b8/ --output_file ../pretrained_pytorch/efficientnet-b8.pth 18 | -------------------------------------------------------------------------------- /tf_to_pytorch/pretrained_tensorflow/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | # This script accepts a single command-line argument, which specifies which model to download. 5 | # Only the b0, b1, b2, and b3 models have been released, so your command must be one of them. 
6 | 7 | # For example, to download efficientnet-b0, run: 8 | # ./download.sh efficientnet-b0 9 | # And to download efficientnet-b3, run: 10 | # ./download.sh efficientnet-b3 11 | 12 | MODEL=$1 13 | wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/advprop/${MODEL}.tar.gz 14 | tar xvf ${MODEL}.tar.gz 15 | rm ${MODEL}.tar.gz 16 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # from torchvision.transforms.functional import resize 3 | # import torch 4 | # a=torch.ones(2,2,10,10) 5 | # # b=torch.zeros(2,1,5,5) 6 | # a[:,:,1:6,2:7]=2 7 | # # b[:,:,2:,2:]=3 8 | # print(a) 9 | # # print(b) 10 | # # print(a.mul(b)) 11 | # # print(a.reshape(-1,5,5)) 12 | # # 13 | # c=resize(a,[5,5]) 14 | # # a=torch.resize_as_(a,b) 15 | # # print(a) 16 | # print(c) 17 | # import torch 18 | # a= torch.Tensor([ 19 | # [4,1,2,0,0], 20 | # [2,4,0,0,0], 21 | # [1,1,1,6,5], 22 | # [1,2,2,2,2], 23 | # [3,0,0,0,0], 24 | # [2,2,0,0,0]]) 25 | # index = torch.LongTensor([[3],[2],[5],[5],[1],[2]]) 26 | # print(a.size(),index.size()) 27 | # b = torch.gather(a, 1,index-1) 28 | # print(b) 29 | # import torch 30 | # a= torch.Tensor([ 31 | # [0.4,0.1,0.2,0.,0.3], 32 | # [0.2,0.4,0.2,0.1,0.1], 33 | # [0.1,0.1,0.1,0.6,0], 34 | # [0.1,0.3,0.2,0.2,0.2], 35 | # [0.3,0.0,0.7,0,0], 36 | # [0.2,0.2,0.0,0.5,0.1]]) 37 | # index = torch.LongTensor([[1],[2],[4],[2],[3],[4]]) 38 | # print(a.size(),index.size()) 39 | # b = torch.gather(a, 1,index-1) 40 | # print(b) 41 | # import torch 42 | # targets=torch.zeros(3,5) 43 | # scr1=torch.Tensor([[0.1],[0.2],[0.3]]) 44 | # scr2=torch.Tensor([[0.6],[0.5],[0.4]]) 45 | # index1 = torch.LongTensor([[3],[2],[5]]) 46 | # index2 = torch.LongTensor([[1],[2],[4]]) 47 | # targets.scatter_(1,index1-1,scr1) 48 | # targets.scatter_(1,index2-1,scr2) 49 | # print(targets) -------------------------------------------------------------------------------- /utils/devide_dataset.py: -------------------------------------------------------------------------------- 1 | import io 2 | import glob 3 | import os 4 | from shutil import move 5 | from os.path import join 6 | from os import listdir, rmdir 7 | 8 | target_folder = '/disks/disk2/lishengyan/dataset/tiny-imagenet-200/val/' 9 | test_folder = '/disks/disk2/lishengyan/dataset/tiny-imagenet-200/test/' 10 | 11 | os.mkdir(test_folder) 12 | val_dict = {} 13 | with open('/disks/disk2/lishengyan/dataset/tiny-imagenet-200/val/val_annotations.txt', 'r') as f: 14 | for line in f.readlines(): 15 | split_line = line.split('\t') 16 | val_dict[split_line[0]] = split_line[1] 17 | 18 | paths = glob.glob('/disks/disk2/lishengyan/dataset/tiny-imagenet-200/val/images/*') 19 | for path in paths: 20 | file = path.split('/')[-1] 21 | folder = val_dict[file] 22 | if not os.path.exists(target_folder + str(folder)): 23 | os.mkdir(target_folder + str(folder)) 24 | os.mkdir(target_folder + str(folder) + '/images') 25 | if not os.path.exists(test_folder + str(folder)): 26 | os.mkdir(test_folder + str(folder)) 27 | os.mkdir(test_folder + str(folder) + '/images') 28 | 29 | for path in paths: 30 | file = path.split('/')[-1] 31 | folder = val_dict[file] 32 | # if len(glob.glob(target_folder + str(folder) + '/images/*')) < 50: 33 | # dest = target_folder + str(folder) + '/images/' + str(file) 34 | # else: 35 | dest = test_folder + str(folder) + '/images/' + str(file) 36 | move(path, dest) 37 | 38 | # rmdir('./tiny-imagenet-200/val/images') 
-------------------------------------------------------------------------------- /utils/preprocess.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | 5 | 6 | import sys 7 | sys.path.append("..") 8 | import cfg 9 | import random 10 | 11 | 12 | 13 | if __name__ == '__main__': 14 | traindata_path = cfg.BASE + 'train' 15 | labels = os.listdir(traindata_path) 16 | valdata_path = cfg.BASE + 'test' 17 | ## write the train.txt file 18 | txtpath = cfg.BASE 19 | # print(labels) 20 | for index, label in enumerate(labels): 21 | imglist = glob.glob(os.path.join(traindata_path,label, '*.png')) 22 | # print(imglist) 23 | random.shuffle(imglist) 24 | print(len(imglist)) 25 | trainlist = imglist[:int(0.8*len(imglist))] 26 | vallist = imglist[int(0.8*len(imglist)):] 27 | with open(txtpath + 'train.txt', 'a')as f: 28 | for img in trainlist: 29 | # print(img + ' ' + str(index)) 30 | f.write(img + ' ' + str(index)) 31 | f.write('\n') 32 | 33 | with open(txtpath + 'val.txt', 'a')as f: 34 | for img in vallist: 35 | # print(img + ' ' + str(index)) 36 | f.write(img + ' ' + str(index)) 37 | f.write('\n') 38 | 39 | 40 | imglist = glob.glob(os.path.join(valdata_path, '*.jpg')) 41 | with open(txtpath + 'test.txt', 'a')as f: 42 | for img in imglist: 43 | f.write(img) 44 | f.write('\n') -------------------------------------------------------------------------------- /visualization.py: -------------------------------------------------------------------------------- 1 | 2 | ''' 3 | Feature visualization, using resnet50 as an example 4 | 5 | The model definition comes from torchvision 6 | 7 | For a specific model, look up the model definition, take the output of the layer you want to visualize, and export that feature map 8 | 9 | ''' 10 | 11 | 12 | import torch 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | import torch 16 | import os 17 | from PIL import Image 18 | import numpy as np 19 | 20 | import sys 21 | sys.path.append("..") 22 | import cfg 23 | from dataloder import validation_transforms 24 | 25 | # For a given network layer output x: x is a numpy array of shape [batch, channels, width, height]; only the first sample (index 0) is drawn 26 | # %matplotlib inline 27 | def draw_features(width, height, channels,x,savename): 28 | ''' 29 | x: input array, the output of some network layer 30 | savename: save path for the feature visualization 31 | width, height: number of sub-plots per row and per column; their product equals channels 32 | ''' 33 | fig = plt.figure(figsize=(32,32)) 34 | fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, wspace=0.05, hspace=0.05) 35 | for i in range(channels): 36 | plt.subplot(height,width, i + 1) 37 | plt.axis('off') 38 | img = x[0, i, :, :] 39 | pmin = np.min(img) 40 | pmax = np.max(img) 41 | img = (img - pmin) / (pmax - pmin + 0.000001) 42 | plt.imshow(img, cmap='gray') 43 | # print("{}/{}".format(i, channels)) 44 | fig.savefig(savename, dpi=300) 45 | fig.clf() 46 | plt.close() 47 | 48 | 49 | # load the checkpoint 50 | def load_checkpoint(filepath): 51 | checkpoint = torch.load(filepath) 52 | model = checkpoint['model'] # extract the network structure 53 | model.load_state_dict(checkpoint['model_state_dict']) # load the network weights 54 | for parameter in model.parameters(): 55 | parameter.requires_grad = False 56 | model.eval() 57 | # print(model) 58 | # for name in model.state_dict(): 59 | # print(name) 60 | return model 61 | 62 | 63 | savepath = './' 64 | def predict(model): 65 | # read in the model 66 | model = load_checkpoint(model) 67 | print('..... Finished loading model! ......') 68 | ## run the model on the gpu 69 | if torch.cuda.is_available(): 70 | model.cuda() 71 | 72 | img = Image.open(img_path).convert('RGB') 73 | 74 | img = validation_transforms(size=cfg.INPUT_SIZE)(img).unsqueeze(0) 75 | 76 | if torch.cuda.is_available(): 77 | img = img.cuda() 78 | with torch.no_grad(): 79 | x = model.conv1(img) 80 | draw_features(8, 8, 64, x.cpu().numpy(), "{}/f1_conv1.png".format(savepath)) 81 | 82 | 83 | 84 | 85 | 86 | if __name__ == "__main__": 87 | 88 | trained_model = '/disk/haihua/weights/resnet50/epoch_39.pth' 89 | img_path = './test.png' 90 | 91 | predict(trained_model) 92 | 93 | 94 | 95 | 96 | --------------------------------------------------------------------------------
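
Beyond `model.conv1`, a deeper layer can be visualized without editing `predict` by registering a forward hook and handing the captured feature map to `draw_features`. The sketch below is an illustrative addition, not part of the original script; it assumes a plain torchvision resnet50 (whose `layer1` outputs 256 channels) and reuses the `draw_features` helper defined above:

```python
# Illustrative sketch: capture an intermediate feature map with a forward hook.
import torch
import torchvision

model = torchvision.models.resnet50()
model.eval()

feats = {}
def hook(module, inputs, output):
    # keep a CPU numpy copy of the layer output for plotting
    feats['layer1'] = output.detach().cpu().numpy()

handle = model.layer1.register_forward_hook(hook)
with torch.no_grad():
    model(torch.randn(1, 3, 224, 224))   # or a preprocessed image tensor
handle.remove()

# resnet50 layer1 has 256 channels, so a 16 x 16 grid covers them all
draw_features(16, 16, 256, feats['layer1'], '{}/f_layer1.png'.format(savepath))
```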