├── .github └── stale.yml ├── .gitignore ├── BBN ├── bbn_dataset.py ├── bbn_model.py ├── dataset.txt ├── readme.md └── train.py ├── CAM_pytorch ├── __init__.py ├── checkpoint │ └── .gitkeep ├── data │ ├── MyDataSet.py │ └── __init__.py ├── main.py ├── models │ ├── VGG_CAM.py │ └── __init__.py ├── readme.md └── utils │ ├── __init__.py │ ├── config.py │ └── visualize.py ├── CUDA_Python ├── CUDA-Python证书.pdf ├── readme.md ├── 课程1 │ ├── .ipynb_checkpoints │ │ └── Introduction to CUDA Python with Numba-checkpoint.ipynb │ ├── Introduction to CUDA Python with Numba.ipynb │ ├── images │ │ ├── DLI Header.png │ │ ├── numba_flowchart.png │ │ └── run_the_assessment.png │ ├── section1.tar.gz │ └── solutions │ │ ├── make_pulses_solution.py │ │ ├── monte_carlo_pi_solution.py │ │ └── zero_suppress_solution.py ├── 课程2 │ ├── .ipynb_checkpoints │ │ └── Custom CUDA Kernels in Python with Numba-checkpoint.ipynb │ ├── Custom CUDA Kernels in Python with Numba.ipynb │ ├── assessment │ │ └── histogram.py │ ├── debug │ │ ├── ex1.py │ │ ├── ex1a.py │ │ ├── ex2.py │ │ ├── ex3.py │ │ └── ex3a.py │ ├── images │ │ ├── DLI Header.png │ │ └── run_the_assessment.png │ ├── img │ │ ├── numba_flowchart.png │ │ ├── sensor_humidity.png │ │ └── sensor_temp.png │ ├── section2.tar.gz │ └── solutions │ │ ├── hypot_stride_solution.py │ │ ├── monte_carlo_pi_solution.py │ │ └── square_device_solution.py ├── 课程3 │ ├── .ipynb_checkpoints │ │ └── Effective Memory Use-checkpoint.ipynb │ ├── Effective Memory Use.ipynb │ ├── Multidimensional Grids and Shared Memory for CUDA Python with Numba.ipynb │ ├── assessment │ │ └── definition.py │ ├── images │ │ ├── DLI Header.png │ │ ├── mm_image.png │ │ ├── run_assess_task.png │ │ └── run_the_assessment.png │ └── solutions │ │ ├── add_matrix_solution.py │ │ ├── add_matrix_stride_solution.py │ │ ├── col_sums_solution.py │ │ ├── matrix_add_solution.py │ │ ├── matrix_multiply_solution.py │ │ ├── matrix_multiply_stride_solution.py │ │ ├── monte_carlo_pi_solution.py │ │ └── tile_transpose_solution.py └── 课程笔记.pdf ├── DataHub └── readme.md ├── FPN_pytorch ├── README.md ├── fpn.py └── retina_fpn.py ├── FasterRcnn_pytorch ├── LICENSE ├── README.MD ├── __pycache__ │ └── trainer.cpython-35.pyc ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── dataset.cpython-35.pyc │ │ ├── util.cpython-35.pyc │ │ └── voc_dataset.cpython-35.pyc │ ├── dataset.py │ ├── util.py │ └── voc_dataset.py ├── demo.ipynb ├── misc │ ├── convert_caffe_pretrain.py │ ├── demo.jpg │ └── train_fast.py ├── model │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── faster_rcnn.cpython-35.pyc │ │ ├── faster_rcnn_vgg16.cpython-35.pyc │ │ ├── region_proposal_network.cpython-35.pyc │ │ └── roi_module.cpython-35.pyc │ ├── faster_rcnn.py │ ├── faster_rcnn_vgg16.py │ ├── region_proposal_network.py │ ├── roi_module.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── bbox_tools.cpython-35.pyc │ │ ├── creator_tool.cpython-35.pyc │ │ └── roi_cupy.cpython-35.pyc │ │ ├── bbox_tools.py │ │ ├── creator_tool.py │ │ ├── nms │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── _nms_gpu_post_py.cpython-35.pyc │ │ │ └── non_maximum_suppression.cpython-35.pyc │ │ ├── _nms_gpu_post.c │ │ ├── _nms_gpu_post.pyx │ │ ├── _nms_gpu_post_py.py │ │ ├── build.py │ │ ├── build │ │ │ ├── lib.linux-x86_64-3.5 │ │ │ │ └── _nms_gpu_post.cpython-35m-x86_64-linux-gnu.so │ │ │ └── temp.linux-x86_64-3.5 │ │ │ │ └── _nms_gpu_post.o │ │ └── 
non_maximum_suppression.py │ │ └── roi_cupy.py ├── requirements.txt ├── train.py ├── trainer.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── array_tool.cpython-35.pyc │ ├── config.cpython-35.pyc │ ├── eval_tool.cpython-35.pyc │ └── vis_tool.cpython-35.pyc │ ├── array_tool.py │ ├── config.py │ ├── eval_tool.py │ └── vis_tool.py ├── GhostNet ├── G-Ghost.png ├── g_ghost_regnet.py └── readme.md ├── LICENSE ├── RepVGG ├── readme.md ├── repvgg.png └── repvgg.py ├── SSD_pytorch ├── checkpoint │ └── .gitkeep ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── voc0712.cpython-35.pyc │ └── voc0712.py ├── main.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── box_utils.cpython-35.pyc │ │ └── ssd.cpython-35.pyc │ ├── box_utils.py │ ├── functions │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── detection.cpython-35.pyc │ │ │ └── prior_box.cpython-35.pyc │ │ ├── detection.py │ │ └── prior_box.py │ ├── modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── init_weights.cpython-35.pyc │ │ │ ├── l2norm.cpython-35.pyc │ │ │ └── multibox_loss.cpython-35.pyc │ │ ├── init_weights.py │ │ ├── l2norm.py │ │ └── multibox_loss.py │ └── ssd.py ├── readme.md ├── temp │ └── test.png └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── augmentations.cpython-35.pyc │ ├── config.cpython-35.pyc │ ├── eval_untils.cpython-35.pyc │ ├── timer.cpython-35.pyc │ └── visualize.cpython-35.pyc │ ├── augmentations.py │ ├── config.py │ ├── eval_untils.py │ ├── timer.py │ └── visualize.py ├── UNet_pytorch ├── dice_loss.py ├── eval.py ├── predict.py ├── readme.md ├── submit.py ├── train.py ├── unet │ ├── __init__.py │ ├── unet_model.py │ └── unet_parts.py └── utils │ ├── __init__.py │ ├── config.py │ ├── crf.py │ ├── data_vis.py │ ├── load.py │ └── utils.py ├── Yolov1_pytorch ├── checkpoint │ └── .gitkeep ├── config.py ├── data │ ├── __init__.py │ ├── dataset.py │ ├── voc2007test.txt │ ├── voc2012train.txt │ └── xml_2_txt.py ├── main.py ├── main_resnet.py ├── models │ ├── __init__.py │ ├── net.py │ └── resnet.py ├── readme.md └── utils │ ├── __init__.py │ ├── predictUtils.py │ ├── testImgs │ └── __init__.py │ ├── visualize.py │ └── yoloLoss.py ├── Yolov3_pytorch ├── checkpoints │ ├── .gitkeep │ └── download_weights.sh ├── config │ ├── coco.data │ └── yolov3.cfg ├── data │ ├── coco.names │ ├── get_coco_dataset.sh │ └── samples │ │ ├── dog.jpg │ │ ├── eagle.jpg │ │ ├── giraffe.jpg │ │ ├── herd_of_horses.jpg │ │ ├── img1.jpg │ │ ├── img2.jpg │ │ ├── img3.jpg │ │ ├── img4.jpg │ │ ├── messi.jpg │ │ └── person.jpg ├── datasets │ └── datasets.py ├── main.py ├── models │ └── models.py ├── readme.md └── utils │ ├── __init__.py │ ├── config.py │ ├── parse_config.py │ ├── utils.py │ └── visualize.py └── readme.md /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 在一个问题变得陈旧之前不活跃的天数  2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 在一个陈旧的问题被关闭之前,没有活动的天数 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 这些标签的问题永远不会被认为是过时的 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 当标记过期问题时使用的标签 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. 
Set to `false` to disable 当把一个问题标记为过时时发表评论。设置为' false '禁用 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions.(由于长期不活动,机器人自动关闭此问题,如果需要欢迎提问) 16 | # Comment to post when closing a stale issue. Set to `false` to disable 在关闭过期问题时发表评论。设置为' false '禁用 17 | closeComment: false -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .DS_Store 3 | .idea 4 | -------------------------------------------------------------------------------- /BBN/bbn_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import torch.utils.data as data 4 | import torch 5 | import random 6 | import glob 7 | from tqdm import tqdm 8 | from timm.data.transforms_factory import create_transform as timm_transform 9 | from PIL import Image 10 | import torch 11 | import cv2 12 | import os 13 | import numpy as np 14 | import torchvision 15 | from torchvision.transforms import transforms 16 | 17 | def Process(img_path, img_size, use_augment): 18 | """ 19 | timm默认预处理 20 | """ 21 | # 读取图像 22 | assert os.path.exists(img_path), f"{img_path} 图像不存在" 23 | img = cv2.imread(img_path, cv2.IMREAD_COLOR) # BGR 24 | img = Image.fromarray(img) 25 | if use_augment: 26 | # 增广:Random(缩放、裁剪、翻转、色彩...) 27 | img_trans = timm_transform( 28 | img_size, 29 | is_training=True, 30 | re_prob=0.5, 31 | re_mode="pixel", # 随机擦除 32 | auto_augment=None, # 自动增广 eg:rand-m9-mstd0.5 33 | ) 34 | else: 35 | # 不增广:ReSize256 -> CenterCrop224 36 | img_trans = timm_transform(img_size) 37 | return img_trans(img) 38 | 39 | 40 | class BBN_Dataset(data.Dataset): 41 | """数据加载器""" 42 | 43 | def __init__(self, txt_path, mode, size): 44 | """ 45 | 46 | Args: 47 | txt_path (str): 数据集路径 48 | mode (str): 类型 49 | size (list): 图像尺寸 eg: [224,224] 50 | """ 51 | assert mode in ["train", "val", "test"] 52 | self.use_augment = True if mode == "train" else False # 训练集开启增广 53 | self.size = size 54 | 55 | self.dataset = self.load_txt(txt_path) 56 | self.imgs_list = self.dataset[mode] 57 | self.all_labels = self.dataset["all_labels"] 58 | 59 | # 训练集开启BBN 60 | if mode == "train": 61 | 62 | labels_list = [label for _, label in self.imgs_list] # 所有图片对应的类别列表 63 | class_index_dict = dict() # key类别名对应的索引 values该类的所有图片索引 64 | class_nums_list = [0] * len(self.all_labels) # 每个类对应的图片数 65 | for index, label in enumerate(labels_list): 66 | if not int(label) in class_index_dict: 67 | class_index_dict[int(label)] = [] 68 | class_index_dict[int(label)].append(index) 69 | 70 | class_nums_list[int(label)] += 1 71 | 72 | # 构建逆向采样分布 73 | max_num = max(class_nums_list) # 类内最大样本数 74 | class_weight = [max_num / i for i in class_nums_list] # 概率占比的倒数 列表 75 | sum_weight = sum(class_weight) # 逆向的概率占比之和 76 | self.class_weight, self.sum_weight = class_weight, sum_weight 77 | self.class_index_dict = class_index_dict 78 | 79 | def __getitem__(self, index): 80 | img_path, label = self.imgs_list[index] 81 | # 图像预处理 82 | img = Process(img_path, self.size, self.use_augment) 83 | # 训练集 BBN采样 84 | if self.use_augment: 85 | sample_class = self.sample_class_index_by_weight() # 类别索引 86 | sample_indexes = self.class_index_dict[sample_class] # 获得该类别的所有图片索引(对应图片顺序) 87 | sample_index = random.choice(sample_indexes) # 随机抽取一个样本 88 | img2_path, label2 = 
self.imgs_list[sample_index] 89 | img2 = Process(img2_path, self.size, self.use_augment) 90 | 91 | return img, label, img_path, img2, label2, img2_path 92 | # validation / test set 93 | else: 94 | return img, label, img_path 95 | 96 | def __len__(self): 97 | return len(self.imgs_list) 98 | 99 | def load_txt(self, txt_path): 100 | """Load a single-label classification dataset 101 | 102 | Args: 103 | txt_path (str): path to the dataset list file 104 | 105 | Each line has the form: split,class_name,image_path 106 | train,dog,img1.jpg 107 | val,dog,img2.jpg 108 | test,cat,img3.jpg 109 | 110 | Returns: 111 | { 112 | "train": [ 113 | [img_1, 0], 114 | [img_2, 1], 115 | ... 116 | ], 117 | "val": likewise, 118 | "test": likewise, 119 | "all_labels": ["dog", "cat",...], 120 | } 121 | 122 | """ 123 | # read the list file 124 | f = open(txt_path) 125 | txt_list = f.readlines() 126 | txt_list = [txt.strip().split(",") for txt in txt_list] # strip the trailing newline first, otherwise every image path keeps a '\n' and the os.path.exists() check in Process() fails 127 | f.close() 128 | 129 | 130 | # collect all class names 131 | all_labels = [txt_i[1] for txt_i in txt_list] 132 | all_labels = list(set(all_labels)) 133 | all_labels.sort() 134 | 135 | # build the dataset dict 136 | dataset = { 137 | "train": [], 138 | "val": [], 139 | "test": [], 140 | "all_labels": all_labels, 141 | } 142 | for mode, label, img_path in txt_list: 143 | assert mode in ["train", "val", "test"] 144 | dataset[mode].append([img_path, all_labels.index(label)]) 145 | return dataset 146 | 147 | def sample_class_index_by_weight(self): 148 | """ 149 | Reversed (inverse-frequency) sampling 150 | """ 151 | # rand_number lies in [0, sum of the inverse-frequency weights) 152 | rand_number, now_sum = random.random() * self.sum_weight, 0 153 | # walk over the classes and return the index of the class whose 154 | # weight interval contains rand_number 155 | for i in range(len(self.class_weight)): 156 | now_sum += self.class_weight[i] 157 | if rand_number <= now_sum: 158 | return i # index of the sampled class -------------------------------------------------------------------------------- /BBN/dataset.txt: -------------------------------------------------------------------------------- 1 | train,dog,CatDog/dog/dog_67.jpg 2 | train,dog,CatDog/dog/dog_2.jpg 3 | train,dog,CatDog/dog/dog_52.jpg 4 | train,dog,CatDog/dog/dog_82.jpg 5 | train,dog,CatDog/dog/dog_99.jpg 6 | train,dog,CatDog/dog/dog_85.jpg 7 | train,dog,CatDog/dog/dog_55.jpg 8 | train,dog,CatDog/dog/dog_41.jpg 9 | train,dog,CatDog/dog/dog_8.jpg 10 | train,dog,CatDog/dog/dog_56.jpg 11 | train,dog,CatDog/dog/dog_25.jpg 12 | train,dog,CatDog/dog/dog_92.jpg 13 | train,dog,CatDog/dog/dog_33.jpg 14 | train,dog,CatDog/dog/dog_62.jpg 15 | train,dog,CatDog/dog/dog_51.jpg 16 | train,dog,CatDog/dog/dog_13.jpg 17 | train,dog,CatDog/dog/dog_74.jpg 18 | train,dog,CatDog/dog/dog_24.jpg 19 | train,dog,CatDog/dog/dog_93.jpg 20 | train,dog,CatDog/dog/dog_12.jpg 21 | train,dog,CatDog/dog/dog_5.jpg 22 | train,dog,CatDog/dog/dog_22.jpg 23 | train,dog,CatDog/dog/dog_30.jpg 24 | train,dog,CatDog/dog/dog_28.jpg 25 | train,dog,CatDog/dog/dog_79.jpg 26 | train,dog,CatDog/dog/dog_35.jpg 27 | train,dog,CatDog/dog/dog_23.jpg 28 | train,dog,CatDog/dog/dog_94.jpg 29 | train,dog,CatDog/dog/dog_54.jpg 30 | train,dog,CatDog/dog/dog_40.jpg 31 | train,dog,CatDog/dog/dog_53.jpg 32 | train,dog,CatDog/dog/dog_88.jpg 33 | train,dog,CatDog/dog/dog_59.jpg 34 | train,dog,CatDog/dog/dog_42.jpg 35 | train,dog,CatDog/dog/dog_21.jpg 36 | train,dog,CatDog/dog/dog_73.jpg 37 | train,dog,CatDog/dog/dog_18.jpg 38 | train,dog,CatDog/dog/dog_43.jpg 39 | train,dog,CatDog/dog/dog_46.jpg 40 | train,dog,CatDog/dog/dog_57.jpg 41 | train,dog,CatDog/dog/dog_96.jpg 42 | train,dog,CatDog/dog/dog_77.jpg 43 | train,dog,CatDog/dog/dog_4.jpg 44 | train,dog,CatDog/dog/dog_20.jpg 45 | train,dog,CatDog/dog/dog_10.jpg 46 | train,dog,CatDog/dog/dog_69.jpg 47 | 
train,dog,CatDog/dog/dog_100.jpg 48 | train,dog,CatDog/dog/dog_66.jpg 49 | train,dog,CatDog/dog/dog_95.jpg 50 | train,dog,CatDog/dog/dog_84.jpg 51 | train,dog,CatDog/dog/dog_64.jpg 52 | train,dog,CatDog/dog/dog_31.jpg 53 | train,dog,CatDog/dog/dog_16.jpg 54 | train,dog,CatDog/dog/dog_89.jpg 55 | train,dog,CatDog/dog/dog_76.jpg 56 | train,dog,CatDog/dog/dog_19.jpg 57 | train,dog,CatDog/dog/dog_70.jpg 58 | train,dog,CatDog/dog/dog_91.jpg 59 | train,dog,CatDog/dog/dog_44.jpg 60 | train,dog,CatDog/dog/dog_86.jpg 61 | train,dog,CatDog/dog/dog_78.jpg 62 | train,dog,CatDog/dog/dog_61.jpg 63 | train,dog,CatDog/dog/dog_45.jpg 64 | train,dog,CatDog/dog/dog_37.jpg 65 | train,dog,CatDog/dog/dog_11.jpg 66 | train,dog,CatDog/dog/dog_60.jpg 67 | train,dog,CatDog/dog/dog_6.jpg 68 | train,dog,CatDog/dog/dog_27.jpg 69 | train,dog,CatDog/dog/dog_65.jpg 70 | train,dog,CatDog/dog/dog_29.jpg 71 | train,cat,CatDog/cat/cat_56.jpg 72 | train,cat,CatDog/cat/cat_35.jpg 73 | train,cat,CatDog/cat/cat_90.jpg 74 | train,cat,CatDog/cat/cat_32.jpg 75 | train,cat,CatDog/cat/cat_7.jpg 76 | train,cat,CatDog/cat/cat_37.jpg 77 | train,cat,CatDog/cat/cat_100.jpg 78 | train,cat,CatDog/cat/cat_25.jpg 79 | train,cat,CatDog/cat/cat_28.jpg 80 | train,cat,CatDog/cat/cat_77.jpg 81 | train,cat,CatDog/cat/cat_23.jpg 82 | train,cat,CatDog/cat/cat_21.jpg 83 | train,cat,CatDog/cat/cat_11.jpg 84 | train,cat,CatDog/cat/cat_47.jpg 85 | train,cat,CatDog/cat/cat_27.jpg 86 | train,cat,CatDog/cat/cat_41.jpg 87 | train,cat,CatDog/cat/cat_97.jpg 88 | train,cat,CatDog/cat/cat_39.jpg 89 | train,cat,CatDog/cat/cat_98.jpg 90 | train,cat,CatDog/cat/cat_38.jpg 91 | val,dog,CatDog/dog/dog_39.jpg 92 | val,dog,CatDog/dog/dog_81.jpg 93 | val,dog,CatDog/dog/dog_1.jpg 94 | val,dog,CatDog/dog/dog_71.jpg 95 | val,dog,CatDog/dog/dog_98.jpg 96 | val,dog,CatDog/dog/dog_80.jpg 97 | val,dog,CatDog/dog/dog_49.jpg 98 | val,dog,CatDog/dog/dog_26.jpg 99 | val,dog,CatDog/dog/dog_38.jpg 100 | val,dog,CatDog/dog/dog_15.jpg 101 | val,cat,CatDog/cat/cat_63.jpg 102 | val,cat,CatDog/cat/cat_14.jpg 103 | val,cat,CatDog/cat/cat_43.jpg 104 | val,cat,CatDog/cat/cat_64.jpg 105 | val,cat,CatDog/cat/cat_84.jpg 106 | val,cat,CatDog/cat/cat_52.jpg 107 | val,cat,CatDog/cat/cat_57.jpg 108 | val,cat,CatDog/cat/cat_46.jpg 109 | val,cat,CatDog/cat/cat_60.jpg 110 | val,cat,CatDog/cat/cat_44.jpg 111 | test,dog,CatDog/dog/dog_32.jpg 112 | test,dog,CatDog/dog/dog_75.jpg 113 | test,dog,CatDog/dog/dog_58.jpg 114 | test,dog,CatDog/dog/dog_3.jpg 115 | test,dog,CatDog/dog/dog_7.jpg 116 | test,dog,CatDog/dog/dog_34.jpg 117 | test,dog,CatDog/dog/dog_48.jpg 118 | test,dog,CatDog/dog/dog_83.jpg 119 | test,dog,CatDog/dog/dog_36.jpg 120 | test,dog,CatDog/dog/dog_9.jpg 121 | test,dog,CatDog/dog/dog_63.jpg 122 | test,dog,CatDog/dog/dog_72.jpg 123 | test,dog,CatDog/dog/dog_50.jpg 124 | test,dog,CatDog/dog/dog_97.jpg 125 | test,dog,CatDog/dog/dog_47.jpg 126 | test,dog,CatDog/dog/dog_17.jpg 127 | test,dog,CatDog/dog/dog_68.jpg 128 | test,dog,CatDog/dog/dog_14.jpg 129 | test,dog,CatDog/dog/dog_90.jpg 130 | test,dog,CatDog/dog/dog_87.jpg 131 | test,cat,CatDog/cat/cat_58.jpg 132 | test,cat,CatDog/cat/cat_17.jpg 133 | test,cat,CatDog/cat/cat_96.jpg 134 | test,cat,CatDog/cat/cat_40.jpg 135 | test,cat,CatDog/cat/cat_87.jpg 136 | test,cat,CatDog/cat/cat_69.jpg 137 | test,cat,CatDog/cat/cat_67.jpg 138 | test,cat,CatDog/cat/cat_3.jpg 139 | test,cat,CatDog/cat/cat_18.jpg 140 | test,cat,CatDog/cat/cat_2.jpg 141 | test,cat,CatDog/cat/cat_13.jpg 142 | test,cat,CatDog/cat/cat_15.jpg 143 | test,cat,CatDog/cat/cat_80.jpg 144 | 
test,cat,CatDog/cat/cat_95.jpg 145 | test,cat,CatDog/cat/cat_5.jpg 146 | test,cat,CatDog/cat/cat_73.jpg 147 | test,cat,CatDog/cat/cat_6.jpg 148 | test,cat,CatDog/cat/cat_10.jpg 149 | test,cat,CatDog/cat/cat_36.jpg 150 | test,cat,CatDog/cat/cat_65.jpg 151 | -------------------------------------------------------------------------------- /BBN/readme.md: -------------------------------------------------------------------------------- 1 | # BBN: Bilateral-Branch Network with Cumulative Learning for Long-Tailed Visual Recognition 2 | 3 | #### 该仓库收录于[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub) 4 | 5 | 6 | - [官方库](https://github.com/Megvii-Nanjing/BBN) [官方知乎解读](https://zhuanlan.zhihu.com/p/123876769) 7 | - 目的:图像分类任务中,长尾数据分布存在极端的类别不平衡问题 8 | 9 | ## TODO 10 | - [x] BBN 数据加载、模型定义 11 | 12 | > 基于官方库,简化代码。便于迁移到任意训练框架。 13 | 14 | - [ ] 训练示例和采样可视化 15 | 16 | -------------------------------------------------------------------------------- /BBN/train.py: -------------------------------------------------------------------------------- 1 | from .bbn_dataset import BBN_Dataset 2 | from .bbn_model import BBN_ResNet50 3 | from torch.utils.data import DataLoader 4 | from tqdm import tqdm 5 | import torch.nn as nn 6 | from pycm import ConfusionMatrix 7 | import torch 8 | # 初始化模型 9 | model=BBN_ResNet50() 10 | 11 | 12 | # 构建数据集 13 | batch=64 14 | txt_path="./dataset.txt" 15 | train_set = BBN_Dataset(txt_path=txt_path,mode="train",size=[224,224]) 16 | val_set = BBN_Dataset(txt_path=txt_path,mode="val",size=[224,224]) 17 | 18 | # 构建数据集加载器 19 | train_dataloader = DataLoader( 20 | dataset=train_set, 21 | batch_size=batch, 22 | num_workers=4, 23 | shuffle=True, 24 | drop_last=True, 25 | ) 26 | val_dataloader = DataLoader( 27 | dataset=val_set, 28 | batch_size=batch, 29 | num_workers=4, 30 | ) 31 | 32 | # 开始训练 33 | optimizer=None 34 | lr_scheduler=None 35 | criterion=nn.CrossEntropyLoss() 36 | Epochs=100 37 | 38 | for epoch in range(Epochs): 39 | optimizer.zero_grad() 40 | 41 | for batch_idx, ( 42 | imgs, 43 | labels, 44 | imgs_path, 45 | imgs2, 46 | labels2, 47 | imgs_path2, 48 | ) in enumerate(tqdm(train_dataloader)): 49 | model.train() 50 | # 正常采样分布 51 | imgs, labels = imgs.cuda(), labels.cuda() 52 | # 逆向采样分布 53 | imgs2, labels2 = imgs2.cuda(), labels2.cuda() 54 | 55 | l = 1 - ((epoch - 1) / Epochs) ** 2 # parabolic decay抛物线 56 | params = {"imgs1": imgs, "imgs2": imgs2, "l": l} 57 | output = model(params) 58 | loss = l * criterion(output, labels) + (1 - l) * criterion( 59 | output, labels2 60 | ) 61 | loss.backward() 62 | optimizer.step() 63 | optimizer.zero_grad() 64 | model.eval() 65 | lr_scheduler.step() 66 | # 评估模型 67 | preds_list, labels_list = [], [] 68 | for batch_idx, (imgs, labels, imgs_path) in enumerate(tqdm(val_dataloader)): 69 | imgs, labels = imgs.cuda(), labels.cuda() 70 | scores = model(imgs) 71 | scores = torch.nn.functional.softmax(scores, dim=1) 72 | preds = torch.argmax(scores, dim=1) 73 | 74 | preds_list.append(preds) 75 | labels_list.append(labels) 76 | preds_list = torch.cat(preds_list, dim=0).cpu().numpy() 77 | labels_list = torch.cat(labels_list, dim=0).cpu().numpy() 78 | acc=ConfusionMatrix(labels_list, preds_list).Overall_ACC 79 | print("val acc:",acc) -------------------------------------------------------------------------------- /CAM_pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/__init__.py 
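Note on `BBN/train.py` above: it is deliberately a skeleton (the readme's TODO still lists the training example), so `optimizer` and `lr_scheduler` are left as `None` and the first `optimizer.zero_grad()` call would raise `AttributeError`; the model is also never moved to the GPU even though the loop calls `imgs.cuda()`. A minimal way to fill in the gaps — the SGD/cosine choices below are illustrative assumptions, not the official BBN recipe:

```python
import torch

model = model.cuda()  # the loop sends batches to the GPU, so the model must live there too
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=Epochs)  # Epochs = 100 in train.py
```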
-------------------------------------------------------------------------------- /CAM_pytorch/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/checkpoint/.gitkeep -------------------------------------------------------------------------------- /CAM_pytorch/data/MyDataSet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:utf-8 -*- 3 | # power by Mr.Li 4 | import os 5 | from torch.utils import data 6 | from torchvision import transforms as T 7 | import cv2 8 | import random 9 | from utils.config import opt 10 | class MyDataSet(data.Dataset): 11 | ''' 12 | 主要目标: 获取所有图片的地址,并根据训练,验证,测试划分数据 13 | ''' 14 | def __init__(self, root, transforms=None, train=True, test=False): 15 | self.test = test #状态 16 | self.train = train 17 | self.root = root #数据集路径 18 | 19 | # 读取文件夹下所有图像 20 | if root!='': 21 | pos_root=os.path.join(root, 'pos') 22 | neg_root = os.path.join(root, 'neg') 23 | 24 | pos_imgs = [os.path.join(pos_root, img) for img in os.listdir(pos_root)] 25 | neg_imgs = [os.path.join(neg_root, img) for img in os.listdir(neg_root)] 26 | 27 | imgs = pos_imgs + neg_imgs 28 | # 打乱数据集 29 | random.shuffle(imgs) 30 | else: 31 | print('数据集为空???') 32 | imgs = [] 33 | 34 | imgs_num = len (imgs) 35 | # 划分数据集 36 | if train: 37 | self.imgs = imgs[:int(0.8 * imgs_num)] 38 | else: 39 | self.imgs = imgs[int(0.8 * imgs_num):] 40 | 41 | 42 | 43 | # 对图像进行转化(若未指定转化,则执行默认操作) 44 | if transforms is None: 45 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 46 | 47 | if self.test or not train: # 测试集和验证集 48 | self.transforms = T.Compose([ 49 | T.ToTensor(), 50 | normalize 51 | ]) 52 | else: # 训练集 53 | self.transforms = T.Compose([ 54 | T.ToTensor(), 55 | normalize 56 | ]) 57 | 58 | def __getitem__(self, index): 59 | ''' 60 | 一次返回一张图片的数据 61 | ''' 62 | # 图片的完整路径 63 | img_path = self.imgs[index] 64 | # 读取图像 65 | img = cv2.imread(img_path) 66 | img = self.BGR2RGB(img) # 因为pytorch自身提供的预训练好的模型期望的输入是RGB 67 | img = cv2.resize(img, (64, 128)) 68 | # 对图片进行转化 69 | img = self.transforms(img) 70 | # 标签真值 71 | if 'neg' in img_path: 72 | label=0 # 没有人 73 | else: 74 | label=1 # 有人 75 | 76 | return img,label 77 | 78 | def __len__(self): 79 | return len(self.imgs) 80 | 81 | def BGR2RGB(self, img): 82 | return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 83 | 84 | def get_test_img(self): 85 | # 读取图像 86 | img_origin = cv2.imread(opt.test_img) 87 | img = self.BGR2RGB(img_origin) # 因为pytorch自身提供的预训练好的模型期望的输入是RGB 88 | img = cv2.resize(img, (64, 128)) 89 | # 对图片进行转化 90 | img = self.transforms(img) 91 | return img_origin,img 92 | 93 | -------------------------------------------------------------------------------- /CAM_pytorch/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/data/__init__.py -------------------------------------------------------------------------------- /CAM_pytorch/models/VGG_CAM.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # power by Mr.Li 3 | from torch import nn 4 | import torch as t 5 | from torchvision.models import vgg16 6 | from utils.config import opt 7 | class VGG16_CAM(nn.Module): 8 | ''' 9 | 定义网络 10 | ''' 11 | def __init__(self): 12 | 
super(VGG16_CAM, self).__init__() 13 | # 设置网络名称 14 | self.moduel_name = str("VGG16_CAM") 15 | # 去掉 VGG16 feature层的maxpool层 16 | self.feature_layer = nn.Sequential(*list(vgg16(pretrained=True).features.children())[0:-1]) 17 | # 全局平均池化层 GAP 18 | self.fc_layer = nn.Linear(512,2) 19 | 20 | def forward(self, x): 21 | x = self.feature_layer(x) 22 | # GAP 全局平均池化 23 | x = t.mean(x,dim=3) 24 | x = t.mean(x,dim=2) 25 | 26 | # 全连接层+softmax层 27 | x = self.fc_layer(x) 28 | # x = F.softmax(x) #交叉熵自带softmax 29 | return x 30 | 31 | 32 | # def test(): 33 | # from torch.autograd import Variable 34 | # model=VGG16_CAM() 35 | # print(model) 36 | # img=t.rand(2,3,224,224) 37 | # img=Variable(img) 38 | # output=model(img) 39 | # print(output.size()) 40 | # 41 | # if __name__ == '__main__': 42 | # test() -------------------------------------------------------------------------------- /CAM_pytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .VGG_CAM import VGG16_CAM -------------------------------------------------------------------------------- /CAM_pytorch/readme.md: -------------------------------------------------------------------------------- 1 | # class activation mapping 2 | 3 | 4 | - 环境: 5 | 6 | | python版本 | pytorch版本 | 7 | | ----------- | ---------- | 8 | | 3.5 | 0.3.0 | 9 | 10 | 11 | - 作用:分类、定位(不使用真值框进行定位,论文证明 卷积层本身就有定位功能) 12 | 13 | ---------- 14 | 15 | ## 数据集 16 | 17 | - [INRIA Person数据集(官方)](http://pascal.inrialpes.fr/data/human/) 18 | - [INRIA Person数据集(百度云)](https://pan.baidu.com/s/1adTzYgX13K4CIjZNODRXqQ) 19 | 20 | 21 | ## 预训练模型 22 | 23 | - [VGG16_CAM_39_99.455.pth](https://pan.baidu.com/s/1OVnxBBhmtVgTEUz0nNmrFg) 24 | 25 | 26 | ## 训练 27 | 28 | 1、在config.py中配置数据集等训练参数 29 | 30 | 2、执行main.py开始训练 31 | 32 | ## 可视化 33 | 34 | 1、在config.py中配置预训练模型 35 | 36 | 2、执行main.py可视化class_activation_map 37 | 38 | 39 | 40 | 41 | ## 训练过程 42 |
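(Note: `utils/visualize.py` wraps a `visdom.Visdom` client, which expects a visdom server to be running — start one with `python -m visdom.server` before launching `main.py`; the screenshots below were presumably captured from that dashboard.)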
43 | (training-process screenshots) 44 | 
45 | 46 | ---------- 47 | 48 | ## Results 49 | 50 | - The image regions the network focuses on when classifying (i.e., the evidence behind its predictions) 51 | 52 | 
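Such a map can be computed directly from the `VGG16_CAM` weights — a minimal sketch of the paper's CAM formula, M_c(x, y) = Σ_k w_k^c · f_k(x, y), written against a current PyTorch API rather than the 0.3 version pinned above; `img_tensor` stands for an already-preprocessed input of shape `(1, 3, H, W)`:

```python
import torch
import torch.nn.functional as F

model = VGG16_CAM().eval()                                   # from models/VGG_CAM.py
with torch.no_grad():
    feats = model.feature_layer(img_tensor)                  # (1, 512, h, w) conv feature maps
    logits = model.fc_layer(feats.mean(dim=3).mean(dim=2))   # GAP -> fc, same as forward()
    cls = logits.argmax(dim=1).item()                        # predicted class index
    w = model.fc_layer.weight[cls]                           # (512,) fc weights of that class
    cam = (w.view(1, -1, 1, 1) * feats).sum(dim=1, keepdim=True)  # weighted sum over channels
    cam = F.interpolate(cam, size=img_tensor.shape[2:], mode="bilinear", align_corners=False)
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)      # normalize to [0, 1] for overlay
```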
53 | (class-activation visualization, example 1 of 3) 54 | 
55 | 56 |
57 | (class-activation visualization, example 2 of 3) 58 | 
59 | 60 |
61 | (class-activation visualization, example 3 of 3) 62 | 
63 | 64 | ---------- 65 | 66 | ## 参考 67 | 68 | - [Keras implementation of CAM](https://github.com/jacobgil/keras-cam) 69 | - [可视化CNN](https://github.com/huanghao-code/VisCNN_CVPR_2016_Loc) 70 | - [论文CVPR 2016](https://arxiv.org/pdf/1512.04150.pdf) -------------------------------------------------------------------------------- /CAM_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/utils/__init__.py -------------------------------------------------------------------------------- /CAM_pytorch/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # power by Mr.Li 3 | # 设置默认参数 4 | import datetime 5 | import os 6 | class DefaultConfig(): 7 | # 使用的模型,名字必须与models/__init__.py中的名字一致 8 | # 目前支持的网络 9 | model = 'VGG16_CAM' 10 | 11 | # 数据集地址 12 | dataset_root = '/home/bobo/data/cam_dataset/INRIAPerson/Train' 13 | 14 | # 保存模型 15 | root = os.path.abspath(os.path.join(os.path.dirname(__file__))) + '/' 16 | checkpoint_root = root + '../checkpoint/' # 存储模型的路径 17 | # load_model_path = None # 加载预训练的模型的路径,为None代表不加载(用于训练) 18 | load_model_path = checkpoint_root+'VGG16_CAM_39_99.455.pth' 19 | 20 | use_gpu = True # user GPU or not 21 | batch_size = 32 22 | num_workers = 4 # 加载数据时的线程数 23 | 24 | max_epoch = 40 25 | 26 | 27 | lr = 0.01 28 | lr_decay = 0.5 29 | 30 | test_img='/home/bobo/windowsPycharmProject/cam_pytorch/person_and_bike_191.png' #一张测试图片地址 31 | 32 | 33 | 34 | #初始化该类的一个对象 35 | opt=DefaultConfig() -------------------------------------------------------------------------------- /CAM_pytorch/utils/visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:utf-8 -*- 3 | # power by Mr.Li 4 | import visdom 5 | import time 6 | import numpy as np 7 | class Visualizer(object): 8 | ''' 9 | 封装了visdom的基本操作,但是你仍然可以通过`self.vis.function` 10 | 调用原生的visdom接口 11 | ''' 12 | def __init__(self, env='default', **kwargs): 13 | self.vis = visdom.Visdom(env=env, **kwargs) 14 | # 画的第几个数,相当于横座标 15 | # 保存(’loss',23) 即loss的第23个点 16 | self.index = {} 17 | self.log_text = '' 18 | def reinit(self,env='default',**kwargs): 19 | ''' 20 | 修改visdom的配置 重新初始化 21 | ''' 22 | self.vis = visdom.Visdom(env=env,**kwargs) 23 | return self 24 | def plot_many(self, d): 25 | ''' 26 | 一次plot多个损失图形 27 | @params d: dict (name,value) i.e. ('loss',0.11) 28 | ''' 29 | for k, v in d.items(): 30 | self.plot(k, v) 31 | def img_many(self, d): 32 | ''' 33 | 一次画多个图像 34 | ''' 35 | for k, v in d.items(): 36 | self.img(k, v) 37 | def plot(self, name, y,**kwargs): 38 | ''' 39 | self.plot('loss',1.00) 40 | ''' 41 | #得到下标序号 42 | x = self.index.get(name, 0) 43 | self.vis.line(Y=np.array([y]), X=np.array([x]), 44 | win=name,#窗口名 45 | opts=dict(title=name), 46 | update=None if x == 0 else 'append', #按照append的画图形 47 | **kwargs 48 | ) 49 | #下标累加1 50 | self.index[name] = x + 1 51 | def img(self, name, img_,**kwargs): 52 | ''' 53 | self.img('input_img',t.Tensor(64,64)) 54 | self.img('input_imgs',t.Tensor(3,64,64)) 55 | self.img('input_imgs',t.Tensor(100,1,64,64)) 56 | self.img('input_imgs',t.Tensor(100,3,64,64),nrows=10) 57 | 58 | !!!don‘t ~~self.img('input_imgs',t.Tensor(100,64,64),nrows=10)~~!!! 
59 | ''' 60 | self.vis.images(img_.cpu().numpy(), 61 | win=name, 62 | opts=dict(title=name), 63 | **kwargs 64 | ) 65 | def log(self,info,win='log_text'): 66 | ''' 67 | self.log({'loss':1,'lr':0.0001}) 68 | print a log entry 69 | ''' 70 | 71 | self.log_text += ('[{time}] {info} <br>
'.format( 72 | time=time.strftime('%m%d_%H%M%S'),\ 73 | info=info)) 74 | self.vis.text(self.log_text,win) 75 | def __getattr__(self, name): 76 | return getattr(self.vis, name) -------------------------------------------------------------------------------- /CUDA_Python/CUDA-Python证书.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/CUDA-Python证书.pdf -------------------------------------------------------------------------------- /CUDA_Python/readme.md: -------------------------------------------------------------------------------- 1 | # 加速计算基础——CUDA Python 通关版 2 | 3 | >- [Nvidia课程官网](https://courses.nvidia.com/courses/course-v1:DLI+C-AC-02+V1/) 4 | > 5 | >- [本人课程证书](https://courses.nvidia.com/certificates/59466c0d52ae45a394d3b40902aad864/) 6 | 7 | ### 课程1 使用 Numba 的 CUDA Python 简介 8 | 9 | - 基于 Numba 的 CUDA Python 编程简介 10 | - 使用 Numba 在 Python 中编写自定义的 CUDA 核函数 11 | - 使用 Numba 实现 CUDA Python 的多维网格和共享内存 12 | 13 | ### 课程2 使用 Numba 的 CUDA Python 的自定义核函数和内存管理 14 | 15 | - 基于 Numba 的 CUDA Python 编程简介 16 | - 使用 Numba 在 Python 中编写自定义的 CUDA 核函数 17 | - 使用 Numba 实现 CUDA Python 的多维网格和共享内存 18 | 19 | ### 课程3 有效使用内存子系统 20 | 21 | * 编写受益于合并内存访问模式的 CUDA 核函数。 22 | * 使用多维网格和线程块。 23 | * 使用共享内存来协调块内的线程。 24 | * 使用共享内存来促进合并内存访问模式。 25 | * 解决共享内存区的冲突。 26 | 27 | -------------------------------------------------------------------------------- /CUDA_Python/课程1/images/DLI Header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/DLI Header.png -------------------------------------------------------------------------------- /CUDA_Python/课程1/images/numba_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/numba_flowchart.png -------------------------------------------------------------------------------- /CUDA_Python/课程1/images/run_the_assessment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/run_the_assessment.png -------------------------------------------------------------------------------- /CUDA_Python/课程1/section1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/section1.tar.gz -------------------------------------------------------------------------------- /CUDA_Python/课程1/solutions/make_pulses_solution.py: -------------------------------------------------------------------------------- 1 | n = 100000 2 | noise = (np.random.normal(size=n) * 3).astype(np.float32) 3 | t = np.arange(n, dtype=np.float32) 4 | period = n / 23 5 | 6 | d_noise = cuda.to_device(noise) 7 | d_t = cuda.to_device(t) 8 | d_pulses = cuda.device_array(shape=(n,), dtype=np.float32) 9 | 10 | make_pulses(d_t, period, 100.0, out=d_pulses) 11 | waveform = add_ufunc(d_pulses, d_noise) -------------------------------------------------------------------------------- /CUDA_Python/课程1/solutions/monte_carlo_pi_solution.py: 
-------------------------------------------------------------------------------- 1 | from numba import jit # `jit` is the Numba just-in-time-compiler function 2 | import random 3 | 4 | @jit # Use the decorator syntax to mark `monte_carlo_pi` for Numba compilation 5 | def monte_carlo_pi(nsamples): 6 | acc = 0 7 | for i in range(nsamples): 8 | x = random.random() 9 | y = random.random() 10 | if (x**2 + y**2) < 1.0: 11 | acc += 1 12 | return 4.0 * acc / nsamples -------------------------------------------------------------------------------- /CUDA_Python/课程1/solutions/zero_suppress_solution.py: -------------------------------------------------------------------------------- 1 | @vectorize(['int16(int16, int16)'], target='cuda') 2 | def zero_suppress(waveform_value, threshold): 3 | if waveform_value < threshold: 4 | result = 0 5 | else: 6 | result = waveform_value 7 | return result -------------------------------------------------------------------------------- /CUDA_Python/课程2/assessment/histogram.py: -------------------------------------------------------------------------------- 1 | # Add your solution here 2 | @cuda.jit 3 | def cuda_histogram(x, xmin, xmax, histogram_out): 4 | '''Increment bin counts in histogram_out, given histogram range [xmin, xmax).''' 5 | nbins = histogram_out.shape[0] # 分为N组 6 | bin_width = (xmax - xmin) / nbins # 每组宽度 7 | 8 | 9 | start = cuda.grid(1) 10 | 11 | stride=cuda.gridsize(1) # 1指 所有进程按一维下标索引 12 | for i in range(start,x.shape[0],stride): 13 | bin_number=(x[i] - xmin)/bin_width # 所有进程的一次并行计算 14 | if bin_number >= 0 and bin_number < histogram_out.shape[0]: 15 | cuda.atomic.add(histogram_out, bin_number, 1)# 原子操作 全局加1 -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | for i in range(start, x.shape[0], stride): 14 | bin_number = np.int32((x[i] - xmin)/bin_width) 15 | if bin_number >= 0 and bin_number < histogram_out.shape[0]: 16 | histogram_out[bin_number] += 1 17 | 18 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 19 | xmin = np.float32(-4.0) 20 | xmax = np.float32(4.0) 21 | histogram_out = np.zeros(shape=10, dtype=np.int32) 22 | 23 | histogram[64, 64](x, xmin, xmax, histogram_out) 24 | 25 | print('input count:', x.shape[0]) 26 | print('histogram:', histogram_out) 27 | print('count:', histogram_out.sum()) 28 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex1a.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | for i in range(start, x.shape[0], stride): 14 | bin_number = np.int32((x[i] - xmin)/bin_width) 15 | if bin_number >= 0 and bin_number < histogram_out.shape[0]: 16 | histogram_out[bin_number] += 1 17 | print('in range', x[i], bin_number) 18 | else: 19 | print('out of range', x[i], bin_number) 20 | 21 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 22 | xmin = 
np.float32(-4.0) 23 | xmax = np.float32(4.0) 24 | histogram_out = np.zeros(shape=10, dtype=np.int32) 25 | 26 | histogram[64, 64](x, xmin, xmax, histogram_out) 27 | 28 | print('input count:', x.shape[0]) 29 | print('histogram:', histogram_out) 30 | print('count:', histogram_out.sum()) 31 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | ### DEBUG FIRST THREAD 14 | if start == 0: 15 | from pdb import set_trace; set_trace() 16 | ### 17 | 18 | for i in range(start, x.shape[0], stride): 19 | bin_number = np.int32((x[i] + xmin)/bin_width) 20 | 21 | if bin_number >= 0 and bin_number < histogram_out.shape[0]: 22 | cuda.atomic.add(histogram_out, bin_number, 1) 23 | 24 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 25 | xmin = np.float32(-4.0) 26 | xmax = np.float32(4.0) 27 | histogram_out = np.zeros(shape=10, dtype=np.int32) 28 | 29 | histogram[64, 64](x, xmin, xmax, histogram_out) 30 | 31 | print('input count:', x.shape[0]) 32 | print('histogram:', histogram_out) 33 | print('count:', histogram_out.sum()) 34 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | for i in range(start, x.shape[0], stride): 14 | bin_number = np.int32((x[i] + xmin)/bin_width) 15 | 16 | if bin_number >= 0 or bin_number < histogram_out.shape[0]: 17 | cuda.atomic.add(histogram_out, bin_number, 1) 18 | 19 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 20 | xmin = np.float32(-4.0) 21 | xmax = np.float32(4.0) 22 | histogram_out = np.zeros(shape=10, dtype=np.int32) 23 | 24 | histogram[64, 64](x, xmin, xmax, histogram_out) 25 | 26 | print('input count:', x.shape[0]) 27 | print('histogram:', histogram_out) 28 | print('count:', histogram_out.sum()) 29 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex3a.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit(debug=True) 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | for i in range(start, x.shape[0], stride): 14 | bin_number = np.int32((x[i] + xmin)/bin_width) 15 | 16 | if bin_number >= 0 or bin_number < histogram_out.shape[0]: 17 | cuda.atomic.add(histogram_out, bin_number, 1) 18 | 19 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 20 | xmin = np.float32(-4.0) 21 | xmax = np.float32(4.0) 22 | histogram_out = np.zeros(shape=10, dtype=np.int32) 23 | 24 | histogram[64, 64](x, xmin, xmax, histogram_out) 25 | 26 | print('input count:', x.shape[0]) 27 | print('histogram:', histogram_out) 28 | print('count:', histogram_out.sum()) 
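# NOTE (inferred by comparing with ex1.py and assessment/histogram.py): the
# `(x[i] + xmin)` above should be `(x[i] - xmin)`, and the `or` in the bounds
# check should be `and` — as written the condition is always true, so shifted
# values produce out-of-bounds atomic writes. Both bugs are planted on purpose:
# ex3.py/ex3a.py are debugging exercises, and `debug=True` here lets
# cuda-memcheck pinpoint the offending line. They are left unfixed.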
29 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/images/DLI Header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/images/DLI Header.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/images/run_the_assessment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/images/run_the_assessment.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/img/numba_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/numba_flowchart.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/img/sensor_humidity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/sensor_humidity.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/img/sensor_temp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/sensor_temp.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/section2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/section2.tar.gz -------------------------------------------------------------------------------- /CUDA_Python/课程2/solutions/hypot_stride_solution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | from math import hypot 4 | 5 | @cuda.jit 6 | def hypot_stride(a, b, c): 7 | idx = cuda.grid(1) 8 | stride = cuda.gridsize(1) 9 | 10 | for i in range(idx, a.shape[0], stride): 11 | c[i] = hypot(a[i], b[i]) 12 | 13 | n = 1000000 14 | a = np.random.uniform(-12, 12, n).astype(np.float32) 15 | b = np.random.uniform(-12, 12, n).astype(np.float32) 16 | d_a = cuda.to_device(a) 17 | d_b = cuda.to_device(b) 18 | d_c = cuda.device_array_like(d_b) 19 | 20 | hypot_stride[1, 1](d_a, d_b, d_c) -------------------------------------------------------------------------------- /CUDA_Python/课程2/solutions/monte_carlo_pi_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def monte_carlo_pi_device(rng_states, nsamples, out): 3 | thread_id = cuda.grid(1) 4 | 5 | # Compute pi by drawing random (x, y) points and finding what 6 | # fraction lie inside a unit circle 7 | acc = 0 8 | for i in range(nsamples): 9 | x = xoroshiro128p_uniform_float32(rng_states, thread_id) 10 | y = xoroshiro128p_uniform_float32(rng_states, thread_id) 11 | if x**2 + y**2 <= 1.0: 12 | acc += 1 13 | 14 | out[thread_id] = 4.0 * acc / nsamples 15 | 
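# Host-side driver for the kernel above — this mirrors the 课程3 copy of the
# same solution further down in this repo, with the imports the kernel assumes
# made explicit so the snippet is self-contained.
import numpy as np
from numba import cuda
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32

nsamples = 10000000
threads_per_block = 128
blocks = 32
grid_size = threads_per_block * blocks          # total number of threads

samples_per_thread = int(nsamples / grid_size)  # split the samples across threads
rng_states = create_xoroshiro128p_states(grid_size, seed=1)   # one RNG state per thread
d_out = cuda.device_array(grid_size, dtype=np.float32)

monte_carlo_pi_device[blocks, threads_per_block](rng_states, samples_per_thread, d_out)
print('pi estimate:', d_out.copy_to_host().mean())            # average the per-thread estimates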
-------------------------------------------------------------------------------- /CUDA_Python/课程2/solutions/square_device_solution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | 4 | @cuda.jit 5 | def square_device(a, out): 6 | idx = cuda.grid(1) 7 | out[idx] = a[idx]**2 8 | 9 | n = 4096 10 | a = np.arange(n) 11 | 12 | d_a = cuda.to_device(a) 13 | d_out = cuda.device_array(shape=(n,), dtype=np.float32) 14 | 15 | threads = 32 16 | blocks = 128 17 | 18 | square_device[blocks, threads](d_a, d_out) -------------------------------------------------------------------------------- /CUDA_Python/课程3/assessment/definition.py: -------------------------------------------------------------------------------- 1 | # Use the 'File' menu above to 'Save' after pasting in your own mm_shared function definition. -------------------------------------------------------------------------------- /CUDA_Python/课程3/images/DLI Header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/DLI Header.png -------------------------------------------------------------------------------- /CUDA_Python/课程3/images/mm_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/mm_image.png -------------------------------------------------------------------------------- /CUDA_Python/课程3/images/run_assess_task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/run_assess_task.png -------------------------------------------------------------------------------- /CUDA_Python/课程3/images/run_the_assessment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/run_the_assessment.png -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/add_matrix_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def add_matrix(A, B, C): 3 | i,j = cuda.grid(2) 4 | 5 | C[j,i] = A[j,i] + B[j,i] 6 | 7 | A = np.arange(36*36).reshape(36, 36).astype(np.int32) 8 | B = A * 2 9 | C = np.zeros_like(A) 10 | d_A = cuda.to_device(A) 11 | d_B = cuda.to_device(B) 12 | d_C = cuda.to_device(C) 13 | 14 | blocks = (6,6) 15 | threads_per_block = (6,6) 16 | 17 | add_matrix[blocks, threads_per_block](d_A, d_B, d_C) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/add_matrix_stride_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def add_matrix_stride(A, B, C): 3 | 4 | y, x = cuda.grid(2) 5 | stride_y, stride_x = cuda.gridsize(2) 6 | 7 | for i in range(x, A.shape[0], stride_x): 8 | for j in range(y, A.shape[1], stride_y): 9 | C[i][j] = A[i][j] + B[i][j] 10 | 11 | A = np.arange(64*64).reshape(64, 64).astype(np.int32) 12 | B = A * 2 13 | C = np.zeros_like(A) 14 | d_A = cuda.to_device(A) 15 | d_B = cuda.to_device(B) 16 | d_C = cuda.to_device(C) 17 | 18 
| blocks = (6,6) 19 | threads_per_block = (6,6) 20 | 21 | add_matrix_stride[blocks, threads_per_block](d_A, d_B, d_C) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/col_sums_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def col_sums(a, sums, ds): 3 | idx = cuda.grid(1) 4 | sum = 0.0 5 | 6 | for i in range(ds): 7 | sum += a[i][idx] 8 | 9 | sums[idx] = sum 10 | -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/matrix_add_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def matrix_add(a, b, out, coalesced): 3 | x, y = cuda.grid(2) 4 | 5 | if coalesced == True: 6 | out[y][x] = a[y][x] + b[y][x] 7 | else: 8 | out[x][y] = a[x][y] + b[x][y] 9 | -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/matrix_multiply_solution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | 4 | @cuda.jit 5 | def mm(a, b, c): 6 | column, row = cuda.grid(2) 7 | sum = 0 8 | 9 | for i in range(a.shape[0]): 10 | sum += a[row][i] * b[i][column] 11 | 12 | c[row][column] = sum 13 | 14 | a = np.arange(16).reshape(4,4).astype(np.int32) 15 | b = np.arange(16).reshape(4,4).astype(np.int32) 16 | c = np.zeros_like(a) 17 | 18 | d_a = cuda.to_device(a) 19 | d_b = cuda.to_device(b) 20 | d_c = cuda.to_device(c) 21 | 22 | grid = (2,2) 23 | block = (2,2) 24 | mm[grid, block](d_a, d_b, d_c) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/matrix_multiply_stride_solution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | 4 | @cuda.jit 5 | def mm_stride(A, B, C): 6 | 7 | grid_column, grid_row = cuda.grid(2) 8 | stride_column, stride_row = cuda.gridsize(2) 9 | 10 | for data_row in range(grid_row, A.shape[0], stride_row): 11 | for data_column in range(grid_column, B.shape[1], stride_column): 12 | sum = 0 13 | for i in range(A.shape[1]): # `range(B.shape[0])` is also okay 14 | sum += A[data_row][i] * B[i][data_column] 15 | 16 | C[data_row][data_column] = sum 17 | 18 | n = 1024 19 | a = np.arange(n*n).reshape(n,n).astype(np.int32) 20 | b = np.arange(n*n).reshape(n,n).astype(np.int32) 21 | c = np.zeros((a.shape[0], b.shape[1])).astype(np.int32) 22 | 23 | d_a = cuda.to_device(a) 24 | d_b = cuda.to_device(b) 25 | d_c = cuda.to_device(c) 26 | 27 | ts = (32,32) 28 | bs = (32,32) 29 | 30 | mm_stride[bs, ts](d_a, d_b, d_c) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/monte_carlo_pi_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def monte_carlo_pi_device(rng_states, nsamples, out): 3 | thread_id = cuda.grid(1) 4 | 5 | # Compute pi by drawing random (x, y) points and finding what 6 | # fraction lie inside a unit circle 7 | acc = 0 8 | for i in range(nsamples): 9 | x = xoroshiro128p_uniform_float32(rng_states, thread_id) 10 | y = xoroshiro128p_uniform_float32(rng_states, thread_id) 11 | if x**2 + y**2 <= 1.0: 12 | acc += 1 13 | 14 | out[thread_id] = 4.0 * acc / nsamples 15 | 16 | nsamples = 10000000 17 | threads_per_block = 128 18 | blocks = 32 19 | grid_size = threads_per_block * blocks 20 | 21 | 
samples_per_thread = int(nsamples / grid_size) 22 | rng_states = create_xoroshiro128p_states(grid_size, seed=1) 23 | d_out = cuda.device_array(threads_per_block * blocks, dtype=np.float32) 24 | 25 | monte_carlo_pi_device[blocks, threads_per_block](rng_states, samples_per_thread, d_out) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/tile_transpose_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def tile_transpose(a, transposed): 3 | # `tile_transpose` assumes it is launched with a 32x32 block dimension, 4 | # and that `a` is a multiple of these dimensions. 5 | 6 | # 1) Create 32x32 shared memory array. 7 | tile = cuda.shared.array((32, 32), numba_types.int32) 8 | 9 | # Compute offsets into global input array. Recall for coalesced access we want to map threadIdx.x increments to 10 | # the fastest changing index in the data, i.e. the column in our array. 11 | a_col = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x 12 | a_row = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y 13 | 14 | # 2) Make coalesced read from global memory into shared memory array. 15 | # Note the use of local thread indices for the shared memory write, 16 | # and global offsets for global memory read. 17 | tile[cuda.threadIdx.y, cuda.threadIdx.x] = a[a_row, a_col] 18 | 19 | # 3) Wait for all threads in the block to finish updating shared memory. 20 | cuda.syncthreads() 21 | 22 | # 4) Calculate transposed location for the shared memory array tile 23 | # to be written back to global memory. Note that blockIdx.y*blockDim.y 24 | # and blockIdx.x* blockDim.x are swapped (because we want to write to the 25 | # transpose locations), but we want to keep access coalesced, so match up the 26 | # threadIdx.x to the fastest changing index, i.e. the column 27 | t_col = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.x 28 | t_row = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.y 29 | 30 | # 5) Write from shared memory (using thread indices) 31 | # back to global memory (using grid indices) 32 | # transposing each element within the shared memory array. 
33 | transposed[t_row, t_col] = tile[cuda.threadIdx.x, cuda.threadIdx.y] -------------------------------------------------------------------------------- /CUDA_Python/课程笔记.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程笔记.pdf -------------------------------------------------------------------------------- /DataHub/readme.md: -------------------------------------------------------------------------------- 1 | # 公开数据集汇总 2 | 3 | 4 | 5 | ## 数据集平台 6 | 7 | - [百度](https://aistudio.baidu.com/aistudio/datasetoverview) 8 | - [阿里天池](https://tianchi.aliyun.com/dataset) 9 | - [kaggle](https://www.kaggle.com/datasets) 10 | - [集市](https://www.cvmart.net/dataSets) 11 | 12 | 13 | 14 | ## 多模态 15 | 16 | | 链接 | 标注类型 | 数量 | 17 | | ------------------------------------------------------------ | ----------------------- | ---- | 18 | | [COYO-700M](https://github.com/kakaobrain/coyo-dataset) | 大规模英文图文对 数据集 | 7亿 | 19 | | [img2dataset](https://github.com/rom1504/img2dataset/tree/main) | N个图文对数据集 | | 20 | 21 | 22 | 23 | ## 安全场景 24 | 25 | ### 枪支 26 | 27 | | 链接 | 标注类型 | 数量 | 28 | | ------------------------------------------------------------ | -------- | ---- | 29 | | [URL1](https://www.kaggle.com/datasets/shivanirana63/labeled-guns-data-for-object-detection) | 目标检测 | 3k | 30 | | [URL2](https://www.kaggle.com/code/gattoni/faster-rcnn-guns-object-detection-with-save-load/data) | 目标检测 | 0.3k | 31 | 32 | ### 二维码 33 | 34 | | 链接 | 标注类型 | 数量 | 35 | | ------------------------------------------------------------ | -------- | ---- | 36 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/147099/0) | 目标检测 | 0.6k | 37 | | [URL2](https://aistudio.baidu.com/aistudio/datasetdetail/103078/0) | 目标检测 | 2k | 38 | 39 | ### 火灾烟雾 40 | 41 | | 链接 | 标注类型 | 数量 | 42 | | ------------------------------------------------------------ | -------- | ---- | 43 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/107770/0) | 目标检测 | 6.9k | 44 | | [URL2](https://aistudio.baidu.com/aistudio/datasetdetail/90352) | 目标检测 | 2k | 45 | | [URL3](https://aistudio.baidu.com/aistudio/datasetdetail/84374/0) | 目标检测 | 5k | 46 | 47 | ### 抽烟 48 | 49 | | 链接 | 标注类型 | 数量 | 50 | | ------------------------------------------------------------ | -------- | ---- | 51 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/72629/0) | 目标检测 | 1.5k | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /FPN_pytorch/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-FPN 2 | 3 | _Feature Pyramid Networks_ in PyTorch. 4 | 5 | [原地址](https://github.com/kuangliu/pytorch-fpn) 6 | 7 | References: 8 | [1] [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144) 9 | [2] [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) 10 | 11 | 12 | 13 | # 自己想法 14 | 15 | - 目前工作 16 | 17 | 无意完成 以FPN为基础的RPN网络的fast rcnn,仅了解FPN基本思想即可。 18 | 19 | - 网络结构 20 | 21 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/67784779.jpg) 22 | 23 | 24 | - 基本流程 25 | 26 | 27 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/28743914.jpg) -------------------------------------------------------------------------------- /FPN_pytorch/fpn.py: -------------------------------------------------------------------------------- 1 | '''FPN in PyTorch. 
2 | 3 | See the paper "Feature Pyramid Networks for Object Detection" for more details. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from torch.autograd import Variable 10 | 11 | 12 | class Bottleneck(nn.Module): 13 | expansion = 4 14 | 15 | def __init__(self, in_planes, planes, stride=1): 16 | super(Bottleneck, self).__init__() 17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | out = self.bn3(self.conv3(out)) 35 | out += self.shortcut(x) 36 | out = F.relu(out) 37 | return out 38 | 39 | 40 | class FPN(nn.Module): 41 | ''' 42 | 继承nn.Module,实现自定义网络模型 43 | ''' 44 | def __init__(self, block, num_blocks): 45 | super(FPN, self).__init__() 46 | # 输入通道数 47 | self.in_planes = 64 48 | # nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True) 49 | 50 | # 原论文网络结构 in_channels=3 out_channels=64 51 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 52 | # 通道数BN层的参数是输出通道数out_channels=64 53 | self.bn1 = nn.BatchNorm2d(64) 54 | 55 | # Bottom-up layers 56 | # 自底向上的网络 resnet网络 57 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 58 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 59 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 60 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 61 | 62 | # Top layer (最顶层只有侧边连接,kernel_size=1目的减少通道数,形状不变) 63 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels 减少通道数 64 | 65 | # Smooth layers 平滑层 66 | # 作用:在融合之后还会再采用3*3的卷积核对每个融合结果进行卷积,目的是消除上采样的混叠效应 67 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 68 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 69 | self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 70 | 71 | # Lateral layers 侧边层 72 | # (1*1的卷积核的主要作用是减少卷积核的个数,也就是减少了feature map的个数,并不改变feature map的尺寸大小。) 73 | self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 74 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) 75 | self.latlayer3 = nn.Conv2d( 256, 256, kernel_size=1, stride=1, padding=0) 76 | 77 | def _make_layer(self, block, planes, num_blocks, stride): 78 | ''' 79 | resnet网络 80 | ''' 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def _upsample_add(self, x, y): 89 | ''' 90 | Upsample and add two feature maps. 91 | 上采样 并 将两个feature maps求和 92 | Args: 93 | x: (Variable) top feature map to be upsampled. 将要上采样的 上层feature map 94 | y: (Variable) lateral feature map. 
侧边的feature map 95 | 96 | Returns: 97 | (Variable) added feature map. 98 | 99 | Note in PyTorch, when input size is odd, the upsampled feature map 100 | with `F.upsample(..., scale_factor=2, mode='nearest')` 101 | maybe not equal to the lateral feature map size. 102 | 在PyTorch中,当输入大小为奇数时,请注意上采样的特征映射 103 |     用'F.upsample(...,scale_factor = 2,mode ='nearest')` 104 |     可能不等于横向特征地图尺寸。 105 | 106 | e.g. 107 | original input size: [N,_,15,15] -> 108 | conv2d feature map size: [N,_,8,8] -> 109 | upsampled feature map size: [N,_,16,16] 110 | 111 | So we choose bilinear upsample which supports arbitrary output sizes. 112 | 所以我们选择支持任意输出大小的双线性上采样。 113 | ''' 114 | _,_,H,W = y.size() 115 | # 使用 双线性插值bilinear对x进行上采样,之后与y逐元素相加 116 | return F.upsample(x, size=(H,W), mode='bilinear') + y 117 | 118 | def forward(self, x): 119 | # Bottom-up 自底向上 conv -> batchnmorm -> relu ->maxpool 120 | c1 = F.relu(self.bn1(self.conv1(x))) 121 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1) 122 | 123 | # resnet网络 124 | c2 = self.layer1(c1) 125 | c3 = self.layer2(c2) 126 | c4 = self.layer3(c3) 127 | c5 = self.layer4(c4) 128 | 129 | # Top-down 自顶向下并与侧边相连 130 | p5 = self.toplayer(c5) #减少通道数 131 | p4 = self._upsample_add(p5, self.latlayer1(c4)) 132 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 133 | p2 = self._upsample_add(p3, self.latlayer3(c2)) 134 | 135 | # Smooth 平滑层(在融合之后还会再采用3*3的卷积核对每个融合结果进行卷积,目的是消除上采样的混叠效应) 136 | p4 = self.smooth1(p4) 137 | p3 = self.smooth2(p3) 138 | p2 = self.smooth3(p2) 139 | return p2, p3, p4, p5 140 | 141 | 142 | def FPN101(): 143 | # [2,4,23,3]为FPN101的参数 144 | # return FPN(Bottleneck, [2,4,23,3]) 145 | 146 | #[2,2,2,2]为FPN18的参数 147 | return FPN(Bottleneck, [2,2,2,2]) 148 | 149 | 150 | def test(): 151 | # 新建FPN101网络 152 | net = FPN101() 153 | print('网络结构为') 154 | print(net) 155 | # 前向传播,得到网络输出值 fms即为p2, p3, p4, p5 156 | fms = net(Variable(torch.randn(1,3,224,224))) 157 | print('网络输出的内容为') 158 | for fm in fms: 159 | print(fm.size()) 160 | 161 | test() 162 | -------------------------------------------------------------------------------- /FPN_pytorch/retina_fpn.py: -------------------------------------------------------------------------------- 1 | '''RetinaFPN in PyTorch. 2 | 3 | See the paper "Focal Loss for Dense Object Detection" for more details. 
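Compared with the plain FPN in fpn.py, this variant follows RetinaNet and adds
two extra pyramid levels: p6 from a stride-2 3x3 conv on c5, and p7 from a
stride-2 3x3 conv on relu(p6), so forward() returns (p3, p4, p5, p6, p7).
Minimal usage, mirroring test() at the bottom of this file:

    net = RetinaFPN101()
    fms = net(Variable(torch.randn(1, 3, 600, 900)))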
4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from torch.autograd import Variable 10 | 11 | 12 | class Bottleneck(nn.Module): 13 | expansion = 4 14 | 15 | def __init__(self, in_planes, planes, stride=1): 16 | super(Bottleneck, self).__init__() 17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | out = self.bn3(self.conv3(out)) 35 | out += self.shortcut(x) 36 | out = F.relu(out) 37 | return out 38 | 39 | 40 | class RetinaFPN(nn.Module): 41 | def __init__(self, block, num_blocks): 42 | super(RetinaFPN, self).__init__() 43 | self.in_planes = 64 44 | 45 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 46 | self.bn1 = nn.BatchNorm2d(64) 47 | 48 | # Bottom-up layers 49 | self.layer2 = self._make_layer(block, 64, num_blocks[0], stride=1) 50 | self.layer3 = self._make_layer(block, 128, num_blocks[1], stride=2) 51 | self.layer4 = self._make_layer(block, 256, num_blocks[2], stride=2) 52 | self.layer5 = self._make_layer(block, 512, num_blocks[3], stride=2) 53 | self.conv6 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1) 54 | self.conv7 = nn.Conv2d( 256, 256, kernel_size=3, stride=2, padding=1) 55 | 56 | # Top layer 57 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels 58 | 59 | # Smooth layers 60 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 61 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 62 | 63 | # Lateral layers 64 | self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 65 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) 66 | 67 | def _make_layer(self, block, planes, num_blocks, stride): 68 | strides = [stride] + [1]*(num_blocks-1) 69 | layers = [] 70 | for stride in strides: 71 | layers.append(block(self.in_planes, planes, stride)) 72 | self.in_planes = planes * block.expansion 73 | return nn.Sequential(*layers) 74 | 75 | def _upsample_add(self, x, y): 76 | '''Upsample and add two feature maps. 77 | 78 | Args: 79 | x: (Variable) top feature map to be upsampled. 80 | y: (Variable) lateral feature map. 81 | 82 | Returns: 83 | (Variable) added feature map. 84 | 85 | Note in PyTorch, when input size is odd, the upsampled feature map 86 | with `F.upsample(..., scale_factor=2, mode='nearest')` 87 | maybe not equal to the lateral feature map size. 88 | 89 | e.g. 90 | original input size: [N,_,15,15] -> 91 | conv2d feature map size: [N,_,8,8] -> 92 | upsampled feature map size: [N,_,16,16] 93 | 94 | So we choose bilinear upsample which supports arbitrary output sizes. 
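        The mismatch matters because the two maps are added elementwise: a
        16x16 nearest-neighbour upsample cannot be added to the 15x15 lateral
        map, whereas upsampling directly to y's (H, W) always matches. (On
        PyTorch >= 0.4.1 the equivalent, non-deprecated call is
        F.interpolate(x, size=(H, W), mode='bilinear').)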
95 | ''' 96 | _,_,H,W = y.size() 97 | return F.upsample(x, size=(H,W), mode='bilinear') + y 98 | 99 | def forward(self, x): 100 | # Bottom-up 101 | c1 = F.relu(self.bn1(self.conv1(x))) 102 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1) 103 | c2 = self.layer2(c1) 104 | c3 = self.layer3(c2) 105 | c4 = self.layer4(c3) 106 | c5 = self.layer5(c4) 107 | p6 = self.conv6(c5) 108 | p7 = self.conv7(F.relu(p6)) 109 | # Top-down 110 | p5 = self.toplayer(c5) 111 | p4 = self._upsample_add(p5, self.latlayer1(c4)) 112 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 113 | # Smooth 114 | p4 = self.smooth1(p4) 115 | p3 = self.smooth2(p3) 116 | return p3, p4, p5, p6, p7 117 | 118 | 119 | def RetinaFPN101(): 120 | # return RetinaFPN(Bottleneck, [2,4,23,3]) 121 | return RetinaFPN(Bottleneck, [2,2,2,2]) 122 | 123 | 124 | def test(): 125 | net = RetinaFPN101() 126 | fms = net(Variable(torch.randn(1,3,600,900))) 127 | for fm in fms: 128 | print(fm.size()) 129 | 130 | test() 131 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 Yun Chen 4 | 5 | Original works by: 6 | -------------------------------------------------------- 7 | chainer/chainercv 8 | Copyright (c) 2017 Yusuke Niitani 9 | Licensed under The MIT License 10 | https://github.com/chainer/chainercv/blob/master/LICENSE 11 | -------------------------------------------------------- 12 | Faster R-CNN 13 | Copyright (c) 2015 Microsoft 14 | Licensed under The MIT License 15 | https://github.com/rbgirshick/py-faster-rcnn/blob/master/LICENSE 16 | -------------------------------------------------------- 17 | 18 | Permission is hereby granted, free of charge, to any person obtaining a copy 19 | of this software and associated documentation files (the "Software"), to deal 20 | in the Software without restriction, including without limitation the rights 21 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 22 | copies of the Software, and to permit persons to whom the Software is 23 | furnished to do so, subject to the following conditions: 24 | 25 | The above copyright notice and this permission notice shall be included in 26 | all copies or substantial portions of the Software. 27 | 28 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 30 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 31 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 32 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 33 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 34 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /FasterRcnn_pytorch/__pycache__/trainer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/__pycache__/trainer.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__init__.py -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__pycache__/dataset.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/dataset.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__pycache__/util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/util.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__pycache__/voc_dataset.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/voc_dataset.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/dataset.py: -------------------------------------------------------------------------------- 1 | import torch as t 2 | from .voc_dataset import VOCBboxDataset 3 | from skimage import transform as sktsf 4 | from torchvision import transforms as tvtsf 5 | from . 
import util 6 | import numpy as np 7 | from utils.config import opt 8 | 9 | 10 | def inverse_normalize(img): 11 | """ 12 | 将[-1,1]范围的图像近似还原回[0,255]之间 13 | """ 14 | if opt.caffe_pretrain: 15 | img = img + (np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1)) 16 | return img[::-1, :, :] 17 | # approximate un-normalize for visualize 18 | return (img * 0.225 + 0.45).clip(min=0, max=1) * 255 19 | 20 | 21 | def pytorch_normalze(img): 22 | """ 23 | https://github.com/pytorch/vision/issues/223 24 | return appr -1~1 RGB 25 | 对pytorch格式的图像进行规范化,返回值范围在[-1,1]之间 通道为RGB 26 | """ 27 | normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406], 28 | std=[0.229, 0.224, 0.225]) 29 | img = normalize(t.from_numpy(img)) 30 | return img.numpy() 31 | 32 | 33 | def caffe_normalize(img): 34 | """ 35 | return appr -125-125 BGR 36 | 对caffe格式的图像进行规范化,返回值范围在[-125,125]之间 通道为BGR 37 | """ 38 | img = img[[2, 1, 0], :, :] # RGB-BGR 39 | img = img * 255 40 | mean = np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1) 41 | img = (img - mean).astype(np.float32, copy=True) 42 | return img 43 | 44 | 45 | def preprocess(img, min_size=600, max_size=1000): 46 | """Preprocess an image for feature extraction. 47 | 48 | The length of the shorter edge is scaled to :obj:`self.min_size`. 49 | After the scaling, if the length of the longer edge is longer than 50 | :param min_size: 51 | :obj:`self.max_size`, the image is scaled to fit the longer edge 52 | to :obj:`self.max_size`. 53 | After resizing the image, the image is subtracted by a mean image value 54 | :obj:`self.mean`. 55 | 56 | 预处理图像以进行特征提取。 57 | 较短边的长度缩放为:min_size。 58 | 缩放后,如果长边的长度比min_size或者max_size长,则长边的长度被缩放到max_size 59 | 调整图像大小后,图像减去平均图像值mean 60 | 61 | 图片进行缩放,使得长边小于等于1000,短边小于等于600(至少有一个等于) 62 | 63 | Args: 64 | img (~numpy.ndarray): An image. This is in CHW and RGB format. 65 | The range of its value is :math:`[0, 255]`. 66 | 67 | Returns: 68 | ~numpy.ndarray: A preprocessed image. 69 | 70 | """ 71 | C, H, W = img.shape 72 | scale1 = min_size / min(H, W) 73 | scale2 = max_size / max(H, W) 74 | scale = min(scale1, scale2) 75 | img = img / 255. 
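    # Worked example of the scale choice: for a 500x750 (H x W) image,
    # scale1 = 600/500 = 1.2 and scale2 = 1000/750 ~= 1.33, so scale = 1.2 and
    # the image becomes 600x900 -- the short side reaches min_size while the
    # long side stays within max_size. The division above first brings pixel
    # values to [0, 1] for skimage's resize and the later normalization.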
76 | img = sktsf.resize(img, (C, H * scale, W * scale), mode='reflect') 77 | # both the longer and shorter should be less than 78 | # max_size and min_size 79 | if opt.caffe_pretrain: 80 | normalize = caffe_normalize 81 | else: 82 | normalize = pytorch_normalze 83 | #调用上述方法对img进行规范化 84 | return normalize(img) 85 | 86 | 87 | class Transform(object): 88 | 89 | def __init__(self, min_size=600, max_size=1000): 90 | self.min_size = min_size 91 | self.max_size = max_size 92 | 93 | def __call__(self, in_data): 94 | img, bbox, label = in_data 95 | _, H, W = img.shape 96 | #调用上述方法进行缩放图像 97 | img = preprocess(img, self.min_size, self.max_size) 98 | _, o_H, o_W = img.shape 99 | scale = o_H / H 100 | #对图像对应的bbox也进行同等尺度的缩放 101 | bbox = util.resize_bbox(bbox, (H, W), (o_H, o_W)) 102 | 103 | # horizontally flip 104 | #水平翻转(对img和对应的bbox进行同等尺度的水平翻转)=============================只进行水平翻转 105 | img, params = util.random_flip( 106 | img, x_random=True, return_param=True) 107 | bbox = util.flip_bbox( 108 | bbox, (o_H, o_W), x_flip=params['x_flip']) 109 | 110 | return img, bbox, label, scale 111 | 112 | 113 | class Dataset: 114 | def __init__(self, opt): 115 | self.opt = opt 116 | #初始化VOCBboxDataset,传入 数据集地址 117 | #eg: /data/image/voc/VOCdevkit/VOC2007/ 118 | self.db = VOCBboxDataset(opt.voc_data_dir) 119 | #调用上述方法Transform(图像转化方式),进行初始化 120 | self.tsf = Transform(opt.min_size, opt.max_size) 121 | 122 | def __getitem__(self, idx): 123 | #得到原始img,检测框、标签、困难度 124 | ori_img, bbox, label, difficult = self.db.get_example(idx) 125 | #调用上述方法Transform,执行__call__方法。返回规范化后的img, bbox, label, 转化之后的比例scale 126 | img, bbox, label, scale = self.tsf((ori_img, bbox, label)) 127 | # TODO: check whose stride is negative to fix this instead copy all 128 | # some of the strides of a given numpy array are negative. 129 | 130 | return img.copy(), bbox.copy(), label.copy(), scale 131 | 132 | def __len__(self): 133 | return len(self.db) 134 | 135 | 136 | class TestDataset: 137 | 138 | def __init__(self, opt, split='test', use_difficult=True): 139 | self.opt = opt 140 | self.db = VOCBboxDataset(opt.voc_data_dir, split=split, use_difficult=use_difficult) 141 | 142 | def __getitem__(self, idx): 143 | ori_img, bbox, label, difficult = self.db.get_example(idx) 144 | img = preprocess(ori_img) 145 | return img, ori_img.shape[1:], bbox, label, difficult 146 | 147 | def __len__(self): 148 | return len(self.db) 149 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/voc_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | 4 | import numpy as np 5 | 6 | from .util import read_image 7 | 8 | 9 | class VOCBboxDataset: 10 | """Bounding box dataset for PASCAL `VOC`_. 11 | 边界框数据集 12 | 13 | .. _`VOC`: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/ 14 | 15 | The index corresponds to each image. 16 | 17 | When queried by an index, if :obj:`return_difficult == False`, 18 | this dataset returns a corresponding 19 | :obj:`img, bbox, label`, a tuple of an image, bounding boxes and labels. 20 | This is the default behaviour. 21 | If :obj:`return_difficult == True`, this dataset returns corresponding 22 | :obj:`img, bbox, label, difficult`. :obj:`difficult` is a boolean array 23 | that indicates whether bounding boxes are labeled as difficult or not. 
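    A minimal usage sketch (the root path below is the example path from Args):

        dataset = VOCBboxDataset('/data/image/voc/VOCdevkit/VOC2007/')
        img, bbox, label, difficult = dataset[0]  # __getitem__ is get_example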
24 | 25 | The bounding boxes are packed into a two dimensional tensor of shape 26 | :math:`(R, 4)`, where :math:`R` is the number of bounding boxes in 27 | the image. The second axis represents attributes of the bounding box. 28 | They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the 29 | four attributes are coordinates of the top left and the bottom right 30 | vertices. 31 | 32 | The labels are packed into a one dimensional tensor of shape :math:`(R,)`. 33 | :math:`R` is the number of bounding boxes in the image. 34 | The class name of the label :math:`l` is :math:`l` th element of 35 | :obj:`VOC_BBOX_LABEL_NAMES`. 36 | 37 | The array :obj:`difficult` is a one dimensional boolean array of shape 38 | :math:`(R,)`. :math:`R` is the number of bounding boxes in the image. 39 | If :obj:`use_difficult` is :obj:`False`, this array is 40 | a boolean array with all :obj:`False`. 41 | 42 | The type of the image, the bounding boxes and the labels are as follows. 43 | 44 | * :obj:`img.dtype == numpy.float32` 45 | * :obj:`bbox.dtype == numpy.float32` 46 | * :obj:`label.dtype == numpy.int32` 47 | * :obj:`difficult.dtype == numpy.bool` 48 | 49 | Args: 50 | data_dir (string): Path to the root of the training data. 51 | i.e. "/data/image/voc/VOCdevkit/VOC2007/" 52 | split ({'train', 'val', 'trainval', 'test'}): Select a split of the 53 | dataset. :obj:`test` split is only available for 54 | 2007 dataset. 55 | year ({'2007', '2012'}): Use a dataset prepared for a challenge 56 | held in :obj:`year`. 57 | use_difficult (bool): If :obj:`True`, use images that are labeled as 58 | difficult in the original annotation. 59 | return_difficult (bool): If :obj:`True`, this dataset returns 60 | a boolean array 61 | that indicates whether bounding boxes are labeled as difficult 62 | or not. The default value is :obj:`False`. 63 | 64 | """ 65 | 66 | def __init__(self, data_dir, split='trainval', 67 | use_difficult=False, return_difficult=False, 68 | ): 69 | 70 | # if split not in ['train', 'trainval', 'val']: 71 | # if not (split == 'test' and year == '2007'): 72 | # warnings.warn( 73 | # 'please pick split from \'train\', \'trainval\', \'val\'' 74 | # 'for 2012 dataset. For 2007 dataset, you can pick \'test\'' 75 | # ' in addition to the above mentioned splits.' 76 | # ) 77 | id_list_file = os.path.join( 78 | data_dir, 'ImageSets/Main/{0}.txt'.format(split)) 79 | 80 | self.ids = [id_.strip() for id_ in open(id_list_file)] 81 | self.data_dir = data_dir 82 | self.use_difficult = use_difficult 83 | self.return_difficult = return_difficult 84 | #将voc标签名称赋值给VOCBboxDataset对象 85 | self.label_names = VOC_BBOX_LABEL_NAMES 86 | 87 | def __len__(self): 88 | return len(self.ids) 89 | #dataset用到该方法得到一张图片的各种信息 90 | def get_example(self, i): 91 | """Returns the i-th example. 92 | 93 | Returns a color image and bounding boxes. The image is in CHW format. 94 | The returned image is RGB. 95 | 返回彩色图像和bbox。图像大小为CHW(通道、高、宽),返回图像为RGB 96 | 97 | Args: 98 | i (int): The index of the example. 99 | 100 | Returns: 101 | tuple of an image and bounding boxes 102 | 103 | """ 104 | id_ = self.ids[i] 105 | #=======================================================================在这里读取路径后只拿最后一个\后面的内容 106 | anno = ET.parse( 107 | os.path.join(self.data_dir, 'Annotations', id_ + '.xml')) 108 | bbox = list() 109 | label = list() 110 | difficult = list() 111 | for obj in anno.findall('object'): 112 | # when in not using difficult split, and the object is difficult, skipt it. 
113 | #在不使用困难分割时,对象是difficult,跳过它 114 | if not self.use_difficult and int(obj.find('difficult').text) == 1: 115 | continue 116 | 117 | difficult.append(int(obj.find('difficult').text)) 118 | bndbox_anno = obj.find('bndbox') 119 | # subtract 1 to make pixel indexes 0-based 120 | #减1 以使像素索引基于0 121 | bbox.append([ 122 | int(bndbox_anno.find(tag).text) - 1 123 | for tag in ('ymin', 'xmin', 'ymax', 'xmax')]) 124 | name = obj.find('name').text.lower().strip() 125 | label.append(VOC_BBOX_LABEL_NAMES.index(name)) 126 | bbox = np.stack(bbox).astype(np.float32) 127 | label = np.stack(label).astype(np.int32) 128 | # When `use_difficult==False`, all elements in `difficult` are False. 129 | difficult = np.array(difficult, dtype=np.bool).astype(np.uint8) # PyTorch don't support np.bool 130 | 131 | # Load a image 132 | img_file = os.path.join(self.data_dir, 'JPEGImages', id_ + '.jpg') 133 | img = read_image(img_file, color=True) 134 | 135 | # if self.return_difficult: 136 | # return img, bbox, label, difficult 137 | return img, bbox, label, difficult 138 | 139 | __getitem__ = get_example 140 | 141 | #标签名称 142 | VOC_BBOX_LABEL_NAMES = ( 143 | 'aeroplane', 144 | 'bicycle', 145 | 'bird', 146 | 'boat', 147 | 'bottle', 148 | 'bus', 149 | 'car', 150 | 'cat', 151 | 'chair', 152 | 'cow', 153 | 'diningtable', 154 | 'dog', 155 | 'horse', 156 | 'motorbike', 157 | 'person', 158 | 'pottedplant', 159 | 'sheep', 160 | 'sofa', 161 | 'train', 162 | 'tvmonitor') 163 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/misc/convert_caffe_pretrain.py: -------------------------------------------------------------------------------- 1 | # code from ruotian luo 2 | # https://github.com/ruotianluo/pytorch-faster-rcnn 3 | import torch 4 | from torch.utils.model_zoo import load_url 5 | from torchvision import models 6 | 7 | sd = load_url("https://s3-us-west-2.amazonaws.com/jcjohns-models/vgg16-00b39a1b.pth") 8 | sd['classifier.0.weight'] = sd['classifier.1.weight'] 9 | sd['classifier.0.bias'] = sd['classifier.1.bias'] 10 | del sd['classifier.1.weight'] 11 | del sd['classifier.1.bias'] 12 | 13 | sd['classifier.3.weight'] = sd['classifier.4.weight'] 14 | sd['classifier.3.bias'] = sd['classifier.4.bias'] 15 | del sd['classifier.4.weight'] 16 | del sd['classifier.4.bias'] 17 | 18 | 19 | # speicify the path to save 20 | torch.save(sd, "vgg16_caffe.pth") -------------------------------------------------------------------------------- /FasterRcnn_pytorch/misc/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/misc/demo.jpg -------------------------------------------------------------------------------- /FasterRcnn_pytorch/misc/train_fast.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import ipdb 4 | import matplotlib 5 | from tqdm import tqdm 6 | 7 | from utils.config import opt 8 | from data.dataset import Dataset, TestDataset 9 | from model import FasterRCNNVGG16 10 | from torch.autograd import Variable 11 | from torch.utils import data as data_ 12 | from trainer import FasterRCNNTrainer 13 | from utils import array_tool as at 14 | from utils.vis_tool import visdom_bbox 15 | from utils.eval_tool import eval_detection_voc 16 | 17 | matplotlib.use('agg') 18 | 19 | def eval(dataloader, faster_rcnn, test_num=10000): 20 | pred_bboxes, pred_labels, pred_scores = list(), list(), 
list() 21 | gt_bboxes, gt_labels, gt_difficults = list(), list(), list() 22 | for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in tqdm(enumerate(dataloader)): 23 | sizes = [sizes[0][0], sizes[1][0]] 24 | pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(imgs, [sizes]) 25 | gt_bboxes += list(gt_bboxes_.numpy()) 26 | gt_labels += list(gt_labels_.numpy()) 27 | gt_difficults += list(gt_difficults_.numpy()) 28 | pred_bboxes += pred_bboxes_ 29 | pred_labels += pred_labels_ 30 | pred_scores += pred_scores_ 31 | if ii == test_num: break 32 | 33 | result = eval_detection_voc( 34 | pred_bboxes, pred_labels, pred_scores, 35 | gt_bboxes, gt_labels, gt_difficults, 36 | use_07_metric=True) 37 | return result 38 | 39 | 40 | def train(**kwargs): 41 | opt._parse(kwargs) 42 | 43 | dataset = Dataset(opt) 44 | print('load data') 45 | dataloader = data_.DataLoader(dataset, \ 46 | batch_size=1, \ 47 | shuffle=True, \ 48 | # pin_memory=True, 49 | num_workers=opt.num_workers) 50 | testset = TestDataset(opt) 51 | test_dataloader = data_.DataLoader(testset, 52 | batch_size=1, 53 | num_workers=2, 54 | shuffle=False, \ 55 | # pin_memory=True 56 | ) 57 | faster_rcnn = FasterRCNNVGG16() 58 | print('model construct completed') 59 | trainer = FasterRCNNTrainer(faster_rcnn).cuda() 60 | if opt.load_path: 61 | trainer.load(opt.load_path) 62 | print('load pretrained model from %s' % opt.load_path) 63 | 64 | trainer.vis.text(dataset.db.label_names, win='labels') 65 | best_map = 0 66 | for epoch in range(7): 67 | trainer.reset_meters() 68 | for ii, (img, bbox_, label_, scale, ori_img) in tqdm(enumerate(dataloader)): 69 | scale = at.scalar(scale) 70 | img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda() 71 | img, bbox, label = Variable(img), Variable(bbox), Variable(label) 72 | losses = trainer.train_step(img, bbox, label, scale) 73 | 74 | if (ii + 1) % opt.plot_every == 0: 75 | if os.path.exists(opt.debug_file): 76 | ipdb.set_trace() 77 | 78 | # plot loss 79 | trainer.vis.plot_many(trainer.get_meter_data()) 80 | 81 | # plot groud truth bboxes 82 | ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255 83 | gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], 84 | at.tonumpy(bbox_)[0], 85 | label_[0].numpy()) 86 | trainer.vis.img('gt_img', gt_img) 87 | 88 | # plot predicti bboxes 89 | _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True) 90 | pred_img = visdom_bbox( at.tonumpy(ori_img[0]), 91 | at.tonumpy(_bboxes[0]), 92 | at.tonumpy(_labels[0]).reshape(-1), 93 | at.tonumpy(_scores[0])) 94 | trainer.vis.img('pred_img', pred_img) 95 | 96 | # rpn confusion matrix(meter) 97 | trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm') 98 | # roi confusion matrix 99 | trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float()) 100 | if epoch==4: 101 | trainer.faster_rcnn.scale_lr(opt.lr_decay) 102 | 103 | eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100) 104 | print('eval_result') 105 | trainer.save(mAP=eval_result['map']) 106 | 107 | if __name__ == '__main__': 108 | import fire 109 | 110 | fire.Fire() 111 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .faster_rcnn_vgg16 import FasterRCNNVGG16 2 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/__init__.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/faster_rcnn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/faster_rcnn.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/faster_rcnn_vgg16.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/faster_rcnn_vgg16.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/region_proposal_network.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/region_proposal_network.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/roi_module.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/roi_module.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/roi_module.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from string import Template 3 | 4 | import cupy, torch 5 | import cupy as cp 6 | import torch as t 7 | from torch.autograd import Function 8 | 9 | from model.utils.roi_cupy import kernel_backward, kernel_forward 10 | 11 | Stream = namedtuple('Stream', ['ptr']) 12 | 13 | 14 | @cupy.util.memoize(for_each_device=True) 15 | def load_kernel(kernel_name, code, **kwargs): 16 | cp.cuda.runtime.free(0) 17 | code = Template(code).substitute(**kwargs) 18 | kernel_code = cupy.cuda.compile_with_cache(code) 19 | return kernel_code.get_function(kernel_name) 20 | 21 | 22 | CUDA_NUM_THREADS = 1024 23 | 24 | 25 | def GET_BLOCKS(N, K=CUDA_NUM_THREADS): 26 | return (N + K - 1) // K 27 | 28 | 29 | class RoI(Function): 30 | """ 31 | NOTE:only CUDA-compatible 32 | """ 33 | 34 | def __init__(self, outh, outw, spatial_scale): 35 | self.forward_fn = load_kernel('roi_forward', kernel_forward) 36 | self.backward_fn = load_kernel('roi_backward', kernel_backward) 37 | self.outh, self.outw, self.spatial_scale = outh, outw, spatial_scale 38 | 39 | def forward(self, x, rois): 40 | # NOTE: MAKE SURE input is contiguous too 41 | x = x.contiguous() 42 | rois = rois.contiguous() 43 | self.in_size = B, C, H, W = x.size() 44 | self.N = N = rois.size(0) 45 | output = t.zeros(N, C, self.outh, self.outw).cuda() 46 | self.argmax_data = t.zeros(N, C, self.outh, self.outw).int().cuda() 47 | self.rois = rois 48 | args = [x.data_ptr(), rois.data_ptr(), 49 | output.data_ptr(), 50 | 
self.argmax_data.data_ptr(), 51 | self.spatial_scale, C, H, W, 52 | self.outh, self.outw, 53 | output.numel()] 54 | stream = Stream(ptr=torch.cuda.current_stream().cuda_stream) 55 | self.forward_fn(args=args, 56 | block=(CUDA_NUM_THREADS, 1, 1), 57 | grid=(GET_BLOCKS(output.numel()), 1, 1), 58 | stream=stream) 59 | return output 60 | 61 | def backward(self, grad_output): 62 | ##NOTE: IMPORTANT CONTIGUOUS 63 | # TODO: input 64 | grad_output = grad_output.contiguous() 65 | B, C, H, W = self.in_size 66 | grad_input = t.zeros(self.in_size).cuda() 67 | stream = Stream(ptr=torch.cuda.current_stream().cuda_stream) 68 | args = [grad_output.data_ptr(), 69 | self.argmax_data.data_ptr(), 70 | self.rois.data_ptr(), 71 | grad_input.data_ptr(), 72 | self.N, self.spatial_scale, C, H, W, self.outh, self.outw, 73 | grad_input.numel()] 74 | self.backward_fn(args=args, 75 | block=(CUDA_NUM_THREADS, 1, 1), 76 | grid=(GET_BLOCKS(grad_input.numel()), 1, 1), 77 | stream=stream 78 | ) 79 | return grad_input, None 80 | 81 | 82 | class RoIPooling2D(t.nn.Module): 83 | 84 | def __init__(self, outh, outw, spatial_scale): 85 | super(RoIPooling2D, self).__init__() 86 | self.RoI = RoI(outh, outw, spatial_scale) 87 | 88 | def forward(self, x, rois): 89 | return self.RoI(x, rois) 90 | 91 | 92 | def test_roi_module(): 93 | ## fake data### 94 | B, N, C, H, W, PH, PW = 2, 8, 4, 32, 32, 7, 7 95 | 96 | bottom_data = t.randn(B, C, H, W).cuda() 97 | bottom_rois = t.randn(N, 5) 98 | bottom_rois[:int(N / 2), 0] = 0 99 | bottom_rois[int(N / 2):, 0] = 1 100 | bottom_rois[:, 1:] = (t.rand(N, 4) * 100).float() 101 | bottom_rois = bottom_rois.cuda() 102 | spatial_scale = 1. / 16 103 | outh, outw = PH, PW 104 | 105 | # pytorch version 106 | module = RoIPooling2D(outh, outw, spatial_scale) 107 | x = t.autograd.Variable(bottom_data, requires_grad=True) 108 | rois = t.autograd.Variable(bottom_rois) 109 | output = module(x, rois) 110 | output.sum().backward() 111 | 112 | def t2c(variable): 113 | npa = variable.data.cpu().numpy() 114 | return cp.array(npa) 115 | 116 | def test_eq(variable, array, info): 117 | cc = cp.asnumpy(array) 118 | neq = (cc != variable.data.cpu().numpy()) 119 | assert neq.sum() == 0, 'test failed: %s' % info 120 | 121 | # chainer version,if you're going to run this 122 | # pip install chainer 123 | import chainer.functions as F 124 | from chainer import Variable 125 | x_cn = Variable(t2c(x)) 126 | 127 | o_cn = F.roi_pooling_2d(x_cn, t2c(rois), outh, outw, spatial_scale) 128 | test_eq(output, o_cn.array, 'forward') 129 | F.sum(o_cn).backward() 130 | test_eq(x.grad, x_cn.grad, 'backward') 131 | print('test pass') 132 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__init__.py -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__pycache__/bbox_tools.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/bbox_tools.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__pycache__/creator_tool.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/creator_tool.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__pycache__/roi_cupy.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/roi_cupy.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from model.utils.nms.non_maximum_suppression import non_maximum_suppression -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/__pycache__/_nms_gpu_post_py.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/_nms_gpu_post_py.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/__pycache__/non_maximum_suppression.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/non_maximum_suppression.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/_nms_gpu_post.pyx: -------------------------------------------------------------------------------- 1 | cimport numpy as np 2 | from libc.stdint cimport uint64_t 3 | 4 | import numpy as np 5 | 6 | def _nms_gpu_post(np.ndarray[np.uint64_t, ndim=1] mask, 7 | int n_bbox, 8 | int threads_per_block, 9 | int col_blocks 10 | ): 11 | cdef: 12 | int i, j, nblock, index 13 | uint64_t inblock 14 | int n_selection = 0 15 | uint64_t one_ull = 1 16 | np.ndarray[np.int32_t, ndim=1] selection 17 | np.ndarray[np.uint64_t, ndim=1] remv 18 | 19 | selection = np.zeros((n_bbox,), dtype=np.int32) 20 | remv = np.zeros((col_blocks,), dtype=np.uint64) 21 | 22 | for i in range(n_bbox): 23 | nblock = i // threads_per_block 24 | inblock = i % threads_per_block 25 | 26 | if not (remv[nblock] & one_ull << inblock): 27 | selection[n_selection] = i 28 | 
n_selection += 1 29 | 30 | index = i * col_blocks 31 | for j in range(nblock, col_blocks): 32 | remv[j] |= mask[index + j] 33 | return selection, n_selection 34 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/_nms_gpu_post_py.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def _nms_gpu_post( mask, 5 | n_bbox, 6 | threads_per_block, 7 | col_blocks 8 | ): 9 | n_selection = 0 10 | one_ull = np.array([1],dtype=np.uint64) 11 | selection = np.zeros((n_bbox,), dtype=np.int32) 12 | remv = np.zeros((col_blocks,), dtype=np.uint64) 13 | 14 | for i in range(n_bbox): 15 | nblock = i // threads_per_block 16 | inblock = i % threads_per_block 17 | 18 | if not (remv[nblock] & one_ull << inblock): 19 | selection[n_selection] = i 20 | n_selection += 1 21 | 22 | index = i * col_blocks 23 | for j in range(nblock, col_blocks): 24 | remv[j] |= mask[index + j] 25 | return selection, n_selection 26 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/build.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | ext_modules = [Extension("_nms_gpu_post", ["_nms_gpu_post.pyx"])] 6 | setup( 7 | name="Hello pyx", 8 | cmdclass={'build_ext': build_ext}, 9 | ext_modules=ext_modules 10 | ) 11 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/build/lib.linux-x86_64-3.5/_nms_gpu_post.cpython-35m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/build/lib.linux-x86_64-3.5/_nms_gpu_post.cpython-35m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/build/temp.linux-x86_64-3.5/_nms_gpu_post.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/build/temp.linux-x86_64-3.5/_nms_gpu_post.o -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/roi_cupy.py: -------------------------------------------------------------------------------- 1 | kernel_forward = ''' 2 | extern "C" 3 | __global__ void roi_forward(const float* const bottom_data,const float* const bottom_rois, 4 | float* top_data, int* argmax_data, 5 | const double spatial_scale,const int channels,const int height, 6 | const int width, const int pooled_height, 7 | const int pooled_width,const int NN 8 | ){ 9 | 10 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 11 | if(idx>=NN) 12 | return; 13 | const int pw = idx % pooled_width; 14 | const int ph = (idx / pooled_width) % pooled_height; 15 | const int c = (idx / pooled_width / pooled_height) % channels; 16 | int num = idx / pooled_width / pooled_height / channels; 17 | const int roi_batch_ind = bottom_rois[num * 5 + 0]; 18 | const int roi_start_w = round(bottom_rois[num * 5 + 1] * spatial_scale); 19 | const int roi_start_h = round(bottom_rois[num * 5 + 2] * spatial_scale); 20 | const int roi_end_w = 
round(bottom_rois[num * 5 + 3] * spatial_scale); 21 | const int roi_end_h = round(bottom_rois[num * 5 + 4] * spatial_scale); 22 | // Force malformed ROIs to be 1x1 23 | const int roi_width = max(roi_end_w - roi_start_w + 1, 1); 24 | const int roi_height = max(roi_end_h - roi_start_h + 1, 1); 25 | const float bin_size_h = static_cast<float>(roi_height) 26 | / static_cast<float>(pooled_height); 27 | const float bin_size_w = static_cast<float>(roi_width) 28 | / static_cast<float>(pooled_width); 29 | 30 | int hstart = static_cast<int>(floor(static_cast<float>(ph) 31 | * bin_size_h)); 32 | int wstart = static_cast<int>(floor(static_cast<float>(pw) 33 | * bin_size_w)); 34 | int hend = static_cast<int>(ceil(static_cast<float>(ph + 1) 35 | * bin_size_h)); 36 | int wend = static_cast<int>(ceil(static_cast<float>(pw + 1) 37 | * bin_size_w)); 38 | 39 | // Add roi offsets and clip to input boundaries 40 | hstart = min(max(hstart + roi_start_h, 0), height); 41 | hend = min(max(hend + roi_start_h, 0), height); 42 | wstart = min(max(wstart + roi_start_w, 0), width); 43 | wend = min(max(wend + roi_start_w, 0), width); 44 | bool is_empty = (hend <= hstart) || (wend <= wstart); 45 | 46 | // Define an empty pooling region to be zero 47 | float maxval = is_empty ? 0 : -1E+37; 48 | // If nothing is pooled, argmax=-1 causes nothing to be backprop'd 49 | int maxidx = -1; 50 | const int data_offset = (roi_batch_ind * channels + c) * height * width; 51 | for (int h = hstart; h < hend; ++h) { 52 | for (int w = wstart; w < wend; ++w) { 53 | int bottom_index = h * width + w; 54 | if (bottom_data[data_offset + bottom_index] > maxval) { 55 | maxval = bottom_data[data_offset + bottom_index]; 56 | maxidx = bottom_index; 57 | } 58 | } 59 | } 60 | top_data[idx]=maxval; 61 | argmax_data[idx]=maxidx; 62 | } 63 | ''' 64 | kernel_backward = ''' 65 | extern "C" 66 | __global__ void roi_backward(const float* const top_diff, 67 | const int* const argmax_data,const float* const bottom_rois, 68 | float* bottom_diff, const int num_rois, 69 | const double spatial_scale, int channels, 70 | int height, int width, int pooled_height, 71 | int pooled_width,const int NN) 72 | { 73 | 74 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 75 | //// Important: >= instead of > 76 | if(idx>=NN) 77 | return; 78 | int w = idx % width; 79 | int h = (idx / width) % height; 80 | int c = (idx/ (width * height)) % channels; 81 | int num = idx / (width * height * channels); 82 | 83 | float gradient = 0; 84 | // Accumulate gradient over all ROIs that pooled this element 85 | for (int roi_n = 0; roi_n < num_rois; ++roi_n) { 86 | // Skip if ROI's batch index doesn't match num 87 | if (num != static_cast<int>(bottom_rois[roi_n * 5])) { 88 | continue; 89 | } 90 | 91 | int roi_start_w = round(bottom_rois[roi_n * 5 + 1] 92 | * spatial_scale); 93 | int roi_start_h = round(bottom_rois[roi_n * 5 + 2] 94 | * spatial_scale); 95 | int roi_end_w = round(bottom_rois[roi_n * 5 + 3] 96 | * spatial_scale); 97 | int roi_end_h = round(bottom_rois[roi_n * 5 + 4] 98 | * spatial_scale); 99 | 100 | // Skip if ROI doesn't include (h, w) 101 | const bool in_roi = (w >= roi_start_w && w <= roi_end_w && 102 | h >= roi_start_h && h <= roi_end_h); 103 | if (!in_roi) { 104 | continue; 105 | } 106 | 107 | int offset = (roi_n * channels + c) * pooled_height 108 | * pooled_width; 109 | 110 | // Compute feasible set of pooled units that could have pooled 111 | // this bottom unit 112 | 113 | // Force malformed ROIs to be 1x1 114 | int roi_width = max(roi_end_w - roi_start_w + 1, 1); 115 | int roi_height = max(roi_end_h - roi_start_h + 1, 1); 116 | 117 | float bin_size_h = static_cast<float>(roi_height) 118 | / static_cast<float>(pooled_height); 119 | float bin_size_w = static_cast<float>(roi_width) 120 | / static_cast<float>(pooled_width); 121 | 122 | int phstart = floor(static_cast<float>(h - roi_start_h) 123 | / bin_size_h); 124 | int phend = ceil(static_cast<float>(h - roi_start_h + 1) 125 | / bin_size_h); 126 | int pwstart = floor(static_cast<float>(w - roi_start_w) 127 | / bin_size_w); 128 | int pwend = ceil(static_cast<float>(w - roi_start_w + 1) 129 | / bin_size_w); 130 | 131 | phstart = min(max(phstart, 0), pooled_height); 132 | phend = min(max(phend, 0), pooled_height); 133 | pwstart = min(max(pwstart, 0), pooled_width); 134 | pwend = min(max(pwend, 0), pooled_width); 135 | for (int ph = phstart; ph < phend; ++ph) { 136 | for (int pw = pwstart; pw < pwend; ++pw) { 137 | int index_ = ph * pooled_width + pw + offset; 138 | if (argmax_data[index_] == (h * width + w)) { 139 | gradient += top_diff[index_]; 140 | } 141 | } 142 | } 143 | } 144 | bottom_diff[idx] = gradient; 145 | } 146 | ''' 147 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-image 2 | pprint 3 | cython 4 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 cy 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/array_tool.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/array_tool.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/eval_tool.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/eval_tool.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/vis_tool.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/vis_tool.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/array_tool.py: -------------------------------------------------------------------------------- 1 | """ 2 | tools to convert specified type 3 | """ 4 | import torch as t 5 | import numpy as np 6 | 7 | 8 | def tonumpy(data): 9 | if isinstance(data, np.ndarray): 10 | return data 11 | if isinstance(data, t._TensorBase): 12 | return data.cpu().numpy() 13 | if isinstance(data, t.autograd.Variable): 14 | return tonumpy(data.data) 15 | 16 | 17 | def totensor(data, cuda=True): 18 | if isinstance(data, np.ndarray): 19 | tensor = t.from_numpy(data) 20 | if isinstance(data, t._TensorBase): 21 | tensor = data 22 | if isinstance(data, t.autograd.Variable): 23 | tensor = data.data 24 | if cuda: 25 | tensor = tensor.cuda() 26 | return tensor 27 | 28 | 29 | def tovariable(data): 30 | if isinstance(data, np.ndarray): 31 | return tovariable(totensor(data)) 32 | if isinstance(data, t._TensorBase): 33 | return t.autograd.Variable(data) 34 | if isinstance(data, t.autograd.Variable): 35 | return data 36 | else: 37 | raise ValueError("UnKnow data type: %s, input should be {np.ndarray,Tensor,Variable}" %type(data)) 38 | 39 | 40 | def scalar(data): 41 | if isinstance(data, np.ndarray): 42 | return data.reshape(1)[0] 43 | if isinstance(data, t._TensorBase): 44 | return data.view(1)[0] 45 | if isinstance(data, t.autograd.Variable): 46 | return data.data.view(1)[0] 47 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/config.py: 
-------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | 3 | 4 | # Default Configs for training 5 | # NOTE that, config items could be overwriten by passing argument through command line. 6 | # e.g. --voc-data-dir='./data/' 7 | 8 | class Config: 9 | # data 10 | #更改_bobo 11 | voc_data_dir = '/home/bobo/PycharmProjects/torchProjectss/fasterbychenyun/VOCdevkit/Pascal VOC2007/VOCdevkit/VOC2007' 12 | min_size = 600 # image resize 13 | max_size = 1000 # image resize 14 | num_workers = 8 15 | test_num_workers = 8 16 | 17 | # sigma for l1_smooth_loss 18 | rpn_sigma = 3. 19 | roi_sigma = 1. 20 | 21 | # param for optimizer 22 | # 0.0005 in origin paper but 0.0001 in tf-faster-rcnn 23 | weight_decay = 0.0005 24 | lr_decay = 0.1 # 1e-3 -> 1e-4 25 | lr = 1e-3 26 | 27 | 28 | # visualization 29 | env = 'faster-rcnn' # visdom env 30 | port = 8097 31 | plot_every = 40 # vis every N iter 32 | 33 | # preset 34 | data = 'voc' 35 | pretrained_model = 'vgg16' 36 | 37 | # training 38 | epoch = 14 39 | 40 | 41 | use_adam = False # Use Adam optimizer 42 | use_chainer = False # try match everything as chainer 43 | use_drop = False # use dropout in RoIHead 44 | # debug 45 | debug_file = '/tmp/debugf' 46 | 47 | test_num = 10000 48 | # model 49 | load_path = None 50 | 51 | #caffe_pretrain = False # use caffe pretrained model instead of torchvision 52 | caffe_pretrain = True # use caffe pretrained model instead of torchvision 53 | caffe_pretrain_path = '/home/bobo/PycharmProjects/torchProjectss/fasterbychenyun/simplefasterrcnnpytorchmaster/checkpoints/vgg16_caffe.pth' 54 | 55 | 56 | def _parse(self, kwargs): 57 | state_dict = self._state_dict() 58 | for k, v in kwargs.items(): 59 | if k not in state_dict: 60 | raise ValueError('UnKnown Option: "--%s"' % k) 61 | setattr(self, k, v) 62 | 63 | print('======user config========') 64 | pprint(self._state_dict()) 65 | print('==========end============') 66 | 67 | def _state_dict(self): 68 | return {k: getattr(self, k) for k, _ in Config.__dict__.items() \ 69 | if not k.startswith('_')} 70 | 71 | 72 | opt = Config() 73 | -------------------------------------------------------------------------------- /GhostNet/G-Ghost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/GhostNet/G-Ghost.png -------------------------------------------------------------------------------- /GhostNet/readme.md: -------------------------------------------------------------------------------- 1 | # GhostNets on Heterogeneous Devices via Cheap Operations 2 | 3 | #### 该仓库收录于[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub) 4 | 5 | # 说明 6 | 7 | - CVPR2020 C-GhostNet 8 | - IJCV 2022 G-GhostNet [官方库](https://github.com/huawei-noah/Efficient-AI-Backbones) [原作知乎解读](https://zhuanlan.zhihu.com/p/540547718) 9 | 10 | ## 简读 11 | 12 | | 主题 | 描述 | 13 | | ---- | :----------------------------------------------------------- | 14 | | 问题 | 问题1:深度可分离卷积(逐通道卷积+逐点卷积)、通道打乱等复杂操作在GPU下并行度不高,造成耗时。
问题2:观察到stage级别内部特征存在冗余。 | 15 | | 解决 | 问题1:仅采用普通卷积/分组卷积,加速GPU并行
问题2:在stage级别应用Ghost形式,用"便宜操作"生成冗余特征。 | 16 | | 实现 | C-Ghost: 卷积级别特征冗余,代替原来的一个普通卷积
G-Ghost: stage级别特征冗余,以代替原来的一个stage网络结构 | 17 | 18 | # 具体实现 19 | 20 | 21 | 核心方法`Stage`,结构图如下 22 | 23 | ![](G-Ghost.png) 24 | 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Max deGroot, Ellis Brown 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /RepVGG/readme.md: -------------------------------------------------------------------------------- 1 | # RepVGG: Making VGG-style ConvNets Great Again 2 | 3 | #### 该仓库收录于[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub) 4 | 5 | 6 | 7 | # 说明 8 | 9 | - CVPR-2021 [官方库](https://github.com/DingXiaoH/RepVGG) [原作知乎解读](https://zhuanlan.zhihu.com/p/344324470) 10 | 11 | - 本仓库仅提取网络定义部分,用以分析。 12 | 13 | - 应用广泛 14 | 15 | > 1. [yolov6](https://zhuanlan.zhihu.com/p/533127196)、[yolov7](https://arxiv.org/abs/2207.02696)等:设计结构重参数化的网络进行训练,推理时转化为等价的简单结构,加速推理。 16 | >2. [MNN线性超参数化工具](https://www.yuque.com/mnn/cn/ph6021):设计小模型 --> 训练线性过参数化大模型 --> 转换等价小模型推理 17 | 18 | ## 简读 19 | 20 | | | | 21 | | -------- | ------------------------------------------------------------ | 22 | | 问题 | 问题1:resnet等多分支结构,造成内存消耗,推理速度下降。
问题2:depthwise等复杂操作,造成内存消耗,硬件支持差。 | 23 | | 创新点 | RepVGG是VGG结构,快速、省内存、灵活
解决问题1:推理时无分支
解决问题2:仅包含3x3conv+ReLU的VGG style结构,无复杂操作 | 24 | | 具体实现 | 解耦训练和推理的网络结构。
训练:多分支提升性能
推理:结构重参数化,转为等价的VGG style结构。 | 25 | 26 | # 具体实现 27 | 28 | 核心方法`get_equivalent_kernel_bias` 29 | 30 | 1. 融合BN 31 | 32 | `conv layer + BN layer `--> `conv layer` 33 | 34 | - [详解BN层](https://blog.csdn.net/ECNU_LZJ/article/details/104203604) 35 | 36 | - [卷积与BN层融合公式](https://blog.csdn.net/oYeZhou/article/details/112802348) 37 | 38 | 2. 其余分支转为3x3卷积 39 | 40 | 1x1conv -> 3x3conv 41 | 42 | bn->3x3conv 43 | 44 | 3. 三个分支合并 45 | 46 | `conv(x, W1) + conv(x, W2) + conv(x, W3) = conv(x, W1+W2+W3))` 47 | 48 | ![](repvgg.png) 49 | 50 | -------------------------------------------------------------------------------- /RepVGG/repvgg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/RepVGG/repvgg.png -------------------------------------------------------------------------------- /SSD_pytorch/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/checkpoint/.gitkeep -------------------------------------------------------------------------------- /SSD_pytorch/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT 2 | import torch 3 | import cv2 4 | import numpy as np 5 | 6 | def detection_collate(batch): 7 | """Custom collate fn for dealing with batches of images that have a different 8 | number of associated object annotations (bounding boxes). 9 | 10 | Arguments: 11 | batch: (tuple) A tuple of tensor images and lists of annotations 12 | 13 | Return: 14 | A tuple containing: 15 | 1) (tensor) batch of images stacked on their 0 dim 16 | 2) (list of tensors) annotations for a given image are stacked on 17 | 0 dim 18 | """ 19 | targets = [] 20 | imgs = [] 21 | for sample in batch: 22 | imgs.append(sample[0]) 23 | targets.append(torch.FloatTensor(sample[1])) 24 | return torch.stack(imgs, 0), targets 25 | 26 | 27 | def base_transform(image, size, mean): 28 | x = cv2.resize(image, (size, size)).astype(np.float32) 29 | x -= mean 30 | x = x.astype(np.float32) 31 | return x 32 | 33 | 34 | class BaseTransform: 35 | def __init__(self, size, mean): 36 | self.size = size 37 | self.mean = np.array(mean, dtype=np.float32) 38 | 39 | def __call__(self, image, boxes=None, labels=None): 40 | return base_transform(image, self.size, self.mean), boxes, labels 41 | -------------------------------------------------------------------------------- /SSD_pytorch/data/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/data/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/data/__pycache__/voc0712.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/data/__pycache__/voc0712.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules 
import * 3 | -------------------------------------------------------------------------------- /SSD_pytorch/models/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/__pycache__/box_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/box_utils.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/__pycache__/ssd.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/ssd.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/__pycache__/detection.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/detection.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/__pycache__/prior_box.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/prior_box.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ..box_utils import decode, nms 4 | from SSD_pytorch.utils.config import opt 5 | 6 | 7 | class Detect(Function): 8 | """At test time, Detect is the final layer of SSD. Decode location preds, 9 | apply non-maximum suppression to location predictions based on conf 10 | scores and threshold to a top_k number of output predictions for both 11 | confidence score and locations. 12 | """ 13 | def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh): 14 | self.num_classes = num_classes 15 | self.background_label = bkg_label 16 | self.top_k = top_k 17 | # Parameters used in nms. 
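# 补充注释(示意):top_k 限制每类最终保留的预测框数,conf_thresh 过滤低置信度框;
# 本仓库 utils/config.py 中默认 confidence_threshold=0.01、top_k=5(见后文 config)。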
18 | self.nms_thresh = nms_thresh 19 | if nms_thresh <= 0: 20 | raise ValueError('nms_threshold must be non negative.') 21 | self.conf_thresh = conf_thresh 22 | self.variance = opt.voc['variance'] 23 | 24 | def forward(self, loc_data, conf_data, prior_data): 25 | """ 26 | Args: 27 | loc_data: (tensor) Loc preds from loc layers 28 | Shape: [batch,num_priors*4] 29 | conf_data: (tensor) Shape: Conf preds from conf layers 30 | Shape: [batch*num_priors,num_classes] 31 | prior_data: (tensor) Prior boxes and variances from priorbox layers 32 | Shape: [1,num_priors,4] 33 | """ 34 | num = loc_data.size(0) # batch size 35 | num_priors = prior_data.size(0) 36 | output = torch.zeros(num, self.num_classes, self.top_k, 5) 37 | conf_preds = conf_data.view(num, num_priors, 38 | self.num_classes).transpose(2, 1) 39 | 40 | # Decode predictions into bboxes. 41 | for i in range(num): 42 | decoded_boxes = decode(loc_data[i], prior_data, self.variance) 43 | # For each class, perform nms 44 | conf_scores = conf_preds[i].clone() 45 | 46 | for cl in range(1, self.num_classes): 47 | c_mask = conf_scores[cl].gt(self.conf_thresh) 48 | scores = conf_scores[cl][c_mask] 49 | if scores.dim() == 0: 50 | continue 51 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) 52 | boxes = decoded_boxes[l_mask].view(-1, 4) 53 | # idx of highest scoring and non-overlapping boxes per class 54 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k) 55 | output[i, cl, :count] = \ 56 | torch.cat((scores[ids[:count]].unsqueeze(1), 57 | boxes[ids[:count]]), 1) 58 | flt = output.contiguous().view(num, -1, 5) 59 | _, idx = flt[:, :, 0].sort(1, descending=True) 60 | _, rank = idx.sort(1) 61 | flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0) 62 | return output 63 | -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from math import sqrt as sqrt 3 | from itertools import product as product 4 | import torch 5 | 6 | 7 | class PriorBox(object): 8 | """Compute priorbox coordinates in center-offset form for each source 9 | feature map. 
10 | 对于每个feature map,生成预测框(中心坐标及偏移量) 11 | """ 12 | def __init__(self, cfg): 13 | super(PriorBox, self).__init__() 14 | # 300 15 | self.image_size = cfg['min_dim'] 16 | # number of priors for feature map location (either 4 or 6) 17 | # 每个网格的预测框数目 (4 or 6) 18 | self.num_priors = len(cfg['aspect_ratios']) 19 | #方差 20 | self.variance = cfg['variance'] or [0.1] 21 | # 值为[38, 19, 10, 5, 3, 1] 即feature map的尺寸大小 22 | self.feature_maps = cfg['feature_maps'] 23 | # s_k 表示先验框大小相对于图片的比例,而 s_{min} 和 s_{max} 表示比例的最小值与最大值 24 | # min_sizes和max_sizes用来计算s_k,s_k_prime,以便计算 长宽比为1时的两个w.h 25 | # 各个特征图的先验框尺度 [30, 60, 111, 162, 213, 264] 26 | self.min_sizes = cfg['min_sizes'] 27 | # [60, 111, 162, 213, 264, 315] 28 | self.max_sizes = cfg['max_sizes'] 29 | # 感受野大小,即相对于原图的缩小倍数 30 | self.steps = cfg['steps'] 31 | # 纵横比[[2], [2, 3], [2, 3], [2, 3], [2], [2]] 32 | self.aspect_ratios = cfg['aspect_ratios'] 33 | # True 34 | self.clip = cfg['clip'] 35 | # VOC 36 | self.version = cfg['name'] 37 | for v in self.variance: 38 | if v <= 0: 39 | raise ValueError('Variances must be greater than 0') 40 | 41 | def forward(self): 42 | # mean 是保存预测框的列表 43 | mean = [] 44 | # 遍历不同feature map的尺寸大小 45 | for k, f in enumerate(self.feature_maps): 46 | # product用于求多个可迭代对象的笛卡尔积,它跟嵌套的 for 循环等价 47 | # repeat用于指定重复生成序列的次数。 48 | # 参考:http://funhacks.net/2017/02/13/itertools/ 49 | # 即若f为2,则i,j取值为00,01,10,11。即遍历每一个可能 50 | 51 | # 当k=0,f=38时,range(f)的值为(0,1,...,37)则product(range(f), repeat=2)的所有取值为(0,0)(0,1)...直到(37,0),,,(37,37) 52 | # 遍历一个feature map上的每一个网格 53 | for i, j in product(range(f), repeat=2): 54 | # fk 是第 k 个 feature map 的大小 55 | #image_size=300 steps为每层feature maps的感受野 56 | f_k = self.image_size / self.steps[k] 57 | # 单位中心unit center x,y 58 | # 每一个网格的中心,设置为:(i+0.5|fk|,j+0.5|fk|),其中,|fk| 是第 k 个 feature map 的大小,同时,i,j∈[0,|fk|) 59 | cx = (j + 0.5) / f_k 60 | cy = (i + 0.5) / f_k 61 | 62 | 63 | # 总体上:先添加长宽比为1的两个w、h(比较特殊),再通过循环添加其他长宽比 64 | # 长宽比aspect_ratio: 1 65 | # 真实大小rel size: min_size 66 | # 先验框大小相对于图片的比例 67 | #计算s_k 是为了求解w、h 68 | s_k = self.min_sizes[k]/self.image_size 69 | # 由于长宽比为1,则w=s_k h=s_k 70 | mean += [cx, cy, s_k, s_k] 71 | 72 | # 对于 aspect ratio 为 1 时,还增加了一个 default box长宽比aspect_ratio: 1 73 | # rel size: sqrt(s_k * s_(k+1)) 74 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size)) 75 | # 由于长宽比为1,则w=s_k_prime h=s_k_prime 76 | mean += [cx, cy, s_k_prime, s_k_prime] 77 | 78 | # 其余的长宽比 79 | for ar in self.aspect_ratios[k]: 80 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)] 81 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)] 82 | # 将mean的list转化为tensor 83 | output = torch.Tensor(mean).view(-1, 4) 84 | 85 | # clip:True 将输入input张量每个元素的夹紧到区间 [min,max],并返回结果到一个新张量 86 | # 操作为 如果元素>max,则置为max。min类似 87 | if self.clip: 88 | output.clamp_(max=1, min=0) 89 | return output 90 | -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .l2norm import L2Norm 2 | from .multibox_loss import MultiBoxLoss 3 | 4 | __all__ = ['L2Norm', 'MultiBoxLoss'] 5 | -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/__init__.cpython-35.pyc 
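上文 prior_box.py 的 forward 逐网格生成先验框。下面用一段独立小脚本核对该配置下的先验框总数(示意代码,配置值取自后文 utils/config.py 的 voc 字典,计算方式与 forward 中的循环一致):

```python
# 估算 SSD300 先验框总数:每个网格 = 长宽比为1的两个框 + 其余每个长宽比各两个框(ar 与 1/ar)
feature_maps = [38, 19, 10, 5, 3, 1]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

total = sum(f * f * (2 + 2 * len(ars))
            for f, ars in zip(feature_maps, aspect_ratios))
print(total)  # 8732,即 PriorBox().forward() 的输出形状为 [8732, 4]
```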
-------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__pycache__/init_weights.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/init_weights.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__pycache__/l2norm.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/l2norm.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__pycache__/multibox_loss.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/multibox_loss.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/init_weights.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | ''' 4 | 使用xavier方法来初始化vgg后面的新增层、loc用于回归层、conf用于分类层 的权重 5 | ''' 6 | def xavier(param): 7 | ''' 8 | 使用xavier算法初始化新增层的权重 9 | ''' 10 | init.xavier_uniform(param) 11 | 12 | 13 | def weights_init(m): 14 | if isinstance(m, nn.Conv2d): 15 | xavier(m.weight.data) 16 | m.bias.data.zero_() -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd import Variable 5 | import torch.nn.init as init 6 | 7 | class L2Norm(nn.Module): 8 | def __init__(self,n_channels, scale): 9 | super(L2Norm,self).__init__() 10 | self.n_channels = n_channels 11 | self.gamma = scale or None 12 | self.eps = 1e-10 13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | init.constant(self.weight,self.gamma) 18 | 19 | def forward(self, x): 20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 21 | #x /= norm 22 | x = torch.div(x,norm) 23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 24 | return out 25 | -------------------------------------------------------------------------------- /SSD_pytorch/readme.md: -------------------------------------------------------------------------------- 1 | # 重构SSD代码实现 2 | 3 | ---------- 4 | 5 | 该仓库基于[Max deGroot](https://github.com/amdegroot)与[Ellis Brown](https://github.com/ellisbrown)的[ssd.pytorch](https://github.com/amdegroot/ssd.pytorch)进行的,非常感谢他们无私的奉献。 6 | 7 | 8 | - [原地址](https://github.com/amdegroot/ssd.pytorch) 9 | - [原地址的加注释版本](https://github.com/bobo0810/pytorchSSD) 10 | - [重构版本](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/SSD_pytorch) 强烈推荐!(即本仓库) 11 | 12 | ---------- 13 | 14 | # 目前支持: 15 | 16 | - 数据集:原作者支持VOC、COCO,该仓库仅支持VOC,如果有时间,考虑将COCO加上。 17 | - 网络:支持SSD300 18 | 19 | # 原因: 20 | 21 | 大牛们写代码果然不拘小节,结构混乱依然不影响他们这么优秀。强迫症犯了,一周时间理解源码,一天内重构完成。哇,世界清爽了~ 22 | 23 | ###### 
注:该项目功能上并未进行任何修改,仅做重构,用于理解。 24 | 25 | 26 | # 相比原作者的特点: 27 | 28 | - 所有参数均可在config.py中设置 29 | - 重新整理结构,并加入大量代码注释 30 | 31 | ### 环境: 32 | 33 | | python版本 | pytorch版本 | 34 | |------------|-------------| 35 | | 3.5 | 0.3.0 | 36 | 37 | ---------- 38 | 39 | # 一般项目结构 40 | 41 | 1、定义网络 42 | 43 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/16409622.jpg) 44 | 45 | 2、封装数据集 46 | 47 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/38894621.jpg) 48 | 49 | 3、工具类 50 | 51 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/98583532.jpg) 52 | 53 | 4、主函数 54 | 55 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/32257225.jpg) 56 | 57 | 58 | ---------- 59 | 60 | # SSD网络结构 61 | 62 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/78466722.jpg) 63 | 64 | - vgg16网络结构 65 | 66 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/87243004.jpg) 67 | 68 | ---------- 69 | 70 | # 准备数据集: 71 | 下载VOC2007和VOC2012的数据集,并在utils/config.py中的voc_data_root配置数据集的根目录。 72 | ``` 73 | VOCdevkit 74 | │ 75 | └───VOC2007 76 | │ │ JPEGImages 77 | │ │ ImageSets 78 | │ │ Annotations 79 | │ │ ... 80 | │ 81 | └───VOC2012 82 | │ │ JPEGImages 83 | │ │ ImageSets 84 | │ │ Annotations 85 | │ │ ... 86 | ``` 87 | 88 | ---------- 89 | 90 | # Trian: 91 | 92 | 作用:使用VOC2007和2012的训练集+验证集 开始训练 93 | 94 | 1、开启Visdom(类似TnsorFlow的tensorboard,可视化工具) 95 | ``` 96 | # First install Python server and client 97 | pip install visdom 98 | # Start the server 99 | python -m visdom.server 100 | ``` 101 | 2、下载SSD的基础网络VGG16(去掉fc层) 102 | 103 | 下载地址:[vgg16_reducedfc.pth](https://pan.baidu.com/s/19Iumt072GMiFGlS5lVNy1Q) 104 | 105 | 下载完成后将其放置在checkpoint文件夹下即可。也可通过配置config.py中basenet的路径。 106 | 107 | 3、开始训练 108 | 109 | 在main.py中将train()注释取消,其他方法注释掉,即可运行。 110 | 111 | ---------- 112 | 113 | # Eval: 114 | 115 | 作用:VOC2007测试集,计算各分类AP及mAP 116 | 117 | 1、在config.py中load_model_path配置预训练模型的路径 118 | 119 | 预训练模型下载:[ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA) 120 | 121 | 122 | 2、在main.py中将eval()注释取消,其他方法注释掉,即可运行。 123 | 124 | ---------- 125 | 126 | # Test: 127 | 128 | 功能:VOC2007测试集,将预测结果写入txt 129 | 130 | 1、在config.py中load_model_path配置预训练模型的路径 131 | 132 | 预训练模型下载:[ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA) 133 | 134 | 2、在main.py中将test()注释取消,其他方法注释掉,即可运行。 135 | 136 |

结果 137 | 138 | (图片略) 139 |
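以上 Train/Eval/Test(及下节 Predict)的切换方式可概括如下(示意,函数名按 readme 描述推断,具体以仓库 main.py 为准):

```python
# main.py 调用示意:每次只取消注释一个入口,其余保持注释
# train()    # 训练:VOC2007+2012 训练集+验证集
# eval()     # 验证:VOC2007 测试集,计算各分类 AP 及 mAP
# test()     # 测试:将 VOC2007 测试集预测结果写入 txt
# predict()  # 预测:可视化一张预测图片
```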
140 | 141 | ---------- 142 | 143 | # Predict: 144 | 145 | 功能:可视化一张预测图片 146 | 147 | 1、在config.py中load_model_path配置预训练模型的路径 148 | 149 | 预训练模型下载:[ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA) 150 | 2、在main.py中将predict()注释取消,其他方法注释掉,即可运行。 151 | 152 | 153 |

原图 154 | 155 | (图片略) 156 |
157 | 158 | 159 |

预处理之后的图像 160 | 161 | (图片略) 162 |
163 | 164 | 165 | 166 |

预测结果 167 | 168 | (图片略) 169 |
170 | 171 | ---------- 172 | 173 | # 关于作者 174 | 175 | - 原作者 [Max deGroot](https://github.com/amdegroot)、[Ellis Brown](https://github.com/ellisbrown) 176 | 177 | - 本仓库作者 [Mr.Li](https://github.com/bobo0810) -------------------------------------------------------------------------------- /SSD_pytorch/temp/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/temp/test.png -------------------------------------------------------------------------------- /SSD_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .augmentations import SSDAugmentation -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/augmentations.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/augmentations.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/eval_untils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/eval_untils.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/timer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/timer.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/visualize.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/visualize.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # power by Mr.Li 3 | # 设置默认参数 4 | import os.path 5 | class DefaultConfig(): 6 | env = 'SSD_' # visdom 环境的名字 7 | visdom=True # 是否可视化 8 | # 目前支持的网络 9 | model = 'vgg16' 10 | 11 | 12 | voc_data_root='/home/bobo/data/VOCdevkit/' # VOC数据集根目录,该文件夹下有两个子文件夹。一个叫VOC2007,一个叫VOC2012 13 | 14 | # 基础网络,即特征提取网络(去掉全连接的预训练模型vgg16) 15 | 
basenet='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/vgg16_reducedfc.pth' #应为全路径 预训练好的去掉全连接层的vgg16模型 16 | batch_size = 32 # 训练集的batch size 17 | start_iter=0 #训练从第几个item开始 18 | num_workers = 4 # 加载数据时的线程数 19 | use_gpu = True # user GPU or not 20 | lr = 0.001 # 初始的学习率 21 | momentum=0.9 #优化器的动量值 22 | weight_decay=5e-4 #随机梯度下降SGD的权重衰减 23 | gamma=0.1 # Gamma update for SGD 学习率调整参数 24 | 25 | checkpoint_root ='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/' #保存模型的目录 26 | # load_model_path = None # 加载预训练的模型的路径,为None代表不加载 27 | load_model_path ='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/ssd300_COCO_100000.pth' 28 | # load_model_path='C:\\Users\\Administrator\\Desktop\\ssd300_COCO_10000.pth' 29 | 30 | 31 | # gets home dir cross platform 32 | HOME = os.path.expanduser("~") 33 | # 使边界框漂亮 34 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 35 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 36 | MEANS = (104, 117, 123) 37 | # SSD300 配置 38 | voc = { 39 | 'num_classes': 21, # 分类类别20+背景1 40 | 'lr_steps': (80000, 100000, 120000), 41 | 'max_iter': 120000, # 迭代次数 42 | 'feature_maps': [38, 19, 10, 5, 3, 1], 43 | 'min_dim': 300, # 当前SSD300只支持大小300×300的数据集训练 44 | 'steps': [8, 16, 32, 64, 100, 300], # 感受野,相对于原图缩小的倍数 45 | 'min_sizes': [30, 60, 111, 162, 213, 264], 46 | 'max_sizes': [60, 111, 162, 213, 264, 315], 47 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], 48 | 'variance': [0.1, 0.2], # 方差 49 | 'clip': True, 50 | 'name': 'VOC', 51 | } 52 | 53 | # 验证 54 | confidence_threshold=0.01 # 检测置信度阈值 or 0.05 55 | top_k=5 # 进一步限制要解析的预测数量 56 | cleanup=True # 清除并删除eval后的结果文件 57 | temp= '/home/bobo/windowsPycharmProject/SSD_pytorch/temp' #保存验证的临时文件 58 | annopath = os.path.join(voc_data_root, 'VOC2007', 'Annotations', '%s.xml') 59 | imgpath = os.path.join(voc_data_root, 'VOC2007', 'JPEGImages', '%s.jpg') 60 | imgsetpath = os.path.join(voc_data_root, 'VOC2007', 'ImageSets', 61 | 'Main', '{:s}.txt') 62 | 63 | #测试 64 | temp_test='/home/bobo/windowsPycharmProject/SSD_pytorch/temp/' # 保存测试集(VOC2007测试集)的网络预测结果 65 | 66 | #预测,可视化一张预测图片 67 | test_img='/home/bobo/windowsPycharmProject/SSD_pytorch/temp/test.png' 68 | 69 | 70 | #初始化该类的一个对象 71 | opt=DefaultConfig() -------------------------------------------------------------------------------- /SSD_pytorch/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | class Timer(object): 3 | """A simple timer.""" 4 | def __init__(self): 5 | self.total_time = 0. 6 | self.calls = 0 7 | self.start_time = 0. 8 | self.diff = 0. 9 | self.average_time = 0. 
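# 用法示意(假设):t = Timer(); t.tic(); <待计时代码>; t.toc() 返回平均耗时,
# toc(average=False) 返回本次 tic 以来的耗时;见下方 tic/toc 实现。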
10 | 11 | def tic(self): 12 | # using time.time instead of time.clock because time time.clock 13 | # does not normalize for multithreading 14 | self.start_time = time.time() 15 | 16 | def toc(self, average=True): 17 | self.diff = time.time() - self.start_time 18 | self.total_time += self.diff 19 | self.calls += 1 20 | self.average_time = self.total_time / self.calls 21 | if average: 22 | return self.average_time 23 | else: 24 | return self.diff -------------------------------------------------------------------------------- /SSD_pytorch/utils/visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:utf-8 -*- 3 | # power by Mr.Li 4 | import visdom 5 | import time 6 | import numpy as np 7 | import torch 8 | class Visualizer(object): 9 | ''' 10 | 封装了visdom的基本操作,但是你仍然可以通过`self.vis.function` 11 | 调用原生的visdom接口 12 | ''' 13 | def __init__(self, env='default', **kwargs): 14 | self.vis = visdom.Visdom(env=env, **kwargs) 15 | # 画的第几个数,相当于横座标 16 | # 保存(’loss',23) 即loss的第23个点 17 | self.index = {} 18 | self.log_text = '' 19 | def reinit(self,env='default',**kwargs): 20 | ''' 21 | 修改visdom的配置 重新初始化 22 | ''' 23 | self.vis = visdom.Visdom(env=env,**kwargs) 24 | return self 25 | def plot_many(self, d): 26 | ''' 27 | 一次plot多个损失图形 28 | @params d: dict (name,value) i.e. ('loss',0.11) 29 | ''' 30 | for k, v in d.items(): 31 | self.plot(k, v) 32 | def img_many(self, d): 33 | ''' 34 | 一次画多个图像 35 | ''' 36 | for k, v in d.items(): 37 | self.img(k, v) 38 | def plot(self, name, y,**kwargs): 39 | ''' 40 | self.plot('loss',1.00) 41 | ''' 42 | #得到下标序号 43 | x = self.index.get(name, 0) 44 | self.vis.line(Y=np.array([y]), X=np.array([x]), 45 | win=name,#窗口名 46 | opts=dict(title=name), 47 | update=None if x == 0 else 'append', #按照append的画图形 48 | **kwargs 49 | ) 50 | #下标累加1 51 | self.index[name] = x + 1 52 | def img(self, name, img_,**kwargs): 53 | ''' 54 | self.img('input_img',t.Tensor(64,64)) 55 | self.img('input_imgs',t.Tensor(3,64,64)) 56 | self.img('input_imgs',t.Tensor(100,1,64,64)) 57 | self.img('input_imgs',t.Tensor(100,3,64,64),nrows=10) 58 | 59 | !!!don‘t ~~self.img('input_imgs',t.Tensor(100,64,64),nrows=10)~~!!! 60 | ''' 61 | self.vis.images(img_.cpu().numpy(), 62 | win=name, 63 | opts=dict(title=name), 64 | **kwargs 65 | ) 66 | def log(self,info,win='log_text'): 67 | ''' 68 | self.log({'loss':1,'lr':0.0001}) 69 | 打印日志 70 | ''' 71 | 72 | self.log_text += ('[{time}] {info}
'.format( 73 | time=time.strftime('%m%d_%H%M%S'),\ 74 | info=info)) 75 | self.vis.text(self.log_text,win) 76 | def __getattr__(self, name): 77 | return getattr(self.vis, name) 78 | 79 | def create_vis_plot(self,_xlabel, _ylabel, _title, _legend): 80 | viz = visdom.Visdom() 81 | ''' 82 | 新增可视化图形 83 | ''' 84 | return viz.line( 85 | X=torch.zeros((1,)).cpu(), 86 | Y=torch.zeros((1, 3)).cpu(), 87 | opts=dict( 88 | xlabel=_xlabel, 89 | ylabel=_ylabel, 90 | title=_title, 91 | legend=_legend 92 | ) 93 | ) 94 | 95 | def update_vis_plot(self,iteration, loc, conf, window1, window2, update_type, 96 | epoch_size=1): 97 | ''' 98 | 可视化图形里更新数据 99 | ''' 100 | viz = visdom.Visdom() 101 | viz.line( 102 | X=torch.ones((1, 3)).cpu() * iteration, 103 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu() / epoch_size, 104 | win=window1, 105 | update=update_type 106 | ) 107 | # initialize epoch plot on first iteration 108 | 109 | if iteration == 0: 110 | viz.line( 111 | X=torch.zeros((1, 3)).cpu(), 112 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu(), 113 | win=window2, 114 | update=True 115 | ) -------------------------------------------------------------------------------- /UNet_pytorch/dice_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function, Variable 3 | 4 | class DiceCoeff(Function): 5 | """Dice coeff for individual examples""" 6 | 7 | def forward(self, input, target): 8 | self.save_for_backward(input, target) 9 | eps = 0.0001 10 | self.inter = torch.dot(input.view(-1), target.view(-1)) 11 | self.union = torch.sum(input) + torch.sum(target) + eps 12 | 13 | t = (2 * self.inter.float() + eps) / self.union.float() 14 | return t 15 | 16 | # This function has only a single output, so it gets only one gradient 17 | def backward(self, grad_output): 18 | 19 | input, target = self.saved_variables 20 | grad_input = grad_target = None 21 | 22 | if self.needs_input_grad[0]: 23 | grad_input = grad_output * 2 * (target * self.union - self.inter) \ 24 | / self.union * self.union 25 | if self.needs_input_grad[1]: 26 | grad_target = None 27 | 28 | return grad_input, grad_target 29 | 30 | 31 | def dice_coeff(input, target): 32 | """Dice coeff for batches""" 33 | if input.is_cuda: 34 | s = torch.FloatTensor(1).cuda().zero_() 35 | else: 36 | s = torch.FloatTensor(1).zero_() 37 | 38 | for i, c in enumerate(zip(input, target)): 39 | s = s + DiceCoeff().forward(c[0], c[1]) 40 | 41 | return s / (i + 1) 42 | -------------------------------------------------------------------------------- /UNet_pytorch/eval.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from dice_loss import dice_coeff 5 | 6 | 7 | def eval_net(net, dataset, gpu=False): 8 | ''' 9 | :param net: 训练的网络 10 | :param dataset: 验证集 11 | ''' 12 | """Evaluation without the densecrf with the dice coefficient""" 13 | tot = 0 14 | for i, b in enumerate(dataset): 15 | img = b[0] 16 | true_mask = b[1] 17 | 18 | img = torch.from_numpy(img).unsqueeze(0) 19 | true_mask = torch.from_numpy(true_mask).unsqueeze(0) 20 | 21 | if gpu: 22 | img = img.cuda() 23 | true_mask = true_mask.cuda() 24 | 25 | mask_pred = net(img)[0] 26 | mask_pred = (F.sigmoid(mask_pred) > 0.5).float() 27 | # 评价函数:Dice系数 Dice距离用于度量两个集合的相似性 28 | tot += dice_coeff(mask_pred, true_mask).item() 29 | return tot / i 30 | -------------------------------------------------------------------------------- 
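顺带指出上面两处疑似笔误(示意修正,非官方写法):dice_loss.py 的 backward 中 `/ self.union * self.union` 受运算优先级影响等价于先除再乘、互相抵消,按 Dice 系数导数,分母应为 union 的平方;eval.py 末尾 `return tot / i` 除的是最后一个下标,按样本数应除以 i + 1。

```python
# DiceCoeff.backward 的修正写法(示意,假设原意为 Dice 系数对 input 的标准导数)
def backward(self, grad_output):
    input, target = self.saved_variables
    grad_input = grad_target = None
    if self.needs_input_grad[0]:
        # 分母加括号:union 的平方
        grad_input = grad_output * 2 * (target * self.union - self.inter) \
                     / (self.union * self.union)
    return grad_input, grad_target
```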
/UNet_pytorch/predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | from PIL import Image 9 | 10 | from unet import UNet 11 | from utils import resize_and_crop, normalize, split_img_into_squares, hwc_to_chw, merge_masks, dense_crf 12 | from utils import plot_img_and_mask 13 | 14 | from torchvision import transforms 15 | 16 | 17 | def predict_img(net, 18 | full_img, 19 | scale_factor=0.5, 20 | out_threshold=0.5, 21 | use_dense_crf=True, 22 | use_gpu=False): 23 | img_height = full_img.size[1] 24 | img_width = full_img.size[0] 25 | 26 | img = resize_and_crop(full_img, scale=scale_factor) 27 | img = normalize(img) 28 | 29 | left_square, right_square = split_img_into_squares(img) 30 | 31 | left_square = hwc_to_chw(left_square) 32 | right_square = hwc_to_chw(right_square) 33 | 34 | X_left = torch.from_numpy(left_square).unsqueeze(0) 35 | X_right = torch.from_numpy(right_square).unsqueeze(0) 36 | 37 | if use_gpu: 38 | X_left = X_left.cuda() 39 | X_right = X_right.cuda() 40 | 41 | with torch.no_grad(): 42 | output_left = net(X_left) 43 | output_right = net(X_right) 44 | 45 | left_probs = F.sigmoid(output_left).squeeze(0) 46 | right_probs = F.sigmoid(output_right).squeeze(0) 47 | 48 | tf = transforms.Compose( 49 | [ 50 | transforms.ToPILImage(), 51 | transforms.Resize(img_height), 52 | transforms.ToTensor() 53 | ] 54 | ) 55 | 56 | left_probs = tf(left_probs.cpu()) 57 | right_probs = tf(right_probs.cpu()) 58 | 59 | left_mask_np = left_probs.squeeze().cpu().numpy() 60 | right_mask_np = right_probs.squeeze().cpu().numpy() 61 | 62 | full_mask = merge_masks(left_mask_np, right_mask_np, img_width) 63 | 64 | if use_dense_crf: 65 | full_mask = dense_crf(np.array(full_img).astype(np.uint8), full_mask) 66 | 67 | return full_mask > out_threshold 68 | 69 | 70 | def get_args(): 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--model', '-m', default='MODEL.pth', 73 | metavar='FILE', 74 | help="Specify the file in which is stored the model" 75 | " (default : 'MODEL.pth')") 76 | parser.add_argument('--input', '-i', metavar='INPUT', nargs='+', 77 | help='filenames of input images', required=True) 78 | 79 | parser.add_argument('--output', '-o', metavar='INPUT', nargs='+', 80 | help='filenames of ouput images') 81 | parser.add_argument('--cpu', '-c', action='store_true', 82 | help="Do not use the cuda version of the net", 83 | default=False) 84 | parser.add_argument('--viz', '-v', action='store_true', 85 | help="Visualize the images as they are processed", 86 | default=False) 87 | parser.add_argument('--no-save', '-n', action='store_true', 88 | help="Do not save the output masks", 89 | default=False) 90 | parser.add_argument('--no-crf', '-r', action='store_true', 91 | help="Do not use dense CRF postprocessing", 92 | default=False) 93 | parser.add_argument('--mask-threshold', '-t', type=float, 94 | help="Minimum probability value to consider a mask pixel white", 95 | default=0.5) 96 | parser.add_argument('--scale', '-s', type=float, 97 | help="Scale factor for the input images", 98 | default=0.5) 99 | 100 | return parser.parse_args() 101 | 102 | 103 | def get_output_filenames(args): 104 | in_files = args.input 105 | out_files = [] 106 | 107 | if not args.output: 108 | for f in in_files: 109 | pathsplit = os.path.splitext(f) 110 | out_files.append("{}_OUT{}".format(pathsplit[0], pathsplit[1])) 111 | elif len(in_files) != len(args.output): 112 
| print("Error : Input files and output files are not of the same length") 113 | raise SystemExit() 114 | else: 115 | out_files = args.output 116 | 117 | return out_files 118 | 119 | 120 | def mask_to_image(mask): 121 | return Image.fromarray((mask * 255).astype(np.uint8)) 122 | 123 | 124 | if __name__ == "__main__": 125 | args = get_args() 126 | in_files = args.input 127 | out_files = get_output_filenames(args) 128 | 129 | net = UNet(n_channels=3, n_classes=1) 130 | 131 | print("Loading model {}".format(args.model)) 132 | 133 | if not args.cpu: 134 | print("Using CUDA version of the net, prepare your GPU !") 135 | net.cuda() 136 | net.load_state_dict(torch.load(args.model)) 137 | else: 138 | net.cpu() 139 | net.load_state_dict(torch.load(args.model, map_location='cpu')) 140 | print("Using CPU version of the net, this may be very slow") 141 | 142 | print("Model loaded !") 143 | 144 | for i, fn in enumerate(in_files): 145 | print("\nPredicting image {} ...".format(fn)) 146 | 147 | img = Image.open(fn) 148 | if img.size[0] < img.size[1]: 149 | print("Error: image height larger than the width") 150 | 151 | mask = predict_img(net=net, 152 | full_img=img, 153 | scale_factor=args.scale, 154 | out_threshold=args.mask_threshold, 155 | use_dense_crf=not args.no_crf, 156 | use_gpu=not args.cpu) 157 | 158 | if args.viz: 159 | print("Visualizing results for image {}, close to continue ...".format(fn)) 160 | plot_img_and_mask(img, mask) 161 | 162 | if not args.no_save: 163 | out_fn = out_files[i] 164 | result = mask_to_image(mask) 165 | result.save(out_files[i]) 166 | 167 | print("Mask saved to {}".format(out_files[i])) 168 | -------------------------------------------------------------------------------- /UNet_pytorch/readme.md: -------------------------------------------------------------------------------- 1 | # U-Net网络 2 | 3 | ---------- 4 | 5 | 该仓库基于[milesial](https://github.com/milesial)的[Pytorch-UNet](https://github.com/milesial/Pytorch-UNet)进行的,非常感谢大佬无私的奉献。 6 | 7 | 8 | - [原地址](https://github.com/milesial/Pytorch-UNet) 9 | - [原地址的加注释版本](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/UNet_pytorch) 10 | 11 | ---------- 12 | 13 | # 目前支持: 14 | 15 | - 数据集: Kaggle's [Carvana Image Masking Challenge](https://pan.baidu.com/s/1tQI7aQ4y9k0K3qBjCnJ53Q) 16 | - 网络:U-Net 17 | 18 | 19 | # 相比原作者的特点: 20 | 21 | - 所有参数均可在config.py中设置 22 | - 重新整理结构,并加入大量代码注释 23 | - loading 24 | 25 | ---------- 26 | 27 | - 环境: 28 | 29 | | python版本 | pytorch版本 | 30 | | ----------- | ---------- | 31 | | 3.5 | 0.4 | 32 | 33 | - 依赖: 34 | 35 | pip install pydensecrf 36 | 37 | ---------- 38 | 39 | # U-Net网络结构 40 | 41 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/659347.jpg) 42 | 43 | - ###### 原论文左侧 conv 3x3 无pad,故每次conv后feature map尺寸缩小。故与右侧feature map融合之前需要裁剪。 44 | - ###### 该仓库左侧 conv 3x3 pad=1,故每次conv后feature map尺寸不变。故反卷积后保证尺度统一与右侧feature map融合即可。 45 | 46 | 47 | ---------- 48 | 49 | # 准备数据集: 50 | 下载Kaggle's [Carvana Image Masking Challenge](https://pan.baidu.com/s/1tQI7aQ4y9k0K3qBjCnJ53Q)数据集,并在utils/config.py中配置数据集的根目录。 51 | ``` 52 | CarvanaImageMaskingChallenge 53 | │ 54 | └───train 55 | │ │ xxx.gif 56 | │ │ ... 57 | │ 58 | └───train_masks 59 | │ │ xxx.jpg 60 | │ │ ... 
61 | ``` 62 | 63 | 64 | ---------- 65 | 66 | # Trian: 67 | 68 | 1、在config.py中配置训练参数 69 | 70 | 2、执行train.py开始训练 71 | 72 | ---------- 73 | 74 | # Eval: 75 | 76 | 每训练一轮epoch都将计算Dice距离(用于度量两个集合的相似性) 77 | ---------- 78 | 79 | # Predict: 80 | 81 | 功能:可视化一张预测图片 82 | 83 | 1、将预训练模型放到项目根目录下 84 | 85 | 预训练模型下载:[MODEL.pth](https://pan.baidu.com/s/1D_OtX16iL3aJefvOqyRWnw) 86 | 87 | 2、预测单张图片 88 | 89 | python predict.py -i image.jpg -o output.jpg 90 | 91 | 3、预测多张图片并显示 92 | 93 | python predict.py -i image1.jpg image2.jpg --viz --no-save 94 | 95 | 96 |
97 | (图片略,原为两张示例图) 98 |
99 | 100 | 101 | ---------- 102 | 103 | # 关于作者 104 | 105 | - 原作者 [milesial](https://github.com/milesial) 106 | 107 | - 本仓库作者 [Mr.Li](https://github.com/bobo0810) -------------------------------------------------------------------------------- /UNet_pytorch/submit.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | 4 | import torch 5 | 6 | from predict import predict_img 7 | from utils import rle_encode 8 | from unet import UNet 9 | 10 | 11 | def submit(net, gpu=False): 12 | """Used for Kaggle submission: predicts and encode all test images""" 13 | dir = 'data/test/' 14 | 15 | N = len(list(os.listdir(dir))) 16 | with open('SUBMISSION.csv', 'a') as f: 17 | f.write('img,rle_mask\n') 18 | for index, i in enumerate(os.listdir(dir)): 19 | print('{}/{}'.format(index, N)) 20 | 21 | img = Image.open(dir + i) 22 | 23 | mask = predict_img(net, img, gpu) 24 | enc = rle_encode(mask) 25 | f.write('{},{}\n'.format(i, ' '.join(map(str, enc)))) 26 | 27 | 28 | if __name__ == '__main__': 29 | net = UNet(3, 1).cuda() 30 | net.load_state_dict(torch.load('MODEL.pth')) 31 | submit(net, True) 32 | -------------------------------------------------------------------------------- /UNet_pytorch/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import numpy as np 4 | 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch import optim 10 | 11 | from eval import eval_net 12 | from unet import UNet 13 | from utils import get_ids, split_ids, split_train_val, get_imgs_and_masks, batch 14 | from utils.config import opt_train 15 | 16 | def train_net(net, 17 | epochs=5, 18 | batch_size=1, 19 | lr=0.1, 20 | val_percent=0.05, # 训练集:验证集= 0.95: 0.05 21 | save_cp=True, 22 | gpu=False, 23 | img_scale=0.5): 24 | 25 | dir_img = opt_train.dir_img 26 | dir_mask = opt_train.dir_mask 27 | dir_checkpoint = opt_train.dir_checkpoint 28 | 29 | # 得到 图片路径列表 ids为 图片名称(无后缀名) 30 | ids = get_ids(dir_img) 31 | # 得到truple元组 (无后缀名的 图片名称,序号) 32 | # eg:当n为2 图片名称为bobo.jpg 时, 得到(bobo,0) (bobo,1) 33 | # 当序号为0 时,裁剪宽度,得到左边部分图片 当序号为1 时,裁剪宽度,得到右边部分图片 34 | ids = split_ids(ids) 35 | # 打乱数据集后,按照val_percent的比例来 切分 训练集 和 验证集 36 | iddataset = split_train_val(ids, val_percent) 37 | 38 | 39 | print(''' 40 | 开始训练: 41 | Epochs: {} 42 | Batch size: {} 43 | Learning rate: {} 44 | 训练集大小: {} 45 | 验证集大小: {} 46 | GPU: {} 47 | '''.format(epochs, batch_size, lr, len(iddataset['train']), 48 | len(iddataset['val']), str(gpu))) 49 | 50 | #训练集大小 51 | N_train = len(iddataset['train']) 52 | 53 | optimizer = optim.SGD(net.parameters(), 54 | lr=lr, 55 | momentum=0.9, 56 | weight_decay=0.0005) 57 | 58 | #二进制交叉熵 59 | criterion = nn.BCELoss() 60 | 61 | for epoch in range(epochs): 62 | print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) 63 | 64 | # reset the generators 65 | # 每轮epoch得到 训练集 和 验证集 66 | train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) 67 | val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) 68 | 69 | 70 | 71 | 72 | # 重置epoch损失计数器 73 | epoch_loss = 0 74 | 75 | for i, b in enumerate(batch(train, batch_size)): 76 | # 得到 一个batch的 imgs tensor 及 对应真实mask值 77 | # 当序号为0 时,裁剪宽度,得到左边部分图片[384,384,3] 当序号为1 时,裁剪宽度,得到右边部分图片[384,190,3] 78 | imgs = np.array([i[0] for i in b]).astype(np.float32) 79 | true_masks = np.array([i[1] for i in b]) 80 | 81 | # 将值转为 torch tensor 82 | imgs = torch.from_numpy(imgs) 83 | 
true_masks = torch.from_numpy(true_masks) 84 | 85 | # 训练数据转到GPU上 86 | if gpu: 87 | imgs = imgs.cuda() 88 | true_masks = true_masks.cuda() 89 | 90 | # 得到 网络输出的预测mask [10,1,384,384] 91 | masks_pred = net(imgs) 92 | # 经过sigmoid 93 | masks_probs = F.sigmoid(masks_pred) 94 | masks_probs_flat = masks_probs.view(-1) 95 | 96 | true_masks_flat = true_masks.view(-1) 97 | # 计算二进制交叉熵损失 98 | loss = criterion(masks_probs_flat, true_masks_flat) 99 | # 统计一个epoch的所有batch的loss之和,用以计算 一个epoch的 loss均值 100 | epoch_loss += loss.item() 101 | 102 | # 输出 当前epoch的第几个batch 及 当前batch的loss 103 | print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) 104 | 105 | # 优化器梯度清零 106 | optimizer.zero_grad() 107 | # 反向传播 108 | loss.backward() 109 | # 更新参数 110 | optimizer.step() 111 | 112 | # 一轮epoch结束,该轮epoch的 loss均值 113 | print('Epoch finished ! Loss: {}'.format(epoch_loss / i)) 114 | 115 | # 每轮epoch之后使用验证集进行评价 116 | if True: 117 | # 评价函数:Dice系数 Dice距离用于度量两个集合的相似性 118 | val_dice = eval_net(net, val, gpu) 119 | print('Validation Dice Coeff: {}'.format(val_dice)) 120 | 121 | # 保存模型 122 | if save_cp: 123 | torch.save(net.state_dict(), 124 | dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) 125 | print('Checkpoint {} saved !'.format(epoch + 1)) 126 | 127 | 128 | 129 | 130 | 131 | if __name__ == '__main__': 132 | 133 | # 获取训练参数 134 | args = opt_train 135 | # n_channels:输入图像的通道数 n_classes:二分类 136 | net = UNet(n_channels=3, n_classes=1) 137 | 138 | # 加载预训练模型 139 | if args.load: 140 | net.load_state_dict(torch.load(args.load)) 141 | print('Model loaded from {}'.format(args.load)) 142 | 143 | # 网络转移到GPU上 144 | if args.gpu: 145 | net.cuda() 146 | cudnn.benchmark = True # 速度更快,但占用内存更多 147 | 148 | try: 149 | train_net(net=net, 150 | epochs=args.epochs, 151 | batch_size=args.batchsize, 152 | lr=args.lr, 153 | gpu=args.gpu, 154 | img_scale=args.scale) 155 | except KeyboardInterrupt: 156 | # 当运行出错时,保存最新的模型 157 | torch.save(net.state_dict(), 'INTERRUPTED.pth') 158 | print('Saved interrupt') 159 | try: 160 | sys.exit(0) 161 | except SystemExit: 162 | os._exit(0) 163 | -------------------------------------------------------------------------------- /UNet_pytorch/unet/__init__.py: -------------------------------------------------------------------------------- 1 | from .unet_model import UNet 2 | -------------------------------------------------------------------------------- /UNet_pytorch/unet/unet_model.py: -------------------------------------------------------------------------------- 1 | # full assembly of the sub-parts to form the complete net 2 | 3 | from .unet_parts import * 4 | 5 | class UNet(nn.Module): 6 | def __init__(self, n_channels, n_classes): 7 | super(UNet, self).__init__() 8 | self.inc = inconv(n_channels, 64) # 输出层,n_channels=3 输入图像为3通道 9 | self.down1 = down(64, 128) 10 | self.down2 = down(128, 256) 11 | self.down3 = down(256, 512) 12 | self.down4 = down(512, 512) 13 | self.up1 = up(1024, 256) 14 | self.up2 = up(512, 128) 15 | self.up3 = up(256, 64) 16 | self.up4 = up(128, 64) 17 | # 最后一层的卷积核大小为1*1,将64通道的特征图转化为特定深度(分类数量,二分类为2)的结果 18 | self.outc = outconv(64, n_classes) # 输出层,n_classes=1 二分类 19 | 20 | def forward(self, x): 21 | x1 = self.inc(x) 22 | x2 = self.down1(x1) 23 | x3 = self.down2(x2) 24 | x4 = self.down3(x3) 25 | x5 = self.down4(x4) 26 | x = self.up1(x5, x4) 27 | x = self.up2(x, x3) 28 | x = self.up3(x, x2) 29 | x = self.up4(x, x1) 30 | x = self.outc(x) 31 | return x 32 | -------------------------------------------------------------------------------- /UNet_pytorch/unet/unet_parts.py: 
-------------------------------------------------------------------------------- 1 | # sub-parts of the U-Net model 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class double_conv(nn.Module): 9 | '''(conv => BN => ReLU) * 2''' 10 | def __init__(self, in_ch, out_ch): 11 | super(double_conv, self).__init__() 12 | self.conv = nn.Sequential( 13 | # 每次重复中都有2个卷积层,卷积核大小均为3*3 14 | nn.Conv2d(in_ch, out_ch, 3, padding=1), 15 | nn.BatchNorm2d(out_ch), 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(out_ch, out_ch, 3, padding=1), 18 | nn.BatchNorm2d(out_ch), 19 | nn.ReLU(inplace=True) 20 | ) 21 | 22 | def forward(self, x): 23 | x = self.conv(x) 24 | return x 25 | 26 | 27 | class inconv(nn.Module): 28 | def __init__(self, in_ch, out_ch): 29 | # 输入层,in_ch=3输入通道数, out_ch=64输出通道数 30 | super(inconv, self).__init__() 31 | self.conv = double_conv(in_ch, out_ch) 32 | 33 | def forward(self, x): 34 | x = self.conv(x) 35 | return x 36 | 37 | 38 | class down(nn.Module): 39 | def __init__(self, in_ch, out_ch): 40 | super(down, self).__init__() 41 | # 论文:它的架构是一种重复结构,每次重复中都有2个卷积层和一个pooling层,卷积层中卷积核大小均为3*3,激活函数使用ReLU 42 | self.mpconv = nn.Sequential( 43 | nn.MaxPool2d(2), 44 | double_conv(in_ch, out_ch) 45 | ) 46 | 47 | def forward(self, x): 48 | x = self.mpconv(x) 49 | return x 50 | 51 | 52 | class up(nn.Module): 53 | def __init__(self, in_ch, out_ch, bilinear=True): 54 | super(up, self).__init__() 55 | 56 | # would be a nice idea if the upsampling could be learned too, 57 | # but my machine do not have enough memory to handle all those weights 58 | # 默认为全部为 上采样(作者因内存不足)。 59 | # bilinear=False 改为 使用反卷积(论文方法),效果会更好? 60 | 61 | # 使用双线性上采样来放大输入 62 | if bilinear: # batchsize=10 scale=0.3 时 占用9047M 63 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 64 | # (论文方法)二维反卷积层 反卷积层可以理解为输入的数据和卷积核的位置反转的卷积操作. 反卷积有时候也会被翻译成解卷积. 
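# 补充(示意):ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2) 核2、步长2,
# 输出空间尺寸放大一倍且通道数不变,与上面 Upsample(scale_factor=2) 的输出形状一致;
# 区别在于反卷积的上采样权重可学习(对应上面英文注释 "upsampling could be learned")。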
65 | else: # 论文方法 batchsize=10 scale=0.3 时 占用9040M 66 | self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2) 67 | 68 | self.conv = double_conv(in_ch, out_ch) 69 | 70 | def forward(self, x1, x2): 71 | x1 = self.up(x1) 72 | diffX = x1.size()[2] - x2.size()[2] 73 | diffY = x1.size()[3] - x2.size()[3] 74 | x2 = F.pad(x2, (diffX // 2, int(diffX / 2), 75 | diffY // 2, int(diffY / 2))) 76 | # 深度相加 77 | x = torch.cat([x2, x1], dim=1) 78 | x = self.conv(x) 79 | return x 80 | 81 | 82 | class outconv(nn.Module): 83 | def __init__(self, in_ch, out_ch): 84 | super(outconv, self).__init__() 85 | # 最后一层的卷积核大小为1*1,将64通道的特征图转化为特定深度(分类数量,二分类为2)的结果 86 | self.conv = nn.Conv2d(in_ch, out_ch, 1) 87 | 88 | def forward(self, x): 89 | x = self.conv(x) 90 | return x 91 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .crf import * 2 | from .load import * 3 | from .utils import * 4 | from .data_vis import * 5 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # power by Mr.Li 3 | # 设置默认参数 4 | import os.path 5 | class DefaultConfig_train(): 6 | epochs=5 #number of epochs 7 | batchsize= 10 #batch size 8 | lr=0.1 #learning rate 9 | gpu=True #use cudas 10 | load=False #load file model 11 | scale=0.3 #downscaling factor of the images 图像训练时缩小倍数 该值对内存影响较大(仓库默认0.5) 12 | 13 | # 数据集 14 | dir_img = '/home/bobo/data/CarvanaImageMaskingChallenge_UNet/train/' 15 | dir_mask = '/home/bobo/data/CarvanaImageMaskingChallenge_UNet/train_masks/' 16 | dir_checkpoint = './checkpoints/' # 模型保存位置 17 | 18 | visdom=True # 是否可视化 19 | 20 | env = 'U-Net' # visdom 环境的名字 21 | visdom = True # 是否可视化 22 | datesets_name='Carvana Image Masking Challenge' # 数据集名称 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | class DefaultConfig_predict(): 31 | input='./intput.jpg' #filenames of input images 32 | output='./output.jpg' #filenames of ouput images 33 | model= './MODEL.pth' # Specify the file in which is stored the model 34 | cpu=False #Do not use the cuda version of the net 35 | scale=0.5 #Scale factor for the input images 36 | mask_threshold=0.5 #Minimum probability value to consider a mask pixel white 37 | no_crf=False #Do not use dense CRF postprocessing 38 | no_save=False #Do not save the output masks 39 | viz=False #Visualize the images as they are processed 40 | #初始化该类的一个对象 41 | opt_train=DefaultConfig_train() 42 | 43 | opt_predict=DefaultConfig_predict() 44 | 45 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/crf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pydensecrf.densecrf as dcrf 3 | 4 | def dense_crf(img, output_probs): 5 | h = output_probs.shape[0] 6 | w = output_probs.shape[1] 7 | 8 | output_probs = np.expand_dims(output_probs, 0) 9 | output_probs = np.append(1 - output_probs, output_probs, axis=0) 10 | 11 | d = dcrf.DenseCRF2D(w, h, 2) 12 | U = -np.log(output_probs) 13 | U = U.reshape((2, -1)) 14 | U = np.ascontiguousarray(U) 15 | img = np.ascontiguousarray(img) 16 | 17 | d.setUnaryEnergy(U) 18 | 19 | d.addPairwiseGaussian(sxy=20, compat=3) 20 | d.addPairwiseBilateral(sxy=30, srgb=20, rgbim=img, compat=10) 21 | 22 | Q = d.inference(5) 23 | Q = np.argmax(np.array(Q), axis=0).reshape((h, 
w)) 24 | 25 | return Q 26 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/data_vis.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | def plot_img_and_mask(img, mask): 4 | fig = plt.figure() 5 | a = fig.add_subplot(1, 2, 1) 6 | a.set_title('Input image') 7 | plt.imshow(img) 8 | 9 | b = fig.add_subplot(1, 2, 2) 10 | b.set_title('Output mask') 11 | plt.imshow(mask) 12 | plt.show() -------------------------------------------------------------------------------- /UNet_pytorch/utils/load.py: -------------------------------------------------------------------------------- 1 | # 2 | # load.py : utils on generators / lists of ids to transform from strings to 3 | # cropped images and masks 4 | 5 | import os 6 | 7 | import numpy as np 8 | from PIL import Image 9 | 10 | from .utils import resize_and_crop, get_square, normalize, hwc_to_chw 11 | 12 | 13 | def get_ids(dir): 14 | """Returns a list of the ids in the directory""" 15 | # eg:f[:-4]是为了去掉 .jpg 后缀。结果只为 照片名称,无后缀。 16 | return (f[:-4] for f in os.listdir(dir)) 17 | 18 | 19 | def split_ids(ids, n=2): 20 | """Split each id in n, creating n tuples (id, k) for each id""" 21 | return ((id, i) for i in range(n) for id in ids) 22 | 23 | 24 | def to_cropped_imgs(ids, dir, suffix, scale): 25 | """From a list of tuples, returns the correct cropped img""" 26 | #返回 tuples,(img的resize后的tensor,序号) 27 | for id, pos in ids: 28 | im = resize_and_crop(Image.open(dir + id + suffix), scale=scale) 29 | # get_square: 当pos为0 时,裁剪宽度,得到左边部分图片[384,384,3] 当pos为1 时,裁剪宽度,得到右边部分图片[384,190,3] 30 | yield get_square(im, pos) 31 | 32 | def get_imgs_and_masks(ids, dir_img, dir_mask, scale): 33 | ''' 34 | :param ids: 35 | :param dir_img: 图片路径 36 | :param dir_mask: mask图片路径 37 | :param scale: 图像训练时缩小倍数 38 | :return:all the couples (img, mask) 39 | ''' 40 | """Return all the couples (img, mask)""" 41 | 42 | # 读取图片,并按照scale进行resize 43 | imgs = to_cropped_imgs(ids, dir_img, '.jpg', scale) 44 | 45 | # need to transform from HWC to CHW 转化(高H、宽W、通道C)为(通道C、高H、宽W) 46 | imgs_switched = map(hwc_to_chw, imgs) 47 | # 归一化(值转化到0-1之间) 48 | imgs_normalized = map(normalize, imgs_switched) 49 | 50 | masks = to_cropped_imgs(ids, dir_mask, '_mask.gif', scale) 51 | # list( rezise且经过转化和归一化后的图像tensor,resize后的mask图像tensor) 52 | return zip(imgs_normalized, masks) 53 | 54 | 55 | def get_full_img_and_mask(id, dir_img, dir_mask): 56 | im = Image.open(dir_img + id + '.jpg') 57 | mask = Image.open(dir_mask + id + '_mask.gif') 58 | return np.array(im), np.array(mask) 59 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | 5 | def get_square(img, pos): 6 | """Extract a left or a right square from ndarray shape : (H, W, C))""" 7 | h = img.shape[0] 8 | if pos == 0: 9 | return img[:, :h] 10 | else: 11 | return img[:, -h:] 12 | 13 | def split_img_into_squares(img): 14 | return get_square(img, 0), get_square(img, 1) 15 | 16 | def hwc_to_chw(img): 17 | return np.transpose(img, axes=[2, 0, 1]) 18 | 19 | def resize_and_crop(pilimg, scale=0.5, final_height=None): 20 | w = pilimg.size[0] 21 | h = pilimg.size[1] 22 | newW = int(w * scale) 23 | newH = int(h * scale) 24 | 25 | if not final_height: 26 | diff = 0 27 | else: 28 | diff = newH - final_height 29 | 30 | img = pilimg.resize((newW, newH)) 31 | # crop 
--------------------------------------------------------------------------------
/UNet_pytorch/utils/utils.py:
--------------------------------------------------------------------------------
import random
import numpy as np


def get_square(img, pos):
    """Extract a left or a right square from ndarray shape : (H, W, C))"""
    h = img.shape[0]
    if pos == 0:
        return img[:, :h]
    else:
        return img[:, -h:]

def split_img_into_squares(img):
    return get_square(img, 0), get_square(img, 1)

def hwc_to_chw(img):
    return np.transpose(img, axes=[2, 0, 1])

def resize_and_crop(pilimg, scale=0.5, final_height=None):
    w = pilimg.size[0]
    h = pilimg.size[1]
    newW = int(w * scale)
    newH = int(h * scale)

    if not final_height:
        diff = 0
    else:
        diff = newH - final_height

    img = pilimg.resize((newW, newH))
    # crop extracts a rectangular region from the image. It takes a 4-tuple
    # (left, upper, right, lower); the origin (0, 0) is the top-left corner.
    img = img.crop((0, diff // 2, newW, newH - diff // 2))
    return np.array(img, dtype=np.float32)

def batch(iterable, batch_size):
    """Yields lists by batch"""
    b = []
    for i, t in enumerate(iterable):
        b.append(t)
        if (i + 1) % batch_size == 0:
            yield b
            b = []

    if len(b) > 0:
        yield b

def split_train_val(dataset, val_percent=0.05):
    dataset = list(dataset)
    length = len(dataset)
    n = int(length * val_percent)
    random.shuffle(dataset)
    return {'train': dataset[:-n], 'val': dataset[-n:]}


def normalize(x):
    return x / 255

def merge_masks(img1, img2, full_w):
    h = img1.shape[0]

    new = np.zeros((h, full_w), np.float32)
    new[:, :full_w // 2 + 1] = img1[:, :full_w // 2 + 1]
    new[:, full_w // 2 + 1:] = img2[:, -(full_w // 2 - 1):]

    return new


# credits to https://stackoverflow.com/users/6076729/manuel-lagunas
def rle_encode(mask_image):
    pixels = mask_image.flatten()
    # We avoid issues with '1' at the start or end (at the corners of
    # the original image) by setting those pixels to '0' explicitly.
    # We do not expect these to be non-zero for an accurate mask,
    # so this should not harm the score.
    pixels[0] = 0
    pixels[-1] = 0
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
    runs[1::2] = runs[1::2] - runs[:-1:2]
    return runs
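A worked example of the run-length encoding above (illustrative, not part of the original file):

```python
import numpy as np
from utils.utils import rle_encode

# mask [0, 1, 1, 0] -> [2 2]: one run starting at pixel 2 (1-indexed) of length 2
print(rle_encode(np.array([0, 1, 1, 0])))
```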
--------------------------------------------------------------------------------
/Yolov1_pytorch/config.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# powered by Mr.Li
# default parameters
class DefaultConfig():
    env = 'YOLOv1'  # name of the visdom environment
    # model = 'NetWork'  # model to use; the name must match one in models/__init__.py
    file_root = '/home/zhuhui/data/VOCdevkit/VOC2012/JPEGImages/'   # VOC2012 training set
    test_root = '/home/zhuhui/data/VOCdevkit/VOC2007/JPEGImages/'   # VOC2007 test set
    train_Annotations = '/home/zhuhui/data/VOCdevkit/VOC2012/Annotations/'
    voc_2007test = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/data/voc2007test.txt'
    voc_2012train = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/data/voc2012train.txt'

    test_img_dir = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/testImgs/a.jpg'
    result_img_dir = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/testImgs/result_a.jpg'

    batch_size = 32   # batch size
    use_gpu = True    # use GPU or not
    num_workers = 4   # how many worker threads for loading data
    print_freq = 20   # print info every N batches

    best_test_loss_model_path = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/checkpoint/yolo_val_best.pth'
    current_epoch_model_path = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/checkpoint/yolo_bobo.pth'
    load_model_path = None  # path of a pretrained model to load; None means do not load
    num_epochs = 120        # number of training epochs
    learning_rate = 0.001   # initial learning rate
    lr_decay = 0.5          # when val_loss increases, lr = lr * lr_decay
    momentum = 0.95
    weight_decay = 5e-4     # weight decay (L2 regularization)
    # the VOC classes
    VOC_CLASSES = (  # always index 0
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor')


# instantiate one object of this class
opt = DefaultConfig()
--------------------------------------------------------------------------------
/Yolov1_pytorch/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/data/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/data/xml_2_txt.py:
--------------------------------------------------------------------------------
import xml.etree.ElementTree as ET
import os
from config import opt

def parse_rec(filename):
    """
    Parse a PASCAL VOC xml file.
    Converts the dataset annotations from xml to txt; used to generate voc2007test.txt etc.
    """
    tree = ET.parse(filename)
    objects = []
    # iterate over all objects in one image
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        #obj_struct['pose'] = obj.find('pose').text
        #obj_struct['truncated'] = int(obj.find('truncated').text)
        #obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        # the origin is the image's top-left corner, x to the right and y downwards:
        # top-left corner (xmin, ymin) and bottom-right corner (xmax, ymax)
        obj_struct['bbox'] = [int(float(bbox.find('xmin').text)),
                              int(float(bbox.find('ymin').text)),
                              int(float(bbox.find('xmax').text)),
                              int(float(bbox.find('ymax').text))]
        objects.append(obj_struct)

    return objects

# create a txt file named voc2012train and prepare to write to it
txt_file = open('data/voc2012train.txt', 'w')
Annotations = opt.train_Annotations
xml_files = os.listdir(Annotations)

# iterate over all xml files
for xml_file in xml_files:
    image_path = xml_file.split('.')[0] + '.jpg'
    # write the image name (not the full path) to the txt
    txt_file.write(image_path + ' ')
    results = parse_rec(Annotations + xml_file)
    num_obj = len(results)
    # write the total number of objects in this image
    txt_file.write(str(num_obj) + ' ')
    # iterate over all objects in this image
    for result in results:
        class_name = result['name']
        bbox = result['bbox']
        class_name = opt.VOC_CLASSES.index(class_name)
        # write the bbox coordinates and the class index of each object
        txt_file.write(str(bbox[0])+' '+str(bbox[1])+' '+str(bbox[2])+' '+str(bbox[3])+' '+str(class_name)+' ')
    txt_file.write('\n')
# final format per line: image name (1 value), object count (1 value),
# then per object: bbox coordinates (4 values) + class index (1 value)

txt_file.close()
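For reference, one line of the generated voc2012train.txt looks like this (the file name and the numbers are illustrative; 14 is the index of 'person' in VOC_CLASSES):

```
2012_004331.jpg 2 52 86 251 374 14 127 133 271 352 14
```

That is: image name, object count (2), then for each object its corners (xmin ymin xmax ymax) and class index.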
--------------------------------------------------------------------------------
/Yolov1_pytorch/main_resnet.py:
--------------------------------------------------------------------------------
from collections import defaultdict

import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm

from config import opt
from data.dataset import yoloDataset
from models.resnet import resnet152_bo, resnet152
from utils.visualize import Visualizer
from utils.yoloLoss import yoloLoss
from utils.predictUtils import predict_result
from utils.predictUtils import voc_eval
from utils.predictUtils import voc_ap


def train():
    vis = Visualizer(opt.env)
    # network part ================================================= start
    # pretrained=True supplies a pretrained resnet152 backbone to resnet152_bo
    net = resnet152_bo(resnet152(pretrained=True))
    # load the checkpoint into memory (CPU) first
    if opt.load_model_path:
        net.load_state_dict(torch.load(opt.load_model_path, map_location=lambda storage, loc: storage))
    # then move the model to the GPU
    if opt.use_gpu:
        net.cuda()
    # print the network structure
    print(net)
    print('Loaded the pretrained model')
    # switch the model to training mode
    net.train()
    # network part ================================================= end

    # data loading part ============================================ start
    # custom dataset wrapper
    train_dataset = yoloDataset(root=opt.file_root, list_file=opt.voc_2012train, train=True, transform=[transforms.ToTensor()])
    # dataset loader; shuffle: randomize order, num_workers: worker threads
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=4)
    test_dataset = yoloDataset(root=opt.test_root, list_file=opt.voc_2007test, train=False, transform=[transforms.ToTensor()])
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False, num_workers=4)
    # data loading part ============================================ end

    # custom loss: 7 means a 7x7 grid over the image, 2 means two boxes per cell,
    # 5 is λcoord (weights the 8-dim coordinate predictions more heavily), and
    # 0.5 weights the confidence loss of boxes that contain no object
    criterion = yoloLoss(7, 2, 5, 0.5)
    learning_rate = opt.learning_rate
    # optimizer
    optimizer = torch.optim.SGD(net.parameters(), lr=opt.learning_rate, momentum=opt.momentum, weight_decay=opt.weight_decay)
    # optimizer = torch.optim.Adam(net.parameters(), lr=opt.learning_rate, weight_decay=opt.weight_decay)
    print('The training set has %d images' % (len(train_dataset)))
    print('Batch size is %d' % (opt.batch_size))
    # write training progress into a log file
    logfile = open('log/log.txt', 'w')
    # np.inf is positive infinity
    best_test_loss = np.inf

    for epoch in range(opt.num_epochs):
        if epoch == 1:
            learning_rate = 0.0005
        if epoch == 2:
            learning_rate = 0.00075
        if epoch == 3:
            learning_rate = 0.001
        if epoch == 80:
            learning_rate = 0.0001
        if epoch == 100:
            learning_rate = 0.00001
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
        # current epoch number and its learning rate
        print('\n\nCurrent epoch: %d / %d' % (epoch + 1, opt.num_epochs))
        print('Learning rate for this epoch: {}'.format(learning_rate))

        # total loss of this epoch
        total_loss = 0.
        # start training (switch back to training mode; eval() below turns it off for validation)
        net.train()
        for i, (images, target) in enumerate(train_loader):
            images = Variable(images)
            target = Variable(target)
            if opt.use_gpu:
                images, target = images.cuda(), target.cuda()
            # forward pass to get the predictions
            pred = net(images)
            # compute the loss; yoloLoss extends nn.Module, so calling the object
            # runs its forward method
            loss = criterion(pred, target)
            total_loss += loss.data[0]
            # zero the optimizer's gradients
            optimizer.zero_grad()
            # backpropagate the loss
            loss.backward()
            # update the parameters
            optimizer.step()
            if (i + 1) % opt.print_freq == 0:
                print('Training: epoch [%d/%d], Iter [%d/%d], current batch loss: %.4f, average loss so far in this epoch: %.4f'
                      % (epoch + 1, opt.num_epochs, i + 1, len(train_loader), loss.data[0], total_loss / (i + 1)))
                # plot the average training loss
                vis.plot_train_val(loss_train=total_loss / (i + 1))
        # save the latest model
        torch.save(net.state_dict(), opt.current_epoch_model_path)
        vis.log("epoch:{epoch},lr:{lr}".format(
            epoch=epoch, lr=learning_rate))

        # validation once per epoch
        validation_loss = 0.0
        # switch the model to evaluation mode
        net.eval()
        # validate on the VOC2007 test set after each epoch
        for i, (images, target) in enumerate(test_loader):
            images = Variable(images, volatile=True)
            target = Variable(target, volatile=True)
            if opt.use_gpu:
                images, target = images.cuda(), target.cuda()
            # forward pass to get the predictions
            pred = net(images)
            # loss
            loss = criterion(pred, target)
            validation_loss += loss.data[0]
        # average loss on the VOC2007 test set
        validation_loss /= len(test_loader)
        # plot the average validation loss
        vis.plot_train_val(loss_val=validation_loss)
        # the training goal is the smallest validation loss,
        # so save the model with the best validation loss so far
        if best_test_loss > validation_loss:
            best_test_loss = validation_loss
            print('Best average validation loss so far: %.5f' % best_test_loss)
            torch.save(net.state_dict(), opt.best_test_loss_model_path)
        # write this epoch's numbers into the log file
        logfile.writelines(str(epoch) + '\t' + str(validation_loss) + '\n')
        logfile.flush()

def predict():
    # build the model; its weights are replaced by the checkpoint loaded below
    predict_model = resnet152_bo(resnet152(pretrained=True))

    predict_model.load_state_dict(torch.load(opt.load_model_path, map_location=lambda storage, loc: storage))
    # switch the model to evaluation mode
    predict_model.eval()
    # move to the GPU if available
    if opt.use_gpu:
        predict_model.cuda()
    # path of the test image
    test_img_dir = opt.test_img_dir
    image = cv2.imread(test_img_dir)
    # each result holds: top-left corner, bottom-right corner, class name,
    # the input image path, and the predicted class probability
    result = predict_result(predict_model, test_img_dir)
    for left_up, right_bottom, class_name, _, prob in result:
        # draw the predicted box on the test image
        cv2.rectangle(image, left_up, right_bottom, (0, 255, 0), 2)
        # write the class name at the top-left corner of the box
        cv2.putText(image, class_name, left_up, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1, cv2.LINE_AA)
        print(prob)
    # save the annotated result
    cv2.imwrite(opt.result_img_dir, image)


# main entry
if __name__ == '__main__':

    # command line tool; exposes train()/predict() as subcommands
    import fire
    fire.Fire()

    train()
    # predict()
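The epoch-based learning-rate schedule hard-coded in train() above is a warm-up followed by step decay. A minimal equivalent helper (a sketch only; the function name is mine, and the breakpoints are copied from train()):

```python
def lr_at_epoch(epoch):
    # warm-up 0.001 -> 0.0005 -> 0.00075 -> 0.001, then decay at epochs 80 and 100
    for start, lr in [(100, 0.00001), (80, 0.0001), (3, 0.001), (2, 0.00075), (1, 0.0005)]:
        if epoch >= start:
            return lr
    return 0.001  # epoch 0: the initial opt.learning_rate

# inside the epoch loop:
# for param_group in optimizer.param_groups:
#     param_group['lr'] = lr_at_epoch(epoch)
```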
--------------------------------------------------------------------------------
/Yolov1_pytorch/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/models/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/models/resnet.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import math
import torch.nn.functional as F
from torchvision.models import resnet152

class resnet152_bo(nn.Module):

    def __init__(self, features, num_classes=1000):
        super(resnet152_bo, self).__init__()
        # use the (optionally pretrained) backbone passed in via `features`
        model = features
        # do not change the architecture for now; only replace the output layers:
        # drop the last two layers of the model (the final max pool and the fully connected layer)
        self.features = nn.Sequential(*list(model.children())[:-2])
        self.classifier = nn.Sequential(
            nn.Linear(2048 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # one fully connected layer removed
            # nn.Linear(4096, 4096),
            # nn.ReLU(True),
            # nn.Dropout(),
            # the last layer outputs 1470 values, i.e. one image's predictions (1470 = 7x7x30)
            nn.Linear(4096, 1470),
        )
        # model.fc = nn.Linear(2048, 1470)
        # self.resnet152_bo = model
        # only the linear layers were changed, so only they get re-initialized
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            # only the linear layers were changed, so only they get re-initialized
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        # squash the outputs into the 0-1 range with a sigmoid
        x = F.sigmoid(x)
        # reshape to (N, 7, 7, 30): N images, (7, 7, 30) predictions per image
        x = x.view(-1, 7, 7, 30)
        return x


def test():
    '''
    for quick testing
    '''
    import torch
    from torch.autograd import Variable

    model = resnet152_bo(resnet152(pretrained=True))
    img = torch.rand(2, 3, 224, 224)
    img = Variable(img)
    output = model(img)
    output = output.view(-1, 7, 7, 30)
    print(output.size())

if __name__ == '__main__':
    test()
--------------------------------------------------------------------------------
/Yolov1_pytorch/readme.md:
--------------------------------------------------------------------------------
- Environment:

  | python version | pytorch version |
  | -------------- | --------------- |
  | 3.5            | 0.3.0           |

- Notes:

  1. Basic implementation reference: [pytorchYOLOv1master][1]

  2. This is only a refactor; it does not improve the results.

  3. When testing, the loss on the VOC2012 training set is around 0.1, while the loss on the VOC2007 test set barely drops; overfitting is suspected.

  4. Before running main.py, make sure the visdom visualization server is started.

- Current work:

  1. ~~Visualize test images and loss after training~~

  2. ~~Upload the trained models here~~

  3. ~~Add comments for easier understanding~~

  4. Try to optimize the network and improve mAP

- Directions for improvement:

  1. Change the learning rate

  2. ~~Adjust the network structure (the reference version uses VGG16; try a residual network)~~

  3. ~~Switch the optimizer from SGD to Adam~~


- Model downloads:

  1. The model that performed best on the VOC2007 test set ([Baidu Netdisk](https://pan.baidu.com/s/1HCO24KGqjJw01raiCB7f2A))

  2. The last saved model ([Baidu Netdisk](https://pan.baidu.com/s/1HKY7qGgK7i3Fv_ks9ldflw))

- Results:

  Validation set: VOC2012 training set

  Model: the one that performed best on the VOC2007 test set

(figure: detection result on a test image)
### Loss trend

| epoch | loss on VOC2007 test set |
| ----- | ------------------------ |
| 0     | 5.806424896178707        |
| 1     | 5.855176733386132        |
| 2     | 5.9203009036279495       |
| ...   | ...                      |
| 118   | 5.187265388427242        |
| 119   | 5.190768877152474        |

(figure: train/validation loss curves)

Note: the blue line is the loss on the VOC2012 training set; the yellow line is the loss on the VOC2007 test set.

### Network performance

- Performance of the last saved model on the VOC2007 validation set
(figures: predictions on VOC2007 images)
(figures: predictions on VOC2007 images)
(figures: predictions on VOC2007 images)
- Performance of the last saved model on the VOC2012 training set (likely overfitting, since it does well on the training set)

(figures: predictions on VOC2012 training images)
(figures: predictions on VOC2012 training images)
(figures: predictions on VOC2012 training images)
# Additions by this repository's author

- What's new:

  Added a ResNet-152 network to replace the original author's VGG16 (code: main_resnet.py, models/resnet.py).

- Implementation details:

  Only the output of ResNet-152's final fully connected layer is changed to 1470 values, which are then reshaped to 7x7x30 (a sketch of how that tensor decomposes follows the loss plots below).

- Results:

  Very poor. The suspected reason is that ResNet-152 is a classification network, and using it directly for regression hurts performance.

- Loss plots:

  | ResNet-152 + Adam optimizer | ResNet-152 + SGD optimizer |
  | --------------------------- | -------------------------- |
  | see bottom left             | see bottom right           |

(figures: loss curves for ResNet-152 with the Adam optimizer, left, and with SGD, right)
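As referenced above, the 1470 outputs follow the YOLOv1 layout used by yoloLoss(7, 2, 5, 0.5): per grid cell, 2 boxes of (x, y, w, h, confidence) followed by 20 VOC class scores. A minimal sketch of splitting the output (the helper name is mine; the channel order is assumed from the upstream yoloLoss convention):

```python
import torch

def split_yolo_output(pred):
    """pred: (N, 7, 7, 30) tensor as returned by resnet152_bo.forward"""
    boxes = pred[:, :, :, :10].contiguous().view(-1, 7, 7, 2, 5)  # per box: x, y, w, h, confidence
    class_scores = pred[:, :, :, 10:]                             # 20 VOC class scores
    return boxes, class_scores

out = torch.rand(2, 7, 7, 30)
boxes, classes = split_yolo_output(out)
print(boxes.size(), classes.size())  # (2, 7, 7, 2, 5) and (2, 7, 7, 20)
```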
- Optimization suggestions:

  1. ~~Try ResNet-50 with the final layers handled the way the original VGG16 version does~~

  2. Removed the last layer of ResNet-50 and added two VGG16-style fully connected layers with dropout, etc. The loss still would not go down, so this project will no longer pursue accuracy improvements.


- Special thanks:

  xiongzihua: [the original author][2]

  Zhu Hui (senior classmate): for taking the time to help me sort out the ideas and walk through the code




[1]: https://github.com/xiongzihua/pytorch-YOLO-v1
[2]: https://github.com/xiongzihua/pytorch-YOLO-v1
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/utils/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/testImgs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/utils/testImgs/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/visualize.py:
--------------------------------------------------------------------------------
import visdom
import numpy as np

class Visualizer():
    def __init__(self, env='main', **kwargs):
        '''
        **kwargs: dict of visdom options
        '''
        self.vis = visdom.Visdom(env=env)
        self.index = {}  # per-plot x coordinate, as a dict
        self.log_text = ''
        self.env = env

    def plot_train_val(self, loss_train=None, loss_val=None):
        '''
        plot val loss and train loss in one figure
        '''
        x = self.index.get('train_val', 0)

        if x == 0:
            loss = loss_train if loss_train else loss_val
            win_y = np.column_stack((loss, loss))
            win_x = np.column_stack((x, x))
            self.win = self.vis.line(Y=win_y, X=win_x,
                                     env=self.env)
            # opts=dict(
            #     title='train_test_loss',
            # ))
            self.index['train_val'] = x + 1
            return

        if loss_train != None:
            self.vis.line(Y=np.array([loss_train]), X=np.array([x]),
                          win=self.win,
                          name='1',
                          update='append',
                          env=self.env)
            self.index['train_val'] = x + 5
        else:
            self.vis.line(Y=np.array([loss_val]), X=np.array([x]),
                          win=self.win,
                          name='2',
                          update='append',
                          env=self.env)

    def plot_many(self, d):
        '''
        d: dict of {name: value}
        '''
        for k, v in d.items():
            self.plot(k, v)

    def plot(self, name, y, **kwargs):
        '''
        plot('loss', 1.00)
        '''
        x = self.index.get(name, 0)  # if absent, start at 0
        self.vis.line(Y=np.array([y]), X=np.array([x]),
                      win=name,
                      opts=dict(title=name),
                      update=None if x == 0 else 'append',
                      **kwargs)
        self.index[name] = x + 1

    def log(self, info, win='log_text'):
        '''
        show text in a visdom box instead of writing it to a txt file (not implemented)
        '''
        pass
--------------------------------------------------------------------------------
/Yolov3_pytorch/checkpoints/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/checkpoints/.gitkeep
--------------------------------------------------------------------------------
/Yolov3_pytorch/checkpoints/download_weights.sh:
--------------------------------------------------------------------------------
#!/bin/bash

wget https://pjreddie.com/media/files/yolov3.weights
--------------------------------------------------------------------------------
/Yolov3_pytorch/config/coco.data:
--------------------------------------------------------------------------------
classes= 80
train=data/coco/trainvalno5k.txt
valid=data/coco/5k.txt
names=data/coco.names
backup=backup/
eval=coco
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/coco.names:
--------------------------------------------------------------------------------
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/get_coco_dataset.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh

# Clone COCO API
git clone https://github.com/pdollar/coco
cd coco

mkdir images
cd images

# The dataset was already uploaded by the author, so the image downloads are commented out
## Download Images
#wget -c https://pjreddie.com/media/files/train2014.zip
#wget -c https://pjreddie.com/media/files/val2014.zip

## Unzip
#unzip -q train2014.zip
#unzip -q val2014.zip

cd ..

# Download COCO Metadata
wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
wget -c https://pjreddie.com/media/files/coco/5k.part
wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
wget -c https://pjreddie.com/media/files/coco/labels.tgz
tar xzf labels.tgz
unzip -q instances_train-val2014.zip

# Set Up Image Lists (prefix each entry with the absolute path)
paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/dog.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/eagle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/eagle.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/giraffe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/giraffe.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/herd_of_horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/herd_of_horses.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img1.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img2.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img3.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img4.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/messi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/messi.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/person.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/datasets/datasets.py:
--------------------------------------------------------------------------------
import glob
import random
import os
import numpy as np

import torch

from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from skimage.transform import resize

import sys

class ImageFolder(Dataset):
    '''
    Used only by detect.py, for the demo samples
    '''
    def __init__(self, folder_path, img_size=416):
        self.files = sorted(glob.glob('%s/*.*' % folder_path))
        self.img_shape = (img_size, img_size)

    def __getitem__(self, index):
        img_path = self.files[index % len(self.files)]
        # Extract image
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        # return the image path and the processed image tensor
        return img_path, input_img

    def __len__(self):
        return len(self.files)


class ListDataset(Dataset):
    '''
    Dataset loader used for training
    '''
    def __init__(self, list_path, img_size=416):
        # read the txt listing the training images into a list
        with open(list_path, 'r') as file:
            self.img_files = file.readlines()
        # derive the label files from the image paths (COCO stores the ground-truth boxes as txt)
        self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
        # training input image size
        self.img_shape = (img_size, img_size)
        self.max_objects = 50  # assume at most 50 ground-truth objects per image (used when packing the label matrix)

    def __getitem__(self, index):

        '''
        fetch a single image and its ground truth during training
        '''

        # read the image as an array
        img_path = self.img_files[index % len(self.img_files)].rstrip()

        copy_img = Image.open(img_path).copy()
        img = np.array(copy_img)

        # Handles images with less than three channels:
        # if the image does not have 3 channels (i.e. it is corrupt), read the next one
        while len(img.shape) != 3:
            index += 1
            img_path = self.img_files[index % len(self.img_files)].rstrip()
            img = np.array(Image.open(img_path))

        # process the image array (augmentation, normalization)

        # pad the shorter side so the image becomes a square
        h, w, _ = img.shape
        # np.abs: absolute value
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
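        # Worked example (values illustrative, added for clarity): h=300, w=400
        # gives dim_diff=100 and pad1=pad2=50, so pad=((50, 50), (0, 0), (0, 0)):
        # 50 gray rows (value 128) above and below, producing a 400x400 square
        # before the resize below.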
        # after padding to a square, resize to the target shape (usually 416x416)
        padded_h, padded_w, _ = input_img.shape
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')

        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        #---------
        # ground-truth label handling
        #---------
        label_path = self.label_files[index % len(self.img_files)].rstrip()
        labels = None
        if os.path.exists(label_path):
            # e.g. shape [8, 5]: 8 bboxes in this image; column 0 is the class index,
            # columns 1-4 are normalized coordinates with values in [0, 1]
            labels = np.loadtxt(label_path).reshape(-1, 5)
            # Extract coordinates for unpadded + unscaled image
            x1 = w * (labels[:, 1] - labels[:, 3]/2)
            y1 = h * (labels[:, 2] - labels[:, 4]/2)
            x2 = w * (labels[:, 1] + labels[:, 3]/2)
            y2 = h * (labels[:, 2] + labels[:, 4]/2)
            # Adjust for added padding, so the boxes stay aligned with the padded image
            x1 += pad[1][0]
            y1 += pad[0][0]
            x2 += pad[1][0]
            y2 += pad[0][0]
            # Calculate ratios from coordinates
            # (re-normalize against the padded square)
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
        # Fill matrix
        # (pack the objects from the txt file into a fixed-size matrix, at most 50 per image)
        filled_labels = np.zeros((self.max_objects, 5))
        if labels is not None:
            filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
        filled_labels = torch.from_numpy(filled_labels)
        # return the image path, the processed image tensor, and the normalized
        # ground-truth boxes filled_labels[50, 5] with values in [0, 1]
        return img_path, input_img, filled_labels

    def __len__(self):
        return len(self.img_files)
--------------------------------------------------------------------------------
/Yolov3_pytorch/readme.md:
--------------------------------------------------------------------------------
# Refactored YOLO v3 implementation

----------

This repository is based on [PyTorch-YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) by [eriklindernoren](https://github.com/eriklindernoren); many thanks for his selfless contribution.


- [Original repository](https://github.com/eriklindernoren/PyTorch-YOLOv3)
- [Annotated version of the original](https://github.com/bobo0810/PyTorch-YOLOv3-master)
- [Refactored version](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/Yolov3_pytorch) (highly recommended! this repository)

----------

# Currently supported:

- Dataset: COCO
- Network: Darknet-53

# Improvements over the original:

- All parameters can be set in config.py
- Restructured code with extensive comments
- Visdom visualization added


----------

# Typical project structure

1. Define the network

![](https://github.com/bobo0810/imageRepo/blob/master/img/16409622.jpg)

2. Wrap the dataset

![](https://github.com/bobo0810/imageRepo/blob/master/img/38894621.jpg)

3. Utility classes

![](https://github.com/bobo0810/imageRepo/blob/master/img/98583532.jpg)

4. Main entry

![](https://github.com/bobo0810/imageRepo/blob/master/img/32257225.jpg)

- Environment:

  | python version | pytorch version |
  | -------------- | --------------- |
  | 3.5            | 0.4             |

----------

# Darknet-53 network structure

![](https://github.com/bobo0810/imageRepo/blob/master/img/16734558.jpg)

The following are useful when reading the source:

hyperparams

![](https://github.com/bobo0810/imageRepo/blob/master/img/97781689.jpg)

module_list

![](https://github.com/bobo0810/imageRepo/blob/master/img/10165593.jpg)

module_defs

![](https://github.com/bobo0810/imageRepo/blob/master/img/56737437.jpg)



----------

# Prepare the dataset:

Download the COCO dataset:

```
$ cd data/
$ bash get_coco_dataset.sh
```

Dataset layout:
```
data/coco
│
└───images
│   │   train2014
│   │   val2014
│
└───labels
│   │   train2014
│   │   val2014
│   ...
│   ...

```

----------

# Train:

1. Start Visdom (a visualization tool similar to TensorFlow's TensorBoard):

```
# First install Python server and client
pip install visdom
# Start the server
python -m visdom.server
```

2. Start training:

Set the parameters in config.py.

main.py will run train().

###### Because of save/load bugs in the original repository, saving to the official .weights format (binary, storing only conv and BN layer parameters; everything else is read from the cfg file) is not supported. Training saves .pt checkpoints (the whole model).

![](https://github.com/bobo0810/imageRepo/blob/master/img/68971633.jpg)

----------

# Test:

Purpose: evaluation, computes mAP.

1. Download the official pretrained weights:

```
$ cd checkpoints/
$ bash download_weights.sh
```

2. Point load_model_path in config.py at the pretrained model.

###### Both official .weights models and self-trained .pt models are supported.

3. Set the parameters in config.py.

Run test() in main.py.


| Model               | mAP (min. 50 IoU) |
|---------------------|-------------------|
| YOLOv3 (paper)      | 57.9              |
| YOLOv3 (official)   | 58.38             |
| YOLOv3 (this impl.) | 58.2              |



![](https://github.com/bobo0810/imageRepo/blob/master/img/77791130.jpg)

----------

# Predict:

Purpose: visualize predictions on images.

1. Point load_model_path in config.py at the pretrained model.
###### Both official .weights models and self-trained .pt models are supported.
2. Set the parameters in config.py.

main.py will run detect().


Results with the official model:

(figures: detection examples)
(figures: detection examples)
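For reference, a minimal sketch of the .weights/.pt loading rule described in the Test and Predict sections above (a sketch under assumptions: it presumes the upstream Darknet class and its load_weights helper are kept; paths come from config.py):

```python
import torch
from models.models import Darknet
from utils.config import opt_detect

if opt_detect.load_model_path.endswith('.weights'):
    # official binary format: only conv/BN parameters; the rest comes from the cfg
    model = Darknet(opt_detect.config_path, img_size=opt_detect.img_size)
    model.load_weights(opt_detect.load_model_path)
else:
    # self-trained .pt checkpoint: the whole model was saved with torch.save
    model = torch.load(opt_detect.load_model_path)
```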


----------

## References:

Recommended companion reading:

- [Implementing YOLOv3 from 0 to 1 (part one)](https://blog.csdn.net/qq_25737169/article/details/80530579)

- [Implementing YOLO v3 from 0 to 1 (part two)](https://blog.csdn.net/qq_25737169/article/details/80634360)

- [YOLO v3 paper translation](https://zhuanlan.zhihu.com/p/34945787)

- [YOLO v3 network structure analysis](https://blog.csdn.net/qq_37541097/article/details/81214953)

----------

# About the authors

- Original author: [eriklindernoren](https://github.com/eriklindernoren)

- Author of this repository: [Mr.Li](https://github.com/bobo0810)



--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/utils/__init__.py
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/config.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# powered by Mr.Li
# default parameters
import os.path

class DefaultConfig_train():
    epochs = 30                               # number of training epochs
    image_folder = 'data/samples'             # dataset path
    batch_size = 16                           # batch size
    model_config_path = 'config/yolov3.cfg'   # network architecture definition
    data_config_path = 'config/coco.data'     # dataset usage configuration
    class_path = 'data/coco.names'            # COCO class labels
    conf_thres = 0.8                          # object confidence threshold
    nms_thres = 0.4                           # IoU threshold for NMS
    n_cpu = 0                                 # number of CPU threads to use during batch generation
    img_size = 416                            # input image size
    use_cuda = True                           # whether to use the GPU
    visdom = True                             # whether to plot the loss with visdom
    print_freq = 8                            # print every N batches during training
    lr_decay = 0.1                            # 1e-3 -> 1e-4

    checkpoint_interval = 1                   # save a checkpoint every N epochs
    checkpoint_dir = './checkpoints'          # where generated models are saved

    load_model_path = None                    # path of pretrained weights to load; None means do not load
    # load_model_path = checkpoint_dir + '/latestbobo.pt'  # pretrained weights (.pt only)

class DefaultConfig_test():
    epochs = 200                              # number of epochs
    batch_size = 16                           # size of each image batch
    model_config_path = 'config/yolov3.cfg'   # path to model config file
    data_config_path = 'config/coco.data'     # path to data config file

    checkpoint_dir = './checkpoints'          # where generated models are saved
    # load_model_path = None                  # path of pretrained weights to load; None means do not load
    load_model_path = checkpoint_dir + '/8yolov3.pt'  # pretrained weights (.weights or .pt)

    class_path = 'data/coco.names'            # path to class label file
    iou_thres = 0.5                           # IoU threshold required to qualify as detected
    conf_thres = 0.5                          # object confidence threshold
    nms_thres = 0.45                          # IoU threshold for non-maximum suppression
    n_cpu = 0                                 # number of CPU threads to use during batch generation
    img_size = 416                            # size of each image dimension
    use_cuda = True                           # whether to use cuda if available


class DefaultConfig_detect():
    image_folder = 'data/samples'             # path to dataset
    config_path = 'config/yolov3.cfg'         # path to model config file

    checkpoint_dir = './checkpoints'          # where generated models are saved
    # load_model_path = None                  # path of pretrained weights to load; None means do not load
    load_model_path = checkpoint_dir + '/yolov3.weights'  # pretrained weights (.weights or .pt)

    class_path = 'data/coco.names'            # path to class label file
    conf_thres = 0.8                          # object confidence threshold
    nms_thres = 0.4                           # IoU threshold for non-maximum suppression
    batch_size = 1                            # size of the batches
    n_cpu = 8                                 # number of CPU threads to use during batch generation
    img_size = 416                            # size of each image dimension
    use_cuda = True                           # whether to use cuda if available


# instantiate one object of each config class
opt_train = DefaultConfig_train()
opt_test = DefaultConfig_test()
opt_detect = DefaultConfig_detect()
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/parse_config.py:
--------------------------------------------------------------------------------


def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    '''
    Parses the yolo-v3 layer configuration file and returns the module
    definitions, one dict per block.
    path: path to yolov3.cfg
    '''
    file = open(path, 'r')
    # read line by line into a list
    lines = file.read().split('\n')
    # filter out lines starting with '#', i.e. comments
    lines = [x for x in lines if x and not x.startswith('#')]
    # strip whitespace (including \n, \r, \t) from both ends
    lines = [x.rstrip().lstrip() for x in lines]
    module_defs = []
    for line in lines:
        # a line starting with '[' marks the start of a new block
        if line.startswith('['):
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            key, value = line.split("=")
            value = value.strip()
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs

def parse_data_config(path):
    """Parses the dataloader configuration file"""
    options = dict()
    # default to 4 GPUs
    options['gpus'] = '0,1,2,3'
    # number of worker threads used by the data loader
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, value = line.split('=')
        options[key.strip()] = value.strip()
    return options
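Illustratively, given the cfg fragment below, parse_model_config returns one dict per block; values stay strings, and the convolutional default batch_normalize = 0 is overwritten when the key appears (a made-up minimal example):

```python
# yolov3.cfg fragment:
#   [convolutional]
#   batch_normalize=1
#   filters=32
#
# parse_model_config(...) returns:
#   [{'type': 'convolutional', 'batch_normalize': '1', 'filters': '32'}]
```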
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/visualize.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding:utf-8 -*-
# powered by Mr.Li
import visdom
import time
import numpy as np
import torch

class Visualizer(object):
    '''
    Wraps the basic visdom operations; the native visdom API is still
    available through `self.vis.function`.
    '''
    def __init__(self, env='default', **kwargs):
        self.vis = visdom.Visdom(env=env, **kwargs)
        # index of the point being plotted, i.e. the x coordinate;
        # stores e.g. ('loss', 23), meaning the 23rd point of 'loss'
        self.index = {}
        self.log_text = ''

    def reinit(self, env='default', **kwargs):
        '''
        re-initialize with a new visdom configuration
        '''
        self.vis = visdom.Visdom(env=env, **kwargs)
        return self

    def plot_many(self, d):
        '''
        plot several loss figures at once
        @params d: dict of (name, value), i.e. ('loss', 0.11)
        '''
        for k, v in d.items():
            self.plot(k, v)

    def img_many(self, d):
        '''
        draw several images at once
        '''
        for k, v in d.items():
            self.img(k, v)

    def plot(self, name, y, **kwargs):
        '''
        self.plot('loss', 1.00)
        '''
        # get the current index
        x = self.index.get(name, 0)
        self.vis.line(Y=np.array([y]), X=np.array([x]),
                      win=name,  # window name
                      opts=dict(title=name),
                      update=None if x == 0 else 'append',  # append to the existing trace
                      **kwargs
                      )
        # increment the index
        self.index[name] = x + 1

    def img(self, name, img_, **kwargs):
        '''
        self.img('input_img', t.Tensor(64, 64))
        self.img('input_imgs', t.Tensor(3, 64, 64))
        self.img('input_imgs', t.Tensor(100, 1, 64, 64))
        self.img('input_imgs', t.Tensor(100, 3, 64, 64), nrows=10)

        !!! don't ~~self.img('input_imgs', t.Tensor(100, 64, 64), nrows=10)~~ !!!
        '''
        self.vis.images(img_.cpu().numpy(),
                        win=name,
                        opts=dict(title=name),
                        **kwargs
                        )

    def log(self, info, win='log_text'):
        '''
        self.log({'loss': 1, 'lr': 0.0001})
        append a timestamped entry to the log window
        '''
        self.log_text += ('[{time}] {info} <br>'.format(
            time=time.strftime('%m%d_%H%M%S'),
            info=info))
        self.vis.text(self.log_text, win)

    def __getattr__(self, name):
        return getattr(self.vis, name)

    def create_vis_plot(self, _xlabel, _ylabel, _title, _legend):
        '''
        create a new visualization plot
        '''
        viz = visdom.Visdom()
        return viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(
                xlabel=_xlabel,
                ylabel=_ylabel,
                title=_title,
                legend=_legend
            )
        )
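A minimal usage sketch of the Visualizer above (illustrative; it assumes a visdom server is running via `python -m visdom.server`):

```python
from utils.visualize import Visualizer

vis = Visualizer(env='demo')
for step in range(10):
    vis.plot('loss', 1.0 / (step + 1))  # each call appends one point to the 'loss' window
vis.log({'epoch': 1, 'lr': 1e-3})       # appends a timestamped entry to the log window
```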