├── .github └── stale.yml ├── .gitignore ├── BBN ├── bbn_dataset.py ├── bbn_model.py ├── dataset.txt ├── readme.md └── train.py ├── CAM_pytorch ├── __init__.py ├── checkpoint │ └── .gitkeep ├── data │ ├── MyDataSet.py │ └── __init__.py ├── main.py ├── models │ ├── VGG_CAM.py │ └── __init__.py ├── readme.md └── utils │ ├── __init__.py │ ├── config.py │ └── visualize.py ├── CUDA_Python ├── CUDA-Python证书.pdf ├── readme.md ├── 课程1 │ ├── .ipynb_checkpoints │ │ └── Introduction to CUDA Python with Numba-checkpoint.ipynb │ ├── Introduction to CUDA Python with Numba.ipynb │ ├── images │ │ ├── DLI Header.png │ │ ├── numba_flowchart.png │ │ └── run_the_assessment.png │ ├── section1.tar.gz │ └── solutions │ │ ├── make_pulses_solution.py │ │ ├── monte_carlo_pi_solution.py │ │ └── zero_suppress_solution.py ├── 课程2 │ ├── .ipynb_checkpoints │ │ └── Custom CUDA Kernels in Python with Numba-checkpoint.ipynb │ ├── Custom CUDA Kernels in Python with Numba.ipynb │ ├── assessment │ │ └── histogram.py │ ├── debug │ │ ├── ex1.py │ │ ├── ex1a.py │ │ ├── ex2.py │ │ ├── ex3.py │ │ └── ex3a.py │ ├── images │ │ ├── DLI Header.png │ │ └── run_the_assessment.png │ ├── img │ │ ├── numba_flowchart.png │ │ ├── sensor_humidity.png │ │ └── sensor_temp.png │ ├── section2.tar.gz │ └── solutions │ │ ├── hypot_stride_solution.py │ │ ├── monte_carlo_pi_solution.py │ │ └── square_device_solution.py ├── 课程3 │ ├── .ipynb_checkpoints │ │ └── Effective Memory Use-checkpoint.ipynb │ ├── Effective Memory Use.ipynb │ ├── Multidimensional Grids and Shared Memory for CUDA Python with Numba.ipynb │ ├── assessment │ │ └── definition.py │ ├── images │ │ ├── DLI Header.png │ │ ├── mm_image.png │ │ ├── run_assess_task.png │ │ └── run_the_assessment.png │ └── solutions │ │ ├── add_matrix_solution.py │ │ ├── add_matrix_stride_solution.py │ │ ├── col_sums_solution.py │ │ ├── matrix_add_solution.py │ │ ├── matrix_multiply_solution.py │ │ ├── matrix_multiply_stride_solution.py │ │ ├── monte_carlo_pi_solution.py │ │ └── tile_transpose_solution.py └── 课程笔记.pdf ├── DataHub └── readme.md ├── FPN_pytorch ├── README.md ├── fpn.py └── retina_fpn.py ├── FasterRcnn_pytorch ├── LICENSE ├── README.MD ├── __pycache__ │ └── trainer.cpython-35.pyc ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── dataset.cpython-35.pyc │ │ ├── util.cpython-35.pyc │ │ └── voc_dataset.cpython-35.pyc │ ├── dataset.py │ ├── util.py │ └── voc_dataset.py ├── demo.ipynb ├── misc │ ├── convert_caffe_pretrain.py │ ├── demo.jpg │ └── train_fast.py ├── model │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── faster_rcnn.cpython-35.pyc │ │ ├── faster_rcnn_vgg16.cpython-35.pyc │ │ ├── region_proposal_network.cpython-35.pyc │ │ └── roi_module.cpython-35.pyc │ ├── faster_rcnn.py │ ├── faster_rcnn_vgg16.py │ ├── region_proposal_network.py │ ├── roi_module.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── bbox_tools.cpython-35.pyc │ │ ├── creator_tool.cpython-35.pyc │ │ └── roi_cupy.cpython-35.pyc │ │ ├── bbox_tools.py │ │ ├── creator_tool.py │ │ ├── nms │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── _nms_gpu_post_py.cpython-35.pyc │ │ │ └── non_maximum_suppression.cpython-35.pyc │ │ ├── _nms_gpu_post.c │ │ ├── _nms_gpu_post.pyx │ │ ├── _nms_gpu_post_py.py │ │ ├── build.py │ │ ├── build │ │ │ ├── lib.linux-x86_64-3.5 │ │ │ │ └── _nms_gpu_post.cpython-35m-x86_64-linux-gnu.so │ │ │ └── temp.linux-x86_64-3.5 │ │ │ │ └── _nms_gpu_post.o │ │ └── 
non_maximum_suppression.py │ │ └── roi_cupy.py ├── requirements.txt ├── train.py ├── trainer.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── array_tool.cpython-35.pyc │ ├── config.cpython-35.pyc │ ├── eval_tool.cpython-35.pyc │ └── vis_tool.cpython-35.pyc │ ├── array_tool.py │ ├── config.py │ ├── eval_tool.py │ └── vis_tool.py ├── GhostNet ├── G-Ghost.png ├── g_ghost_regnet.py └── readme.md ├── LICENSE ├── RepVGG ├── readme.md ├── repvgg.png └── repvgg.py ├── SSD_pytorch ├── checkpoint │ └── .gitkeep ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── voc0712.cpython-35.pyc │ └── voc0712.py ├── main.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── box_utils.cpython-35.pyc │ │ └── ssd.cpython-35.pyc │ ├── box_utils.py │ ├── functions │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── detection.cpython-35.pyc │ │ │ └── prior_box.cpython-35.pyc │ │ ├── detection.py │ │ └── prior_box.py │ ├── modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── init_weights.cpython-35.pyc │ │ │ ├── l2norm.cpython-35.pyc │ │ │ └── multibox_loss.cpython-35.pyc │ │ ├── init_weights.py │ │ ├── l2norm.py │ │ └── multibox_loss.py │ └── ssd.py ├── readme.md ├── temp │ └── test.png └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── augmentations.cpython-35.pyc │ ├── config.cpython-35.pyc │ ├── eval_untils.cpython-35.pyc │ ├── timer.cpython-35.pyc │ └── visualize.cpython-35.pyc │ ├── augmentations.py │ ├── config.py │ ├── eval_untils.py │ ├── timer.py │ └── visualize.py ├── UNet_pytorch ├── dice_loss.py ├── eval.py ├── predict.py ├── readme.md ├── submit.py ├── train.py ├── unet │ ├── __init__.py │ ├── unet_model.py │ └── unet_parts.py └── utils │ ├── __init__.py │ ├── config.py │ ├── crf.py │ ├── data_vis.py │ ├── load.py │ └── utils.py ├── Yolov1_pytorch ├── checkpoint │ └── .gitkeep ├── config.py ├── data │ ├── __init__.py │ ├── dataset.py │ ├── voc2007test.txt │ ├── voc2012train.txt │ └── xml_2_txt.py ├── main.py ├── main_resnet.py ├── models │ ├── __init__.py │ ├── net.py │ └── resnet.py ├── readme.md └── utils │ ├── __init__.py │ ├── predictUtils.py │ ├── testImgs │ └── __init__.py │ ├── visualize.py │ └── yoloLoss.py ├── Yolov3_pytorch ├── checkpoints │ ├── .gitkeep │ └── download_weights.sh ├── config │ ├── coco.data │ └── yolov3.cfg ├── data │ ├── coco.names │ ├── get_coco_dataset.sh │ └── samples │ │ ├── dog.jpg │ │ ├── eagle.jpg │ │ ├── giraffe.jpg │ │ ├── herd_of_horses.jpg │ │ ├── img1.jpg │ │ ├── img2.jpg │ │ ├── img3.jpg │ │ ├── img4.jpg │ │ ├── messi.jpg │ │ └── person.jpg ├── datasets │ └── datasets.py ├── main.py ├── models │ └── models.py ├── readme.md └── utils │ ├── __init__.py │ ├── config.py │ ├── parse_config.py │ ├── utils.py │ └── visualize.py └── readme.md /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 在一个问题变得陈旧之前不活跃的天数  2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 在一个陈旧的问题被关闭之前,没有活动的天数 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 这些标签的问题永远不会被认为是过时的 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 当标记过期问题时使用的标签 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. 
Set to `false` to disable 当把一个问题标记为过时时发表评论。设置为' false '禁用 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions.(由于长期不活动,机器人自动关闭此问题,如果需要欢迎提问) 16 | # Comment to post when closing a stale issue. Set to `false` to disable 在关闭过期问题时发表评论。设置为' false '禁用 17 | closeComment: false -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .DS_Store 3 | .idea 4 | -------------------------------------------------------------------------------- /BBN/bbn_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import torch.utils.data as data 4 | import torch 5 | import random 6 | import glob 7 | from tqdm import tqdm 8 | from timm.data.transforms_factory import create_transform as timm_transform 9 | from PIL import Image 10 | import torch 11 | import cv2 12 | import os 13 | import numpy as np 14 | import torchvision 15 | from torchvision.transforms import transforms 16 | 17 | def Process(img_path, img_size, use_augment): 18 | """ 19 | timm默认预处理 20 | """ 21 | # 读取图像 22 | assert os.path.exists(img_path), f"{img_path} 图像不存在" 23 | img = cv2.imread(img_path, cv2.IMREAD_COLOR) # BGR 24 | img = Image.fromarray(img) 25 | if use_augment: 26 | # 增广:Random(缩放、裁剪、翻转、色彩...) 27 | img_trans = timm_transform( 28 | img_size, 29 | is_training=True, 30 | re_prob=0.5, 31 | re_mode="pixel", # 随机擦除 32 | auto_augment=None, # 自动增广 eg:rand-m9-mstd0.5 33 | ) 34 | else: 35 | # 不增广:ReSize256 -> CenterCrop224 36 | img_trans = timm_transform(img_size) 37 | return img_trans(img) 38 | 39 | 40 | class BBN_Dataset(data.Dataset): 41 | """数据加载器""" 42 | 43 | def __init__(self, txt_path, mode, size): 44 | """ 45 | 46 | Args: 47 | txt_path (str): 数据集路径 48 | mode (str): 类型 49 | size (list): 图像尺寸 eg: [224,224] 50 | """ 51 | assert mode in ["train", "val", "test"] 52 | self.use_augment = True if mode == "train" else False # 训练集开启增广 53 | self.size = size 54 | 55 | self.dataset = self.load_txt(txt_path) 56 | self.imgs_list = self.dataset[mode] 57 | self.all_labels = self.dataset["all_labels"] 58 | 59 | # 训练集开启BBN 60 | if mode == "train": 61 | 62 | labels_list = [label for _, label in self.imgs_list] # 所有图片对应的类别列表 63 | class_index_dict = dict() # key类别名对应的索引 values该类的所有图片索引 64 | class_nums_list = [0] * len(self.all_labels) # 每个类对应的图片数 65 | for index, label in enumerate(labels_list): 66 | if not int(label) in class_index_dict: 67 | class_index_dict[int(label)] = [] 68 | class_index_dict[int(label)].append(index) 69 | 70 | class_nums_list[int(label)] += 1 71 | 72 | # 构建逆向采样分布 73 | max_num = max(class_nums_list) # 类内最大样本数 74 | class_weight = [max_num / i for i in class_nums_list] # 概率占比的倒数 列表 75 | sum_weight = sum(class_weight) # 逆向的概率占比之和 76 | self.class_weight, self.sum_weight = class_weight, sum_weight 77 | self.class_index_dict = class_index_dict 78 | 79 | def __getitem__(self, index): 80 | img_path, label = self.imgs_list[index] 81 | # 图像预处理 82 | img = Process(img_path, self.size, self.use_augment) 83 | # 训练集 BBN采样 84 | if self.use_augment: 85 | sample_class = self.sample_class_index_by_weight() # 类别索引 86 | sample_indexes = self.class_index_dict[sample_class] # 获得该类别的所有图片索引(对应图片顺序) 87 | sample_index = random.choice(sample_indexes) # 随机抽取一个样本 88 | img2_path, label2 = 
self.imgs_list[sample_index] 89 | img2 = Process(img2_path, self.size, self.use_augment) 90 | 91 | return img, label, img_path, img2, label2, img2_path 92 | # validation / test set 93 | else: 94 | return img, label, img_path 95 | 96 | def __len__(self): 97 | return len(self.imgs_list) 98 | 99 | def load_txt(self, txt_path): 100 | """Load a single-label classification dataset 101 | 102 | Args: 103 | txt_path (str): path to the dataset list file 104 | 105 | Each line has the form: split,class_name,image_path 106 | train,dog,img1.jpg 107 | val,dog,img2.jpg 108 | test,cat,img3.jpg 109 | 110 | Returns: 111 | { 112 | "train": [ 113 | [img_1, 0], 114 | [img_2, 1], 115 | ... 116 | ], 117 | "val": likewise, 118 | "test": likewise, 119 | "all_labels": ["dog", "cat",...], 120 | } 121 | 122 | """ 123 | # read the list file 124 | f = open(txt_path) 125 | txt_list = f.readlines() 126 | txt_list = [txt.strip().split(",") for txt in txt_list] # strip the trailing newline first, otherwise every image path keeps a '\n' and the os.path.exists() check in Process() fails 127 | f.close() 128 | 129 | 130 | # collect all class names 131 | all_labels = [txt_i[1] for txt_i in txt_list] 132 | all_labels = list(set(all_labels)) 133 | all_labels.sort() 134 | 135 | # build the dataset dict 136 | dataset = { 137 | "train": [], 138 | "val": [], 139 | "test": [], 140 | "all_labels": all_labels, 141 | } 142 | for mode, label, img_path in txt_list: 143 | assert mode in ["train", "val", "test"] 144 | dataset[mode].append([img_path, all_labels.index(label)]) 145 | return dataset 146 | 147 | def sample_class_index_by_weight(self): 148 | """ 149 | Reversed (inverse-frequency) sampling 150 | """ 151 | # rand_number lies in [0, sum of the inverse-frequency weights) 152 | rand_number, now_sum = random.random() * self.sum_weight, 0 153 | # walk over the classes and return the index of the class whose 154 | # weight interval contains rand_number 155 | for i in range(len(self.class_weight)): 156 | now_sum += self.class_weight[i] 157 | if rand_number <= now_sum: 158 | return i # index of the sampled class -------------------------------------------------------------------------------- /BBN/dataset.txt: -------------------------------------------------------------------------------- 1 | train,dog,CatDog/dog/dog_67.jpg 2 | train,dog,CatDog/dog/dog_2.jpg 3 | train,dog,CatDog/dog/dog_52.jpg 4 | train,dog,CatDog/dog/dog_82.jpg 5 | train,dog,CatDog/dog/dog_99.jpg 6 | train,dog,CatDog/dog/dog_85.jpg 7 | train,dog,CatDog/dog/dog_55.jpg 8 | train,dog,CatDog/dog/dog_41.jpg 9 | train,dog,CatDog/dog/dog_8.jpg 10 | train,dog,CatDog/dog/dog_56.jpg 11 | train,dog,CatDog/dog/dog_25.jpg 12 | train,dog,CatDog/dog/dog_92.jpg 13 | train,dog,CatDog/dog/dog_33.jpg 14 | train,dog,CatDog/dog/dog_62.jpg 15 | train,dog,CatDog/dog/dog_51.jpg 16 | train,dog,CatDog/dog/dog_13.jpg 17 | train,dog,CatDog/dog/dog_74.jpg 18 | train,dog,CatDog/dog/dog_24.jpg 19 | train,dog,CatDog/dog/dog_93.jpg 20 | train,dog,CatDog/dog/dog_12.jpg 21 | train,dog,CatDog/dog/dog_5.jpg 22 | train,dog,CatDog/dog/dog_22.jpg 23 | train,dog,CatDog/dog/dog_30.jpg 24 | train,dog,CatDog/dog/dog_28.jpg 25 | train,dog,CatDog/dog/dog_79.jpg 26 | train,dog,CatDog/dog/dog_35.jpg 27 | train,dog,CatDog/dog/dog_23.jpg 28 | train,dog,CatDog/dog/dog_94.jpg 29 | train,dog,CatDog/dog/dog_54.jpg 30 | train,dog,CatDog/dog/dog_40.jpg 31 | train,dog,CatDog/dog/dog_53.jpg 32 | train,dog,CatDog/dog/dog_88.jpg 33 | train,dog,CatDog/dog/dog_59.jpg 34 | train,dog,CatDog/dog/dog_42.jpg 35 | train,dog,CatDog/dog/dog_21.jpg 36 | train,dog,CatDog/dog/dog_73.jpg 37 | train,dog,CatDog/dog/dog_18.jpg 38 | train,dog,CatDog/dog/dog_43.jpg 39 | train,dog,CatDog/dog/dog_46.jpg 40 | train,dog,CatDog/dog/dog_57.jpg 41 | train,dog,CatDog/dog/dog_96.jpg 42 | train,dog,CatDog/dog/dog_77.jpg 43 | train,dog,CatDog/dog/dog_4.jpg 44 | train,dog,CatDog/dog/dog_20.jpg 45 | train,dog,CatDog/dog/dog_10.jpg 46 | train,dog,CatDog/dog/dog_69.jpg 47 | 
train,dog,CatDog/dog/dog_100.jpg 48 | train,dog,CatDog/dog/dog_66.jpg 49 | train,dog,CatDog/dog/dog_95.jpg 50 | train,dog,CatDog/dog/dog_84.jpg 51 | train,dog,CatDog/dog/dog_64.jpg 52 | train,dog,CatDog/dog/dog_31.jpg 53 | train,dog,CatDog/dog/dog_16.jpg 54 | train,dog,CatDog/dog/dog_89.jpg 55 | train,dog,CatDog/dog/dog_76.jpg 56 | train,dog,CatDog/dog/dog_19.jpg 57 | train,dog,CatDog/dog/dog_70.jpg 58 | train,dog,CatDog/dog/dog_91.jpg 59 | train,dog,CatDog/dog/dog_44.jpg 60 | train,dog,CatDog/dog/dog_86.jpg 61 | train,dog,CatDog/dog/dog_78.jpg 62 | train,dog,CatDog/dog/dog_61.jpg 63 | train,dog,CatDog/dog/dog_45.jpg 64 | train,dog,CatDog/dog/dog_37.jpg 65 | train,dog,CatDog/dog/dog_11.jpg 66 | train,dog,CatDog/dog/dog_60.jpg 67 | train,dog,CatDog/dog/dog_6.jpg 68 | train,dog,CatDog/dog/dog_27.jpg 69 | train,dog,CatDog/dog/dog_65.jpg 70 | train,dog,CatDog/dog/dog_29.jpg 71 | train,cat,CatDog/cat/cat_56.jpg 72 | train,cat,CatDog/cat/cat_35.jpg 73 | train,cat,CatDog/cat/cat_90.jpg 74 | train,cat,CatDog/cat/cat_32.jpg 75 | train,cat,CatDog/cat/cat_7.jpg 76 | train,cat,CatDog/cat/cat_37.jpg 77 | train,cat,CatDog/cat/cat_100.jpg 78 | train,cat,CatDog/cat/cat_25.jpg 79 | train,cat,CatDog/cat/cat_28.jpg 80 | train,cat,CatDog/cat/cat_77.jpg 81 | train,cat,CatDog/cat/cat_23.jpg 82 | train,cat,CatDog/cat/cat_21.jpg 83 | train,cat,CatDog/cat/cat_11.jpg 84 | train,cat,CatDog/cat/cat_47.jpg 85 | train,cat,CatDog/cat/cat_27.jpg 86 | train,cat,CatDog/cat/cat_41.jpg 87 | train,cat,CatDog/cat/cat_97.jpg 88 | train,cat,CatDog/cat/cat_39.jpg 89 | train,cat,CatDog/cat/cat_98.jpg 90 | train,cat,CatDog/cat/cat_38.jpg 91 | val,dog,CatDog/dog/dog_39.jpg 92 | val,dog,CatDog/dog/dog_81.jpg 93 | val,dog,CatDog/dog/dog_1.jpg 94 | val,dog,CatDog/dog/dog_71.jpg 95 | val,dog,CatDog/dog/dog_98.jpg 96 | val,dog,CatDog/dog/dog_80.jpg 97 | val,dog,CatDog/dog/dog_49.jpg 98 | val,dog,CatDog/dog/dog_26.jpg 99 | val,dog,CatDog/dog/dog_38.jpg 100 | val,dog,CatDog/dog/dog_15.jpg 101 | val,cat,CatDog/cat/cat_63.jpg 102 | val,cat,CatDog/cat/cat_14.jpg 103 | val,cat,CatDog/cat/cat_43.jpg 104 | val,cat,CatDog/cat/cat_64.jpg 105 | val,cat,CatDog/cat/cat_84.jpg 106 | val,cat,CatDog/cat/cat_52.jpg 107 | val,cat,CatDog/cat/cat_57.jpg 108 | val,cat,CatDog/cat/cat_46.jpg 109 | val,cat,CatDog/cat/cat_60.jpg 110 | val,cat,CatDog/cat/cat_44.jpg 111 | test,dog,CatDog/dog/dog_32.jpg 112 | test,dog,CatDog/dog/dog_75.jpg 113 | test,dog,CatDog/dog/dog_58.jpg 114 | test,dog,CatDog/dog/dog_3.jpg 115 | test,dog,CatDog/dog/dog_7.jpg 116 | test,dog,CatDog/dog/dog_34.jpg 117 | test,dog,CatDog/dog/dog_48.jpg 118 | test,dog,CatDog/dog/dog_83.jpg 119 | test,dog,CatDog/dog/dog_36.jpg 120 | test,dog,CatDog/dog/dog_9.jpg 121 | test,dog,CatDog/dog/dog_63.jpg 122 | test,dog,CatDog/dog/dog_72.jpg 123 | test,dog,CatDog/dog/dog_50.jpg 124 | test,dog,CatDog/dog/dog_97.jpg 125 | test,dog,CatDog/dog/dog_47.jpg 126 | test,dog,CatDog/dog/dog_17.jpg 127 | test,dog,CatDog/dog/dog_68.jpg 128 | test,dog,CatDog/dog/dog_14.jpg 129 | test,dog,CatDog/dog/dog_90.jpg 130 | test,dog,CatDog/dog/dog_87.jpg 131 | test,cat,CatDog/cat/cat_58.jpg 132 | test,cat,CatDog/cat/cat_17.jpg 133 | test,cat,CatDog/cat/cat_96.jpg 134 | test,cat,CatDog/cat/cat_40.jpg 135 | test,cat,CatDog/cat/cat_87.jpg 136 | test,cat,CatDog/cat/cat_69.jpg 137 | test,cat,CatDog/cat/cat_67.jpg 138 | test,cat,CatDog/cat/cat_3.jpg 139 | test,cat,CatDog/cat/cat_18.jpg 140 | test,cat,CatDog/cat/cat_2.jpg 141 | test,cat,CatDog/cat/cat_13.jpg 142 | test,cat,CatDog/cat/cat_15.jpg 143 | test,cat,CatDog/cat/cat_80.jpg 144 | 
test,cat,CatDog/cat/cat_95.jpg 145 | test,cat,CatDog/cat/cat_5.jpg 146 | test,cat,CatDog/cat/cat_73.jpg 147 | test,cat,CatDog/cat/cat_6.jpg 148 | test,cat,CatDog/cat/cat_10.jpg 149 | test,cat,CatDog/cat/cat_36.jpg 150 | test,cat,CatDog/cat/cat_65.jpg 151 | -------------------------------------------------------------------------------- /BBN/readme.md: -------------------------------------------------------------------------------- 1 | # BBN: Bilateral-Branch Network with Cumulative Learning for Long-Tailed Visual Recognition 2 | 3 | #### 该仓库收录于[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub) 4 | 5 | 6 | - [官方库](https://github.com/Megvii-Nanjing/BBN) [官方知乎解读](https://zhuanlan.zhihu.com/p/123876769) 7 | - 目的:图像分类任务中,长尾数据分布存在极端的类别不平衡问题 8 | 9 | ## TODO 10 | - [x] BBN 数据加载、模型定义 11 | 12 | > 基于官方库,简化代码。便于迁移到任意训练框架。 13 | 14 | - [ ] 训练示例和采样可视化 15 | 16 | -------------------------------------------------------------------------------- /BBN/train.py: -------------------------------------------------------------------------------- 1 | from .bbn_dataset import BBN_Dataset 2 | from .bbn_model import BBN_ResNet50 3 | from torch.utils.data import DataLoader 4 | from tqdm import tqdm 5 | import torch.nn as nn 6 | from pycm import ConfusionMatrix 7 | import torch 8 | # 初始化模型 9 | model=BBN_ResNet50() 10 | 11 | 12 | # 构建数据集 13 | batch=64 14 | txt_path="./dataset.txt" 15 | train_set = BBN_Dataset(txt_path=txt_path,mode="train",size=[224,224]) 16 | val_set = BBN_Dataset(txt_path=txt_path,mode="val",size=[224,224]) 17 | 18 | # 构建数据集加载器 19 | train_dataloader = DataLoader( 20 | dataset=train_set, 21 | batch_size=batch, 22 | num_workers=4, 23 | shuffle=True, 24 | drop_last=True, 25 | ) 26 | val_dataloader = DataLoader( 27 | dataset=val_set, 28 | batch_size=batch, 29 | num_workers=4, 30 | ) 31 | 32 | # 开始训练 33 | optimizer=None 34 | lr_scheduler=None 35 | criterion=nn.CrossEntropyLoss() 36 | Epochs=100 37 | 38 | for epoch in range(Epochs): 39 | optimizer.zero_grad() 40 | 41 | for batch_idx, ( 42 | imgs, 43 | labels, 44 | imgs_path, 45 | imgs2, 46 | labels2, 47 | imgs_path2, 48 | ) in enumerate(tqdm(train_dataloader)): 49 | model.train() 50 | # 正常采样分布 51 | imgs, labels = imgs.cuda(), labels.cuda() 52 | # 逆向采样分布 53 | imgs2, labels2 = imgs2.cuda(), labels2.cuda() 54 | 55 | l = 1 - ((epoch - 1) / Epochs) ** 2 # parabolic decay抛物线 56 | params = {"imgs1": imgs, "imgs2": imgs2, "l": l} 57 | output = model(params) 58 | loss = l * criterion(output, labels) + (1 - l) * criterion( 59 | output, labels2 60 | ) 61 | loss.backward() 62 | optimizer.step() 63 | optimizer.zero_grad() 64 | model.eval() 65 | lr_scheduler.step() 66 | # 评估模型 67 | preds_list, labels_list = [], [] 68 | for batch_idx, (imgs, labels, imgs_path) in enumerate(tqdm(val_dataloader)): 69 | imgs, labels = imgs.cuda(), labels.cuda() 70 | scores = model(imgs) 71 | scores = torch.nn.functional.softmax(scores, dim=1) 72 | preds = torch.argmax(scores, dim=1) 73 | 74 | preds_list.append(preds) 75 | labels_list.append(labels) 76 | preds_list = torch.cat(preds_list, dim=0).cpu().numpy() 77 | labels_list = torch.cat(labels_list, dim=0).cpu().numpy() 78 | acc=ConfusionMatrix(labels_list, preds_list).Overall_ACC 79 | print("val acc:",acc) -------------------------------------------------------------------------------- /CAM_pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/__init__.py 
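Note on `BBN/train.py` above: it is deliberately a skeleton (the readme's TODO still lists the training example), so `optimizer` and `lr_scheduler` are left as `None` and the first `optimizer.zero_grad()` call would raise `AttributeError`; the model is also never moved to the GPU even though the loop calls `imgs.cuda()`. A minimal way to fill in the gaps — the SGD/cosine choices below are illustrative assumptions, not the official BBN recipe:

```python
import torch

model = model.cuda()  # the loop sends batches to the GPU, so the model must live there too
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=Epochs)  # Epochs = 100 in train.py
```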
-------------------------------------------------------------------------------- /CAM_pytorch/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/checkpoint/.gitkeep -------------------------------------------------------------------------------- /CAM_pytorch/data/MyDataSet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:utf-8 -*- 3 | # power by Mr.Li 4 | import os 5 | from torch.utils import data 6 | from torchvision import transforms as T 7 | import cv2 8 | import random 9 | from utils.config import opt 10 | class MyDataSet(data.Dataset): 11 | ''' 12 | 主要目标: 获取所有图片的地址,并根据训练,验证,测试划分数据 13 | ''' 14 | def __init__(self, root, transforms=None, train=True, test=False): 15 | self.test = test #状态 16 | self.train = train 17 | self.root = root #数据集路径 18 | 19 | # 读取文件夹下所有图像 20 | if root!='': 21 | pos_root=os.path.join(root, 'pos') 22 | neg_root = os.path.join(root, 'neg') 23 | 24 | pos_imgs = [os.path.join(pos_root, img) for img in os.listdir(pos_root)] 25 | neg_imgs = [os.path.join(neg_root, img) for img in os.listdir(neg_root)] 26 | 27 | imgs = pos_imgs + neg_imgs 28 | # 打乱数据集 29 | random.shuffle(imgs) 30 | else: 31 | print('数据集为空???') 32 | imgs = [] 33 | 34 | imgs_num = len (imgs) 35 | # 划分数据集 36 | if train: 37 | self.imgs = imgs[:int(0.8 * imgs_num)] 38 | else: 39 | self.imgs = imgs[int(0.8 * imgs_num):] 40 | 41 | 42 | 43 | # 对图像进行转化(若未指定转化,则执行默认操作) 44 | if transforms is None: 45 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 46 | 47 | if self.test or not train: # 测试集和验证集 48 | self.transforms = T.Compose([ 49 | T.ToTensor(), 50 | normalize 51 | ]) 52 | else: # 训练集 53 | self.transforms = T.Compose([ 54 | T.ToTensor(), 55 | normalize 56 | ]) 57 | 58 | def __getitem__(self, index): 59 | ''' 60 | 一次返回一张图片的数据 61 | ''' 62 | # 图片的完整路径 63 | img_path = self.imgs[index] 64 | # 读取图像 65 | img = cv2.imread(img_path) 66 | img = self.BGR2RGB(img) # 因为pytorch自身提供的预训练好的模型期望的输入是RGB 67 | img = cv2.resize(img, (64, 128)) 68 | # 对图片进行转化 69 | img = self.transforms(img) 70 | # 标签真值 71 | if 'neg' in img_path: 72 | label=0 # 没有人 73 | else: 74 | label=1 # 有人 75 | 76 | return img,label 77 | 78 | def __len__(self): 79 | return len(self.imgs) 80 | 81 | def BGR2RGB(self, img): 82 | return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 83 | 84 | def get_test_img(self): 85 | # 读取图像 86 | img_origin = cv2.imread(opt.test_img) 87 | img = self.BGR2RGB(img_origin) # 因为pytorch自身提供的预训练好的模型期望的输入是RGB 88 | img = cv2.resize(img, (64, 128)) 89 | # 对图片进行转化 90 | img = self.transforms(img) 91 | return img_origin,img 92 | 93 | -------------------------------------------------------------------------------- /CAM_pytorch/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/data/__init__.py -------------------------------------------------------------------------------- /CAM_pytorch/models/VGG_CAM.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # power by Mr.Li 3 | from torch import nn 4 | import torch as t 5 | from torchvision.models import vgg16 6 | from utils.config import opt 7 | class VGG16_CAM(nn.Module): 8 | ''' 9 | 定义网络 10 | ''' 11 | def __init__(self): 12 | 
super(VGG16_CAM, self).__init__() 13 | # 设置网络名称 14 | self.moduel_name = str("VGG16_CAM") 15 | # 去掉 VGG16 feature层的maxpool层 16 | self.feature_layer = nn.Sequential(*list(vgg16(pretrained=True).features.children())[0:-1]) 17 | # 全局平均池化层 GAP 18 | self.fc_layer = nn.Linear(512,2) 19 | 20 | def forward(self, x): 21 | x = self.feature_layer(x) 22 | # GAP 全局平均池化 23 | x = t.mean(x,dim=3) 24 | x = t.mean(x,dim=2) 25 | 26 | # 全连接层+softmax层 27 | x = self.fc_layer(x) 28 | # x = F.softmax(x) #交叉熵自带softmax 29 | return x 30 | 31 | 32 | # def test(): 33 | # from torch.autograd import Variable 34 | # model=VGG16_CAM() 35 | # print(model) 36 | # img=t.rand(2,3,224,224) 37 | # img=Variable(img) 38 | # output=model(img) 39 | # print(output.size()) 40 | # 41 | # if __name__ == '__main__': 42 | # test() -------------------------------------------------------------------------------- /CAM_pytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .VGG_CAM import VGG16_CAM -------------------------------------------------------------------------------- /CAM_pytorch/readme.md: -------------------------------------------------------------------------------- 1 | # class activation mapping 2 | 3 | 4 | - 环境: 5 | 6 | | python版本 | pytorch版本 | 7 | | ----------- | ---------- | 8 | | 3.5 | 0.3.0 | 9 | 10 | 11 | - 作用:分类、定位(不使用真值框进行定位,论文证明 卷积层本身就有定位功能) 12 | 13 | ---------- 14 | 15 | ## 数据集 16 | 17 | - [INRIA Person数据集(官方)](http://pascal.inrialpes.fr/data/human/) 18 | - [INRIA Person数据集(百度云)](https://pan.baidu.com/s/1adTzYgX13K4CIjZNODRXqQ) 19 | 20 | 21 | ## 预训练模型 22 | 23 | - [VGG16_CAM_39_99.455.pth](https://pan.baidu.com/s/1OVnxBBhmtVgTEUz0nNmrFg) 24 | 25 | 26 | ## 训练 27 | 28 | 1、在config.py中配置数据集等训练参数 29 | 30 | 2、执行main.py开始训练 31 | 32 | ## 可视化 33 | 34 | 1、在config.py中配置预训练模型 35 | 36 | 2、执行main.py可视化class_activation_map 37 | 38 | 39 | 40 | 41 | ## 训练过程 42 |
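(Note: `utils/visualize.py` wraps a `visdom.Visdom` client, which expects a visdom server to be running — start one with `python -m visdom.server` before launching `main.py`; the screenshots below were presumably captured from that dashboard.)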
43 | (training-process screenshots) 44 | 
45 | 46 | ---------- 47 | 48 | ## Results 49 | 50 | - The image regions the network focuses on when classifying (i.e., the evidence behind its predictions) 51 | 52 | 
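Such a map can be computed directly from the `VGG16_CAM` weights — a minimal sketch of the paper's CAM formula, M_c(x, y) = Σ_k w_k^c · f_k(x, y), written against a current PyTorch API rather than the 0.3 version pinned above; `img_tensor` stands for an already-preprocessed input of shape `(1, 3, H, W)`:

```python
import torch
import torch.nn.functional as F

model = VGG16_CAM().eval()                                   # from models/VGG_CAM.py
with torch.no_grad():
    feats = model.feature_layer(img_tensor)                  # (1, 512, h, w) conv feature maps
    logits = model.fc_layer(feats.mean(dim=3).mean(dim=2))   # GAP -> fc, same as forward()
    cls = logits.argmax(dim=1).item()                        # predicted class index
    w = model.fc_layer.weight[cls]                           # (512,) fc weights of that class
    cam = (w.view(1, -1, 1, 1) * feats).sum(dim=1, keepdim=True)  # weighted sum over channels
    cam = F.interpolate(cam, size=img_tensor.shape[2:], mode="bilinear", align_corners=False)
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)      # normalize to [0, 1] for overlay
```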
53 | (class-activation visualization, example 1 of 3) 54 | 
55 | 56 |
57 | (class-activation visualization, example 2 of 3) 58 | 
59 | 60 |
61 | (class-activation visualization, example 3 of 3) 62 | 
63 | 64 | ---------- 65 | 66 | ## 参考 67 | 68 | - [Keras implementation of CAM](https://github.com/jacobgil/keras-cam) 69 | - [可视化CNN](https://github.com/huanghao-code/VisCNN_CVPR_2016_Loc) 70 | - [论文CVPR 2016](https://arxiv.org/pdf/1512.04150.pdf) -------------------------------------------------------------------------------- /CAM_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CAM_pytorch/utils/__init__.py -------------------------------------------------------------------------------- /CAM_pytorch/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # power by Mr.Li 3 | # 设置默认参数 4 | import datetime 5 | import os 6 | class DefaultConfig(): 7 | # 使用的模型,名字必须与models/__init__.py中的名字一致 8 | # 目前支持的网络 9 | model = 'VGG16_CAM' 10 | 11 | # 数据集地址 12 | dataset_root = '/home/bobo/data/cam_dataset/INRIAPerson/Train' 13 | 14 | # 保存模型 15 | root = os.path.abspath(os.path.join(os.path.dirname(__file__))) + '/' 16 | checkpoint_root = root + '../checkpoint/' # 存储模型的路径 17 | # load_model_path = None # 加载预训练的模型的路径,为None代表不加载(用于训练) 18 | load_model_path = checkpoint_root+'VGG16_CAM_39_99.455.pth' 19 | 20 | use_gpu = True # user GPU or not 21 | batch_size = 32 22 | num_workers = 4 # 加载数据时的线程数 23 | 24 | max_epoch = 40 25 | 26 | 27 | lr = 0.01 28 | lr_decay = 0.5 29 | 30 | test_img='/home/bobo/windowsPycharmProject/cam_pytorch/person_and_bike_191.png' #一张测试图片地址 31 | 32 | 33 | 34 | #初始化该类的一个对象 35 | opt=DefaultConfig() -------------------------------------------------------------------------------- /CAM_pytorch/utils/visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:utf-8 -*- 3 | # power by Mr.Li 4 | import visdom 5 | import time 6 | import numpy as np 7 | class Visualizer(object): 8 | ''' 9 | 封装了visdom的基本操作,但是你仍然可以通过`self.vis.function` 10 | 调用原生的visdom接口 11 | ''' 12 | def __init__(self, env='default', **kwargs): 13 | self.vis = visdom.Visdom(env=env, **kwargs) 14 | # 画的第几个数,相当于横座标 15 | # 保存(’loss',23) 即loss的第23个点 16 | self.index = {} 17 | self.log_text = '' 18 | def reinit(self,env='default',**kwargs): 19 | ''' 20 | 修改visdom的配置 重新初始化 21 | ''' 22 | self.vis = visdom.Visdom(env=env,**kwargs) 23 | return self 24 | def plot_many(self, d): 25 | ''' 26 | 一次plot多个损失图形 27 | @params d: dict (name,value) i.e. ('loss',0.11) 28 | ''' 29 | for k, v in d.items(): 30 | self.plot(k, v) 31 | def img_many(self, d): 32 | ''' 33 | 一次画多个图像 34 | ''' 35 | for k, v in d.items(): 36 | self.img(k, v) 37 | def plot(self, name, y,**kwargs): 38 | ''' 39 | self.plot('loss',1.00) 40 | ''' 41 | #得到下标序号 42 | x = self.index.get(name, 0) 43 | self.vis.line(Y=np.array([y]), X=np.array([x]), 44 | win=name,#窗口名 45 | opts=dict(title=name), 46 | update=None if x == 0 else 'append', #按照append的画图形 47 | **kwargs 48 | ) 49 | #下标累加1 50 | self.index[name] = x + 1 51 | def img(self, name, img_,**kwargs): 52 | ''' 53 | self.img('input_img',t.Tensor(64,64)) 54 | self.img('input_imgs',t.Tensor(3,64,64)) 55 | self.img('input_imgs',t.Tensor(100,1,64,64)) 56 | self.img('input_imgs',t.Tensor(100,3,64,64),nrows=10) 57 | 58 | !!!don‘t ~~self.img('input_imgs',t.Tensor(100,64,64),nrows=10)~~!!! 
59 | ''' 60 | self.vis.images(img_.cpu().numpy(), 61 | win=name, 62 | opts=dict(title=name), 63 | **kwargs 64 | ) 65 | def log(self,info,win='log_text'): 66 | ''' 67 | self.log({'loss':1,'lr':0.0001}) 68 | print a log entry 69 | ''' 70 | 71 | self.log_text += ('[{time}] {info} <br>
'.format( 72 | time=time.strftime('%m%d_%H%M%S'),\ 73 | info=info)) 74 | self.vis.text(self.log_text,win) 75 | def __getattr__(self, name): 76 | return getattr(self.vis, name) -------------------------------------------------------------------------------- /CUDA_Python/CUDA-Python证书.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/CUDA-Python证书.pdf -------------------------------------------------------------------------------- /CUDA_Python/readme.md: -------------------------------------------------------------------------------- 1 | # 加速计算基础——CUDA Python 通关版 2 | 3 | >- [Nvidia课程官网](https://courses.nvidia.com/courses/course-v1:DLI+C-AC-02+V1/) 4 | > 5 | >- [本人课程证书](https://courses.nvidia.com/certificates/59466c0d52ae45a394d3b40902aad864/) 6 | 7 | ### 课程1 使用 Numba 的 CUDA Python 简介 8 | 9 | - 基于 Numba 的 CUDA Python 编程简介 10 | - 使用 Numba 在 Python 中编写自定义的 CUDA 核函数 11 | - 使用 Numba 实现 CUDA Python 的多维网格和共享内存 12 | 13 | ### 课程2 使用 Numba 的 CUDA Python 的自定义核函数和内存管理 14 | 15 | - 基于 Numba 的 CUDA Python 编程简介 16 | - 使用 Numba 在 Python 中编写自定义的 CUDA 核函数 17 | - 使用 Numba 实现 CUDA Python 的多维网格和共享内存 18 | 19 | ### 课程3 有效使用内存子系统 20 | 21 | * 编写受益于合并内存访问模式的 CUDA 核函数。 22 | * 使用多维网格和线程块。 23 | * 使用共享内存来协调块内的线程。 24 | * 使用共享内存来促进合并内存访问模式。 25 | * 解决共享内存区的冲突。 26 | 27 | -------------------------------------------------------------------------------- /CUDA_Python/课程1/images/DLI Header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/DLI Header.png -------------------------------------------------------------------------------- /CUDA_Python/课程1/images/numba_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/numba_flowchart.png -------------------------------------------------------------------------------- /CUDA_Python/课程1/images/run_the_assessment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/images/run_the_assessment.png -------------------------------------------------------------------------------- /CUDA_Python/课程1/section1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程1/section1.tar.gz -------------------------------------------------------------------------------- /CUDA_Python/课程1/solutions/make_pulses_solution.py: -------------------------------------------------------------------------------- 1 | n = 100000 2 | noise = (np.random.normal(size=n) * 3).astype(np.float32) 3 | t = np.arange(n, dtype=np.float32) 4 | period = n / 23 5 | 6 | d_noise = cuda.to_device(noise) 7 | d_t = cuda.to_device(t) 8 | d_pulses = cuda.device_array(shape=(n,), dtype=np.float32) 9 | 10 | make_pulses(d_t, period, 100.0, out=d_pulses) 11 | waveform = add_ufunc(d_pulses, d_noise) -------------------------------------------------------------------------------- /CUDA_Python/课程1/solutions/monte_carlo_pi_solution.py: 
-------------------------------------------------------------------------------- 1 | from numba import jit # `jit` is the Numba just-in-time-compiler function 2 | import random 3 | 4 | @jit # Use the decorator syntax to mark `monte_carlo_pi` for Numba compilation 5 | def monte_carlo_pi(nsamples): 6 | acc = 0 7 | for i in range(nsamples): 8 | x = random.random() 9 | y = random.random() 10 | if (x**2 + y**2) < 1.0: 11 | acc += 1 12 | return 4.0 * acc / nsamples -------------------------------------------------------------------------------- /CUDA_Python/课程1/solutions/zero_suppress_solution.py: -------------------------------------------------------------------------------- 1 | @vectorize(['int16(int16, int16)'], target='cuda') 2 | def zero_suppress(waveform_value, threshold): 3 | if waveform_value < threshold: 4 | result = 0 5 | else: 6 | result = waveform_value 7 | return result -------------------------------------------------------------------------------- /CUDA_Python/课程2/assessment/histogram.py: -------------------------------------------------------------------------------- 1 | # Add your solution here 2 | @cuda.jit 3 | def cuda_histogram(x, xmin, xmax, histogram_out): 4 | '''Increment bin counts in histogram_out, given histogram range [xmin, xmax).''' 5 | nbins = histogram_out.shape[0] # 分为N组 6 | bin_width = (xmax - xmin) / nbins # 每组宽度 7 | 8 | 9 | start = cuda.grid(1) 10 | 11 | stride=cuda.gridsize(1) # 1指 所有进程按一维下标索引 12 | for i in range(start,x.shape[0],stride): 13 | bin_number=(x[i] - xmin)/bin_width # 所有进程的一次并行计算 14 | if bin_number >= 0 and bin_number < histogram_out.shape[0]: 15 | cuda.atomic.add(histogram_out, bin_number, 1)# 原子操作 全局加1 -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | for i in range(start, x.shape[0], stride): 14 | bin_number = np.int32((x[i] - xmin)/bin_width) 15 | if bin_number >= 0 and bin_number < histogram_out.shape[0]: 16 | histogram_out[bin_number] += 1 17 | 18 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 19 | xmin = np.float32(-4.0) 20 | xmax = np.float32(4.0) 21 | histogram_out = np.zeros(shape=10, dtype=np.int32) 22 | 23 | histogram[64, 64](x, xmin, xmax, histogram_out) 24 | 25 | print('input count:', x.shape[0]) 26 | print('histogram:', histogram_out) 27 | print('count:', histogram_out.sum()) 28 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex1a.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | for i in range(start, x.shape[0], stride): 14 | bin_number = np.int32((x[i] - xmin)/bin_width) 15 | if bin_number >= 0 and bin_number < histogram_out.shape[0]: 16 | histogram_out[bin_number] += 1 17 | print('in range', x[i], bin_number) 18 | else: 19 | print('out of range', x[i], bin_number) 20 | 21 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 22 | xmin = 
np.float32(-4.0) 23 | xmax = np.float32(4.0) 24 | histogram_out = np.zeros(shape=10, dtype=np.int32) 25 | 26 | histogram[64, 64](x, xmin, xmax, histogram_out) 27 | 28 | print('input count:', x.shape[0]) 29 | print('histogram:', histogram_out) 30 | print('count:', histogram_out.sum()) 31 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | ### DEBUG FIRST THREAD 14 | if start == 0: 15 | from pdb import set_trace; set_trace() 16 | ### 17 | 18 | for i in range(start, x.shape[0], stride): 19 | bin_number = np.int32((x[i] + xmin)/bin_width) 20 | 21 | if bin_number >= 0 and bin_number < histogram_out.shape[0]: 22 | cuda.atomic.add(histogram_out, bin_number, 1) 23 | 24 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 25 | xmin = np.float32(-4.0) 26 | xmax = np.float32(4.0) 27 | histogram_out = np.zeros(shape=10, dtype=np.int32) 28 | 29 | histogram[64, 64](x, xmin, xmax, histogram_out) 30 | 31 | print('input count:', x.shape[0]) 32 | print('histogram:', histogram_out) 33 | print('count:', histogram_out.sum()) 34 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | for i in range(start, x.shape[0], stride): 14 | bin_number = np.int32((x[i] + xmin)/bin_width) 15 | 16 | if bin_number >= 0 or bin_number < histogram_out.shape[0]: 17 | cuda.atomic.add(histogram_out, bin_number, 1) 18 | 19 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 20 | xmin = np.float32(-4.0) 21 | xmax = np.float32(4.0) 22 | histogram_out = np.zeros(shape=10, dtype=np.int32) 23 | 24 | histogram[64, 64](x, xmin, xmax, histogram_out) 25 | 26 | print('input count:', x.shape[0]) 27 | print('histogram:', histogram_out) 28 | print('count:', histogram_out.sum()) 29 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/debug/ex3a.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numba import cuda 4 | 5 | @cuda.jit(debug=True) 6 | def histogram(x, xmin, xmax, histogram_out): 7 | nbins = histogram_out.shape[0] 8 | bin_width = (xmax - xmin) / nbins 9 | 10 | start = cuda.grid(1) 11 | stride = cuda.gridsize(1) 12 | 13 | for i in range(start, x.shape[0], stride): 14 | bin_number = np.int32((x[i] + xmin)/bin_width) 15 | 16 | if bin_number >= 0 or bin_number < histogram_out.shape[0]: 17 | cuda.atomic.add(histogram_out, bin_number, 1) 18 | 19 | x = np.random.normal(size=50, loc=0, scale=1).astype(np.float32) 20 | xmin = np.float32(-4.0) 21 | xmax = np.float32(4.0) 22 | histogram_out = np.zeros(shape=10, dtype=np.int32) 23 | 24 | histogram[64, 64](x, xmin, xmax, histogram_out) 25 | 26 | print('input count:', x.shape[0]) 27 | print('histogram:', histogram_out) 28 | print('count:', histogram_out.sum()) 
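# NOTE (inferred by comparing with ex1.py and assessment/histogram.py): the
# `(x[i] + xmin)` above should be `(x[i] - xmin)`, and the `or` in the bounds
# check should be `and` — as written the condition is always true, so shifted
# values produce out-of-bounds atomic writes. Both bugs are planted on purpose:
# ex3.py/ex3a.py are debugging exercises, and `debug=True` here lets
# cuda-memcheck pinpoint the offending line. They are left unfixed.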
29 | -------------------------------------------------------------------------------- /CUDA_Python/课程2/images/DLI Header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/images/DLI Header.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/images/run_the_assessment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/images/run_the_assessment.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/img/numba_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/numba_flowchart.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/img/sensor_humidity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/sensor_humidity.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/img/sensor_temp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/img/sensor_temp.png -------------------------------------------------------------------------------- /CUDA_Python/课程2/section2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程2/section2.tar.gz -------------------------------------------------------------------------------- /CUDA_Python/课程2/solutions/hypot_stride_solution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | from math import hypot 4 | 5 | @cuda.jit 6 | def hypot_stride(a, b, c): 7 | idx = cuda.grid(1) 8 | stride = cuda.gridsize(1) 9 | 10 | for i in range(idx, a.shape[0], stride): 11 | c[i] = hypot(a[i], b[i]) 12 | 13 | n = 1000000 14 | a = np.random.uniform(-12, 12, n).astype(np.float32) 15 | b = np.random.uniform(-12, 12, n).astype(np.float32) 16 | d_a = cuda.to_device(a) 17 | d_b = cuda.to_device(b) 18 | d_c = cuda.device_array_like(d_b) 19 | 20 | hypot_stride[1, 1](d_a, d_b, d_c) -------------------------------------------------------------------------------- /CUDA_Python/课程2/solutions/monte_carlo_pi_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def monte_carlo_pi_device(rng_states, nsamples, out): 3 | thread_id = cuda.grid(1) 4 | 5 | # Compute pi by drawing random (x, y) points and finding what 6 | # fraction lie inside a unit circle 7 | acc = 0 8 | for i in range(nsamples): 9 | x = xoroshiro128p_uniform_float32(rng_states, thread_id) 10 | y = xoroshiro128p_uniform_float32(rng_states, thread_id) 11 | if x**2 + y**2 <= 1.0: 12 | acc += 1 13 | 14 | out[thread_id] = 4.0 * acc / nsamples 15 | 
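# Host-side driver for the kernel above — this mirrors the 课程3 copy of the
# same solution further down in this repo, with the imports the kernel assumes
# made explicit so the snippet is self-contained.
import numpy as np
from numba import cuda
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32

nsamples = 10000000
threads_per_block = 128
blocks = 32
grid_size = threads_per_block * blocks          # total number of threads

samples_per_thread = int(nsamples / grid_size)  # split the samples across threads
rng_states = create_xoroshiro128p_states(grid_size, seed=1)   # one RNG state per thread
d_out = cuda.device_array(grid_size, dtype=np.float32)

monte_carlo_pi_device[blocks, threads_per_block](rng_states, samples_per_thread, d_out)
print('pi estimate:', d_out.copy_to_host().mean())            # average the per-thread estimates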
-------------------------------------------------------------------------------- /CUDA_Python/课程2/solutions/square_device_solution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | 4 | @cuda.jit 5 | def square_device(a, out): 6 | idx = cuda.grid(1) 7 | out[idx] = a[idx]**2 8 | 9 | n = 4096 10 | a = np.arange(n) 11 | 12 | d_a = cuda.to_device(a) 13 | d_out = cuda.device_array(shape=(n,), dtype=np.float32) 14 | 15 | threads = 32 16 | blocks = 128 17 | 18 | square_device[blocks, threads](d_a, d_out) -------------------------------------------------------------------------------- /CUDA_Python/课程3/assessment/definition.py: -------------------------------------------------------------------------------- 1 | # Use the 'File' menu above to 'Save' after pasting in your own mm_shared function definition. -------------------------------------------------------------------------------- /CUDA_Python/课程3/images/DLI Header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/DLI Header.png -------------------------------------------------------------------------------- /CUDA_Python/课程3/images/mm_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/mm_image.png -------------------------------------------------------------------------------- /CUDA_Python/课程3/images/run_assess_task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/run_assess_task.png -------------------------------------------------------------------------------- /CUDA_Python/课程3/images/run_the_assessment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程3/images/run_the_assessment.png -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/add_matrix_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def add_matrix(A, B, C): 3 | i,j = cuda.grid(2) 4 | 5 | C[j,i] = A[j,i] + B[j,i] 6 | 7 | A = np.arange(36*36).reshape(36, 36).astype(np.int32) 8 | B = A * 2 9 | C = np.zeros_like(A) 10 | d_A = cuda.to_device(A) 11 | d_B = cuda.to_device(B) 12 | d_C = cuda.to_device(C) 13 | 14 | blocks = (6,6) 15 | threads_per_block = (6,6) 16 | 17 | add_matrix[blocks, threads_per_block](d_A, d_B, d_C) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/add_matrix_stride_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def add_matrix_stride(A, B, C): 3 | 4 | y, x = cuda.grid(2) 5 | stride_y, stride_x = cuda.gridsize(2) 6 | 7 | for i in range(x, A.shape[0], stride_x): 8 | for j in range(y, A.shape[1], stride_y): 9 | C[i][j] = A[i][j] + B[i][j] 10 | 11 | A = np.arange(64*64).reshape(64, 64).astype(np.int32) 12 | B = A * 2 13 | C = np.zeros_like(A) 14 | d_A = cuda.to_device(A) 15 | d_B = cuda.to_device(B) 16 | d_C = cuda.to_device(C) 17 | 18 
| blocks = (6,6) 19 | threads_per_block = (6,6) 20 | 21 | add_matrix_stride[blocks, threads_per_block](d_A, d_B, d_C) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/col_sums_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def col_sums(a, sums, ds): 3 | idx = cuda.grid(1) 4 | sum = 0.0 5 | 6 | for i in range(ds): 7 | sum += a[i][idx] 8 | 9 | sums[idx] = sum 10 | -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/matrix_add_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def matrix_add(a, b, out, coalesced): 3 | x, y = cuda.grid(2) 4 | 5 | if coalesced == True: 6 | out[y][x] = a[y][x] + b[y][x] 7 | else: 8 | out[x][y] = a[x][y] + b[x][y] 9 | -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/matrix_multiply_solution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | 4 | @cuda.jit 5 | def mm(a, b, c): 6 | column, row = cuda.grid(2) 7 | sum = 0 8 | 9 | for i in range(a.shape[0]): 10 | sum += a[row][i] * b[i][column] 11 | 12 | c[row][column] = sum 13 | 14 | a = np.arange(16).reshape(4,4).astype(np.int32) 15 | b = np.arange(16).reshape(4,4).astype(np.int32) 16 | c = np.zeros_like(a) 17 | 18 | d_a = cuda.to_device(a) 19 | d_b = cuda.to_device(b) 20 | d_c = cuda.to_device(c) 21 | 22 | grid = (2,2) 23 | block = (2,2) 24 | mm[grid, block](d_a, d_b, d_c) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/matrix_multiply_stride_solution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | 4 | @cuda.jit 5 | def mm_stride(A, B, C): 6 | 7 | grid_column, grid_row = cuda.grid(2) 8 | stride_column, stride_row = cuda.gridsize(2) 9 | 10 | for data_row in range(grid_row, A.shape[0], stride_row): 11 | for data_column in range(grid_column, B.shape[1], stride_column): 12 | sum = 0 13 | for i in range(A.shape[1]): # `range(B.shape[0])` is also okay 14 | sum += A[data_row][i] * B[i][data_column] 15 | 16 | C[data_row][data_column] = sum 17 | 18 | n = 1024 19 | a = np.arange(n*n).reshape(n,n).astype(np.int32) 20 | b = np.arange(n*n).reshape(n,n).astype(np.int32) 21 | c = np.zeros((a.shape[0], b.shape[1])).astype(np.int32) 22 | 23 | d_a = cuda.to_device(a) 24 | d_b = cuda.to_device(b) 25 | d_c = cuda.to_device(c) 26 | 27 | ts = (32,32) 28 | bs = (32,32) 29 | 30 | mm_stride[bs, ts](d_a, d_b, d_c) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/monte_carlo_pi_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def monte_carlo_pi_device(rng_states, nsamples, out): 3 | thread_id = cuda.grid(1) 4 | 5 | # Compute pi by drawing random (x, y) points and finding what 6 | # fraction lie inside a unit circle 7 | acc = 0 8 | for i in range(nsamples): 9 | x = xoroshiro128p_uniform_float32(rng_states, thread_id) 10 | y = xoroshiro128p_uniform_float32(rng_states, thread_id) 11 | if x**2 + y**2 <= 1.0: 12 | acc += 1 13 | 14 | out[thread_id] = 4.0 * acc / nsamples 15 | 16 | nsamples = 10000000 17 | threads_per_block = 128 18 | blocks = 32 19 | grid_size = threads_per_block * blocks 20 | 21 | 
samples_per_thread = int(nsamples / grid_size) 22 | rng_states = create_xoroshiro128p_states(grid_size, seed=1) 23 | d_out = cuda.device_array(threads_per_block * blocks, dtype=np.float32) 24 | 25 | monte_carlo_pi_device[blocks, threads_per_block](rng_states, samples_per_thread, d_out) -------------------------------------------------------------------------------- /CUDA_Python/课程3/solutions/tile_transpose_solution.py: -------------------------------------------------------------------------------- 1 | @cuda.jit 2 | def tile_transpose(a, transposed): 3 | # `tile_transpose` assumes it is launched with a 32x32 block dimension, 4 | # and that `a` is a multiple of these dimensions. 5 | 6 | # 1) Create 32x32 shared memory array. 7 | tile = cuda.shared.array((32, 32), numba_types.int32) 8 | 9 | # Compute offsets into global input array. Recall for coalesced access we want to map threadIdx.x increments to 10 | # the fastest changing index in the data, i.e. the column in our array. 11 | a_col = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x 12 | a_row = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y 13 | 14 | # 2) Make coalesced read from global memory into shared memory array. 15 | # Note the use of local thread indices for the shared memory write, 16 | # and global offsets for global memory read. 17 | tile[cuda.threadIdx.y, cuda.threadIdx.x] = a[a_row, a_col] 18 | 19 | # 3) Wait for all threads in the block to finish updating shared memory. 20 | cuda.syncthreads() 21 | 22 | # 4) Calculate transposed location for the shared memory array tile 23 | # to be written back to global memory. Note that blockIdx.y*blockDim.y 24 | # and blockIdx.x* blockDim.x are swapped (because we want to write to the 25 | # transpose locations), but we want to keep access coalesced, so match up the 26 | # threadIdx.x to the fastest changing index, i.e. the column 27 | t_col = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.x 28 | t_row = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.y 29 | 30 | # 5) Write from shared memory (using thread indices) 31 | # back to global memory (using grid indices) 32 | # transposing each element within the shared memory array. 
33 | transposed[t_row, t_col] = tile[cuda.threadIdx.x, cuda.threadIdx.y] -------------------------------------------------------------------------------- /CUDA_Python/课程笔记.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/CUDA_Python/课程笔记.pdf -------------------------------------------------------------------------------- /DataHub/readme.md: -------------------------------------------------------------------------------- 1 | # 公开数据集汇总 2 | 3 | 4 | 5 | ## 数据集平台 6 | 7 | - [百度](https://aistudio.baidu.com/aistudio/datasetoverview) 8 | - [阿里天池](https://tianchi.aliyun.com/dataset) 9 | - [kaggle](https://www.kaggle.com/datasets) 10 | - [集市](https://www.cvmart.net/dataSets) 11 | 12 | 13 | 14 | ## 多模态 15 | 16 | | 链接 | 标注类型 | 数量 | 17 | | ------------------------------------------------------------ | ----------------------- | ---- | 18 | | [COYO-700M](https://github.com/kakaobrain/coyo-dataset) | 大规模英文图文对 数据集 | 7亿 | 19 | | [img2dataset](https://github.com/rom1504/img2dataset/tree/main) | N个图文对数据集 | | 20 | 21 | 22 | 23 | ## 安全场景 24 | 25 | ### 枪支 26 | 27 | | 链接 | 标注类型 | 数量 | 28 | | ------------------------------------------------------------ | -------- | ---- | 29 | | [URL1](https://www.kaggle.com/datasets/shivanirana63/labeled-guns-data-for-object-detection) | 目标检测 | 3k | 30 | | [URL2](https://www.kaggle.com/code/gattoni/faster-rcnn-guns-object-detection-with-save-load/data) | 目标检测 | 0.3k | 31 | 32 | ### 二维码 33 | 34 | | 链接 | 标注类型 | 数量 | 35 | | ------------------------------------------------------------ | -------- | ---- | 36 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/147099/0) | 目标检测 | 0.6k | 37 | | [URL2](https://aistudio.baidu.com/aistudio/datasetdetail/103078/0) | 目标检测 | 2k | 38 | 39 | ### 火灾烟雾 40 | 41 | | 链接 | 标注类型 | 数量 | 42 | | ------------------------------------------------------------ | -------- | ---- | 43 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/107770/0) | 目标检测 | 6.9k | 44 | | [URL2](https://aistudio.baidu.com/aistudio/datasetdetail/90352) | 目标检测 | 2k | 45 | | [URL3](https://aistudio.baidu.com/aistudio/datasetdetail/84374/0) | 目标检测 | 5k | 46 | 47 | ### 抽烟 48 | 49 | | 链接 | 标注类型 | 数量 | 50 | | ------------------------------------------------------------ | -------- | ---- | 51 | | [URL1](https://aistudio.baidu.com/aistudio/datasetdetail/72629/0) | 目标检测 | 1.5k | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /FPN_pytorch/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-FPN 2 | 3 | _Feature Pyramid Networks_ in PyTorch. 4 | 5 | [原地址](https://github.com/kuangliu/pytorch-fpn) 6 | 7 | References: 8 | [1] [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144) 9 | [2] [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) 10 | 11 | 12 | 13 | # 自己想法 14 | 15 | - 目前工作 16 | 17 | 无意完成 以FPN为基础的RPN网络的fast rcnn,仅了解FPN基本思想即可。 18 | 19 | - 网络结构 20 | 21 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/67784779.jpg) 22 | 23 | 24 | - 基本流程 25 | 26 | 27 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/28743914.jpg) -------------------------------------------------------------------------------- /FPN_pytorch/fpn.py: -------------------------------------------------------------------------------- 1 | '''FPN in PyTorch. 
2 | 3 | See the paper "Feature Pyramid Networks for Object Detection" for more details. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from torch.autograd import Variable 10 | 11 | 12 | class Bottleneck(nn.Module): 13 | expansion = 4 14 | 15 | def __init__(self, in_planes, planes, stride=1): 16 | super(Bottleneck, self).__init__() 17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | out = self.bn3(self.conv3(out)) 35 | out += self.shortcut(x) 36 | out = F.relu(out) 37 | return out 38 | 39 | 40 | class FPN(nn.Module): 41 | ''' 42 | 继承nn.Module,实现自定义网络模型 43 | ''' 44 | def __init__(self, block, num_blocks): 45 | super(FPN, self).__init__() 46 | # 输入通道数 47 | self.in_planes = 64 48 | # nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True) 49 | 50 | # 原论文网络结构 in_channels=3 out_channels=64 51 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 52 | # 通道数BN层的参数是输出通道数out_channels=64 53 | self.bn1 = nn.BatchNorm2d(64) 54 | 55 | # Bottom-up layers 56 | # 自底向上的网络 resnet网络 57 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 58 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 59 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 60 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 61 | 62 | # Top layer (最顶层只有侧边连接,kernel_size=1目的减少通道数,形状不变) 63 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels 减少通道数 64 | 65 | # Smooth layers 平滑层 66 | # 作用:在融合之后还会再采用3*3的卷积核对每个融合结果进行卷积,目的是消除上采样的混叠效应 67 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 68 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 69 | self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 70 | 71 | # Lateral layers 侧边层 72 | # (1*1的卷积核的主要作用是减少卷积核的个数,也就是减少了feature map的个数,并不改变feature map的尺寸大小。) 73 | self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 74 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) 75 | self.latlayer3 = nn.Conv2d( 256, 256, kernel_size=1, stride=1, padding=0) 76 | 77 | def _make_layer(self, block, planes, num_blocks, stride): 78 | ''' 79 | resnet网络 80 | ''' 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def _upsample_add(self, x, y): 89 | ''' 90 | Upsample and add two feature maps. 91 | 上采样 并 将两个feature maps求和 92 | Args: 93 | x: (Variable) top feature map to be upsampled. 将要上采样的 上层feature map 94 | y: (Variable) lateral feature map. 
侧边的feature map 95 | 96 | Returns: 97 | (Variable) added feature map. 98 | 99 | Note in PyTorch, when input size is odd, the upsampled feature map 100 | with `F.upsample(..., scale_factor=2, mode='nearest')` 101 | maybe not equal to the lateral feature map size. 102 | 在PyTorch中,当输入大小为奇数时,请注意上采样的特征映射 103 |     用'F.upsample(...,scale_factor = 2,mode ='nearest')` 104 |     可能不等于横向特征地图尺寸。 105 | 106 | e.g. 107 | original input size: [N,_,15,15] -> 108 | conv2d feature map size: [N,_,8,8] -> 109 | upsampled feature map size: [N,_,16,16] 110 | 111 | So we choose bilinear upsample which supports arbitrary output sizes. 112 | 所以我们选择支持任意输出大小的双线性上采样。 113 | ''' 114 | _,_,H,W = y.size() 115 | # 使用 双线性插值bilinear对x进行上采样,之后与y逐元素相加 116 | return F.upsample(x, size=(H,W), mode='bilinear') + y 117 | 118 | def forward(self, x): 119 | # Bottom-up 自底向上 conv -> batchnmorm -> relu ->maxpool 120 | c1 = F.relu(self.bn1(self.conv1(x))) 121 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1) 122 | 123 | # resnet网络 124 | c2 = self.layer1(c1) 125 | c3 = self.layer2(c2) 126 | c4 = self.layer3(c3) 127 | c5 = self.layer4(c4) 128 | 129 | # Top-down 自顶向下并与侧边相连 130 | p5 = self.toplayer(c5) #减少通道数 131 | p4 = self._upsample_add(p5, self.latlayer1(c4)) 132 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 133 | p2 = self._upsample_add(p3, self.latlayer3(c2)) 134 | 135 | # Smooth 平滑层(在融合之后还会再采用3*3的卷积核对每个融合结果进行卷积,目的是消除上采样的混叠效应) 136 | p4 = self.smooth1(p4) 137 | p3 = self.smooth2(p3) 138 | p2 = self.smooth3(p2) 139 | return p2, p3, p4, p5 140 | 141 | 142 | def FPN101(): 143 | # [2,4,23,3]为FPN101的参数 144 | # return FPN(Bottleneck, [2,4,23,3]) 145 | 146 | #[2,2,2,2]为FPN18的参数 147 | return FPN(Bottleneck, [2,2,2,2]) 148 | 149 | 150 | def test(): 151 | # 新建FPN101网络 152 | net = FPN101() 153 | print('网络结构为') 154 | print(net) 155 | # 前向传播,得到网络输出值 fms即为p2, p3, p4, p5 156 | fms = net(Variable(torch.randn(1,3,224,224))) 157 | print('网络输出的内容为') 158 | for fm in fms: 159 | print(fm.size()) 160 | 161 | test() 162 | -------------------------------------------------------------------------------- /FPN_pytorch/retina_fpn.py: -------------------------------------------------------------------------------- 1 | '''RetinaFPN in PyTorch. 2 | 3 | See the paper "Focal Loss for Dense Object Detection" for more details. 
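Compared with the plain FPN in fpn.py, this variant follows RetinaNet and adds
two extra pyramid levels: p6 from a stride-2 3x3 conv on c5, and p7 from a
stride-2 3x3 conv on relu(p6), so forward() returns (p3, p4, p5, p6, p7).
Minimal usage, mirroring test() at the bottom of this file:

    net = RetinaFPN101()
    fms = net(Variable(torch.randn(1, 3, 600, 900)))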
4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from torch.autograd import Variable 10 | 11 | 12 | class Bottleneck(nn.Module): 13 | expansion = 4 14 | 15 | def __init__(self, in_planes, planes, stride=1): 16 | super(Bottleneck, self).__init__() 17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | out = self.bn3(self.conv3(out)) 35 | out += self.shortcut(x) 36 | out = F.relu(out) 37 | return out 38 | 39 | 40 | class RetinaFPN(nn.Module): 41 | def __init__(self, block, num_blocks): 42 | super(RetinaFPN, self).__init__() 43 | self.in_planes = 64 44 | 45 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 46 | self.bn1 = nn.BatchNorm2d(64) 47 | 48 | # Bottom-up layers 49 | self.layer2 = self._make_layer(block, 64, num_blocks[0], stride=1) 50 | self.layer3 = self._make_layer(block, 128, num_blocks[1], stride=2) 51 | self.layer4 = self._make_layer(block, 256, num_blocks[2], stride=2) 52 | self.layer5 = self._make_layer(block, 512, num_blocks[3], stride=2) 53 | self.conv6 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1) 54 | self.conv7 = nn.Conv2d( 256, 256, kernel_size=3, stride=2, padding=1) 55 | 56 | # Top layer 57 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels 58 | 59 | # Smooth layers 60 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 61 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 62 | 63 | # Lateral layers 64 | self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 65 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) 66 | 67 | def _make_layer(self, block, planes, num_blocks, stride): 68 | strides = [stride] + [1]*(num_blocks-1) 69 | layers = [] 70 | for stride in strides: 71 | layers.append(block(self.in_planes, planes, stride)) 72 | self.in_planes = planes * block.expansion 73 | return nn.Sequential(*layers) 74 | 75 | def _upsample_add(self, x, y): 76 | '''Upsample and add two feature maps. 77 | 78 | Args: 79 | x: (Variable) top feature map to be upsampled. 80 | y: (Variable) lateral feature map. 81 | 82 | Returns: 83 | (Variable) added feature map. 84 | 85 | Note in PyTorch, when input size is odd, the upsampled feature map 86 | with `F.upsample(..., scale_factor=2, mode='nearest')` 87 | maybe not equal to the lateral feature map size. 88 | 89 | e.g. 90 | original input size: [N,_,15,15] -> 91 | conv2d feature map size: [N,_,8,8] -> 92 | upsampled feature map size: [N,_,16,16] 93 | 94 | So we choose bilinear upsample which supports arbitrary output sizes. 
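        The mismatch matters because the two maps are added elementwise: a
        16x16 nearest-neighbour upsample cannot be added to the 15x15 lateral
        map, whereas upsampling directly to y's (H, W) always matches. (On
        PyTorch >= 0.4.1 the equivalent, non-deprecated call is
        F.interpolate(x, size=(H, W), mode='bilinear').)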
95 | ''' 96 | _,_,H,W = y.size() 97 | return F.upsample(x, size=(H,W), mode='bilinear') + y 98 | 99 | def forward(self, x): 100 | # Bottom-up 101 | c1 = F.relu(self.bn1(self.conv1(x))) 102 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1) 103 | c2 = self.layer2(c1) 104 | c3 = self.layer3(c2) 105 | c4 = self.layer4(c3) 106 | c5 = self.layer5(c4) 107 | p6 = self.conv6(c5) 108 | p7 = self.conv7(F.relu(p6)) 109 | # Top-down 110 | p5 = self.toplayer(c5) 111 | p4 = self._upsample_add(p5, self.latlayer1(c4)) 112 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 113 | # Smooth 114 | p4 = self.smooth1(p4) 115 | p3 = self.smooth2(p3) 116 | return p3, p4, p5, p6, p7 117 | 118 | 119 | def RetinaFPN101(): 120 | # return RetinaFPN(Bottleneck, [2,4,23,3]) 121 | return RetinaFPN(Bottleneck, [2,2,2,2]) 122 | 123 | 124 | def test(): 125 | net = RetinaFPN101() 126 | fms = net(Variable(torch.randn(1,3,600,900))) 127 | for fm in fms: 128 | print(fm.size()) 129 | 130 | test() 131 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 Yun Chen 4 | 5 | Original works by: 6 | -------------------------------------------------------- 7 | chainer/chainercv 8 | Copyright (c) 2017 Yusuke Niitani 9 | Licensed under The MIT License 10 | https://github.com/chainer/chainercv/blob/master/LICENSE 11 | -------------------------------------------------------- 12 | Faster R-CNN 13 | Copyright (c) 2015 Microsoft 14 | Licensed under The MIT License 15 | https://github.com/rbgirshick/py-faster-rcnn/blob/master/LICENSE 16 | -------------------------------------------------------- 17 | 18 | Permission is hereby granted, free of charge, to any person obtaining a copy 19 | of this software and associated documentation files (the "Software"), to deal 20 | in the Software without restriction, including without limitation the rights 21 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 22 | copies of the Software, and to permit persons to whom the Software is 23 | furnished to do so, subject to the following conditions: 24 | 25 | The above copyright notice and this permission notice shall be included in 26 | all copies or substantial portions of the Software. 27 | 28 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 30 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 31 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 32 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 33 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 34 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /FasterRcnn_pytorch/__pycache__/trainer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/__pycache__/trainer.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__init__.py -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__pycache__/dataset.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/dataset.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__pycache__/util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/util.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/__pycache__/voc_dataset.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/data/__pycache__/voc_dataset.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/dataset.py: -------------------------------------------------------------------------------- 1 | import torch as t 2 | from .voc_dataset import VOCBboxDataset 3 | from skimage import transform as sktsf 4 | from torchvision import transforms as tvtsf 5 | from . 
import util 6 | import numpy as np 7 | from utils.config import opt 8 | 9 | 10 | def inverse_normalize(img): 11 | """ 12 | 将[-1,1]范围的图像近似还原回[0,255]之间 13 | """ 14 | if opt.caffe_pretrain: 15 | img = img + (np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1)) 16 | return img[::-1, :, :] 17 | # approximate un-normalize for visualize 18 | return (img * 0.225 + 0.45).clip(min=0, max=1) * 255 19 | 20 | 21 | def pytorch_normalze(img): 22 | """ 23 | https://github.com/pytorch/vision/issues/223 24 | return appr -1~1 RGB 25 | 对pytorch格式的图像进行规范化,返回值范围在[-1,1]之间 通道为RGB 26 | """ 27 | normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406], 28 | std=[0.229, 0.224, 0.225]) 29 | img = normalize(t.from_numpy(img)) 30 | return img.numpy() 31 | 32 | 33 | def caffe_normalize(img): 34 | """ 35 | return appr -125-125 BGR 36 | 对caffe格式的图像进行规范化,返回值范围在[-125,125]之间 通道为BGR 37 | """ 38 | img = img[[2, 1, 0], :, :] # RGB-BGR 39 | img = img * 255 40 | mean = np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1) 41 | img = (img - mean).astype(np.float32, copy=True) 42 | return img 43 | 44 | 45 | def preprocess(img, min_size=600, max_size=1000): 46 | """Preprocess an image for feature extraction. 47 | 48 | The length of the shorter edge is scaled to :obj:`self.min_size`. 49 | After the scaling, if the length of the longer edge is longer than 50 | :param min_size: 51 | :obj:`self.max_size`, the image is scaled to fit the longer edge 52 | to :obj:`self.max_size`. 53 | After resizing the image, the image is subtracted by a mean image value 54 | :obj:`self.mean`. 55 | 56 | 预处理图像以进行特征提取。 57 | 较短边的长度缩放为:min_size。 58 | 缩放后,如果长边的长度比min_size或者max_size长,则长边的长度被缩放到max_size 59 | 调整图像大小后,图像减去平均图像值mean 60 | 61 | 图片进行缩放,使得长边小于等于1000,短边小于等于600(至少有一个等于) 62 | 63 | Args: 64 | img (~numpy.ndarray): An image. This is in CHW and RGB format. 65 | The range of its value is :math:`[0, 255]`. 66 | 67 | Returns: 68 | ~numpy.ndarray: A preprocessed image. 69 | 70 | """ 71 | C, H, W = img.shape 72 | scale1 = min_size / min(H, W) 73 | scale2 = max_size / max(H, W) 74 | scale = min(scale1, scale2) 75 | img = img / 255. 
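    # Worked example of the scale choice: for a 500x750 (H x W) image,
    # scale1 = 600/500 = 1.2 and scale2 = 1000/750 ~= 1.33, so scale = 1.2 and
    # the image becomes 600x900 -- the short side reaches min_size while the
    # long side stays within max_size. The division above first brings pixel
    # values to [0, 1] for skimage's resize and the later normalization.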
76 | img = sktsf.resize(img, (C, H * scale, W * scale), mode='reflect') 77 | # both the longer and shorter should be less than 78 | # max_size and min_size 79 | if opt.caffe_pretrain: 80 | normalize = caffe_normalize 81 | else: 82 | normalize = pytorch_normalze 83 | #调用上述方法对img进行规范化 84 | return normalize(img) 85 | 86 | 87 | class Transform(object): 88 | 89 | def __init__(self, min_size=600, max_size=1000): 90 | self.min_size = min_size 91 | self.max_size = max_size 92 | 93 | def __call__(self, in_data): 94 | img, bbox, label = in_data 95 | _, H, W = img.shape 96 | #调用上述方法进行缩放图像 97 | img = preprocess(img, self.min_size, self.max_size) 98 | _, o_H, o_W = img.shape 99 | scale = o_H / H 100 | #对图像对应的bbox也进行同等尺度的缩放 101 | bbox = util.resize_bbox(bbox, (H, W), (o_H, o_W)) 102 | 103 | # horizontally flip 104 | #水平翻转(对img和对应的bbox进行同等尺度的水平翻转)=============================只进行水平翻转 105 | img, params = util.random_flip( 106 | img, x_random=True, return_param=True) 107 | bbox = util.flip_bbox( 108 | bbox, (o_H, o_W), x_flip=params['x_flip']) 109 | 110 | return img, bbox, label, scale 111 | 112 | 113 | class Dataset: 114 | def __init__(self, opt): 115 | self.opt = opt 116 | #初始化VOCBboxDataset,传入 数据集地址 117 | #eg: /data/image/voc/VOCdevkit/VOC2007/ 118 | self.db = VOCBboxDataset(opt.voc_data_dir) 119 | #调用上述方法Transform(图像转化方式),进行初始化 120 | self.tsf = Transform(opt.min_size, opt.max_size) 121 | 122 | def __getitem__(self, idx): 123 | #得到原始img,检测框、标签、困难度 124 | ori_img, bbox, label, difficult = self.db.get_example(idx) 125 | #调用上述方法Transform,执行__call__方法。返回规范化后的img, bbox, label, 转化之后的比例scale 126 | img, bbox, label, scale = self.tsf((ori_img, bbox, label)) 127 | # TODO: check whose stride is negative to fix this instead copy all 128 | # some of the strides of a given numpy array are negative. 129 | 130 | return img.copy(), bbox.copy(), label.copy(), scale 131 | 132 | def __len__(self): 133 | return len(self.db) 134 | 135 | 136 | class TestDataset: 137 | 138 | def __init__(self, opt, split='test', use_difficult=True): 139 | self.opt = opt 140 | self.db = VOCBboxDataset(opt.voc_data_dir, split=split, use_difficult=use_difficult) 141 | 142 | def __getitem__(self, idx): 143 | ori_img, bbox, label, difficult = self.db.get_example(idx) 144 | img = preprocess(ori_img) 145 | return img, ori_img.shape[1:], bbox, label, difficult 146 | 147 | def __len__(self): 148 | return len(self.db) 149 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/data/voc_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | 4 | import numpy as np 5 | 6 | from .util import read_image 7 | 8 | 9 | class VOCBboxDataset: 10 | """Bounding box dataset for PASCAL `VOC`_. 11 | 边界框数据集 12 | 13 | .. _`VOC`: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/ 14 | 15 | The index corresponds to each image. 16 | 17 | When queried by an index, if :obj:`return_difficult == False`, 18 | this dataset returns a corresponding 19 | :obj:`img, bbox, label`, a tuple of an image, bounding boxes and labels. 20 | This is the default behaviour. 21 | If :obj:`return_difficult == True`, this dataset returns corresponding 22 | :obj:`img, bbox, label, difficult`. :obj:`difficult` is a boolean array 23 | that indicates whether bounding boxes are labeled as difficult or not. 
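    A minimal usage sketch (the root path below is the example path from Args):

        dataset = VOCBboxDataset('/data/image/voc/VOCdevkit/VOC2007/')
        img, bbox, label, difficult = dataset[0]  # __getitem__ is get_example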
24 | 25 | The bounding boxes are packed into a two dimensional tensor of shape 26 | :math:`(R, 4)`, where :math:`R` is the number of bounding boxes in 27 | the image. The second axis represents attributes of the bounding box. 28 | They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the 29 | four attributes are coordinates of the top left and the bottom right 30 | vertices. 31 | 32 | The labels are packed into a one dimensional tensor of shape :math:`(R,)`. 33 | :math:`R` is the number of bounding boxes in the image. 34 | The class name of the label :math:`l` is :math:`l` th element of 35 | :obj:`VOC_BBOX_LABEL_NAMES`. 36 | 37 | The array :obj:`difficult` is a one dimensional boolean array of shape 38 | :math:`(R,)`. :math:`R` is the number of bounding boxes in the image. 39 | If :obj:`use_difficult` is :obj:`False`, this array is 40 | a boolean array with all :obj:`False`. 41 | 42 | The type of the image, the bounding boxes and the labels are as follows. 43 | 44 | * :obj:`img.dtype == numpy.float32` 45 | * :obj:`bbox.dtype == numpy.float32` 46 | * :obj:`label.dtype == numpy.int32` 47 | * :obj:`difficult.dtype == numpy.bool` 48 | 49 | Args: 50 | data_dir (string): Path to the root of the training data. 51 | i.e. "/data/image/voc/VOCdevkit/VOC2007/" 52 | split ({'train', 'val', 'trainval', 'test'}): Select a split of the 53 | dataset. :obj:`test` split is only available for 54 | 2007 dataset. 55 | year ({'2007', '2012'}): Use a dataset prepared for a challenge 56 | held in :obj:`year`. 57 | use_difficult (bool): If :obj:`True`, use images that are labeled as 58 | difficult in the original annotation. 59 | return_difficult (bool): If :obj:`True`, this dataset returns 60 | a boolean array 61 | that indicates whether bounding boxes are labeled as difficult 62 | or not. The default value is :obj:`False`. 63 | 64 | """ 65 | 66 | def __init__(self, data_dir, split='trainval', 67 | use_difficult=False, return_difficult=False, 68 | ): 69 | 70 | # if split not in ['train', 'trainval', 'val']: 71 | # if not (split == 'test' and year == '2007'): 72 | # warnings.warn( 73 | # 'please pick split from \'train\', \'trainval\', \'val\'' 74 | # 'for 2012 dataset. For 2007 dataset, you can pick \'test\'' 75 | # ' in addition to the above mentioned splits.' 76 | # ) 77 | id_list_file = os.path.join( 78 | data_dir, 'ImageSets/Main/{0}.txt'.format(split)) 79 | 80 | self.ids = [id_.strip() for id_ in open(id_list_file)] 81 | self.data_dir = data_dir 82 | self.use_difficult = use_difficult 83 | self.return_difficult = return_difficult 84 | #将voc标签名称赋值给VOCBboxDataset对象 85 | self.label_names = VOC_BBOX_LABEL_NAMES 86 | 87 | def __len__(self): 88 | return len(self.ids) 89 | #dataset用到该方法得到一张图片的各种信息 90 | def get_example(self, i): 91 | """Returns the i-th example. 92 | 93 | Returns a color image and bounding boxes. The image is in CHW format. 94 | The returned image is RGB. 95 | 返回彩色图像和bbox。图像大小为CHW(通道、高、宽),返回图像为RGB 96 | 97 | Args: 98 | i (int): The index of the example. 99 | 100 | Returns: 101 | tuple of an image and bounding boxes 102 | 103 | """ 104 | id_ = self.ids[i] 105 | #=======================================================================在这里读取路径后只拿最后一个\后面的内容 106 | anno = ET.parse( 107 | os.path.join(self.data_dir, 'Annotations', id_ + '.xml')) 108 | bbox = list() 109 | label = list() 110 | difficult = list() 111 | for obj in anno.findall('object'): 112 | # when in not using difficult split, and the object is difficult, skipt it. 
113 | #在不使用困难分割时,对象是difficult,跳过它 114 | if not self.use_difficult and int(obj.find('difficult').text) == 1: 115 | continue 116 | 117 | difficult.append(int(obj.find('difficult').text)) 118 | bndbox_anno = obj.find('bndbox') 119 | # subtract 1 to make pixel indexes 0-based 120 | #减1 以使像素索引基于0 121 | bbox.append([ 122 | int(bndbox_anno.find(tag).text) - 1 123 | for tag in ('ymin', 'xmin', 'ymax', 'xmax')]) 124 | name = obj.find('name').text.lower().strip() 125 | label.append(VOC_BBOX_LABEL_NAMES.index(name)) 126 | bbox = np.stack(bbox).astype(np.float32) 127 | label = np.stack(label).astype(np.int32) 128 | # When `use_difficult==False`, all elements in `difficult` are False. 129 | difficult = np.array(difficult, dtype=np.bool).astype(np.uint8) # PyTorch don't support np.bool 130 | 131 | # Load a image 132 | img_file = os.path.join(self.data_dir, 'JPEGImages', id_ + '.jpg') 133 | img = read_image(img_file, color=True) 134 | 135 | # if self.return_difficult: 136 | # return img, bbox, label, difficult 137 | return img, bbox, label, difficult 138 | 139 | __getitem__ = get_example 140 | 141 | #标签名称 142 | VOC_BBOX_LABEL_NAMES = ( 143 | 'aeroplane', 144 | 'bicycle', 145 | 'bird', 146 | 'boat', 147 | 'bottle', 148 | 'bus', 149 | 'car', 150 | 'cat', 151 | 'chair', 152 | 'cow', 153 | 'diningtable', 154 | 'dog', 155 | 'horse', 156 | 'motorbike', 157 | 'person', 158 | 'pottedplant', 159 | 'sheep', 160 | 'sofa', 161 | 'train', 162 | 'tvmonitor') 163 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/misc/convert_caffe_pretrain.py: -------------------------------------------------------------------------------- 1 | # code from ruotian luo 2 | # https://github.com/ruotianluo/pytorch-faster-rcnn 3 | import torch 4 | from torch.utils.model_zoo import load_url 5 | from torchvision import models 6 | 7 | sd = load_url("https://s3-us-west-2.amazonaws.com/jcjohns-models/vgg16-00b39a1b.pth") 8 | sd['classifier.0.weight'] = sd['classifier.1.weight'] 9 | sd['classifier.0.bias'] = sd['classifier.1.bias'] 10 | del sd['classifier.1.weight'] 11 | del sd['classifier.1.bias'] 12 | 13 | sd['classifier.3.weight'] = sd['classifier.4.weight'] 14 | sd['classifier.3.bias'] = sd['classifier.4.bias'] 15 | del sd['classifier.4.weight'] 16 | del sd['classifier.4.bias'] 17 | 18 | 19 | # speicify the path to save 20 | torch.save(sd, "vgg16_caffe.pth") -------------------------------------------------------------------------------- /FasterRcnn_pytorch/misc/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/misc/demo.jpg -------------------------------------------------------------------------------- /FasterRcnn_pytorch/misc/train_fast.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import ipdb 4 | import matplotlib 5 | from tqdm import tqdm 6 | 7 | from utils.config import opt 8 | from data.dataset import Dataset, TestDataset 9 | from model import FasterRCNNVGG16 10 | from torch.autograd import Variable 11 | from torch.utils import data as data_ 12 | from trainer import FasterRCNNTrainer 13 | from utils import array_tool as at 14 | from utils.vis_tool import visdom_bbox 15 | from utils.eval_tool import eval_detection_voc 16 | 17 | matplotlib.use('agg') 18 | 19 | def eval(dataloader, faster_rcnn, test_num=10000): 20 | pred_bboxes, pred_labels, pred_scores = list(), list(), 
list() 21 | gt_bboxes, gt_labels, gt_difficults = list(), list(), list() 22 | for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in tqdm(enumerate(dataloader)): 23 | sizes = [sizes[0][0], sizes[1][0]] 24 | pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(imgs, [sizes]) 25 | gt_bboxes += list(gt_bboxes_.numpy()) 26 | gt_labels += list(gt_labels_.numpy()) 27 | gt_difficults += list(gt_difficults_.numpy()) 28 | pred_bboxes += pred_bboxes_ 29 | pred_labels += pred_labels_ 30 | pred_scores += pred_scores_ 31 | if ii == test_num: break 32 | 33 | result = eval_detection_voc( 34 | pred_bboxes, pred_labels, pred_scores, 35 | gt_bboxes, gt_labels, gt_difficults, 36 | use_07_metric=True) 37 | return result 38 | 39 | 40 | def train(**kwargs): 41 | opt._parse(kwargs) 42 | 43 | dataset = Dataset(opt) 44 | print('load data') 45 | dataloader = data_.DataLoader(dataset, \ 46 | batch_size=1, \ 47 | shuffle=True, \ 48 | # pin_memory=True, 49 | num_workers=opt.num_workers) 50 | testset = TestDataset(opt) 51 | test_dataloader = data_.DataLoader(testset, 52 | batch_size=1, 53 | num_workers=2, 54 | shuffle=False, \ 55 | # pin_memory=True 56 | ) 57 | faster_rcnn = FasterRCNNVGG16() 58 | print('model construct completed') 59 | trainer = FasterRCNNTrainer(faster_rcnn).cuda() 60 | if opt.load_path: 61 | trainer.load(opt.load_path) 62 | print('load pretrained model from %s' % opt.load_path) 63 | 64 | trainer.vis.text(dataset.db.label_names, win='labels') 65 | best_map = 0 66 | for epoch in range(7): 67 | trainer.reset_meters() 68 | for ii, (img, bbox_, label_, scale, ori_img) in tqdm(enumerate(dataloader)): 69 | scale = at.scalar(scale) 70 | img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda() 71 | img, bbox, label = Variable(img), Variable(bbox), Variable(label) 72 | losses = trainer.train_step(img, bbox, label, scale) 73 | 74 | if (ii + 1) % opt.plot_every == 0: 75 | if os.path.exists(opt.debug_file): 76 | ipdb.set_trace() 77 | 78 | # plot loss 79 | trainer.vis.plot_many(trainer.get_meter_data()) 80 | 81 | # plot groud truth bboxes 82 | ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255 83 | gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], 84 | at.tonumpy(bbox_)[0], 85 | label_[0].numpy()) 86 | trainer.vis.img('gt_img', gt_img) 87 | 88 | # plot predicti bboxes 89 | _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True) 90 | pred_img = visdom_bbox( at.tonumpy(ori_img[0]), 91 | at.tonumpy(_bboxes[0]), 92 | at.tonumpy(_labels[0]).reshape(-1), 93 | at.tonumpy(_scores[0])) 94 | trainer.vis.img('pred_img', pred_img) 95 | 96 | # rpn confusion matrix(meter) 97 | trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm') 98 | # roi confusion matrix 99 | trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float()) 100 | if epoch==4: 101 | trainer.faster_rcnn.scale_lr(opt.lr_decay) 102 | 103 | eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100) 104 | print('eval_result') 105 | trainer.save(mAP=eval_result['map']) 106 | 107 | if __name__ == '__main__': 108 | import fire 109 | 110 | fire.Fire() 111 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .faster_rcnn_vgg16 import FasterRCNNVGG16 2 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/__init__.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/faster_rcnn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/faster_rcnn.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/faster_rcnn_vgg16.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/faster_rcnn_vgg16.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/region_proposal_network.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/region_proposal_network.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/__pycache__/roi_module.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/__pycache__/roi_module.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/roi_module.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from string import Template 3 | 4 | import cupy, torch 5 | import cupy as cp 6 | import torch as t 7 | from torch.autograd import Function 8 | 9 | from model.utils.roi_cupy import kernel_backward, kernel_forward 10 | 11 | Stream = namedtuple('Stream', ['ptr']) 12 | 13 | 14 | @cupy.util.memoize(for_each_device=True) 15 | def load_kernel(kernel_name, code, **kwargs): 16 | cp.cuda.runtime.free(0) 17 | code = Template(code).substitute(**kwargs) 18 | kernel_code = cupy.cuda.compile_with_cache(code) 19 | return kernel_code.get_function(kernel_name) 20 | 21 | 22 | CUDA_NUM_THREADS = 1024 23 | 24 | 25 | def GET_BLOCKS(N, K=CUDA_NUM_THREADS): 26 | return (N + K - 1) // K 27 | 28 | 29 | class RoI(Function): 30 | """ 31 | NOTE:only CUDA-compatible 32 | """ 33 | 34 | def __init__(self, outh, outw, spatial_scale): 35 | self.forward_fn = load_kernel('roi_forward', kernel_forward) 36 | self.backward_fn = load_kernel('roi_backward', kernel_backward) 37 | self.outh, self.outw, self.spatial_scale = outh, outw, spatial_scale 38 | 39 | def forward(self, x, rois): 40 | # NOTE: MAKE SURE input is contiguous too 41 | x = x.contiguous() 42 | rois = rois.contiguous() 43 | self.in_size = B, C, H, W = x.size() 44 | self.N = N = rois.size(0) 45 | output = t.zeros(N, C, self.outh, self.outw).cuda() 46 | self.argmax_data = t.zeros(N, C, self.outh, self.outw).int().cuda() 47 | self.rois = rois 48 | args = [x.data_ptr(), rois.data_ptr(), 49 | output.data_ptr(), 50 | 
self.argmax_data.data_ptr(), 51 | self.spatial_scale, C, H, W, 52 | self.outh, self.outw, 53 | output.numel()] 54 | stream = Stream(ptr=torch.cuda.current_stream().cuda_stream) 55 | self.forward_fn(args=args, 56 | block=(CUDA_NUM_THREADS, 1, 1), 57 | grid=(GET_BLOCKS(output.numel()), 1, 1), 58 | stream=stream) 59 | return output 60 | 61 | def backward(self, grad_output): 62 | ##NOTE: IMPORTANT CONTIGUOUS 63 | # TODO: input 64 | grad_output = grad_output.contiguous() 65 | B, C, H, W = self.in_size 66 | grad_input = t.zeros(self.in_size).cuda() 67 | stream = Stream(ptr=torch.cuda.current_stream().cuda_stream) 68 | args = [grad_output.data_ptr(), 69 | self.argmax_data.data_ptr(), 70 | self.rois.data_ptr(), 71 | grad_input.data_ptr(), 72 | self.N, self.spatial_scale, C, H, W, self.outh, self.outw, 73 | grad_input.numel()] 74 | self.backward_fn(args=args, 75 | block=(CUDA_NUM_THREADS, 1, 1), 76 | grid=(GET_BLOCKS(grad_input.numel()), 1, 1), 77 | stream=stream 78 | ) 79 | return grad_input, None 80 | 81 | 82 | class RoIPooling2D(t.nn.Module): 83 | 84 | def __init__(self, outh, outw, spatial_scale): 85 | super(RoIPooling2D, self).__init__() 86 | self.RoI = RoI(outh, outw, spatial_scale) 87 | 88 | def forward(self, x, rois): 89 | return self.RoI(x, rois) 90 | 91 | 92 | def test_roi_module(): 93 | ## fake data### 94 | B, N, C, H, W, PH, PW = 2, 8, 4, 32, 32, 7, 7 95 | 96 | bottom_data = t.randn(B, C, H, W).cuda() 97 | bottom_rois = t.randn(N, 5) 98 | bottom_rois[:int(N / 2), 0] = 0 99 | bottom_rois[int(N / 2):, 0] = 1 100 | bottom_rois[:, 1:] = (t.rand(N, 4) * 100).float() 101 | bottom_rois = bottom_rois.cuda() 102 | spatial_scale = 1. / 16 103 | outh, outw = PH, PW 104 | 105 | # pytorch version 106 | module = RoIPooling2D(outh, outw, spatial_scale) 107 | x = t.autograd.Variable(bottom_data, requires_grad=True) 108 | rois = t.autograd.Variable(bottom_rois) 109 | output = module(x, rois) 110 | output.sum().backward() 111 | 112 | def t2c(variable): 113 | npa = variable.data.cpu().numpy() 114 | return cp.array(npa) 115 | 116 | def test_eq(variable, array, info): 117 | cc = cp.asnumpy(array) 118 | neq = (cc != variable.data.cpu().numpy()) 119 | assert neq.sum() == 0, 'test failed: %s' % info 120 | 121 | # chainer version,if you're going to run this 122 | # pip install chainer 123 | import chainer.functions as F 124 | from chainer import Variable 125 | x_cn = Variable(t2c(x)) 126 | 127 | o_cn = F.roi_pooling_2d(x_cn, t2c(rois), outh, outw, spatial_scale) 128 | test_eq(output, o_cn.array, 'forward') 129 | F.sum(o_cn).backward() 130 | test_eq(x.grad, x_cn.grad, 'backward') 131 | print('test pass') 132 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__init__.py -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__pycache__/bbox_tools.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/bbox_tools.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__pycache__/creator_tool.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/creator_tool.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/__pycache__/roi_cupy.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/__pycache__/roi_cupy.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from model.utils.nms.non_maximum_suppression import non_maximum_suppression -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/__pycache__/_nms_gpu_post_py.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/_nms_gpu_post_py.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/__pycache__/non_maximum_suppression.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/__pycache__/non_maximum_suppression.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/_nms_gpu_post.pyx: -------------------------------------------------------------------------------- 1 | cimport numpy as np 2 | from libc.stdint cimport uint64_t 3 | 4 | import numpy as np 5 | 6 | def _nms_gpu_post(np.ndarray[np.uint64_t, ndim=1] mask, 7 | int n_bbox, 8 | int threads_per_block, 9 | int col_blocks 10 | ): 11 | cdef: 12 | int i, j, nblock, index 13 | uint64_t inblock 14 | int n_selection = 0 15 | uint64_t one_ull = 1 16 | np.ndarray[np.int32_t, ndim=1] selection 17 | np.ndarray[np.uint64_t, ndim=1] remv 18 | 19 | selection = np.zeros((n_bbox,), dtype=np.int32) 20 | remv = np.zeros((col_blocks,), dtype=np.uint64) 21 | 22 | for i in range(n_bbox): 23 | nblock = i // threads_per_block 24 | inblock = i % threads_per_block 25 | 26 | if not (remv[nblock] & one_ull << inblock): 27 | selection[n_selection] = i 28 | 
n_selection += 1 29 | 30 | index = i * col_blocks 31 | for j in range(nblock, col_blocks): 32 | remv[j] |= mask[index + j] 33 | return selection, n_selection 34 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/_nms_gpu_post_py.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def _nms_gpu_post( mask, 5 | n_bbox, 6 | threads_per_block, 7 | col_blocks 8 | ): 9 | n_selection = 0 10 | one_ull = np.array([1],dtype=np.uint64) 11 | selection = np.zeros((n_bbox,), dtype=np.int32) 12 | remv = np.zeros((col_blocks,), dtype=np.uint64) 13 | 14 | for i in range(n_bbox): 15 | nblock = i // threads_per_block 16 | inblock = i % threads_per_block 17 | 18 | if not (remv[nblock] & one_ull << inblock): 19 | selection[n_selection] = i 20 | n_selection += 1 21 | 22 | index = i * col_blocks 23 | for j in range(nblock, col_blocks): 24 | remv[j] |= mask[index + j] 25 | return selection, n_selection 26 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/build.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | ext_modules = [Extension("_nms_gpu_post", ["_nms_gpu_post.pyx"])] 6 | setup( 7 | name="Hello pyx", 8 | cmdclass={'build_ext': build_ext}, 9 | ext_modules=ext_modules 10 | ) 11 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/build/lib.linux-x86_64-3.5/_nms_gpu_post.cpython-35m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/build/lib.linux-x86_64-3.5/_nms_gpu_post.cpython-35m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/nms/build/temp.linux-x86_64-3.5/_nms_gpu_post.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/model/utils/nms/build/temp.linux-x86_64-3.5/_nms_gpu_post.o -------------------------------------------------------------------------------- /FasterRcnn_pytorch/model/utils/roi_cupy.py: -------------------------------------------------------------------------------- 1 | kernel_forward = ''' 2 | extern "C" 3 | __global__ void roi_forward(const float* const bottom_data,const float* const bottom_rois, 4 | float* top_data, int* argmax_data, 5 | const double spatial_scale,const int channels,const int height, 6 | const int width, const int pooled_height, 7 | const int pooled_width,const int NN 8 | ){ 9 | 10 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 11 | if(idx>=NN) 12 | return; 13 | const int pw = idx % pooled_width; 14 | const int ph = (idx / pooled_width) % pooled_height; 15 | const int c = (idx / pooled_width / pooled_height) % channels; 16 | int num = idx / pooled_width / pooled_height / channels; 17 | const int roi_batch_ind = bottom_rois[num * 5 + 0]; 18 | const int roi_start_w = round(bottom_rois[num * 5 + 1] * spatial_scale); 19 | const int roi_start_h = round(bottom_rois[num * 5 + 2] * spatial_scale); 20 | const int roi_end_w = 
round(bottom_rois[num * 5 + 3] * spatial_scale); 21 | const int roi_end_h = round(bottom_rois[num * 5 + 4] * spatial_scale); 22 | // Force malformed ROIs to be 1x1 23 | const int roi_width = max(roi_end_w - roi_start_w + 1, 1); 24 | const int roi_height = max(roi_end_h - roi_start_h + 1, 1); 25 | const float bin_size_h = static_cast<float>(roi_height) 26 | / static_cast<float>(pooled_height); 27 | const float bin_size_w = static_cast<float>(roi_width) 28 | / static_cast<float>(pooled_width); 29 | 30 | int hstart = static_cast<int>(floor(static_cast<float>(ph) 31 | * bin_size_h)); 32 | int wstart = static_cast<int>(floor(static_cast<float>(pw) 33 | * bin_size_w)); 34 | int hend = static_cast<int>(ceil(static_cast<float>(ph + 1) 35 | * bin_size_h)); 36 | int wend = static_cast<int>(ceil(static_cast<float>(pw + 1) 37 | * bin_size_w)); 38 | 39 | // Add roi offsets and clip to input boundaries 40 | hstart = min(max(hstart + roi_start_h, 0), height); 41 | hend = min(max(hend + roi_start_h, 0), height); 42 | wstart = min(max(wstart + roi_start_w, 0), width); 43 | wend = min(max(wend + roi_start_w, 0), width); 44 | bool is_empty = (hend <= hstart) || (wend <= wstart); 45 | 46 | // Define an empty pooling region to be zero 47 | float maxval = is_empty ? 0 : -1E+37; 48 | // If nothing is pooled, argmax=-1 causes nothing to be backprop'd 49 | int maxidx = -1; 50 | const int data_offset = (roi_batch_ind * channels + c) * height * width; 51 | for (int h = hstart; h < hend; ++h) { 52 | for (int w = wstart; w < wend; ++w) { 53 | int bottom_index = h * width + w; 54 | if (bottom_data[data_offset + bottom_index] > maxval) { 55 | maxval = bottom_data[data_offset + bottom_index]; 56 | maxidx = bottom_index; 57 | } 58 | } 59 | } 60 | top_data[idx]=maxval; 61 | argmax_data[idx]=maxidx; 62 | } 63 | ''' 64 | kernel_backward = ''' 65 | extern "C" 66 | __global__ void roi_backward(const float* const top_diff, 67 | const int* const argmax_data,const float* const bottom_rois, 68 | float* bottom_diff, const int num_rois, 69 | const double spatial_scale, int channels, 70 | int height, int width, int pooled_height, 71 | int pooled_width,const int NN) 72 | { 73 | 74 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 75 | //// Important: >= instead of > 76 | if(idx>=NN) 77 | return; 78 | int w = idx % width; 79 | int h = (idx / width) % height; 80 | int c = (idx/ (width * height)) % channels; 81 | int num = idx / (width * height * channels); 82 | 83 | float gradient = 0; 84 | // Accumulate gradient over all ROIs that pooled this element 85 | for (int roi_n = 0; roi_n < num_rois; ++roi_n) { 86 | // Skip if ROI's batch index doesn't match num 87 | if (num != static_cast<int>(bottom_rois[roi_n * 5])) { 88 | continue; 89 | } 90 | 91 | int roi_start_w = round(bottom_rois[roi_n * 5 + 1] 92 | * spatial_scale); 93 | int roi_start_h = round(bottom_rois[roi_n * 5 + 2] 94 | * spatial_scale); 95 | int roi_end_w = round(bottom_rois[roi_n * 5 + 3] 96 | * spatial_scale); 97 | int roi_end_h = round(bottom_rois[roi_n * 5 + 4] 98 | * spatial_scale); 99 | 100 | // Skip if ROI doesn't include (h, w) 101 | const bool in_roi = (w >= roi_start_w && w <= roi_end_w && 102 | h >= roi_start_h && h <= roi_end_h); 103 | if (!in_roi) { 104 | continue; 105 | } 106 | 107 | int offset = (roi_n * channels + c) * pooled_height 108 | * pooled_width; 109 | 110 | // Compute feasible set of pooled units that could have pooled 111 | // this bottom unit 112 | 113 | // Force malformed ROIs to be 1x1 114 | int roi_width = max(roi_end_w - roi_start_w + 1, 1); 115 | int roi_height = max(roi_end_h - roi_start_h + 1, 1); 116 | 117 | float bin_size_h = static_cast<float>(roi_height) 118 | / static_cast<float>(pooled_height); 119 | float bin_size_w = static_cast<float>(roi_width) 120 | / static_cast<float>(pooled_width); 121 | 122 | int phstart = floor(static_cast<float>(h - roi_start_h) 123 | / bin_size_h); 124 | int phend = ceil(static_cast<float>(h - roi_start_h + 1) 125 | / bin_size_h); 126 | int pwstart = floor(static_cast<float>(w - roi_start_w) 127 | / bin_size_w); 128 | int pwend = ceil(static_cast<float>(w - roi_start_w + 1) 129 | / bin_size_w); 130 | 131 | phstart = min(max(phstart, 0), pooled_height); 132 | phend = min(max(phend, 0), pooled_height); 133 | pwstart = min(max(pwstart, 0), pooled_width); 134 | pwend = min(max(pwend, 0), pooled_width); 135 | for (int ph = phstart; ph < phend; ++ph) { 136 | for (int pw = pwstart; pw < pwend; ++pw) { 137 | int index_ = ph * pooled_width + pw + offset; 138 | if (argmax_data[index_] == (h * width + w)) { 139 | gradient += top_diff[index_]; 140 | } 141 | } 142 | } 143 | } 144 | bottom_diff[idx] = gradient; 145 | } 146 | ''' 147 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-image 2 | pprint 3 | cython 4 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 cy 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/array_tool.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/array_tool.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/eval_tool.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/eval_tool.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/__pycache__/vis_tool.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/FasterRcnn_pytorch/utils/__pycache__/vis_tool.cpython-35.pyc -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/array_tool.py: -------------------------------------------------------------------------------- 1 | """ 2 | tools to convert specified type 3 | """ 4 | import torch as t 5 | import numpy as np 6 | 7 | 8 | def tonumpy(data): 9 | if isinstance(data, np.ndarray): 10 | return data 11 | if isinstance(data, t._TensorBase): 12 | return data.cpu().numpy() 13 | if isinstance(data, t.autograd.Variable): 14 | return tonumpy(data.data) 15 | 16 | 17 | def totensor(data, cuda=True): 18 | if isinstance(data, np.ndarray): 19 | tensor = t.from_numpy(data) 20 | if isinstance(data, t._TensorBase): 21 | tensor = data 22 | if isinstance(data, t.autograd.Variable): 23 | tensor = data.data 24 | if cuda: 25 | tensor = tensor.cuda() 26 | return tensor 27 | 28 | 29 | def tovariable(data): 30 | if isinstance(data, np.ndarray): 31 | return tovariable(totensor(data)) 32 | if isinstance(data, t._TensorBase): 33 | return t.autograd.Variable(data) 34 | if isinstance(data, t.autograd.Variable): 35 | return data 36 | else: 37 | raise ValueError("UnKnow data type: %s, input should be {np.ndarray,Tensor,Variable}" %type(data)) 38 | 39 | 40 | def scalar(data): 41 | if isinstance(data, np.ndarray): 42 | return data.reshape(1)[0] 43 | if isinstance(data, t._TensorBase): 44 | return data.view(1)[0] 45 | if isinstance(data, t.autograd.Variable): 46 | return data.data.view(1)[0] 47 | -------------------------------------------------------------------------------- /FasterRcnn_pytorch/utils/config.py: 
-------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | 3 | 4 | # Default Configs for training 5 | # NOTE that, config items could be overwriten by passing argument through command line. 6 | # e.g. --voc-data-dir='./data/' 7 | 8 | class Config: 9 | # data 10 | #更改_bobo 11 | voc_data_dir = '/home/bobo/PycharmProjects/torchProjectss/fasterbychenyun/VOCdevkit/Pascal VOC2007/VOCdevkit/VOC2007' 12 | min_size = 600 # image resize 13 | max_size = 1000 # image resize 14 | num_workers = 8 15 | test_num_workers = 8 16 | 17 | # sigma for l1_smooth_loss 18 | rpn_sigma = 3. 19 | roi_sigma = 1. 20 | 21 | # param for optimizer 22 | # 0.0005 in origin paper but 0.0001 in tf-faster-rcnn 23 | weight_decay = 0.0005 24 | lr_decay = 0.1 # 1e-3 -> 1e-4 25 | lr = 1e-3 26 | 27 | 28 | # visualization 29 | env = 'faster-rcnn' # visdom env 30 | port = 8097 31 | plot_every = 40 # vis every N iter 32 | 33 | # preset 34 | data = 'voc' 35 | pretrained_model = 'vgg16' 36 | 37 | # training 38 | epoch = 14 39 | 40 | 41 | use_adam = False # Use Adam optimizer 42 | use_chainer = False # try match everything as chainer 43 | use_drop = False # use dropout in RoIHead 44 | # debug 45 | debug_file = '/tmp/debugf' 46 | 47 | test_num = 10000 48 | # model 49 | load_path = None 50 | 51 | #caffe_pretrain = False # use caffe pretrained model instead of torchvision 52 | caffe_pretrain = True # use caffe pretrained model instead of torchvision 53 | caffe_pretrain_path = '/home/bobo/PycharmProjects/torchProjectss/fasterbychenyun/simplefasterrcnnpytorchmaster/checkpoints/vgg16_caffe.pth' 54 | 55 | 56 | def _parse(self, kwargs): 57 | state_dict = self._state_dict() 58 | for k, v in kwargs.items(): 59 | if k not in state_dict: 60 | raise ValueError('UnKnown Option: "--%s"' % k) 61 | setattr(self, k, v) 62 | 63 | print('======user config========') 64 | pprint(self._state_dict()) 65 | print('==========end============') 66 | 67 | def _state_dict(self): 68 | return {k: getattr(self, k) for k, _ in Config.__dict__.items() \ 69 | if not k.startswith('_')} 70 | 71 | 72 | opt = Config() 73 | -------------------------------------------------------------------------------- /GhostNet/G-Ghost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/GhostNet/G-Ghost.png -------------------------------------------------------------------------------- /GhostNet/readme.md: -------------------------------------------------------------------------------- 1 | # GhostNets on Heterogeneous Devices via Cheap Operations 2 | 3 | #### 该仓库收录于[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub) 4 | 5 | # 说明 6 | 7 | - CVPR2020 C-GhostNet 8 | - IJCV 2022 G-GhostNet [官方库](https://github.com/huawei-noah/Efficient-AI-Backbones) [原作知乎解读](https://zhuanlan.zhihu.com/p/540547718) 9 | 10 | ## 简读 11 | 12 | | 主题 | 描述 | 13 | | ---- | :----------------------------------------------------------- | 14 | | 问题 | 问题1:深度可分离卷积(逐通道卷积+逐点卷积)、通道打乱等复杂操作在GPU下并行度不高,造成耗时。
问题2:观察到stage级别内部特征存在冗余。 | 15 | | 解决 | 问题1:仅采用普通卷积/分组卷积,加速GPU并行
问题2:在stage级别应用Ghost形式,用"便宜操作"生成冗余特征。 | 16 | | 实现 | C-Ghost: 卷积级别特征冗余,代替原来的一个普通卷积
G-Ghost: stage级别特征冗余,以代替原来的一个stage网络结构 | 17 | 18 | # 具体实现 19 | 20 | 21 | 核心方法`Stage`,结构图如下 22 | 23 | ![](G-Ghost.png) 24 | 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Max deGroot, Ellis Brown 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /RepVGG/readme.md: -------------------------------------------------------------------------------- 1 | # RepVGG: Making VGG-style ConvNets Great Again 2 | 3 | #### 该仓库收录于[PytorchNetHub](https://github.com/bobo0810/PytorchNetHub) 4 | 5 | 6 | 7 | # 说明 8 | 9 | - CVPR-2021 [官方库](https://github.com/DingXiaoH/RepVGG) [原作知乎解读](https://zhuanlan.zhihu.com/p/344324470) 10 | 11 | - 本仓库仅提取网络定义部分,用以分析。 12 | 13 | - 应用广泛 14 | 15 | > 1. [yolov6](https://zhuanlan.zhihu.com/p/533127196)、[yolov7](https://arxiv.org/abs/2207.02696)等:设计结构重参数化的网络进行训练,推理时转化为等价的简单结构,加速推理。 16 | >2. [MNN线性超参数化工具](https://www.yuque.com/mnn/cn/ph6021):设计小模型 --> 训练线性过参数化大模型 --> 转换等价小模型推理 17 | 18 | ## 简读 19 | 20 | | | | 21 | | -------- | ------------------------------------------------------------ | 22 | | 问题 | 问题1:resnet等多分支结构,造成内存消耗,推理速度下降。
问题2:depthwise等复杂操作,造成内存消耗,硬件支持差。 | 23 | | 创新点 | RepVGG是VGG结构,快速、省内存、灵活
解决问题1:推理时无分支
解决问题2:仅包含3x3conv+ReLU的VGG style结构,无复杂操作 | 24 | | 具体实现 | 解耦训练和推理的网络结构。
训练:多分支提升性能
推理:结构重参数化,转为等价的VGG style结构。 | 25 | 26 | # 具体实现 27 | 28 | 核心方法`get_equivalent_kernel_bias` 29 | 30 | 1. 融合BN 31 | 32 | `conv layer + BN layer `--> `conv layer` 33 | 34 | - [详解BN层](https://blog.csdn.net/ECNU_LZJ/article/details/104203604) 35 | 36 | - [卷积与BN层融合公式](https://blog.csdn.net/oYeZhou/article/details/112802348) 37 | 38 | 2. 其余分支转为3x3卷积 39 | 40 | 1x1conv -> 3x3conv 41 | 42 | bn->3x3conv 43 | 44 | 3. 三个分支合并 45 | 46 | `conv(x, W1) + conv(x, W2) + conv(x, W3) = conv(x, W1+W2+W3))` 47 | 48 | ![](repvgg.png) 49 | 50 | -------------------------------------------------------------------------------- /RepVGG/repvgg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/RepVGG/repvgg.png -------------------------------------------------------------------------------- /SSD_pytorch/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/checkpoint/.gitkeep -------------------------------------------------------------------------------- /SSD_pytorch/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT 2 | import torch 3 | import cv2 4 | import numpy as np 5 | 6 | def detection_collate(batch): 7 | """Custom collate fn for dealing with batches of images that have a different 8 | number of associated object annotations (bounding boxes). 9 | 10 | Arguments: 11 | batch: (tuple) A tuple of tensor images and lists of annotations 12 | 13 | Return: 14 | A tuple containing: 15 | 1) (tensor) batch of images stacked on their 0 dim 16 | 2) (list of tensors) annotations for a given image are stacked on 17 | 0 dim 18 | """ 19 | targets = [] 20 | imgs = [] 21 | for sample in batch: 22 | imgs.append(sample[0]) 23 | targets.append(torch.FloatTensor(sample[1])) 24 | return torch.stack(imgs, 0), targets 25 | 26 | 27 | def base_transform(image, size, mean): 28 | x = cv2.resize(image, (size, size)).astype(np.float32) 29 | x -= mean 30 | x = x.astype(np.float32) 31 | return x 32 | 33 | 34 | class BaseTransform: 35 | def __init__(self, size, mean): 36 | self.size = size 37 | self.mean = np.array(mean, dtype=np.float32) 38 | 39 | def __call__(self, image, boxes=None, labels=None): 40 | return base_transform(image, self.size, self.mean), boxes, labels 41 | -------------------------------------------------------------------------------- /SSD_pytorch/data/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/data/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/data/__pycache__/voc0712.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/data/__pycache__/voc0712.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules 
import * 3 | -------------------------------------------------------------------------------- /SSD_pytorch/models/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/__pycache__/box_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/box_utils.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/__pycache__/ssd.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/__pycache__/ssd.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/__pycache__/detection.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/detection.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/__pycache__/prior_box.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/functions/__pycache__/prior_box.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ..box_utils import decode, nms 4 | from SSD_pytorch.utils.config import opt 5 | 6 | 7 | class Detect(Function): 8 | """At test time, Detect is the final layer of SSD. Decode location preds, 9 | apply non-maximum suppression to location predictions based on conf 10 | scores and threshold to a top_k number of output predictions for both 11 | confidence score and locations. 12 | """ 13 | def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh): 14 | self.num_classes = num_classes 15 | self.background_label = bkg_label 16 | self.top_k = top_k 17 | # Parameters used in nms. 
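# 补充注释(示意):top_k 限制每类最终保留的预测框数,conf_thresh 过滤低置信度框;
# 本仓库 utils/config.py 中默认 confidence_threshold=0.01、top_k=5(见后文 config)。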
18 | self.nms_thresh = nms_thresh 19 | if nms_thresh <= 0: 20 | raise ValueError('nms_threshold must be non negative.') 21 | self.conf_thresh = conf_thresh 22 | self.variance = opt.voc['variance'] 23 | 24 | def forward(self, loc_data, conf_data, prior_data): 25 | """ 26 | Args: 27 | loc_data: (tensor) Loc preds from loc layers 28 | Shape: [batch,num_priors*4] 29 | conf_data: (tensor) Shape: Conf preds from conf layers 30 | Shape: [batch*num_priors,num_classes] 31 | prior_data: (tensor) Prior boxes and variances from priorbox layers 32 | Shape: [1,num_priors,4] 33 | """ 34 | num = loc_data.size(0) # batch size 35 | num_priors = prior_data.size(0) 36 | output = torch.zeros(num, self.num_classes, self.top_k, 5) 37 | conf_preds = conf_data.view(num, num_priors, 38 | self.num_classes).transpose(2, 1) 39 | 40 | # Decode predictions into bboxes. 41 | for i in range(num): 42 | decoded_boxes = decode(loc_data[i], prior_data, self.variance) 43 | # For each class, perform nms 44 | conf_scores = conf_preds[i].clone() 45 | 46 | for cl in range(1, self.num_classes): 47 | c_mask = conf_scores[cl].gt(self.conf_thresh) 48 | scores = conf_scores[cl][c_mask] 49 | if scores.dim() == 0: 50 | continue 51 | l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) 52 | boxes = decoded_boxes[l_mask].view(-1, 4) 53 | # idx of highest scoring and non-overlapping boxes per class 54 | ids, count = nms(boxes, scores, self.nms_thresh, self.top_k) 55 | output[i, cl, :count] = \ 56 | torch.cat((scores[ids[:count]].unsqueeze(1), 57 | boxes[ids[:count]]), 1) 58 | flt = output.contiguous().view(num, -1, 5) 59 | _, idx = flt[:, :, 0].sort(1, descending=True) 60 | _, rank = idx.sort(1) 61 | flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0) 62 | return output 63 | -------------------------------------------------------------------------------- /SSD_pytorch/models/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from math import sqrt as sqrt 3 | from itertools import product as product 4 | import torch 5 | 6 | 7 | class PriorBox(object): 8 | """Compute priorbox coordinates in center-offset form for each source 9 | feature map. 
10 | 对于每个feature map,生成预测框(中心坐标及偏移量) 11 | """ 12 | def __init__(self, cfg): 13 | super(PriorBox, self).__init__() 14 | # 300 15 | self.image_size = cfg['min_dim'] 16 | # number of priors for feature map location (either 4 or 6) 17 | # 每个网格的预测框数目 (4 or 6) 18 | self.num_priors = len(cfg['aspect_ratios']) 19 | #方差 20 | self.variance = cfg['variance'] or [0.1] 21 | # 值为[38, 19, 10, 5, 3, 1] 即feature map的尺寸大小 22 | self.feature_maps = cfg['feature_maps'] 23 | # s_k 表示先验框大小相对于图片的比例,而 s_{min} 和 s_{max} 表示比例的最小值与最大值 24 | # min_sizes和max_sizes用来计算s_k,s_k_prime,以便计算 长宽比为1时的两个w.h 25 | # 各个特征图的先验框尺度 [30, 60, 111, 162, 213, 264] 26 | self.min_sizes = cfg['min_sizes'] 27 | # [60, 111, 162, 213, 264, 315] 28 | self.max_sizes = cfg['max_sizes'] 29 | # 感受野大小,即相对于原图的缩小倍数 30 | self.steps = cfg['steps'] 31 | # 纵横比[[2], [2, 3], [2, 3], [2, 3], [2], [2]] 32 | self.aspect_ratios = cfg['aspect_ratios'] 33 | # True 34 | self.clip = cfg['clip'] 35 | # VOC 36 | self.version = cfg['name'] 37 | for v in self.variance: 38 | if v <= 0: 39 | raise ValueError('Variances must be greater than 0') 40 | 41 | def forward(self): 42 | # mean 是保存预测框的列表 43 | mean = [] 44 | # 遍历不同feature map的尺寸大小 45 | for k, f in enumerate(self.feature_maps): 46 | # product用于求多个可迭代对象的笛卡尔积,它跟嵌套的 for 循环等价 47 | # repeat用于指定重复生成序列的次数。 48 | # 参考:http://funhacks.net/2017/02/13/itertools/ 49 | # 即若f为2,则i,j取值为00,01,10,11。即遍历每一个可能 50 | 51 | # 当k=0,f=38时,range(f)的值为(0,1,...,37)则product(range(f), repeat=2)的所有取值为(0,0)(0,1)...直到(37,0),,,(37,37) 52 | # 遍历一个feature map上的每一个网格 53 | for i, j in product(range(f), repeat=2): 54 | # fk 是第 k 个 feature map 的大小 55 | #image_size=300 steps为每层feature maps的感受野 56 | f_k = self.image_size / self.steps[k] 57 | # 单位中心unit center x,y 58 | # 每一个网格的中心,设置为:(i+0.5|fk|,j+0.5|fk|),其中,|fk| 是第 k 个 feature map 的大小,同时,i,j∈[0,|fk|) 59 | cx = (j + 0.5) / f_k 60 | cy = (i + 0.5) / f_k 61 | 62 | 63 | # 总体上:先添加长宽比为1的两个w、h(比较特殊),再通过循环添加其他长宽比 64 | # 长宽比aspect_ratio: 1 65 | # 真实大小rel size: min_size 66 | # 先验框大小相对于图片的比例 67 | #计算s_k 是为了求解w、h 68 | s_k = self.min_sizes[k]/self.image_size 69 | # 由于长宽比为1,则w=s_k h=s_k 70 | mean += [cx, cy, s_k, s_k] 71 | 72 | # 对于 aspect ratio 为 1 时,还增加了一个 default box长宽比aspect_ratio: 1 73 | # rel size: sqrt(s_k * s_(k+1)) 74 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size)) 75 | # 由于长宽比为1,则w=s_k_prime h=s_k_prime 76 | mean += [cx, cy, s_k_prime, s_k_prime] 77 | 78 | # 其余的长宽比 79 | for ar in self.aspect_ratios[k]: 80 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)] 81 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)] 82 | # 将mean的list转化为tensor 83 | output = torch.Tensor(mean).view(-1, 4) 84 | 85 | # clip:True 将输入input张量每个元素的夹紧到区间 [min,max],并返回结果到一个新张量 86 | # 操作为 如果元素>max,则置为max。min类似 87 | if self.clip: 88 | output.clamp_(max=1, min=0) 89 | return output 90 | -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .l2norm import L2Norm 2 | from .multibox_loss import MultiBoxLoss 3 | 4 | __all__ = ['L2Norm', 'MultiBoxLoss'] 5 | -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/__init__.cpython-35.pyc 
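上文 prior_box.py 的 forward 逐网格生成先验框。下面用一段独立小脚本核对该配置下的先验框总数(示意代码,配置值取自后文 utils/config.py 的 voc 字典,计算方式与 forward 中的循环一致):

```python
# 估算 SSD300 先验框总数:每个网格 = 长宽比为1的两个框 + 其余每个长宽比各两个框(ar 与 1/ar)
feature_maps = [38, 19, 10, 5, 3, 1]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

total = sum(f * f * (2 + 2 * len(ars))
            for f, ars in zip(feature_maps, aspect_ratios))
print(total)  # 8732,即 PriorBox().forward() 的输出形状为 [8732, 4]
```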
-------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__pycache__/init_weights.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/init_weights.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__pycache__/l2norm.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/l2norm.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/__pycache__/multibox_loss.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/models/modules/__pycache__/multibox_loss.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/init_weights.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | ''' 4 | 使用xavier方法来初始化vgg后面的新增层、loc用于回归层、conf用于分类层 的权重 5 | ''' 6 | def xavier(param): 7 | ''' 8 | 使用xavier算法初始化新增层的权重 9 | ''' 10 | init.xavier_uniform(param) 11 | 12 | 13 | def weights_init(m): 14 | if isinstance(m, nn.Conv2d): 15 | xavier(m.weight.data) 16 | m.bias.data.zero_() -------------------------------------------------------------------------------- /SSD_pytorch/models/modules/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd import Variable 5 | import torch.nn.init as init 6 | 7 | class L2Norm(nn.Module): 8 | def __init__(self,n_channels, scale): 9 | super(L2Norm,self).__init__() 10 | self.n_channels = n_channels 11 | self.gamma = scale or None 12 | self.eps = 1e-10 13 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | init.constant(self.weight,self.gamma) 18 | 19 | def forward(self, x): 20 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 21 | #x /= norm 22 | x = torch.div(x,norm) 23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 24 | return out 25 | -------------------------------------------------------------------------------- /SSD_pytorch/readme.md: -------------------------------------------------------------------------------- 1 | # 重构SSD代码实现 2 | 3 | ---------- 4 | 5 | 该仓库基于[Max deGroot](https://github.com/amdegroot)与[Ellis Brown](https://github.com/ellisbrown)的[ssd.pytorch](https://github.com/amdegroot/ssd.pytorch)进行的,非常感谢他们无私的奉献。 6 | 7 | 8 | - [原地址](https://github.com/amdegroot/ssd.pytorch) 9 | - [原地址的加注释版本](https://github.com/bobo0810/pytorchSSD) 10 | - [重构版本](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/SSD_pytorch) 强烈推荐!(即本仓库) 11 | 12 | ---------- 13 | 14 | # 目前支持: 15 | 16 | - 数据集:原作者支持VOC、COCO,该仓库仅支持VOC,如果有时间,考虑将COCO加上。 17 | - 网络:支持SSD300 18 | 19 | # 原因: 20 | 21 | 大牛们写代码果然不拘小节,结构混乱依然不影响他们这么优秀。强迫症犯了,一周时间理解源码,一天内重构完成。哇,世界清爽了~ 22 | 23 | ###### 
注:该项目功能上并未进行任何修改,仅做重构,用于理解。 24 | 25 | 26 | # 相比原作者的特点: 27 | 28 | - 所有参数均可在config.py中设置 29 | - 重新整理结构,并加入大量代码注释 30 | 31 | ### 环境: 32 | 33 | | python版本 | pytorch版本 | 34 | |------------|-------------| 35 | | 3.5 | 0.3.0 | 36 | 37 | ---------- 38 | 39 | # 一般项目结构 40 | 41 | 1、定义网络 42 | 43 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/16409622.jpg) 44 | 45 | 2、封装数据集 46 | 47 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/38894621.jpg) 48 | 49 | 3、工具类 50 | 51 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/98583532.jpg) 52 | 53 | 4、主函数 54 | 55 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/32257225.jpg) 56 | 57 | 58 | ---------- 59 | 60 | # SSD网络结构 61 | 62 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/78466722.jpg) 63 | 64 | - vgg16网络结构 65 | 66 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/87243004.jpg) 67 | 68 | ---------- 69 | 70 | # 准备数据集: 71 | 下载VOC2007和VOC2012的数据集,并在utils/config.py中的voc_data_root配置数据集的根目录。 72 | ``` 73 | VOCdevkit 74 | │ 75 | └───VOC2007 76 | │ │ JPEGImages 77 | │ │ ImageSets 78 | │ │ Annotations 79 | │ │ ... 80 | │ 81 | └───VOC2012 82 | │ │ JPEGImages 83 | │ │ ImageSets 84 | │ │ Annotations 85 | │ │ ... 86 | ``` 87 | 88 | ---------- 89 | 90 | # Trian: 91 | 92 | 作用:使用VOC2007和2012的训练集+验证集 开始训练 93 | 94 | 1、开启Visdom(类似TnsorFlow的tensorboard,可视化工具) 95 | ``` 96 | # First install Python server and client 97 | pip install visdom 98 | # Start the server 99 | python -m visdom.server 100 | ``` 101 | 2、下载SSD的基础网络VGG16(去掉fc层) 102 | 103 | 下载地址:[vgg16_reducedfc.pth](https://pan.baidu.com/s/19Iumt072GMiFGlS5lVNy1Q) 104 | 105 | 下载完成后将其放置在checkpoint文件夹下即可。也可通过配置config.py中basenet的路径。 106 | 107 | 3、开始训练 108 | 109 | 在main.py中将train()注释取消,其他方法注释掉,即可运行。 110 | 111 | ---------- 112 | 113 | # Eval: 114 | 115 | 作用:VOC2007测试集,计算各分类AP及mAP 116 | 117 | 1、在config.py中load_model_path配置预训练模型的路径 118 | 119 | 预训练模型下载:[ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA) 120 | 121 | 122 | 2、在main.py中将eval()注释取消,其他方法注释掉,即可运行。 123 | 124 | ---------- 125 | 126 | # Test: 127 | 128 | 功能:VOC2007测试集,将预测结果写入txt 129 | 130 | 1、在config.py中load_model_path配置预训练模型的路径 131 | 132 | 预训练模型下载:[ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA) 133 | 134 | 2、在main.py中将test()注释取消,其他方法注释掉,即可运行。 135 | 136 |

结果 137 | 138 | (图片略) 139 |
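以上 Train/Eval/Test(及下节 Predict)的切换方式可概括如下(示意,函数名按 readme 描述推断,具体以仓库 main.py 为准):

```python
# main.py 调用示意:每次只取消注释一个入口,其余保持注释
# train()    # 训练:VOC2007+2012 训练集+验证集
# eval()     # 验证:VOC2007 测试集,计算各分类 AP 及 mAP
# test()     # 测试:将 VOC2007 测试集预测结果写入 txt
# predict()  # 预测:可视化一张预测图片
```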
140 | 141 | ---------- 142 | 143 | # Predict: 144 | 145 | 功能:可视化一张预测图片 146 | 147 | 1、在config.py中load_model_path配置预训练模型的路径 148 | 149 | 预训练模型下载:[ssd300_VOC_100000.pth](https://pan.baidu.com/s/1hrJo__owbF3ufepwJJ0uzA) 150 | 2、在main.py中将predict()注释取消,其他方法注释掉,即可运行。 151 | 152 | 153 |

原图 154 | 155 | (图片略) 156 |
157 | 158 | 159 |

预处理之后的图像 160 | 161 | (图片略) 162 |
163 | 164 | 165 | 166 |

预测结果 167 | 168 | (图片略) 169 |
170 | 171 | ---------- 172 | 173 | # 关于作者 174 | 175 | - 原作者 [Max deGroot](https://github.com/amdegroot)、[Ellis Brown](https://github.com/ellisbrown) 176 | 177 | - 本仓库作者 [Mr.Li](https://github.com/bobo0810) -------------------------------------------------------------------------------- /SSD_pytorch/temp/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/temp/test.png -------------------------------------------------------------------------------- /SSD_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .augmentations import SSDAugmentation -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/augmentations.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/augmentations.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/eval_untils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/eval_untils.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/timer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/timer.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/__pycache__/visualize.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/SSD_pytorch/utils/__pycache__/visualize.cpython-35.pyc -------------------------------------------------------------------------------- /SSD_pytorch/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # power by Mr.Li 3 | # 设置默认参数 4 | import os.path 5 | class DefaultConfig(): 6 | env = 'SSD_' # visdom 环境的名字 7 | visdom=True # 是否可视化 8 | # 目前支持的网络 9 | model = 'vgg16' 10 | 11 | 12 | voc_data_root='/home/bobo/data/VOCdevkit/' # VOC数据集根目录,该文件夹下有两个子文件夹。一个叫VOC2007,一个叫VOC2012 13 | 14 | # 基础网络,即特征提取网络(去掉全连接的预训练模型vgg16) 15 | 
basenet='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/vgg16_reducedfc.pth' #应为全路径 预训练好的去掉全连接层的vgg16模型 16 | batch_size = 32 # 训练集的batch size 17 | start_iter=0 #训练从第几个item开始 18 | num_workers = 4 # 加载数据时的线程数 19 | use_gpu = True # user GPU or not 20 | lr = 0.001 # 初始的学习率 21 | momentum=0.9 #优化器的动量值 22 | weight_decay=5e-4 #随机梯度下降SGD的权重衰减 23 | gamma=0.1 # Gamma update for SGD 学习率调整参数 24 | 25 | checkpoint_root ='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/' #保存模型的目录 26 | # load_model_path = None # 加载预训练的模型的路径,为None代表不加载 27 | load_model_path ='/home/bobo/windowsPycharmProject/SSD_pytorch/checkpoint/ssd300_COCO_100000.pth' 28 | # load_model_path='C:\\Users\\Administrator\\Desktop\\ssd300_COCO_10000.pth' 29 | 30 | 31 | # gets home dir cross platform 32 | HOME = os.path.expanduser("~") 33 | # 使边界框漂亮 34 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 35 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 36 | MEANS = (104, 117, 123) 37 | # SSD300 配置 38 | voc = { 39 | 'num_classes': 21, # 分类类别20+背景1 40 | 'lr_steps': (80000, 100000, 120000), 41 | 'max_iter': 120000, # 迭代次数 42 | 'feature_maps': [38, 19, 10, 5, 3, 1], 43 | 'min_dim': 300, # 当前SSD300只支持大小300×300的数据集训练 44 | 'steps': [8, 16, 32, 64, 100, 300], # 感受野,相对于原图缩小的倍数 45 | 'min_sizes': [30, 60, 111, 162, 213, 264], 46 | 'max_sizes': [60, 111, 162, 213, 264, 315], 47 | 'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], 48 | 'variance': [0.1, 0.2], # 方差 49 | 'clip': True, 50 | 'name': 'VOC', 51 | } 52 | 53 | # 验证 54 | confidence_threshold=0.01 # 检测置信度阈值 or 0.05 55 | top_k=5 # 进一步限制要解析的预测数量 56 | cleanup=True # 清除并删除eval后的结果文件 57 | temp= '/home/bobo/windowsPycharmProject/SSD_pytorch/temp' #保存验证的临时文件 58 | annopath = os.path.join(voc_data_root, 'VOC2007', 'Annotations', '%s.xml') 59 | imgpath = os.path.join(voc_data_root, 'VOC2007', 'JPEGImages', '%s.jpg') 60 | imgsetpath = os.path.join(voc_data_root, 'VOC2007', 'ImageSets', 61 | 'Main', '{:s}.txt') 62 | 63 | #测试 64 | temp_test='/home/bobo/windowsPycharmProject/SSD_pytorch/temp/' # 保存测试集(VOC2007测试集)的网络预测结果 65 | 66 | #预测,可视化一张预测图片 67 | test_img='/home/bobo/windowsPycharmProject/SSD_pytorch/temp/test.png' 68 | 69 | 70 | #初始化该类的一个对象 71 | opt=DefaultConfig() -------------------------------------------------------------------------------- /SSD_pytorch/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | class Timer(object): 3 | """A simple timer.""" 4 | def __init__(self): 5 | self.total_time = 0. 6 | self.calls = 0 7 | self.start_time = 0. 8 | self.diff = 0. 9 | self.average_time = 0. 
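# 用法示意(假设):t = Timer(); t.tic(); <待计时代码>; t.toc() 返回平均耗时,
# toc(average=False) 返回本次 tic 以来的耗时;见下方 tic/toc 实现。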
10 | 11 | def tic(self): 12 | # using time.time instead of time.clock because time time.clock 13 | # does not normalize for multithreading 14 | self.start_time = time.time() 15 | 16 | def toc(self, average=True): 17 | self.diff = time.time() - self.start_time 18 | self.total_time += self.diff 19 | self.calls += 1 20 | self.average_time = self.total_time / self.calls 21 | if average: 22 | return self.average_time 23 | else: 24 | return self.diff -------------------------------------------------------------------------------- /SSD_pytorch/utils/visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:utf-8 -*- 3 | # power by Mr.Li 4 | import visdom 5 | import time 6 | import numpy as np 7 | import torch 8 | class Visualizer(object): 9 | ''' 10 | 封装了visdom的基本操作,但是你仍然可以通过`self.vis.function` 11 | 调用原生的visdom接口 12 | ''' 13 | def __init__(self, env='default', **kwargs): 14 | self.vis = visdom.Visdom(env=env, **kwargs) 15 | # 画的第几个数,相当于横座标 16 | # 保存(’loss',23) 即loss的第23个点 17 | self.index = {} 18 | self.log_text = '' 19 | def reinit(self,env='default',**kwargs): 20 | ''' 21 | 修改visdom的配置 重新初始化 22 | ''' 23 | self.vis = visdom.Visdom(env=env,**kwargs) 24 | return self 25 | def plot_many(self, d): 26 | ''' 27 | 一次plot多个损失图形 28 | @params d: dict (name,value) i.e. ('loss',0.11) 29 | ''' 30 | for k, v in d.items(): 31 | self.plot(k, v) 32 | def img_many(self, d): 33 | ''' 34 | 一次画多个图像 35 | ''' 36 | for k, v in d.items(): 37 | self.img(k, v) 38 | def plot(self, name, y,**kwargs): 39 | ''' 40 | self.plot('loss',1.00) 41 | ''' 42 | #得到下标序号 43 | x = self.index.get(name, 0) 44 | self.vis.line(Y=np.array([y]), X=np.array([x]), 45 | win=name,#窗口名 46 | opts=dict(title=name), 47 | update=None if x == 0 else 'append', #按照append的画图形 48 | **kwargs 49 | ) 50 | #下标累加1 51 | self.index[name] = x + 1 52 | def img(self, name, img_,**kwargs): 53 | ''' 54 | self.img('input_img',t.Tensor(64,64)) 55 | self.img('input_imgs',t.Tensor(3,64,64)) 56 | self.img('input_imgs',t.Tensor(100,1,64,64)) 57 | self.img('input_imgs',t.Tensor(100,3,64,64),nrows=10) 58 | 59 | !!!don‘t ~~self.img('input_imgs',t.Tensor(100,64,64),nrows=10)~~!!! 60 | ''' 61 | self.vis.images(img_.cpu().numpy(), 62 | win=name, 63 | opts=dict(title=name), 64 | **kwargs 65 | ) 66 | def log(self,info,win='log_text'): 67 | ''' 68 | self.log({'loss':1,'lr':0.0001}) 69 | 打印日志 70 | ''' 71 | 72 | self.log_text += ('[{time}] {info}
'.format( 73 | time=time.strftime('%m%d_%H%M%S'),\ 74 | info=info)) 75 | self.vis.text(self.log_text,win) 76 | def __getattr__(self, name): 77 | return getattr(self.vis, name) 78 | 79 | def create_vis_plot(self,_xlabel, _ylabel, _title, _legend): 80 | viz = visdom.Visdom() 81 | ''' 82 | 新增可视化图形 83 | ''' 84 | return viz.line( 85 | X=torch.zeros((1,)).cpu(), 86 | Y=torch.zeros((1, 3)).cpu(), 87 | opts=dict( 88 | xlabel=_xlabel, 89 | ylabel=_ylabel, 90 | title=_title, 91 | legend=_legend 92 | ) 93 | ) 94 | 95 | def update_vis_plot(self,iteration, loc, conf, window1, window2, update_type, 96 | epoch_size=1): 97 | ''' 98 | 可视化图形里更新数据 99 | ''' 100 | viz = visdom.Visdom() 101 | viz.line( 102 | X=torch.ones((1, 3)).cpu() * iteration, 103 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu() / epoch_size, 104 | win=window1, 105 | update=update_type 106 | ) 107 | # initialize epoch plot on first iteration 108 | 109 | if iteration == 0: 110 | viz.line( 111 | X=torch.zeros((1, 3)).cpu(), 112 | Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu(), 113 | win=window2, 114 | update=True 115 | ) -------------------------------------------------------------------------------- /UNet_pytorch/dice_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function, Variable 3 | 4 | class DiceCoeff(Function): 5 | """Dice coeff for individual examples""" 6 | 7 | def forward(self, input, target): 8 | self.save_for_backward(input, target) 9 | eps = 0.0001 10 | self.inter = torch.dot(input.view(-1), target.view(-1)) 11 | self.union = torch.sum(input) + torch.sum(target) + eps 12 | 13 | t = (2 * self.inter.float() + eps) / self.union.float() 14 | return t 15 | 16 | # This function has only a single output, so it gets only one gradient 17 | def backward(self, grad_output): 18 | 19 | input, target = self.saved_variables 20 | grad_input = grad_target = None 21 | 22 | if self.needs_input_grad[0]: 23 | grad_input = grad_output * 2 * (target * self.union - self.inter) \ 24 | / self.union * self.union 25 | if self.needs_input_grad[1]: 26 | grad_target = None 27 | 28 | return grad_input, grad_target 29 | 30 | 31 | def dice_coeff(input, target): 32 | """Dice coeff for batches""" 33 | if input.is_cuda: 34 | s = torch.FloatTensor(1).cuda().zero_() 35 | else: 36 | s = torch.FloatTensor(1).zero_() 37 | 38 | for i, c in enumerate(zip(input, target)): 39 | s = s + DiceCoeff().forward(c[0], c[1]) 40 | 41 | return s / (i + 1) 42 | -------------------------------------------------------------------------------- /UNet_pytorch/eval.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from dice_loss import dice_coeff 5 | 6 | 7 | def eval_net(net, dataset, gpu=False): 8 | ''' 9 | :param net: 训练的网络 10 | :param dataset: 验证集 11 | ''' 12 | """Evaluation without the densecrf with the dice coefficient""" 13 | tot = 0 14 | for i, b in enumerate(dataset): 15 | img = b[0] 16 | true_mask = b[1] 17 | 18 | img = torch.from_numpy(img).unsqueeze(0) 19 | true_mask = torch.from_numpy(true_mask).unsqueeze(0) 20 | 21 | if gpu: 22 | img = img.cuda() 23 | true_mask = true_mask.cuda() 24 | 25 | mask_pred = net(img)[0] 26 | mask_pred = (F.sigmoid(mask_pred) > 0.5).float() 27 | # 评价函数:Dice系数 Dice距离用于度量两个集合的相似性 28 | tot += dice_coeff(mask_pred, true_mask).item() 29 | return tot / i 30 | -------------------------------------------------------------------------------- 
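顺带指出上面两处疑似笔误(示意修正,非官方写法):dice_loss.py 的 backward 中 `/ self.union * self.union` 受运算优先级影响等价于先除再乘、互相抵消,按 Dice 系数导数,分母应为 union 的平方;eval.py 末尾 `return tot / i` 除的是最后一个下标,按样本数应除以 i + 1。

```python
# DiceCoeff.backward 的修正写法(示意,假设原意为 Dice 系数对 input 的标准导数)
def backward(self, grad_output):
    input, target = self.saved_variables
    grad_input = grad_target = None
    if self.needs_input_grad[0]:
        # 分母加括号:union 的平方
        grad_input = grad_output * 2 * (target * self.union - self.inter) \
                     / (self.union * self.union)
    return grad_input, grad_target
```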
/UNet_pytorch/predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | from PIL import Image 9 | 10 | from unet import UNet 11 | from utils import resize_and_crop, normalize, split_img_into_squares, hwc_to_chw, merge_masks, dense_crf 12 | from utils import plot_img_and_mask 13 | 14 | from torchvision import transforms 15 | 16 | 17 | def predict_img(net, 18 | full_img, 19 | scale_factor=0.5, 20 | out_threshold=0.5, 21 | use_dense_crf=True, 22 | use_gpu=False): 23 | img_height = full_img.size[1] 24 | img_width = full_img.size[0] 25 | 26 | img = resize_and_crop(full_img, scale=scale_factor) 27 | img = normalize(img) 28 | 29 | left_square, right_square = split_img_into_squares(img) 30 | 31 | left_square = hwc_to_chw(left_square) 32 | right_square = hwc_to_chw(right_square) 33 | 34 | X_left = torch.from_numpy(left_square).unsqueeze(0) 35 | X_right = torch.from_numpy(right_square).unsqueeze(0) 36 | 37 | if use_gpu: 38 | X_left = X_left.cuda() 39 | X_right = X_right.cuda() 40 | 41 | with torch.no_grad(): 42 | output_left = net(X_left) 43 | output_right = net(X_right) 44 | 45 | left_probs = F.sigmoid(output_left).squeeze(0) 46 | right_probs = F.sigmoid(output_right).squeeze(0) 47 | 48 | tf = transforms.Compose( 49 | [ 50 | transforms.ToPILImage(), 51 | transforms.Resize(img_height), 52 | transforms.ToTensor() 53 | ] 54 | ) 55 | 56 | left_probs = tf(left_probs.cpu()) 57 | right_probs = tf(right_probs.cpu()) 58 | 59 | left_mask_np = left_probs.squeeze().cpu().numpy() 60 | right_mask_np = right_probs.squeeze().cpu().numpy() 61 | 62 | full_mask = merge_masks(left_mask_np, right_mask_np, img_width) 63 | 64 | if use_dense_crf: 65 | full_mask = dense_crf(np.array(full_img).astype(np.uint8), full_mask) 66 | 67 | return full_mask > out_threshold 68 | 69 | 70 | def get_args(): 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--model', '-m', default='MODEL.pth', 73 | metavar='FILE', 74 | help="Specify the file in which is stored the model" 75 | " (default : 'MODEL.pth')") 76 | parser.add_argument('--input', '-i', metavar='INPUT', nargs='+', 77 | help='filenames of input images', required=True) 78 | 79 | parser.add_argument('--output', '-o', metavar='INPUT', nargs='+', 80 | help='filenames of ouput images') 81 | parser.add_argument('--cpu', '-c', action='store_true', 82 | help="Do not use the cuda version of the net", 83 | default=False) 84 | parser.add_argument('--viz', '-v', action='store_true', 85 | help="Visualize the images as they are processed", 86 | default=False) 87 | parser.add_argument('--no-save', '-n', action='store_true', 88 | help="Do not save the output masks", 89 | default=False) 90 | parser.add_argument('--no-crf', '-r', action='store_true', 91 | help="Do not use dense CRF postprocessing", 92 | default=False) 93 | parser.add_argument('--mask-threshold', '-t', type=float, 94 | help="Minimum probability value to consider a mask pixel white", 95 | default=0.5) 96 | parser.add_argument('--scale', '-s', type=float, 97 | help="Scale factor for the input images", 98 | default=0.5) 99 | 100 | return parser.parse_args() 101 | 102 | 103 | def get_output_filenames(args): 104 | in_files = args.input 105 | out_files = [] 106 | 107 | if not args.output: 108 | for f in in_files: 109 | pathsplit = os.path.splitext(f) 110 | out_files.append("{}_OUT{}".format(pathsplit[0], pathsplit[1])) 111 | elif len(in_files) != len(args.output): 112 
| print("Error : Input files and output files are not of the same length") 113 | raise SystemExit() 114 | else: 115 | out_files = args.output 116 | 117 | return out_files 118 | 119 | 120 | def mask_to_image(mask): 121 | return Image.fromarray((mask * 255).astype(np.uint8)) 122 | 123 | 124 | if __name__ == "__main__": 125 | args = get_args() 126 | in_files = args.input 127 | out_files = get_output_filenames(args) 128 | 129 | net = UNet(n_channels=3, n_classes=1) 130 | 131 | print("Loading model {}".format(args.model)) 132 | 133 | if not args.cpu: 134 | print("Using CUDA version of the net, prepare your GPU !") 135 | net.cuda() 136 | net.load_state_dict(torch.load(args.model)) 137 | else: 138 | net.cpu() 139 | net.load_state_dict(torch.load(args.model, map_location='cpu')) 140 | print("Using CPU version of the net, this may be very slow") 141 | 142 | print("Model loaded !") 143 | 144 | for i, fn in enumerate(in_files): 145 | print("\nPredicting image {} ...".format(fn)) 146 | 147 | img = Image.open(fn) 148 | if img.size[0] < img.size[1]: 149 | print("Error: image height larger than the width") 150 | 151 | mask = predict_img(net=net, 152 | full_img=img, 153 | scale_factor=args.scale, 154 | out_threshold=args.mask_threshold, 155 | use_dense_crf=not args.no_crf, 156 | use_gpu=not args.cpu) 157 | 158 | if args.viz: 159 | print("Visualizing results for image {}, close to continue ...".format(fn)) 160 | plot_img_and_mask(img, mask) 161 | 162 | if not args.no_save: 163 | out_fn = out_files[i] 164 | result = mask_to_image(mask) 165 | result.save(out_files[i]) 166 | 167 | print("Mask saved to {}".format(out_files[i])) 168 | -------------------------------------------------------------------------------- /UNet_pytorch/readme.md: -------------------------------------------------------------------------------- 1 | # U-Net网络 2 | 3 | ---------- 4 | 5 | 该仓库基于[milesial](https://github.com/milesial)的[Pytorch-UNet](https://github.com/milesial/Pytorch-UNet)进行的,非常感谢大佬无私的奉献。 6 | 7 | 8 | - [原地址](https://github.com/milesial/Pytorch-UNet) 9 | - [原地址的加注释版本](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/UNet_pytorch) 10 | 11 | ---------- 12 | 13 | # 目前支持: 14 | 15 | - 数据集: Kaggle's [Carvana Image Masking Challenge](https://pan.baidu.com/s/1tQI7aQ4y9k0K3qBjCnJ53Q) 16 | - 网络:U-Net 17 | 18 | 19 | # 相比原作者的特点: 20 | 21 | - 所有参数均可在config.py中设置 22 | - 重新整理结构,并加入大量代码注释 23 | - loading 24 | 25 | ---------- 26 | 27 | - 环境: 28 | 29 | | python版本 | pytorch版本 | 30 | | ----------- | ---------- | 31 | | 3.5 | 0.4 | 32 | 33 | - 依赖: 34 | 35 | pip install pydensecrf 36 | 37 | ---------- 38 | 39 | # U-Net网络结构 40 | 41 | ![](https://github.com/bobo0810/imageRepo/blob/master/img/659347.jpg) 42 | 43 | - ###### 原论文左侧 conv 3x3 无pad,故每次conv后feature map尺寸缩小。故与右侧feature map融合之前需要裁剪。 44 | - ###### 该仓库左侧 conv 3x3 pad=1,故每次conv后feature map尺寸不变。故反卷积后保证尺度统一与右侧feature map融合即可。 45 | 46 | 47 | ---------- 48 | 49 | # 准备数据集: 50 | 下载Kaggle's [Carvana Image Masking Challenge](https://pan.baidu.com/s/1tQI7aQ4y9k0K3qBjCnJ53Q)数据集,并在utils/config.py中配置数据集的根目录。 51 | ``` 52 | CarvanaImageMaskingChallenge 53 | │ 54 | └───train 55 | │ │ xxx.gif 56 | │ │ ... 57 | │ 58 | └───train_masks 59 | │ │ xxx.jpg 60 | │ │ ... 
61 | ``` 62 | 63 | 64 | ---------- 65 | 66 | # Trian: 67 | 68 | 1、在config.py中配置训练参数 69 | 70 | 2、执行train.py开始训练 71 | 72 | ---------- 73 | 74 | # Eval: 75 | 76 | 每训练一轮epoch都将计算Dice距离(用于度量两个集合的相似性) 77 | ---------- 78 | 79 | # Predict: 80 | 81 | 功能:可视化一张预测图片 82 | 83 | 1、将预训练模型放到项目根目录下 84 | 85 | 预训练模型下载:[MODEL.pth](https://pan.baidu.com/s/1D_OtX16iL3aJefvOqyRWnw) 86 | 87 | 2、预测单张图片 88 | 89 | python predict.py -i image.jpg -o output.jpg 90 | 91 | 3、预测多张图片并显示 92 | 93 | python predict.py -i image1.jpg image2.jpg --viz --no-save 94 | 95 | 96 |
97 | (图片略,原为两张示例图) 98 |
99 | 100 | 101 | ---------- 102 | 103 | # 关于作者 104 | 105 | - 原作者 [milesial](https://github.com/milesial) 106 | 107 | - 本仓库作者 [Mr.Li](https://github.com/bobo0810) -------------------------------------------------------------------------------- /UNet_pytorch/submit.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | 4 | import torch 5 | 6 | from predict import predict_img 7 | from utils import rle_encode 8 | from unet import UNet 9 | 10 | 11 | def submit(net, gpu=False): 12 | """Used for Kaggle submission: predicts and encode all test images""" 13 | dir = 'data/test/' 14 | 15 | N = len(list(os.listdir(dir))) 16 | with open('SUBMISSION.csv', 'a') as f: 17 | f.write('img,rle_mask\n') 18 | for index, i in enumerate(os.listdir(dir)): 19 | print('{}/{}'.format(index, N)) 20 | 21 | img = Image.open(dir + i) 22 | 23 | mask = predict_img(net, img, gpu) 24 | enc = rle_encode(mask) 25 | f.write('{},{}\n'.format(i, ' '.join(map(str, enc)))) 26 | 27 | 28 | if __name__ == '__main__': 29 | net = UNet(3, 1).cuda() 30 | net.load_state_dict(torch.load('MODEL.pth')) 31 | submit(net, True) 32 | -------------------------------------------------------------------------------- /UNet_pytorch/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import numpy as np 4 | 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch import optim 10 | 11 | from eval import eval_net 12 | from unet import UNet 13 | from utils import get_ids, split_ids, split_train_val, get_imgs_and_masks, batch 14 | from utils.config import opt_train 15 | 16 | def train_net(net, 17 | epochs=5, 18 | batch_size=1, 19 | lr=0.1, 20 | val_percent=0.05, # 训练集:验证集= 0.95: 0.05 21 | save_cp=True, 22 | gpu=False, 23 | img_scale=0.5): 24 | 25 | dir_img = opt_train.dir_img 26 | dir_mask = opt_train.dir_mask 27 | dir_checkpoint = opt_train.dir_checkpoint 28 | 29 | # 得到 图片路径列表 ids为 图片名称(无后缀名) 30 | ids = get_ids(dir_img) 31 | # 得到truple元组 (无后缀名的 图片名称,序号) 32 | # eg:当n为2 图片名称为bobo.jpg 时, 得到(bobo,0) (bobo,1) 33 | # 当序号为0 时,裁剪宽度,得到左边部分图片 当序号为1 时,裁剪宽度,得到右边部分图片 34 | ids = split_ids(ids) 35 | # 打乱数据集后,按照val_percent的比例来 切分 训练集 和 验证集 36 | iddataset = split_train_val(ids, val_percent) 37 | 38 | 39 | print(''' 40 | 开始训练: 41 | Epochs: {} 42 | Batch size: {} 43 | Learning rate: {} 44 | 训练集大小: {} 45 | 验证集大小: {} 46 | GPU: {} 47 | '''.format(epochs, batch_size, lr, len(iddataset['train']), 48 | len(iddataset['val']), str(gpu))) 49 | 50 | #训练集大小 51 | N_train = len(iddataset['train']) 52 | 53 | optimizer = optim.SGD(net.parameters(), 54 | lr=lr, 55 | momentum=0.9, 56 | weight_decay=0.0005) 57 | 58 | #二进制交叉熵 59 | criterion = nn.BCELoss() 60 | 61 | for epoch in range(epochs): 62 | print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) 63 | 64 | # reset the generators 65 | # 每轮epoch得到 训练集 和 验证集 66 | train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) 67 | val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) 68 | 69 | 70 | 71 | 72 | # 重置epoch损失计数器 73 | epoch_loss = 0 74 | 75 | for i, b in enumerate(batch(train, batch_size)): 76 | # 得到 一个batch的 imgs tensor 及 对应真实mask值 77 | # 当序号为0 时,裁剪宽度,得到左边部分图片[384,384,3] 当序号为1 时,裁剪宽度,得到右边部分图片[384,190,3] 78 | imgs = np.array([i[0] for i in b]).astype(np.float32) 79 | true_masks = np.array([i[1] for i in b]) 80 | 81 | # 将值转为 torch tensor 82 | imgs = torch.from_numpy(imgs) 83 | 
true_masks = torch.from_numpy(true_masks) 84 | 85 | # 训练数据转到GPU上 86 | if gpu: 87 | imgs = imgs.cuda() 88 | true_masks = true_masks.cuda() 89 | 90 | # 得到 网络输出的预测mask [10,1,384,384] 91 | masks_pred = net(imgs) 92 | # 经过sigmoid 93 | masks_probs = F.sigmoid(masks_pred) 94 | masks_probs_flat = masks_probs.view(-1) 95 | 96 | true_masks_flat = true_masks.view(-1) 97 | # 计算二进制交叉熵损失 98 | loss = criterion(masks_probs_flat, true_masks_flat) 99 | # 统计一个epoch的所有batch的loss之和,用以计算 一个epoch的 loss均值 100 | epoch_loss += loss.item() 101 | 102 | # 输出 当前epoch的第几个batch 及 当前batch的loss 103 | print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) 104 | 105 | # 优化器梯度清零 106 | optimizer.zero_grad() 107 | # 反向传播 108 | loss.backward() 109 | # 更新参数 110 | optimizer.step() 111 | 112 | # 一轮epoch结束,该轮epoch的 loss均值 113 | print('Epoch finished ! Loss: {}'.format(epoch_loss / i)) 114 | 115 | # 每轮epoch之后使用验证集进行评价 116 | if True: 117 | # 评价函数:Dice系数 Dice距离用于度量两个集合的相似性 118 | val_dice = eval_net(net, val, gpu) 119 | print('Validation Dice Coeff: {}'.format(val_dice)) 120 | 121 | # 保存模型 122 | if save_cp: 123 | torch.save(net.state_dict(), 124 | dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) 125 | print('Checkpoint {} saved !'.format(epoch + 1)) 126 | 127 | 128 | 129 | 130 | 131 | if __name__ == '__main__': 132 | 133 | # 获取训练参数 134 | args = opt_train 135 | # n_channels:输入图像的通道数 n_classes:二分类 136 | net = UNet(n_channels=3, n_classes=1) 137 | 138 | # 加载预训练模型 139 | if args.load: 140 | net.load_state_dict(torch.load(args.load)) 141 | print('Model loaded from {}'.format(args.load)) 142 | 143 | # 网络转移到GPU上 144 | if args.gpu: 145 | net.cuda() 146 | cudnn.benchmark = True # 速度更快,但占用内存更多 147 | 148 | try: 149 | train_net(net=net, 150 | epochs=args.epochs, 151 | batch_size=args.batchsize, 152 | lr=args.lr, 153 | gpu=args.gpu, 154 | img_scale=args.scale) 155 | except KeyboardInterrupt: 156 | # 当运行出错时,保存最新的模型 157 | torch.save(net.state_dict(), 'INTERRUPTED.pth') 158 | print('Saved interrupt') 159 | try: 160 | sys.exit(0) 161 | except SystemExit: 162 | os._exit(0) 163 | -------------------------------------------------------------------------------- /UNet_pytorch/unet/__init__.py: -------------------------------------------------------------------------------- 1 | from .unet_model import UNet 2 | -------------------------------------------------------------------------------- /UNet_pytorch/unet/unet_model.py: -------------------------------------------------------------------------------- 1 | # full assembly of the sub-parts to form the complete net 2 | 3 | from .unet_parts import * 4 | 5 | class UNet(nn.Module): 6 | def __init__(self, n_channels, n_classes): 7 | super(UNet, self).__init__() 8 | self.inc = inconv(n_channels, 64) # 输出层,n_channels=3 输入图像为3通道 9 | self.down1 = down(64, 128) 10 | self.down2 = down(128, 256) 11 | self.down3 = down(256, 512) 12 | self.down4 = down(512, 512) 13 | self.up1 = up(1024, 256) 14 | self.up2 = up(512, 128) 15 | self.up3 = up(256, 64) 16 | self.up4 = up(128, 64) 17 | # 最后一层的卷积核大小为1*1,将64通道的特征图转化为特定深度(分类数量,二分类为2)的结果 18 | self.outc = outconv(64, n_classes) # 输出层,n_classes=1 二分类 19 | 20 | def forward(self, x): 21 | x1 = self.inc(x) 22 | x2 = self.down1(x1) 23 | x3 = self.down2(x2) 24 | x4 = self.down3(x3) 25 | x5 = self.down4(x4) 26 | x = self.up1(x5, x4) 27 | x = self.up2(x, x3) 28 | x = self.up3(x, x2) 29 | x = self.up4(x, x1) 30 | x = self.outc(x) 31 | return x 32 | -------------------------------------------------------------------------------- /UNet_pytorch/unet/unet_parts.py: 
-------------------------------------------------------------------------------- 1 | # sub-parts of the U-Net model 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class double_conv(nn.Module): 9 | '''(conv => BN => ReLU) * 2''' 10 | def __init__(self, in_ch, out_ch): 11 | super(double_conv, self).__init__() 12 | self.conv = nn.Sequential( 13 | # 每次重复中都有2个卷积层,卷积核大小均为3*3 14 | nn.Conv2d(in_ch, out_ch, 3, padding=1), 15 | nn.BatchNorm2d(out_ch), 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(out_ch, out_ch, 3, padding=1), 18 | nn.BatchNorm2d(out_ch), 19 | nn.ReLU(inplace=True) 20 | ) 21 | 22 | def forward(self, x): 23 | x = self.conv(x) 24 | return x 25 | 26 | 27 | class inconv(nn.Module): 28 | def __init__(self, in_ch, out_ch): 29 | # 输入层,in_ch=3输入通道数, out_ch=64输出通道数 30 | super(inconv, self).__init__() 31 | self.conv = double_conv(in_ch, out_ch) 32 | 33 | def forward(self, x): 34 | x = self.conv(x) 35 | return x 36 | 37 | 38 | class down(nn.Module): 39 | def __init__(self, in_ch, out_ch): 40 | super(down, self).__init__() 41 | # 论文:它的架构是一种重复结构,每次重复中都有2个卷积层和一个pooling层,卷积层中卷积核大小均为3*3,激活函数使用ReLU 42 | self.mpconv = nn.Sequential( 43 | nn.MaxPool2d(2), 44 | double_conv(in_ch, out_ch) 45 | ) 46 | 47 | def forward(self, x): 48 | x = self.mpconv(x) 49 | return x 50 | 51 | 52 | class up(nn.Module): 53 | def __init__(self, in_ch, out_ch, bilinear=True): 54 | super(up, self).__init__() 55 | 56 | # would be a nice idea if the upsampling could be learned too, 57 | # but my machine do not have enough memory to handle all those weights 58 | # 默认为全部为 上采样(作者因内存不足)。 59 | # bilinear=False 改为 使用反卷积(论文方法),效果会更好? 60 | 61 | # 使用双线性上采样来放大输入 62 | if bilinear: # batchsize=10 scale=0.3 时 占用9047M 63 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 64 | # (论文方法)二维反卷积层 反卷积层可以理解为输入的数据和卷积核的位置反转的卷积操作. 反卷积有时候也会被翻译成解卷积. 
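# 补充(示意):ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2) 核2、步长2,
# 输出空间尺寸放大一倍且通道数不变,与上面 Upsample(scale_factor=2) 的输出形状一致;
# 区别在于反卷积的上采样权重可学习(对应上面英文注释 "upsampling could be learned")。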
65 | else: # 论文方法 batchsize=10 scale=0.3 时 占用9040M 66 | self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2) 67 | 68 | self.conv = double_conv(in_ch, out_ch) 69 | 70 | def forward(self, x1, x2): 71 | x1 = self.up(x1) 72 | diffX = x1.size()[2] - x2.size()[2] 73 | diffY = x1.size()[3] - x2.size()[3] 74 | x2 = F.pad(x2, (diffX // 2, int(diffX / 2), 75 | diffY // 2, int(diffY / 2))) 76 | # 深度相加 77 | x = torch.cat([x2, x1], dim=1) 78 | x = self.conv(x) 79 | return x 80 | 81 | 82 | class outconv(nn.Module): 83 | def __init__(self, in_ch, out_ch): 84 | super(outconv, self).__init__() 85 | # 最后一层的卷积核大小为1*1,将64通道的特征图转化为特定深度(分类数量,二分类为2)的结果 86 | self.conv = nn.Conv2d(in_ch, out_ch, 1) 87 | 88 | def forward(self, x): 89 | x = self.conv(x) 90 | return x 91 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .crf import * 2 | from .load import * 3 | from .utils import * 4 | from .data_vis import * 5 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # power by Mr.Li 3 | # 设置默认参数 4 | import os.path 5 | class DefaultConfig_train(): 6 | epochs=5 #number of epochs 7 | batchsize= 10 #batch size 8 | lr=0.1 #learning rate 9 | gpu=True #use cudas 10 | load=False #load file model 11 | scale=0.3 #downscaling factor of the images 图像训练时缩小倍数 该值对内存影响较大(仓库默认0.5) 12 | 13 | # 数据集 14 | dir_img = '/home/bobo/data/CarvanaImageMaskingChallenge_UNet/train/' 15 | dir_mask = '/home/bobo/data/CarvanaImageMaskingChallenge_UNet/train_masks/' 16 | dir_checkpoint = './checkpoints/' # 模型保存位置 17 | 18 | visdom=True # 是否可视化 19 | 20 | env = 'U-Net' # visdom 环境的名字 21 | visdom = True # 是否可视化 22 | datesets_name='Carvana Image Masking Challenge' # 数据集名称 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | class DefaultConfig_predict(): 31 | input='./intput.jpg' #filenames of input images 32 | output='./output.jpg' #filenames of ouput images 33 | model= './MODEL.pth' # Specify the file in which is stored the model 34 | cpu=False #Do not use the cuda version of the net 35 | scale=0.5 #Scale factor for the input images 36 | mask_threshold=0.5 #Minimum probability value to consider a mask pixel white 37 | no_crf=False #Do not use dense CRF postprocessing 38 | no_save=False #Do not save the output masks 39 | viz=False #Visualize the images as they are processed 40 | #初始化该类的一个对象 41 | opt_train=DefaultConfig_train() 42 | 43 | opt_predict=DefaultConfig_predict() 44 | 45 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/crf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pydensecrf.densecrf as dcrf 3 | 4 | def dense_crf(img, output_probs): 5 | h = output_probs.shape[0] 6 | w = output_probs.shape[1] 7 | 8 | output_probs = np.expand_dims(output_probs, 0) 9 | output_probs = np.append(1 - output_probs, output_probs, axis=0) 10 | 11 | d = dcrf.DenseCRF2D(w, h, 2) 12 | U = -np.log(output_probs) 13 | U = U.reshape((2, -1)) 14 | U = np.ascontiguousarray(U) 15 | img = np.ascontiguousarray(img) 16 | 17 | d.setUnaryEnergy(U) 18 | 19 | d.addPairwiseGaussian(sxy=20, compat=3) 20 | d.addPairwiseBilateral(sxy=30, srgb=20, rgbim=img, compat=10) 21 | 22 | Q = d.inference(5) 23 | Q = np.argmax(np.array(Q), axis=0).reshape((h, 
w)) 24 | 25 | return Q 26 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/data_vis.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | def plot_img_and_mask(img, mask): 4 | fig = plt.figure() 5 | a = fig.add_subplot(1, 2, 1) 6 | a.set_title('Input image') 7 | plt.imshow(img) 8 | 9 | b = fig.add_subplot(1, 2, 2) 10 | b.set_title('Output mask') 11 | plt.imshow(mask) 12 | plt.show() -------------------------------------------------------------------------------- /UNet_pytorch/utils/load.py: -------------------------------------------------------------------------------- 1 | # 2 | # load.py : utils on generators / lists of ids to transform from strings to 3 | # cropped images and masks 4 | 5 | import os 6 | 7 | import numpy as np 8 | from PIL import Image 9 | 10 | from .utils import resize_and_crop, get_square, normalize, hwc_to_chw 11 | 12 | 13 | def get_ids(dir): 14 | """Returns a list of the ids in the directory""" 15 | # eg:f[:-4]是为了去掉 .jpg 后缀。结果只为 照片名称,无后缀。 16 | return (f[:-4] for f in os.listdir(dir)) 17 | 18 | 19 | def split_ids(ids, n=2): 20 | """Split each id in n, creating n tuples (id, k) for each id""" 21 | return ((id, i) for i in range(n) for id in ids) 22 | 23 | 24 | def to_cropped_imgs(ids, dir, suffix, scale): 25 | """From a list of tuples, returns the correct cropped img""" 26 | #返回 tuples,(img的resize后的tensor,序号) 27 | for id, pos in ids: 28 | im = resize_and_crop(Image.open(dir + id + suffix), scale=scale) 29 | # get_square: 当pos为0 时,裁剪宽度,得到左边部分图片[384,384,3] 当pos为1 时,裁剪宽度,得到右边部分图片[384,190,3] 30 | yield get_square(im, pos) 31 | 32 | def get_imgs_and_masks(ids, dir_img, dir_mask, scale): 33 | ''' 34 | :param ids: 35 | :param dir_img: 图片路径 36 | :param dir_mask: mask图片路径 37 | :param scale: 图像训练时缩小倍数 38 | :return:all the couples (img, mask) 39 | ''' 40 | """Return all the couples (img, mask)""" 41 | 42 | # 读取图片,并按照scale进行resize 43 | imgs = to_cropped_imgs(ids, dir_img, '.jpg', scale) 44 | 45 | # need to transform from HWC to CHW 转化(高H、宽W、通道C)为(通道C、高H、宽W) 46 | imgs_switched = map(hwc_to_chw, imgs) 47 | # 归一化(值转化到0-1之间) 48 | imgs_normalized = map(normalize, imgs_switched) 49 | 50 | masks = to_cropped_imgs(ids, dir_mask, '_mask.gif', scale) 51 | # list( rezise且经过转化和归一化后的图像tensor,resize后的mask图像tensor) 52 | return zip(imgs_normalized, masks) 53 | 54 | 55 | def get_full_img_and_mask(id, dir_img, dir_mask): 56 | im = Image.open(dir_img + id + '.jpg') 57 | mask = Image.open(dir_mask + id + '_mask.gif') 58 | return np.array(im), np.array(mask) 59 | -------------------------------------------------------------------------------- /UNet_pytorch/utils/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | 5 | def get_square(img, pos): 6 | """Extract a left or a right square from ndarray shape : (H, W, C))""" 7 | h = img.shape[0] 8 | if pos == 0: 9 | return img[:, :h] 10 | else: 11 | return img[:, -h:] 12 | 13 | def split_img_into_squares(img): 14 | return get_square(img, 0), get_square(img, 1) 15 | 16 | def hwc_to_chw(img): 17 | return np.transpose(img, axes=[2, 0, 1]) 18 | 19 | def resize_and_crop(pilimg, scale=0.5, final_height=None): 20 | w = pilimg.size[0] 21 | h = pilimg.size[1] 22 | newW = int(w * scale) 23 | newH = int(h * scale) 24 | 25 | if not final_height: 26 | diff = 0 27 | else: 28 | diff = newH - final_height 29 | 30 | img = pilimg.resize((newW, newH)) 31 | # crop 
--------------------------------------------------------------------------------
/UNet_pytorch/utils/utils.py:
--------------------------------------------------------------------------------
import random
import numpy as np


def get_square(img, pos):
    """Extract a left or a right square from ndarray shape : (H, W, C))"""
    h = img.shape[0]
    if pos == 0:
        return img[:, :h]
    else:
        return img[:, -h:]

def split_img_into_squares(img):
    return get_square(img, 0), get_square(img, 1)

def hwc_to_chw(img):
    return np.transpose(img, axes=[2, 0, 1])

def resize_and_crop(pilimg, scale=0.5, final_height=None):
    w = pilimg.size[0]
    h = pilimg.size[1]
    newW = int(w * scale)
    newH = int(h * scale)

    if not final_height:
        diff = 0
    else:
        diff = newH - final_height

    img = pilimg.resize((newW, newH))
    # crop extracts a rectangular region from the image. It takes a 4-tuple
    # (left, upper, right, lower); the origin (0, 0) is the top-left corner.
    img = img.crop((0, diff // 2, newW, newH - diff // 2))
    return np.array(img, dtype=np.float32)

def batch(iterable, batch_size):
    """Yields lists by batch"""
    b = []
    for i, t in enumerate(iterable):
        b.append(t)
        if (i + 1) % batch_size == 0:
            yield b
            b = []

    if len(b) > 0:
        yield b

def split_train_val(dataset, val_percent=0.05):
    dataset = list(dataset)
    length = len(dataset)
    n = int(length * val_percent)
    random.shuffle(dataset)
    return {'train': dataset[:-n], 'val': dataset[-n:]}


def normalize(x):
    return x / 255

def merge_masks(img1, img2, full_w):
    h = img1.shape[0]

    new = np.zeros((h, full_w), np.float32)
    new[:, :full_w // 2 + 1] = img1[:, :full_w // 2 + 1]
    new[:, full_w // 2 + 1:] = img2[:, -(full_w // 2 - 1):]

    return new


# credits to https://stackoverflow.com/users/6076729/manuel-lagunas
def rle_encode(mask_image):
    pixels = mask_image.flatten()
    # We avoid issues with '1' at the start or end (at the corners of
    # the original image) by setting those pixels to '0' explicitly.
    # We do not expect these to be non-zero for an accurate mask,
    # so this should not harm the score.
    pixels[0] = 0
    pixels[-1] = 0
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
    runs[1::2] = runs[1::2] - runs[:-1:2]
    return runs
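A worked example of the run-length encoding above (illustrative, not part of the original file):

```python
import numpy as np
from utils.utils import rle_encode

# mask [0, 1, 1, 0] -> [2 2]: one run starting at pixel 2 (1-indexed) of length 2
print(rle_encode(np.array([0, 1, 1, 0])))
```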
--------------------------------------------------------------------------------
/Yolov1_pytorch/config.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# powered by Mr.Li
# default parameters
class DefaultConfig():
    env = 'YOLOv1'  # name of the visdom environment
    # model = 'NetWork'  # model to use; the name must match one in models/__init__.py
    file_root = '/home/zhuhui/data/VOCdevkit/VOC2012/JPEGImages/'   # VOC2012 training set
    test_root = '/home/zhuhui/data/VOCdevkit/VOC2007/JPEGImages/'   # VOC2007 test set
    train_Annotations = '/home/zhuhui/data/VOCdevkit/VOC2012/Annotations/'
    voc_2007test = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/data/voc2007test.txt'
    voc_2012train = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/data/voc2012train.txt'

    test_img_dir = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/testImgs/a.jpg'
    result_img_dir = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/testImgs/result_a.jpg'

    batch_size = 32   # batch size
    use_gpu = True    # use GPU or not
    num_workers = 4   # how many worker threads for loading data
    print_freq = 20   # print info every N batches

    best_test_loss_model_path = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/checkpoint/yolo_val_best.pth'
    current_epoch_model_path = '/home/bobo/PycharmProjects/torchProjectss/YOLOv1ByBobo/checkpoint/yolo_bobo.pth'
    load_model_path = None  # path of a pretrained model to load; None means do not load
    num_epochs = 120        # number of training epochs
    learning_rate = 0.001   # initial learning rate
    lr_decay = 0.5          # when val_loss increases, lr = lr * lr_decay
    momentum = 0.95
    weight_decay = 5e-4     # weight decay (L2 regularization)
    # the VOC classes
    VOC_CLASSES = (  # always index 0
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor')


# instantiate one object of this class
opt = DefaultConfig()
--------------------------------------------------------------------------------
/Yolov1_pytorch/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/data/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/data/xml_2_txt.py:
--------------------------------------------------------------------------------
import xml.etree.ElementTree as ET
import os
from config import opt

def parse_rec(filename):
    """
    Parse a PASCAL VOC xml file.
    Converts the dataset annotations from xml to txt; used to generate voc2007test.txt etc.
    """
    tree = ET.parse(filename)
    objects = []
    # iterate over all objects in one image
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        #obj_struct['pose'] = obj.find('pose').text
        #obj_struct['truncated'] = int(obj.find('truncated').text)
        #obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        # the origin is the image's top-left corner, x to the right and y downwards:
        # top-left corner (xmin, ymin) and bottom-right corner (xmax, ymax)
        obj_struct['bbox'] = [int(float(bbox.find('xmin').text)),
                              int(float(bbox.find('ymin').text)),
                              int(float(bbox.find('xmax').text)),
                              int(float(bbox.find('ymax').text))]
        objects.append(obj_struct)

    return objects

# create a txt file named voc2012train and prepare to write to it
txt_file = open('data/voc2012train.txt', 'w')
Annotations = opt.train_Annotations
xml_files = os.listdir(Annotations)

# iterate over all xml files
for xml_file in xml_files:
    image_path = xml_file.split('.')[0] + '.jpg'
    # write the image name (not the full path) to the txt
    txt_file.write(image_path + ' ')
    results = parse_rec(Annotations + xml_file)
    num_obj = len(results)
    # write the total number of objects in this image
    txt_file.write(str(num_obj) + ' ')
    # iterate over all objects in this image
    for result in results:
        class_name = result['name']
        bbox = result['bbox']
        class_name = opt.VOC_CLASSES.index(class_name)
        # write the bbox coordinates and the class index of each object
        txt_file.write(str(bbox[0])+' '+str(bbox[1])+' '+str(bbox[2])+' '+str(bbox[3])+' '+str(class_name)+' ')
    txt_file.write('\n')
# final format per line: image name (1 value), object count (1 value),
# then per object: bbox coordinates (4 values) + class index (1 value)

txt_file.close()
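For reference, one line of the generated voc2012train.txt looks like this (the file name and the numbers are illustrative; 14 is the index of 'person' in VOC_CLASSES):

```
2012_004331.jpg 2 52 86 251 374 14 127 133 271 352 14
```

That is: image name, object count (2), then for each object its corners (xmin ymin xmax ymax) and class index.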
--------------------------------------------------------------------------------
/Yolov1_pytorch/main_resnet.py:
--------------------------------------------------------------------------------
from collections import defaultdict

import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm

from config import opt
from data.dataset import yoloDataset
from models.resnet import resnet152_bo, resnet152
from utils.visualize import Visualizer
from utils.yoloLoss import yoloLoss
from utils.predictUtils import predict_result
from utils.predictUtils import voc_eval
from utils.predictUtils import voc_ap


def train():
    vis = Visualizer(opt.env)
    # network part ================================================= start
    # pretrained=True supplies a pretrained resnet152 backbone to resnet152_bo
    net = resnet152_bo(resnet152(pretrained=True))
    # load the checkpoint into memory (CPU) first
    if opt.load_model_path:
        net.load_state_dict(torch.load(opt.load_model_path, map_location=lambda storage, loc: storage))
    # then move the model to the GPU
    if opt.use_gpu:
        net.cuda()
    # print the network structure
    print(net)
    print('Loaded the pretrained model')
    # switch the model to training mode
    net.train()
    # network part ================================================= end

    # data loading part ============================================ start
    # custom dataset wrapper
    train_dataset = yoloDataset(root=opt.file_root, list_file=opt.voc_2012train, train=True, transform=[transforms.ToTensor()])
    # dataset loader; shuffle: randomize order, num_workers: worker threads
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=4)
    test_dataset = yoloDataset(root=opt.test_root, list_file=opt.voc_2007test, train=False, transform=[transforms.ToTensor()])
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False, num_workers=4)
    # data loading part ============================================ end

    # custom loss: 7 means a 7x7 grid over the image, 2 means two boxes per cell,
    # 5 is λcoord (weights the 8-dim coordinate predictions more heavily), and
    # 0.5 weights the confidence loss of boxes that contain no object
    criterion = yoloLoss(7, 2, 5, 0.5)
    learning_rate = opt.learning_rate
    # optimizer
    optimizer = torch.optim.SGD(net.parameters(), lr=opt.learning_rate, momentum=opt.momentum, weight_decay=opt.weight_decay)
    # optimizer = torch.optim.Adam(net.parameters(), lr=opt.learning_rate, weight_decay=opt.weight_decay)
    print('The training set has %d images' % (len(train_dataset)))
    print('Batch size is %d' % (opt.batch_size))
    # write training progress into a log file
    logfile = open('log/log.txt', 'w')
    # np.inf is positive infinity
    best_test_loss = np.inf

    for epoch in range(opt.num_epochs):
        if epoch == 1:
            learning_rate = 0.0005
        if epoch == 2:
            learning_rate = 0.00075
        if epoch == 3:
            learning_rate = 0.001
        if epoch == 80:
            learning_rate = 0.0001
        if epoch == 100:
            learning_rate = 0.00001
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
        # current epoch number and its learning rate
        print('\n\nCurrent epoch: %d / %d' % (epoch + 1, opt.num_epochs))
        print('Learning rate for this epoch: {}'.format(learning_rate))

        # total loss of this epoch
        total_loss = 0.
        # start training (switch back to training mode; eval() below turns it off for validation)
        net.train()
        for i, (images, target) in enumerate(train_loader):
            images = Variable(images)
            target = Variable(target)
            if opt.use_gpu:
                images, target = images.cuda(), target.cuda()
            # forward pass to get the predictions
            pred = net(images)
            # compute the loss; yoloLoss extends nn.Module, so calling the object
            # runs its forward method
            loss = criterion(pred, target)
            total_loss += loss.data[0]
            # zero the optimizer's gradients
            optimizer.zero_grad()
            # backpropagate the loss
            loss.backward()
            # update the parameters
            optimizer.step()
            if (i + 1) % opt.print_freq == 0:
                print('Training: epoch [%d/%d], Iter [%d/%d], current batch loss: %.4f, average loss so far in this epoch: %.4f'
                      % (epoch + 1, opt.num_epochs, i + 1, len(train_loader), loss.data[0], total_loss / (i + 1)))
                # plot the average training loss
                vis.plot_train_val(loss_train=total_loss / (i + 1))
        # save the latest model
        torch.save(net.state_dict(), opt.current_epoch_model_path)
        vis.log("epoch:{epoch},lr:{lr}".format(
            epoch=epoch, lr=learning_rate))

        # validation once per epoch
        validation_loss = 0.0
        # switch the model to evaluation mode
        net.eval()
        # validate on the VOC2007 test set after each epoch
        for i, (images, target) in enumerate(test_loader):
            images = Variable(images, volatile=True)
            target = Variable(target, volatile=True)
            if opt.use_gpu:
                images, target = images.cuda(), target.cuda()
            # forward pass to get the predictions
            pred = net(images)
            # loss
            loss = criterion(pred, target)
            validation_loss += loss.data[0]
        # average loss on the VOC2007 test set
        validation_loss /= len(test_loader)
        # plot the average validation loss
        vis.plot_train_val(loss_val=validation_loss)
        # the training goal is the smallest validation loss,
        # so save the model with the best validation loss so far
        if best_test_loss > validation_loss:
            best_test_loss = validation_loss
            print('Best average validation loss so far: %.5f' % best_test_loss)
            torch.save(net.state_dict(), opt.best_test_loss_model_path)
        # write this epoch's numbers into the log file
        logfile.writelines(str(epoch) + '\t' + str(validation_loss) + '\n')
        logfile.flush()

def predict():
    # build the model; its weights are replaced by the checkpoint loaded below
    predict_model = resnet152_bo(resnet152(pretrained=True))

    predict_model.load_state_dict(torch.load(opt.load_model_path, map_location=lambda storage, loc: storage))
    # switch the model to evaluation mode
    predict_model.eval()
    # move to the GPU if available
    if opt.use_gpu:
        predict_model.cuda()
    # path of the test image
    test_img_dir = opt.test_img_dir
    image = cv2.imread(test_img_dir)
    # each result holds: top-left corner, bottom-right corner, class name,
    # the input image path, and the predicted class probability
    result = predict_result(predict_model, test_img_dir)
    for left_up, right_bottom, class_name, _, prob in result:
        # draw the predicted box on the test image
        cv2.rectangle(image, left_up, right_bottom, (0, 255, 0), 2)
        # write the class name at the top-left corner of the box
        cv2.putText(image, class_name, left_up, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1, cv2.LINE_AA)
        print(prob)
    # save the annotated result
    cv2.imwrite(opt.result_img_dir, image)


# main entry
if __name__ == '__main__':

    # command line tool; exposes train()/predict() as subcommands
    import fire
    fire.Fire()

    train()
    # predict()
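The epoch-based learning-rate schedule hard-coded in train() above is a warm-up followed by step decay. A minimal equivalent helper (a sketch only; the function name is mine, and the breakpoints are copied from train()):

```python
def lr_at_epoch(epoch):
    # warm-up 0.001 -> 0.0005 -> 0.00075 -> 0.001, then decay at epochs 80 and 100
    for start, lr in [(100, 0.00001), (80, 0.0001), (3, 0.001), (2, 0.00075), (1, 0.0005)]:
        if epoch >= start:
            return lr
    return 0.001  # epoch 0: the initial opt.learning_rate

# inside the epoch loop:
# for param_group in optimizer.param_groups:
#     param_group['lr'] = lr_at_epoch(epoch)
```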
--------------------------------------------------------------------------------
/Yolov1_pytorch/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/models/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/models/resnet.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import math
import torch.nn.functional as F
from torchvision.models import resnet152

class resnet152_bo(nn.Module):

    def __init__(self, features, num_classes=1000):
        super(resnet152_bo, self).__init__()
        # use the (optionally pretrained) backbone passed in via `features`
        model = features
        # do not change the architecture for now; only replace the output layers:
        # drop the last two layers of the model (the final max pool and the fully connected layer)
        self.features = nn.Sequential(*list(model.children())[:-2])
        self.classifier = nn.Sequential(
            nn.Linear(2048 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # one fully connected layer removed
            # nn.Linear(4096, 4096),
            # nn.ReLU(True),
            # nn.Dropout(),
            # the last layer outputs 1470 values, i.e. one image's predictions (1470 = 7x7x30)
            nn.Linear(4096, 1470),
        )
        # model.fc = nn.Linear(2048, 1470)
        # self.resnet152_bo = model
        # only the linear layers were changed, so only they get re-initialized
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            # only the linear layers were changed, so only they get re-initialized
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        # squash the outputs into the 0-1 range with a sigmoid
        x = F.sigmoid(x)
        # reshape to (N, 7, 7, 30): N images, (7, 7, 30) predictions per image
        x = x.view(-1, 7, 7, 30)
        return x


def test():
    '''
    for quick testing
    '''
    import torch
    from torch.autograd import Variable

    model = resnet152_bo(resnet152(pretrained=True))
    img = torch.rand(2, 3, 224, 224)
    img = Variable(img)
    output = model(img)
    output = output.view(-1, 7, 7, 30)
    print(output.size())

if __name__ == '__main__':
    test()
--------------------------------------------------------------------------------
/Yolov1_pytorch/readme.md:
--------------------------------------------------------------------------------
- Environment:

  | python version | pytorch version |
  | -------------- | --------------- |
  | 3.5            | 0.3.0           |

- Notes:

  1. Basic implementation reference: [pytorchYOLOv1master][1]

  2. This is only a refactor; it does not improve the results.

  3. When testing, the loss on the VOC2012 training set is around 0.1, while the loss on the VOC2007 test set barely drops; overfitting is suspected.

  4. Before running main.py, make sure the visdom visualization server is started.

- Current work:

  1. ~~Visualize test images and loss after training~~

  2. ~~Upload the trained models here~~

  3. ~~Add comments for easier understanding~~

  4. Try to optimize the network and improve mAP

- Directions for improvement:

  1. Change the learning rate

  2. ~~Adjust the network structure (the reference version uses VGG16; try a residual network)~~

  3. ~~Switch the optimizer from SGD to Adam~~


- Model downloads:

  1. The model that performed best on the VOC2007 test set ([Baidu Netdisk](https://pan.baidu.com/s/1HCO24KGqjJw01raiCB7f2A))

  2. The last saved model ([Baidu Netdisk](https://pan.baidu.com/s/1HKY7qGgK7i3Fv_ks9ldflw))

- Results:

  Validation set: VOC2012 training set

  Model: the one that performed best on the VOC2007 test set

(figure: detection result on a test image)
### Loss trend

| epoch | loss on VOC2007 test set |
| ----- | ------------------------ |
| 0     | 5.806424896178707        |
| 1     | 5.855176733386132        |
| 2     | 5.9203009036279495       |
| ...   | ...                      |
| 118   | 5.187265388427242        |
| 119   | 5.190768877152474        |

(figure: train/validation loss curves)

Note: the blue line is the loss on the VOC2012 training set; the yellow line is the loss on the VOC2007 test set.

### Network performance

- Performance of the last saved model on the VOC2007 validation set
(figures: predictions on VOC2007 images)
(figures: predictions on VOC2007 images)
(figures: predictions on VOC2007 images)
- Performance of the last saved model on the VOC2012 training set (likely overfitting, since it does well on the training set)

(figures: predictions on VOC2012 training images)
(figures: predictions on VOC2012 training images)
(figures: predictions on VOC2012 training images)
# Additions by this repository's author

- What's new:

  Added a ResNet-152 network to replace the original author's VGG16 (code: main_resnet.py, models/resnet.py).

- Implementation details:

  Only the output of ResNet-152's final fully connected layer is changed to 1470 values, which are then reshaped to 7x7x30 (a sketch of how that tensor decomposes follows the loss plots below).

- Results:

  Very poor. The suspected reason is that ResNet-152 is a classification network, and using it directly for regression hurts performance.

- Loss plots:

  | ResNet-152 + Adam optimizer | ResNet-152 + SGD optimizer |
  | --------------------------- | -------------------------- |
  | see bottom left             | see bottom right           |

(figures: loss curves for ResNet-152 with the Adam optimizer, left, and with SGD, right)
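As referenced above, the 1470 outputs follow the YOLOv1 layout used by yoloLoss(7, 2, 5, 0.5): per grid cell, 2 boxes of (x, y, w, h, confidence) followed by 20 VOC class scores. A minimal sketch of splitting the output (the helper name is mine; the channel order is assumed from the upstream yoloLoss convention):

```python
import torch

def split_yolo_output(pred):
    """pred: (N, 7, 7, 30) tensor as returned by resnet152_bo.forward"""
    boxes = pred[:, :, :, :10].contiguous().view(-1, 7, 7, 2, 5)  # per box: x, y, w, h, confidence
    class_scores = pred[:, :, :, 10:]                             # 20 VOC class scores
    return boxes, class_scores

out = torch.rand(2, 7, 7, 30)
boxes, classes = split_yolo_output(out)
print(boxes.size(), classes.size())  # (2, 7, 7, 2, 5) and (2, 7, 7, 20)
```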
- Optimization suggestions:

  1. ~~Try ResNet-50 with the final layers handled the way the original VGG16 version does~~

  2. Removed the last layer of ResNet-50 and added two VGG16-style fully connected layers with dropout, etc. The loss still would not go down, so this project will no longer pursue accuracy improvements.


- Special thanks:

  xiongzihua: [the original author][2]

  Zhu Hui (senior classmate): for taking the time to help me sort out the ideas and walk through the code




[1]: https://github.com/xiongzihua/pytorch-YOLO-v1
[2]: https://github.com/xiongzihua/pytorch-YOLO-v1
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/utils/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/testImgs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov1_pytorch/utils/testImgs/__init__.py
--------------------------------------------------------------------------------
/Yolov1_pytorch/utils/visualize.py:
--------------------------------------------------------------------------------
import visdom
import numpy as np

class Visualizer():
    def __init__(self, env='main', **kwargs):
        '''
        **kwargs: dict of visdom options
        '''
        self.vis = visdom.Visdom(env=env)
        self.index = {}  # per-plot x coordinate, as a dict
        self.log_text = ''
        self.env = env

    def plot_train_val(self, loss_train=None, loss_val=None):
        '''
        plot val loss and train loss in one figure
        '''
        x = self.index.get('train_val', 0)

        if x == 0:
            loss = loss_train if loss_train else loss_val
            win_y = np.column_stack((loss, loss))
            win_x = np.column_stack((x, x))
            self.win = self.vis.line(Y=win_y, X=win_x,
                                     env=self.env)
            # opts=dict(
            #     title='train_test_loss',
            # ))
            self.index['train_val'] = x + 1
            return

        if loss_train != None:
            self.vis.line(Y=np.array([loss_train]), X=np.array([x]),
                          win=self.win,
                          name='1',
                          update='append',
                          env=self.env)
            self.index['train_val'] = x + 5
        else:
            self.vis.line(Y=np.array([loss_val]), X=np.array([x]),
                          win=self.win,
                          name='2',
                          update='append',
                          env=self.env)

    def plot_many(self, d):
        '''
        d: dict of {name: value}
        '''
        for k, v in d.items():
            self.plot(k, v)

    def plot(self, name, y, **kwargs):
        '''
        plot('loss', 1.00)
        '''
        x = self.index.get(name, 0)  # if absent, start at 0
        self.vis.line(Y=np.array([y]), X=np.array([x]),
                      win=name,
                      opts=dict(title=name),
                      update=None if x == 0 else 'append',
                      **kwargs)
        self.index[name] = x + 1

    def log(self, info, win='log_text'):
        '''
        show text in a visdom box instead of writing it to a txt file (not implemented)
        '''
        pass
--------------------------------------------------------------------------------
/Yolov3_pytorch/checkpoints/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/checkpoints/.gitkeep
--------------------------------------------------------------------------------
/Yolov3_pytorch/checkpoints/download_weights.sh:
--------------------------------------------------------------------------------
#!/bin/bash

wget https://pjreddie.com/media/files/yolov3.weights
--------------------------------------------------------------------------------
/Yolov3_pytorch/config/coco.data:
--------------------------------------------------------------------------------
classes= 80
train=data/coco/trainvalno5k.txt
valid=data/coco/5k.txt
names=data/coco.names
backup=backup/
eval=coco
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/coco.names:
--------------------------------------------------------------------------------
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/get_coco_dataset.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh

# Clone COCO API
git clone https://github.com/pdollar/coco
cd coco

mkdir images
cd images

# The dataset was already uploaded by the author, so the image downloads are commented out
## Download Images
#wget -c https://pjreddie.com/media/files/train2014.zip
#wget -c https://pjreddie.com/media/files/val2014.zip

## Unzip
#unzip -q train2014.zip
#unzip -q val2014.zip

cd ..

# Download COCO Metadata
wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
wget -c https://pjreddie.com/media/files/coco/5k.part
wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
wget -c https://pjreddie.com/media/files/coco/labels.tgz
tar xzf labels.tgz
unzip -q instances_train-val2014.zip

# Set Up Image Lists (prefix each entry with the absolute path)
paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/dog.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/eagle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/eagle.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/giraffe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/giraffe.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/herd_of_horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/herd_of_horses.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img1.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img2.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img3.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/img4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/img4.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/messi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/messi.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/data/samples/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/data/samples/person.jpg
--------------------------------------------------------------------------------
/Yolov3_pytorch/datasets/datasets.py:
--------------------------------------------------------------------------------
import glob
import random
import os
import numpy as np

import torch

from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from skimage.transform import resize

import sys

class ImageFolder(Dataset):
    '''
    Used only by detect.py, for the demo samples
    '''
    def __init__(self, folder_path, img_size=416):
        self.files = sorted(glob.glob('%s/*.*' % folder_path))
        self.img_shape = (img_size, img_size)

    def __getitem__(self, index):
        img_path = self.files[index % len(self.files)]
        # Extract image
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        # return the image path and the processed image tensor
        return img_path, input_img

    def __len__(self):
        return len(self.files)


class ListDataset(Dataset):
    '''
    Dataset loader used for training
    '''
    def __init__(self, list_path, img_size=416):
        # read the txt listing the training images into a list
        with open(list_path, 'r') as file:
            self.img_files = file.readlines()
        # derive the label files from the image paths (COCO stores the ground-truth boxes as txt)
        self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
        # training input image size
        self.img_shape = (img_size, img_size)
        self.max_objects = 50  # assume at most 50 ground-truth objects per image (used when packing the label matrix)

    def __getitem__(self, index):

        '''
        fetch a single image and its ground truth during training
        '''

        # read the image as an array
        img_path = self.img_files[index % len(self.img_files)].rstrip()

        copy_img = Image.open(img_path).copy()
        img = np.array(copy_img)

        # Handles images with less than three channels:
        # if the image does not have 3 channels (i.e. it is corrupt), read the next one
        while len(img.shape) != 3:
            index += 1
            img_path = self.img_files[index % len(self.img_files)].rstrip()
            img = np.array(Image.open(img_path))

        # process the image array (augmentation, normalization)

        # pad the shorter side so the image becomes a square
        h, w, _ = img.shape
        # np.abs: absolute value
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
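        # Worked example (values illustrative, added for clarity): h=300, w=400
        # gives dim_diff=100 and pad1=pad2=50, so pad=((50, 50), (0, 0), (0, 0)):
        # 50 gray rows (value 128) above and below, producing a 400x400 square
        # before the resize below.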
        # after padding to a square, resize to the target shape (usually 416x416)
        padded_h, padded_w, _ = input_img.shape
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')

        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        #---------
        # ground-truth label handling
        #---------
        label_path = self.label_files[index % len(self.img_files)].rstrip()
        labels = None
        if os.path.exists(label_path):
            # e.g. shape [8, 5]: 8 bboxes in this image; column 0 is the class index,
            # columns 1-4 are normalized coordinates with values in [0, 1]
            labels = np.loadtxt(label_path).reshape(-1, 5)
            # Extract coordinates for unpadded + unscaled image
            x1 = w * (labels[:, 1] - labels[:, 3]/2)
            y1 = h * (labels[:, 2] - labels[:, 4]/2)
            x2 = w * (labels[:, 1] + labels[:, 3]/2)
            y2 = h * (labels[:, 2] + labels[:, 4]/2)
            # Adjust for added padding, so the boxes stay aligned with the padded image
            x1 += pad[1][0]
            y1 += pad[0][0]
            x2 += pad[1][0]
            y2 += pad[0][0]
            # Calculate ratios from coordinates
            # (re-normalize against the padded square)
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
        # Fill matrix
        # (pack the objects from the txt file into a fixed-size matrix, at most 50 per image)
        filled_labels = np.zeros((self.max_objects, 5))
        if labels is not None:
            filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
        filled_labels = torch.from_numpy(filled_labels)
        # return the image path, the processed image tensor, and the normalized
        # ground-truth boxes filled_labels[50, 5] with values in [0, 1]
        return img_path, input_img, filled_labels

    def __len__(self):
        return len(self.img_files)
--------------------------------------------------------------------------------
/Yolov3_pytorch/readme.md:
--------------------------------------------------------------------------------
# Refactored YOLO v3 implementation

----------

This repository is based on [PyTorch-YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) by [eriklindernoren](https://github.com/eriklindernoren); many thanks for his selfless contribution.


- [Original repository](https://github.com/eriklindernoren/PyTorch-YOLOv3)
- [Annotated version of the original](https://github.com/bobo0810/PyTorch-YOLOv3-master)
- [Refactored version](https://github.com/bobo0810/AnnotatedNetworkModelGit/tree/master/Yolov3_pytorch) (highly recommended! this repository)

----------

# Currently supported:

- Dataset: COCO
- Network: Darknet-53

# Improvements over the original:

- All parameters can be set in config.py
- Restructured code with extensive comments
- Visdom visualization added


----------

# Typical project structure

1. Define the network

![](https://github.com/bobo0810/imageRepo/blob/master/img/16409622.jpg)

2. Wrap the dataset

![](https://github.com/bobo0810/imageRepo/blob/master/img/38894621.jpg)

3. Utility classes

![](https://github.com/bobo0810/imageRepo/blob/master/img/98583532.jpg)

4. Main entry

![](https://github.com/bobo0810/imageRepo/blob/master/img/32257225.jpg)

- Environment:

  | python version | pytorch version |
  | -------------- | --------------- |
  | 3.5            | 0.4             |

----------

# Darknet-53 network structure

![](https://github.com/bobo0810/imageRepo/blob/master/img/16734558.jpg)

The following are useful when reading the source:

hyperparams

![](https://github.com/bobo0810/imageRepo/blob/master/img/97781689.jpg)

module_list

![](https://github.com/bobo0810/imageRepo/blob/master/img/10165593.jpg)

module_defs

![](https://github.com/bobo0810/imageRepo/blob/master/img/56737437.jpg)



----------

# Prepare the dataset:

Download the COCO dataset:

```
$ cd data/
$ bash get_coco_dataset.sh
```

Dataset layout:
```
data/coco
│
└───images
│   │   train2014
│   │   val2014
│
└───labels
│   │   train2014
│   │   val2014
│   ...
│   ...

```

----------

# Train:

1. Start Visdom (a visualization tool similar to TensorFlow's TensorBoard):

```
# First install Python server and client
pip install visdom
# Start the server
python -m visdom.server
```

2. Start training:

Set the parameters in config.py.

main.py will run train().

###### Because of save/load bugs in the original repository, saving to the official .weights format (binary, storing only conv and BN layer parameters; everything else is read from the cfg file) is not supported. Training saves .pt checkpoints (the whole model).

![](https://github.com/bobo0810/imageRepo/blob/master/img/68971633.jpg)

----------

# Test:

Purpose: evaluation, computes mAP.

1. Download the official pretrained weights:

```
$ cd checkpoints/
$ bash download_weights.sh
```

2. Point load_model_path in config.py at the pretrained model.

###### Both official .weights models and self-trained .pt models are supported.

3. Set the parameters in config.py.

Run test() in main.py.


| Model               | mAP (min. 50 IoU) |
|---------------------|-------------------|
| YOLOv3 (paper)      | 57.9              |
| YOLOv3 (official)   | 58.38             |
| YOLOv3 (this impl.) | 58.2              |



![](https://github.com/bobo0810/imageRepo/blob/master/img/77791130.jpg)

----------

# Predict:

Purpose: visualize predictions on images.

1. Point load_model_path in config.py at the pretrained model.
###### Both official .weights models and self-trained .pt models are supported.
2. Set the parameters in config.py.

main.py will run detect().


Results with the official model:

(figures: detection examples)
(figures: detection examples)
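For reference, a minimal sketch of the .weights/.pt loading rule described in the Test and Predict sections above (a sketch under assumptions: it presumes the upstream Darknet class and its load_weights helper are kept; paths come from config.py):

```python
import torch
from models.models import Darknet
from utils.config import opt_detect

if opt_detect.load_model_path.endswith('.weights'):
    # official binary format: only conv/BN parameters; the rest comes from the cfg
    model = Darknet(opt_detect.config_path, img_size=opt_detect.img_size)
    model.load_weights(opt_detect.load_model_path)
else:
    # self-trained .pt checkpoint: the whole model was saved with torch.save
    model = torch.load(opt_detect.load_model_path)
```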


----------

## References:

Recommended companion reading:

- [Implementing YOLOv3 from 0 to 1 (part one)](https://blog.csdn.net/qq_25737169/article/details/80530579)

- [Implementing YOLO v3 from 0 to 1 (part two)](https://blog.csdn.net/qq_25737169/article/details/80634360)

- [YOLO v3 paper translation](https://zhuanlan.zhihu.com/p/34945787)

- [YOLO v3 network structure analysis](https://blog.csdn.net/qq_37541097/article/details/81214953)

----------

# About the authors

- Original author: [eriklindernoren](https://github.com/eriklindernoren)

- Author of this repository: [Mr.Li](https://github.com/bobo0810)



--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bobo0810/PytorchNetHub/4bcfb3fa9b98a2aa3286bf5f627dab75b172f423/Yolov3_pytorch/utils/__init__.py
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/config.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# powered by Mr.Li
# default parameters
import os.path

class DefaultConfig_train():
    epochs = 30                               # number of training epochs
    image_folder = 'data/samples'             # dataset path
    batch_size = 16                           # batch size
    model_config_path = 'config/yolov3.cfg'   # network architecture definition
    data_config_path = 'config/coco.data'     # dataset usage configuration
    class_path = 'data/coco.names'            # COCO class labels
    conf_thres = 0.8                          # object confidence threshold
    nms_thres = 0.4                           # IoU threshold for NMS
    n_cpu = 0                                 # number of CPU threads to use during batch generation
    img_size = 416                            # input image size
    use_cuda = True                           # whether to use the GPU
    visdom = True                             # whether to plot the loss with visdom
    print_freq = 8                            # print every N batches during training
    lr_decay = 0.1                            # 1e-3 -> 1e-4

    checkpoint_interval = 1                   # save a checkpoint every N epochs
    checkpoint_dir = './checkpoints'          # where generated models are saved

    load_model_path = None                    # path of pretrained weights to load; None means do not load
    # load_model_path = checkpoint_dir + '/latestbobo.pt'  # pretrained weights (.pt only)

class DefaultConfig_test():
    epochs = 200                              # number of epochs
    batch_size = 16                           # size of each image batch
    model_config_path = 'config/yolov3.cfg'   # path to model config file
    data_config_path = 'config/coco.data'     # path to data config file

    checkpoint_dir = './checkpoints'          # where generated models are saved
    # load_model_path = None                  # path of pretrained weights to load; None means do not load
    load_model_path = checkpoint_dir + '/8yolov3.pt'  # pretrained weights (.weights or .pt)

    class_path = 'data/coco.names'            # path to class label file
    iou_thres = 0.5                           # IoU threshold required to qualify as detected
    conf_thres = 0.5                          # object confidence threshold
    nms_thres = 0.45                          # IoU threshold for non-maximum suppression
    n_cpu = 0                                 # number of CPU threads to use during batch generation
    img_size = 416                            # size of each image dimension
    use_cuda = True                           # whether to use cuda if available


class DefaultConfig_detect():
    image_folder = 'data/samples'             # path to dataset
    config_path = 'config/yolov3.cfg'         # path to model config file

    checkpoint_dir = './checkpoints'          # where generated models are saved
    # load_model_path = None                  # path of pretrained weights to load; None means do not load
    load_model_path = checkpoint_dir + '/yolov3.weights'  # pretrained weights (.weights or .pt)

    class_path = 'data/coco.names'            # path to class label file
    conf_thres = 0.8                          # object confidence threshold
    nms_thres = 0.4                           # IoU threshold for non-maximum suppression
    batch_size = 1                            # size of the batches
    n_cpu = 8                                 # number of CPU threads to use during batch generation
    img_size = 416                            # size of each image dimension
    use_cuda = True                           # whether to use cuda if available


# instantiate one object of each config class
opt_train = DefaultConfig_train()
opt_test = DefaultConfig_test()
opt_detect = DefaultConfig_detect()
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/parse_config.py:
--------------------------------------------------------------------------------


def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    '''
    Parses the yolo-v3 layer configuration file and returns the module
    definitions, one dict per block.
    path: path to yolov3.cfg
    '''
    file = open(path, 'r')
    # read line by line into a list
    lines = file.read().split('\n')
    # filter out lines starting with '#', i.e. comments
    lines = [x for x in lines if x and not x.startswith('#')]
    # strip whitespace (including \n, \r, \t) from both ends
    lines = [x.rstrip().lstrip() for x in lines]
    module_defs = []
    for line in lines:
        # a line starting with '[' marks the start of a new block
        if line.startswith('['):
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            key, value = line.split("=")
            value = value.strip()
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs

def parse_data_config(path):
    """Parses the dataloader configuration file"""
    options = dict()
    # default to 4 GPUs
    options['gpus'] = '0,1,2,3'
    # number of worker threads used by the data loader
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, value = line.split('=')
        options[key.strip()] = value.strip()
    return options
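Illustratively, given the cfg fragment below, parse_model_config returns one dict per block; values stay strings, and the convolutional default batch_normalize = 0 is overwritten when the key appears (a made-up minimal example):

```python
# yolov3.cfg fragment:
#   [convolutional]
#   batch_normalize=1
#   filters=32
#
# parse_model_config(...) returns:
#   [{'type': 'convolutional', 'batch_normalize': '1', 'filters': '32'}]
```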
--------------------------------------------------------------------------------
/Yolov3_pytorch/utils/visualize.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding:utf-8 -*-
# powered by Mr.Li
import visdom
import time
import numpy as np
import torch

class Visualizer(object):
    '''
    Wraps the basic visdom operations; the native visdom API is still
    available through `self.vis.function`.
    '''
    def __init__(self, env='default', **kwargs):
        self.vis = visdom.Visdom(env=env, **kwargs)
        # index of the point being plotted, i.e. the x coordinate;
        # stores e.g. ('loss', 23), meaning the 23rd point of 'loss'
        self.index = {}
        self.log_text = ''

    def reinit(self, env='default', **kwargs):
        '''
        re-initialize with a new visdom configuration
        '''
        self.vis = visdom.Visdom(env=env, **kwargs)
        return self

    def plot_many(self, d):
        '''
        plot several loss figures at once
        @params d: dict of (name, value), i.e. ('loss', 0.11)
        '''
        for k, v in d.items():
            self.plot(k, v)

    def img_many(self, d):
        '''
        draw several images at once
        '''
        for k, v in d.items():
            self.img(k, v)

    def plot(self, name, y, **kwargs):
        '''
        self.plot('loss', 1.00)
        '''
        # get the current index
        x = self.index.get(name, 0)
        self.vis.line(Y=np.array([y]), X=np.array([x]),
                      win=name,  # window name
                      opts=dict(title=name),
                      update=None if x == 0 else 'append',  # append to the existing trace
                      **kwargs
                      )
        # increment the index
        self.index[name] = x + 1

    def img(self, name, img_, **kwargs):
        '''
        self.img('input_img', t.Tensor(64, 64))
        self.img('input_imgs', t.Tensor(3, 64, 64))
        self.img('input_imgs', t.Tensor(100, 1, 64, 64))
        self.img('input_imgs', t.Tensor(100, 3, 64, 64), nrows=10)

        !!! don't ~~self.img('input_imgs', t.Tensor(100, 64, 64), nrows=10)~~ !!!
        '''
        self.vis.images(img_.cpu().numpy(),
                        win=name,
                        opts=dict(title=name),
                        **kwargs
                        )

    def log(self, info, win='log_text'):
        '''
        self.log({'loss': 1, 'lr': 0.0001})
        append a timestamped entry to the log window
        '''
        self.log_text += ('[{time}] {info} <br>'.format(
            time=time.strftime('%m%d_%H%M%S'),
            info=info))
        self.vis.text(self.log_text, win)

    def __getattr__(self, name):
        return getattr(self.vis, name)

    def create_vis_plot(self, _xlabel, _ylabel, _title, _legend):
        '''
        create a new visualization plot
        '''
        viz = visdom.Visdom()
        return viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(
                xlabel=_xlabel,
                ylabel=_ylabel,
                title=_title,
                legend=_legend
            )
        )
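A minimal usage sketch of the Visualizer above (illustrative; it assumes a visdom server is running via `python -m visdom.server`):

```python
from utils.visualize import Visualizer

vis = Visualizer(env='demo')
for step in range(10):
    vis.plot('loss', 1.0 / (step + 1))  # each call appends one point to the 'loss' window
vis.log({'epoch': 1, 'lr': 1e-3})       # appends a timestamped entry to the log window
```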