├── .github └── ISSUE_TEMPLATE │ └── issue-template.md ├── .gitignore ├── README.md ├── article_link └── README.md ├── course_ppt └── README.md ├── data_set ├── README.md └── split_data.py ├── deploying_service ├── deploying_pytorch │ ├── convert_onnx_cls │ │ ├── class_indices.json │ │ ├── main.py │ │ └── model.py │ └── pytorch_flask_service │ │ ├── class_indices.json │ │ ├── main.py │ │ ├── model.py │ │ ├── requirements.txt │ │ ├── static │ │ └── js │ │ │ └── jquery.min.js │ │ └── templates │ │ └── up.html └── pruning_model_pytorch │ ├── class_indices.json │ ├── main.py │ ├── model.py │ ├── predict.py │ └── train.py ├── others_project ├── draw_dilated_conv │ └── main.py ├── kmeans_anchors │ ├── main.py │ ├── plot_kmeans.py │ ├── read_voc.py │ └── yolo_kmeans.py ├── openvinotest │ └── openvino_cls_test │ │ ├── class_indices.json │ │ ├── create_imagenet_annotation.py │ │ ├── float32vsint8.py │ │ ├── main.py │ │ ├── model.py │ │ └── speed_test.py ├── readPbFile │ ├── README.md │ ├── export │ │ └── checkpoint │ ├── pascal_label_map.pbtxt │ ├── readPb.py │ ├── test_images │ │ └── image_info.txt │ └── using_function.py ├── textcnnKeras │ ├── dataGenerator.py │ ├── data_link.txt │ ├── main.py │ └── models.py └── trans_widerface_to_xml │ ├── create_xml.py │ └── main.py ├── pytorch_classification ├── ConfusionMatrix │ ├── class_indices.json │ ├── main.py │ └── model.py ├── README.md ├── Test10_regnet │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── pretrain_weights.py │ ├── train.py │ └── utils.py ├── Test11_efficientnetV2 │ ├── class_indices.json │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ ├── trans_effv2_weights.py │ └── utils.py ├── Test1_official_demo │ ├── model.py │ ├── predict.py │ └── train.py ├── Test2_alexnet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test3_vggnet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test4_googlenet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test5_resnet │ ├── README.md │ ├── batch_predict.py │ ├── class_indices.json │ ├── load_weights.py │ ├── model.py │ ├── predict.py │ └── train.py ├── Test6_mobilenet │ ├── class_indices.json │ ├── model_v2.py │ ├── model_v3.py │ ├── predict.py │ └── train.py ├── Test7_shufflenet │ ├── class_indices.json │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── Test8_densenet │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── Test9_efficientNet │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── requirements.txt │ ├── train.py │ ├── trans_weights_to_pytorch.py │ └── utils.py ├── analyze_weights_featuremap │ ├── alexnet_model.py │ ├── analyze_feature_map.py │ ├── analyze_kernel_weight.py │ └── resnet_model.py ├── custom_dataset │ ├── main.py │ ├── my_dataset.py │ └── utils.py ├── mini_imagenet │ ├── README.md │ ├── imagenet_class_index.json │ ├── model.py │ ├── multi_train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_eval_utils.py │ ├── my_dataset.py │ ├── restructure_csv.py │ ├── train_multi_gpu_using_launch.py │ └── train_single_gpu.py ├── model_complexity │ ├── main.py │ ├── model.py │ └── utils.py ├── swin_transformer │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── tensorboard_test │ ├── data_utils.py │ ├── model.py │ ├── my_dataset.py │ ├── requirements.txt │ ├── train.py │ └── train_eval_utils.py ├── train_multi_GPU │ ├── README.md │ ├── accuracy.png │ ├── model.py 
│ ├── multi_train_utils │ │ ├── distributed_utils.py │ │ └── train_eval_utils.py │ ├── my_dataset.py │ ├── plot_results.py │ ├── requirements.txt │ ├── runs │ │ └── Nov07_18-58-35_wz │ │ │ └── events.out.tfevents.1604746311.localhost.41577.0 │ ├── syncbn.png │ ├── train_multi_gpu_using_launch.py │ ├── train_multi_gpu_using_spawn.py │ ├── train_single_gpu.py │ ├── training_time.png │ └── utils.py └── vision_transformer │ ├── flops.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ ├── utils.py │ └── vit_model.py ├── pytorch_object_detection ├── faster_rcnn │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ ├── mobilenetv2_model.py │ │ ├── resnet50_fpn_model.py │ │ └── vgg_model.py │ ├── draw_box_utils.py │ ├── fasterRCNN.png │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── faster_rcnn_framework.py │ │ ├── image_list.py │ │ ├── roi_head.py │ │ ├── rpn_function.py │ │ └── transform.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict.py │ ├── record_mAP.txt │ ├── requirements.txt │ ├── split_data.py │ ├── train_mobilenetv2.py │ ├── train_multi_GPU.py │ ├── train_res50_fpn.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── retinaNet │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ └── resnet50_fpn_model.py │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── anchor_utils.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── image_list.py │ │ ├── losses.py │ │ ├── retinanet.py │ │ └── transform.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── results20210421-142632.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── ssd │ ├── README.md │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict_test.py │ ├── record_mAP.txt │ ├── requirements.txt │ ├── res50_ssd.png │ ├── src │ │ ├── __init__.py │ │ ├── res50_backbone.py │ │ ├── ssd_model.py │ │ └── utils.py │ ├── train_multi_GPU.py │ ├── train_ssd300.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── train_coco_dataset │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ ├── mobilenetv2_model.py │ │ ├── resnet50_fpn_model.py │ │ └── vgg_model.py │ ├── coco80_indices.json │ ├── coco91_to_80.json │ ├── compute_receptive_field.py │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── faster_rcnn_framework.py │ │ ├── image_list.py │ │ ├── roi_head.py │ │ ├── rpn_function.py │ │ └── transform.py │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── results20210412-092355.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py └── yolov3_spp │ ├── README.md │ 
├── build_utils │ ├── __init__.py │ ├── datasets.py │ ├── img_utils.py │ ├── layers.py │ ├── parse_config.py │ ├── torch_utils.py │ └── utils.py │ ├── calculate_dataset.py │ ├── cfg │ ├── hyp.yaml │ └── yolov3-spp.cfg │ ├── data │ └── pascal_voc_classes.json │ ├── draw_box_utils.py │ ├── export_onnx.py │ ├── load_onnx_test.py │ ├── models.py │ ├── predict_test.py │ ├── requirements.txt │ ├── results20210515-152935.txt │ ├── runs │ └── Oct28_17-55-29_wz │ │ └── events.out.tfevents.1603791769.localhost.localdomain.178338.0 │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ ├── __init__.py │ ├── coco_eval.py │ ├── coco_utils.py │ ├── distributed_utils.py │ ├── group_by_aspect_ratio.py │ └── train_eval_utils.py │ ├── trans_voc2yolo.py │ ├── validation.py │ └── yolov3spp.png ├── pytorch_segmentation ├── deeplab_v3 │ ├── README.md │ ├── deeplabv3_resnet50.png │ ├── get_palette.py │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20211027-104607.txt │ ├── src │ │ ├── __init__.py │ │ ├── deeplabv3_model.py │ │ ├── mobilenet_backbone.py │ │ └── resnet_backbone.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py ├── fcn │ ├── README.md │ ├── get_palette.py │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20210918-122740.txt │ ├── src │ │ ├── __init__.py │ │ ├── backbone.py │ │ └── fcn_model.py │ ├── torch_fcn.png │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py └── lraspp │ ├── README.md │ ├── get_palette.py │ ├── lraspp.png │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20211028-105233.txt │ ├── src │ ├── __init__.py │ ├── lraspp_model.py │ └── mobilenet_backbone.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ ├── __init__.py │ ├── distributed_utils.py │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py ├── summary_problem.md └── tensorflow_classification ├── ConfusionMatrix ├── class_indices.json ├── main.py └── model.py ├── README.md ├── Test11_efficientnetV2 ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── Test1_official_demo ├── model.py └── train.py ├── Test2_alexnet ├── class_indices.json ├── fine_train_alexnet.py ├── model.py ├── predict.py ├── read_pth.py ├── train.py └── trainGPU.py ├── Test3_vgg ├── class_indices.json ├── fine_train_vgg16.py ├── model.py ├── predict.py ├── read_ckpt.py ├── train.py └── trainGPU.py ├── Test4_goolenet ├── class_indices.json ├── model.py ├── model_add_bn.py ├── predict.py ├── read_pth.py ├── train.py ├── trainGPU.py └── train_add_bn.py ├── Test5_resnet ├── batch_predict.py ├── class_indices.json ├── model.py ├── predict.py ├── read_ckpt.py ├── read_h5.py ├── subclassed_model.py ├── train.py └── trainGPU.py ├── Test6_mobilenet ├── model_v2.py ├── model_v3.py ├── predict.py ├── read_ckpt.py ├── trainGPU_mobilenet_v2.py ├── train_mobilenet_v2.py ├── train_mobilenet_v3.py ├── trans_v3_weights.py └── utils.py ├── Test7_shuffleNet ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── Test9_efficientNet ├── model.py ├── predict.py ├── train.py └── utils.py ├── analyze_weights_featuremap ├── alexnet_model.py ├── 
analyze_feature_map.py └── analyze_kernel_weight.py ├── custom_dataset ├── train_fit.py └── utils.py ├── swin_transformer ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── tensorboard_test ├── train_fit.py └── train_not_fit.py └── vision_transformer ├── predict.py ├── train.py ├── trans_weights.py ├── utils.py └── vit_model.py
/.github/ISSUE_TEMPLATE/issue-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Issue template 3 | about: Use this template for reporting your problem 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **System information** 11 | * Have I written custom code: 12 | * OS platform (e.g., Windows 10 or Linux Ubuntu 16.04): 13 | * Python version: 14 | * Deep learning framework and version (e.g., TensorFlow 2.1 or PyTorch 1.3): 15 | * Use GPU or not: 16 | * CUDA/cuDNN version (if you use a GPU): 17 | * The network you trained (e.g., ResNet-34): 18 | 19 | **Describe the current behavior** 20 | 21 | **Error info / logs** 22 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | ##ignore this file## 2 | *.idea 3 | __pycache__ 4 | *.zip 5 | flower_data 6 | *.h5 7 | *.pth 8 | *.pt 9 | *.jpg 10 | *.ckpt.* 11 | *.ckpt 12 | *.config 13 | *.gz 14 | *.onnx 15 | *.xml 16 | *.bin 17 | *.mapping 18 | checkpoint 19 | data 20 | VOCdevkit 21 | ssd_resnet50_v1_fpn_shared_box_predictor 22 | --------------------------------------------------------------------------------
/course_ppt/README.md: -------------------------------------------------------------------------------- 1 | # To keep the project compact, all of the course PPTs have been moved to Baidu Cloud 2 | 3 | ## Classification networks 4 | - **AlexNet** Link: https://pan.baidu.com/s/1RJn5lzY8LwrmckUPvXcjmg Password: 34ue 5 | - **VGG** Link: https://pan.baidu.com/s/1BnYpdaDwAIcgRm7YwakEZw Password: 8ev0 6 | - **GoogleNet** Link: https://pan.baidu.com/s/1XjZXprvayV3dDMvLjoOk3A Password: 9hq4 7 | - **ResNet** Link: https://pan.baidu.com/s/1I2LUlwCSjNKr37T0n3NKzg Password: f1s9 8 | - **ResNext** Link: https://pan.baidu.com/s/1-anFYX5572MJmiQym9D4Eg Password: f8ob 9 | - **MobileNet_v1_v2** Link: https://pan.baidu.com/s/1ReDDCuK8wyH0XqniUgiSYQ Password: ipqv 10 | - **MobileNet_v3** Link: https://pan.baidu.com/s/13mzSpyxuA4T4ki7kEN1Xqw Password: fp5g 11 | - **ShuffleNet_v1_v2** Link: https://pan.baidu.com/s/1-DDwePMPCDvjw08YU8nAAA Password: ad6n 12 | - **EfficientNet_v1** Link: https://pan.baidu.com/s/1Sep9W0vLzfjhcHAXr6Bv0Q Password: eufl 13 | - **EfficientNet_v2** Link: https://pan.baidu.com/s/1tesrgY4CHLmq6P7s7TcHCw Password: y2kz 14 | - **Transformer** Link: https://pan.baidu.com/s/1DE6RDySr7NS0HQ35gBqP_g Password: y9e7 15 | - **Vision Transformer** Link: https://pan.baidu.com/s/1wzpHG8EK5gxg6UCMscYqMw Password: cm1m 16 | - **Swin Transformer** Link: https://pan.baidu.com/s/1O6XEEZUb6B6AGYON7-EOgA Password: qkrn 17 | - **ConfusionMatrix** Link: https://pan.baidu.com/s/1EtKzHkZyv2XssYtqmGYCLg Password: uoo5 18 | 19 | 20 | ## Object detection networks 21 | - **R-CNN** Link: https://pan.baidu.com/s/1l_ZxkfJdyp3KoMLqwWbx5A Password: nm1l 22 | - **Fast R-CNN** Link: https://pan.baidu.com/s/1Pe_Tg43OVo-yZWj7t-_L6Q Password: fe73 23 | - **Faster R-CNN** Link: https://pan.baidu.com/s/16AA-d7f15etLkgKajuzpSw Password: 73h6 24 | - **FPN** Link: https://pan.baidu.com/s/1O9H0iqQMg9f_FZezUEKZ9g Password: qbl8 25 | - **SSD** Link: https://pan.baidu.com/s/15zF3GhIdg-E_tZX2Y2X-rw Password: u7k1 26 | - **RetinaNet** Link: https://pan.baidu.com/s/1beW612VCSnSu-v8iu_2-fA Password: vqbu 27 | - **YOLOv1** Link: https://pan.baidu.com/s/1vVyUNQHYEGjqosezlx_1Mg Password: b3i0 28 | - **YOLOv2** Link: https://pan.baidu.com/s/132aW1e_NYbaxxGi3cDVLYg Password: tak7 29 | - **YOLOv3** Link: https://pan.baidu.com/s/10oqZewzJmx5ptT9A4t-64w Password: npji 30 | - **YOLOv3SPP** Link: https://pan.baidu.com/s/15LRssnPez9pn6jRpW89Wlw Password: nv9f 31 | - **Calculate mAP** Link: https://pan.baidu.com/s/1jdA_n78J7nSUoOg6TTO5Bg Password: eh62 32 | - **Introduction to the COCO dataset** Link: https://pan.baidu.com/s/1HfCvjt-8o9j5a916IYNVjw Password: 6rec 33 | 34 | 35 | ## Image segmentation networks 36 | - **Introduction to semantic segmentation** Link: https://pan.baidu.com/s/1cwxe2wbaA_2DqNYADq3myA Password: zzij 37 | - **Transposed convolution** Link: https://pan.baidu.com/s/1A8688168fuWHyxJQtzupHw Password: pgnf 38 | - **FCN** Link: https://pan.baidu.com/s/1XLUneTLrdUyDAiV6kqi9rw Password: 126a --------------------------------------------------------------------------------
/data_set/README.md: -------------------------------------------------------------------------------- 1 | ## This folder holds the training data 2 | ### Usage: 3 | * (1) Create a new folder named "flower_data" inside data_set 4 | * (2) Download the flower classification dataset from [http://download.tensorflow.org/example_images/flower_photos.tgz](http://download.tensorflow.org/example_images/flower_photos.tgz) 5 | * (3) Extract the dataset into the flower_data folder 6 | * (4) Run the "split_data.py" script to automatically split the dataset into a training set (train) and a validation set (val) 7 | 8 | ``` 9 | ├── flower_data 10 | ├── flower_photos (the extracted dataset folder, 3670 samples) 11 | ├── train (the generated training set, 3306 samples) 12 | └── val (the generated validation set, 364 samples) 13 | ``` --------------------------------------------------------------------------------
/data_set/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from shutil import copy, rmtree 3 | import random 4 | 5 | 6 | def mk_file(file_path: str): 7 | if os.path.exists(file_path): 8 | # if the folder already exists, delete it first and then recreate it 9 | rmtree(file_path) 10 | os.makedirs(file_path) 11 | 12 | 13 | def main(): 14 | # make the random split reproducible 15 | random.seed(0) 16 | 17 | # move 10% of the dataset into the validation set 18 | split_rate = 0.1 19 | 20 | # point this to your extracted flower_photos folder 21 | cwd = os.getcwd() 22 | data_root = os.path.join(cwd, "flower_data") 23 | origin_flower_path = os.path.join(data_root, "flower_photos") 24 | assert os.path.exists(origin_flower_path), "path '{}' does not exist.".format(origin_flower_path) 25 | 26 | flower_class = [cla for cla in os.listdir(origin_flower_path) 27 | if os.path.isdir(os.path.join(origin_flower_path, cla))] 28 | 29 | # create the folder that holds the training set 30 | train_root = os.path.join(data_root, "train") 31 | mk_file(train_root) 32 | for cla in flower_class: 33 | # create a folder for each class 34 | mk_file(os.path.join(train_root, cla)) 35 | 36 | # create the folder that holds the validation set 37 | val_root = os.path.join(data_root, "val") 38 | mk_file(val_root) 39 | for cla in flower_class: 40 | # create a folder for each class 41 | mk_file(os.path.join(val_root, cla)) 42 | 43 | for cla in flower_class: 44 | cla_path = os.path.join(origin_flower_path, cla) 45 | images = os.listdir(cla_path) 46 | num = len(images) 47 | # randomly sample the images that go into the validation set 48 | eval_index = random.sample(images, k=int(num*split_rate)) 49 | for index, image in enumerate(images): 50 | if image in eval_index: 51 | # copy files assigned to the validation set into the corresponding folder 52 | image_path = os.path.join(cla_path, image) 53 | new_path = os.path.join(val_root, cla) 54 | copy(image_path, new_path) 55 | else: 56 | # copy files assigned to the training set into the corresponding folder 57 | image_path = os.path.join(cla_path, image) 58 | new_path = os.path.join(train_root, cla) 59 | copy(image_path, new_path) 60 | print("\r[{}] processing [{}/{}]".format(cla, index+1, num), end="") # processing bar 61 | print() 62 | 63 | print("processing done!") 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | --------------------------------------------------------------------------------
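After split_data.py has run, the generated train/val folders follow the one-sub-folder-per-class layout that torchvision's ImageFolder expects. A minimal illustrative sketch (not a file in this repository; assumes torchvision is installed and the script was run inside data_set):

```python
from torchvision import datasets, transforms

# flower_data/train/<class_name>/ -> one label per sub-folder
train_dataset = datasets.ImageFolder(
    root="flower_data/train",
    transform=transforms.Compose([transforms.Resize((224, 224)),
                                  transforms.ToTensor()]))
print(train_dataset.class_to_idx)  # {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}
```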
/deploying_service/deploying_pytorch/convert_onnx_cls/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /deploying_service/deploying_pytorch/pytorch_flask_service/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /deploying_service/deploying_pytorch/pytorch_flask_service/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==1.1.1 2 | Flask_Cors==3.0.9 3 | Pillow 4 | -------------------------------------------------------------------------------- /deploying_service/pruning_model_pytorch/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /deploying_service/pruning_model_pytorch/predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from model import resnet34 3 | from PIL import Image 4 | from torchvision import transforms 5 | import matplotlib.pyplot as plt 6 | import json 7 | 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | data_transform = transforms.Compose( 11 | [transforms.Resize(256), 12 | transforms.CenterCrop(224), 13 | transforms.ToTensor(), 14 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 15 | 16 | # load image 17 | img = Image.open("../tulip.jpg") 18 | plt.imshow(img) 19 | # [N, C, H, W] 20 | img = data_transform(img) 21 | # expand batch dimension 22 | img = torch.unsqueeze(img, dim=0) 23 | 24 | # read class_indict 25 | try: 26 | json_file = open('./class_indices.json', 'r') 27 | class_indict = json.load(json_file) 28 | except Exception as e: 29 | print(e) 30 | exit(-1) 31 | 32 | # create model 33 | model = resnet34(num_classes=5) 34 | # load model weights 35 | model_weight_path = "./resNet34.pth" 36 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 37 | model.eval() 38 | with torch.no_grad(): 39 | # predict class 40 | output = torch.squeeze(model(img)) 41 | predict = torch.softmax(output, dim=0) 42 | predict_cla = torch.argmax(predict).numpy() 43 | print(class_indict[str(predict_cla)], predict[predict_cla].numpy()) 44 | plt.show() 45 | -------------------------------------------------------------------------------- /others_project/kmeans_anchors/plot_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | np.random.seed(0) 4 | 5 | colors = np.array(['blue', 'black']) 6 | 7 | 8 | def plot_clusters(data, cls, clusters, title=""): 9 | if cls is None: 10 | c = [colors[0]] * data.shape[0] 11 | else: 12 | c = colors[cls].tolist() 13 | 14 | plt.scatter(data[:, 0], data[:, 1], c=c) 15 | for i, clus in enumerate(clusters): 16 | plt.scatter(clus[0], clus[1], c='gold', marker='*', s=150) 17 | plt.title(title) 18 | plt.show() 19 | plt.close() 20 | 21 | 22 | def distances(data, 
clusters): 23 | xy1 = data[:, None] # [N,1,2] 24 | xy2 = clusters[None] # [1,M,2] 25 | d = np.sum(np.power(xy2 - xy1, 2), axis=-1) 26 | return d 27 | 28 | 29 | def k_means(data, k, dist=np.mean): 30 | """ 31 | k-means method 32 | Args: 33 | data: the data to cluster 34 | k: number of clusters 35 | dist: method used to update the cluster coordinates 36 | """ 37 | data_number = data.shape[0] 38 | last_nearest = np.zeros((data_number,)) 39 | 40 | # init k clusters 41 | clusters = data[np.random.choice(data_number, k, replace=False)] 42 | print(f"random cluster: \n {clusters}") 43 | # plot 44 | plot_clusters(data, None, clusters, "random clusters") 45 | 46 | step = 0 47 | while True: 48 | d = distances(data, clusters) 49 | current_nearest = np.argmin(d, axis=1) 50 | 51 | # plot 52 | plot_clusters(data, current_nearest, clusters, f"step {step}") 53 | 54 | if (last_nearest == current_nearest).all(): 55 | break # clusters won't change 56 | for cluster in range(k): 57 | # update clusters 58 | clusters[cluster] = dist(data[current_nearest == cluster], axis=0) 59 | last_nearest = current_nearest 60 | step += 1 61 | 62 | return clusters 63 | 64 | 65 | def main(): 66 | x1, y1 = [np.random.normal(loc=1., size=150) for _ in range(2)] 67 | x2, y2 = [np.random.normal(loc=5., size=150) for _ in range(2)] 68 | 69 | x = np.concatenate([x1, x2]) 70 | y = np.concatenate([y1, y2]) 71 | 72 | plt.scatter(x, y, c='blue') 73 | plt.title("initial data") 74 | plt.show() 75 | plt.close() 76 | 77 | clusters = k_means(np.concatenate([x[:, None], y[:, None]], axis=-1), k=2) 78 | print(f"k-means clusters: \n {clusters}") 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | --------------------------------------------------------------------------------
/others_project/kmeans_anchors/yolo_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def wh_iou(wh1, wh2): 5 | # Returns the nxm IoU matrix.
wh1 is nx2, wh2 is mx2 6 | wh1 = wh1[:, None] # [N,1,2] 7 | wh2 = wh2[None] # [1,M,2] 8 | inter = np.minimum(wh1, wh2).prod(2) # [N,M] 9 | return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) 10 | 11 | 12 | def k_means(boxes, k, dist=np.median): 13 | """ 14 | yolo k-means method 15 | refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py 16 | Args: 17 | boxes: the bounding boxes to cluster 18 | k: number of clusters 19 | dist: method used to update the cluster coordinates (the median is used by default; it works slightly better than the mean) 20 | """ 21 | box_number = boxes.shape[0] 22 | last_nearest = np.zeros((box_number,)) 23 | # np.random.seed(0) # fix the random seed 24 | 25 | # init k clusters 26 | clusters = boxes[np.random.choice(box_number, k, replace=False)] 27 | 28 | while True: 29 | distances = 1 - wh_iou(boxes, clusters) 30 | current_nearest = np.argmin(distances, axis=1) 31 | if (last_nearest == current_nearest).all(): 32 | break # clusters won't change 33 | for cluster in range(k): 34 | # update clusters 35 | clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0) 36 | 37 | last_nearest = current_nearest 38 | 39 | return clusters 40 | --------------------------------------------------------------------------------
/others_project/openvinotest/openvino_cls_test/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/others_project/openvinotest/openvino_cls_test/create_imagenet_annotation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | image_dir = "/home/w180662/my_project/my_github/data_set/flower_data/train" 5 | assert os.path.exists(image_dir), "image dir does not exist..." 6 | 7 | img_list = glob.glob(os.path.join(image_dir, "*", "*.jpg")) 8 | assert len(img_list) > 0, "No images(.jpg) were found in image dir..."
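# Added remark (not in the original script): the glob pattern above assumes one
# sub-folder per class, i.e. image_dir/<class_name>/xxx.jpg, which is the same
# layout produced by data_set/split_data.py.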
9 | 10 | classes_info = os.listdir(image_dir) 11 | classes_info.sort() 12 | classes_dict = {} 13 | 14 | # create label file 15 | with open("my_labels.txt", "w") as lw: 16 | # note: when there is no background class, the index starts from 0 17 | for index, c in enumerate(classes_info, start=0): 18 | txt = "{}:{}".format(index, c) 19 | if index != len(classes_info) - 1: # no newline after the last entry 20 | txt += "\n" 21 | lw.write(txt) 22 | classes_dict.update({c: str(index)}) 23 | print("create my_labels.txt successful...") 24 | 25 | # create annotation file 26 | with open("my_annotation.txt", "w") as aw: 27 | for i, img in enumerate(img_list, start=1): 28 | img_classes = classes_dict[img.split("/")[-2]] 29 | txt = "{} {}".format(img, img_classes) 30 | if i != len(img_list): # no newline after the last entry 31 | txt += "\n" 32 | aw.write(txt) 33 | print("create my_annotation.txt successful...") 34 | --------------------------------------------------------------------------------
/others_project/readPbFile/README.md: -------------------------------------------------------------------------------- 1 | This project reads a frozen .pb file and runs prediction with it 2 | Usage: 3 | (1) Prepare the frozen .pb file, the .pbtxt label file, and the test images 4 | (2) Edit the relevant settings in the info.config file 5 | 6 | ![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example1.jpg) 7 | ![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example2.jpg) --------------------------------------------------------------------------------
/others_project/readPbFile/export/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model.ckpt" 2 | all_model_checkpoint_paths: "model.ckpt" 3 | --------------------------------------------------------------------------------
/others_project/readPbFile/pascal_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 1 3 | name: 'aeroplane' 4 | } 5 | 6 | item { 7 | id: 2 8 | name: 'bicycle' 9 | } 10 | 11 | item { 12 | id: 3 13 | name: 'bird' 14 | } 15 | 16 | item { 17 | id: 4 18 | name: 'boat' 19 | } 20 | 21 | item { 22 | id: 5 23 | name: 'bottle' 24 | } 25 | 26 | item { 27 | id: 6 28 | name: 'bus' 29 | } 30 | 31 | item { 32 | id: 7 33 | name: 'car' 34 | } 35 | 36 | item { 37 | id: 8 38 | name: 'cat' 39 | } 40 | 41 | item { 42 | id: 9 43 | name: 'chair' 44 | } 45 | 46 | item { 47 | id: 10 48 | name: 'cow' 49 | } 50 | 51 | item { 52 | id: 11 53 | name: 'diningtable' 54 | } 55 | 56 | item { 57 | id: 12 58 | name: 'dog' 59 | } 60 | 61 | item { 62 | id: 13 63 | name: 'horse' 64 | } 65 | 66 | item { 67 | id: 14 68 | name: 'motorbike' 69 | } 70 | 71 | item { 72 | id: 15 73 | name: 'person' 74 | } 75 | 76 | item { 77 | id: 16 78 | name: 'pottedplant' 79 | } 80 | 81 | item { 82 | id: 17 83 | name: 'sheep' 84 | } 85 | 86 | item { 87 | id: 18 88 | name: 'sofa' 89 | } 90 | 91 | item { 92 | id: 19 93 | name: 'train' 94 | } 95 | 96 | item { 97 | id: 20 98 | name: 'tvmonitor' 99 | } 100 | --------------------------------------------------------------------------------
/others_project/readPbFile/readPb.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import configparser 3 | from distutils.version import StrictVersion 4 | import cv2 5 | import glob 6 | from using_function import draw_box, read_pbtxt, get_inAndout_tensor, convert_type, read_image 7 | 8 | if StrictVersion(tf.__version__) < StrictVersion('1.12.0'): 9 | raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.') 10 | 11 | # read the parameter configuration file 12 | conf = configparser.ConfigParser() 13 | conf.read('info.config')
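# An illustrative info.config for this script (added for clarity; the section and
# key names below are exactly the ones read by the conf.get calls that follow,
# while the values are placeholders to adapt):
#   [tensorflow]
#   path_to_frozen_graph = ./export/frozen_inference_graph.pb
#   path_to_labels = ./pascal_label_map.pbtxt
#   path_to_images = ./test_images/*.jpg
#   probability_thresh = 0.5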
14 | path_to_frozen_graph = conf.get('tensorflow', 'path_to_frozen_graph') 15 | path_to_labels = conf.get('tensorflow', 'path_to_labels') 16 | path_to_images = conf.get('tensorflow', 'path_to_images') 17 | probability_thresh = float(conf.get('tensorflow', 'probability_thresh')) 18 | 19 | # read the pbtxt label info 20 | category_index = read_pbtxt(path_to_labels) 21 | 22 | detection_graph = tf.Graph() 23 | with detection_graph.as_default(): 24 | od_graph_def = tf.GraphDef() 25 | with tf.gfile.GFile(path_to_frozen_graph, 'rb') as fid: 26 | serialized_graph = fid.read() 27 | od_graph_def.ParseFromString(serialized_graph) 28 | tf.import_graph_def(od_graph_def, name='') 29 | 30 | with detection_graph.as_default(): 31 | with tf.Session() as sess: 32 | # Get handles to input and output tensors 33 | tensor_dict, image_tensor = get_inAndout_tensor() 34 | test_image_paths = glob.glob(path_to_images) 35 | for image_path in test_image_paths: 36 | image_BGR, image_np_expanded = read_image(image_path) 37 | 38 | # Run inference 39 | output_dict = sess.run(tensor_dict, 40 | feed_dict={image_tensor: image_np_expanded}) 41 | # all outputs are float32 numpy arrays, so convert types as appropriate 42 | convert_type(output_dict) 43 | 44 | draw_box(image_BGR, 45 | output_dict['detection_boxes'], 46 | output_dict['detection_classes'], 47 | output_dict['detection_scores'], 48 | category_index, 49 | thresh=probability_thresh, 50 | line_thickness=5) 51 | cv2.namedWindow("prediction", cv2.WINDOW_AUTOSIZE) 52 | cv2.imshow("prediction", image_BGR) 53 | cv2.waitKey(0) 54 | --------------------------------------------------------------------------------
/others_project/readPbFile/test_images/image_info.txt: -------------------------------------------------------------------------------- 1 | 2 | Image provenance: 3 | image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg 4 | image2.jpg: Michael Miley, 5 | https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4 6 | 7 | --------------------------------------------------------------------------------
/others_project/textcnnKeras/dataGenerator.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | from sklearn.preprocessing import LabelEncoder 3 | import random 4 | 5 | 6 | def content2idList(content, word2id_dict): 7 | """ 8 | converts a piece of text into the corresponding per-character ids 9 | content: the input text 10 | word2id_dict: the dictionary used to look up the conversion 11 | """ 12 | idList = [] 13 | for word in content: # iterate over every character 14 | if word in word2id_dict: # only convert characters that exist in the dictionary; drop the rest 15 | idList.append(word2id_dict[word]) 16 | return idList 17 | 18 | 19 | def generatorInfo(batch_size, seq_length, num_classes, file_name): 20 | """ 21 | batch_size: batch size of the generated data 22 | seq_length: length of the input character sequence 23 | num_classes: number of text categories 24 | file_name: path of the file to read 25 | """ 26 | # read the vocabulary file 27 | with open('./cnews/cnews.vocab.txt', encoding='utf-8') as file: 28 | vocabulary_list = [k.strip() for k in file.readlines()] 29 | word2id_dict = dict([(b, a) for a, b in enumerate(vocabulary_list)]) 30 | 31 | # read the text file 32 | with open(file_name, encoding='utf-8') as file: 33 | line_list = [k.strip() for k in file.readlines()] 34 | data_label_list = [] # list for the data labels 35 | data_content_list = [] # list for the data contents 36 | for k in line_list: 37 | t = k.split(maxsplit=1) 38 | data_label_list.append(t[0]) 39 | data_content_list.append(t[1]) 40 | 41 | data_id_list = [content2idList(content, word2id_dict) for content in data_content_list] # convert the text data into sequences of ids 42 | # convert the list into an ndarray and unify every text sequence to length seq_length; 43 | # sequences longer than seq_length are truncated from the front (keeping the latter part), shorter ones are zero-padded at the front 44 | data_X = keras.preprocessing.sequence.pad_sequences(data_id_list, seq_length, truncating='pre') 45 | labelEncoder = LabelEncoder() 46 | data_y = labelEncoder.fit_transform(data_label_list) # convert the text labels into numeric labels 47 | data_Y = keras.utils.to_categorical(data_y, num_classes) # convert the numeric labels into one-hot labels 48 | 49 | while True: 50 | selected_index = random.sample(list(range(len(data_y))), k=batch_size) # randomly sample batch_size indices over the dataset 51 | batch_X = data_X[selected_index] # the sampled text data (id sequences) 52 | batch_Y = data_Y[selected_index] # the sampled labels (one-hot encoded) 53 | yield (batch_X, batch_Y) 54 | 55 | --------------------------------------------------------------------------------
/others_project/textcnnKeras/data_link.txt: -------------------------------------------------------------------------------- 1 | baidupan_url = "https://pan.baidu.com/s/1w452Z5eXbQSDQfgEBNUdlg" 2 | extract_code = "8cwv" --------------------------------------------------------------------------------
/others_project/textcnnKeras/main.py: -------------------------------------------------------------------------------- 1 | from models import text_cnn, simpleNet, text_cnn_V2 2 | from dataGenerator import generatorInfo 3 | from tensorflow import keras 4 | 5 | vocab_size = 5000 6 | seq_length = 600 7 | embedding_dim = 64 8 | num_classes = 10 9 | trainBatchSize = 64 10 | evalBatchSize = 200 11 | steps_per_epoch = 50000 // trainBatchSize 12 | epoch = 2 13 | workers = 4 14 | logdir = './log/' 15 | trainFileName = './cnews/cnews.train.txt' 16 | evalFileName = './cnews/cnews.test.txt' 17 | 18 | model = text_cnn(seq_length=seq_length, 19 | vocab_size=vocab_size, 20 | embedding_dim=embedding_dim, 21 | num_cla=num_classes, 22 | kernelNum=64) 23 | 24 | trainGenerator = generatorInfo(trainBatchSize, seq_length, num_classes, trainFileName) 25 | evalGenerator = generatorInfo(evalBatchSize, seq_length, num_classes, evalFileName) 26 | 27 | 28 | def lrSchedule(epoch): 29 | lr = keras.backend.get_value(model.optimizer.lr) 30 | if epoch % 1 == 0 and epoch != 0: 31 | lr = lr * 0.5 32 | return lr 33 | 34 | 35 | log = keras.callbacks.TensorBoard(log_dir=logdir, update_freq=500) 36 | reduceLr = keras.callbacks.LearningRateScheduler(lrSchedule, verbose=1) 37 | 38 | model.fit_generator(generator=trainGenerator, 39 | steps_per_epoch=steps_per_epoch, 40 | epochs=epoch, 41 | validation_data=evalGenerator, 42 | validation_steps=10, 43 | workers=1, 44 | callbacks=[log, reduceLr]) 45 | model.save_weights(logdir + 'train_weight.h5') 46 | --------------------------------------------------------------------------------
/pytorch_classification/ConfusionMatrix/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/pytorch_classification/README.md: -------------------------------------------------------------------------------- 1 | ## This folder holds the PyTorch implementations 2 | **model.py**: the model definition 3 | **train.py**: the script that trains the model 4 | **predict.py**: the script that runs prediction with the trained model 5 | **class_indices.json**: the label file corresponding to the training dataset 6 | 7 | ------ 8 | To use the training scripts you need to download the flower classification dataset and split it into a training set and a validation set. 9 | [Click here](../data_set/README.md) for how to download the dataset; a ready-made splitting script is provided as well --------------------------------------------------------------------------------
/pytorch_classification/Test10_regnet/my_dataset.py: 
-------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """custom dataset""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # 'RGB' is a color image, 'L' is a grayscale image 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # for the official default_collate implementation, see 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | --------------------------------------------------------------------------------
/pytorch_classification/Test10_regnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import create_regnet 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = create_regnet(model_name="RegNetY_400MF", num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-29.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | --------------------------------------------------------------------------------
/pytorch_classification/Test11_efficientnetV2/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/pytorch_classification/Test11_efficientnetV2/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """custom dataset""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # 'RGB' is a color image, 'L' is a grayscale image 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # for the official default_collate implementation, see 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | --------------------------------------------------------------------------------
/pytorch_classification/Test11_efficientnetV2/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import efficientnetv2_s as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | img_size = {"s": [300, 384], # train_size, val_size 16 | "m": [384, 480], 17 | "l": [384, 480]} 18 | num_model = "s" 19 | 20 | data_transform = transforms.Compose( 21 | [transforms.Resize(img_size[num_model][1]), 22 | transforms.CenterCrop(img_size[num_model][1]), 23 | transforms.ToTensor(), 24 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]) 25 | 26 | # load image 27 | img_path = "../tulip.jpg" 28 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 29 | img = Image.open(img_path) 30 | plt.imshow(img) 31 | # [N, C, H, W] 32 | img = data_transform(img) 33 | # expand batch dimension 34 | img = torch.unsqueeze(img, dim=0) 35 | 36 | # read class_indict 37 | json_path = './class_indices.json' 38 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 39 | 40 | json_file = open(json_path, "r") 41 | class_indict = json.load(json_file) 42 | 43 | # create model 44 | model = create_model(num_classes=5).to(device) 45 | # load model weights 46 | model_weight_path = "./weights/model-29.pth" 47 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 48 | model.eval() 49 | with torch.no_grad(): 50 | # predict class 51 | output = torch.squeeze(model(img.to(device))).cpu() 52 | predict = torch.softmax(output, dim=0) 53 | predict_cla = torch.argmax(predict).numpy() 54 | 55 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 56 | predict[predict_cla].numpy()) 57 | plt.title(print_res) 58 | for i in range(len(predict)): 59 | print("class: {:10} prob: 
{:.3}".format(class_indict[str(i)], 60 | predict[i].numpy())) 61 | plt.show() 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /pytorch_classification/Test1_official_demo/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class LeNet(nn.Module): 6 | def __init__(self): 7 | super(LeNet, self).__init__() 8 | self.conv1 = nn.Conv2d(3, 16, 5) 9 | self.pool1 = nn.MaxPool2d(2, 2) 10 | self.conv2 = nn.Conv2d(16, 32, 5) 11 | self.pool2 = nn.MaxPool2d(2, 2) 12 | self.fc1 = nn.Linear(32*5*5, 120) 13 | self.fc2 = nn.Linear(120, 84) 14 | self.fc3 = nn.Linear(84, 10) 15 | 16 | def forward(self, x): 17 | x = F.relu(self.conv1(x)) # input(3, 32, 32) output(16, 28, 28) 18 | x = self.pool1(x) # output(16, 14, 14) 19 | x = F.relu(self.conv2(x)) # output(32, 10, 10) 20 | x = self.pool2(x) # output(32, 5, 5) 21 | x = x.view(-1, 32*5*5) # output(32*5*5) 22 | x = F.relu(self.fc1(x)) # output(120) 23 | x = F.relu(self.fc2(x)) # output(84) 24 | x = self.fc3(x) # output(10) 25 | return x 26 | 27 | 28 | -------------------------------------------------------------------------------- /pytorch_classification/Test1_official_demo/predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | from PIL import Image 4 | 5 | from model import LeNet 6 | 7 | 8 | def main(): 9 | transform = transforms.Compose( 10 | [transforms.Resize((32, 32)), 11 | transforms.ToTensor(), 12 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 13 | 14 | classes = ('plane', 'car', 'bird', 'cat', 15 | 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 16 | 17 | net = LeNet() 18 | net.load_state_dict(torch.load('Lenet.pth')) 19 | 20 | im = Image.open('1.jpg') 21 | im = transform(im) # [C, H, W] 22 | im = torch.unsqueeze(im, dim=0) # [N, C, H, W] 23 | 24 | with torch.no_grad(): 25 | outputs = net(im) 26 | predict = torch.max(outputs, dim=1)[1].data.numpy() 27 | print(classes[int(predict)]) 28 | 29 | 30 | if __name__ == '__main__': 31 | main() 32 | -------------------------------------------------------------------------------- /pytorch_classification/Test2_alexnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test2_alexnet/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class AlexNet(nn.Module): 6 | def __init__(self, num_classes=1000, init_weights=False): 7 | super(AlexNet, self).__init__() 8 | self.features = nn.Sequential( 9 | nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2), # input[3, 224, 224] output[48, 55, 55] 10 | nn.ReLU(inplace=True), 11 | nn.MaxPool2d(kernel_size=3, stride=2), # output[48, 27, 27] 12 | nn.Conv2d(48, 128, kernel_size=5, padding=2), # output[128, 27, 27] 13 | nn.ReLU(inplace=True), 14 | nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 13, 13] 15 | nn.Conv2d(128, 192, kernel_size=3, padding=1), # output[192, 13, 13] 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(192, 192, kernel_size=3, padding=1), # output[192, 13, 13] 18 | nn.ReLU(inplace=True), 19 | 
nn.Conv2d(192, 128, kernel_size=3, padding=1), # output[128, 13, 13] 20 | nn.ReLU(inplace=True), 21 | nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 6, 6] 22 | ) 23 | self.classifier = nn.Sequential( 24 | nn.Dropout(p=0.5), 25 | nn.Linear(128 * 6 * 6, 2048), 26 | nn.ReLU(inplace=True), 27 | nn.Dropout(p=0.5), 28 | nn.Linear(2048, 2048), 29 | nn.ReLU(inplace=True), 30 | nn.Linear(2048, num_classes), 31 | ) 32 | if init_weights: 33 | self._initialize_weights() 34 | 35 | def forward(self, x): 36 | x = self.features(x) 37 | x = torch.flatten(x, start_dim=1) 38 | x = self.classifier(x) 39 | return x 40 | 41 | def _initialize_weights(self): 42 | for m in self.modules(): 43 | if isinstance(m, nn.Conv2d): 44 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 45 | if m.bias is not None: 46 | nn.init.constant_(m.bias, 0) 47 | elif isinstance(m, nn.Linear): 48 | nn.init.normal_(m.weight, 0, 0.01) 49 | nn.init.constant_(m.bias, 0) 50 | --------------------------------------------------------------------------------
/pytorch_classification/Test2_alexnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import AlexNet 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize((224, 224)), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = AlexNet(num_classes=5).to(device) 40 | 41 | # load model weights 42 | weights_path = "./AlexNet.pth" 43 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 44 | model.load_state_dict(torch.load(weights_path)) 45 | 46 | model.eval() 47 | with torch.no_grad(): 48 | # predict class 49 | output = torch.squeeze(model(img.to(device))).cpu() 50 | predict = torch.softmax(output, dim=0) 51 | predict_cla = torch.argmax(predict).numpy() 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 54 | predict[predict_cla].numpy()) 55 | plt.title(print_res) 56 | for i in range(len(predict)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | predict[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | --------------------------------------------------------------------------------
/pytorch_classification/Test3_vggnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/pytorch_classification/Test3_vggnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import vgg 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize((224, 224)), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | plt.imshow(img) 25 | # [N, C, H, W] 26 | img = data_transform(img) 27 | # expand batch dimension 28 | img = torch.unsqueeze(img, dim=0) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | # create model 38 | model = vgg(model_name="vgg16", num_classes=5).to(device) 39 | # load model weights 40 | weights_path = "./vgg16Net.pth" 41 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 42 | model.load_state_dict(torch.load(weights_path, map_location=device)) 43 | 44 | model.eval() 45 | with torch.no_grad(): 46 | # predict class 47 | output = torch.squeeze(model(img.to(device))).cpu() 48 | predict = torch.softmax(output, dim=0) 49 | predict_cla = torch.argmax(predict).numpy() 50 | 51 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 52 | predict[predict_cla].numpy()) 53 | plt.title(print_res) 54 | for i in range(len(predict)): 55 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 56 | predict[i].numpy())) 57 | plt.show() 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | --------------------------------------------------------------------------------
/pytorch_classification/Test4_googlenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/pytorch_classification/Test4_googlenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import GoogLeNet 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize((224, 224)), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | plt.imshow(img) 25 | # [N, C, H, W] 26 | img = data_transform(img) 27 | # expand batch dimension 28 | img = torch.unsqueeze(img, dim=0) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | # create model 38 | model = GoogLeNet(num_classes=5, aux_logits=False).to(device) 39 | 40 | # load model weights 41 | weights_path = "./googleNet.pth" 42 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 43 | missing_keys, unexpected_keys = model.load_state_dict(torch.load(weights_path, map_location=device), 44 | strict=False) 45 | 46 | model.eval() 47 | with torch.no_grad(): 48 | # predict class 49 | output = torch.squeeze(model(img.to(device))).cpu() 50 | predict = torch.softmax(output, dim=0) 51 | predict_cla = torch.argmax(predict).numpy() 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 54 | predict[predict_cla].numpy()) 55 | plt.title(print_res) 56 | for i in range(len(predict)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | predict[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | --------------------------------------------------------------------------------
/pytorch_classification/Test5_resnet/README.md: -------------------------------------------------------------------------------- 1 | ## File structure: 2 | ``` 3 | ├── model.py: the ResNet model definition 4 | ├── train.py: training script 5 | ├── predict.py: single-image prediction script 6 | └── batch_predict.py: batch-image prediction script 7 | ``` --------------------------------------------------------------------------------
/pytorch_classification/Test5_resnet/batch_predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | 8 | from model import resnet34 9 | 10 | 11 | def main(): 12 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 13 | 14 | data_transform = transforms.Compose( 15 | [transforms.Resize(256), 16 | transforms.CenterCrop(224), 17 | transforms.ToTensor(), 18 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 19 | 20 | # load image 21 | # point this to the folder of images you want to predict 22 | imgs_root = "/data/imgs" 23 | assert os.path.exists(imgs_root), f"file: '{imgs_root}' does not exist." 24 | # collect the paths of all jpg images in the folder 25 | img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(".jpg")] 26 | 27 | # read class_indict 28 | json_path = './class_indices.json' 29 | assert os.path.exists(json_path), f"file: '{json_path}' does not exist." 30 | 31 | json_file = open(json_path, "r") 32 | class_indict = json.load(json_file) 33 | 34 | # create model 35 | model = resnet34(num_classes=5).to(device) 36 | 37 | # load model weights 38 | weights_path = "./resNet34.pth" 39 | assert os.path.exists(weights_path), f"file: '{weights_path}' does not exist." 40 | model.load_state_dict(torch.load(weights_path, map_location=device)) 41 | 42 | # prediction 43 | model.eval() 44 | batch_size = 8 # how many images are packed into one batch per prediction 45 | with torch.no_grad(): 46 | for ids in range(0, len(img_path_list) // batch_size): 47 | img_list = [] 48 | for img_path in img_path_list[ids * batch_size: (ids + 1) * batch_size]: 49 | assert os.path.exists(img_path), f"file: '{img_path}' does not exist."
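# Added remark (not in the original script): Image.open() is not forced to RGB here,
# so a grayscale or RGBA jpg would give the wrong channel count for Normalize;
# img = Image.open(img_path).convert('RGB') is a safe variant for mixed inputs.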
50 | img = Image.open(img_path) 51 | img = data_transform(img) 52 | img_list.append(img) 53 | 54 | # batch img 55 | # 将img_list列表中的所有图像打包成一个batch 56 | batch_img = torch.stack(img_list, dim=0) 57 | # predict class 58 | output = model(batch_img.to(device)).cpu() 59 | predict = torch.softmax(output, dim=1) 60 | probs, classes = torch.max(predict, dim=1) 61 | 62 | for idx, (pro, cla) in enumerate(zip(probs, classes)): 63 | print("image: {} class: {} prob: {:.3}".format(img_path_list[ids * batch_size + idx], 64 | class_indict[str(cla.numpy())], 65 | pro.numpy())) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/load_weights.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | from model import resnet34 5 | 6 | 7 | def main(): 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | # load pretrain weights 11 | # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth 12 | model_weight_path = "./resnet34-pre.pth" 13 | assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path) 14 | 15 | # option1 16 | net = resnet34() 17 | net.load_state_dict(torch.load(model_weight_path, map_location=device)) 18 | # change fc layer structure 19 | in_channel = net.fc.in_features 20 | net.fc = nn.Linear(in_channel, 5) 21 | 22 | # option2 23 | # net = resnet34(num_classes=5) 24 | # pre_weights = torch.load(model_weight_path, map_location=device) 25 | # del_key = [] 26 | # for key, _ in pre_weights.items(): 27 | # if "fc" in key: 28 | # del_key.append(key) 29 | # 30 | # for key in del_key: 31 | # del pre_weights[key] 32 | # 33 | # missing_keys, unexpected_keys = net.load_state_dict(pre_weights, strict=False) 34 | # print("[missing_keys]:", *missing_keys, sep="\n") 35 | # print("[unexpected_keys]:", *unexpected_keys, sep="\n") 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import resnet34 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), 
"file: '{}' dose not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = resnet34(num_classes=5).to(device) 40 | 41 | # load model weights 42 | weights_path = "./resNet34.pth" 43 | assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path) 44 | model.load_state_dict(torch.load(weights_path, map_location=device)) 45 | 46 | # prediction 47 | model.eval() 48 | with torch.no_grad(): 49 | # predict class 50 | output = torch.squeeze(model(img.to(device))).cpu() 51 | predict = torch.softmax(output, dim=0) 52 | predict_cla = torch.argmax(predict).numpy() 53 | 54 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 55 | predict[predict_cla].numpy()) 56 | plt.title(print_res) 57 | for i in range(len(predict)): 58 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 59 | predict[i].numpy())) 60 | plt.show() 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /pytorch_classification/Test6_mobilenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test6_mobilenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model_v2 import MobileNetV2 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = MobileNetV2(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./MobileNetV2.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- 
/pytorch_classification/Test7_shufflenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test7_shufflenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """Custom dataset""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # mode 'RGB' is a color image, 'L' is grayscale 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # for the official default_collate implementation, see 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test7_shufflenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import shufflenet_v2_x1_0 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = shufflenet_v2_x1_0(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-29.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: 
{:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /pytorch_classification/Test8_densenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test8_densenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import densenet121 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = densenet121(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-3.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | 
-------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import efficientnet_b0 as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | img_size = {"B0": 224, 16 | "B1": 240, 17 | "B2": 260, 18 | "B3": 300, 19 | "B4": 380, 20 | "B5": 456, 21 | "B6": 528, 22 | "B7": 600} 23 | num_model = "B0" 24 | 25 | data_transform = transforms.Compose( 26 | [transforms.Resize(img_size[num_model]), 27 | transforms.CenterCrop(img_size[num_model]), 28 | transforms.ToTensor(), 29 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 30 | 31 | # load image 32 | img_path = "../tulip.jpg" 33 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 34 | img = Image.open(img_path) 35 | plt.imshow(img) 36 | # [N, C, H, W] 37 | img = data_transform(img) 38 | # expand batch dimension 39 | img = torch.unsqueeze(img, dim=0) 40 | 41 | # read class_indict 42 | json_path = './class_indices.json' 43 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 44 | 45 | json_file = open(json_path, "r") 46 | class_indict = json.load(json_file) 47 | 48 | # create model 49 | model = create_model(num_classes=5).to(device) 50 | # load model weights 51 | model_weight_path = "./weights/model-29.pth" 52 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 53 | model.eval() 54 | with torch.no_grad(): 55 | # predict class 56 | output = torch.squeeze(model(img.to(device))).cpu() 57 | predict = torch.softmax(output, dim=0) 58 | predict_cla = torch.argmax(predict).numpy() 59 | 60 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 61 | predict[predict_cla].numpy()) 62 | plt.title(print_res) 63 | for i in range(len(predict)): 64 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 65 | predict[i].numpy())) 66 | 
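# note (added): the img_size table above follows the published EfficientNet
# input resolutions (B0=224 ... B7=600); each variant is trained at its own
# resolution, so the crop size must match the loaded weights.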
plt.show() 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.5 2 | matplotlib==3.2.1 3 | tqdm==4.56.0 4 | torch>=1.7.1 5 | torchvision>=0.8.2 6 | -------------------------------------------------------------------------------- /pytorch_classification/analyze_weights_featuremap/alexnet_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class AlexNet(nn.Module): 6 | def __init__(self, num_classes=1000, init_weights=False): 7 | super(AlexNet, self).__init__() 8 | self.features = nn.Sequential( 9 | nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2), # input[3, 224, 224] output[48, 55, 55] 10 | nn.ReLU(inplace=True), 11 | nn.MaxPool2d(kernel_size=3, stride=2), # output[48, 27, 27] 12 | nn.Conv2d(48, 128, kernel_size=5, padding=2), # output[128, 27, 27] 13 | nn.ReLU(inplace=True), 14 | nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 13, 13] 15 | nn.Conv2d(128, 192, kernel_size=3, padding=1), # output[192, 13, 13] 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(192, 192, kernel_size=3, padding=1), # output[192, 13, 13] 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(192, 128, kernel_size=3, padding=1), # output[128, 13, 13] 20 | nn.ReLU(inplace=True), 21 | nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 6, 6] 22 | ) 23 | self.classifier = nn.Sequential( 24 | nn.Dropout(p=0.5), 25 | nn.Linear(128 * 6 * 6, 2048), 26 | nn.ReLU(inplace=True), 27 | nn.Dropout(p=0.5), 28 | nn.Linear(2048, 2048), 29 | nn.ReLU(inplace=True), 30 | nn.Linear(2048, num_classes), 31 | ) 32 | if init_weights: 33 | self._initialize_weights() 34 | 35 | def forward(self, x): 36 | outputs = [] 37 | for name, module in self.features.named_children(): 38 | x = module(x) 39 | if name in ["0", "3", "6"]: 40 | outputs.append(x) 41 | 42 | return outputs 43 | 44 | def _initialize_weights(self): 45 | for m in self.modules(): 46 | if isinstance(m, nn.Conv2d): 47 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 48 | if m.bias is not None: 49 | nn.init.constant_(m.bias, 0) 50 | elif isinstance(m, nn.Linear): 51 | nn.init.normal_(m.weight, 0, 0.01) 52 | nn.init.constant_(m.bias, 0) 53 | -------------------------------------------------------------------------------- /pytorch_classification/analyze_weights_featuremap/analyze_feature_map.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from alexnet_model import AlexNet 3 | from resnet_model import resnet34 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | from PIL import Image 7 | from torchvision import transforms 8 | 9 | data_transform = transforms.Compose( 10 | [transforms.Resize((224, 224)), 11 | transforms.ToTensor(), 12 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 13 | 14 | # data_transform = transforms.Compose( 15 | # [transforms.Resize(256), 16 | # transforms.CenterCrop(224), 17 | # transforms.ToTensor(), 18 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 19 | 20 | # create model 21 | model = AlexNet(num_classes=5) 22 | # model = resnet34(num_classes=5) 23 | # load model weights 24 | model_weight_path = "./AlexNet.pth" # "./resNet34.pth" 25 | model.load_state_dict(torch.load(model_weight_path)) 26 | print(model) 27 
| 28 | # load image 29 | img = Image.open("../tulip.jpg") 30 | # [N, C, H, W] 31 | img = data_transform(img) 32 | # expand batch dimension 33 | img = torch.unsqueeze(img, dim=0) 34 | 35 | # forward 36 | out_put = model(img) 37 | for feature_map in out_put: 38 | # [N, C, H, W] -> [C, H, W] 39 | im = np.squeeze(feature_map.detach().numpy()) 40 | # [C, H, W] -> [H, W, C] 41 | im = np.transpose(im, [1, 2, 0]) 42 | 43 | # show top 12 feature maps 44 | plt.figure() 45 | for i in range(12): 46 | ax = plt.subplot(3, 4, i+1) 47 | # [H, W, C] 48 | plt.imshow(im[:, :, i], cmap='gray') 49 | plt.show() 50 | 51 | -------------------------------------------------------------------------------- /pytorch_classification/analyze_weights_featuremap/analyze_kernel_weight.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from alexnet_model import AlexNet 3 | from resnet_model import resnet34 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | 8 | # create model 9 | model = AlexNet(num_classes=5) 10 | # model = resnet34(num_classes=5) 11 | # load model weights 12 | model_weight_path = "./AlexNet.pth" # "resNet34.pth" 13 | model.load_state_dict(torch.load(model_weight_path)) 14 | print(model) 15 | 16 | weights_keys = model.state_dict().keys() 17 | for key in weights_keys: 18 | # skip the num_batches_tracked params (in bn layers) 19 | if "num_batches_tracked" in key: 20 | continue 21 | # [kernel_number, kernel_channel, kernel_height, kernel_width] 22 | weight_t = model.state_dict()[key].numpy() 23 | 24 | # read a kernel information 25 | # k = weight_t[0, :, :, :] 26 | 27 | # calculate mean, std, min, max 28 | weight_mean = weight_t.mean() 29 | weight_std = weight_t.std(ddof=1) 30 | weight_min = weight_t.min() 31 | weight_max = weight_t.max() 32 | print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean, 33 | weight_std, 34 | weight_min, 35 | weight_max)) 36 | 37 | # plot hist image 38 | plt.close() 39 | weight_vec = np.reshape(weight_t, [-1]) 40 | plt.hist(weight_vec, bins=50) 41 | plt.title(key) 42 | plt.show() 43 | 44 | -------------------------------------------------------------------------------- /pytorch_classification/custom_dataset/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from torchvision import transforms 5 | 6 | from my_dataset import MyDataSet 7 | from utils import read_split_data, plot_data_loader_image 8 | 9 | # http://download.tensorflow.org/example_images/flower_photos.tgz 10 | root = "/home/wz/my_github/data_set/flower_data/flower_photos"  # root directory of the dataset 11 | 12 | 13 | def main(): 14 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 15 | print("using {} device.".format(device)) 16 | 17 | train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(root) 18 | 19 | data_transform = { 20 | "train": transforms.Compose([transforms.RandomResizedCrop(224), 21 | transforms.RandomHorizontalFlip(), 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]), 24 | "val": transforms.Compose([transforms.Resize(256), 25 | transforms.CenterCrop(224), 26 | transforms.ToTensor(), 27 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])} 28 | 29 | train_data_set = MyDataSet(images_path=train_images_path, 30 | images_class=train_images_label, 31 | transform=data_transform["train"]) 32 | 33 | batch_size = 8 34 | nw = min([os.cpu_count(), batch_size if
batch_size > 1 else 0, 8]) # number of workers 35 | print('Using {} dataloader workers'.format(nw)) 36 | train_loader = torch.utils.data.DataLoader(train_data_set, 37 | batch_size=batch_size, 38 | shuffle=True, 39 | num_workers=nw, 40 | collate_fn=train_data_set.collate_fn) 41 | 42 | # plot_data_loader_image(train_loader) 43 | 44 | for step, data in enumerate(train_loader): 45 | images, labels = data 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /pytorch_classification/custom_dataset/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | 39 | -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/README.md: -------------------------------------------------------------------------------- 1 | ## download mini-imagenet 2 | link: [https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ](https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ) password: hl31 3 | 4 | ## dataset path structure 5 | ``` 6 | ├── mini-imagenet: total 100 classes, 60000 images 7 | ├── images: 60000 images 8 | ├── train.csv: 64 classes, 38400 images 9 | ├── val.csv: 16 classes, 9600 images 10 | └── test.csv: 20 classes, 12000 images 11 | ``` -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/multi_train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_eval_utils import train_one_epoch, evaluate 2 | from .distributed_utils import init_distributed_mode, dist, cleanup 3 | -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/multi_train_utils/distributed_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def init_distributed_mode(args): 8 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 9 | args.rank = int(os.environ["RANK"]) 10 | args.world_size = int(os.environ['WORLD_SIZE']) 11 | args.gpu = int(os.environ['LOCAL_RANK']) 12 | elif 'SLURM_PROCID' in os.environ: 13 | args.rank = int(os.environ['SLURM_PROCID']) 14 | args.gpu = args.rank % torch.cuda.device_count() 15 | else: 16 | print('Not using distributed mode') 17 | 
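# note (added): RANK / WORLD_SIZE / LOCAL_RANK are set by
# torch.distributed.launch (or torchrun), and SLURM_PROCID by the Slurm
# scheduler; when none of them is present we fall back to single-process
# mode below.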
args.distributed = False 18 | return 19 | 20 | args.distributed = True 21 | 22 | torch.cuda.set_device(args.gpu) 23 | args.dist_backend = 'nccl' # 通信后端,nvidia GPU推荐使用NCCL 24 | print('| distributed init (rank {}): {}'.format( 25 | args.rank, args.dist_url), flush=True) 26 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 27 | world_size=args.world_size, rank=args.rank) 28 | dist.barrier() 29 | 30 | 31 | def cleanup(): 32 | dist.destroy_process_group() 33 | 34 | 35 | def is_dist_avail_and_initialized(): 36 | """检查是否支持分布式环境""" 37 | if not dist.is_available(): 38 | return False 39 | if not dist.is_initialized(): 40 | return False 41 | return True 42 | 43 | 44 | def get_world_size(): 45 | if not is_dist_avail_and_initialized(): 46 | return 1 47 | return dist.get_world_size() 48 | 49 | 50 | def get_rank(): 51 | if not is_dist_avail_and_initialized(): 52 | return 0 53 | return dist.get_rank() 54 | 55 | 56 | def is_main_process(): 57 | return get_rank() == 0 58 | 59 | 60 | def reduce_value(value, average=True): 61 | world_size = get_world_size() 62 | if world_size < 2: # 单GPU的情况 63 | return value 64 | 65 | with torch.no_grad(): 66 | dist.all_reduce(value) 67 | if average: 68 | value /= world_size 69 | 70 | return value 71 | 72 | 73 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 74 | 75 | def f(x): 76 | """根据step数返回一个学习率倍率因子""" 77 | if x >= warmup_iters: # 当迭代数大于给定的warmup_iters时,倍率因子为1 78 | return 1 79 | alpha = float(x) / warmup_iters 80 | # 迭代过程中倍率因子从warmup_factor -> 1 81 | return warmup_factor * (1 - alpha) + alpha 82 | 83 | return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f) 84 | -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from PIL import Image 4 | import pandas as pd 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | class MyDataSet(Dataset): 10 | """自定义数据集""" 11 | 12 | def __init__(self, 13 | root_dir: str, 14 | csv_name: str, 15 | json_path: str, 16 | transform=None): 17 | images_dir = os.path.join(root_dir, "images") 18 | assert os.path.exists(images_dir), "dir:'{}' not found.".format(images_dir) 19 | 20 | assert os.path.exists(json_path), "file:'{}' not found.".format(json_path) 21 | self.label_dict = json.load(open(json_path, "r")) 22 | 23 | csv_path = os.path.join(root_dir, csv_name) 24 | assert os.path.exists(csv_path), "file:'{}' not found.".format(csv_path) 25 | csv_data = pd.read_csv(csv_path) 26 | self.total_num = csv_data.shape[0] 27 | self.img_paths = [os.path.join(images_dir, i)for i in csv_data["filename"].values] 28 | self.img_label = [self.label_dict[i][0] for i in csv_data["label"].values] 29 | self.labels = set(csv_data["label"].values) 30 | 31 | self.transform = transform 32 | 33 | def __len__(self): 34 | return self.total_num 35 | 36 | def __getitem__(self, item): 37 | img = Image.open(self.img_paths[item]) 38 | # RGB为彩色图片,L为灰度图片 39 | if img.mode != 'RGB': 40 | raise ValueError("image: {} isn't RGB mode.".format(self.img_paths[item])) 41 | label = self.img_label[item] 42 | 43 | if self.transform is not None: 44 | img = self.transform(img) 45 | 46 | return img, label 47 | 48 | @staticmethod 49 | def collate_fn(batch): 50 | # 官方实现的default_collate可以参考 51 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 52 | images, labels = 
tuple(zip(*batch)) 53 | 54 | images = torch.stack(images, dim=0) 55 | labels = torch.as_tensor(labels) 56 | return images, labels 57 | -------------------------------------------------------------------------------- /pytorch_classification/model_complexity/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from fvcore.nn import FlopCountAnalysis, parameter_count_table 3 | from prettytable import PrettyTable 4 | from model import efficientnetv2_s 5 | 6 | 7 | def main(): 8 | model = efficientnetv2_s() 9 | 10 | # option1 11 | for name, para in model.named_parameters(): 12 | # 除head外,其他权重全部冻结 13 | if "head" not in name: 14 | para.requires_grad_(False) 15 | else: 16 | print("training {}".format(name)) 17 | 18 | complexity = model.complexity(224, 224, 3) 19 | table = PrettyTable() 20 | table.field_names = ["params", "freeze-params", "train-params", "FLOPs", "acts"] 21 | table.add_row([complexity["params"], 22 | complexity["freeze"], 23 | complexity["params"] - complexity["freeze"], 24 | complexity["flops"], 25 | complexity["acts"]]) 26 | print(table) 27 | 28 | # option2 29 | tensor = (torch.rand(1, 3, 224, 224),) 30 | flops = FlopCountAnalysis(model, tensor) 31 | print(flops.total()) 32 | 33 | print(parameter_count_table(model)) 34 | 35 | 36 | if __name__ == '__main__': 37 | main() 38 | -------------------------------------------------------------------------------- /pytorch_classification/model_complexity/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | these code refers to: 3 | https://github.com/facebookresearch/pycls/blob/master/pycls/models/blocks.py 4 | """ 5 | 6 | 7 | def conv2d_cx(cx, in_c, out_c, k, *, stride=1, groups=1, bias=False, trainable=True): 8 | """Accumulates complexity of conv2d into cx = (h, w, flops, params, acts).""" 9 | assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." 10 | h, w, c = cx["h"], cx["w"], cx["c"] 11 | assert c == in_c 12 | h, w = (h - 1) // stride + 1, (w - 1) // stride + 1 13 | cx["h"] = h 14 | cx["w"] = w 15 | cx["c"] = out_c 16 | cx["flops"] += k * k * in_c * out_c * h * w // groups + (out_c if bias else 0) 17 | cx["params"] += k * k * in_c * out_c // groups + (out_c if bias else 0) 18 | cx["acts"] += out_c * h * w 19 | if trainable is False: 20 | cx["freeze"] += k * k * in_c * out_c // groups + (out_c if bias else 0) 21 | return cx 22 | 23 | 24 | def pool2d_cx(cx, in_c, k, *, stride=1): 25 | """Accumulates complexity of pool2d into cx = (h, w, flops, params, acts).""" 26 | assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." 
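# note (added): with an odd kernel and same-style padding, the spatial
# output size works out to (h - 1) // stride + 1, the same formula used
# in conv2d_cx above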
27 | h, w, c = cx["h"], cx["w"], cx["c"] 28 | assert c == in_c 29 | h, w = (h - 1) // stride + 1, (w - 1) // stride + 1 30 | cx["h"] = h 31 | cx["w"] = w 32 | cx["acts"] += in_c * h * w 33 | return cx 34 | 35 | 36 | def norm2d_cx(cx, in_c, trainable=True): 37 | """Accumulates complexity of norm2d into cx = (h, w, flops, params, acts).""" 38 | c, params = cx["c"], cx["params"] 39 | assert c == in_c 40 | cx["params"] += 4 * c 41 | cx["freeze"] += 2 * c # moving_mean, variance 42 | if trainable is False: 43 | cx["freeze"] += 2 * c # beta, gamma 44 | return cx 45 | 46 | 47 | def gap2d_cx(cx): 48 | """Accumulates complexity of gap2d into cx = (h, w, flops, params, acts).""" 49 | cx["h"] = 1 50 | cx["w"] = 1 51 | return cx 52 | 53 | 54 | def linear_cx(cx, in_units, out_units, *, bias=False, trainable=True): 55 | """Accumulates complexity of linear into cx = (h, w, flops, params, acts).""" 56 | c = cx["c"] 57 | assert c == in_units 58 | cx["c"] = out_units 59 | cx["flops"] += in_units * out_units + (out_units if bias else 0) 60 | cx["params"] += in_units * out_units + (out_units if bias else 0) 61 | cx["acts"] += out_units 62 | if trainable is False: 63 | cx["freeze"] += in_units * out_units + (out_units if bias else 0) 64 | return cx 65 | -------------------------------------------------------------------------------- /pytorch_classification/swin_transformer/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/swin_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import swin_tiny_patch4_window7_224 as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | img_size = 224 16 | data_transform = transforms.Compose( 17 | [transforms.Resize(int(img_size * 1.14)), 18 | transforms.CenterCrop(img_size), 19 | transforms.ToTensor(), 20 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 21 | 22 | # load image 23 | img_path = "../tulip.jpg" 24 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 25 | img = Image.open(img_path) 26 | 
plt.imshow(img) 27 | # [N, C, H, W] 28 | img = data_transform(img) 29 | # expand batch dimension 30 | img = torch.unsqueeze(img, dim=0) 31 | 32 | # read class_indict 33 | json_path = './class_indices.json' 34 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 35 | 36 | json_file = open(json_path, "r") 37 | class_indict = json.load(json_file) 38 | 39 | # create model 40 | model = create_model(num_classes=5).to(device) 41 | # load model weights 42 | model_weight_path = "./weights/model-9.pth" 43 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 44 | model.eval() 45 | with torch.no_grad(): 46 | # predict class 47 | output = torch.squeeze(model(img.to(device))).cpu() 48 | predict = torch.softmax(output, dim=0) 49 | predict_cla = torch.argmax(predict).numpy() 50 | 51 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 52 | predict[predict_cla].numpy()) 53 | plt.title(print_res) 54 | for i in range(len(predict)): 55 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 56 | predict[i].numpy())) 57 | plt.show() 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/my_dataset.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from PIL import Image 3 | import torch 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class MyDataSet(Dataset): 8 | """自定义数据集""" 9 | 10 | def __init__(self, images_path: list, images_class: list, transform=None): 11 | self.images_path = images_path 12 | self.images_class = images_class 13 | self.transform = transform 14 | 15 | delete_img = [] 16 | for index, img_path in tqdm(enumerate(images_path)): 17 | img = Image.open(img_path) 18 | w, h = img.size 19 | ratio = w / h 20 | if ratio > 10 or ratio < 0.1: 21 | delete_img.append(index) 22 | # print(img_path, ratio) 23 | 24 | for index in delete_img[::-1]: 25 | self.images_path.pop(index) 26 | self.images_class.pop(index) 27 | 28 | def __len__(self): 29 | return len(self.images_path) 30 | 31 | def __getitem__(self, item): 32 | img = Image.open(self.images_path[item]) 33 | # RGB为彩色图片,L为灰度图片 34 | if img.mode != 'RGB': 35 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 36 | label = self.images_class[item] 37 | 38 | if self.transform is not None: 39 | img = self.transform(img) 40 | 41 | return img, label 42 | 43 | @staticmethod 44 | def collate_fn(batch): 45 | # 官方实现的default_collate可以参考 46 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 47 | images, labels = tuple(zip(*batch)) 48 | 49 | images = torch.stack(images, dim=0) 50 | labels = torch.as_tensor(labels) 51 | return images, labels 52 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/requirements.txt: -------------------------------------------------------------------------------- 1 | torchvision==0.7.0 2 | tqdm==4.42.1 3 | matplotlib==3.2.1 4 | torch==1.6.0 5 | Pillow 6 | tensorboard 7 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/train_eval_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from tqdm import tqdm 4 | import torch 5 | 6 | 7 | def train_one_epoch(model, optimizer, 
data_loader, device, epoch): 8 | model.train() 9 | loss_function = torch.nn.CrossEntropyLoss() 10 | mean_loss = torch.zeros(1).to(device) 11 | optimizer.zero_grad() 12 | 13 | data_loader = tqdm(data_loader) 14 | for step, data in enumerate(data_loader): 15 | images, labels = data 16 | pred = model(images.to(device)) 17 | 18 | loss = loss_function(pred, labels.to(device)) 19 | loss.backward() 20 | mean_loss = (mean_loss * step + loss.detach()) / (step + 1) # update mean losses 21 | 22 | # 打印平均loss 23 | data_loader.desc = "[epoch {}] mean loss {}".format(epoch, round(mean_loss.item(), 3)) 24 | 25 | if not torch.isfinite(loss): 26 | print('WARNING: non-finite loss, ending training ', loss) 27 | sys.exit(1) 28 | 29 | optimizer.step() 30 | optimizer.zero_grad() 31 | 32 | return mean_loss.item() 33 | 34 | 35 | @torch.no_grad() 36 | def evaluate(model, data_loader, device): 37 | model.eval() 38 | 39 | # 用于存储预测正确的样本个数 40 | sum_num = torch.zeros(1).to(device) 41 | # 统计验证集样本总数目 42 | num_samples = len(data_loader.dataset) 43 | 44 | # 打印验证进度 45 | data_loader = tqdm(data_loader, desc="validation...") 46 | 47 | for step, data in enumerate(data_loader): 48 | images, labels = data 49 | pred = model(images.to(device)) 50 | pred = torch.max(pred, dim=1)[1] 51 | sum_num += torch.eq(pred, labels.to(device)).sum() 52 | 53 | # 计算预测正确的比例 54 | acc = sum_num.item() / num_samples 55 | 56 | return acc 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/README.md: -------------------------------------------------------------------------------- 1 | ## 多GPU启动指令 2 | - 如果要使用```train_multi_gpu_using_launch.py```脚本,使用以下指令启动 3 | - ```python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_gpu_using_launch.py``` 4 | - 其中```nproc_per_node```为并行GPU的数量 5 | - 如果要指定使用某几块GPU可使用如下指令,例如使用第1块和第4块GPU进行训练: 6 | - ```CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_gpu_using_launch.py``` 7 | 8 | ----- 9 | 10 | - 如果要使用```train_multi_gpu_using_spawn.py```脚本,使用以下指令启动 11 | - ```python train_multi_gpu_using_spawn.py``` 12 | 13 | ## 训练时间对比 14 | ![training time](training_time.png) 15 | 16 | ## 是否使用SyncBatchNorm 17 | ![syncbn](syncbn.png) 18 | 19 | ## 单GPU与多GPU训练曲线 20 | ![accuracy](accuracy.png) 21 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_classification/train_multi_GPU/accuracy.png -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/multi_train_utils/distributed_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def init_distributed_mode(args): 8 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 9 | args.rank = int(os.environ["RANK"]) 10 | args.world_size = int(os.environ['WORLD_SIZE']) 11 | args.gpu = int(os.environ['LOCAL_RANK']) 12 | elif 'SLURM_PROCID' in os.environ: 13 | args.rank = int(os.environ['SLURM_PROCID']) 14 | args.gpu = args.rank % torch.cuda.device_count() 15 | else: 16 | print('Not using distributed mode') 17 | args.distributed = False 18 | return 19 | 20 | 
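# note (added): reaching this point means the rank/world-size variables
# were found, i.e. the script was started through a multi-process launcher
# such as the torch.distributed.launch command shown in the README above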
args.distributed = True 21 | 22 | torch.cuda.set_device(args.gpu) 23 | args.dist_backend = 'nccl' # 通信后端,nvidia GPU推荐使用NCCL 24 | print('| distributed init (rank {}): {}'.format( 25 | args.rank, args.dist_url), flush=True) 26 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 27 | world_size=args.world_size, rank=args.rank) 28 | dist.barrier() 29 | 30 | 31 | def cleanup(): 32 | dist.destroy_process_group() 33 | 34 | 35 | def is_dist_avail_and_initialized(): 36 | """检查是否支持分布式环境""" 37 | if not dist.is_available(): 38 | return False 39 | if not dist.is_initialized(): 40 | return False 41 | return True 42 | 43 | 44 | def get_world_size(): 45 | if not is_dist_avail_and_initialized(): 46 | return 1 47 | return dist.get_world_size() 48 | 49 | 50 | def get_rank(): 51 | if not is_dist_avail_and_initialized(): 52 | return 0 53 | return dist.get_rank() 54 | 55 | 56 | def is_main_process(): 57 | return get_rank() == 0 58 | 59 | 60 | def reduce_value(value, average=True): 61 | world_size = get_world_size() 62 | if world_size < 2: # 单GPU的情况 63 | return value 64 | 65 | with torch.no_grad(): 66 | dist.all_reduce(value) 67 | if average: 68 | value /= world_size 69 | 70 | return value 71 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/multi_train_utils/train_eval_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from tqdm import tqdm 4 | import torch 5 | 6 | from multi_train_utils.distributed_utils import reduce_value, is_main_process 7 | 8 | 9 | def train_one_epoch(model, optimizer, data_loader, device, epoch): 10 | model.train() 11 | loss_function = torch.nn.CrossEntropyLoss() 12 | mean_loss = torch.zeros(1).to(device) 13 | optimizer.zero_grad() 14 | 15 | # 在进程0中打印训练进度 16 | if is_main_process(): 17 | data_loader = tqdm(data_loader) 18 | 19 | for step, data in enumerate(data_loader): 20 | images, labels = data 21 | 22 | pred = model(images.to(device)) 23 | 24 | loss = loss_function(pred, labels.to(device)) 25 | loss.backward() 26 | loss = reduce_value(loss, average=True) 27 | mean_loss = (mean_loss * step + loss.detach()) / (step + 1) # update mean losses 28 | 29 | # 在进程0中打印平均loss 30 | if is_main_process(): 31 | data_loader.desc = "[epoch {}] mean loss {}".format(epoch, round(mean_loss.item(), 3)) 32 | 33 | if not torch.isfinite(loss): 34 | print('WARNING: non-finite loss, ending training ', loss) 35 | sys.exit(1) 36 | 37 | optimizer.step() 38 | optimizer.zero_grad() 39 | 40 | # 等待所有进程计算完毕 41 | if device != torch.device("cpu"): 42 | torch.cuda.synchronize(device) 43 | 44 | return mean_loss.item() 45 | 46 | 47 | @torch.no_grad() 48 | def evaluate(model, data_loader, device): 49 | model.eval() 50 | 51 | # 用于存储预测正确的样本个数 52 | sum_num = torch.zeros(1).to(device) 53 | 54 | # 在进程0中打印验证进度 55 | if is_main_process(): 56 | data_loader = tqdm(data_loader) 57 | 58 | for step, data in enumerate(data_loader): 59 | images, labels = data 60 | pred = model(images.to(device)) 61 | pred = torch.max(pred, dim=1)[1] 62 | sum_num += torch.eq(pred, labels.to(device)).sum() 63 | 64 | # 等待所有进程计算完毕 65 | if device != torch.device("cpu"): 66 | torch.cuda.synchronize(device) 67 | 68 | sum_num = reduce_value(sum_num, average=False) 69 | 70 | return sum_num.item() 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/my_dataset.py: 
-------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.2.1 2 | tqdm==4.42.1 3 | torchvision==0.7.0 4 | torch==1.6.0 5 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/runs/Nov07_18-58-35_wz/events.out.tfevents.1604746311.localhost.41577.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_classification/train_multi_GPU/runs/Nov07_18-58-35_wz/events.out.tfevents.1604746311.localhost.41577.0 -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/syncbn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_classification/train_multi_GPU/syncbn.png -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/training_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_classification/train_multi_GPU/training_time.png -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from fvcore.nn import FlopCountAnalysis 3 | 4 | from vit_model import Attention 5 | 6 | 7 | def main(): 8 | # Self-Attention 9 | a1 = Attention(dim=512, num_heads=1) 10 | a1.proj = torch.nn.Identity() # remove Wo 11 | 12 | # Multi-Head Attention 13 | a2 = Attention(dim=512, num_heads=8) 14 | 15 | # [batch_size, num_tokens, total_embed_dim] 16 | t = (torch.rand(32, 1024, 512),) 17 | 18 | flops1 = FlopCountAnalysis(a1, t) 19 | print("Self-Attention FLOPs:", flops1.total()) 20 | 21 | flops2 = FlopCountAnalysis(a2, t) 22 | print("Multi-Head 
Attention FLOPs:", flops2.total()) 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | 28 | -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from vit_model import vit_base_patch16_224_in21k as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = create_model(num_classes=5, has_logits=False).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-9.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | 
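# note (added): unlike the other predict.py scripts, this one normalizes
# with mean/std 0.5; this matches the Inception-style preprocessing used
# for the in21k ViT pretrained weights rather than the ImageNet statistics.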
-------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet50_fpn_model import resnet50_fpn_backbone 2 | from .mobilenetv2_model import MobileNetV2 3 | from .vgg_model import vgg 4 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/fasterRCNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/faster_rcnn/fasterRCNN.png -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/network_files/__init__.py: -------------------------------------------------------------------------------- 1 | from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor 2 | from .rpn_function import AnchorsGenerator 3 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/network_files/image_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch import Tensor 3 | 4 | 5 | class ImageList(object): 6 | """ 7 | Structure that holds a list of images (of possibly 8 | varying sizes) as a single tensor. 9 | This works by padding the images to the same size, 10 | and storing in a field the original sizes of each image 11 | """ 12 | 13 | def __init__(self, tensors, image_sizes): 14 | # type: (Tensor, List[Tuple[int, int]]) -> None 15 | """ 16 | Arguments: 17 | tensors (tensor) padding后的图像数据 18 | image_sizes (list[tuple[int, int]]) padding前的图像尺寸 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, device): 24 | # type: (Device) -> ImageList # noqa 25 | cast_tensor = self.tensors.to(device) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("step") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 
| handles2, labels2 = ax2.get_legend_handles_labels() 23 | plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! ") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/record_mAP.txt: -------------------------------------------------------------------------------- 1 | COCO results: 2 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.526 3 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.804 4 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.586 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.211 6 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.403 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.580 8 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.454 9 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.639 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.646 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.347 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.540 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.693 14 | 15 | mAP(IoU=0.5) for each category: 16 | aeroplane : 0.8759546352558178 17 | bicycle : 0.8554609242543677 18 | bird : 0.8434943725365999 19 | boat : 0.6753024837855667 20 | bottle : 0.7185899054232459 21 | bus : 0.8691082170432654 22 | car : 0.8771002682431779 23 | cat : 0.9169138943375639 24 | chair : 0.6403466317122392 25 | cow : 0.8285552434280278 26 | diningtable : 0.6437938565684241 27 | dog : 0.8745793980119227 28 | horse : 0.8718238708874728 29 | motorbike : 0.8910672301923952 30 | person : 0.9047338725598096 31 | pottedplant : 0.5808810399193133 32 | sheep : 0.86045368568359 33 | sofa : 0.7239390963388067 34 | train : 0.8652277764020805 35 | tvmonitor : 0.7683550206571649 -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | numpy==1.17.0 4 | tqdm==4.42.1 5 | torch==1.6.0 6 | torchvision==0.7.0 7 | pycocotools 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | 5 | def main(): 6 | random.seed(0) # 设置随机种子,保证随机结果可复现 7 | 8 | files_path = "./VOCdevkit/VOC2012/Annotations" 9 | assert os.path.exists(files_path), "path: '{}' does not exist.".format(files_path) 10 | 11 | val_rate = 0.5 12 | 13 | files_name = sorted([file.split(".")[0] for file in os.listdir(files_path)]) 14 | 
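# note (added): sorting plus the fixed seed above makes the split
# reproducible; os.listdir returns entries in arbitrary order, so without
# sorted() the same seed could still select different files on different
# machines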
files_num = len(files_name) 15 | val_index = random.sample(range(0, files_num), k=int(files_num*val_rate)) 16 | train_files = [] 17 | val_files = [] 18 | for index, file_name in enumerate(files_name): 19 | if index in val_index: 20 | val_files.append(file_name) 21 | else: 22 | train_files.append(file_name) 23 | 24 | try: 25 | train_f = open("train.txt", "x") 26 | eval_f = open("val.txt", "x") 27 | train_f.write("\n".join(train_files)) 28 | eval_f.write("\n".join(val_files)) 29 | except FileExistsError as e: 30 | print(e) 31 | exit(1) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | from .coco_utils import get_coco_api_from_dataset 4 | from .coco_eval import CocoEvaluator 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data 4 | from pycocotools.coco import COCO 5 | 6 | 7 | def convert_to_coco_api(ds): 8 | coco_ds = COCO() 9 | # annotation IDs need to start at 1, not 0 10 | ann_id = 1 11 | dataset = {'images': [], 'categories': [], 'annotations': []} 12 | categories = set() 13 | for img_idx in range(len(ds)): 14 | # find better way to get target 15 | hw, targets = ds.coco_index(img_idx) 16 | image_id = targets["image_id"].item() 17 | img_dict = {} 18 | img_dict['id'] = image_id 19 | img_dict['height'] = hw[0] 20 | img_dict['width'] = hw[1] 21 | dataset['images'].append(img_dict) 22 | bboxes = targets["boxes"] 23 | bboxes[:, 2:] -= bboxes[:, :2] 24 | bboxes = bboxes.tolist() 25 | labels = targets['labels'].tolist() 26 | areas = targets['area'].tolist() 27 | iscrowd = targets['iscrowd'].tolist() 28 | num_objs = len(bboxes) 29 | for i in range(num_objs): 30 | ann = {} 31 | ann['image_id'] = image_id 32 | ann['bbox'] = bboxes[i] 33 | ann['category_id'] = labels[i] 34 | categories.add(labels[i]) 35 | ann['area'] = areas[i] 36 | ann['iscrowd'] = iscrowd[i] 37 | ann['id'] = ann_id 38 | dataset['annotations'].append(ann) 39 | ann_id += 1 40 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 41 | coco_ds.dataset = dataset 42 | coco_ds.createIndex() 43 | return coco_ds 44 | 45 | 46 | def get_coco_api_from_dataset(dataset): 47 | for _ in range(10): 48 | if isinstance(dataset, torchvision.datasets.CocoDetection): 49 | break 50 | if isinstance(dataset, torch.utils.data.Subset): 51 | dataset = dataset.dataset 52 | if isinstance(dataset, torchvision.datasets.CocoDetection): 53 | return dataset.coco 54 | return convert_to_coco_api(dataset) 55 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torchvision.transforms import functional as F 3 | 4 | 5 | class Compose(object): 6 | """组合多个transform函数""" 7 | def __init__(self, transforms): 8 | self.transforms = transforms 9 | 10 | def __call__(self, image, target): 11 | for t in self.transforms: 12 | image, target = 
t(image, target) 13 | return image, target 14 | 15 | 16 | class ToTensor(object): 17 | """将PIL图像转为Tensor""" 18 | def __call__(self, image, target): 19 | image = F.to_tensor(image) 20 | return image, target 21 | 22 | 23 | class RandomHorizontalFlip(object): 24 | """随机水平翻转图像以及bboxes""" 25 | def __init__(self, prob=0.5): 26 | self.prob = prob 27 | 28 | def __call__(self, image, target): 29 | if random.random() < self.prob: 30 | height, width = image.shape[-2:] 31 | image = image.flip(-1) # 水平翻转图片 32 | bbox = target["boxes"] 33 | # bbox: xmin, ymin, xmax, ymax 34 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] # 翻转对应bbox坐标信息 35 | target["boxes"] = bbox 36 | return image, target 37 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .feature_pyramid_network import FeaturePyramidNetwork, LastLevelP6P7, LastLevelMaxPool 2 | from .resnet50_fpn_model import resnet50_fpn_backbone 3 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/__init__.py: -------------------------------------------------------------------------------- 1 | from .retinanet import RetinaNet 2 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/image_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch import Tensor 3 | 4 | 5 | class ImageList(object): 6 | """ 7 | Structure that holds a list of images (of possibly 8 | varying sizes) as a single tensor. 9 | This works by padding the images to the same size, 10 | and storing in a field the original sizes of each image 11 | """ 12 | 13 | def __init__(self, tensors, image_sizes): 14 | # type: (Tensor, List[Tuple[int, int]]) -> None 15 | """ 16 | Arguments: 17 | tensors (tensor) padding后的图像数据 18 | image_sizes (list[tuple[int, int]]) padding前的图像尺寸 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, device): 24 | # type: (Device) -> ImageList # noqa 25 | cast_tensor = self.tensors.to(device) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def sigmoid_focal_loss( 6 | inputs: torch.Tensor, 7 | targets: torch.Tensor, 8 | alpha: float = 0.25, 9 | gamma: float = 2, 10 | reduction: str = "none", 11 | ): 12 | """ 13 | Original implementation from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py . 14 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 15 | 16 | Args: 17 | inputs: A float tensor of arbitrary shape. 18 | The predictions for each example. 19 | targets: A float tensor with the same shape as inputs. Stores the binary 20 | classification label for each element in inputs 21 | (0 for the negative class and 1 for the positive class). 22 | alpha: (optional) Weighting factor in range (0,1) to balance 23 | positive vs negative examples or -1 for ignore. Default = 0.25 24 | gamma: Exponent of the modulating factor (1 - p_t) to 25 | balance easy vs hard examples. 
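            Default = 2, the value the focal loss (RetinaNet) paper found to work best.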
26 | reduction: 'none' | 'mean' | 'sum' 27 | 'none': No reduction will be applied to the output. 28 | 'mean': The output will be averaged. 29 | 'sum': The output will be summed. 30 | Returns: 31 | Loss tensor with the reduction option applied. 32 | """ 33 | p = torch.sigmoid(inputs) 34 | ce_loss = F.binary_cross_entropy_with_logits( 35 | inputs, targets, reduction="none" 36 | ) 37 | p_t = p * targets + (1 - p) * (1 - targets) 38 | loss = ce_loss * ((1 - p_t) ** gamma) 39 | 40 | if alpha >= 0: 41 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 42 | loss = alpha_t * loss 43 | 44 | if reduction == "mean": 45 | loss = loss.mean() 46 | elif reduction == "sum": 47 | loss = loss.sum() 48 | 49 | return loss 50 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 0, 3 | "bicycle": 1, 4 | "bird": 2, 5 | "boat": 3, 6 | "bottle": 4, 7 | "bus": 5, 8 | "car": 6, 9 | "cat": 7, 10 | "chair": 8, 11 | "cow": 9, 12 | "diningtable": 10, 13 | "dog": 11, 14 | "horse": 12, 15 | "motorbike": 13, 16 | "person": 14, 17 | "pottedplant": 15, 18 | "sheep": 16, 19 | "sofa": 17, 20 | "train": 18, 21 | "tvmonitor": 19 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("step") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 | handles2, labels2 = ax2.get_legend_handles_labels() 23 | plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! 
") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | numpy==1.17.0 4 | tqdm==4.42.1 5 | torch==1.7.1 6 | torchvision==0.8.2 7 | pycocotools 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/results20210421-142632.txt: -------------------------------------------------------------------------------- 1 | epoch:0 0.4012 0.6088 0.4334 0.1691 0.3113 0.4498 0.4265 0.6233 0.6478 0.3362 0.5541 0.6977 1.0681 0.01 2 | epoch:1 0.5028 0.7295 0.5441 0.2219 0.3913 0.5552 0.4624 0.6649 0.6875 0.4039 0.5928 0.7346 0.5422 0.01 3 | epoch:2 0.5311 0.7614 0.5784 0.2439 0.4189 0.5852 0.4733 0.6774 0.698 0.417 0.6105 0.7441 0.4456 0.01 4 | epoch:3 0.5439 0.7762 0.595 0.2412 0.4292 0.5996 0.4773 0.6835 0.7021 0.4137 0.6074 0.7494 0.3872 0.01 5 | epoch:4 0.5404 0.7739 0.5949 0.2457 0.426 0.5968 0.4723 0.6818 0.7007 0.4363 0.6047 0.7479 0.347 0.01 6 | epoch:5 0.5513 0.7867 0.6021 0.2415 0.4265 0.6087 0.4811 0.685 0.7041 0.4073 0.6088 0.7526 0.3166 0.01 7 | epoch:6 0.5508 0.7909 0.6014 0.2327 0.4211 0.6116 0.478 0.6803 0.699 0.4081 0.5994 0.7485 0.2884 0.01 8 | epoch:7 0.5617 0.7972 0.6142 0.2431 0.427 0.6223 0.4848 0.6862 0.7049 0.4184 0.6018 0.7551 0.2546 0.001 9 | epoch:8 0.561 0.7986 0.6117 0.2342 0.4268 0.6223 0.4842 0.6855 0.705 0.4153 0.6051 0.7551 0.2462 0.001 10 | epoch:9 0.563 0.7983 0.6153 0.2359 0.4336 0.6237 0.4849 0.6884 0.7068 0.4103 0.6063 0.7574 0.2428 0.001 11 | epoch:10 0.563 0.7991 0.6167 0.2363 0.4334 0.6234 0.4854 0.6879 0.7062 0.4152 0.6063 0.7558 0.2391 0.001 12 | epoch:11 0.5637 0.7984 0.6145 0.2341 0.4345 0.6241 0.4842 0.6894 0.7083 0.4136 0.6074 0.7581 0.2355 0.001 13 | epoch:12 0.5624 0.7969 0.6155 0.2373 0.4292 0.623 0.4853 0.6866 0.7055 0.4136 0.6026 0.756 0.2323 0.0001 14 | epoch:13 0.5632 0.7985 0.6155 0.2358 0.4342 0.6243 0.4858 0.6878 0.7065 0.4206 0.6039 0.7576 0.2307 0.0001 15 | epoch:14 0.562 0.7977 0.6155 0.2309 0.4291 0.6234 0.4849 0.6869 0.7051 0.4198 0.6023 0.7558 0.2305 0.0001 16 | epoch:15 0.5631 0.7984 0.6155 0.2324 0.4326 0.6238 0.4849 0.6876 0.706 0.4151 0.6039 0.7565 0.2313 0.0001 17 | epoch:16 0.5632 0.7992 0.6164 0.2349 0.429 0.6245 0.4859 0.6871 0.7063 0.4186 0.604 0.7569 0.2302 0.0001 18 | epoch:17 0.5637 0.7994 0.6164 0.2325 0.4312 0.6245 0.4854 0.6873 0.706 0.4109 0.6023 0.7567 0.2312 0.0001 19 | epoch:18 0.5626 0.7984 0.6132 0.2333 0.431 0.6238 0.4854 0.6873 0.7056 0.4158 0.6025 0.7564 0.2298 0.0001 20 | epoch:19 0.5613 0.7981 0.612 0.2365 0.4278 0.622 0.4855 0.6867 0.7047 0.4112 0.6 0.7554 0.2305 0.0001 21 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, 
save_on_master, mkdir 3 | from .coco_utils import get_coco_api_from_dataset 4 | from .coco_eval import CocoEvaluator 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data 4 | from pycocotools.coco import COCO 5 | 6 | 7 | def convert_to_coco_api(ds): 8 | coco_ds = COCO() 9 | # annotation IDs need to start at 1, not 0 10 | ann_id = 1 11 | dataset = {'images': [], 'categories': [], 'annotations': []} 12 | categories = set() 13 | for img_idx in range(len(ds)): 14 | # find better way to get target 15 | hw, targets = ds.coco_index(img_idx) 16 | image_id = targets["image_id"].item() 17 | img_dict = {} 18 | img_dict['id'] = image_id 19 | img_dict['height'] = hw[0] 20 | img_dict['width'] = hw[1] 21 | dataset['images'].append(img_dict) 22 | bboxes = targets["boxes"] 23 | bboxes[:, 2:] -= bboxes[:, :2] 24 | bboxes = bboxes.tolist() 25 | labels = targets['labels'].tolist() 26 | areas = targets['area'].tolist() 27 | iscrowd = targets['iscrowd'].tolist() 28 | num_objs = len(bboxes) 29 | for i in range(num_objs): 30 | ann = {} 31 | ann['image_id'] = image_id 32 | ann['bbox'] = bboxes[i] 33 | ann['category_id'] = labels[i] 34 | categories.add(labels[i]) 35 | ann['area'] = areas[i] 36 | ann['iscrowd'] = iscrowd[i] 37 | ann['id'] = ann_id 38 | dataset['annotations'].append(ann) 39 | ann_id += 1 40 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 41 | coco_ds.dataset = dataset 42 | coco_ds.createIndex() 43 | return coco_ds 44 | 45 | 46 | def get_coco_api_from_dataset(dataset): 47 | for _ in range(10): 48 | if isinstance(dataset, torchvision.datasets.CocoDetection): 49 | break 50 | if isinstance(dataset, torch.utils.data.Subset): 51 | dataset = dataset.dataset 52 | if isinstance(dataset, torchvision.datasets.CocoDetection): 53 | return dataset.coco 54 | return convert_to_coco_api(dataset) 55 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torchvision.transforms import functional as F 3 | 4 | 5 | class Compose(object): 6 | """组合多个transform函数""" 7 | def __init__(self, transforms): 8 | self.transforms = transforms 9 | 10 | def __call__(self, image, target): 11 | for t in self.transforms: 12 | image, target = t(image, target) 13 | return image, target 14 | 15 | 16 | class ToTensor(object): 17 | """将PIL图像转为Tensor""" 18 | def __call__(self, image, target): 19 | image = F.to_tensor(image) 20 | return image, target 21 | 22 | 23 | class RandomHorizontalFlip(object): 24 | """随机水平翻转图像以及bboxes""" 25 | def __init__(self, prob=0.5): 26 | self.prob = prob 27 | 28 | def __call__(self, image, target): 29 | if random.random() < self.prob: 30 | height, width = image.shape[-2:] 31 | image = image.flip(-1) # 水平翻转图片 32 | bbox = target["boxes"] 33 | # bbox: xmin, ymin, xmax, ymax 34 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] # 翻转对应bbox坐标信息 35 | target["boxes"] = bbox 36 | return image, target 37 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/README.md: -------------------------------------------------------------------------------- 1 | # SSD: Single Shot MultiBox Detector 2 | 3 | ## 环境配置: 4 | * Python 3.6/3.7/3.8 5 | * 
Pytorch 1.6
6 | * pycocotools (Linux: ```pip install pycocotools```; Windows: ```pip install pycocotools-windows``` (no extra Visual Studio install required))
7 | * Ubuntu or CentOS (Windows is not recommended)
8 | * A GPU is strongly recommended for training
9 | 
10 | ## File structure:
11 | ```
12 | ├── src: modules implementing the SSD model
13 | │    ├── resnet50_backbone.py uses the resnet50 network as the SSD backbone
14 | │    ├── ssd_model.py SSD network definition
15 | │    └── utils.py helper functions used during training
16 | ├── train_utils: training and validation modules (including cocotools)
17 | ├── my_dataset.py: custom dataset for reading the VOC dataset
18 | ├── train_ssd300.py: trains the SSD network with a resnet50 backbone
19 | ├── train_multi_GPU.py: for users training with multiple GPUs
20 | ├── predict_test.py: simple prediction script that runs inference with trained weights
21 | ├── pascal_voc_classes.json: pascal_voc label file
22 | ├── plot_curve.py: plots the training loss and the validation-set mAP
23 | └── validation.py: computes the COCO metrics of validation/test data with trained weights and generates the record_mAP.txt file
24 | ```
25 | 
26 | ## Pretrained weights download (place them in the src folder afterwards):
27 | * ResNet50+SSD: https://ngc.nvidia.com/catalog/models
28 | `search for ssd -> find SSD for PyTorch(FP32) -> download FP32 -> unpack the file`
29 | * If you cannot find it there, it is also available on Baidu Netdisk: https://pan.baidu.com/s/1byOnoNuqmBLZMDA0-lbCMQ access code: iggj
30 | 
31 | ## Dataset: this project uses the PASCAL VOC2012 dataset (place it in the current project folder after downloading)
32 | * Pascal VOC2012 train/val dataset download link: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
33 | * For the Pascal VOC2007 test dataset see: http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
34 | * If you are not familiar with the dataset or want to train on your own data, see my bilibili: https://b23.tv/F1kSCK
35 | 
36 | ## Training
37 | * Make sure the dataset is prepared in advance
38 | * Make sure the corresponding pretrained weights are downloaded in advance
39 | * For single-GPU or CPU training, use the train_ssd300.py script directly
40 | * For multi-GPU training, use the "python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py" command, where the nproc_per_node argument is the number of GPUs to use
41 | 
42 | ## If you don't fully understand how the SSD algorithm works, see my bilibili
43 | * https://www.bilibili.com/video/BV1fT4y1L7Gi
44 | 
45 | ## For more about this project and an analysis of the SSD code, see my bilibili
46 | * https://www.bilibili.com/video/BV1vK411H771/
47 | 
48 | ## Resnet50 + SSD architecture diagram
49 | ![Resnet50 SSD](res50_ssd.png)
50 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/ssd/pascal_voc_classes.json: --------------------------------------------------------------------------------
1 | {
2 |     "aeroplane": 1,
3 |     "bicycle": 2,
4 |     "bird": 3,
5 |     "boat": 4,
6 |     "bottle": 5,
7 |     "bus": 6,
8 |     "car": 7,
9 |     "cat": 8,
10 |     "chair": 9,
11 |     "cow": 10,
12 |     "diningtable": 11,
13 |     "dog": 12,
14 |     "horse": 13,
15 |     "motorbike": 14,
16 |     "person": 15,
17 |     "pottedplant": 16,
18 |     "sheep": 17,
19 |     "sofa": 18,
20 |     "train": 19,
21 |     "tvmonitor": 20
22 | }
-------------------------------------------------------------------------------- /pytorch_object_detection/ssd/plot_curve.py: --------------------------------------------------------------------------------
1 | import datetime
2 | import matplotlib.pyplot as plt
3 | 
4 | 
5 | def plot_loss_and_lr(train_loss, learning_rate):
6 |     try:
7 |         x = list(range(len(train_loss)))
8 |         fig, ax1 = plt.subplots(1, 1)
9 |         ax1.plot(x, train_loss, 'r', label='loss')
10 |         ax1.set_xlabel("epoch")
11 |         ax1.set_ylabel("loss")
12 |         ax1.set_title("Train Loss and lr")
13 |         plt.legend(loc='best')
14 | 
15 |         ax2 = ax1.twinx()
16 |         ax2.plot(x, learning_rate, label='lr')
17 |         ax2.set_ylabel("learning rate")
18 |         ax2.set_xlim(0, len(train_loss))  # limit the x-axis to the recorded range
19 |         plt.legend(loc='best')
20 | 
21 |         handles1, labels1 = ax1.get_legend_handles_labels()
22 |         handles2, labels2 = ax2.get_legend_handles_labels()
23 |         plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')
24 | 
25 |         fig.subplots_adjust(right=0.8)  # prevent the saved figure from being cut off
26 |         fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
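        # the timestamped filename above keeps repeated runs from overwriting earlier curves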
27 | plt.close() 28 | print("successful save loss curve! ") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/record_mAP.txt: -------------------------------------------------------------------------------- 1 | COCO results: 2 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.448 3 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.721 4 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.482 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.099 6 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.280 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521 8 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.418 9 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.565 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.573 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.419 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641 14 | 15 | mAP(IoU=0.5) for each category: 16 | aeroplane : 0.8532360243584314 17 | bicycle : 0.7496603797780927 18 | bird : 0.7658478672087958 19 | boat : 0.6079142920471263 20 | bottle : 0.4986565020053691 21 | bus : 0.8229568428349553 22 | car : 0.7940868387465018 23 | cat : 0.8800145761338203 24 | chair : 0.5090524550010037 25 | cow : 0.7344958411899583 26 | diningtable : 0.5379541883401677 27 | dog : 0.8230037525430133 28 | horse : 0.7880475852689804 29 | motorbike : 0.7879788462924051 30 | person : 0.8351553291238482 31 | pottedplant : 0.4420858247895347 32 | sheep : 0.7466344247593008 33 | sofa : 0.6627392793997164 34 | train : 0.8380502070312741 35 | tvmonitor : 0.7445168617489237 -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.0 2 | matplotlib 3 | tqdm==4.42.1 4 | pycocotools 5 | torch==1.6.0 6 | torchvision==0.7.0 7 | lxml 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/res50_ssd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/ssd/res50_ssd.png -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .res50_backbone import resnet50 2 | from .ssd_model import SSD300, Backbone 3 | from .utils import dboxes300_coco, calc_iou_tensor, Encoder, PostProcess 4 | -------------------------------------------------------------------------------- 
/pytorch_object_detection/ssd/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_utils import get_coco_api_from_dataset 2 | from .coco_eval import CocoEvaluator 3 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 4 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | import torch 4 | import torchvision 5 | import torch.utils.data 6 | from pycocotools.coco import COCO 7 | 8 | 9 | def convert_to_coco_api(ds): 10 | coco_ds = COCO() 11 | # annotation IDs need to start at 1, not 0 12 | ann_id = 1 13 | dataset = {'images': [], 'categories': [], 'annotations': []} 14 | categories = set() 15 | for img_idx in range(len(ds)): 16 | # find better way to get target 17 | targets = ds.coco_index(img_idx) 18 | image_id = targets["image_id"].item() 19 | img_dict = {} 20 | img_dict['id'] = image_id 21 | # img_dict['height'] = img.shape[-2] 22 | # img_dict['width'] = img.shape[-1] 23 | img_dict['height'] = targets["height_width"][0] 24 | img_dict['width'] = targets["height_width"][1] 25 | dataset['images'].append(img_dict) 26 | 27 | # xmin, ymin, xmax, ymax 28 | bboxes = targets["boxes"] 29 | 30 | # (xmin, ymin, xmax, ymax) to (xmin, ymin, w, h) 31 | bboxes[:, 2:] -= bboxes[:, :2] 32 | # 将box的相对坐标信息(0-1)转为绝对值坐标 33 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * img_dict["width"] 34 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * img_dict["height"] 35 | bboxes = bboxes.tolist() 36 | labels = targets['labels'].tolist() 37 | # 注意这里的boxes area也要进行转换,否则导致(small, medium, large)计算错误 38 | areas = (targets['area'] * img_dict["width"] * img_dict["height"]).tolist() 39 | iscrowd = targets['iscrowd'].tolist() 40 | num_objs = len(bboxes) 41 | for i in range(num_objs): 42 | ann = {} 43 | ann['image_id'] = image_id 44 | ann['bbox'] = bboxes[i] 45 | ann['category_id'] = labels[i] 46 | categories.add(labels[i]) 47 | ann['area'] = areas[i] 48 | ann['iscrowd'] = iscrowd[i] 49 | ann['id'] = ann_id 50 | dataset['annotations'].append(ann) 51 | ann_id += 1 52 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 53 | coco_ds.dataset = dataset 54 | coco_ds.createIndex() 55 | return coco_ds 56 | 57 | 58 | def get_coco_api_from_dataset(dataset): 59 | for _ in range(10): 60 | if isinstance(dataset, torchvision.datasets.CocoDetection): 61 | break 62 | if isinstance(dataset, torch.utils.data.Subset): 63 | dataset = dataset.dataset 64 | if isinstance(dataset, torchvision.datasets.CocoDetection): 65 | return dataset.coco 66 | return convert_to_coco_api(dataset) 67 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet50_fpn_model import resnet50_fpn_backbone 2 | from .mobilenetv2_model import MobileNetV2 3 | from .vgg_model import vgg 4 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/coco80_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": "person", 3 | "2": "bicycle", 4 | "3": "car", 5 | "4": "motorcycle", 6 | "5": "airplane", 7 | 
"6": "bus", 8 | "7": "train", 9 | "8": "truck", 10 | "9": "boat", 11 | "10": "traffic light", 12 | "11": "fire hydrant", 13 | "12": "stop sign", 14 | "13": "parking meter", 15 | "14": "bench", 16 | "15": "bird", 17 | "16": "cat", 18 | "17": "dog", 19 | "18": "horse", 20 | "19": "sheep", 21 | "20": "cow", 22 | "21": "elephant", 23 | "22": "bear", 24 | "23": "zebra", 25 | "24": "giraffe", 26 | "25": "backpack", 27 | "26": "umbrella", 28 | "27": "handbag", 29 | "28": "tie", 30 | "29": "suitcase", 31 | "30": "frisbee", 32 | "31": "skis", 33 | "32": "snowboard", 34 | "33": "sports ball", 35 | "34": "kite", 36 | "35": "baseball bat", 37 | "36": "baseball glove", 38 | "37": "skateboard", 39 | "38": "surfboard", 40 | "39": "tennis racket", 41 | "40": "bottle", 42 | "41": "wine glass", 43 | "42": "cup", 44 | "43": "fork", 45 | "44": "knife", 46 | "45": "spoon", 47 | "46": "bowl", 48 | "47": "banana", 49 | "48": "apple", 50 | "49": "sandwich", 51 | "50": "orange", 52 | "51": "broccoli", 53 | "52": "carrot", 54 | "53": "hot dog", 55 | "54": "pizza", 56 | "55": "donut", 57 | "56": "cake", 58 | "57": "chair", 59 | "58": "couch", 60 | "59": "potted plant", 61 | "60": "bed", 62 | "61": "dining table", 63 | "62": "toilet", 64 | "63": "tv", 65 | "64": "laptop", 66 | "65": "mouse", 67 | "66": "remote", 68 | "67": "keyboard", 69 | "68": "cell phone", 70 | "69": "microwave", 71 | "70": "oven", 72 | "71": "toaster", 73 | "72": "sink", 74 | "73": "refrigerator", 75 | "74": "book", 76 | "75": "clock", 77 | "76": "vase", 78 | "77": "scissors", 79 | "78": "teddy bear", 80 | "79": "hair drier", 81 | "80": "toothbrush" 82 | } -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/coco91_to_80.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": 1, 3 | "2": 2, 4 | "3": 3, 5 | "4": 4, 6 | "5": 5, 7 | "6": 6, 8 | "7": 7, 9 | "8": 8, 10 | "9": 9, 11 | "10": 10, 12 | "11": 11, 13 | "13": 12, 14 | "14": 13, 15 | "15": 14, 16 | "16": 15, 17 | "17": 16, 18 | "18": 17, 19 | "19": 18, 20 | "20": 19, 21 | "21": 20, 22 | "22": 21, 23 | "23": 22, 24 | "24": 23, 25 | "25": 24, 26 | "27": 25, 27 | "28": 26, 28 | "31": 27, 29 | "32": 28, 30 | "33": 29, 31 | "34": 30, 32 | "35": 31, 33 | "36": 32, 34 | "37": 33, 35 | "38": 34, 36 | "39": 35, 37 | "40": 36, 38 | "41": 37, 39 | "42": 38, 40 | "43": 39, 41 | "44": 40, 42 | "46": 41, 43 | "47": 42, 44 | "48": 43, 45 | "49": 44, 46 | "50": 45, 47 | "51": 46, 48 | "52": 47, 49 | "53": 48, 50 | "54": 49, 51 | "55": 50, 52 | "56": 51, 53 | "57": 52, 54 | "58": 53, 55 | "59": 54, 56 | "60": 55, 57 | "61": 56, 58 | "62": 57, 59 | "63": 58, 60 | "64": 59, 61 | "65": 60, 62 | "67": 61, 63 | "70": 62, 64 | "72": 63, 65 | "73": 64, 66 | "74": 65, 67 | "75": 66, 68 | "76": 67, 69 | "77": 68, 70 | "78": 69, 71 | "79": 70, 72 | "80": 71, 73 | "81": 72, 74 | "82": 73, 75 | "84": 74, 76 | "85": 75, 77 | "86": 76, 78 | "87": 77, 79 | "88": 78, 80 | "89": 79, 81 | "90": 80 82 | } -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/compute_receptive_field.py: -------------------------------------------------------------------------------- 1 | # vgg16(D) 2 | model = [[3, 1], 3 | [3, 1], 4 | [2, 2], # maxpool 5 | [3, 1], 6 | [3, 1], 7 | [2, 2], # maxpool 8 | [3, 1], 9 | [3, 1], 10 | [3, 1], 11 | [2, 2], # maxpool 12 | [3, 1], 13 | [3, 1], 14 | [3, 1], 15 | [2, 2], # maxpool 16 | [3, 1], 17 | [3, 1], 18 | [3, 1]] 19 
|
20 | field = 1  # receptive field seen by one output unit; backward recursion: r_in = (r_out - 1) * stride + kernel
21 | for kernel, stride in model[::-1]:
22 |     field = (field - 1) * stride + kernel
23 | print(field)  # 196, the receptive field of conv5_3 (seeding with model[-1][0] would count the last layer twice and print 228)
24 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/network_files/__init__.py: --------------------------------------------------------------------------------
1 | from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor
2 | from .rpn_function import AnchorsGenerator
3 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/network_files/image_list.py: --------------------------------------------------------------------------------
1 | from typing import List, Tuple
2 | from torch import Tensor
3 | 
4 | 
5 | class ImageList(object):
6 |     """
7 |     Structure that holds a list of images (of possibly
8 |     varying sizes) as a single tensor.
9 |     This works by padding the images to the same size,
10 |     and storing in a field the original sizes of each image
11 |     """
12 | 
13 |     def __init__(self, tensors, image_sizes):
14 |         # type: (Tensor, List[Tuple[int, int]]) -> None
15 |         """
16 |         Arguments:
17 |             tensors (tensor): the batched image data after padding
18 |             image_sizes (list[tuple[int, int]]): the image sizes before padding
19 |         """
20 |         self.tensors = tensors
21 |         self.image_sizes = image_sizes
22 | 
23 |     def to(self, device):
24 |         # type: (Device) -> ImageList  # noqa
25 |         cast_tensor = self.tensors.to(device)
26 |         return ImageList(cast_tensor, self.image_sizes)
27 | 
28 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/plot_curve.py: --------------------------------------------------------------------------------
1 | import datetime
2 | import matplotlib.pyplot as plt
3 | 
4 | 
5 | def plot_loss_and_lr(train_loss, learning_rate):
6 |     try:
7 |         x = list(range(len(train_loss)))
8 |         fig, ax1 = plt.subplots(1, 1)
9 |         ax1.plot(x, train_loss, 'r', label='loss')
10 |         ax1.set_xlabel("step")
11 |         ax1.set_ylabel("loss")
12 |         ax1.set_title("Train Loss and lr")
13 |         plt.legend(loc='best')
14 | 
15 |         ax2 = ax1.twinx()
16 |         ax2.plot(x, learning_rate, label='lr')
17 |         ax2.set_ylabel("learning rate")
18 |         ax2.set_xlim(0, len(train_loss))  # limit the x-axis to the recorded range
19 |         plt.legend(loc='best')
20 | 
21 |         handles1, labels1 = ax1.get_legend_handles_labels()
22 |         handles2, labels2 = ax2.get_legend_handles_labels()
23 |         plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')
24 | 
25 |         fig.subplots_adjust(right=0.8)  # prevent the saved figure from being cut off
26 |         fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
27 |         plt.close()
28 |         print("successfully saved loss curve!")
29 |     except Exception as e:
30 |         print(e)
31 | 
32 | 
33 | def plot_map(mAP):
34 |     try:
35 |         x = list(range(len(mAP)))
36 |         plt.plot(x, mAP, label='mAP')
37 |         plt.xlabel('epoch')
38 |         plt.ylabel('mAP')
39 |         plt.title('Eval mAP')
40 |         plt.xlim(0, len(mAP))
41 |         plt.legend(loc='best')
42 |         plt.savefig('./mAP.png')
43 |         plt.close()
44 |         print("successfully saved mAP curve!")
45 |     except Exception as e:
46 |         print(e)
47 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/requirements.txt: --------------------------------------------------------------------------------
1 | lxml
2 | matplotlib
3 | numpy
4 | tqdm
5 | pycocotools
6 | Pillow
7 | torch==1.7.1
8 | torchvision==0.8.2
9 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/train_utils/__init__.py: --------------------------------------------------------------------------------
1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir
3 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/transforms.py: --------------------------------------------------------------------------------
1 | import random
2 | from torchvision.transforms import functional as F
3 | 
4 | 
5 | class Compose(object):
6 |     """Compose several transform functions"""
7 |     def __init__(self, transforms):
8 |         self.transforms = transforms
9 | 
10 |     def __call__(self, image, target):
11 |         for t in self.transforms:
12 |             image, target = t(image, target)
13 |         return image, target
14 | 
15 | 
16 | class ToTensor(object):
17 |     """Convert a PIL image to a Tensor"""
18 |     def __call__(self, image, target):
19 |         image = F.to_tensor(image)
20 |         return image, target
21 | 
22 | 
23 | class RandomHorizontalFlip(object):
24 |     """Randomly flip the image and its bboxes horizontally"""
25 |     def __init__(self, prob=0.5):
26 |         self.prob = prob
27 | 
28 |     def __call__(self, image, target):
29 |         if random.random() < self.prob:
30 |             height, width = image.shape[-2:]
31 |             image = image.flip(-1)  # flip the image horizontally
32 |             bbox = target["boxes"]
33 |             # bbox: xmin, ymin, xmax, ymax
34 |             bbox[:, [0, 2]] = width - bbox[:, [2, 0]]  # flip the corresponding bbox coordinates
35 |             target["boxes"] = bbox
36 |         return image, target
37 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/build_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/yolov3_spp/build_utils/__init__.py
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/build_utils/img_utils.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | 
4 | 
5 | def letterbox(img: np.ndarray,
6 |               new_shape=(416, 416),
7 |               color=(114, 114, 114),
8 |               auto=True,
9 |               scale_fill=False,
10 |               scale_up=True):
11 |     """
12 |     Resize the image to the specified size, padding where necessary
13 |     :param img: input image as a numpy array
14 |     :param new_shape: the input shape of the network
15 |     :param color: the color used to fill the padding
16 |     :param auto:
17 |     :param scale_fill: simply stretch the image to the target size
18 |     :param scale_up: if False, only scale down, never up
19 |     :return:
20 |     """
21 | 
22 |     shape = img.shape[:2]  # [h, w]
23 |     if isinstance(new_shape, int):
24 |         new_shape = (new_shape, new_shape)
25 | 
26 |     # scale ratio (new / old)
27 |     r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
28 |     if not scale_up:  # only scale down, do not scale up (for better test mAP); images smaller than the target keep their size
29 |         r = min(r, 1.0)
30 | 
31 |     # compute padding
32 |     ratio = r, r  # width, height ratios
33 |     new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
34 |     dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
35 |     if auto:  # minimum rectangle: keep the aspect ratio and resize the longer side to the target size
36 |         # the modulo here keeps the padded image a multiple of 32 for 416x416 inputs; with mod 64 this also holds for 512x512
37 |         dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
38 |     elif scale_fill:  # stretch: resize the image to the target size, ignoring the aspect ratio
39 |         dw, dh = 0, 0
40 |         new_unpad = new_shape
41 |         ratio = new_shape[0] / shape[1], new_shape[1] / shape[0]  # wh ratios
42 | 
43 |     dw /= 2  # divide padding into 2 sides: top/bottom and left/right
44 |     dh /= 2
45 | 
46 |     # shape:[h, w]  new_unpad:[w, h]
47 |     if shape[::-1] != new_unpad:
48 |         img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
49 |     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))  # padding for the top and bottom
50 |     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))  # padding for the left and right
51 | 
52 |     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
53 |     return img, ratio, (dw, dh)
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/cfg/hyp.yaml: --------------------------------------------------------------------------------
1 | # Hyperparameters for training
2 | 
3 | giou: 3.54  # giou loss gain
4 | cls: 37.4  # cls loss gain
5 | cls_pw: 1.0  # cls BCELoss positive_weight
6 | obj: 64.3  # obj loss gain (*=img_size/320 if img_size != 320)
7 | obj_pw: 1.0  # obj BCELoss positive_weight
8 | iou_t: 0.20  # iou training threshold
9 | lr0: 0.001  # initial learning rate (SGD=5E-3 Adam=5E-4)
10 | lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
11 | momentum: 0.937  # SGD momentum
12 | weight_decay: 0.0005  # optimizer weight decay
13 | fl_gamma: 0.0  # focal loss gamma (efficientDet default is gamma=1.5)
14 | hsv_h: 0.0138  # image HSV-Hue augmentation (fraction)
15 | hsv_s: 0.678  # image HSV-Saturation augmentation (fraction)
16 | hsv_v: 0.36  # image HSV-Value augmentation (fraction)
17 | degrees: 0.  # image rotation (+/- deg)
18 | translate: 0.  # image translation (+/- fraction)
19 | scale: 0.  # image scale (+/- gain)
20 | shear: 0. 
# image shear (+/- deg) -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/data/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.0 2 | torchvision==0.7.0 3 | opencv_python==4.3.0.36 4 | lxml 5 | torch==1.6.0 6 | scipy 7 | pycocotools 8 | matplotlib 9 | tqdm==4.42.1 10 | tensorboard==2.1.0 11 | PyYAML 12 | -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/runs/Oct28_17-55-29_wz/events.out.tfevents.1603791769.localhost.localdomain.178338.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/yolov3_spp/runs/Oct28_17-55-29_wz/events.out.tfevents.1603791769.localhost.localdomain.178338.0 -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_utils import get_coco_api_from_dataset 2 | from .coco_eval import CocoEvaluator 3 | from .distributed_utils import init_distributed_mode, torch_distributed_zero_first 4 | -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | import torch 4 | import torchvision 5 | import torch.utils.data 6 | from pycocotools.coco import COCO 7 | 8 | 9 | def convert_to_coco_api(ds): 10 | coco_ds = COCO() 11 | # annotation IDs need to start at 1, not 0 12 | ann_id = 1 13 | dataset = {'images': [], 'categories': [], 'annotations': []} 14 | categories = set() 15 | # 遍历dataset中的每张图像 16 | for img_idx in tqdm(range(len(ds)), desc="loading eval info for coco tools."): 17 | # find better way to get target 18 | targets, shapes = ds.coco_index(img_idx) 19 | # targets: [num_obj, 6] , that number 6 means -> (img_index, obj_index, x, y, w, h) 20 | img_dict = {} 21 | img_dict['id'] = img_idx 22 | img_dict['height'] = shapes[0] 23 | img_dict['width'] = shapes[1] 24 | dataset['images'].append(img_dict) 25 | 26 | for obj in targets: 27 | ann = {} 28 | ann["image_id"] = img_idx 29 | # 将相对坐标转为绝对坐标 30 | # box (x, y, w, h) 31 | boxes = obj[1:] 32 | # (x, y, w, h) to (xmin, ymin, w, h) 33 | boxes[:2] -= 0.5*boxes[2:] 34 | boxes[[0, 2]] *= img_dict["width"] 35 | boxes[[1, 3]] *= img_dict["height"] 36 | boxes = boxes.tolist() 37 | 38 | ann["bbox"] = boxes 39 | ann["category_id"] = int(obj[0]) 40 | categories.add(int(obj[0])) 41 | ann["area"] = boxes[2] * boxes[3] 42 | ann["iscrowd"] = 0 43 | ann["id"] = ann_id 44 | dataset["annotations"].append(ann) 45 | ann_id += 1 
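    # every category id collected above must appear in dataset['categories'], otherwise pycocotools cannot look it up during evaluation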
46 | 
47 |     dataset['categories'] = [{'id': i} for i in sorted(categories)]
48 |     coco_ds.dataset = dataset
49 |     coco_ds.createIndex()
50 |     return coco_ds
51 | 
52 | 
53 | def get_coco_api_from_dataset(dataset):
54 |     for _ in range(10):
55 |         if isinstance(dataset, torchvision.datasets.CocoDetection):
56 |             break
57 |         if isinstance(dataset, torch.utils.data.Subset):
58 |             dataset = dataset.dataset
59 |     if isinstance(dataset, torchvision.datasets.CocoDetection):
60 |         return dataset.coco
61 |     return convert_to_coco_api(dataset)
62 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/yolov3spp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/yolov3_spp/yolov3spp.png
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/README.md: --------------------------------------------------------------------------------
1 | # DeepLabV3(Rethinking Atrous Convolution for Semantic Image Segmentation)
2 | 
3 | ## This project is mainly based on the source code of the official torchvision module of PyTorch
4 | * https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation
5 | 
6 | ## Environment:
7 | * Python3.6/3.7/3.8
8 | * Pytorch1.10
9 | * Ubuntu or CentOS (multi-GPU training is currently not supported on Windows)
10 | * A GPU is strongly recommended for training
11 | * See ```requirements.txt``` for the detailed environment
12 | 
13 | ## File structure:
14 | ```
15 | ├── src: the backbone and the DeepLabv3 model definition
16 | ├── train_utils: modules for training, validation and multi-GPU training
17 | ├── my_dataset.py: custom dataset for reading the VOC dataset
18 | ├── train.py: training script, using deeplabv3_resnet50 as an example
19 | ├── train_multi_GPU.py: for users training with multiple GPUs
20 | ├── predict.py: simple prediction script that runs inference with trained weights
21 | ├── validation.py: computes mIoU and other metrics of validation/test data with trained weights and generates the record_mAP.txt file
22 | └── pascal_voc_classes.json: pascal_voc label file
23 | ```
24 | 
25 | ## Pretrained weights download links:
26 | * Note: the official pretrained weights were obtained by pretraining on COCO, training only on the categories shared with PASCAL VOC, so the number of classes is 21 (including background)
27 | * deeplabv3_resnet50: https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth
28 | * deeplabv3_resnet101: https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth
29 | * deeplabv3_mobilenetv3_large_coco: https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth
30 | * Note: remember to rename the downloaded weights; for example, train.py reads the ```deeplabv3_resnet50_coco.pth``` file,
31 | not ```deeplabv3_resnet50_coco-cd0a2569.pth```
32 | 
33 | 
34 | ## Dataset: this project uses the PASCAL VOC2012 dataset
35 | * Pascal VOC2012 train/val dataset download link: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
36 | * If you are not familiar with the dataset or want to train on your own data, see my blog post: https://blog.csdn.net/qq_37541097/article/details/115787033
37 | 
38 | ## Training
39 | * Make sure the dataset is prepared in advance
40 | * Make sure the corresponding pretrained weights are downloaded in advance
41 | * For single-GPU or CPU training, use the train.py script directly
42 | * For multi-GPU training, use the ```torchrun --nproc_per_node=8 train_multi_GPU.py``` command, where the ```nproc_per_node``` argument is the number of GPUs to use
43 | * To specify which GPU devices to use, prefix the command with ```CUDA_VISIBLE_DEVICES=0,3``` (e.g. to use only the 1st and the 4th GPU)
44 | * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py```
45 | 
46 | ## Notes
47 | * When using the training scripts, set '--data-path' (VOC_root) to the **root directory** that contains your 'VOCdevkit' folder
48 | * When using the prediction script, set 'weights_path' to your own generated weights path.
49 | * When using the validation script, make sure your validation/test set contains objects of every category, and only modify '--num-classes', '--aux', '--data-path' and '--weights'; try not to change any other code
50 | 
51 | ## If you don't fully understand how DeepLabV3 works, see my bilibili
52 | 
53 | 
54 | ## For more about this project and an analysis of the DeepLabV3 code, see my bilibili
55 | 
56 | 
57 | ## Diagram of the DeeplabV3 network as implemented in official PyTorch
58 | ![deeplabv3_resnet50_pytorch](./deeplabv3_resnet50.png)
59 | 
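## Minimal single-image inference sketch
The snippet below is a rough sketch of how trained weights can be used for prediction, not a copy of predict.py. It assumes the `deeplabv3_resnet50(aux=True, num_classes=21)` signature exposed by `src`, a checkpoint saved under a `"model"` key, and the hypothetical paths `save_weights/model_29.pth` and `test.jpg`; check predict.py for the exact interface before relying on it.

```python
import torch
from torchvision import transforms
from PIL import Image

from src import deeplabv3_resnet50  # exported in src/__init__.py

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 20 VOC foreground classes + background = 21; the aux head is enabled here only
# because the training scripts use an auxiliary loss (an assumption)
model = deeplabv3_resnet50(aux=True, num_classes=21)
ckpt = torch.load("save_weights/model_29.pth", map_location="cpu")  # hypothetical path
model.load_state_dict(ckpt["model"] if "model" in ckpt else ckpt)
model.to(device).eval()

# ImageNet statistics, matching the pretrained ResNet backbone
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
])

img = preprocess(Image.open("test.jpg").convert("RGB")).unsqueeze(0).to(device)

with torch.no_grad():
    # torchvision-style segmentation models return a dict; "out" is the main head
    logits = model(img)["out"]          # [1, 21, H, W]
    mask = logits.argmax(1).squeeze(0)  # [H, W], class index per pixel (0 = background)

print(mask.shape)
```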
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/deeplabv3_resnet50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_segmentation/deeplab_v3/deeplabv3_resnet50.png -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/get_palette.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # 读取mask标签 6 | target = Image.open("./2007_001288.png") 7 | # 获取调色板 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # 转换成字典子形式 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 | -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch.utils.data as data 4 | from PIL import Image 5 | 6 | 7 | class VOCSegmentation(data.Dataset): 8 | def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"): 9 | super(VOCSegmentation, self).__init__() 10 | assert year in ["2007", "2012"], "year must be in ['2007', '2012']" 11 | root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}") 12 | assert os.path.exists(root), "path '{}' does not exist.".format(root) 13 | image_dir = os.path.join(root, 'JPEGImages') 14 | mask_dir = os.path.join(root, 'SegmentationClass') 15 | 16 | txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name) 17 | assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path) 18 | with open(os.path.join(txt_path), "r") as f: 19 | file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0] 20 | 21 | self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] 22 | self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names] 23 | assert (len(self.images) == len(self.masks)) 24 | self.transforms = transforms 25 | 26 | def __getitem__(self, index): 27 | """ 28 | Args: 29 | index (int): Index 30 | 31 | Returns: 32 | tuple: (image, target) where target is the image segmentation. 
33 | """ 34 | img = Image.open(self.images[index]).convert('RGB') 35 | target = Image.open(self.masks[index]) 36 | 37 | if self.transforms is not None: 38 | img, target = self.transforms(img, target) 39 | 40 | return img, target 41 | 42 | def __len__(self): 43 | return len(self.images) 44 | 45 | @staticmethod 46 | def collate_fn(batch): 47 | images, targets = list(zip(*batch)) 48 | batched_imgs = cat_list(images, fill_value=0) 49 | batched_targets = cat_list(targets, fill_value=255) 50 | return batched_imgs, batched_targets 51 | 52 | 53 | def cat_list(images, fill_value=0): 54 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 55 | batch_shape = (len(images),) + max_size 56 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 57 | for img, pad_img in zip(images, batched_imgs): 58 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 59 | return batched_imgs 60 | 61 | 62 | # dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True)) 63 | # d1 = dataset[0] 64 | # print(d1) 65 | -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.3 2 | torch==1.10.0 3 | torchvision==0.11.1 4 | Pillow==8.4.0 -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/results20211027-104607.txt: -------------------------------------------------------------------------------- 1 | [epoch: 0] 2 | train_loss: 0.7098 3 | lr: 0.000100 4 | global correct: 94.7 5 | average row correct: ['97.0', '93.9', '86.4', '93.1', '74.4', '64.0', '97.9', '84.7', '95.7', '63.8', '92.2', '68.8', '88.8', '90.5', '93.0', '95.3', '75.3', '94.2', '83.6', '91.9', '75.0'] 6 | IoU: ['93.9', '91.5', '42.6', '88.9', '65.3', '60.4', '95.9', '76.3', '90.7', '50.9', '87.3', '54.1', '86.1', '83.0', '87.2', '89.3', '64.3', '91.0', '58.9', '84.5', '73.6'] 7 | mean IoU: 76.9 8 | 9 | [epoch: 1] 10 | train_loss: 0.6005 11 | lr: 0.000077 12 | global correct: 94.7 13 | average row correct: ['96.1', '96.3', '84.1', '95.1', '84.1', '81.4', '98.6', '85.8', '96.6', '68.7', '91.9', '71.3', '93.9', '91.4', '96.4', '95.5', '81.0', '94.1', '85.4', '94.9', '83.6'] 14 | IoU: ['93.9', '92.3', '42.2', '88.8', '69.1', '71.2', '96.1', '75.1', '91.9', '48.9', '87.8', '56.0', '87.9', '85.7', '89.2', '89.5', '63.6', '90.3', '56.2', '85.3', '79.8'] 15 | mean IoU: 78.1 16 | 17 | [epoch: 2] 18 | train_loss: 0.5840 19 | lr: 0.000054 20 | global correct: 94.8 21 | average row correct: ['96.2', '95.5', '85.8', '94.6', '85.5', '83.7', '98.8', '87.5', '96.3', '71.4', '92.5', '72.8', '93.1', '91.9', '96.7', '94.9', '81.5', '95.3', '82.8', '95.3', '84.1'] 22 | IoU: ['94.0', '91.2', '42.7', '88.3', '69.2', '72.7', '96.4', '74.8', '92.0', '49.8', '87.5', '58.3', '87.3', '85.0', '89.3', '89.2', '62.6', '89.6', '58.1', 
'84.8', '80.3'] 23 | mean IoU: 78.2 24 | 25 | [epoch: 3] 26 | train_loss: 0.5637 27 | lr: 0.000029 28 | global correct: 94.8 29 | average row correct: ['96.1', '95.9', '81.7', '94.8', '86.5', '79.4', '99.0', '89.1', '95.8', '71.4', '93.8', '71.0', '93.4', '92.4', '97.3', '94.9', '80.4', '96.9', '83.3', '94.7', '84.4'] 30 | IoU: ['94.0', '89.5', '41.8', '87.6', '69.0', '70.4', '96.0', '75.9', '92.1', '49.7', '87.3', '58.1', '86.2', '83.9', '88.7', '89.2', '63.7', '88.8', '57.7', '85.3', '79.9'] 31 | mean IoU: 77.8 32 | 33 | [epoch: 4] 34 | train_loss: 0.5779 35 | lr: 0.000000 36 | global correct: 94.8 37 | average row correct: ['96.3', '93.6', '85.9', '95.1', '82.6', '83.8', '98.5', '90.0', '95.9', '71.1', '93.2', '68.4', '92.6', '93.9', '95.9', '94.5', '82.8', '96.3', '82.8', '94.5', '86.4'] 38 | IoU: ['94.1', '91.8', '42.5', '88.5', '67.8', '72.1', '96.6', '78.3', '92.0', '49.8', '88.3', '58.8', '86.7', '84.9', '89.0', '89.5', '61.0', '89.1', '56.6', '84.6', '80.2'] 39 | mean IoU: 78.2 40 | 41 | -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .deeplabv3_model import deeplabv3_resnet50, deeplabv3_resnet101, deeplabv3_mobilenetv3_large 2 | -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/README.md: -------------------------------------------------------------------------------- 1 | # FCN(Fully Convolutional Networks for Semantic Segmentation) 2 | 3 | ## 该项目主要是来自pytorch官方torchvision模块中的源码 4 | * https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation 5 | 6 | ## 环境配置: 7 | * Python3.6/3.7/3.8 8 | * Pytorch1.10 9 | * Ubuntu或Centos(Windows暂不支持多GPU训练) 10 | * 最好使用GPU训练 11 | * 详细环境配置见```requirements.txt``` 12 | 13 | ## 文件结构: 14 | ``` 15 | ├── src: 模型的backbone以及FCN的搭建 16 | ├── train_utils: 训练、验证以及多GPU训练相关模块 17 | ├── my_dataset.py: 自定义dataset用于读取VOC数据集 18 | ├── train.py: 以fcn_resnet50(这里使用了Dilated/Atrous Convolution)进行训练 19 | ├── train_multi_GPU.py: 针对使用多GPU的用户使用 20 | ├── predict.py: 简易的预测脚本,使用训练好的权重进行预测测试 21 | ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标,并生成record_mAP.txt文件 22 | └── pascal_voc_classes.json: pascal_voc标签文件 23 | ``` 24 | 25 | ## 预训练权重下载地址: 26 | * 注意:官方提供的预训练权重是在COCO上预训练得到的,训练时只针对和PASCAL VOC相同的类别进行了训练,所以类别数是21(包括背景) 27 | * fcn_resnet50: https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth 28 | * fcn_resnet101: https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth 29 | * 注意,下载的预训练权重记得要重命名,比如在train.py中读取的是```fcn_resnet50_coco.pth```文件, 30 | 不是```fcn_resnet50_coco-1167a1af.pth``` 31 | 32 | 33 | ## 数据集,本例程使用的是PASCAL VOC2012数据集 34 | * Pascal VOC2012 train/val数据集下载地址:http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 35 | * 如果不了解数据集或者想使用自己的数据集进行训练,请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033 36 | 37 | ## 训练方法 38 | * 确保提前准备好数据集 39 | * 确保提前下载好对应预训练模型权重 40 | * 若要使用单GPU或者CPU训练,直接使用train.py训练脚本 41 | * 若要使用多GPU训练,使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量 42 | 
* 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备) 43 | * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py``` 44 | 45 | ## 注意事项 46 | * 在使用训练脚本时,注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录** 47 | * 在使用预测脚本时,要将'weights_path'设置为你自己生成的权重路径。 48 | * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改'--num-classes'、'--aux'、'--data-path'和'--weights'即可,其他代码尽量不要改动 49 | 50 | ## 如果对FCN原理不是很理解可参考我的bilibili 51 | * https://www.bilibili.com/video/BV1J3411C7zd 52 | * https://www.bilibili.com/video/BV1ev411u7TX 53 | 54 | ## 进一步了解该项目,以及对FCN代码的分析可参考我的bilibili 55 | * https://www.bilibili.com/video/BV19q4y1971Q 56 | 57 | ## Pytorch官方实现的FCN网络框架图 58 | ![torch_fcn](torch_fcn.png) 59 | 60 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/get_palette.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # 读取mask标签 6 | target = Image.open("./2007_001288.png") 7 | # 获取调色板 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # 转换成字典子形式 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch.utils.data as data 4 | from PIL import Image 5 | 6 | 7 | class VOCSegmentation(data.Dataset): 8 | def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"): 9 | super(VOCSegmentation, self).__init__() 10 | assert year in ["2007", "2012"], "year must be in ['2007', '2012']" 11 | root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}") 12 | assert os.path.exists(root), "path '{}' does not exist.".format(root) 13 | image_dir = os.path.join(root, 'JPEGImages') 14 | mask_dir = os.path.join(root, 'SegmentationClass') 15 | 16 | txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name) 17 | assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path) 18 | with open(os.path.join(txt_path), "r") as f: 19 | file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0] 20 | 21 | self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] 22 | self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names] 23 | assert (len(self.images) == len(self.masks)) 24 | self.transforms = transforms 25 | 26 | def __getitem__(self, index): 27 | """ 28 | Args: 29 | index (int): Index 30 | 31 | Returns: 32 | tuple: (image, target) where target is the image segmentation. 
33 | """ 34 | img = Image.open(self.images[index]).convert('RGB') 35 | target = Image.open(self.masks[index]) 36 | 37 | if self.transforms is not None: 38 | img, target = self.transforms(img, target) 39 | 40 | return img, target 41 | 42 | def __len__(self): 43 | return len(self.images) 44 | 45 | @staticmethod 46 | def collate_fn(batch): 47 | images, targets = list(zip(*batch)) 48 | batched_imgs = cat_list(images, fill_value=0) 49 | batched_targets = cat_list(targets, fill_value=255) 50 | return batched_imgs, batched_targets 51 | 52 | 53 | def cat_list(images, fill_value=0): 54 | # 计算该batch数据中,channel, h, w的最大值 55 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 56 | batch_shape = (len(images),) + max_size 57 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 58 | for img, pad_img in zip(images, batched_imgs): 59 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 60 | return batched_imgs 61 | 62 | 63 | # dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True)) 64 | # d1 = dataset[0] 65 | # print(d1) 66 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.3 2 | torch==1.10.0 3 | torchvision==0.11.1 4 | Pillow==8.4.0 -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_model import fcn_resnet50, fcn_resnet101 2 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/torch_fcn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_segmentation/fcn/torch_fcn.png -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/README.md: -------------------------------------------------------------------------------- 1 | # LRASPP(Searching for MobileNetV3) 2 | 3 | ## 该项目主要是来自pytorch官方torchvision模块中的源码 4 | * https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation 5 | 6 | ## 环境配置: 7 | * Python3.6/3.7/3.8 8 | * Pytorch1.10 9 | * Ubuntu或Centos(Windows暂不支持多GPU训练) 10 | * 最好使用GPU训练 11 | * 详细环境配置见```requirements.txt``` 12 | 13 | ## 文件结构: 14 | ``` 15 | ├── src: 模型的backbone以及LRASPP的搭建 16 | ├── 
train_utils: modules for training, validation and multi-GPU training 17 | ├── my_dataset.py: custom dataset for reading the VOC dataset 18 | ├── train.py: single-GPU training script 19 | ├── train_multi_GPU.py: training script for multi-GPU users 20 | ├── predict.py: a simple inference script that runs prediction with trained weights 21 | ├── validation.py: computes mIoU and other metrics on validation/test data with trained weights, and generates a record_mAP.txt file 22 | └── pascal_voc_classes.json: pascal_voc label file 23 | ``` 24 | 25 | ## Pre-trained weights download links: 26 | * Note: the official pre-trained weights were obtained on COCO, trained only on the categories shared with PASCAL VOC, so the number of classes is 21 (including background) 27 | * lraspp_mobilenet_v3_large: https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth 28 | * Note: remember to rename the downloaded pre-trained weights; for example, train.py reads the file ```lraspp_mobilenet_v3_large.pth```, 29 | not ```lraspp_mobilenet_v3_large-d234d4ea.pth``` 30 | 31 | 32 | ## Dataset: this example uses the PASCAL VOC2012 dataset 33 | * Pascal VOC2012 train/val dataset download link: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 34 | * If you are unfamiliar with the dataset or want to train on your own data, see my blog post: https://blog.csdn.net/qq_37541097/article/details/115787033 35 | 36 | ## How to train 37 | * Make sure the dataset is prepared in advance 38 | * Make sure the corresponding pre-trained weights are downloaded in advance 39 | * To train on a single GPU or the CPU, use the train.py script directly 40 | * To train on multiple GPUs, use the ```torchrun --nproc_per_node=8 train_multi_GPU.py``` command, where the ```nproc_per_node``` parameter is the number of GPUs to use 41 | * To specify which GPU devices to use, prefix the command with ```CUDA_VISIBLE_DEVICES=0,3``` (for example, to use only the 1st and 4th GPUs on the machine) 42 | * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py``` 43 | 44 | ## Notes 45 | * When using the training scripts, set '--data-path' (VOC_root) to the **root directory** containing your 'VOCdevkit' folder 46 | * When using the prediction script, set 'weights_path' to the path of the weights you generated. 47 | * When using the validation script, make sure your validation/test set contains objects of every class; only modify '--num-classes', '--data-path' and '--weights', and avoid changing the rest of the code 48 | 49 | ## If the principles of LRASPP are unclear, see my bilibili 50 | 51 | 52 | ## For a closer look at this project and an analysis of the LRASPP code, see my bilibili 53 | 54 | 55 | ## Diagram of the LRASPP network as implemented by official PyTorch 56 | ![lraspp](lraspp.png) -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/get_palette.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # read the mask label 6 | target = Image.open("./2007_001288.png") 7 | # get the palette 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # convert to a dict 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/lraspp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_segmentation/lraspp/lraspp.png -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch.utils.data as data 4 | from PIL import Image 5 | 6 | 7 | class VOCSegmentation(data.Dataset): 8 | def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"): 9 | super(VOCSegmentation, self).__init__() 10 | assert year in ["2007", "2012"], "year must be in ['2007', '2012']" 11 | root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}") 12 | assert os.path.exists(root), "path '{}' does not exist.".format(root) 13 | image_dir = os.path.join(root, 'JPEGImages') 14 | mask_dir = os.path.join(root, 'SegmentationClass') 15 | 16 | 
txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name) 17 | assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path) 18 | with open(os.path.join(txt_path), "r") as f: 19 | file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0] 20 | 21 | self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] 22 | self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names] 23 | assert (len(self.images) == len(self.masks)) 24 | self.transforms = transforms 25 | 26 | def __getitem__(self, index): 27 | """ 28 | Args: 29 | index (int): Index 30 | 31 | Returns: 32 | tuple: (image, target) where target is the image segmentation. 33 | """ 34 | img = Image.open(self.images[index]).convert('RGB') 35 | target = Image.open(self.masks[index]) 36 | 37 | if self.transforms is not None: 38 | img, target = self.transforms(img, target) 39 | 40 | return img, target 41 | 42 | def __len__(self): 43 | return len(self.images) 44 | 45 | @staticmethod 46 | def collate_fn(batch): 47 | images, targets = list(zip(*batch)) 48 | batched_imgs = cat_list(images, fill_value=0) 49 | batched_targets = cat_list(targets, fill_value=255) 50 | return batched_imgs, batched_targets 51 | 52 | 53 | def cat_list(images, fill_value=0): 54 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 55 | batch_shape = (len(images),) + max_size 56 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 57 | for img, pad_img in zip(images, batched_imgs): 58 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 59 | return batched_imgs 60 | 61 | 62 | # dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True)) 63 | # d1 = dataset[0] 64 | # print(d1) 65 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.3 2 | torch==1.10.0 3 | torchvision==0.11.1 4 | Pillow==8.4.0 -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/results20211028-105233.txt: -------------------------------------------------------------------------------- 1 | [epoch: 0] 2 | train_loss: 0.5343 3 | lr: 0.000100 4 | global correct: 93.1 5 | average row correct: ['96.8', '90.0', '73.9', '87.9', '79.4', '66.2', '92.1', '79.5', '90.9', '45.0', '88.9', '54.7', '85.8', '89.8', '87.5', '91.2', '66.8', '85.0', '68.4', '87.6', '71.6'] 6 | IoU: ['92.4', '85.7', '34.8', '84.3', '66.4', '59.9', '89.2', '71.2', '86.0', '34.6', '82.3', '46.1', '78.6', '82.1', '79.8', '82.5', '54.8', '79.4', '50.2', '83.8', '65.5'] 7 | mean IoU: 70.9 8 | 9 | [epoch: 1] 10 | train_loss: 0.4683 11 | lr: 0.000077 12 | global correct: 93.2 13 | average row correct: ['96.2', '92.6', '75.2', '92.3', '82.6', '70.9', '93.5', '83.9', '93.5', '47.9', '91.0', '61.9', '87.0', '90.5', '89.8', '90.0', '68.1', 
'86.4', '70.4', '90.4', '75.5'] 14 | IoU: ['92.5', '86.1', '34.9', '85.1', '65.3', '63.0', '90.0', '73.1', '86.0', '34.8', '83.2', '50.0', '77.6', '81.2', '79.8', '82.3', '54.3', '78.4', '49.8', '85.5', '67.3'] 15 | mean IoU: 71.4 16 | 17 | [epoch: 2] 18 | train_loss: 0.4053 19 | lr: 0.000054 20 | global correct: 93.1 21 | average row correct: ['95.9', '93.1', '75.9', '92.6', '83.8', '75.3', '94.4', '85.6', '93.7', '50.2', '91.2', '62.1', '87.1', '90.8', '90.3', '89.8', '71.2', '86.8', '71.8', '91.1', '77.5'] 22 | IoU: ['92.5', '86.0', '35.1', '84.7', '65.2', '65.6', '90.4', '73.3', '85.9', '34.8', '83.0', '50.0', '77.7', '81.7', '79.2', '82.3', '53.9', '78.5', '49.9', '85.6', '67.2'] 23 | mean IoU: 71.6 24 | 25 | [epoch: 3] 26 | train_loss: 0.4358 27 | lr: 0.000029 28 | global correct: 93.1 29 | average row correct: ['95.8', '93.4', '76.0', '92.3', '83.2', '78.1', '94.0', '86.3', '93.0', '50.9', '91.1', '62.9', '88.0', '90.9', '90.4', '89.6', '71.6', '87.0', '72.4', '92.4', '78.5'] 30 | IoU: ['92.5', '86.0', '35.3', '85.1', '66.1', '66.9', '89.8', '73.3', '85.9', '34.8', '83.0', '50.4', '78.0', '81.5', '79.0', '82.1', '54.1', '78.6', '50.0', '85.6', '67.1'] 31 | mean IoU: 71.7 32 | 33 | [epoch: 4] 34 | train_loss: 0.3886 35 | lr: 0.000000 36 | global correct: 93.1 37 | average row correct: ['95.6', '93.8', '76.0', '92.8', '83.6', '77.9', '94.2', '86.1', '93.5', '50.9', '92.0', '63.8', '88.8', '91.4', '90.6', '89.4', '73.2', '87.4', '73.0', '92.4', '78.9'] 38 | IoU: ['92.5', '86.0', '35.3', '84.4', '66.2', '66.5', '89.9', '73.2', '85.9', '34.6', '83.2', '50.8', '78.0', '81.4', '78.6', '82.0', '53.6', '78.4', '50.1', '85.7', '66.6'] 39 | mean IoU: 71.6 40 | 41 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .lraspp_model import lraspp_mobilenetv3_large 2 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/validation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from src import lraspp_mobilenetv3_large 5 | from train_utils import evaluate 6 | from my_dataset import VOCSegmentation 7 | import transforms as T 8 | 9 | 10 | class SegmentationPresetEval: 11 | def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): 12 | self.transforms = T.Compose([ 13 | T.RandomResize(base_size, base_size), 14 | T.ToTensor(), 15 | T.Normalize(mean=mean, std=std), 16 | ]) 17 | 18 | def __call__(self, img, target): 19 | return self.transforms(img, target) 20 | 21 | 22 | def main(args): 23 | device = torch.device(args.device if torch.cuda.is_available() else "cpu") 24 | assert os.path.exists(args.weights), f"weights {args.weights} not found." 
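# example invocation, matching the argparse defaults defined in parse_args() below:
# python validation.py --data-path /data/ --weights ./save_weights/model_29.pth --num-classes 20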
25 | 26 | # segmentation num_classes + background 27 | num_classes = args.num_classes + 1 28 | 29 | # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt 30 | val_dataset = VOCSegmentation(args.data_path, 31 | year="2012", 32 | transforms=SegmentationPresetEval(520), 33 | txt_name="val.txt") 34 | 35 | num_workers = 8 36 | val_loader = torch.utils.data.DataLoader(val_dataset, 37 | batch_size=1, 38 | num_workers=num_workers, 39 | pin_memory=True, 40 | collate_fn=val_dataset.collate_fn) 41 | 42 | model = lraspp_mobilenetv3_large(num_classes=num_classes) 43 | model.load_state_dict(torch.load(args.weights, map_location=device)['model']) 44 | model.to(device) 45 | 46 | confmat = evaluate(model, val_loader, device=device, num_classes=num_classes) 47 | print(confmat) 48 | 49 | 50 | def parse_args(): 51 | import argparse 52 | parser = argparse.ArgumentParser(description="pytorch lraspp validation") 53 | 54 | parser.add_argument("--data-path", default="/data/", help="VOCdevkit root") 55 | parser.add_argument("--weights", default="./save_weights/model_29.pth") 56 | parser.add_argument("--num-classes", default=20, type=int) 57 | parser.add_argument("--device", default="cuda", help="training device") 58 | parser.add_argument('--print-freq', default=10, type=int, help='print frequency') 59 | 60 | args = parser.parse_args() 61 | 62 | return args 63 | 64 | 65 | if __name__ == '__main__': 66 | args = parse_args() 67 | main(args) 68 | -------------------------------------------------------------------------------- /summary_problem.md: -------------------------------------------------------------------------------- 1 | ## Installing Tensorflow2.1 GPU and Pytorch1.3 GPU 2 | See my earlier blog post: [Installing Tensorflow2.1 GPU and Pytorch1.3 GPU (CUDA10.1) on Centos7](https://blog.csdn.net/qq_37541097/article/details/103933366) 3 | 4 | 5 | ## Can weights trained with the keras functional API be mixed with weights trained with a subclassed model? [tensorflow2.0.0] 6 | Strongly discouraged: even if the two models have exactly the same names and structure, do not mix them; there are pitfalls. Load a model's weights with the same kind of model that trained them 7 | 8 | 9 | ## model.summary() cannot be used with a subclassed model [tensorflow2.0.0] 10 | A subclassed model is not built automatically on instantiation (it is only built automatically once training starts); to use summary, build it manually first: 11 | model.build((batch_size, height, width, channel)) 12 | 13 | 14 | ## keras plot_model(model, 'my_model.png') does not work [tensorflow2.0.0] 15 | #### On Linux you need to install a few packages: 16 | * pip install pydot==1.2.3 17 | * sudo apt-get install graphviz 18 | #### On Windows you also need to install some packages (Windows is more troublesome): 19 | * pip install pydot==1.2.3 20 | * install graphviz and add the relevant environment variables 21 | Reference: https://github.com/XifengGuo/CapsNet-Keras/issues/7 22 | 23 | ## Why does optimizer.zero_grad() need to be called for every batch? [Pytorch1.3] 24 | If the historical gradients are not cleared, newly computed gradients are accumulated onto them (this behavior can be exploited to effectively train with a very large batch size) 25 | Reference: https://www.zhihu.com/question/303070254 26 | 27 | ## Pytorch1.3 ImportError: cannot import name 'PILLOW_VERSION' [Pytorch1.3] 28 | Caused by a too-new pillow version; install a version below 7.0.0 -------------------------------------------------------------------------------- /tensorflow_classification/ConfusionMatrix/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/README.md: -------------------------------------------------------------------------------- 1 | ## This folder contains the tensorflow implementations 2 | **model.py**: the model definition 3 | **train.py**: the script that trains the model 4 | **predict.py**: the script that runs inference with the model 5 | **class_indices.json**: the label file for the training dataset 6 | 7 | ------ 8 | To use the training scripts, download the corresponding flower classification dataset and split it into a training set and a validation set. 
9 | [Click here](../data_set/README.md) for instructions on downloading the dataset, plus a ready-made script for splitting it -------------------------------------------------------------------------------- /tensorflow_classification/Test11_efficientnetV2/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from model import efficientnetv2_s as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | 16 | img_size = {"s": 384, 17 | "m": 480, 18 | "l": 480} 19 | num_model = "s" 20 | im_height = im_width = img_size[num_model] 21 | 22 | # load image 23 | img_path = "../tulip.jpg" 24 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 25 | img = Image.open(img_path) 26 | # resize image 27 | img = img.resize((im_width, im_height)) 28 | plt.imshow(img) 29 | 30 | # read image 31 | img = np.array(img).astype(np.float32) 32 | 33 | # preprocess 34 | img = (img / 255. - 0.5) / 0.5 35 | 36 | # Add the image to a batch where it's the only member. 37 | img = (np.expand_dims(img, 0)) 38 | 39 | # read class_indict 40 | json_path = './class_indices.json' 41 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 42 | 43 | json_file = open(json_path, "r") 44 | class_indict = json.load(json_file) 45 | 46 | # create model 47 | model = create_model(num_classes=num_classes) 48 | 49 | weights_path = './save_weights/efficientnetv2.ckpt' 50 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 51 | model.load_weights(weights_path) 52 | 53 | result = np.squeeze(model.predict(img)) 54 | result = tf.keras.layers.Softmax()(result) 55 | predict_class = np.argmax(result) 56 | 57 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 58 | result[predict_class]) 59 | plt.title(print_res) 60 | for i in range(len(result)): 61 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 62 | result[i].numpy())) 63 | plt.show() 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tensorflow_classification/Test11_efficientnetV2/trans_weights.py: -------------------------------------------------------------------------------- 1 | from model import * 2 | 3 | 4 | def main(ckpt_path: str, 5 | model_name: str, 6 | model: tf.keras.Model): 7 | var_dict = {v.name.split(':')[0]: v for v in model.weights} 8 | 9 | reader = tf.train.load_checkpoint(ckpt_path) 10 | var_shape_map = reader.get_variable_to_shape_map() 11 | 12 | for key, var in var_dict.items(): 13 | key_ = model_name + "/" + key 14 | key_ = key_.replace("batch_normalization", "tpu_batch_normalization") 15 | if key_ in var_shape_map: 16 | if var_shape_map[key_] != var.shape: 17 | msg = "shape mismatch: {}".format(key) 18 | print(msg) 19 | else: 20 | var.assign(reader.get_tensor(key_), read_value=False) 21 | else: 22 | msg = "Not found {} in {}".format(key, ckpt_path) 23 | print(msg) 24 | 25 | model.save_weights("./{}.h5".format(model_name)) 26 | 27 | 28 | if __name__ == '__main__': 29 | model = efficientnetv2_s() 30 | model.build((1, 224, 224, 3)) 31 | main(ckpt_path="./efficientnetv2-s-21k-ft1k/model", 32 | model_name="efficientnetv2-s", 33 | model=model) 34 | 35 | # model = efficientnetv2_m() 36 | # model.build((1, 224, 224, 3)) 37 | # main(ckpt_path="./efficientnetv2-m-21k-ft1k/model", 38 | # model_name="efficientnetv2-m", 39 
| # model=model) 40 | 41 | # model = efficientnetv2_l() 42 | # model.build((1, 224, 224, 3)) 43 | # main(ckpt_path="./efficientnetv2-l-21k-ft1k/model", 44 | # model_name="efficientnetv2-l", 45 | # model=model) 46 | -------------------------------------------------------------------------------- /tensorflow_classification/Test1_official_demo/model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Dense, Flatten, Conv2D 2 | from tensorflow.keras import Model 3 | 4 | 5 | class MyModel(Model): 6 | def __init__(self): 7 | super(MyModel, self).__init__() 8 | self.conv1 = Conv2D(32, 3, activation='relu') 9 | self.flatten = Flatten() 10 | self.d1 = Dense(128, activation='relu') 11 | self.d2 = Dense(10, activation='softmax') 12 | 13 | def call(self, x, **kwargs): 14 | x = self.conv1(x) # input[batch, 28, 28, 1] output[batch, 26, 26, 32] 15 | x = self.flatten(x) # output [batch, 21632] 16 | x = self.d1(x) # output [batch, 128] 17 | return self.d2(x) # output [batch, 10] 18 | -------------------------------------------------------------------------------- /tensorflow_classification/Test2_alexnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/Test2_alexnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from PIL import Image 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from model import AlexNet_v1, AlexNet_v2 9 | 10 | 11 | def main(): 12 | im_height = 224 13 | im_width = 224 14 | 15 | # load image 16 | img_path = "../tulip.jpg" 17 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 18 | img = Image.open(img_path) 19 | 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value to (0-1) 25 | img = np.array(img) / 255. 26 | 27 | # Add the image to a batch where it's the only member. 
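# (np.expand_dims inserts the batch axis: (224, 224, 3) -> (1, 224, 224, 3), the NHWC layout tf.keras models expect)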
28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | # create model 38 | model = AlexNet_v1(num_classes=5) 39 | weights_path = "./save_weights/myAlex.h5" 40 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 41 | model.load_weights(weights_path) 42 | 43 | # prediction 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /tensorflow_classification/Test3_vgg/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/Test3_vgg/model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras import layers, Model, Sequential 2 | 3 | CONV_KERNEL_INITIALIZER = { 4 | 'class_name': 'VarianceScaling', 5 | 'config': { 6 | 'scale': 2.0, 7 | 'mode': 'fan_out', 8 | 'distribution': 'truncated_normal' 9 | } 10 | } 11 | 12 | DENSE_KERNEL_INITIALIZER = { 13 | 'class_name': 'VarianceScaling', 14 | 'config': { 15 | 'scale': 1. 
/ 3., 16 | 'mode': 'fan_out', 17 | 'distribution': 'uniform' 18 | } 19 | } 20 | 21 | 22 | def VGG(feature, im_height=224, im_width=224, num_classes=1000): 23 | # tensors in tensorflow use NHWC channel ordering 24 | input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32") 25 | x = feature(input_image) 26 | x = layers.Flatten()(x) 27 | x = layers.Dropout(rate=0.5)(x) 28 | x = layers.Dense(2048, activation='relu', 29 | kernel_initializer=DENSE_KERNEL_INITIALIZER)(x) 30 | x = layers.Dropout(rate=0.5)(x) 31 | x = layers.Dense(2048, activation='relu', 32 | kernel_initializer=DENSE_KERNEL_INITIALIZER)(x) 33 | x = layers.Dense(num_classes, 34 | kernel_initializer=DENSE_KERNEL_INITIALIZER)(x) 35 | output = layers.Softmax()(x) 36 | model = Model(inputs=input_image, outputs=output) 37 | return model 38 | 39 | 40 | def make_feature(cfg): 41 | feature_layers = [] 42 | for v in cfg: 43 | if v == "M": 44 | feature_layers.append(layers.MaxPool2D(pool_size=2, strides=2)) 45 | else: 46 | conv2d = layers.Conv2D(v, kernel_size=3, padding="SAME", activation="relu", 47 | kernel_initializer=CONV_KERNEL_INITIALIZER) 48 | feature_layers.append(conv2d) 49 | return Sequential(feature_layers, name="feature") 50 | 51 | 52 | cfgs = { 53 | 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 54 | 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 55 | 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 56 | 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 57 | } 58 | 59 | 60 | def vgg(model_name="vgg16", im_height=224, im_width=224, num_classes=1000): 61 | assert model_name in cfgs.keys(), "unsupported model {}".format(model_name) 62 | cfg = cfgs[model_name] 63 | model = VGG(make_feature(cfg), im_height=im_height, im_width=im_width, num_classes=num_classes) 64 | return model 65 | -------------------------------------------------------------------------------- /tensorflow_classification/Test3_vgg/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from PIL import Image 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from model import vgg 9 | 10 | 11 | def main(): 12 | im_height = 224 13 | im_width = 224 14 | num_classes = 5 15 | 16 | # load image 17 | img_path = "../tulip.jpg" 18 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 19 | img = Image.open(img_path) 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value to (0-1) 25 | img = np.array(img) / 255. 26 | 27 | # Add the image to a batch where it's the only member. 
28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | # create model 38 | model = vgg("vgg16", im_height=im_height, im_width=im_width, num_classes=num_classes) 39 | weights_path = "./save_weights/myVGG.h5" 40 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 41 | model.load_weights(weights_path) 42 | 43 | # prediction 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /tensorflow_classification/Test4_goolenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/Test4_goolenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import json 4 | 5 | from PIL import Image 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from model import GoogLeNet 10 | 11 | 12 | def main(): 13 | im_height = 224 14 | im_width = 224 15 | 16 | # load image 17 | img_path = "../tulip.jpg" 18 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 19 | img = Image.open(img_path) 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value and normalize 25 | img = ((np.array(img) / 255.) - 0.5) / 0.5 26 | 27 | # Add the image to a batch where it's the only member. 
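# (the (x / 255 - 0.5) / 0.5 preprocessing above maps pixel values into [-1, 1]; inference must mirror the normalization used at training time)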
28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | model = GoogLeNet(class_num=5, aux_logits=False) 38 | model.summary() 39 | # model.load_weights("./save_weights/myGoogLenet.h5", by_name=True) # h5 format 40 | weights_path = "./save_weights/myGoogLeNet.ckpt" 41 | assert len(glob.glob(weights_path + "*")), "cannot find {}".format(weights_path) 42 | model.load_weights(weights_path) 43 | 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | from PIL import Image 8 | import matplotlib.pyplot as plt 9 | 10 | from model import resnet50 11 | 12 | 13 | def main(): 14 | im_height = 224 15 | im_width = 224 16 | num_classes = 5 17 | 18 | # load image 19 | img_path = "../tulip.jpg" 20 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 21 | img = Image.open(img_path) 22 | # resize image to 224x224 23 | img = img.resize((im_width, im_height)) 24 | plt.imshow(img) 25 | 26 | # subtract the per-channel ImageNet mean (no division by std) 27 | _R_MEAN = 123.68 28 | _G_MEAN = 116.78 29 | _B_MEAN = 103.94 30 | img = np.array(img).astype(np.float32) 31 | img = img - [_R_MEAN, _G_MEAN, _B_MEAN] 32 | 33 | # Add the image to a batch where it's the only member. 
34 | img = (np.expand_dims(img, 0)) 35 | 36 | # read class_indict 37 | json_path = './class_indices.json' 38 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 39 | 40 | json_file = open(json_path, "r") 41 | class_indict = json.load(json_file) 42 | 43 | # create model 44 | feature = resnet50(num_classes=num_classes, include_top=False) 45 | feature.trainable = False 46 | model = tf.keras.Sequential([feature, 47 | tf.keras.layers.GlobalAvgPool2D(), 48 | tf.keras.layers.Dropout(rate=0.5), 49 | tf.keras.layers.Dense(1024, activation="relu"), 50 | tf.keras.layers.Dropout(rate=0.5), 51 | tf.keras.layers.Dense(num_classes), 52 | tf.keras.layers.Softmax()]) 53 | 54 | # load weights 55 | weights_path = './save_weights/resNet_50.ckpt' 56 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 57 | model.load_weights(weights_path) 58 | 59 | # prediction 60 | result = np.squeeze(model.predict(img)) 61 | predict_class = np.argmax(result) 62 | 63 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 64 | result[predict_class]) 65 | plt.title(print_res) 66 | for i in range(len(result)): 67 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 68 | result[i])) 69 | plt.show() 70 | 71 | 72 | if __name__ == '__main__': 73 | main() 74 | -------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/read_ckpt.py: -------------------------------------------------------------------------------- 1 | """ 2 | You can directly download the weights I have already converted 3 | Link: https://pan.baidu.com/s/1tLe9ahTMIwQAX7do_S59Zg Password: u199 4 | """ 5 | import tensorflow as tf 6 | 7 | 8 | def rename_var(ckpt_path, new_ckpt_path, num_classes, except_list): 9 | with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess: 10 | var_list = tf.train.list_variables(ckpt_path) 11 | new_var_list = [] 12 | 13 | for var_name, shape in var_list: 14 | print(var_name) 15 | if var_name in except_list: 16 | continue 17 | var = tf.train.load_variable(ckpt_path, var_name) 18 | new_var_name = var_name.replace('resnet_v1_50/', "") 19 | new_var_name = new_var_name.replace("bottleneck_v1/", "") 20 | new_var_name = new_var_name.replace("shortcut/weights", "shortcut/conv1/kernel") 21 | new_var_name = new_var_name.replace("weights", "kernel") 22 | new_var_name = new_var_name.replace("biases", "bias") 23 | re_var = tf.Variable(var, name=new_var_name) 24 | new_var_list.append(re_var) 25 | 26 | re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048, num_classes]), name="logits/kernel") 27 | new_var_list.append(re_var) 28 | re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name="logits/bias") 29 | new_var_list.append(re_var) 30 | saver = tf.compat.v1.train.Saver(new_var_list) 31 | sess.run(tf.compat.v1.global_variables_initializer()) 32 | saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False) 33 | 34 | 35 | def main(): 36 | except_list = ['global_step', 'resnet_v1_50/mean_rgb', 'resnet_v1_50/logits/biases', 'resnet_v1_50/logits/weights'] 37 | ckpt_path = './resnet_v1_50.ckpt' 38 | new_ckpt_path = './pretrain_weights.ckpt' 39 | num_classes = 5 40 | rename_var(ckpt_path, new_ckpt_path, num_classes, except_list) 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/read_h5.py: 
-------------------------------------------------------------------------------- 1 | import h5py 2 | 3 | f = h5py.File('./save_weights/resNet_1.h5', 'r') 4 | for root_name, g in f.items(): 5 | print(root_name) 6 | for _, weights_dirs in g.attrs.items(): 7 | for i in weights_dirs: 8 | name = root_name + "/" + str(i, encoding="utf-8") 9 | data = f[name] 10 | print(data[()]) 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tensorflow_classification/Test6_mobilenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | import tensorflow as tf 9 | 10 | from model_v2 import MobileNetV2 11 | 12 | 13 | def main(): 14 | im_height = 224 15 | im_width = 224 16 | num_classes = 5 17 | 18 | # load image 19 | img_path = "../tulip.jpg" 20 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 21 | img = Image.open(img_path) 22 | # resize image to 224x224 23 | img = img.resize((im_width, im_height)) 24 | plt.imshow(img) 25 | 26 | # scaling pixel value to (-1,1) 27 | img = np.array(img).astype(np.float32) 28 | img = ((img / 255.) - 0.5) * 2.0 29 | 30 | # Add the image to a batch where it's the only member. 31 | img = (np.expand_dims(img, 0)) 32 | 33 | # read class_indict 34 | json_path = './class_indices.json' 35 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 36 | 37 | json_file = open(json_path, "r") 38 | class_indict = json.load(json_file) 39 | 40 | # create model 41 | feature = MobileNetV2(include_top=False) 42 | model = tf.keras.Sequential([feature, 43 | tf.keras.layers.GlobalAvgPool2D(), 44 | tf.keras.layers.Dropout(rate=0.5), 45 | tf.keras.layers.Dense(num_classes), 46 | tf.keras.layers.Softmax()]) 47 | weights_path = './save_weights/resMobileNetV2.ckpt' 48 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 49 | model.load_weights(weights_path) 50 | 51 | result = np.squeeze(model.predict(img)) 52 | predict_class = np.argmax(result) 53 | 54 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 55 | result[predict_class]) 56 | plt.title(print_res) 57 | for i in range(len(result)): 58 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 59 | result[i])) 60 | plt.show() 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /tensorflow_classification/Test6_mobilenet/trans_v3_weights.py: -------------------------------------------------------------------------------- 1 | import re 2 | import tensorflow as tf 3 | from model_v3 import mobilenet_v3_large 4 | 5 | 6 | def change_word(word: str): 7 | word = word.replace("MobilenetV3/", "") 8 | 9 | if "weights" in word: 10 | word = word.replace("weights", "kernel") 11 | elif "Conv" in word and "biases" in word: 12 | word = word.replace("biases", "bias") 13 | 14 | return word 15 | 16 | 17 | def rename_var(ckpt_path, m_info): 18 | with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess: 19 | var_list = tf.train.list_variables(ckpt_path) 20 | pattern = "ExponentialMovingAverage|Momentum|global_step" 21 | 22 | var_dict = dict((change_word(name), [name, shape]) 23 | for name, shape in var_list 24 | if len(re.findall(pattern, name)) == 0) 25 | 26 | for k, v in m_info: 27 | assert k in 
var_dict, "{} not in var_dict".format(k) 28 | assert v == var_dict[k][1], "shape {} not equal {}".format(v, var_dict[k][1]) 29 | 30 | weights = [] 31 | for k, _ in m_info: 32 | var = tf.train.load_variable(ckpt_path, var_dict[k][0]) 33 | weights.append(var) 34 | 35 | return weights 36 | 37 | 38 | def main(): 39 | # https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz 40 | ckpt_path = './v3-large_224_1.0_float/pristine/model.ckpt-540000' 41 | save_path = './pre_mobilev3.h5' 42 | m = mobilenet_v3_large(input_shape=(224, 224, 3), num_classes=1001, include_top=True) 43 | m_info = [(i.name.replace(":0", ""), list(i.shape)) 44 | for i in m.weights] 45 | weights = rename_var(ckpt_path, m_info) 46 | m.set_weights(weights) 47 | m.save_weights(save_path) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /tensorflow_classification/Test7_shuffleNet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | 9 | from model import shufflenet_v2_x1_0 10 | 11 | 12 | def main(): 13 | im_height = 224 14 | im_width = 224 15 | num_classes = 5 16 | 17 | mean = [0.485, 0.456, 0.406] 18 | std = [0.229, 0.224, 0.225] 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | # resize image to 224x224 25 | img = img.resize((im_width, im_height)) 26 | plt.imshow(img) 27 | 28 | # scaling pixel value to (-1,1) 29 | img = np.array(img).astype(np.float32) 30 | img = (img / 255. - mean) / std 31 | 32 | # Add the image to a batch where it's the only member. 
33 | img = (np.expand_dims(img, 0)) 34 | 35 | # read class_indict 36 | json_path = './class_indices.json' 37 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 38 | 39 | json_file = open(json_path, "r") 40 | class_indict = json.load(json_file) 41 | 42 | # create model 43 | model = shufflenet_v2_x1_0(num_classes=num_classes) 44 | 45 | weights_path = './save_weights/shufflenetv2.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img)) 50 | predict_class = np.argmax(result) 51 | 52 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 53 | result[predict_class]) 54 | plt.title(print_res) 55 | for i in range(len(result)): 56 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 57 | result[i])) 58 | plt.show() 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /tensorflow_classification/Test9_efficientNet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | 9 | from model import efficientnet_b0 as create_model 10 | 11 | 12 | def main(): 13 | num_classes = 5 14 | 15 | img_size = {"B0": 224, 16 | "B1": 240, 17 | "B2": 260, 18 | "B3": 300, 19 | "B4": 380, 20 | "B5": 456, 21 | "B6": 528, 22 | "B7": 600} 23 | num_model = "B0" 24 | im_height = im_width = img_size[num_model] 25 | 26 | # load image 27 | img_path = "../tulip.jpg" 28 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 29 | img = Image.open(img_path) 30 | # resize image to 224x224 31 | img = img.resize((im_width, im_height)) 32 | plt.imshow(img) 33 | 34 | # read image 35 | img = np.array(img).astype(np.float32) 36 | 37 | # Add the image to a batch where it's the only member. 
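# (note: unlike the other predict scripts here, no mean/std normalization is applied; the image is fed as raw 0-255 floats)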
38 | img = (np.expand_dims(img, 0)) 39 | 40 | # read class_indict 41 | json_path = './class_indices.json' 42 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 43 | 44 | json_file = open(json_path, "r") 45 | class_indict = json.load(json_file) 46 | 47 | # create model 48 | model = create_model(num_classes=num_classes) 49 | 50 | weights_path = './save_weights/efficientnet.ckpt' 51 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 52 | model.load_weights(weights_path) 53 | 54 | result = np.squeeze(model.predict(img)) 55 | predict_class = np.argmax(result) 56 | 57 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 58 | result[predict_class]) 59 | plt.title(print_res) 60 | for i in range(len(result)): 61 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 62 | result[i])) 63 | plt.show() 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tensorflow_classification/analyze_weights_featuremap/analyze_feature_map.py: -------------------------------------------------------------------------------- 1 | from alexnet_model import AlexNet_v1, AlexNet_v2 2 | from PIL import Image 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from tensorflow.keras import Model, Input 6 | 7 | im_height = 224 8 | im_width = 224 9 | 10 | # load image 11 | img = Image.open("../tulip.jpg") 12 | # resize image to 224x224 13 | img = img.resize((im_width, im_height)) 14 | 15 | # scaling pixel value to (0-1) 16 | img = np.array(img) / 255. 17 | 18 | # Add the image to a batch where it's the only member. 19 | img = (np.expand_dims(img, 0)) 20 | 21 | 22 | model = AlexNet_v1(class_num=5) # functional api 23 | # model = AlexNet_v2(class_num=5) # subclass api 24 | # model.build((None, 224, 224, 3)) 25 | # If `by_name` is False weights are loaded based on the network's topology. 
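# (with by_name=True, weights would instead be matched to layers by their names, which helps when only part of the architecture lines up)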
26 | model.load_weights("./myAlex.h5") 27 | # model.load_weights("./submodel.h5") 28 | # for layer in model.layers: 29 | # print(layer.name) 30 | model.summary() 31 | layers_name = ["conv2d", "conv2d_1"] 32 | 33 | # functional API 34 | try: 35 | input_node = model.input 36 | output_node = [model.get_layer(name=layer_name).output for layer_name in layers_name] 37 | model1 = Model(inputs=input_node, outputs=output_node) 38 | outputs = model1.predict(img) 39 | for index, feature_map in enumerate(outputs): 40 | # [N, H, W, C] -> [H, W, C] 41 | im = np.squeeze(feature_map) 42 | 43 | # show top 12 feature maps 44 | plt.figure() 45 | for i in range(12): 46 | ax = plt.subplot(3, 4, i + 1) 47 | # [H, W, C] 48 | plt.imshow(im[:, :, i], cmap='gray') 49 | plt.suptitle(layers_name[index]) 50 | plt.show() 51 | except Exception as e: 52 | print(e) 53 | 54 | # subclasses API 55 | # outputs = model.receive_feature_map(img, layers_name) 56 | # for index, feature_maps in enumerate(outputs): 57 | # # [N, H, W, C] -> [H, W, C] 58 | # im = np.squeeze(feature_maps) 59 | # 60 | # # show top 12 feature maps 61 | # plt.figure() 62 | # for i in range(12): 63 | # ax = plt.subplot(3, 4, i + 1) 64 | # # [H, W, C] 65 | # plt.imshow(im[:, :, i], cmap='gray') 66 | # plt.suptitle(layers_name[index]) 67 | # plt.show() 68 | -------------------------------------------------------------------------------- /tensorflow_classification/analyze_weights_featuremap/analyze_kernel_weight.py: -------------------------------------------------------------------------------- 1 | from alexnet_model import AlexNet_v1, AlexNet_v2 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | model = AlexNet_v1(class_num=5) # functional api 6 | # model = AlexNet_v2(class_num=5) # subclass api 7 | # model.build((None, 224, 224, 3)) 8 | model.load_weights("./myAlex.h5") 9 | # model.load_weights("./submodel.h5") 10 | model.summary() 11 | for layer in model.layers: 12 | for index, weight in enumerate(layer.weights): 13 | # [kernel_height, kernel_width, kernel_channel, kernel_number] 14 | weight_t = weight.numpy() 15 | # read a kernel information 16 | # k = weight_t[:, :, :, 0] 17 | 18 | # calculate mean, std, min, max 19 | weight_mean = weight_t.mean() 20 | weight_std = weight_t.std(ddof=1) 21 | weight_min = weight_t.min() 22 | weight_max = weight_t.max() 23 | print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean, 24 | weight_std, 25 | weight_min, 26 | weight_max)) 27 | 28 | # plot hist image 29 | plt.close() 30 | weight_vec = np.reshape(weight_t, [-1]) 31 | plt.hist(weight_vec, bins=50) 32 | plt.title(weight.name) 33 | plt.show() -------------------------------------------------------------------------------- /tensorflow_classification/swin_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from model import swin_tiny_patch4_window7_224 as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | im_height = im_width = 224 16 | 17 | # load image 18 | img_path = "../tulip.jpg" 19 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 20 | img = Image.open(img_path) 21 | # resize image 22 | img = img.resize((im_width, im_height)) 23 | plt.imshow(img) 24 | 25 | # read image 26 | img = np.array(img).astype(np.float32) 27 | 28 | # preprocess 29 | img = (img / 255. 
- [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225] 30 | 31 | # Add the image to a batch where it's the only member. 32 | img = (np.expand_dims(img, 0)) 33 | 34 | # read class_indict 35 | json_path = './class_indices.json' 36 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 37 | 38 | json_file = open(json_path, "r") 39 | class_indict = json.load(json_file) 40 | 41 | # create model 42 | model = create_model(num_classes=num_classes) 43 | model.build([1, im_height, im_width, 3]) 44 | 45 | weights_path = './save_weights/model.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img, batch_size=1)) 50 | result = tf.keras.layers.Softmax()(result) 51 | predict_class = np.argmax(result) 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 54 | result[predict_class]) 55 | plt.title(print_res) 56 | for i in range(len(result)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | result[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /tensorflow_classification/vision_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from vit_model import vit_base_patch16_224_in21k as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | im_height = im_width = 224 16 | 17 | # load image 18 | img_path = "../tulip.jpg" 19 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 20 | img = Image.open(img_path) 21 | # resize image 22 | img = img.resize((im_width, im_height)) 23 | plt.imshow(img) 24 | 25 | # read image 26 | img = np.array(img).astype(np.float32) 27 | 28 | # preprocess 29 | img = (img / 255. - 0.5) / 0.5 30 | 31 | # Add the image to a batch where it's the only member. 32 | img = (np.expand_dims(img, 0)) 33 | 34 | # read class_indict 35 | json_path = './class_indices.json' 36 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 37 | 38 | json_file = open(json_path, "r") 39 | class_indict = json.load(json_file) 40 | 41 | # create model 42 | model = create_model(num_classes=num_classes, has_logits=False) 43 | model.build([1, 224, 224, 3]) 44 | 45 | weights_path = './save_weights/model.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img, batch_size=1)) 50 | result = tf.keras.layers.Softmax()(result) 51 | predict_class = np.argmax(result) 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 54 | result[predict_class]) 55 | plt.title(print_res) 56 | for i in range(len(result)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | result[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | --------------------------------------------------------------------------------
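The predict scripts above all follow the same single-image pattern. Below is a minimal batch-prediction sketch (an editorial example, not a file from the original repo) that reuses the exact preprocessing of vision_transformer/predict.py to classify every jpg in a folder; the test_images directory name is a placeholder, while the weights and class_indices.json paths are the ones the script above already assumes.
-------------------------------------------------------------------------------- /tensorflow_classification/vision_transformer/batch_predict_sketch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | 9 | from vit_model import vit_base_patch16_224_in21k as create_model 10 | 11 | 12 | def main(img_dir="./test_images"):  # placeholder directory 13 | with open("./class_indices.json", "r") as f: 14 | class_indict = json.load(f) 15 | 16 | # build the model once, then reuse it for every image 17 | model = create_model(num_classes=5, has_logits=False) 18 | model.build([1, 224, 224, 3]) 19 | model.load_weights('./save_weights/model.ckpt') 20 | 21 | for img_path in sorted(glob.glob(os.path.join(img_dir, "*.jpg"))): 22 | # same preprocessing as predict.py: resize, scale to [-1, 1], add the batch axis 23 | img = Image.open(img_path).convert('RGB').resize((224, 224)) 24 | img = (np.array(img).astype(np.float32) / 255. - 0.5) / 0.5 25 | img = np.expand_dims(img, 0) 26 | 27 | result = np.squeeze(model.predict(img, batch_size=1)) 28 | result = tf.nn.softmax(result).numpy() 29 | i = int(np.argmax(result)) 30 | print("{}: class: {} prob: {:.3}".format(os.path.basename(img_path), 31 | class_indict[str(i)], result[i])) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | --------------------------------------------------------------------------------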