├── .github └── ISSUE_TEMPLATE │ └── issue-template.md ├── .gitignore ├── README.md ├── article_link └── README.md ├── course_ppt └── README.md ├── data_set ├── README.md └── split_data.py ├── deploying_service ├── deploying_pytorch │ ├── convert_onnx_cls │ │ ├── class_indices.json │ │ ├── main.py │ │ └── model.py │ └── pytorch_flask_service │ │ ├── class_indices.json │ │ ├── main.py │ │ ├── model.py │ │ ├── requirements.txt │ │ ├── static │ │ └── js │ │ │ └── jquery.min.js │ │ └── templates │ │ └── up.html └── pruning_model_pytorch │ ├── class_indices.json │ ├── main.py │ ├── model.py │ ├── predict.py │ └── train.py ├── others_project ├── draw_dilated_conv │ └── main.py ├── kmeans_anchors │ ├── main.py │ ├── plot_kmeans.py │ ├── read_voc.py │ └── yolo_kmeans.py ├── openvinotest │ └── openvino_cls_test │ │ ├── class_indices.json │ │ ├── create_imagenet_annotation.py │ │ ├── float32vsint8.py │ │ ├── main.py │ │ ├── model.py │ │ └── speed_test.py ├── readPbFile │ ├── README.md │ ├── export │ │ └── checkpoint │ ├── pascal_label_map.pbtxt │ ├── readPb.py │ ├── test_images │ │ └── image_info.txt │ └── using_function.py ├── textcnnKeras │ ├── dataGenerator.py │ ├── data_link.txt │ ├── main.py │ └── models.py └── trans_widerface_to_xml │ ├── create_xml.py │ └── main.py ├── pytorch_classification ├── ConfusionMatrix │ ├── class_indices.json │ ├── main.py │ └── model.py ├── README.md ├── Test10_regnet │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── pretrain_weights.py │ ├── train.py │ └── utils.py ├── Test11_efficientnetV2 │ ├── class_indices.json │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ ├── trans_effv2_weights.py │ └── utils.py ├── Test1_official_demo │ ├── model.py │ ├── predict.py │ └── train.py ├── Test2_alexnet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test3_vggnet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test4_googlenet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test5_resnet │ ├── README.md │ ├── batch_predict.py │ ├── class_indices.json │ ├── load_weights.py │ ├── model.py │ ├── predict.py │ └── train.py ├── Test6_mobilenet │ ├── class_indices.json │ ├── model_v2.py │ ├── model_v3.py │ ├── predict.py │ └── train.py ├── Test7_shufflenet │ ├── class_indices.json │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── Test8_densenet │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── Test9_efficientNet │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── requirements.txt │ ├── train.py │ ├── trans_weights_to_pytorch.py │ └── utils.py ├── analyze_weights_featuremap │ ├── alexnet_model.py │ ├── analyze_feature_map.py │ ├── analyze_kernel_weight.py │ └── resnet_model.py ├── custom_dataset │ ├── main.py │ ├── my_dataset.py │ └── utils.py ├── mini_imagenet │ ├── README.md │ ├── imagenet_class_index.json │ ├── model.py │ ├── multi_train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_eval_utils.py │ ├── my_dataset.py │ ├── restructure_csv.py │ ├── train_multi_gpu_using_launch.py │ └── train_single_gpu.py ├── model_complexity │ ├── main.py │ ├── model.py │ └── utils.py ├── swin_transformer │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── tensorboard_test │ ├── data_utils.py │ ├── model.py │ ├── my_dataset.py │ ├── requirements.txt │ ├── train.py │ └── train_eval_utils.py ├── train_multi_GPU │ ├── README.md │ ├── accuracy.png │ ├── model.py 
│ ├── multi_train_utils │ │ ├── distributed_utils.py │ │ └── train_eval_utils.py │ ├── my_dataset.py │ ├── plot_results.py │ ├── requirements.txt │ ├── runs │ │ └── Nov07_18-58-35_wz │ │ │ └── events.out.tfevents.1604746311.localhost.41577.0 │ ├── syncbn.png │ ├── train_multi_gpu_using_launch.py │ ├── train_multi_gpu_using_spawn.py │ ├── train_single_gpu.py │ ├── training_time.png │ └── utils.py └── vision_transformer │ ├── flops.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ ├── utils.py │ └── vit_model.py ├── pytorch_object_detection ├── faster_rcnn │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ ├── mobilenetv2_model.py │ │ ├── resnet50_fpn_model.py │ │ └── vgg_model.py │ ├── draw_box_utils.py │ ├── fasterRCNN.png │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── faster_rcnn_framework.py │ │ ├── image_list.py │ │ ├── roi_head.py │ │ ├── rpn_function.py │ │ └── transform.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict.py │ ├── record_mAP.txt │ ├── requirements.txt │ ├── split_data.py │ ├── train_mobilenetv2.py │ ├── train_multi_GPU.py │ ├── train_res50_fpn.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── retinaNet │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ └── resnet50_fpn_model.py │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── anchor_utils.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── image_list.py │ │ ├── losses.py │ │ ├── retinanet.py │ │ └── transform.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── results20210421-142632.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── ssd │ ├── README.md │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict_test.py │ ├── record_mAP.txt │ ├── requirements.txt │ ├── res50_ssd.png │ ├── src │ │ ├── __init__.py │ │ ├── res50_backbone.py │ │ ├── ssd_model.py │ │ └── utils.py │ ├── train_multi_GPU.py │ ├── train_ssd300.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── train_coco_dataset │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ ├── mobilenetv2_model.py │ │ ├── resnet50_fpn_model.py │ │ └── vgg_model.py │ ├── coco80_indices.json │ ├── coco91_to_80.json │ ├── compute_receptive_field.py │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── faster_rcnn_framework.py │ │ ├── image_list.py │ │ ├── roi_head.py │ │ ├── rpn_function.py │ │ └── transform.py │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── results20210412-092355.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py └── yolov3_spp │ ├── README.md │ 
├── build_utils │ ├── __init__.py │ ├── datasets.py │ ├── img_utils.py │ ├── layers.py │ ├── parse_config.py │ ├── torch_utils.py │ └── utils.py │ ├── calculate_dataset.py │ ├── cfg │ ├── hyp.yaml │ └── yolov3-spp.cfg │ ├── data │ └── pascal_voc_classes.json │ ├── draw_box_utils.py │ ├── export_onnx.py │ ├── load_onnx_test.py │ ├── models.py │ ├── predict_test.py │ ├── requirements.txt │ ├── results20210515-152935.txt │ ├── runs │ └── Oct28_17-55-29_wz │ │ └── events.out.tfevents.1603791769.localhost.localdomain.178338.0 │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ ├── __init__.py │ ├── coco_eval.py │ ├── coco_utils.py │ ├── distributed_utils.py │ ├── group_by_aspect_ratio.py │ └── train_eval_utils.py │ ├── trans_voc2yolo.py │ ├── validation.py │ └── yolov3spp.png ├── pytorch_segmentation ├── deeplab_v3 │ ├── README.md │ ├── deeplabv3_resnet50.png │ ├── get_palette.py │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20211027-104607.txt │ ├── src │ │ ├── __init__.py │ │ ├── deeplabv3_model.py │ │ ├── mobilenet_backbone.py │ │ └── resnet_backbone.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py ├── fcn │ ├── README.md │ ├── get_palette.py │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20210918-122740.txt │ ├── src │ │ ├── __init__.py │ │ ├── backbone.py │ │ └── fcn_model.py │ ├── torch_fcn.png │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py └── lraspp │ ├── README.md │ ├── get_palette.py │ ├── lraspp.png │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20211028-105233.txt │ ├── src │ ├── __init__.py │ ├── lraspp_model.py │ └── mobilenet_backbone.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ ├── __init__.py │ ├── distributed_utils.py │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py ├── summary_problem.md └── tensorflow_classification ├── ConfusionMatrix ├── class_indices.json ├── main.py └── model.py ├── README.md ├── Test11_efficientnetV2 ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── Test1_official_demo ├── model.py └── train.py ├── Test2_alexnet ├── class_indices.json ├── fine_train_alexnet.py ├── model.py ├── predict.py ├── read_pth.py ├── train.py └── trainGPU.py ├── Test3_vgg ├── class_indices.json ├── fine_train_vgg16.py ├── model.py ├── predict.py ├── read_ckpt.py ├── train.py └── trainGPU.py ├── Test4_goolenet ├── class_indices.json ├── model.py ├── model_add_bn.py ├── predict.py ├── read_pth.py ├── train.py ├── trainGPU.py └── train_add_bn.py ├── Test5_resnet ├── batch_predict.py ├── class_indices.json ├── model.py ├── predict.py ├── read_ckpt.py ├── read_h5.py ├── subclassed_model.py ├── train.py └── trainGPU.py ├── Test6_mobilenet ├── model_v2.py ├── model_v3.py ├── predict.py ├── read_ckpt.py ├── trainGPU_mobilenet_v2.py ├── train_mobilenet_v2.py ├── train_mobilenet_v3.py ├── trans_v3_weights.py └── utils.py ├── Test7_shuffleNet ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── Test9_efficientNet ├── model.py ├── predict.py ├── train.py └── utils.py ├── analyze_weights_featuremap ├── alexnet_model.py ├── 
analyze_feature_map.py └── analyze_kernel_weight.py ├── custom_dataset ├── train_fit.py └── utils.py ├── swin_transformer ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── tensorboard_test ├── train_fit.py └── train_not_fit.py └── vision_transformer ├── predict.py ├── train.py ├── trans_weights.py ├── utils.py └── vit_model.py
/.github/ISSUE_TEMPLATE/issue-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Issue template 3 | about: Use this template for reporting your problem 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **System information** 11 | * Have I written custom code: 12 | * OS platform (e.g., Windows 10 or Linux Ubuntu 16.04): 13 | * Python version: 14 | * Deep learning framework and version (e.g., TensorFlow 2.1 or PyTorch 1.3): 15 | * Use GPU or not: 16 | * CUDA/cuDNN version (if you use a GPU): 17 | * The network you trained (e.g., ResNet-34): 18 | 19 | **Describe the current behavior** 20 | 21 | **Error info / logs** 22 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | ##ignore this file## 2 | *.idea 3 | __pycache__ 4 | *.zip 5 | flower_data 6 | *.h5 7 | *.pth 8 | *.pt 9 | *.jpg 10 | *.ckpt.* 11 | *.ckpt 12 | *.config 13 | *.gz 14 | *.onnx 15 | *.xml 16 | *.bin 17 | *.mapping 18 | checkpoint 19 | data 20 | VOCdevkit 21 | ssd_resnet50_v1_fpn_shared_box_predictor 22 | --------------------------------------------------------------------------------
/course_ppt/README.md: -------------------------------------------------------------------------------- 1 | # To keep the project compact, all of the course PPTs have been moved to Baidu Cloud 2 | 3 | ## Classification networks 4 | - **AlexNet** Link: https://pan.baidu.com/s/1RJn5lzY8LwrmckUPvXcjmg Password: 34ue 5 | - **VGG** Link: https://pan.baidu.com/s/1BnYpdaDwAIcgRm7YwakEZw Password: 8ev0 6 | - **GoogleNet** Link: https://pan.baidu.com/s/1XjZXprvayV3dDMvLjoOk3A Password: 9hq4 7 | - **ResNet** Link: https://pan.baidu.com/s/1I2LUlwCSjNKr37T0n3NKzg Password: f1s9 8 | - **ResNext** Link: https://pan.baidu.com/s/1-anFYX5572MJmiQym9D4Eg Password: f8ob 9 | - **MobileNet_v1_v2** Link: https://pan.baidu.com/s/1ReDDCuK8wyH0XqniUgiSYQ Password: ipqv 10 | - **MobileNet_v3** Link: https://pan.baidu.com/s/13mzSpyxuA4T4ki7kEN1Xqw Password: fp5g 11 | - **ShuffleNet_v1_v2** Link: https://pan.baidu.com/s/1-DDwePMPCDvjw08YU8nAAA Password: ad6n 12 | - **EfficientNet_v1** Link: https://pan.baidu.com/s/1Sep9W0vLzfjhcHAXr6Bv0Q Password: eufl 13 | - **EfficientNet_v2** Link: https://pan.baidu.com/s/1tesrgY4CHLmq6P7s7TcHCw Password: y2kz 14 | - **Transformer** Link: https://pan.baidu.com/s/1DE6RDySr7NS0HQ35gBqP_g Password: y9e7 15 | - **Vision Transformer** Link: https://pan.baidu.com/s/1wzpHG8EK5gxg6UCMscYqMw Password: cm1m 16 | - **Swin Transformer** Link: https://pan.baidu.com/s/1O6XEEZUb6B6AGYON7-EOgA Password: qkrn 17 | - **ConfusionMatrix** Link: https://pan.baidu.com/s/1EtKzHkZyv2XssYtqmGYCLg Password: uoo5 18 | 19 | 20 | ## Object detection networks 21 | - **R-CNN** Link: https://pan.baidu.com/s/1l_ZxkfJdyp3KoMLqwWbx5A Password: nm1l 22 | - **Fast R-CNN** Link: https://pan.baidu.com/s/1Pe_Tg43OVo-yZWj7t-_L6Q Password: fe73 23 | - **Faster R-CNN** Link: https://pan.baidu.com/s/16AA-d7f15etLkgKajuzpSw Password: 73h6 24 | - **FPN** Link: https://pan.baidu.com/s/1O9H0iqQMg9f_FZezUEKZ9g Password: qbl8 25 | - **SSD** Link: https://pan.baidu.com/s/15zF3GhIdg-E_tZX2Y2X-rw Password: u7k1 26 | - **RetinaNet** Link: https://pan.baidu.com/s/1beW612VCSnSu-v8iu_2-fA Password: vqbu 27 | - **YOLOv1** Link: https://pan.baidu.com/s/1vVyUNQHYEGjqosezlx_1Mg Password: b3i0 28 | - **YOLOv2** Link: https://pan.baidu.com/s/132aW1e_NYbaxxGi3cDVLYg Password: tak7 29 | - **YOLOv3** Link: https://pan.baidu.com/s/10oqZewzJmx5ptT9A4t-64w Password: npji 30 | - **YOLOv3SPP** Link: https://pan.baidu.com/s/15LRssnPez9pn6jRpW89Wlw Password: nv9f 31 | - **Calculate mAP** Link: https://pan.baidu.com/s/1jdA_n78J7nSUoOg6TTO5Bg Password: eh62 32 | - **Introduction to the COCO dataset** Link: https://pan.baidu.com/s/1HfCvjt-8o9j5a916IYNVjw Password: 6rec 33 | 34 | 35 | ## Image segmentation networks 36 | - **Introduction to semantic segmentation** Link: https://pan.baidu.com/s/1cwxe2wbaA_2DqNYADq3myA Password: zzij 37 | - **Transposed convolution** Link: https://pan.baidu.com/s/1A8688168fuWHyxJQtzupHw Password: pgnf 38 | - **FCN** Link: https://pan.baidu.com/s/1XLUneTLrdUyDAiV6kqi9rw Password: 126a --------------------------------------------------------------------------------
/data_set/README.md: -------------------------------------------------------------------------------- 1 | ## This folder holds the training data 2 | ### Usage: 3 | * (1) Create a new folder named "flower_data" inside data_set 4 | * (2) Download the flower classification dataset from [http://download.tensorflow.org/example_images/flower_photos.tgz](http://download.tensorflow.org/example_images/flower_photos.tgz) 5 | * (3) Extract the dataset into the flower_data folder 6 | * (4) Run the "split_data.py" script to automatically split the dataset into a training set (train) and a validation set (val) 7 | 8 | ``` 9 | ├── flower_data 10 | ├── flower_photos (the extracted dataset folder, 3670 samples) 11 | ├── train (the generated training set, 3306 samples) 12 | └── val (the generated validation set, 364 samples) 13 | ``` --------------------------------------------------------------------------------
/data_set/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from shutil import copy, rmtree 3 | import random 4 | 5 | 6 | def mk_file(file_path: str): 7 | if os.path.exists(file_path): 8 | # if the folder already exists, delete it first and then recreate it 9 | rmtree(file_path) 10 | os.makedirs(file_path) 11 | 12 | 13 | def main(): 14 | # make the random split reproducible 15 | random.seed(0) 16 | 17 | # move 10% of the dataset into the validation set 18 | split_rate = 0.1 19 | 20 | # point this to your extracted flower_photos folder 21 | cwd = os.getcwd() 22 | data_root = os.path.join(cwd, "flower_data") 23 | origin_flower_path = os.path.join(data_root, "flower_photos") 24 | assert os.path.exists(origin_flower_path), "path '{}' does not exist.".format(origin_flower_path) 25 | 26 | flower_class = [cla for cla in os.listdir(origin_flower_path) 27 | if os.path.isdir(os.path.join(origin_flower_path, cla))] 28 | 29 | # create the folder that holds the training set 30 | train_root = os.path.join(data_root, "train") 31 | mk_file(train_root) 32 | for cla in flower_class: 33 | # create a folder for each class 34 | mk_file(os.path.join(train_root, cla)) 35 | 36 | # create the folder that holds the validation set 37 | val_root = os.path.join(data_root, "val") 38 | mk_file(val_root) 39 | for cla in flower_class: 40 | # create a folder for each class 41 | mk_file(os.path.join(val_root, cla)) 42 | 43 | for cla in flower_class: 44 | cla_path = os.path.join(origin_flower_path, cla) 45 | images = os.listdir(cla_path) 46 | num = len(images) 47 | # randomly sample the images that go into the validation set 48 | eval_index = random.sample(images, k=int(num*split_rate)) 49 | for index, image in enumerate(images): 50 | if image in eval_index: 51 | # copy files assigned to the validation set into the corresponding folder 52 | image_path = os.path.join(cla_path, image) 53 | new_path = os.path.join(val_root, cla) 54 | copy(image_path, new_path) 55 | else: 56 | # copy files assigned to the training set into the corresponding folder 57 | image_path = os.path.join(cla_path, image) 58 | new_path = os.path.join(train_root, cla) 59 | copy(image_path, new_path) 60 | print("\r[{}] processing [{}/{}]".format(cla, index+1, num), end="") # processing bar 61 | print() 62 | 63 | print("processing done!") 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | --------------------------------------------------------------------------------
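After split_data.py has run, the generated train/val folders follow the one-sub-folder-per-class layout that torchvision's ImageFolder expects. A minimal illustrative sketch (not a file in this repository; assumes torchvision is installed and the script was run inside data_set):

```python
from torchvision import datasets, transforms

# flower_data/train/<class_name>/ -> one label per sub-folder
train_dataset = datasets.ImageFolder(
    root="flower_data/train",
    transform=transforms.Compose([transforms.Resize((224, 224)),
                                  transforms.ToTensor()]))
print(train_dataset.class_to_idx)  # {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}
```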
/deploying_service/deploying_pytorch/convert_onnx_cls/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /deploying_service/deploying_pytorch/pytorch_flask_service/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /deploying_service/deploying_pytorch/pytorch_flask_service/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==1.1.1 2 | Flask_Cors==3.0.9 3 | Pillow 4 | -------------------------------------------------------------------------------- /deploying_service/pruning_model_pytorch/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /deploying_service/pruning_model_pytorch/predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from model import resnet34 3 | from PIL import Image 4 | from torchvision import transforms 5 | import matplotlib.pyplot as plt 6 | import json 7 | 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | data_transform = transforms.Compose( 11 | [transforms.Resize(256), 12 | transforms.CenterCrop(224), 13 | transforms.ToTensor(), 14 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 15 | 16 | # load image 17 | img = Image.open("../tulip.jpg") 18 | plt.imshow(img) 19 | # [N, C, H, W] 20 | img = data_transform(img) 21 | # expand batch dimension 22 | img = torch.unsqueeze(img, dim=0) 23 | 24 | # read class_indict 25 | try: 26 | json_file = open('./class_indices.json', 'r') 27 | class_indict = json.load(json_file) 28 | except Exception as e: 29 | print(e) 30 | exit(-1) 31 | 32 | # create model 33 | model = resnet34(num_classes=5) 34 | # load model weights 35 | model_weight_path = "./resNet34.pth" 36 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 37 | model.eval() 38 | with torch.no_grad(): 39 | # predict class 40 | output = torch.squeeze(model(img)) 41 | predict = torch.softmax(output, dim=0) 42 | predict_cla = torch.argmax(predict).numpy() 43 | print(class_indict[str(predict_cla)], predict[predict_cla].numpy()) 44 | plt.show() 45 | -------------------------------------------------------------------------------- /others_project/kmeans_anchors/plot_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | np.random.seed(0) 4 | 5 | colors = np.array(['blue', 'black']) 6 | 7 | 8 | def plot_clusters(data, cls, clusters, title=""): 9 | if cls is None: 10 | c = [colors[0]] * data.shape[0] 11 | else: 12 | c = colors[cls].tolist() 13 | 14 | plt.scatter(data[:, 0], data[:, 1], c=c) 15 | for i, clus in enumerate(clusters): 16 | plt.scatter(clus[0], clus[1], c='gold', marker='*', s=150) 17 | plt.title(title) 18 | plt.show() 19 | plt.close() 20 | 21 | 22 | def distances(data, 
clusters): 23 | xy1 = data[:, None] # [N,1,2] 24 | xy2 = clusters[None] # [1,M,2] 25 | d = np.sum(np.power(xy2 - xy1, 2), axis=-1) 26 | return d 27 | 28 | 29 | def k_means(data, k, dist=np.mean): 30 | """ 31 | k-means method 32 | Args: 33 | data: the data to cluster 34 | k: number of clusters 35 | dist: method used to update the cluster coordinates 36 | """ 37 | data_number = data.shape[0] 38 | last_nearest = np.zeros((data_number,)) 39 | 40 | # init k clusters 41 | clusters = data[np.random.choice(data_number, k, replace=False)] 42 | print(f"random cluster: \n {clusters}") 43 | # plot 44 | plot_clusters(data, None, clusters, "random clusters") 45 | 46 | step = 0 47 | while True: 48 | d = distances(data, clusters) 49 | current_nearest = np.argmin(d, axis=1) 50 | 51 | # plot 52 | plot_clusters(data, current_nearest, clusters, f"step {step}") 53 | 54 | if (last_nearest == current_nearest).all(): 55 | break # clusters won't change 56 | for cluster in range(k): 57 | # update clusters 58 | clusters[cluster] = dist(data[current_nearest == cluster], axis=0) 59 | last_nearest = current_nearest 60 | step += 1 61 | 62 | return clusters 63 | 64 | 65 | def main(): 66 | x1, y1 = [np.random.normal(loc=1., size=150) for _ in range(2)] 67 | x2, y2 = [np.random.normal(loc=5., size=150) for _ in range(2)] 68 | 69 | x = np.concatenate([x1, x2]) 70 | y = np.concatenate([y1, y2]) 71 | 72 | plt.scatter(x, y, c='blue') 73 | plt.title("initial data") 74 | plt.show() 75 | plt.close() 76 | 77 | clusters = k_means(np.concatenate([x[:, None], y[:, None]], axis=-1), k=2) 78 | print(f"k-means clusters: \n {clusters}") 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | --------------------------------------------------------------------------------
/others_project/kmeans_anchors/yolo_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def wh_iou(wh1, wh2): 5 | # Returns the nxm IoU matrix.
wh1 is nx2, wh2 is mx2 6 | wh1 = wh1[:, None] # [N,1,2] 7 | wh2 = wh2[None] # [1,M,2] 8 | inter = np.minimum(wh1, wh2).prod(2) # [N,M] 9 | return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) 10 | 11 | 12 | def k_means(boxes, k, dist=np.median): 13 | """ 14 | yolo k-means method 15 | refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py 16 | Args: 17 | boxes: the bounding boxes to cluster 18 | k: number of clusters 19 | dist: method used to update the cluster coordinates (the median is used by default; it works slightly better than the mean) 20 | """ 21 | box_number = boxes.shape[0] 22 | last_nearest = np.zeros((box_number,)) 23 | # np.random.seed(0) # fix the random seed 24 | 25 | # init k clusters 26 | clusters = boxes[np.random.choice(box_number, k, replace=False)] 27 | 28 | while True: 29 | distances = 1 - wh_iou(boxes, clusters) 30 | current_nearest = np.argmin(distances, axis=1) 31 | if (last_nearest == current_nearest).all(): 32 | break # clusters won't change 33 | for cluster in range(k): 34 | # update clusters 35 | clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0) 36 | 37 | last_nearest = current_nearest 38 | 39 | return clusters 40 | --------------------------------------------------------------------------------
/others_project/openvinotest/openvino_cls_test/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/others_project/openvinotest/openvino_cls_test/create_imagenet_annotation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | image_dir = "/home/w180662/my_project/my_github/data_set/flower_data/train" 5 | assert os.path.exists(image_dir), "image dir does not exist..." 6 | 7 | img_list = glob.glob(os.path.join(image_dir, "*", "*.jpg")) 8 | assert len(img_list) > 0, "No images(.jpg) were found in image dir..."
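# Added remark (not in the original script): the glob pattern above assumes one
# sub-folder per class, i.e. image_dir/<class_name>/xxx.jpg, which is the same
# layout produced by data_set/split_data.py.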
9 | 10 | classes_info = os.listdir(image_dir) 11 | classes_info.sort() 12 | classes_dict = {} 13 | 14 | # create label file 15 | with open("my_labels.txt", "w") as lw: 16 | # note: when there is no background class, the index starts from 0 17 | for index, c in enumerate(classes_info, start=0): 18 | txt = "{}:{}".format(index, c) 19 | if index != len(classes_info) - 1: # no newline after the last entry 20 | txt += "\n" 21 | lw.write(txt) 22 | classes_dict.update({c: str(index)}) 23 | print("create my_labels.txt successful...") 24 | 25 | # create annotation file 26 | with open("my_annotation.txt", "w") as aw: 27 | for i, img in enumerate(img_list, start=1): 28 | img_classes = classes_dict[img.split("/")[-2]] 29 | txt = "{} {}".format(img, img_classes) 30 | if i != len(img_list): # no newline after the last entry 31 | txt += "\n" 32 | aw.write(txt) 33 | print("create my_annotation.txt successful...") 34 | --------------------------------------------------------------------------------
/others_project/readPbFile/README.md: -------------------------------------------------------------------------------- 1 | This project reads a frozen .pb file and runs prediction with it 2 | Usage: 3 | (1) Prepare the frozen .pb file, the .pbtxt label file, and the test images 4 | (2) Edit the relevant settings in the info.config file 5 | 6 | ![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example1.jpg) 7 | ![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example2.jpg) --------------------------------------------------------------------------------
/others_project/readPbFile/export/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model.ckpt" 2 | all_model_checkpoint_paths: "model.ckpt" 3 | --------------------------------------------------------------------------------
/others_project/readPbFile/pascal_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 1 3 | name: 'aeroplane' 4 | } 5 | 6 | item { 7 | id: 2 8 | name: 'bicycle' 9 | } 10 | 11 | item { 12 | id: 3 13 | name: 'bird' 14 | } 15 | 16 | item { 17 | id: 4 18 | name: 'boat' 19 | } 20 | 21 | item { 22 | id: 5 23 | name: 'bottle' 24 | } 25 | 26 | item { 27 | id: 6 28 | name: 'bus' 29 | } 30 | 31 | item { 32 | id: 7 33 | name: 'car' 34 | } 35 | 36 | item { 37 | id: 8 38 | name: 'cat' 39 | } 40 | 41 | item { 42 | id: 9 43 | name: 'chair' 44 | } 45 | 46 | item { 47 | id: 10 48 | name: 'cow' 49 | } 50 | 51 | item { 52 | id: 11 53 | name: 'diningtable' 54 | } 55 | 56 | item { 57 | id: 12 58 | name: 'dog' 59 | } 60 | 61 | item { 62 | id: 13 63 | name: 'horse' 64 | } 65 | 66 | item { 67 | id: 14 68 | name: 'motorbike' 69 | } 70 | 71 | item { 72 | id: 15 73 | name: 'person' 74 | } 75 | 76 | item { 77 | id: 16 78 | name: 'pottedplant' 79 | } 80 | 81 | item { 82 | id: 17 83 | name: 'sheep' 84 | } 85 | 86 | item { 87 | id: 18 88 | name: 'sofa' 89 | } 90 | 91 | item { 92 | id: 19 93 | name: 'train' 94 | } 95 | 96 | item { 97 | id: 20 98 | name: 'tvmonitor' 99 | } 100 | --------------------------------------------------------------------------------
/others_project/readPbFile/readPb.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import configparser 3 | from distutils.version import StrictVersion 4 | import cv2 5 | import glob 6 | from using_function import draw_box, read_pbtxt, get_inAndout_tensor, convert_type, read_image 7 | 8 | if StrictVersion(tf.__version__) < StrictVersion('1.12.0'): 9 | raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.') 10 | 11 | # read the parameter configuration file 12 | conf = configparser.ConfigParser() 13 | conf.read('info.config')
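# An illustrative info.config for this script (added for clarity; the section and
# key names below are exactly the ones read by the conf.get calls that follow,
# while the values are placeholders to adapt):
#   [tensorflow]
#   path_to_frozen_graph = ./export/frozen_inference_graph.pb
#   path_to_labels = ./pascal_label_map.pbtxt
#   path_to_images = ./test_images/*.jpg
#   probability_thresh = 0.5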
14 | path_to_frozen_graph = conf.get('tensorflow', 'path_to_frozen_graph') 15 | path_to_labels = conf.get('tensorflow', 'path_to_labels') 16 | path_to_images = conf.get('tensorflow', 'path_to_images') 17 | probability_thresh = float(conf.get('tensorflow', 'probability_thresh')) 18 | 19 | # read the pbtxt label info 20 | category_index = read_pbtxt(path_to_labels) 21 | 22 | detection_graph = tf.Graph() 23 | with detection_graph.as_default(): 24 | od_graph_def = tf.GraphDef() 25 | with tf.gfile.GFile(path_to_frozen_graph, 'rb') as fid: 26 | serialized_graph = fid.read() 27 | od_graph_def.ParseFromString(serialized_graph) 28 | tf.import_graph_def(od_graph_def, name='') 29 | 30 | with detection_graph.as_default(): 31 | with tf.Session() as sess: 32 | # Get handles to input and output tensors 33 | tensor_dict, image_tensor = get_inAndout_tensor() 34 | test_image_paths = glob.glob(path_to_images) 35 | for image_path in test_image_paths: 36 | image_BGR, image_np_expanded = read_image(image_path) 37 | 38 | # Run inference 39 | output_dict = sess.run(tensor_dict, 40 | feed_dict={image_tensor: image_np_expanded}) 41 | # all outputs are float32 numpy arrays, so convert types as appropriate 42 | convert_type(output_dict) 43 | 44 | draw_box(image_BGR, 45 | output_dict['detection_boxes'], 46 | output_dict['detection_classes'], 47 | output_dict['detection_scores'], 48 | category_index, 49 | thresh=probability_thresh, 50 | line_thickness=5) 51 | cv2.namedWindow("prediction", cv2.WINDOW_AUTOSIZE) 52 | cv2.imshow("prediction", image_BGR) 53 | cv2.waitKey(0) 54 | --------------------------------------------------------------------------------
/others_project/readPbFile/test_images/image_info.txt: -------------------------------------------------------------------------------- 1 | 2 | Image provenance: 3 | image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg 4 | image2.jpg: Michael Miley, 5 | https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4 6 | 7 | --------------------------------------------------------------------------------
/others_project/textcnnKeras/dataGenerator.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | from sklearn.preprocessing import LabelEncoder 3 | import random 4 | 5 | 6 | def content2idList(content, word2id_dict): 7 | """ 8 | converts a piece of text into the corresponding per-character ids 9 | content: the input text 10 | word2id_dict: the dictionary used to look up the conversion 11 | """ 12 | idList = [] 13 | for word in content: # iterate over every character 14 | if word in word2id_dict: # only convert characters that exist in the dictionary; drop the rest 15 | idList.append(word2id_dict[word]) 16 | return idList 17 | 18 | 19 | def generatorInfo(batch_size, seq_length, num_classes, file_name): 20 | """ 21 | batch_size: batch size of the generated data 22 | seq_length: length of the input character sequence 23 | num_classes: number of text categories 24 | file_name: path of the file to read 25 | """ 26 | # read the vocabulary file 27 | with open('./cnews/cnews.vocab.txt', encoding='utf-8') as file: 28 | vocabulary_list = [k.strip() for k in file.readlines()] 29 | word2id_dict = dict([(b, a) for a, b in enumerate(vocabulary_list)]) 30 | 31 | # read the text file 32 | with open(file_name, encoding='utf-8') as file: 33 | line_list = [k.strip() for k in file.readlines()] 34 | data_label_list = [] # list for the data labels 35 | data_content_list = [] # list for the data contents 36 | for k in line_list: 37 | t = k.split(maxsplit=1) 38 | data_label_list.append(t[0]) 39 | data_content_list.append(t[1]) 40 | 41 | data_id_list = [content2idList(content, word2id_dict) for content in data_content_list] # convert the text data into sequences of ids 42 | # convert the list into an ndarray and unify every text sequence to length seq_length; 43 | # sequences longer than seq_length are truncated from the front (keeping the latter part), shorter ones are zero-padded at the front 44 | data_X = keras.preprocessing.sequence.pad_sequences(data_id_list, seq_length, truncating='pre') 45 | labelEncoder = LabelEncoder() 46 | data_y = labelEncoder.fit_transform(data_label_list) # convert the text labels into numeric labels 47 | data_Y = keras.utils.to_categorical(data_y, num_classes) # convert the numeric labels into one-hot labels 48 | 49 | while True: 50 | selected_index = random.sample(list(range(len(data_y))), k=batch_size) # randomly sample batch_size indices over the dataset 51 | batch_X = data_X[selected_index] # the sampled text data (id sequences) 52 | batch_Y = data_Y[selected_index] # the sampled labels (one-hot encoded) 53 | yield (batch_X, batch_Y) 54 | 55 | --------------------------------------------------------------------------------
/others_project/textcnnKeras/data_link.txt: -------------------------------------------------------------------------------- 1 | baidupan_url = "https://pan.baidu.com/s/1w452Z5eXbQSDQfgEBNUdlg" 2 | extract_code = "8cwv" --------------------------------------------------------------------------------
/others_project/textcnnKeras/main.py: -------------------------------------------------------------------------------- 1 | from models import text_cnn, simpleNet, text_cnn_V2 2 | from dataGenerator import generatorInfo 3 | from tensorflow import keras 4 | 5 | vocab_size = 5000 6 | seq_length = 600 7 | embedding_dim = 64 8 | num_classes = 10 9 | trainBatchSize = 64 10 | evalBatchSize = 200 11 | steps_per_epoch = 50000 // trainBatchSize 12 | epoch = 2 13 | workers = 4 14 | logdir = './log/' 15 | trainFileName = './cnews/cnews.train.txt' 16 | evalFileName = './cnews/cnews.test.txt' 17 | 18 | model = text_cnn(seq_length=seq_length, 19 | vocab_size=vocab_size, 20 | embedding_dim=embedding_dim, 21 | num_cla=num_classes, 22 | kernelNum=64) 23 | 24 | trainGenerator = generatorInfo(trainBatchSize, seq_length, num_classes, trainFileName) 25 | evalGenerator = generatorInfo(evalBatchSize, seq_length, num_classes, evalFileName) 26 | 27 | 28 | def lrSchedule(epoch): 29 | lr = keras.backend.get_value(model.optimizer.lr) 30 | if epoch % 1 == 0 and epoch != 0: 31 | lr = lr * 0.5 32 | return lr 33 | 34 | 35 | log = keras.callbacks.TensorBoard(log_dir=logdir, update_freq=500) 36 | reduceLr = keras.callbacks.LearningRateScheduler(lrSchedule, verbose=1) 37 | 38 | model.fit_generator(generator=trainGenerator, 39 | steps_per_epoch=steps_per_epoch, 40 | epochs=epoch, 41 | validation_data=evalGenerator, 42 | validation_steps=10, 43 | workers=1, 44 | callbacks=[log, reduceLr]) 45 | model.save_weights(logdir + 'train_weight.h5') 46 | --------------------------------------------------------------------------------
/pytorch_classification/ConfusionMatrix/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/pytorch_classification/README.md: -------------------------------------------------------------------------------- 1 | ## This folder holds the PyTorch implementations 2 | **model.py**: the model definition 3 | **train.py**: the script that trains the model 4 | **predict.py**: the script that runs prediction with the trained model 5 | **class_indices.json**: the label file corresponding to the training dataset 6 | 7 | ------ 8 | To use the training scripts you need to download the flower classification dataset and split it into a training set and a validation set. 9 | [Click here](../data_set/README.md) for how to download the dataset; a ready-made splitting script is provided as well --------------------------------------------------------------------------------
/pytorch_classification/Test10_regnet/my_dataset.py: 
-------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """custom dataset""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # 'RGB' is a color image, 'L' is a grayscale image 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # for the official default_collate implementation, see 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | --------------------------------------------------------------------------------
/pytorch_classification/Test10_regnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import create_regnet 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = create_regnet(model_name="RegNetY_400MF", num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-29.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | --------------------------------------------------------------------------------
/pytorch_classification/Test11_efficientnetV2/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/pytorch_classification/Test11_efficientnetV2/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """custom dataset""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # 'RGB' is a color image, 'L' is a grayscale image 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # for the official default_collate implementation, see 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | --------------------------------------------------------------------------------
/pytorch_classification/Test11_efficientnetV2/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import efficientnetv2_s as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | img_size = {"s": [300, 384], # train_size, val_size 16 | "m": [384, 480], 17 | "l": [384, 480]} 18 | num_model = "s" 19 | 20 | data_transform = transforms.Compose( 21 | [transforms.Resize(img_size[num_model][1]), 22 | transforms.CenterCrop(img_size[num_model][1]), 23 | transforms.ToTensor(), 24 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]) 25 | 26 | # load image 27 | img_path = "../tulip.jpg" 28 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 29 | img = Image.open(img_path) 30 | plt.imshow(img) 31 | # [N, C, H, W] 32 | img = data_transform(img) 33 | # expand batch dimension 34 | img = torch.unsqueeze(img, dim=0) 35 | 36 | # read class_indict 37 | json_path = './class_indices.json' 38 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 39 | 40 | json_file = open(json_path, "r") 41 | class_indict = json.load(json_file) 42 | 43 | # create model 44 | model = create_model(num_classes=5).to(device) 45 | # load model weights 46 | model_weight_path = "./weights/model-29.pth" 47 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 48 | model.eval() 49 | with torch.no_grad(): 50 | # predict class 51 | output = torch.squeeze(model(img.to(device))).cpu() 52 | predict = torch.softmax(output, dim=0) 53 | predict_cla = torch.argmax(predict).numpy() 54 | 55 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 56 | predict[predict_cla].numpy()) 57 | plt.title(print_res) 58 | for i in range(len(predict)): 59 | print("class: {:10} prob: 
{:.3}".format(class_indict[str(i)], 60 | predict[i].numpy())) 61 | plt.show() 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /pytorch_classification/Test1_official_demo/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class LeNet(nn.Module): 6 | def __init__(self): 7 | super(LeNet, self).__init__() 8 | self.conv1 = nn.Conv2d(3, 16, 5) 9 | self.pool1 = nn.MaxPool2d(2, 2) 10 | self.conv2 = nn.Conv2d(16, 32, 5) 11 | self.pool2 = nn.MaxPool2d(2, 2) 12 | self.fc1 = nn.Linear(32*5*5, 120) 13 | self.fc2 = nn.Linear(120, 84) 14 | self.fc3 = nn.Linear(84, 10) 15 | 16 | def forward(self, x): 17 | x = F.relu(self.conv1(x)) # input(3, 32, 32) output(16, 28, 28) 18 | x = self.pool1(x) # output(16, 14, 14) 19 | x = F.relu(self.conv2(x)) # output(32, 10, 10) 20 | x = self.pool2(x) # output(32, 5, 5) 21 | x = x.view(-1, 32*5*5) # output(32*5*5) 22 | x = F.relu(self.fc1(x)) # output(120) 23 | x = F.relu(self.fc2(x)) # output(84) 24 | x = self.fc3(x) # output(10) 25 | return x 26 | 27 | 28 | -------------------------------------------------------------------------------- /pytorch_classification/Test1_official_demo/predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | from PIL import Image 4 | 5 | from model import LeNet 6 | 7 | 8 | def main(): 9 | transform = transforms.Compose( 10 | [transforms.Resize((32, 32)), 11 | transforms.ToTensor(), 12 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 13 | 14 | classes = ('plane', 'car', 'bird', 'cat', 15 | 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 16 | 17 | net = LeNet() 18 | net.load_state_dict(torch.load('Lenet.pth')) 19 | 20 | im = Image.open('1.jpg') 21 | im = transform(im) # [C, H, W] 22 | im = torch.unsqueeze(im, dim=0) # [N, C, H, W] 23 | 24 | with torch.no_grad(): 25 | outputs = net(im) 26 | predict = torch.max(outputs, dim=1)[1].data.numpy() 27 | print(classes[int(predict)]) 28 | 29 | 30 | if __name__ == '__main__': 31 | main() 32 | -------------------------------------------------------------------------------- /pytorch_classification/Test2_alexnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test2_alexnet/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class AlexNet(nn.Module): 6 | def __init__(self, num_classes=1000, init_weights=False): 7 | super(AlexNet, self).__init__() 8 | self.features = nn.Sequential( 9 | nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2), # input[3, 224, 224] output[48, 55, 55] 10 | nn.ReLU(inplace=True), 11 | nn.MaxPool2d(kernel_size=3, stride=2), # output[48, 27, 27] 12 | nn.Conv2d(48, 128, kernel_size=5, padding=2), # output[128, 27, 27] 13 | nn.ReLU(inplace=True), 14 | nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 13, 13] 15 | nn.Conv2d(128, 192, kernel_size=3, padding=1), # output[192, 13, 13] 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(192, 192, kernel_size=3, padding=1), # output[192, 13, 13] 18 | nn.ReLU(inplace=True), 19 | 
nn.Conv2d(192, 128, kernel_size=3, padding=1), # output[128, 13, 13] 20 | nn.ReLU(inplace=True), 21 | nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 6, 6] 22 | ) 23 | self.classifier = nn.Sequential( 24 | nn.Dropout(p=0.5), 25 | nn.Linear(128 * 6 * 6, 2048), 26 | nn.ReLU(inplace=True), 27 | nn.Dropout(p=0.5), 28 | nn.Linear(2048, 2048), 29 | nn.ReLU(inplace=True), 30 | nn.Linear(2048, num_classes), 31 | ) 32 | if init_weights: 33 | self._initialize_weights() 34 | 35 | def forward(self, x): 36 | x = self.features(x) 37 | x = torch.flatten(x, start_dim=1) 38 | x = self.classifier(x) 39 | return x 40 | 41 | def _initialize_weights(self): 42 | for m in self.modules(): 43 | if isinstance(m, nn.Conv2d): 44 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 45 | if m.bias is not None: 46 | nn.init.constant_(m.bias, 0) 47 | elif isinstance(m, nn.Linear): 48 | nn.init.normal_(m.weight, 0, 0.01) 49 | nn.init.constant_(m.bias, 0) 50 | --------------------------------------------------------------------------------
/pytorch_classification/Test2_alexnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import AlexNet 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize((224, 224)), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = AlexNet(num_classes=5).to(device) 40 | 41 | # load model weights 42 | weights_path = "./AlexNet.pth" 43 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 44 | model.load_state_dict(torch.load(weights_path)) 45 | 46 | model.eval() 47 | with torch.no_grad(): 48 | # predict class 49 | output = torch.squeeze(model(img.to(device))).cpu() 50 | predict = torch.softmax(output, dim=0) 51 | predict_cla = torch.argmax(predict).numpy() 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 54 | predict[predict_cla].numpy()) 55 | plt.title(print_res) 56 | for i in range(len(predict)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | predict[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | --------------------------------------------------------------------------------
/pytorch_classification/Test3_vggnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/pytorch_classification/Test3_vggnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import vgg 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize((224, 224)), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | plt.imshow(img) 25 | # [N, C, H, W] 26 | img = data_transform(img) 27 | # expand batch dimension 28 | img = torch.unsqueeze(img, dim=0) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | # create model 38 | model = vgg(model_name="vgg16", num_classes=5).to(device) 39 | # load model weights 40 | weights_path = "./vgg16Net.pth" 41 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 42 | model.load_state_dict(torch.load(weights_path, map_location=device)) 43 | 44 | model.eval() 45 | with torch.no_grad(): 46 | # predict class 47 | output = torch.squeeze(model(img.to(device))).cpu() 48 | predict = torch.softmax(output, dim=0) 49 | predict_cla = torch.argmax(predict).numpy() 50 | 51 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 52 | predict[predict_cla].numpy()) 53 | plt.title(print_res) 54 | for i in range(len(predict)): 55 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 56 | predict[i].numpy())) 57 | plt.show() 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | --------------------------------------------------------------------------------
/pytorch_classification/Test4_googlenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } --------------------------------------------------------------------------------
/pytorch_classification/Test4_googlenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import GoogLeNet 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize((224, 224)), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | plt.imshow(img) 25 | # [N, C, H, W] 26 | img = data_transform(img) 27 | # expand batch dimension 28 | img = torch.unsqueeze(img, dim=0) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | # create model 38 | model = GoogLeNet(num_classes=5, aux_logits=False).to(device) 39 | 40 | # load model weights 41 | weights_path = "./googleNet.pth" 42 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 43 | missing_keys, unexpected_keys = model.load_state_dict(torch.load(weights_path, map_location=device), 44 | strict=False) 45 | 46 | model.eval() 47 | with torch.no_grad(): 48 | # predict class 49 | output = torch.squeeze(model(img.to(device))).cpu() 50 | predict = torch.softmax(output, dim=0) 51 | predict_cla = torch.argmax(predict).numpy() 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 54 | predict[predict_cla].numpy()) 55 | plt.title(print_res) 56 | for i in range(len(predict)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | predict[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | --------------------------------------------------------------------------------
/pytorch_classification/Test5_resnet/README.md: -------------------------------------------------------------------------------- 1 | ## File structure: 2 | ``` 3 | ├── model.py: the ResNet model definition 4 | ├── train.py: training script 5 | ├── predict.py: single-image prediction script 6 | └── batch_predict.py: batch-image prediction script 7 | ``` --------------------------------------------------------------------------------
/pytorch_classification/Test5_resnet/batch_predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | 8 | from model import resnet34 9 | 10 | 11 | def main(): 12 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 13 | 14 | data_transform = transforms.Compose( 15 | [transforms.Resize(256), 16 | transforms.CenterCrop(224), 17 | transforms.ToTensor(), 18 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 19 | 20 | # load image 21 | # point this to the folder of images you want to predict 22 | imgs_root = "/data/imgs" 23 | assert os.path.exists(imgs_root), f"file: '{imgs_root}' does not exist." 24 | # collect the paths of all jpg images in the folder 25 | img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(".jpg")] 26 | 27 | # read class_indict 28 | json_path = './class_indices.json' 29 | assert os.path.exists(json_path), f"file: '{json_path}' does not exist." 30 | 31 | json_file = open(json_path, "r") 32 | class_indict = json.load(json_file) 33 | 34 | # create model 35 | model = resnet34(num_classes=5).to(device) 36 | 37 | # load model weights 38 | weights_path = "./resNet34.pth" 39 | assert os.path.exists(weights_path), f"file: '{weights_path}' does not exist." 40 | model.load_state_dict(torch.load(weights_path, map_location=device)) 41 | 42 | # prediction 43 | model.eval() 44 | batch_size = 8 # how many images are packed into one batch per prediction 45 | with torch.no_grad(): 46 | for ids in range(0, len(img_path_list) // batch_size): 47 | img_list = [] 48 | for img_path in img_path_list[ids * batch_size: (ids + 1) * batch_size]: 49 | assert os.path.exists(img_path), f"file: '{img_path}' does not exist."
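# Added remark (not in the original script): Image.open() is not forced to RGB here,
# so a grayscale or RGBA jpg would give the wrong channel count for Normalize;
# img = Image.open(img_path).convert('RGB') is a safe variant for mixed inputs.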
50 | img = Image.open(img_path) 51 | img = data_transform(img) 52 | img_list.append(img) 53 | 54 | # batch img 55 | # 将img_list列表中的所有图像打包成一个batch 56 | batch_img = torch.stack(img_list, dim=0) 57 | # predict class 58 | output = model(batch_img.to(device)).cpu() 59 | predict = torch.softmax(output, dim=1) 60 | probs, classes = torch.max(predict, dim=1) 61 | 62 | for idx, (pro, cla) in enumerate(zip(probs, classes)): 63 | print("image: {} class: {} prob: {:.3}".format(img_path_list[ids * batch_size + idx], 64 | class_indict[str(cla.numpy())], 65 | pro.numpy())) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/load_weights.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | from model import resnet34 5 | 6 | 7 | def main(): 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | # load pretrain weights 11 | # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth 12 | model_weight_path = "./resnet34-pre.pth" 13 | assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path) 14 | 15 | # option1 16 | net = resnet34() 17 | net.load_state_dict(torch.load(model_weight_path, map_location=device)) 18 | # change fc layer structure 19 | in_channel = net.fc.in_features 20 | net.fc = nn.Linear(in_channel, 5) 21 | 22 | # option2 23 | # net = resnet34(num_classes=5) 24 | # pre_weights = torch.load(model_weight_path, map_location=device) 25 | # del_key = [] 26 | # for key, _ in pre_weights.items(): 27 | # if "fc" in key: 28 | # del_key.append(key) 29 | # 30 | # for key in del_key: 31 | # del pre_weights[key] 32 | # 33 | # missing_keys, unexpected_keys = net.load_state_dict(pre_weights, strict=False) 34 | # print("[missing_keys]:", *missing_keys, sep="\n") 35 | # print("[unexpected_keys]:", *unexpected_keys, sep="\n") 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import resnet34 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), 
"file: '{}' dose not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = resnet34(num_classes=5).to(device) 40 | 41 | # load model weights 42 | weights_path = "./resNet34.pth" 43 | assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path) 44 | model.load_state_dict(torch.load(weights_path, map_location=device)) 45 | 46 | # prediction 47 | model.eval() 48 | with torch.no_grad(): 49 | # predict class 50 | output = torch.squeeze(model(img.to(device))).cpu() 51 | predict = torch.softmax(output, dim=0) 52 | predict_cla = torch.argmax(predict).numpy() 53 | 54 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 55 | predict[predict_cla].numpy()) 56 | plt.title(print_res) 57 | for i in range(len(predict)): 58 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 59 | predict[i].numpy())) 60 | plt.show() 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /pytorch_classification/Test6_mobilenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test6_mobilenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model_v2 import MobileNetV2 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = MobileNetV2(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./MobileNetV2.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- 
/pytorch_classification/Test7_shufflenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test7_shufflenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """Custom dataset""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # mode 'RGB' is a color image, 'L' is grayscale 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # for the official default_collate implementation, see 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test7_shufflenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import shufflenet_v2_x1_0 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = shufflenet_v2_x1_0(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-29.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: 
{:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /pytorch_classification/Test8_densenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test8_densenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import densenet121 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = densenet121(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-3.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | 
-------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import efficientnet_b0 as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | img_size = {"B0": 224, 16 | "B1": 240, 17 | "B2": 260, 18 | "B3": 300, 19 | "B4": 380, 20 | "B5": 456, 21 | "B6": 528, 22 | "B7": 600} 23 | num_model = "B0" 24 | 25 | data_transform = transforms.Compose( 26 | [transforms.Resize(img_size[num_model]), 27 | transforms.CenterCrop(img_size[num_model]), 28 | transforms.ToTensor(), 29 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 30 | 31 | # load image 32 | img_path = "../tulip.jpg" 33 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 34 | img = Image.open(img_path) 35 | plt.imshow(img) 36 | # [N, C, H, W] 37 | img = data_transform(img) 38 | # expand batch dimension 39 | img = torch.unsqueeze(img, dim=0) 40 | 41 | # read class_indict 42 | json_path = './class_indices.json' 43 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 44 | 45 | json_file = open(json_path, "r") 46 | class_indict = json.load(json_file) 47 | 48 | # create model 49 | model = create_model(num_classes=5).to(device) 50 | # load model weights 51 | model_weight_path = "./weights/model-29.pth" 52 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 53 | model.eval() 54 | with torch.no_grad(): 55 | # predict class 56 | output = torch.squeeze(model(img.to(device))).cpu() 57 | predict = torch.softmax(output, dim=0) 58 | predict_cla = torch.argmax(predict).numpy() 59 | 60 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 61 | predict[predict_cla].numpy()) 62 | plt.title(print_res) 63 | for i in range(len(predict)): 64 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 65 | predict[i].numpy())) 66 | 
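# note (added): the img_size table above follows the published EfficientNet
# input resolutions (B0=224 ... B7=600); each variant is trained at its own
# resolution, so the crop size must match the loaded weights.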
plt.show() 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.5 2 | matplotlib==3.2.1 3 | tqdm==4.56.0 4 | torch>=1.7.1 5 | torchvision>=0.8.2 6 | -------------------------------------------------------------------------------- /pytorch_classification/analyze_weights_featuremap/alexnet_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class AlexNet(nn.Module): 6 | def __init__(self, num_classes=1000, init_weights=False): 7 | super(AlexNet, self).__init__() 8 | self.features = nn.Sequential( 9 | nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2), # input[3, 224, 224] output[48, 55, 55] 10 | nn.ReLU(inplace=True), 11 | nn.MaxPool2d(kernel_size=3, stride=2), # output[48, 27, 27] 12 | nn.Conv2d(48, 128, kernel_size=5, padding=2), # output[128, 27, 27] 13 | nn.ReLU(inplace=True), 14 | nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 13, 13] 15 | nn.Conv2d(128, 192, kernel_size=3, padding=1), # output[192, 13, 13] 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(192, 192, kernel_size=3, padding=1), # output[192, 13, 13] 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(192, 128, kernel_size=3, padding=1), # output[128, 13, 13] 20 | nn.ReLU(inplace=True), 21 | nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 6, 6] 22 | ) 23 | self.classifier = nn.Sequential( 24 | nn.Dropout(p=0.5), 25 | nn.Linear(128 * 6 * 6, 2048), 26 | nn.ReLU(inplace=True), 27 | nn.Dropout(p=0.5), 28 | nn.Linear(2048, 2048), 29 | nn.ReLU(inplace=True), 30 | nn.Linear(2048, num_classes), 31 | ) 32 | if init_weights: 33 | self._initialize_weights() 34 | 35 | def forward(self, x): 36 | outputs = [] 37 | for name, module in self.features.named_children(): 38 | x = module(x) 39 | if name in ["0", "3", "6"]: 40 | outputs.append(x) 41 | 42 | return outputs 43 | 44 | def _initialize_weights(self): 45 | for m in self.modules(): 46 | if isinstance(m, nn.Conv2d): 47 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 48 | if m.bias is not None: 49 | nn.init.constant_(m.bias, 0) 50 | elif isinstance(m, nn.Linear): 51 | nn.init.normal_(m.weight, 0, 0.01) 52 | nn.init.constant_(m.bias, 0) 53 | -------------------------------------------------------------------------------- /pytorch_classification/analyze_weights_featuremap/analyze_feature_map.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from alexnet_model import AlexNet 3 | from resnet_model import resnet34 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | from PIL import Image 7 | from torchvision import transforms 8 | 9 | data_transform = transforms.Compose( 10 | [transforms.Resize((224, 224)), 11 | transforms.ToTensor(), 12 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 13 | 14 | # data_transform = transforms.Compose( 15 | # [transforms.Resize(256), 16 | # transforms.CenterCrop(224), 17 | # transforms.ToTensor(), 18 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 19 | 20 | # create model 21 | model = AlexNet(num_classes=5) 22 | # model = resnet34(num_classes=5) 23 | # load model weights 24 | model_weight_path = "./AlexNet.pth" # "./resNet34.pth" 25 | model.load_state_dict(torch.load(model_weight_path)) 26 | print(model) 27 
| 28 | # load image 29 | img = Image.open("../tulip.jpg") 30 | # [N, C, H, W] 31 | img = data_transform(img) 32 | # expand batch dimension 33 | img = torch.unsqueeze(img, dim=0) 34 | 35 | # forward 36 | out_put = model(img) 37 | for feature_map in out_put: 38 | # [N, C, H, W] -> [C, H, W] 39 | im = np.squeeze(feature_map.detach().numpy()) 40 | # [C, H, W] -> [H, W, C] 41 | im = np.transpose(im, [1, 2, 0]) 42 | 43 | # show top 12 feature maps 44 | plt.figure() 45 | for i in range(12): 46 | ax = plt.subplot(3, 4, i+1) 47 | # [H, W, C] 48 | plt.imshow(im[:, :, i], cmap='gray') 49 | plt.show() 50 | 51 | -------------------------------------------------------------------------------- /pytorch_classification/analyze_weights_featuremap/analyze_kernel_weight.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from alexnet_model import AlexNet 3 | from resnet_model import resnet34 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | 8 | # create model 9 | model = AlexNet(num_classes=5) 10 | # model = resnet34(num_classes=5) 11 | # load model weights 12 | model_weight_path = "./AlexNet.pth" # "resNet34.pth" 13 | model.load_state_dict(torch.load(model_weight_path)) 14 | print(model) 15 | 16 | weights_keys = model.state_dict().keys() 17 | for key in weights_keys: 18 | # skip the num_batches_tracked params (in bn layers) 19 | if "num_batches_tracked" in key: 20 | continue 21 | # [kernel_number, kernel_channel, kernel_height, kernel_width] 22 | weight_t = model.state_dict()[key].numpy() 23 | 24 | # read a kernel information 25 | # k = weight_t[0, :, :, :] 26 | 27 | # calculate mean, std, min, max 28 | weight_mean = weight_t.mean() 29 | weight_std = weight_t.std(ddof=1) 30 | weight_min = weight_t.min() 31 | weight_max = weight_t.max() 32 | print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean, 33 | weight_std, 34 | weight_min, 35 | weight_max)) 36 | 37 | # plot hist image 38 | plt.close() 39 | weight_vec = np.reshape(weight_t, [-1]) 40 | plt.hist(weight_vec, bins=50) 41 | plt.title(key) 42 | plt.show() 43 | 44 | -------------------------------------------------------------------------------- /pytorch_classification/custom_dataset/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from torchvision import transforms 5 | 6 | from my_dataset import MyDataSet 7 | from utils import read_split_data, plot_data_loader_image 8 | 9 | # http://download.tensorflow.org/example_images/flower_photos.tgz 10 | root = "/home/wz/my_github/data_set/flower_data/flower_photos"  # root directory of the dataset 11 | 12 | 13 | def main(): 14 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 15 | print("using {} device.".format(device)) 16 | 17 | train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(root) 18 | 19 | data_transform = { 20 | "train": transforms.Compose([transforms.RandomResizedCrop(224), 21 | transforms.RandomHorizontalFlip(), 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]), 24 | "val": transforms.Compose([transforms.Resize(256), 25 | transforms.CenterCrop(224), 26 | transforms.ToTensor(), 27 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])} 28 | 29 | train_data_set = MyDataSet(images_path=train_images_path, 30 | images_class=train_images_label, 31 | transform=data_transform["train"]) 32 | 33 | batch_size = 8 34 | nw = min([os.cpu_count(), batch_size if
batch_size > 1 else 0, 8]) # number of workers 35 | print('Using {} dataloader workers'.format(nw)) 36 | train_loader = torch.utils.data.DataLoader(train_data_set, 37 | batch_size=batch_size, 38 | shuffle=True, 39 | num_workers=nw, 40 | collate_fn=train_data_set.collate_fn) 41 | 42 | # plot_data_loader_image(train_loader) 43 | 44 | for step, data in enumerate(train_loader): 45 | images, labels = data 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /pytorch_classification/custom_dataset/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | 39 | -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/README.md: -------------------------------------------------------------------------------- 1 | ## download mini-imagenet 2 | link: [https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ](https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ) password: hl31 3 | 4 | ## dataset path structure 5 | ``` 6 | ├── mini-imagenet: total 100 classes, 60000 images 7 | ├── images: 60000 images 8 | ├── train.csv: 64 classes, 38400 images 9 | ├── val.csv: 16 classes, 9600 images 10 | └── test.csv: 20 classes, 12000 images 11 | ``` -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/multi_train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_eval_utils import train_one_epoch, evaluate 2 | from .distributed_utils import init_distributed_mode, dist, cleanup 3 | -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/multi_train_utils/distributed_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def init_distributed_mode(args): 8 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 9 | args.rank = int(os.environ["RANK"]) 10 | args.world_size = int(os.environ['WORLD_SIZE']) 11 | args.gpu = int(os.environ['LOCAL_RANK']) 12 | elif 'SLURM_PROCID' in os.environ: 13 | args.rank = int(os.environ['SLURM_PROCID']) 14 | args.gpu = args.rank % torch.cuda.device_count() 15 | else: 16 | print('Not using distributed mode') 17 | 
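# note (added): RANK / WORLD_SIZE / LOCAL_RANK are set by
# torch.distributed.launch (or torchrun), and SLURM_PROCID by the Slurm
# scheduler; when none of them is present we fall back to single-process
# mode below.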
args.distributed = False 18 | return 19 | 20 | args.distributed = True 21 | 22 | torch.cuda.set_device(args.gpu) 23 | args.dist_backend = 'nccl' # 通信后端,nvidia GPU推荐使用NCCL 24 | print('| distributed init (rank {}): {}'.format( 25 | args.rank, args.dist_url), flush=True) 26 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 27 | world_size=args.world_size, rank=args.rank) 28 | dist.barrier() 29 | 30 | 31 | def cleanup(): 32 | dist.destroy_process_group() 33 | 34 | 35 | def is_dist_avail_and_initialized(): 36 | """检查是否支持分布式环境""" 37 | if not dist.is_available(): 38 | return False 39 | if not dist.is_initialized(): 40 | return False 41 | return True 42 | 43 | 44 | def get_world_size(): 45 | if not is_dist_avail_and_initialized(): 46 | return 1 47 | return dist.get_world_size() 48 | 49 | 50 | def get_rank(): 51 | if not is_dist_avail_and_initialized(): 52 | return 0 53 | return dist.get_rank() 54 | 55 | 56 | def is_main_process(): 57 | return get_rank() == 0 58 | 59 | 60 | def reduce_value(value, average=True): 61 | world_size = get_world_size() 62 | if world_size < 2: # 单GPU的情况 63 | return value 64 | 65 | with torch.no_grad(): 66 | dist.all_reduce(value) 67 | if average: 68 | value /= world_size 69 | 70 | return value 71 | 72 | 73 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 74 | 75 | def f(x): 76 | """根据step数返回一个学习率倍率因子""" 77 | if x >= warmup_iters: # 当迭代数大于给定的warmup_iters时,倍率因子为1 78 | return 1 79 | alpha = float(x) / warmup_iters 80 | # 迭代过程中倍率因子从warmup_factor -> 1 81 | return warmup_factor * (1 - alpha) + alpha 82 | 83 | return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f) 84 | -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from PIL import Image 4 | import pandas as pd 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | class MyDataSet(Dataset): 10 | """自定义数据集""" 11 | 12 | def __init__(self, 13 | root_dir: str, 14 | csv_name: str, 15 | json_path: str, 16 | transform=None): 17 | images_dir = os.path.join(root_dir, "images") 18 | assert os.path.exists(images_dir), "dir:'{}' not found.".format(images_dir) 19 | 20 | assert os.path.exists(json_path), "file:'{}' not found.".format(json_path) 21 | self.label_dict = json.load(open(json_path, "r")) 22 | 23 | csv_path = os.path.join(root_dir, csv_name) 24 | assert os.path.exists(csv_path), "file:'{}' not found.".format(csv_path) 25 | csv_data = pd.read_csv(csv_path) 26 | self.total_num = csv_data.shape[0] 27 | self.img_paths = [os.path.join(images_dir, i)for i in csv_data["filename"].values] 28 | self.img_label = [self.label_dict[i][0] for i in csv_data["label"].values] 29 | self.labels = set(csv_data["label"].values) 30 | 31 | self.transform = transform 32 | 33 | def __len__(self): 34 | return self.total_num 35 | 36 | def __getitem__(self, item): 37 | img = Image.open(self.img_paths[item]) 38 | # RGB为彩色图片,L为灰度图片 39 | if img.mode != 'RGB': 40 | raise ValueError("image: {} isn't RGB mode.".format(self.img_paths[item])) 41 | label = self.img_label[item] 42 | 43 | if self.transform is not None: 44 | img = self.transform(img) 45 | 46 | return img, label 47 | 48 | @staticmethod 49 | def collate_fn(batch): 50 | # 官方实现的default_collate可以参考 51 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 52 | images, labels = 
tuple(zip(*batch)) 53 | 54 | images = torch.stack(images, dim=0) 55 | labels = torch.as_tensor(labels) 56 | return images, labels 57 | -------------------------------------------------------------------------------- /pytorch_classification/model_complexity/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from fvcore.nn import FlopCountAnalysis, parameter_count_table 3 | from prettytable import PrettyTable 4 | from model import efficientnetv2_s 5 | 6 | 7 | def main(): 8 | model = efficientnetv2_s() 9 | 10 | # option1 11 | for name, para in model.named_parameters(): 12 | # 除head外,其他权重全部冻结 13 | if "head" not in name: 14 | para.requires_grad_(False) 15 | else: 16 | print("training {}".format(name)) 17 | 18 | complexity = model.complexity(224, 224, 3) 19 | table = PrettyTable() 20 | table.field_names = ["params", "freeze-params", "train-params", "FLOPs", "acts"] 21 | table.add_row([complexity["params"], 22 | complexity["freeze"], 23 | complexity["params"] - complexity["freeze"], 24 | complexity["flops"], 25 | complexity["acts"]]) 26 | print(table) 27 | 28 | # option2 29 | tensor = (torch.rand(1, 3, 224, 224),) 30 | flops = FlopCountAnalysis(model, tensor) 31 | print(flops.total()) 32 | 33 | print(parameter_count_table(model)) 34 | 35 | 36 | if __name__ == '__main__': 37 | main() 38 | -------------------------------------------------------------------------------- /pytorch_classification/model_complexity/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | these code refers to: 3 | https://github.com/facebookresearch/pycls/blob/master/pycls/models/blocks.py 4 | """ 5 | 6 | 7 | def conv2d_cx(cx, in_c, out_c, k, *, stride=1, groups=1, bias=False, trainable=True): 8 | """Accumulates complexity of conv2d into cx = (h, w, flops, params, acts).""" 9 | assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." 10 | h, w, c = cx["h"], cx["w"], cx["c"] 11 | assert c == in_c 12 | h, w = (h - 1) // stride + 1, (w - 1) // stride + 1 13 | cx["h"] = h 14 | cx["w"] = w 15 | cx["c"] = out_c 16 | cx["flops"] += k * k * in_c * out_c * h * w // groups + (out_c if bias else 0) 17 | cx["params"] += k * k * in_c * out_c // groups + (out_c if bias else 0) 18 | cx["acts"] += out_c * h * w 19 | if trainable is False: 20 | cx["freeze"] += k * k * in_c * out_c // groups + (out_c if bias else 0) 21 | return cx 22 | 23 | 24 | def pool2d_cx(cx, in_c, k, *, stride=1): 25 | """Accumulates complexity of pool2d into cx = (h, w, flops, params, acts).""" 26 | assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." 
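# note (added): with an odd kernel and same-style padding, the spatial
# output size works out to (h - 1) // stride + 1, the same formula used
# in conv2d_cx above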
27 | h, w, c = cx["h"], cx["w"], cx["c"] 28 | assert c == in_c 29 | h, w = (h - 1) // stride + 1, (w - 1) // stride + 1 30 | cx["h"] = h 31 | cx["w"] = w 32 | cx["acts"] += in_c * h * w 33 | return cx 34 | 35 | 36 | def norm2d_cx(cx, in_c, trainable=True): 37 | """Accumulates complexity of norm2d into cx = (h, w, flops, params, acts).""" 38 | c, params = cx["c"], cx["params"] 39 | assert c == in_c 40 | cx["params"] += 4 * c 41 | cx["freeze"] += 2 * c # moving_mean, variance 42 | if trainable is False: 43 | cx["freeze"] += 2 * c # beta, gamma 44 | return cx 45 | 46 | 47 | def gap2d_cx(cx): 48 | """Accumulates complexity of gap2d into cx = (h, w, flops, params, acts).""" 49 | cx["h"] = 1 50 | cx["w"] = 1 51 | return cx 52 | 53 | 54 | def linear_cx(cx, in_units, out_units, *, bias=False, trainable=True): 55 | """Accumulates complexity of linear into cx = (h, w, flops, params, acts).""" 56 | c = cx["c"] 57 | assert c == in_units 58 | cx["c"] = out_units 59 | cx["flops"] += in_units * out_units + (out_units if bias else 0) 60 | cx["params"] += in_units * out_units + (out_units if bias else 0) 61 | cx["acts"] += out_units 62 | if trainable is False: 63 | cx["freeze"] += in_units * out_units + (out_units if bias else 0) 64 | return cx 65 | -------------------------------------------------------------------------------- /pytorch_classification/swin_transformer/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/swin_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import swin_tiny_patch4_window7_224 as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | img_size = 224 16 | data_transform = transforms.Compose( 17 | [transforms.Resize(int(img_size * 1.14)), 18 | transforms.CenterCrop(img_size), 19 | transforms.ToTensor(), 20 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 21 | 22 | # load image 23 | img_path = "../tulip.jpg" 24 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 25 | img = Image.open(img_path) 26 | 
plt.imshow(img) 27 | # [N, C, H, W] 28 | img = data_transform(img) 29 | # expand batch dimension 30 | img = torch.unsqueeze(img, dim=0) 31 | 32 | # read class_indict 33 | json_path = './class_indices.json' 34 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 35 | 36 | json_file = open(json_path, "r") 37 | class_indict = json.load(json_file) 38 | 39 | # create model 40 | model = create_model(num_classes=5).to(device) 41 | # load model weights 42 | model_weight_path = "./weights/model-9.pth" 43 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 44 | model.eval() 45 | with torch.no_grad(): 46 | # predict class 47 | output = torch.squeeze(model(img.to(device))).cpu() 48 | predict = torch.softmax(output, dim=0) 49 | predict_cla = torch.argmax(predict).numpy() 50 | 51 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 52 | predict[predict_cla].numpy()) 53 | plt.title(print_res) 54 | for i in range(len(predict)): 55 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 56 | predict[i].numpy())) 57 | plt.show() 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/my_dataset.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from PIL import Image 3 | import torch 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class MyDataSet(Dataset): 8 | """自定义数据集""" 9 | 10 | def __init__(self, images_path: list, images_class: list, transform=None): 11 | self.images_path = images_path 12 | self.images_class = images_class 13 | self.transform = transform 14 | 15 | delete_img = [] 16 | for index, img_path in tqdm(enumerate(images_path)): 17 | img = Image.open(img_path) 18 | w, h = img.size 19 | ratio = w / h 20 | if ratio > 10 or ratio < 0.1: 21 | delete_img.append(index) 22 | # print(img_path, ratio) 23 | 24 | for index in delete_img[::-1]: 25 | self.images_path.pop(index) 26 | self.images_class.pop(index) 27 | 28 | def __len__(self): 29 | return len(self.images_path) 30 | 31 | def __getitem__(self, item): 32 | img = Image.open(self.images_path[item]) 33 | # RGB为彩色图片,L为灰度图片 34 | if img.mode != 'RGB': 35 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 36 | label = self.images_class[item] 37 | 38 | if self.transform is not None: 39 | img = self.transform(img) 40 | 41 | return img, label 42 | 43 | @staticmethod 44 | def collate_fn(batch): 45 | # 官方实现的default_collate可以参考 46 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 47 | images, labels = tuple(zip(*batch)) 48 | 49 | images = torch.stack(images, dim=0) 50 | labels = torch.as_tensor(labels) 51 | return images, labels 52 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/requirements.txt: -------------------------------------------------------------------------------- 1 | torchvision==0.7.0 2 | tqdm==4.42.1 3 | matplotlib==3.2.1 4 | torch==1.6.0 5 | Pillow 6 | tensorboard 7 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/train_eval_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from tqdm import tqdm 4 | import torch 5 | 6 | 7 | def train_one_epoch(model, optimizer, 
data_loader, device, epoch): 8 | model.train() 9 | loss_function = torch.nn.CrossEntropyLoss() 10 | mean_loss = torch.zeros(1).to(device) 11 | optimizer.zero_grad() 12 | 13 | data_loader = tqdm(data_loader) 14 | for step, data in enumerate(data_loader): 15 | images, labels = data 16 | pred = model(images.to(device)) 17 | 18 | loss = loss_function(pred, labels.to(device)) 19 | loss.backward() 20 | mean_loss = (mean_loss * step + loss.detach()) / (step + 1) # update mean losses 21 | 22 | # 打印平均loss 23 | data_loader.desc = "[epoch {}] mean loss {}".format(epoch, round(mean_loss.item(), 3)) 24 | 25 | if not torch.isfinite(loss): 26 | print('WARNING: non-finite loss, ending training ', loss) 27 | sys.exit(1) 28 | 29 | optimizer.step() 30 | optimizer.zero_grad() 31 | 32 | return mean_loss.item() 33 | 34 | 35 | @torch.no_grad() 36 | def evaluate(model, data_loader, device): 37 | model.eval() 38 | 39 | # 用于存储预测正确的样本个数 40 | sum_num = torch.zeros(1).to(device) 41 | # 统计验证集样本总数目 42 | num_samples = len(data_loader.dataset) 43 | 44 | # 打印验证进度 45 | data_loader = tqdm(data_loader, desc="validation...") 46 | 47 | for step, data in enumerate(data_loader): 48 | images, labels = data 49 | pred = model(images.to(device)) 50 | pred = torch.max(pred, dim=1)[1] 51 | sum_num += torch.eq(pred, labels.to(device)).sum() 52 | 53 | # 计算预测正确的比例 54 | acc = sum_num.item() / num_samples 55 | 56 | return acc 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/README.md: -------------------------------------------------------------------------------- 1 | ## 多GPU启动指令 2 | - 如果要使用```train_multi_gpu_using_launch.py```脚本,使用以下指令启动 3 | - ```python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_gpu_using_launch.py``` 4 | - 其中```nproc_per_node```为并行GPU的数量 5 | - 如果要指定使用某几块GPU可使用如下指令,例如使用第1块和第4块GPU进行训练: 6 | - ```CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_gpu_using_launch.py``` 7 | 8 | ----- 9 | 10 | - 如果要使用```train_multi_gpu_using_spawn.py```脚本,使用以下指令启动 11 | - ```python train_multi_gpu_using_spawn.py``` 12 | 13 | ## 训练时间对比 14 | ![training time](training_time.png) 15 | 16 | ## 是否使用SyncBatchNorm 17 | ![syncbn](syncbn.png) 18 | 19 | ## 单GPU与多GPU训练曲线 20 | ![accuracy](accuracy.png) 21 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_classification/train_multi_GPU/accuracy.png -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/multi_train_utils/distributed_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def init_distributed_mode(args): 8 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 9 | args.rank = int(os.environ["RANK"]) 10 | args.world_size = int(os.environ['WORLD_SIZE']) 11 | args.gpu = int(os.environ['LOCAL_RANK']) 12 | elif 'SLURM_PROCID' in os.environ: 13 | args.rank = int(os.environ['SLURM_PROCID']) 14 | args.gpu = args.rank % torch.cuda.device_count() 15 | else: 16 | print('Not using distributed mode') 17 | args.distributed = False 18 | return 19 | 20 | 
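# note (added): reaching this point means the rank/world-size variables
# were found, i.e. the script was started through a multi-process launcher
# such as the torch.distributed.launch command shown in the README above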
args.distributed = True 21 | 22 | torch.cuda.set_device(args.gpu) 23 | args.dist_backend = 'nccl' # 通信后端,nvidia GPU推荐使用NCCL 24 | print('| distributed init (rank {}): {}'.format( 25 | args.rank, args.dist_url), flush=True) 26 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 27 | world_size=args.world_size, rank=args.rank) 28 | dist.barrier() 29 | 30 | 31 | def cleanup(): 32 | dist.destroy_process_group() 33 | 34 | 35 | def is_dist_avail_and_initialized(): 36 | """检查是否支持分布式环境""" 37 | if not dist.is_available(): 38 | return False 39 | if not dist.is_initialized(): 40 | return False 41 | return True 42 | 43 | 44 | def get_world_size(): 45 | if not is_dist_avail_and_initialized(): 46 | return 1 47 | return dist.get_world_size() 48 | 49 | 50 | def get_rank(): 51 | if not is_dist_avail_and_initialized(): 52 | return 0 53 | return dist.get_rank() 54 | 55 | 56 | def is_main_process(): 57 | return get_rank() == 0 58 | 59 | 60 | def reduce_value(value, average=True): 61 | world_size = get_world_size() 62 | if world_size < 2: # 单GPU的情况 63 | return value 64 | 65 | with torch.no_grad(): 66 | dist.all_reduce(value) 67 | if average: 68 | value /= world_size 69 | 70 | return value 71 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/multi_train_utils/train_eval_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from tqdm import tqdm 4 | import torch 5 | 6 | from multi_train_utils.distributed_utils import reduce_value, is_main_process 7 | 8 | 9 | def train_one_epoch(model, optimizer, data_loader, device, epoch): 10 | model.train() 11 | loss_function = torch.nn.CrossEntropyLoss() 12 | mean_loss = torch.zeros(1).to(device) 13 | optimizer.zero_grad() 14 | 15 | # 在进程0中打印训练进度 16 | if is_main_process(): 17 | data_loader = tqdm(data_loader) 18 | 19 | for step, data in enumerate(data_loader): 20 | images, labels = data 21 | 22 | pred = model(images.to(device)) 23 | 24 | loss = loss_function(pred, labels.to(device)) 25 | loss.backward() 26 | loss = reduce_value(loss, average=True) 27 | mean_loss = (mean_loss * step + loss.detach()) / (step + 1) # update mean losses 28 | 29 | # 在进程0中打印平均loss 30 | if is_main_process(): 31 | data_loader.desc = "[epoch {}] mean loss {}".format(epoch, round(mean_loss.item(), 3)) 32 | 33 | if not torch.isfinite(loss): 34 | print('WARNING: non-finite loss, ending training ', loss) 35 | sys.exit(1) 36 | 37 | optimizer.step() 38 | optimizer.zero_grad() 39 | 40 | # 等待所有进程计算完毕 41 | if device != torch.device("cpu"): 42 | torch.cuda.synchronize(device) 43 | 44 | return mean_loss.item() 45 | 46 | 47 | @torch.no_grad() 48 | def evaluate(model, data_loader, device): 49 | model.eval() 50 | 51 | # 用于存储预测正确的样本个数 52 | sum_num = torch.zeros(1).to(device) 53 | 54 | # 在进程0中打印验证进度 55 | if is_main_process(): 56 | data_loader = tqdm(data_loader) 57 | 58 | for step, data in enumerate(data_loader): 59 | images, labels = data 60 | pred = model(images.to(device)) 61 | pred = torch.max(pred, dim=1)[1] 62 | sum_num += torch.eq(pred, labels.to(device)).sum() 63 | 64 | # 等待所有进程计算完毕 65 | if device != torch.device("cpu"): 66 | torch.cuda.synchronize(device) 67 | 68 | sum_num = reduce_value(sum_num, average=False) 69 | 70 | return sum_num.item() 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/my_dataset.py: 
-------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.2.1 2 | tqdm==4.42.1 3 | torchvision==0.7.0 4 | torch==1.6.0 5 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/runs/Nov07_18-58-35_wz/events.out.tfevents.1604746311.localhost.41577.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_classification/train_multi_GPU/runs/Nov07_18-58-35_wz/events.out.tfevents.1604746311.localhost.41577.0 -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/syncbn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_classification/train_multi_GPU/syncbn.png -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/training_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_classification/train_multi_GPU/training_time.png -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from fvcore.nn import FlopCountAnalysis 3 | 4 | from vit_model import Attention 5 | 6 | 7 | def main(): 8 | # Self-Attention 9 | a1 = Attention(dim=512, num_heads=1) 10 | a1.proj = torch.nn.Identity() # remove Wo 11 | 12 | # Multi-Head Attention 13 | a2 = Attention(dim=512, num_heads=8) 14 | 15 | # [batch_size, num_tokens, total_embed_dim] 16 | t = (torch.rand(32, 1024, 512),) 17 | 18 | flops1 = FlopCountAnalysis(a1, t) 19 | print("Self-Attention FLOPs:", flops1.total()) 20 | 21 | flops2 = FlopCountAnalysis(a2, t) 22 | print("Multi-Head 
Attention FLOPs:", flops2.total()) 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | 28 | -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from vit_model import vit_base_patch16_224_in21k as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | json_file = open(json_path, "r") 36 | class_indict = json.load(json_file) 37 | 38 | # create model 39 | model = create_model(num_classes=5, has_logits=False).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-9.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | 
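# note (added): unlike the other predict.py scripts, this one normalizes
# with mean/std 0.5; this matches the Inception-style preprocessing used
# for the in21k ViT pretrained weights rather than the ImageNet statistics.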
-------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet50_fpn_model import resnet50_fpn_backbone 2 | from .mobilenetv2_model import MobileNetV2 3 | from .vgg_model import vgg 4 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/fasterRCNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/faster_rcnn/fasterRCNN.png -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/network_files/__init__.py: -------------------------------------------------------------------------------- 1 | from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor 2 | from .rpn_function import AnchorsGenerator 3 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/network_files/image_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch import Tensor 3 | 4 | 5 | class ImageList(object): 6 | """ 7 | Structure that holds a list of images (of possibly 8 | varying sizes) as a single tensor. 9 | This works by padding the images to the same size, 10 | and storing in a field the original sizes of each image 11 | """ 12 | 13 | def __init__(self, tensors, image_sizes): 14 | # type: (Tensor, List[Tuple[int, int]]) -> None 15 | """ 16 | Arguments: 17 | tensors (tensor) padding后的图像数据 18 | image_sizes (list[tuple[int, int]]) padding前的图像尺寸 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, device): 24 | # type: (Device) -> ImageList # noqa 25 | cast_tensor = self.tensors.to(device) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("step") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 
| handles2, labels2 = ax2.get_legend_handles_labels() 23 | plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! ") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/record_mAP.txt: -------------------------------------------------------------------------------- 1 | COCO results: 2 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.526 3 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.804 4 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.586 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.211 6 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.403 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.580 8 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.454 9 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.639 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.646 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.347 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.540 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.693 14 | 15 | mAP(IoU=0.5) for each category: 16 | aeroplane : 0.8759546352558178 17 | bicycle : 0.8554609242543677 18 | bird : 0.8434943725365999 19 | boat : 0.6753024837855667 20 | bottle : 0.7185899054232459 21 | bus : 0.8691082170432654 22 | car : 0.8771002682431779 23 | cat : 0.9169138943375639 24 | chair : 0.6403466317122392 25 | cow : 0.8285552434280278 26 | diningtable : 0.6437938565684241 27 | dog : 0.8745793980119227 28 | horse : 0.8718238708874728 29 | motorbike : 0.8910672301923952 30 | person : 0.9047338725598096 31 | pottedplant : 0.5808810399193133 32 | sheep : 0.86045368568359 33 | sofa : 0.7239390963388067 34 | train : 0.8652277764020805 35 | tvmonitor : 0.7683550206571649 -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | numpy==1.17.0 4 | tqdm==4.42.1 5 | torch==1.6.0 6 | torchvision==0.7.0 7 | pycocotools 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | 5 | def main(): 6 | random.seed(0) # 设置随机种子,保证随机结果可复现 7 | 8 | files_path = "./VOCdevkit/VOC2012/Annotations" 9 | assert os.path.exists(files_path), "path: '{}' does not exist.".format(files_path) 10 | 11 | val_rate = 0.5 12 | 13 | files_name = sorted([file.split(".")[0] for file in os.listdir(files_path)]) 14 | 
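# note (added): sorting plus the fixed seed above makes the split
# reproducible; os.listdir returns entries in arbitrary order, so without
# sorted() the same seed could still select different files on different
# machines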
files_num = len(files_name) 15 | val_index = random.sample(range(0, files_num), k=int(files_num*val_rate)) 16 | train_files = [] 17 | val_files = [] 18 | for index, file_name in enumerate(files_name): 19 | if index in val_index: 20 | val_files.append(file_name) 21 | else: 22 | train_files.append(file_name) 23 | 24 | try: 25 | train_f = open("train.txt", "x") 26 | eval_f = open("val.txt", "x") 27 | train_f.write("\n".join(train_files)) 28 | eval_f.write("\n".join(val_files)) 29 | except FileExistsError as e: 30 | print(e) 31 | exit(1) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | from .coco_utils import get_coco_api_from_dataset 4 | from .coco_eval import CocoEvaluator 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data 4 | from pycocotools.coco import COCO 5 | 6 | 7 | def convert_to_coco_api(ds): 8 | coco_ds = COCO() 9 | # annotation IDs need to start at 1, not 0 10 | ann_id = 1 11 | dataset = {'images': [], 'categories': [], 'annotations': []} 12 | categories = set() 13 | for img_idx in range(len(ds)): 14 | # find better way to get target 15 | hw, targets = ds.coco_index(img_idx) 16 | image_id = targets["image_id"].item() 17 | img_dict = {} 18 | img_dict['id'] = image_id 19 | img_dict['height'] = hw[0] 20 | img_dict['width'] = hw[1] 21 | dataset['images'].append(img_dict) 22 | bboxes = targets["boxes"] 23 | bboxes[:, 2:] -= bboxes[:, :2] 24 | bboxes = bboxes.tolist() 25 | labels = targets['labels'].tolist() 26 | areas = targets['area'].tolist() 27 | iscrowd = targets['iscrowd'].tolist() 28 | num_objs = len(bboxes) 29 | for i in range(num_objs): 30 | ann = {} 31 | ann['image_id'] = image_id 32 | ann['bbox'] = bboxes[i] 33 | ann['category_id'] = labels[i] 34 | categories.add(labels[i]) 35 | ann['area'] = areas[i] 36 | ann['iscrowd'] = iscrowd[i] 37 | ann['id'] = ann_id 38 | dataset['annotations'].append(ann) 39 | ann_id += 1 40 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 41 | coco_ds.dataset = dataset 42 | coco_ds.createIndex() 43 | return coco_ds 44 | 45 | 46 | def get_coco_api_from_dataset(dataset): 47 | for _ in range(10): 48 | if isinstance(dataset, torchvision.datasets.CocoDetection): 49 | break 50 | if isinstance(dataset, torch.utils.data.Subset): 51 | dataset = dataset.dataset 52 | if isinstance(dataset, torchvision.datasets.CocoDetection): 53 | return dataset.coco 54 | return convert_to_coco_api(dataset) 55 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torchvision.transforms import functional as F 3 | 4 | 5 | class Compose(object): 6 | """组合多个transform函数""" 7 | def __init__(self, transforms): 8 | self.transforms = transforms 9 | 10 | def __call__(self, image, target): 11 | for t in self.transforms: 12 | image, target = 
t(image, target) 13 | return image, target 14 | 15 | 16 | class ToTensor(object): 17 | """将PIL图像转为Tensor""" 18 | def __call__(self, image, target): 19 | image = F.to_tensor(image) 20 | return image, target 21 | 22 | 23 | class RandomHorizontalFlip(object): 24 | """随机水平翻转图像以及bboxes""" 25 | def __init__(self, prob=0.5): 26 | self.prob = prob 27 | 28 | def __call__(self, image, target): 29 | if random.random() < self.prob: 30 | height, width = image.shape[-2:] 31 | image = image.flip(-1) # 水平翻转图片 32 | bbox = target["boxes"] 33 | # bbox: xmin, ymin, xmax, ymax 34 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] # 翻转对应bbox坐标信息 35 | target["boxes"] = bbox 36 | return image, target 37 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .feature_pyramid_network import FeaturePyramidNetwork, LastLevelP6P7, LastLevelMaxPool 2 | from .resnet50_fpn_model import resnet50_fpn_backbone 3 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/__init__.py: -------------------------------------------------------------------------------- 1 | from .retinanet import RetinaNet 2 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/image_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch import Tensor 3 | 4 | 5 | class ImageList(object): 6 | """ 7 | Structure that holds a list of images (of possibly 8 | varying sizes) as a single tensor. 9 | This works by padding the images to the same size, 10 | and storing in a field the original sizes of each image 11 | """ 12 | 13 | def __init__(self, tensors, image_sizes): 14 | # type: (Tensor, List[Tuple[int, int]]) -> None 15 | """ 16 | Arguments: 17 | tensors (tensor) padding后的图像数据 18 | image_sizes (list[tuple[int, int]]) padding前的图像尺寸 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, device): 24 | # type: (Device) -> ImageList # noqa 25 | cast_tensor = self.tensors.to(device) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def sigmoid_focal_loss( 6 | inputs: torch.Tensor, 7 | targets: torch.Tensor, 8 | alpha: float = 0.25, 9 | gamma: float = 2, 10 | reduction: str = "none", 11 | ): 12 | """ 13 | Original implementation from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py . 14 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 15 | 16 | Args: 17 | inputs: A float tensor of arbitrary shape. 18 | The predictions for each example. 19 | targets: A float tensor with the same shape as inputs. Stores the binary 20 | classification label for each element in inputs 21 | (0 for the negative class and 1 for the positive class). 22 | alpha: (optional) Weighting factor in range (0,1) to balance 23 | positive vs negative examples or -1 for ignore. Default = 0.25 24 | gamma: Exponent of the modulating factor (1 - p_t) to 25 | balance easy vs hard examples. 
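            Default = 2, the value the focal loss (RetinaNet) paper found to work best.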
26 | reduction: 'none' | 'mean' | 'sum' 27 | 'none': No reduction will be applied to the output. 28 | 'mean': The output will be averaged. 29 | 'sum': The output will be summed. 30 | Returns: 31 | Loss tensor with the reduction option applied. 32 | """ 33 | p = torch.sigmoid(inputs) 34 | ce_loss = F.binary_cross_entropy_with_logits( 35 | inputs, targets, reduction="none" 36 | ) 37 | p_t = p * targets + (1 - p) * (1 - targets) 38 | loss = ce_loss * ((1 - p_t) ** gamma) 39 | 40 | if alpha >= 0: 41 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 42 | loss = alpha_t * loss 43 | 44 | if reduction == "mean": 45 | loss = loss.mean() 46 | elif reduction == "sum": 47 | loss = loss.sum() 48 | 49 | return loss 50 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 0, 3 | "bicycle": 1, 4 | "bird": 2, 5 | "boat": 3, 6 | "bottle": 4, 7 | "bus": 5, 8 | "car": 6, 9 | "cat": 7, 10 | "chair": 8, 11 | "cow": 9, 12 | "diningtable": 10, 13 | "dog": 11, 14 | "horse": 12, 15 | "motorbike": 13, 16 | "person": 14, 17 | "pottedplant": 15, 18 | "sheep": 16, 19 | "sofa": 17, 20 | "train": 18, 21 | "tvmonitor": 19 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("step") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 | handles2, labels2 = ax2.get_legend_handles_labels() 23 | plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! 
") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | numpy==1.17.0 4 | tqdm==4.42.1 5 | torch==1.7.1 6 | torchvision==0.8.2 7 | pycocotools 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/results20210421-142632.txt: -------------------------------------------------------------------------------- 1 | epoch:0 0.4012 0.6088 0.4334 0.1691 0.3113 0.4498 0.4265 0.6233 0.6478 0.3362 0.5541 0.6977 1.0681 0.01 2 | epoch:1 0.5028 0.7295 0.5441 0.2219 0.3913 0.5552 0.4624 0.6649 0.6875 0.4039 0.5928 0.7346 0.5422 0.01 3 | epoch:2 0.5311 0.7614 0.5784 0.2439 0.4189 0.5852 0.4733 0.6774 0.698 0.417 0.6105 0.7441 0.4456 0.01 4 | epoch:3 0.5439 0.7762 0.595 0.2412 0.4292 0.5996 0.4773 0.6835 0.7021 0.4137 0.6074 0.7494 0.3872 0.01 5 | epoch:4 0.5404 0.7739 0.5949 0.2457 0.426 0.5968 0.4723 0.6818 0.7007 0.4363 0.6047 0.7479 0.347 0.01 6 | epoch:5 0.5513 0.7867 0.6021 0.2415 0.4265 0.6087 0.4811 0.685 0.7041 0.4073 0.6088 0.7526 0.3166 0.01 7 | epoch:6 0.5508 0.7909 0.6014 0.2327 0.4211 0.6116 0.478 0.6803 0.699 0.4081 0.5994 0.7485 0.2884 0.01 8 | epoch:7 0.5617 0.7972 0.6142 0.2431 0.427 0.6223 0.4848 0.6862 0.7049 0.4184 0.6018 0.7551 0.2546 0.001 9 | epoch:8 0.561 0.7986 0.6117 0.2342 0.4268 0.6223 0.4842 0.6855 0.705 0.4153 0.6051 0.7551 0.2462 0.001 10 | epoch:9 0.563 0.7983 0.6153 0.2359 0.4336 0.6237 0.4849 0.6884 0.7068 0.4103 0.6063 0.7574 0.2428 0.001 11 | epoch:10 0.563 0.7991 0.6167 0.2363 0.4334 0.6234 0.4854 0.6879 0.7062 0.4152 0.6063 0.7558 0.2391 0.001 12 | epoch:11 0.5637 0.7984 0.6145 0.2341 0.4345 0.6241 0.4842 0.6894 0.7083 0.4136 0.6074 0.7581 0.2355 0.001 13 | epoch:12 0.5624 0.7969 0.6155 0.2373 0.4292 0.623 0.4853 0.6866 0.7055 0.4136 0.6026 0.756 0.2323 0.0001 14 | epoch:13 0.5632 0.7985 0.6155 0.2358 0.4342 0.6243 0.4858 0.6878 0.7065 0.4206 0.6039 0.7576 0.2307 0.0001 15 | epoch:14 0.562 0.7977 0.6155 0.2309 0.4291 0.6234 0.4849 0.6869 0.7051 0.4198 0.6023 0.7558 0.2305 0.0001 16 | epoch:15 0.5631 0.7984 0.6155 0.2324 0.4326 0.6238 0.4849 0.6876 0.706 0.4151 0.6039 0.7565 0.2313 0.0001 17 | epoch:16 0.5632 0.7992 0.6164 0.2349 0.429 0.6245 0.4859 0.6871 0.7063 0.4186 0.604 0.7569 0.2302 0.0001 18 | epoch:17 0.5637 0.7994 0.6164 0.2325 0.4312 0.6245 0.4854 0.6873 0.706 0.4109 0.6023 0.7567 0.2312 0.0001 19 | epoch:18 0.5626 0.7984 0.6132 0.2333 0.431 0.6238 0.4854 0.6873 0.7056 0.4158 0.6025 0.7564 0.2298 0.0001 20 | epoch:19 0.5613 0.7981 0.612 0.2365 0.4278 0.622 0.4855 0.6867 0.7047 0.4112 0.6 0.7554 0.2305 0.0001 21 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, 
save_on_master, mkdir 3 | from .coco_utils import get_coco_api_from_dataset 4 | from .coco_eval import CocoEvaluator 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data 4 | from pycocotools.coco import COCO 5 | 6 | 7 | def convert_to_coco_api(ds): 8 | coco_ds = COCO() 9 | # annotation IDs need to start at 1, not 0 10 | ann_id = 1 11 | dataset = {'images': [], 'categories': [], 'annotations': []} 12 | categories = set() 13 | for img_idx in range(len(ds)): 14 | # find better way to get target 15 | hw, targets = ds.coco_index(img_idx) 16 | image_id = targets["image_id"].item() 17 | img_dict = {} 18 | img_dict['id'] = image_id 19 | img_dict['height'] = hw[0] 20 | img_dict['width'] = hw[1] 21 | dataset['images'].append(img_dict) 22 | bboxes = targets["boxes"] 23 | bboxes[:, 2:] -= bboxes[:, :2] 24 | bboxes = bboxes.tolist() 25 | labels = targets['labels'].tolist() 26 | areas = targets['area'].tolist() 27 | iscrowd = targets['iscrowd'].tolist() 28 | num_objs = len(bboxes) 29 | for i in range(num_objs): 30 | ann = {} 31 | ann['image_id'] = image_id 32 | ann['bbox'] = bboxes[i] 33 | ann['category_id'] = labels[i] 34 | categories.add(labels[i]) 35 | ann['area'] = areas[i] 36 | ann['iscrowd'] = iscrowd[i] 37 | ann['id'] = ann_id 38 | dataset['annotations'].append(ann) 39 | ann_id += 1 40 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 41 | coco_ds.dataset = dataset 42 | coco_ds.createIndex() 43 | return coco_ds 44 | 45 | 46 | def get_coco_api_from_dataset(dataset): 47 | for _ in range(10): 48 | if isinstance(dataset, torchvision.datasets.CocoDetection): 49 | break 50 | if isinstance(dataset, torch.utils.data.Subset): 51 | dataset = dataset.dataset 52 | if isinstance(dataset, torchvision.datasets.CocoDetection): 53 | return dataset.coco 54 | return convert_to_coco_api(dataset) 55 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torchvision.transforms import functional as F 3 | 4 | 5 | class Compose(object): 6 | """组合多个transform函数""" 7 | def __init__(self, transforms): 8 | self.transforms = transforms 9 | 10 | def __call__(self, image, target): 11 | for t in self.transforms: 12 | image, target = t(image, target) 13 | return image, target 14 | 15 | 16 | class ToTensor(object): 17 | """将PIL图像转为Tensor""" 18 | def __call__(self, image, target): 19 | image = F.to_tensor(image) 20 | return image, target 21 | 22 | 23 | class RandomHorizontalFlip(object): 24 | """随机水平翻转图像以及bboxes""" 25 | def __init__(self, prob=0.5): 26 | self.prob = prob 27 | 28 | def __call__(self, image, target): 29 | if random.random() < self.prob: 30 | height, width = image.shape[-2:] 31 | image = image.flip(-1) # 水平翻转图片 32 | bbox = target["boxes"] 33 | # bbox: xmin, ymin, xmax, ymax 34 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] # 翻转对应bbox坐标信息 35 | target["boxes"] = bbox 36 | return image, target 37 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/README.md: -------------------------------------------------------------------------------- 1 | # SSD: Single Shot MultiBox Detector 2 | 3 | ## 环境配置: 4 | * Python 3.6/3.7/3.8 5 | * 
Pytorch 1.6
6 | * pycocotools (Linux: ```pip install pycocotools```; Windows: ```pip install pycocotools-windows``` (no extra Visual Studio install required))
7 | * Ubuntu or CentOS (Windows is not recommended)
8 | * A GPU is strongly recommended for training
9 | 
10 | ## File structure:
11 | ```
12 | ├── src: modules implementing the SSD model
13 | │    ├── resnet50_backbone.py uses the resnet50 network as the SSD backbone
14 | │    ├── ssd_model.py SSD network definition
15 | │    └── utils.py helper functions used during training
16 | ├── train_utils: training and validation modules (including cocotools)
17 | ├── my_dataset.py: custom dataset for reading the VOC dataset
18 | ├── train_ssd300.py: trains the SSD network with a resnet50 backbone
19 | ├── train_multi_GPU.py: for users training with multiple GPUs
20 | ├── predict_test.py: simple prediction script that runs inference with trained weights
21 | ├── pascal_voc_classes.json: pascal_voc label file
22 | ├── plot_curve.py: plots the training loss and the validation-set mAP
23 | └── validation.py: computes the COCO metrics of validation/test data with trained weights and generates the record_mAP.txt file
24 | ```
25 | 
26 | ## Pretrained weights download (place them in the src folder afterwards):
27 | * ResNet50+SSD: https://ngc.nvidia.com/catalog/models
28 | `search for ssd -> find SSD for PyTorch(FP32) -> download FP32 -> unpack the file`
29 | * If you cannot find it there, it is also available on Baidu Netdisk: https://pan.baidu.com/s/1byOnoNuqmBLZMDA0-lbCMQ access code: iggj
30 | 
31 | ## Dataset: this project uses the PASCAL VOC2012 dataset (place it in the current project folder after downloading)
32 | * Pascal VOC2012 train/val dataset download link: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
33 | * For the Pascal VOC2007 test dataset see: http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
34 | * If you are not familiar with the dataset or want to train on your own data, see my bilibili: https://b23.tv/F1kSCK
35 | 
36 | ## Training
37 | * Make sure the dataset is prepared in advance
38 | * Make sure the corresponding pretrained weights are downloaded in advance
39 | * For single-GPU or CPU training, use the train_ssd300.py script directly
40 | * For multi-GPU training, use the "python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py" command, where the nproc_per_node argument is the number of GPUs to use
41 | 
42 | ## If you don't fully understand how the SSD algorithm works, see my bilibili
43 | * https://www.bilibili.com/video/BV1fT4y1L7Gi
44 | 
45 | ## For more about this project and an analysis of the SSD code, see my bilibili
46 | * https://www.bilibili.com/video/BV1vK411H771/
47 | 
48 | ## Resnet50 + SSD architecture diagram
49 | ![Resnet50 SSD](res50_ssd.png)
50 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/ssd/pascal_voc_classes.json: --------------------------------------------------------------------------------
1 | {
2 |     "aeroplane": 1,
3 |     "bicycle": 2,
4 |     "bird": 3,
5 |     "boat": 4,
6 |     "bottle": 5,
7 |     "bus": 6,
8 |     "car": 7,
9 |     "cat": 8,
10 |     "chair": 9,
11 |     "cow": 10,
12 |     "diningtable": 11,
13 |     "dog": 12,
14 |     "horse": 13,
15 |     "motorbike": 14,
16 |     "person": 15,
17 |     "pottedplant": 16,
18 |     "sheep": 17,
19 |     "sofa": 18,
20 |     "train": 19,
21 |     "tvmonitor": 20
22 | }
-------------------------------------------------------------------------------- /pytorch_object_detection/ssd/plot_curve.py: --------------------------------------------------------------------------------
1 | import datetime
2 | import matplotlib.pyplot as plt
3 | 
4 | 
5 | def plot_loss_and_lr(train_loss, learning_rate):
6 |     try:
7 |         x = list(range(len(train_loss)))
8 |         fig, ax1 = plt.subplots(1, 1)
9 |         ax1.plot(x, train_loss, 'r', label='loss')
10 |         ax1.set_xlabel("epoch")
11 |         ax1.set_ylabel("loss")
12 |         ax1.set_title("Train Loss and lr")
13 |         plt.legend(loc='best')
14 | 
15 |         ax2 = ax1.twinx()
16 |         ax2.plot(x, learning_rate, label='lr')
17 |         ax2.set_ylabel("learning rate")
18 |         ax2.set_xlim(0, len(train_loss))  # limit the x-axis to the recorded range
19 |         plt.legend(loc='best')
20 | 
21 |         handles1, labels1 = ax1.get_legend_handles_labels()
22 |         handles2, labels2 = ax2.get_legend_handles_labels()
23 |         plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')
24 | 
25 |         fig.subplots_adjust(right=0.8)  # prevent the saved figure from being cut off
26 |         fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
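        # the timestamped filename above keeps repeated runs from overwriting earlier curves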
27 | plt.close() 28 | print("successful save loss curve! ") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/record_mAP.txt: -------------------------------------------------------------------------------- 1 | COCO results: 2 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.448 3 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.721 4 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.482 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.099 6 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.280 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521 8 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.418 9 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.565 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.573 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.419 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641 14 | 15 | mAP(IoU=0.5) for each category: 16 | aeroplane : 0.8532360243584314 17 | bicycle : 0.7496603797780927 18 | bird : 0.7658478672087958 19 | boat : 0.6079142920471263 20 | bottle : 0.4986565020053691 21 | bus : 0.8229568428349553 22 | car : 0.7940868387465018 23 | cat : 0.8800145761338203 24 | chair : 0.5090524550010037 25 | cow : 0.7344958411899583 26 | diningtable : 0.5379541883401677 27 | dog : 0.8230037525430133 28 | horse : 0.7880475852689804 29 | motorbike : 0.7879788462924051 30 | person : 0.8351553291238482 31 | pottedplant : 0.4420858247895347 32 | sheep : 0.7466344247593008 33 | sofa : 0.6627392793997164 34 | train : 0.8380502070312741 35 | tvmonitor : 0.7445168617489237 -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.0 2 | matplotlib 3 | tqdm==4.42.1 4 | pycocotools 5 | torch==1.6.0 6 | torchvision==0.7.0 7 | lxml 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/res50_ssd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/ssd/res50_ssd.png -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .res50_backbone import resnet50 2 | from .ssd_model import SSD300, Backbone 3 | from .utils import dboxes300_coco, calc_iou_tensor, Encoder, PostProcess 4 | -------------------------------------------------------------------------------- 
/pytorch_object_detection/ssd/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_utils import get_coco_api_from_dataset 2 | from .coco_eval import CocoEvaluator 3 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 4 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | import torch 4 | import torchvision 5 | import torch.utils.data 6 | from pycocotools.coco import COCO 7 | 8 | 9 | def convert_to_coco_api(ds): 10 | coco_ds = COCO() 11 | # annotation IDs need to start at 1, not 0 12 | ann_id = 1 13 | dataset = {'images': [], 'categories': [], 'annotations': []} 14 | categories = set() 15 | for img_idx in range(len(ds)): 16 | # find better way to get target 17 | targets = ds.coco_index(img_idx) 18 | image_id = targets["image_id"].item() 19 | img_dict = {} 20 | img_dict['id'] = image_id 21 | # img_dict['height'] = img.shape[-2] 22 | # img_dict['width'] = img.shape[-1] 23 | img_dict['height'] = targets["height_width"][0] 24 | img_dict['width'] = targets["height_width"][1] 25 | dataset['images'].append(img_dict) 26 | 27 | # xmin, ymin, xmax, ymax 28 | bboxes = targets["boxes"] 29 | 30 | # (xmin, ymin, xmax, ymax) to (xmin, ymin, w, h) 31 | bboxes[:, 2:] -= bboxes[:, :2] 32 | # 将box的相对坐标信息(0-1)转为绝对值坐标 33 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * img_dict["width"] 34 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * img_dict["height"] 35 | bboxes = bboxes.tolist() 36 | labels = targets['labels'].tolist() 37 | # 注意这里的boxes area也要进行转换,否则导致(small, medium, large)计算错误 38 | areas = (targets['area'] * img_dict["width"] * img_dict["height"]).tolist() 39 | iscrowd = targets['iscrowd'].tolist() 40 | num_objs = len(bboxes) 41 | for i in range(num_objs): 42 | ann = {} 43 | ann['image_id'] = image_id 44 | ann['bbox'] = bboxes[i] 45 | ann['category_id'] = labels[i] 46 | categories.add(labels[i]) 47 | ann['area'] = areas[i] 48 | ann['iscrowd'] = iscrowd[i] 49 | ann['id'] = ann_id 50 | dataset['annotations'].append(ann) 51 | ann_id += 1 52 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 53 | coco_ds.dataset = dataset 54 | coco_ds.createIndex() 55 | return coco_ds 56 | 57 | 58 | def get_coco_api_from_dataset(dataset): 59 | for _ in range(10): 60 | if isinstance(dataset, torchvision.datasets.CocoDetection): 61 | break 62 | if isinstance(dataset, torch.utils.data.Subset): 63 | dataset = dataset.dataset 64 | if isinstance(dataset, torchvision.datasets.CocoDetection): 65 | return dataset.coco 66 | return convert_to_coco_api(dataset) 67 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet50_fpn_model import resnet50_fpn_backbone 2 | from .mobilenetv2_model import MobileNetV2 3 | from .vgg_model import vgg 4 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/coco80_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": "person", 3 | "2": "bicycle", 4 | "3": "car", 5 | "4": "motorcycle", 6 | "5": "airplane", 7 | 
"6": "bus", 8 | "7": "train", 9 | "8": "truck", 10 | "9": "boat", 11 | "10": "traffic light", 12 | "11": "fire hydrant", 13 | "12": "stop sign", 14 | "13": "parking meter", 15 | "14": "bench", 16 | "15": "bird", 17 | "16": "cat", 18 | "17": "dog", 19 | "18": "horse", 20 | "19": "sheep", 21 | "20": "cow", 22 | "21": "elephant", 23 | "22": "bear", 24 | "23": "zebra", 25 | "24": "giraffe", 26 | "25": "backpack", 27 | "26": "umbrella", 28 | "27": "handbag", 29 | "28": "tie", 30 | "29": "suitcase", 31 | "30": "frisbee", 32 | "31": "skis", 33 | "32": "snowboard", 34 | "33": "sports ball", 35 | "34": "kite", 36 | "35": "baseball bat", 37 | "36": "baseball glove", 38 | "37": "skateboard", 39 | "38": "surfboard", 40 | "39": "tennis racket", 41 | "40": "bottle", 42 | "41": "wine glass", 43 | "42": "cup", 44 | "43": "fork", 45 | "44": "knife", 46 | "45": "spoon", 47 | "46": "bowl", 48 | "47": "banana", 49 | "48": "apple", 50 | "49": "sandwich", 51 | "50": "orange", 52 | "51": "broccoli", 53 | "52": "carrot", 54 | "53": "hot dog", 55 | "54": "pizza", 56 | "55": "donut", 57 | "56": "cake", 58 | "57": "chair", 59 | "58": "couch", 60 | "59": "potted plant", 61 | "60": "bed", 62 | "61": "dining table", 63 | "62": "toilet", 64 | "63": "tv", 65 | "64": "laptop", 66 | "65": "mouse", 67 | "66": "remote", 68 | "67": "keyboard", 69 | "68": "cell phone", 70 | "69": "microwave", 71 | "70": "oven", 72 | "71": "toaster", 73 | "72": "sink", 74 | "73": "refrigerator", 75 | "74": "book", 76 | "75": "clock", 77 | "76": "vase", 78 | "77": "scissors", 79 | "78": "teddy bear", 80 | "79": "hair drier", 81 | "80": "toothbrush" 82 | } -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/coco91_to_80.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": 1, 3 | "2": 2, 4 | "3": 3, 5 | "4": 4, 6 | "5": 5, 7 | "6": 6, 8 | "7": 7, 9 | "8": 8, 10 | "9": 9, 11 | "10": 10, 12 | "11": 11, 13 | "13": 12, 14 | "14": 13, 15 | "15": 14, 16 | "16": 15, 17 | "17": 16, 18 | "18": 17, 19 | "19": 18, 20 | "20": 19, 21 | "21": 20, 22 | "22": 21, 23 | "23": 22, 24 | "24": 23, 25 | "25": 24, 26 | "27": 25, 27 | "28": 26, 28 | "31": 27, 29 | "32": 28, 30 | "33": 29, 31 | "34": 30, 32 | "35": 31, 33 | "36": 32, 34 | "37": 33, 35 | "38": 34, 36 | "39": 35, 37 | "40": 36, 38 | "41": 37, 39 | "42": 38, 40 | "43": 39, 41 | "44": 40, 42 | "46": 41, 43 | "47": 42, 44 | "48": 43, 45 | "49": 44, 46 | "50": 45, 47 | "51": 46, 48 | "52": 47, 49 | "53": 48, 50 | "54": 49, 51 | "55": 50, 52 | "56": 51, 53 | "57": 52, 54 | "58": 53, 55 | "59": 54, 56 | "60": 55, 57 | "61": 56, 58 | "62": 57, 59 | "63": 58, 60 | "64": 59, 61 | "65": 60, 62 | "67": 61, 63 | "70": 62, 64 | "72": 63, 65 | "73": 64, 66 | "74": 65, 67 | "75": 66, 68 | "76": 67, 69 | "77": 68, 70 | "78": 69, 71 | "79": 70, 72 | "80": 71, 73 | "81": 72, 74 | "82": 73, 75 | "84": 74, 76 | "85": 75, 77 | "86": 76, 78 | "87": 77, 79 | "88": 78, 80 | "89": 79, 81 | "90": 80 82 | } -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/compute_receptive_field.py: -------------------------------------------------------------------------------- 1 | # vgg16(D) 2 | model = [[3, 1], 3 | [3, 1], 4 | [2, 2], # maxpool 5 | [3, 1], 6 | [3, 1], 7 | [2, 2], # maxpool 8 | [3, 1], 9 | [3, 1], 10 | [3, 1], 11 | [2, 2], # maxpool 12 | [3, 1], 13 | [3, 1], 14 | [3, 1], 15 | [2, 2], # maxpool 16 | [3, 1], 17 | [3, 1], 18 | [3, 1]] 19 
|
20 | field = 1  # receptive field seen by one output unit; backward recursion: r_in = (r_out - 1) * stride + kernel
21 | for kernel, stride in model[::-1]:
22 |     field = (field - 1) * stride + kernel
23 | print(field)  # 196, the receptive field of conv5_3 (seeding with model[-1][0] would count the last layer twice and print 228)
24 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/network_files/__init__.py: --------------------------------------------------------------------------------
1 | from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor
2 | from .rpn_function import AnchorsGenerator
3 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/network_files/image_list.py: --------------------------------------------------------------------------------
1 | from typing import List, Tuple
2 | from torch import Tensor
3 | 
4 | 
5 | class ImageList(object):
6 |     """
7 |     Structure that holds a list of images (of possibly
8 |     varying sizes) as a single tensor.
9 |     This works by padding the images to the same size,
10 |     and storing in a field the original sizes of each image
11 |     """
12 | 
13 |     def __init__(self, tensors, image_sizes):
14 |         # type: (Tensor, List[Tuple[int, int]]) -> None
15 |         """
16 |         Arguments:
17 |             tensors (tensor): the batched image data after padding
18 |             image_sizes (list[tuple[int, int]]): the image sizes before padding
19 |         """
20 |         self.tensors = tensors
21 |         self.image_sizes = image_sizes
22 | 
23 |     def to(self, device):
24 |         # type: (Device) -> ImageList  # noqa
25 |         cast_tensor = self.tensors.to(device)
26 |         return ImageList(cast_tensor, self.image_sizes)
27 | 
28 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/plot_curve.py: --------------------------------------------------------------------------------
1 | import datetime
2 | import matplotlib.pyplot as plt
3 | 
4 | 
5 | def plot_loss_and_lr(train_loss, learning_rate):
6 |     try:
7 |         x = list(range(len(train_loss)))
8 |         fig, ax1 = plt.subplots(1, 1)
9 |         ax1.plot(x, train_loss, 'r', label='loss')
10 |         ax1.set_xlabel("step")
11 |         ax1.set_ylabel("loss")
12 |         ax1.set_title("Train Loss and lr")
13 |         plt.legend(loc='best')
14 | 
15 |         ax2 = ax1.twinx()
16 |         ax2.plot(x, learning_rate, label='lr')
17 |         ax2.set_ylabel("learning rate")
18 |         ax2.set_xlim(0, len(train_loss))  # limit the x-axis to the recorded range
19 |         plt.legend(loc='best')
20 | 
21 |         handles1, labels1 = ax1.get_legend_handles_labels()
22 |         handles2, labels2 = ax2.get_legend_handles_labels()
23 |         plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')
24 | 
25 |         fig.subplots_adjust(right=0.8)  # prevent the saved figure from being cut off
26 |         fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
27 |         plt.close()
28 |         print("successfully saved loss curve!")
29 |     except Exception as e:
30 |         print(e)
31 | 
32 | 
33 | def plot_map(mAP):
34 |     try:
35 |         x = list(range(len(mAP)))
36 |         plt.plot(x, mAP, label='mAP')
37 |         plt.xlabel('epoch')
38 |         plt.ylabel('mAP')
39 |         plt.title('Eval mAP')
40 |         plt.xlim(0, len(mAP))
41 |         plt.legend(loc='best')
42 |         plt.savefig('./mAP.png')
43 |         plt.close()
44 |         print("successfully saved mAP curve!")
45 |     except Exception as e:
46 |         print(e)
47 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/requirements.txt: --------------------------------------------------------------------------------
1 | lxml
2 | matplotlib
3 | numpy
4 | tqdm
5 | pycocotools
6 | Pillow
7 | torch==1.7.1
8 | torchvision==0.8.2
9 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/train_utils/__init__.py: --------------------------------------------------------------------------------
1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir
3 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/transforms.py: --------------------------------------------------------------------------------
1 | import random
2 | from torchvision.transforms import functional as F
3 | 
4 | 
5 | class Compose(object):
6 |     """Compose several transform functions"""
7 |     def __init__(self, transforms):
8 |         self.transforms = transforms
9 | 
10 |     def __call__(self, image, target):
11 |         for t in self.transforms:
12 |             image, target = t(image, target)
13 |         return image, target
14 | 
15 | 
16 | class ToTensor(object):
17 |     """Convert a PIL image to a Tensor"""
18 |     def __call__(self, image, target):
19 |         image = F.to_tensor(image)
20 |         return image, target
21 | 
22 | 
23 | class RandomHorizontalFlip(object):
24 |     """Randomly flip the image and its bboxes horizontally"""
25 |     def __init__(self, prob=0.5):
26 |         self.prob = prob
27 | 
28 |     def __call__(self, image, target):
29 |         if random.random() < self.prob:
30 |             height, width = image.shape[-2:]
31 |             image = image.flip(-1)  # flip the image horizontally
32 |             bbox = target["boxes"]
33 |             # bbox: xmin, ymin, xmax, ymax
34 |             bbox[:, [0, 2]] = width - bbox[:, [2, 0]]  # flip the corresponding bbox coordinates
35 |             target["boxes"] = bbox
36 |         return image, target
37 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/build_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/yolov3_spp/build_utils/__init__.py
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/build_utils/img_utils.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | 
4 | 
5 | def letterbox(img: np.ndarray,
6 |               new_shape=(416, 416),
7 |               color=(114, 114, 114),
8 |               auto=True,
9 |               scale_fill=False,
10 |               scale_up=True):
11 |     """
12 |     Resize the image to the specified size, padding where necessary
13 |     :param img: input image as a numpy array
14 |     :param new_shape: the input shape of the network
15 |     :param color: the color used to fill the padding
16 |     :param auto:
17 |     :param scale_fill: simply stretch the image to the target size
18 |     :param scale_up: if False, only scale down, never up
19 |     :return:
20 |     """
21 | 
22 |     shape = img.shape[:2]  # [h, w]
23 |     if isinstance(new_shape, int):
24 |         new_shape = (new_shape, new_shape)
25 | 
26 |     # scale ratio (new / old)
27 |     r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
28 |     if not scale_up:  # only scale down, do not scale up (for better test mAP); images smaller than the target keep their size
29 |         r = min(r, 1.0)
30 | 
31 |     # compute padding
32 |     ratio = r, r  # width, height ratios
33 |     new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
34 |     dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
35 |     if auto:  # minimum rectangle: keep the aspect ratio and resize the longer side to the target size
36 |         # the modulo here keeps the padded image a multiple of 32 for 416x416 inputs; with mod 64 this also holds for 512x512
37 |         dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
38 |     elif scale_fill:  # stretch: resize the image to the target size, ignoring the aspect ratio
39 |         dw, dh = 0, 0
40 |         new_unpad = new_shape
41 |         ratio = new_shape[0] / shape[1], new_shape[1] / shape[0]  # wh ratios
42 | 
43 |     dw /= 2  # divide padding into 2 sides: top/bottom and left/right
44 |     dh /= 2
45 | 
46 |     # shape:[h, w]  new_unpad:[w, h]
47 |     if shape[::-1] != new_unpad:
48 |         img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
49 |     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))  # padding for the top and bottom
50 |     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))  # padding for the left and right
51 | 
52 |     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
53 |     return img, ratio, (dw, dh)
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/cfg/hyp.yaml: --------------------------------------------------------------------------------
1 | # Hyperparameters for training
2 | 
3 | giou: 3.54  # giou loss gain
4 | cls: 37.4  # cls loss gain
5 | cls_pw: 1.0  # cls BCELoss positive_weight
6 | obj: 64.3  # obj loss gain (*=img_size/320 if img_size != 320)
7 | obj_pw: 1.0  # obj BCELoss positive_weight
8 | iou_t: 0.20  # iou training threshold
9 | lr0: 0.001  # initial learning rate (SGD=5E-3 Adam=5E-4)
10 | lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
11 | momentum: 0.937  # SGD momentum
12 | weight_decay: 0.0005  # optimizer weight decay
13 | fl_gamma: 0.0  # focal loss gamma (efficientDet default is gamma=1.5)
14 | hsv_h: 0.0138  # image HSV-Hue augmentation (fraction)
15 | hsv_s: 0.678  # image HSV-Saturation augmentation (fraction)
16 | hsv_v: 0.36  # image HSV-Value augmentation (fraction)
17 | degrees: 0.  # image rotation (+/- deg)
18 | translate: 0.  # image translation (+/- fraction)
19 | scale: 0.  # image scale (+/- gain)
20 | shear: 0. 
# image shear (+/- deg) -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/data/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.0 2 | torchvision==0.7.0 3 | opencv_python==4.3.0.36 4 | lxml 5 | torch==1.6.0 6 | scipy 7 | pycocotools 8 | matplotlib 9 | tqdm==4.42.1 10 | tensorboard==2.1.0 11 | PyYAML 12 | -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/runs/Oct28_17-55-29_wz/events.out.tfevents.1603791769.localhost.localdomain.178338.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/yolov3_spp/runs/Oct28_17-55-29_wz/events.out.tfevents.1603791769.localhost.localdomain.178338.0 -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_utils import get_coco_api_from_dataset 2 | from .coco_eval import CocoEvaluator 3 | from .distributed_utils import init_distributed_mode, torch_distributed_zero_first 4 | -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | import torch 4 | import torchvision 5 | import torch.utils.data 6 | from pycocotools.coco import COCO 7 | 8 | 9 | def convert_to_coco_api(ds): 10 | coco_ds = COCO() 11 | # annotation IDs need to start at 1, not 0 12 | ann_id = 1 13 | dataset = {'images': [], 'categories': [], 'annotations': []} 14 | categories = set() 15 | # 遍历dataset中的每张图像 16 | for img_idx in tqdm(range(len(ds)), desc="loading eval info for coco tools."): 17 | # find better way to get target 18 | targets, shapes = ds.coco_index(img_idx) 19 | # targets: [num_obj, 6] , that number 6 means -> (img_index, obj_index, x, y, w, h) 20 | img_dict = {} 21 | img_dict['id'] = img_idx 22 | img_dict['height'] = shapes[0] 23 | img_dict['width'] = shapes[1] 24 | dataset['images'].append(img_dict) 25 | 26 | for obj in targets: 27 | ann = {} 28 | ann["image_id"] = img_idx 29 | # 将相对坐标转为绝对坐标 30 | # box (x, y, w, h) 31 | boxes = obj[1:] 32 | # (x, y, w, h) to (xmin, ymin, w, h) 33 | boxes[:2] -= 0.5*boxes[2:] 34 | boxes[[0, 2]] *= img_dict["width"] 35 | boxes[[1, 3]] *= img_dict["height"] 36 | boxes = boxes.tolist() 37 | 38 | ann["bbox"] = boxes 39 | ann["category_id"] = int(obj[0]) 40 | categories.add(int(obj[0])) 41 | ann["area"] = boxes[2] * boxes[3] 42 | ann["iscrowd"] = 0 43 | ann["id"] = ann_id 44 | dataset["annotations"].append(ann) 45 | ann_id += 1 
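    # every category id collected above must appear in dataset['categories'], otherwise pycocotools cannot look it up during evaluation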
46 | 
47 |     dataset['categories'] = [{'id': i} for i in sorted(categories)]
48 |     coco_ds.dataset = dataset
49 |     coco_ds.createIndex()
50 |     return coco_ds
51 | 
52 | 
53 | def get_coco_api_from_dataset(dataset):
54 |     for _ in range(10):
55 |         if isinstance(dataset, torchvision.datasets.CocoDetection):
56 |             break
57 |         if isinstance(dataset, torch.utils.data.Subset):
58 |             dataset = dataset.dataset
59 |     if isinstance(dataset, torchvision.datasets.CocoDetection):
60 |         return dataset.coco
61 |     return convert_to_coco_api(dataset)
62 | 
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/yolov3spp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_object_detection/yolov3_spp/yolov3spp.png
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/README.md: --------------------------------------------------------------------------------
1 | # DeepLabV3(Rethinking Atrous Convolution for Semantic Image Segmentation)
2 | 
3 | ## This project is mainly based on the source code of the official torchvision module of PyTorch
4 | * https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation
5 | 
6 | ## Environment:
7 | * Python3.6/3.7/3.8
8 | * Pytorch1.10
9 | * Ubuntu or CentOS (multi-GPU training is currently not supported on Windows)
10 | * A GPU is strongly recommended for training
11 | * See ```requirements.txt``` for the detailed environment
12 | 
13 | ## File structure:
14 | ```
15 | ├── src: the backbone and the DeepLabv3 model definition
16 | ├── train_utils: modules for training, validation and multi-GPU training
17 | ├── my_dataset.py: custom dataset for reading the VOC dataset
18 | ├── train.py: training script, using deeplabv3_resnet50 as an example
19 | ├── train_multi_GPU.py: for users training with multiple GPUs
20 | ├── predict.py: simple prediction script that runs inference with trained weights
21 | ├── validation.py: computes mIoU and other metrics of validation/test data with trained weights and generates the record_mAP.txt file
22 | └── pascal_voc_classes.json: pascal_voc label file
23 | ```
24 | 
25 | ## Pretrained weights download links:
26 | * Note: the official pretrained weights were obtained by pretraining on COCO, training only on the categories shared with PASCAL VOC, so the number of classes is 21 (including background)
27 | * deeplabv3_resnet50: https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth
28 | * deeplabv3_resnet101: https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth
29 | * deeplabv3_mobilenetv3_large_coco: https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth
30 | * Note: remember to rename the downloaded weights; for example, train.py reads the ```deeplabv3_resnet50_coco.pth``` file,
31 | not ```deeplabv3_resnet50_coco-cd0a2569.pth```
32 | 
33 | 
34 | ## Dataset: this project uses the PASCAL VOC2012 dataset
35 | * Pascal VOC2012 train/val dataset download link: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
36 | * If you are not familiar with the dataset or want to train on your own data, see my blog post: https://blog.csdn.net/qq_37541097/article/details/115787033
37 | 
38 | ## Training
39 | * Make sure the dataset is prepared in advance
40 | * Make sure the corresponding pretrained weights are downloaded in advance
41 | * For single-GPU or CPU training, use the train.py script directly
42 | * For multi-GPU training, use the ```torchrun --nproc_per_node=8 train_multi_GPU.py``` command, where the ```nproc_per_node``` argument is the number of GPUs to use
43 | * To specify which GPU devices to use, prefix the command with ```CUDA_VISIBLE_DEVICES=0,3``` (e.g. to use only the 1st and the 4th GPU)
44 | * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py```
45 | 
46 | ## Notes
47 | * When using the training scripts, set '--data-path' (VOC_root) to the **root directory** that contains your 'VOCdevkit' folder
48 | * When using the prediction script, set 'weights_path' to your own generated weights path.
49 | * When using the validation script, make sure your validation/test set contains objects of every category, and only modify '--num-classes', '--aux', '--data-path' and '--weights'; try not to change any other code
50 | 
51 | ## If you don't fully understand how DeepLabV3 works, see my bilibili
52 | 
53 | 
54 | ## For more about this project and an analysis of the DeepLabV3 code, see my bilibili
55 | 
56 | 
57 | ## Diagram of the DeeplabV3 network as implemented in official PyTorch
58 | ![deeplabv3_resnet50_pytorch](./deeplabv3_resnet50.png)
59 | 
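## Minimal single-image inference sketch
The snippet below is a rough sketch of how trained weights can be used for prediction, not a copy of predict.py. It assumes the `deeplabv3_resnet50(aux=True, num_classes=21)` signature exposed by `src`, a checkpoint saved under a `"model"` key, and the hypothetical paths `save_weights/model_29.pth` and `test.jpg`; check predict.py for the exact interface before relying on it.

```python
import torch
from torchvision import transforms
from PIL import Image

from src import deeplabv3_resnet50  # exported in src/__init__.py

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 20 VOC foreground classes + background = 21; the aux head is enabled here only
# because the training scripts use an auxiliary loss (an assumption)
model = deeplabv3_resnet50(aux=True, num_classes=21)
ckpt = torch.load("save_weights/model_29.pth", map_location="cpu")  # hypothetical path
model.load_state_dict(ckpt["model"] if "model" in ckpt else ckpt)
model.to(device).eval()

# ImageNet statistics, matching the pretrained ResNet backbone
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
])

img = preprocess(Image.open("test.jpg").convert("RGB")).unsqueeze(0).to(device)

with torch.no_grad():
    # torchvision-style segmentation models return a dict; "out" is the main head
    logits = model(img)["out"]          # [1, 21, H, W]
    mask = logits.argmax(1).squeeze(0)  # [H, W], class index per pixel (0 = background)

print(mask.shape)
```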
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/deeplabv3_resnet50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_segmentation/deeplab_v3/deeplabv3_resnet50.png -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/get_palette.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # 读取mask标签 6 | target = Image.open("./2007_001288.png") 7 | # 获取调色板 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # 转换成字典子形式 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 | -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch.utils.data as data 4 | from PIL import Image 5 | 6 | 7 | class VOCSegmentation(data.Dataset): 8 | def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"): 9 | super(VOCSegmentation, self).__init__() 10 | assert year in ["2007", "2012"], "year must be in ['2007', '2012']" 11 | root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}") 12 | assert os.path.exists(root), "path '{}' does not exist.".format(root) 13 | image_dir = os.path.join(root, 'JPEGImages') 14 | mask_dir = os.path.join(root, 'SegmentationClass') 15 | 16 | txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name) 17 | assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path) 18 | with open(os.path.join(txt_path), "r") as f: 19 | file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0] 20 | 21 | self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] 22 | self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names] 23 | assert (len(self.images) == len(self.masks)) 24 | self.transforms = transforms 25 | 26 | def __getitem__(self, index): 27 | """ 28 | Args: 29 | index (int): Index 30 | 31 | Returns: 32 | tuple: (image, target) where target is the image segmentation. 
33 | """ 34 | img = Image.open(self.images[index]).convert('RGB') 35 | target = Image.open(self.masks[index]) 36 | 37 | if self.transforms is not None: 38 | img, target = self.transforms(img, target) 39 | 40 | return img, target 41 | 42 | def __len__(self): 43 | return len(self.images) 44 | 45 | @staticmethod 46 | def collate_fn(batch): 47 | images, targets = list(zip(*batch)) 48 | batched_imgs = cat_list(images, fill_value=0) 49 | batched_targets = cat_list(targets, fill_value=255) 50 | return batched_imgs, batched_targets 51 | 52 | 53 | def cat_list(images, fill_value=0): 54 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 55 | batch_shape = (len(images),) + max_size 56 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 57 | for img, pad_img in zip(images, batched_imgs): 58 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 59 | return batched_imgs 60 | 61 | 62 | # dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True)) 63 | # d1 = dataset[0] 64 | # print(d1) 65 | -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.3 2 | torch==1.10.0 3 | torchvision==0.11.1 4 | Pillow==8.4.0 -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/results20211027-104607.txt: -------------------------------------------------------------------------------- 1 | [epoch: 0] 2 | train_loss: 0.7098 3 | lr: 0.000100 4 | global correct: 94.7 5 | average row correct: ['97.0', '93.9', '86.4', '93.1', '74.4', '64.0', '97.9', '84.7', '95.7', '63.8', '92.2', '68.8', '88.8', '90.5', '93.0', '95.3', '75.3', '94.2', '83.6', '91.9', '75.0'] 6 | IoU: ['93.9', '91.5', '42.6', '88.9', '65.3', '60.4', '95.9', '76.3', '90.7', '50.9', '87.3', '54.1', '86.1', '83.0', '87.2', '89.3', '64.3', '91.0', '58.9', '84.5', '73.6'] 7 | mean IoU: 76.9 8 | 9 | [epoch: 1] 10 | train_loss: 0.6005 11 | lr: 0.000077 12 | global correct: 94.7 13 | average row correct: ['96.1', '96.3', '84.1', '95.1', '84.1', '81.4', '98.6', '85.8', '96.6', '68.7', '91.9', '71.3', '93.9', '91.4', '96.4', '95.5', '81.0', '94.1', '85.4', '94.9', '83.6'] 14 | IoU: ['93.9', '92.3', '42.2', '88.8', '69.1', '71.2', '96.1', '75.1', '91.9', '48.9', '87.8', '56.0', '87.9', '85.7', '89.2', '89.5', '63.6', '90.3', '56.2', '85.3', '79.8'] 15 | mean IoU: 78.1 16 | 17 | [epoch: 2] 18 | train_loss: 0.5840 19 | lr: 0.000054 20 | global correct: 94.8 21 | average row correct: ['96.2', '95.5', '85.8', '94.6', '85.5', '83.7', '98.8', '87.5', '96.3', '71.4', '92.5', '72.8', '93.1', '91.9', '96.7', '94.9', '81.5', '95.3', '82.8', '95.3', '84.1'] 22 | IoU: ['94.0', '91.2', '42.7', '88.3', '69.2', '72.7', '96.4', '74.8', '92.0', '49.8', '87.5', '58.3', '87.3', '85.0', '89.3', '89.2', '62.6', '89.6', '58.1', 
'84.8', '80.3'] 23 | mean IoU: 78.2 24 | 25 | [epoch: 3] 26 | train_loss: 0.5637 27 | lr: 0.000029 28 | global correct: 94.8 29 | average row correct: ['96.1', '95.9', '81.7', '94.8', '86.5', '79.4', '99.0', '89.1', '95.8', '71.4', '93.8', '71.0', '93.4', '92.4', '97.3', '94.9', '80.4', '96.9', '83.3', '94.7', '84.4'] 30 | IoU: ['94.0', '89.5', '41.8', '87.6', '69.0', '70.4', '96.0', '75.9', '92.1', '49.7', '87.3', '58.1', '86.2', '83.9', '88.7', '89.2', '63.7', '88.8', '57.7', '85.3', '79.9'] 31 | mean IoU: 77.8 32 | 33 | [epoch: 4] 34 | train_loss: 0.5779 35 | lr: 0.000000 36 | global correct: 94.8 37 | average row correct: ['96.3', '93.6', '85.9', '95.1', '82.6', '83.8', '98.5', '90.0', '95.9', '71.1', '93.2', '68.4', '92.6', '93.9', '95.9', '94.5', '82.8', '96.3', '82.8', '94.5', '86.4'] 38 | IoU: ['94.1', '91.8', '42.5', '88.5', '67.8', '72.1', '96.6', '78.3', '92.0', '49.8', '88.3', '58.8', '86.7', '84.9', '89.0', '89.5', '61.0', '89.1', '56.6', '84.6', '80.2'] 39 | mean IoU: 78.2 40 | 41 | -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .deeplabv3_model import deeplabv3_resnet50, deeplabv3_resnet101, deeplabv3_mobilenetv3_large 2 | -------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/README.md: -------------------------------------------------------------------------------- 1 | # FCN(Fully Convolutional Networks for Semantic Segmentation) 2 | 3 | ## 该项目主要是来自pytorch官方torchvision模块中的源码 4 | * https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation 5 | 6 | ## 环境配置: 7 | * Python3.6/3.7/3.8 8 | * Pytorch1.10 9 | * Ubuntu或Centos(Windows暂不支持多GPU训练) 10 | * 最好使用GPU训练 11 | * 详细环境配置见```requirements.txt``` 12 | 13 | ## 文件结构: 14 | ``` 15 | ├── src: 模型的backbone以及FCN的搭建 16 | ├── train_utils: 训练、验证以及多GPU训练相关模块 17 | ├── my_dataset.py: 自定义dataset用于读取VOC数据集 18 | ├── train.py: 以fcn_resnet50(这里使用了Dilated/Atrous Convolution)进行训练 19 | ├── train_multi_GPU.py: 针对使用多GPU的用户使用 20 | ├── predict.py: 简易的预测脚本,使用训练好的权重进行预测测试 21 | ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标,并生成record_mAP.txt文件 22 | └── pascal_voc_classes.json: pascal_voc标签文件 23 | ``` 24 | 25 | ## 预训练权重下载地址: 26 | * 注意:官方提供的预训练权重是在COCO上预训练得到的,训练时只针对和PASCAL VOC相同的类别进行了训练,所以类别数是21(包括背景) 27 | * fcn_resnet50: https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth 28 | * fcn_resnet101: https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth 29 | * 注意,下载的预训练权重记得要重命名,比如在train.py中读取的是```fcn_resnet50_coco.pth```文件, 30 | 不是```fcn_resnet50_coco-1167a1af.pth``` 31 | 32 | 33 | ## 数据集,本例程使用的是PASCAL VOC2012数据集 34 | * Pascal VOC2012 train/val数据集下载地址:http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 35 | * 如果不了解数据集或者想使用自己的数据集进行训练,请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033 36 | 37 | ## 训练方法 38 | * 确保提前准备好数据集 39 | * 确保提前下载好对应预训练模型权重 40 | * 若要使用单GPU或者CPU训练,直接使用train.py训练脚本 41 | * 若要使用多GPU训练,使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量 42 | 
* 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备) 43 | * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py``` 44 | 45 | ## 注意事项 46 | * 在使用训练脚本时,注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录** 47 | * 在使用预测脚本时,要将'weights_path'设置为你自己生成的权重路径。 48 | * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改'--num-classes'、'--aux'、'--data-path'和'--weights'即可,其他代码尽量不要改动 49 | 50 | ## 如果对FCN原理不是很理解可参考我的bilibili 51 | * https://www.bilibili.com/video/BV1J3411C7zd 52 | * https://www.bilibili.com/video/BV1ev411u7TX 53 | 54 | ## 进一步了解该项目,以及对FCN代码的分析可参考我的bilibili 55 | * https://www.bilibili.com/video/BV19q4y1971Q 56 | 57 | ## Pytorch官方实现的FCN网络框架图 58 | ![torch_fcn](torch_fcn.png) 59 | 60 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/get_palette.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # 读取mask标签 6 | target = Image.open("./2007_001288.png") 7 | # 获取调色板 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # 转换成字典子形式 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch.utils.data as data 4 | from PIL import Image 5 | 6 | 7 | class VOCSegmentation(data.Dataset): 8 | def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"): 9 | super(VOCSegmentation, self).__init__() 10 | assert year in ["2007", "2012"], "year must be in ['2007', '2012']" 11 | root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}") 12 | assert os.path.exists(root), "path '{}' does not exist.".format(root) 13 | image_dir = os.path.join(root, 'JPEGImages') 14 | mask_dir = os.path.join(root, 'SegmentationClass') 15 | 16 | txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name) 17 | assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path) 18 | with open(os.path.join(txt_path), "r") as f: 19 | file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0] 20 | 21 | self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] 22 | self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names] 23 | assert (len(self.images) == len(self.masks)) 24 | self.transforms = transforms 25 | 26 | def __getitem__(self, index): 27 | """ 28 | Args: 29 | index (int): Index 30 | 31 | Returns: 32 | tuple: (image, target) where target is the image segmentation. 
33 | """ 34 | img = Image.open(self.images[index]).convert('RGB') 35 | target = Image.open(self.masks[index]) 36 | 37 | if self.transforms is not None: 38 | img, target = self.transforms(img, target) 39 | 40 | return img, target 41 | 42 | def __len__(self): 43 | return len(self.images) 44 | 45 | @staticmethod 46 | def collate_fn(batch): 47 | images, targets = list(zip(*batch)) 48 | batched_imgs = cat_list(images, fill_value=0) 49 | batched_targets = cat_list(targets, fill_value=255) 50 | return batched_imgs, batched_targets 51 | 52 | 53 | def cat_list(images, fill_value=0): 54 | # 计算该batch数据中,channel, h, w的最大值 55 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 56 | batch_shape = (len(images),) + max_size 57 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 58 | for img, pad_img in zip(images, batched_imgs): 59 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 60 | return batched_imgs 61 | 62 | 63 | # dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True)) 64 | # d1 = dataset[0] 65 | # print(d1) 66 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.3 2 | torch==1.10.0 3 | torchvision==0.11.1 4 | Pillow==8.4.0 -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_model import fcn_resnet50, fcn_resnet101 2 | -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/torch_fcn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_segmentation/fcn/torch_fcn.png -------------------------------------------------------------------------------- /pytorch_segmentation/fcn/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/README.md: -------------------------------------------------------------------------------- 1 | # LRASPP(Searching for MobileNetV3) 2 | 3 | ## 该项目主要是来自pytorch官方torchvision模块中的源码 4 | * https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation 5 | 6 | ## 环境配置: 7 | * Python3.6/3.7/3.8 8 | * Pytorch1.10 9 | * Ubuntu或Centos(Windows暂不支持多GPU训练) 10 | * 最好使用GPU训练 11 | * 详细环境配置见```requirements.txt``` 12 | 13 | ## 文件结构: 14 | ``` 15 | ├── src: 模型的backbone以及LRASPP的搭建 16 | ├── 
train_utils: modules for training, validation and multi-GPU training 17 | ├── my_dataset.py: custom dataset for reading the VOC dataset 18 | ├── train.py: single-GPU training script 19 | ├── train_multi_GPU.py: training script for multi-GPU users 20 | ├── predict.py: a simple inference script that runs prediction with trained weights 21 | ├── validation.py: computes mIoU and other metrics on validation/test data with trained weights, and generates a record_mAP.txt file 22 | └── pascal_voc_classes.json: pascal_voc label file 23 | ``` 24 | 25 | ## Pre-trained weights download links: 26 | * Note: the official pre-trained weights were obtained on COCO, trained only on the categories shared with PASCAL VOC, so the number of classes is 21 (including background) 27 | * lraspp_mobilenet_v3_large: https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth 28 | * Note: remember to rename the downloaded pre-trained weights; for example, train.py reads the file ```lraspp_mobilenet_v3_large.pth```, 29 | not ```lraspp_mobilenet_v3_large-d234d4ea.pth``` 30 | 31 | 32 | ## Dataset: this example uses the PASCAL VOC2012 dataset 33 | * Pascal VOC2012 train/val dataset download link: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 34 | * If you are unfamiliar with the dataset or want to train on your own data, see my blog post: https://blog.csdn.net/qq_37541097/article/details/115787033 35 | 36 | ## How to train 37 | * Make sure the dataset is prepared in advance 38 | * Make sure the corresponding pre-trained weights are downloaded in advance 39 | * To train on a single GPU or the CPU, use the train.py script directly 40 | * To train on multiple GPUs, use the ```torchrun --nproc_per_node=8 train_multi_GPU.py``` command, where the ```nproc_per_node``` parameter is the number of GPUs to use 41 | * To specify which GPU devices to use, prefix the command with ```CUDA_VISIBLE_DEVICES=0,3``` (for example, to use only the 1st and 4th GPUs on the machine) 42 | * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py``` 43 | 44 | ## Notes 45 | * When using the training scripts, set '--data-path' (VOC_root) to the **root directory** containing your 'VOCdevkit' folder 46 | * When using the prediction script, set 'weights_path' to the path of the weights you generated. 47 | * When using the validation script, make sure your validation/test set contains objects of every class; only modify '--num-classes', '--data-path' and '--weights', and avoid changing the rest of the code 48 | 49 | ## If the principles of LRASPP are unclear, see my bilibili 50 | 51 | 52 | ## For a closer look at this project and an analysis of the LRASPP code, see my bilibili 53 | 54 | 55 | ## Diagram of the LRASPP network as implemented by official PyTorch 56 | ![lraspp](lraspp.png) -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/get_palette.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # read the mask label 6 | target = Image.open("./2007_001288.png") 7 | # get the palette 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # convert to a dict 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/lraspp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ydjiao/deep-learning-for-image-processing/34a06af362ff54a23e0a112a633e9dbac83d94cf/pytorch_segmentation/lraspp/lraspp.png -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch.utils.data as data 4 | from PIL import Image 5 | 6 | 7 | class VOCSegmentation(data.Dataset): 8 | def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"): 9 | super(VOCSegmentation, self).__init__() 10 | assert year in ["2007", "2012"], "year must be in ['2007', '2012']" 11 | root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}") 12 | assert os.path.exists(root), "path '{}' does not exist.".format(root) 13 | image_dir = os.path.join(root, 'JPEGImages') 14 | mask_dir = os.path.join(root, 'SegmentationClass') 15 | 16 | 
txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name) 17 | assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path) 18 | with open(os.path.join(txt_path), "r") as f: 19 | file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0] 20 | 21 | self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] 22 | self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names] 23 | assert (len(self.images) == len(self.masks)) 24 | self.transforms = transforms 25 | 26 | def __getitem__(self, index): 27 | """ 28 | Args: 29 | index (int): Index 30 | 31 | Returns: 32 | tuple: (image, target) where target is the image segmentation. 33 | """ 34 | img = Image.open(self.images[index]).convert('RGB') 35 | target = Image.open(self.masks[index]) 36 | 37 | if self.transforms is not None: 38 | img, target = self.transforms(img, target) 39 | 40 | return img, target 41 | 42 | def __len__(self): 43 | return len(self.images) 44 | 45 | @staticmethod 46 | def collate_fn(batch): 47 | images, targets = list(zip(*batch)) 48 | batched_imgs = cat_list(images, fill_value=0) 49 | batched_targets = cat_list(targets, fill_value=255) 50 | return batched_imgs, batched_targets 51 | 52 | 53 | def cat_list(images, fill_value=0): 54 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 55 | batch_shape = (len(images),) + max_size 56 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 57 | for img, pad_img in zip(images, batched_imgs): 58 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 59 | return batched_imgs 60 | 61 | 62 | # dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True)) 63 | # d1 = dataset[0] 64 | # print(d1) 65 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.3 2 | torch==1.10.0 3 | torchvision==0.11.1 4 | Pillow==8.4.0 -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/results20211028-105233.txt: -------------------------------------------------------------------------------- 1 | [epoch: 0] 2 | train_loss: 0.5343 3 | lr: 0.000100 4 | global correct: 93.1 5 | average row correct: ['96.8', '90.0', '73.9', '87.9', '79.4', '66.2', '92.1', '79.5', '90.9', '45.0', '88.9', '54.7', '85.8', '89.8', '87.5', '91.2', '66.8', '85.0', '68.4', '87.6', '71.6'] 6 | IoU: ['92.4', '85.7', '34.8', '84.3', '66.4', '59.9', '89.2', '71.2', '86.0', '34.6', '82.3', '46.1', '78.6', '82.1', '79.8', '82.5', '54.8', '79.4', '50.2', '83.8', '65.5'] 7 | mean IoU: 70.9 8 | 9 | [epoch: 1] 10 | train_loss: 0.4683 11 | lr: 0.000077 12 | global correct: 93.2 13 | average row correct: ['96.2', '92.6', '75.2', '92.3', '82.6', '70.9', '93.5', '83.9', '93.5', '47.9', '91.0', '61.9', '87.0', '90.5', '89.8', '90.0', '68.1', 
'86.4', '70.4', '90.4', '75.5'] 14 | IoU: ['92.5', '86.1', '34.9', '85.1', '65.3', '63.0', '90.0', '73.1', '86.0', '34.8', '83.2', '50.0', '77.6', '81.2', '79.8', '82.3', '54.3', '78.4', '49.8', '85.5', '67.3'] 15 | mean IoU: 71.4 16 | 17 | [epoch: 2] 18 | train_loss: 0.4053 19 | lr: 0.000054 20 | global correct: 93.1 21 | average row correct: ['95.9', '93.1', '75.9', '92.6', '83.8', '75.3', '94.4', '85.6', '93.7', '50.2', '91.2', '62.1', '87.1', '90.8', '90.3', '89.8', '71.2', '86.8', '71.8', '91.1', '77.5'] 22 | IoU: ['92.5', '86.0', '35.1', '84.7', '65.2', '65.6', '90.4', '73.3', '85.9', '34.8', '83.0', '50.0', '77.7', '81.7', '79.2', '82.3', '53.9', '78.5', '49.9', '85.6', '67.2'] 23 | mean IoU: 71.6 24 | 25 | [epoch: 3] 26 | train_loss: 0.4358 27 | lr: 0.000029 28 | global correct: 93.1 29 | average row correct: ['95.8', '93.4', '76.0', '92.3', '83.2', '78.1', '94.0', '86.3', '93.0', '50.9', '91.1', '62.9', '88.0', '90.9', '90.4', '89.6', '71.6', '87.0', '72.4', '92.4', '78.5'] 30 | IoU: ['92.5', '86.0', '35.3', '85.1', '66.1', '66.9', '89.8', '73.3', '85.9', '34.8', '83.0', '50.4', '78.0', '81.5', '79.0', '82.1', '54.1', '78.6', '50.0', '85.6', '67.1'] 31 | mean IoU: 71.7 32 | 33 | [epoch: 4] 34 | train_loss: 0.3886 35 | lr: 0.000000 36 | global correct: 93.1 37 | average row correct: ['95.6', '93.8', '76.0', '92.8', '83.6', '77.9', '94.2', '86.1', '93.5', '50.9', '92.0', '63.8', '88.8', '91.4', '90.6', '89.4', '73.2', '87.4', '73.0', '92.4', '78.9'] 38 | IoU: ['92.5', '86.0', '35.3', '84.4', '66.2', '66.5', '89.9', '73.2', '85.9', '34.6', '83.2', '50.8', '78.0', '81.4', '78.6', '82.0', '53.6', '78.4', '50.1', '85.7', '66.6'] 39 | mean IoU: 71.6 40 | 41 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .lraspp_model import lraspp_mobilenetv3_large 2 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | -------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/validation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from src import lraspp_mobilenetv3_large 5 | from train_utils import evaluate 6 | from my_dataset import VOCSegmentation 7 | import transforms as T 8 | 9 | 10 | class SegmentationPresetEval: 11 | def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): 12 | self.transforms = T.Compose([ 13 | T.RandomResize(base_size, base_size), 14 | T.ToTensor(), 15 | T.Normalize(mean=mean, std=std), 16 | ]) 17 | 18 | def __call__(self, img, target): 19 | return self.transforms(img, target) 20 | 21 | 22 | def main(args): 23 | device = torch.device(args.device if torch.cuda.is_available() else "cpu") 24 | assert os.path.exists(args.weights), f"weights {args.weights} not found." 
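# example invocation, matching the argparse defaults defined in parse_args() below:
# python validation.py --data-path /data/ --weights ./save_weights/model_29.pth --num-classes 20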
25 | 26 | # segmentation num_classes + background 27 | num_classes = args.num_classes + 1 28 | 29 | # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt 30 | val_dataset = VOCSegmentation(args.data_path, 31 | year="2012", 32 | transforms=SegmentationPresetEval(520), 33 | txt_name="val.txt") 34 | 35 | num_workers = 8 36 | val_loader = torch.utils.data.DataLoader(val_dataset, 37 | batch_size=1, 38 | num_workers=num_workers, 39 | pin_memory=True, 40 | collate_fn=val_dataset.collate_fn) 41 | 42 | model = lraspp_mobilenetv3_large(num_classes=num_classes) 43 | model.load_state_dict(torch.load(args.weights, map_location=device)['model']) 44 | model.to(device) 45 | 46 | confmat = evaluate(model, val_loader, device=device, num_classes=num_classes) 47 | print(confmat) 48 | 49 | 50 | def parse_args(): 51 | import argparse 52 | parser = argparse.ArgumentParser(description="pytorch lraspp validation") 53 | 54 | parser.add_argument("--data-path", default="/data/", help="VOCdevkit root") 55 | parser.add_argument("--weights", default="./save_weights/model_29.pth") 56 | parser.add_argument("--num-classes", default=20, type=int) 57 | parser.add_argument("--device", default="cuda", help="training device") 58 | parser.add_argument('--print-freq', default=10, type=int, help='print frequency') 59 | 60 | args = parser.parse_args() 61 | 62 | return args 63 | 64 | 65 | if __name__ == '__main__': 66 | args = parse_args() 67 | main(args) 68 | -------------------------------------------------------------------------------- /summary_problem.md: -------------------------------------------------------------------------------- 1 | ## Installing Tensorflow2.1 GPU and Pytorch1.3 GPU 2 | See my earlier blog post: [Installing Tensorflow2.1 GPU and Pytorch1.3 GPU (CUDA10.1) on Centos7](https://blog.csdn.net/qq_37541097/article/details/103933366) 3 | 4 | 5 | ## Can weights trained with the keras functional API be mixed with weights trained with a subclassed model? [tensorflow2.0.0] 6 | Strongly discouraged: even if the two models have exactly the same names and structure, do not mix them; there are pitfalls. Load a model's weights with the same kind of model that trained them 7 | 8 | 9 | ## model.summary() cannot be used with a subclassed model [tensorflow2.0.0] 10 | A subclassed model is not built automatically on instantiation (it is only built automatically once training starts); to use summary, build it manually first: 11 | model.build((batch_size, height, width, channel)) 12 | 13 | 14 | ## keras plot_model(model, 'my_model.png') does not work [tensorflow2.0.0] 15 | #### On Linux you need to install a few packages: 16 | * pip install pydot==1.2.3 17 | * sudo apt-get install graphviz 18 | #### On Windows you also need to install some packages (Windows is more troublesome): 19 | * pip install pydot==1.2.3 20 | * install graphviz and add the relevant environment variables 21 | Reference: https://github.com/XifengGuo/CapsNet-Keras/issues/7 22 | 23 | ## Why does optimizer.zero_grad() need to be called for every batch? [Pytorch1.3] 24 | If the historical gradients are not cleared, newly computed gradients are accumulated onto them (this behavior can be exploited to effectively train with a very large batch size) 25 | Reference: https://www.zhihu.com/question/303070254 26 | 27 | ## Pytorch1.3 ImportError: cannot import name 'PILLOW_VERSION' [Pytorch1.3] 28 | Caused by a too-new pillow version; install a version below 7.0.0 -------------------------------------------------------------------------------- /tensorflow_classification/ConfusionMatrix/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/README.md: -------------------------------------------------------------------------------- 1 | ## This folder contains the tensorflow implementations 2 | **model.py**: the model definition 3 | **train.py**: the script that trains the model 4 | **predict.py**: the script that runs inference with the model 5 | **class_indices.json**: the label file for the training dataset 6 | 7 | ------ 8 | To use the training scripts, download the corresponding flower classification dataset and split it into a training set and a validation set. 
9 | [Click here](../data_set/README.md) for instructions on downloading the dataset, plus a ready-made script for splitting it -------------------------------------------------------------------------------- /tensorflow_classification/Test11_efficientnetV2/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from model import efficientnetv2_s as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | 16 | img_size = {"s": 384, 17 | "m": 480, 18 | "l": 480} 19 | num_model = "s" 20 | im_height = im_width = img_size[num_model] 21 | 22 | # load image 23 | img_path = "../tulip.jpg" 24 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 25 | img = Image.open(img_path) 26 | # resize image 27 | img = img.resize((im_width, im_height)) 28 | plt.imshow(img) 29 | 30 | # read image 31 | img = np.array(img).astype(np.float32) 32 | 33 | # preprocess 34 | img = (img / 255. - 0.5) / 0.5 35 | 36 | # Add the image to a batch where it's the only member. 37 | img = (np.expand_dims(img, 0)) 38 | 39 | # read class_indict 40 | json_path = './class_indices.json' 41 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 42 | 43 | json_file = open(json_path, "r") 44 | class_indict = json.load(json_file) 45 | 46 | # create model 47 | model = create_model(num_classes=num_classes) 48 | 49 | weights_path = './save_weights/efficientnetv2.ckpt' 50 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 51 | model.load_weights(weights_path) 52 | 53 | result = np.squeeze(model.predict(img)) 54 | result = tf.keras.layers.Softmax()(result) 55 | predict_class = np.argmax(result) 56 | 57 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 58 | result[predict_class]) 59 | plt.title(print_res) 60 | for i in range(len(result)): 61 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 62 | result[i].numpy())) 63 | plt.show() 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tensorflow_classification/Test11_efficientnetV2/trans_weights.py: -------------------------------------------------------------------------------- 1 | from model import * 2 | 3 | 4 | def main(ckpt_path: str, 5 | model_name: str, 6 | model: tf.keras.Model): 7 | var_dict = {v.name.split(':')[0]: v for v in model.weights} 8 | 9 | reader = tf.train.load_checkpoint(ckpt_path) 10 | var_shape_map = reader.get_variable_to_shape_map() 11 | 12 | for key, var in var_dict.items(): 13 | key_ = model_name + "/" + key 14 | key_ = key_.replace("batch_normalization", "tpu_batch_normalization") 15 | if key_ in var_shape_map: 16 | if var_shape_map[key_] != var.shape: 17 | msg = "shape mismatch: {}".format(key) 18 | print(msg) 19 | else: 20 | var.assign(reader.get_tensor(key_), read_value=False) 21 | else: 22 | msg = "Not found {} in {}".format(key, ckpt_path) 23 | print(msg) 24 | 25 | model.save_weights("./{}.h5".format(model_name)) 26 | 27 | 28 | if __name__ == '__main__': 29 | model = efficientnetv2_s() 30 | model.build((1, 224, 224, 3)) 31 | main(ckpt_path="./efficientnetv2-s-21k-ft1k/model", 32 | model_name="efficientnetv2-s", 33 | model=model) 34 | 35 | # model = efficientnetv2_m() 36 | # model.build((1, 224, 224, 3)) 37 | # main(ckpt_path="./efficientnetv2-m-21k-ft1k/model", 38 | # model_name="efficientnetv2-m", 39 
| # model=model) 40 | 41 | # model = efficientnetv2_l() 42 | # model.build((1, 224, 224, 3)) 43 | # main(ckpt_path="./efficientnetv2-l-21k-ft1k/model", 44 | # model_name="efficientnetv2-l", 45 | # model=model) 46 | -------------------------------------------------------------------------------- /tensorflow_classification/Test1_official_demo/model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Dense, Flatten, Conv2D 2 | from tensorflow.keras import Model 3 | 4 | 5 | class MyModel(Model): 6 | def __init__(self): 7 | super(MyModel, self).__init__() 8 | self.conv1 = Conv2D(32, 3, activation='relu') 9 | self.flatten = Flatten() 10 | self.d1 = Dense(128, activation='relu') 11 | self.d2 = Dense(10, activation='softmax') 12 | 13 | def call(self, x, **kwargs): 14 | x = self.conv1(x) # input[batch, 28, 28, 1] output[batch, 26, 26, 32] 15 | x = self.flatten(x) # output [batch, 21632] 16 | x = self.d1(x) # output [batch, 128] 17 | return self.d2(x) # output [batch, 10] 18 | -------------------------------------------------------------------------------- /tensorflow_classification/Test2_alexnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/Test2_alexnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from PIL import Image 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from model import AlexNet_v1, AlexNet_v2 9 | 10 | 11 | def main(): 12 | im_height = 224 13 | im_width = 224 14 | 15 | # load image 16 | img_path = "../tulip.jpg" 17 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 18 | img = Image.open(img_path) 19 | 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value to (0-1) 25 | img = np.array(img) / 255. 26 | 27 | # Add the image to a batch where it's the only member. 
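# (np.expand_dims inserts the batch axis: (224, 224, 3) -> (1, 224, 224, 3), the NHWC layout tf.keras models expect)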
28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | # create model 38 | model = AlexNet_v1(num_classes=5) 39 | weights_path = "./save_weights/myAlex.h5" 40 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 41 | model.load_weights(weights_path) 42 | 43 | # prediction 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /tensorflow_classification/Test3_vgg/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/Test3_vgg/model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras import layers, Model, Sequential 2 | 3 | CONV_KERNEL_INITIALIZER = { 4 | 'class_name': 'VarianceScaling', 5 | 'config': { 6 | 'scale': 2.0, 7 | 'mode': 'fan_out', 8 | 'distribution': 'truncated_normal' 9 | } 10 | } 11 | 12 | DENSE_KERNEL_INITIALIZER = { 13 | 'class_name': 'VarianceScaling', 14 | 'config': { 15 | 'scale': 1. 
/ 3., 16 | 'mode': 'fan_out', 17 | 'distribution': 'uniform' 18 | } 19 | } 20 | 21 | 22 | def VGG(feature, im_height=224, im_width=224, num_classes=1000): 23 | # tensors in tensorflow use NHWC channel ordering 24 | input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32") 25 | x = feature(input_image) 26 | x = layers.Flatten()(x) 27 | x = layers.Dropout(rate=0.5)(x) 28 | x = layers.Dense(2048, activation='relu', 29 | kernel_initializer=DENSE_KERNEL_INITIALIZER)(x) 30 | x = layers.Dropout(rate=0.5)(x) 31 | x = layers.Dense(2048, activation='relu', 32 | kernel_initializer=DENSE_KERNEL_INITIALIZER)(x) 33 | x = layers.Dense(num_classes, 34 | kernel_initializer=DENSE_KERNEL_INITIALIZER)(x) 35 | output = layers.Softmax()(x) 36 | model = Model(inputs=input_image, outputs=output) 37 | return model 38 | 39 | 40 | def make_feature(cfg): 41 | feature_layers = [] 42 | for v in cfg: 43 | if v == "M": 44 | feature_layers.append(layers.MaxPool2D(pool_size=2, strides=2)) 45 | else: 46 | conv2d = layers.Conv2D(v, kernel_size=3, padding="SAME", activation="relu", 47 | kernel_initializer=CONV_KERNEL_INITIALIZER) 48 | feature_layers.append(conv2d) 49 | return Sequential(feature_layers, name="feature") 50 | 51 | 52 | cfgs = { 53 | 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 54 | 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 55 | 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 56 | 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 57 | } 58 | 59 | 60 | def vgg(model_name="vgg16", im_height=224, im_width=224, num_classes=1000): 61 | assert model_name in cfgs.keys(), "unsupported model {}".format(model_name) 62 | cfg = cfgs[model_name] 63 | model = VGG(make_feature(cfg), im_height=im_height, im_width=im_width, num_classes=num_classes) 64 | return model 65 | -------------------------------------------------------------------------------- /tensorflow_classification/Test3_vgg/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from PIL import Image 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from model import vgg 9 | 10 | 11 | def main(): 12 | im_height = 224 13 | im_width = 224 14 | num_classes = 5 15 | 16 | # load image 17 | img_path = "../tulip.jpg" 18 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 19 | img = Image.open(img_path) 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value to (0-1) 25 | img = np.array(img) / 255. 26 | 27 | # Add the image to a batch where it's the only member. 
28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | # create model 38 | model = vgg("vgg16", im_height=im_height, im_width=im_width, num_classes=num_classes) 39 | weights_path = "./save_weights/myVGG.h5" 40 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 41 | model.load_weights(weights_path) 42 | 43 | # prediction 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /tensorflow_classification/Test4_goolenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/Test4_goolenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import json 4 | 5 | from PIL import Image 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from model import GoogLeNet 10 | 11 | 12 | def main(): 13 | im_height = 224 14 | im_width = 224 15 | 16 | # load image 17 | img_path = "../tulip.jpg" 18 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 19 | img = Image.open(img_path) 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value and normalize 25 | img = ((np.array(img) / 255.) - 0.5) / 0.5 26 | 27 | # Add the image to a batch where it's the only member. 
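# (the (x / 255 - 0.5) / 0.5 preprocessing above maps pixel values into [-1, 1]; inference must mirror the normalization used at training time)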
28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | json_file = open(json_path, "r") 35 | class_indict = json.load(json_file) 36 | 37 | model = GoogLeNet(class_num=5, aux_logits=False) 38 | model.summary() 39 | # model.load_weights("./save_weights/myGoogLenet.h5", by_name=True) # h5 format 40 | weights_path = "./save_weights/myGoogLeNet.ckpt" 41 | assert len(glob.glob(weights_path + "*")), "cannot find {}".format(weights_path) 42 | model.load_weights(weights_path) 43 | 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | from PIL import Image 8 | import matplotlib.pyplot as plt 9 | 10 | from model import resnet50 11 | 12 | 13 | def main(): 14 | im_height = 224 15 | im_width = 224 16 | num_classes = 5 17 | 18 | # load image 19 | img_path = "../tulip.jpg" 20 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 21 | img = Image.open(img_path) 22 | # resize image to 224x224 23 | img = img.resize((im_width, im_height)) 24 | plt.imshow(img) 25 | 26 | # subtract the per-channel ImageNet mean (no division by std) 27 | _R_MEAN = 123.68 28 | _G_MEAN = 116.78 29 | _B_MEAN = 103.94 30 | img = np.array(img).astype(np.float32) 31 | img = img - [_R_MEAN, _G_MEAN, _B_MEAN] 32 | 33 | # Add the image to a batch where it's the only member. 
34 | img = (np.expand_dims(img, 0)) 35 | 36 | # read class_indict 37 | json_path = './class_indices.json' 38 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 39 | 40 | json_file = open(json_path, "r") 41 | class_indict = json.load(json_file) 42 | 43 | # create model 44 | feature = resnet50(num_classes=num_classes, include_top=False) 45 | feature.trainable = False 46 | model = tf.keras.Sequential([feature, 47 | tf.keras.layers.GlobalAvgPool2D(), 48 | tf.keras.layers.Dropout(rate=0.5), 49 | tf.keras.layers.Dense(1024, activation="relu"), 50 | tf.keras.layers.Dropout(rate=0.5), 51 | tf.keras.layers.Dense(num_classes), 52 | tf.keras.layers.Softmax()]) 53 | 54 | # load weights 55 | weights_path = './save_weights/resNet_50.ckpt' 56 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 57 | model.load_weights(weights_path) 58 | 59 | # prediction 60 | result = np.squeeze(model.predict(img)) 61 | predict_class = np.argmax(result) 62 | 63 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 64 | result[predict_class]) 65 | plt.title(print_res) 66 | for i in range(len(result)): 67 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 68 | result[i])) 69 | plt.show() 70 | 71 | 72 | if __name__ == '__main__': 73 | main() 74 | -------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/read_ckpt.py: -------------------------------------------------------------------------------- 1 | """ 2 | You can directly download the weights I have already converted 3 | Link: https://pan.baidu.com/s/1tLe9ahTMIwQAX7do_S59Zg Password: u199 4 | """ 5 | import tensorflow as tf 6 | 7 | 8 | def rename_var(ckpt_path, new_ckpt_path, num_classes, except_list): 9 | with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess: 10 | var_list = tf.train.list_variables(ckpt_path) 11 | new_var_list = [] 12 | 13 | for var_name, shape in var_list: 14 | print(var_name) 15 | if var_name in except_list: 16 | continue 17 | var = tf.train.load_variable(ckpt_path, var_name) 18 | new_var_name = var_name.replace('resnet_v1_50/', "") 19 | new_var_name = new_var_name.replace("bottleneck_v1/", "") 20 | new_var_name = new_var_name.replace("shortcut/weights", "shortcut/conv1/kernel") 21 | new_var_name = new_var_name.replace("weights", "kernel") 22 | new_var_name = new_var_name.replace("biases", "bias") 23 | re_var = tf.Variable(var, name=new_var_name) 24 | new_var_list.append(re_var) 25 | 26 | re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048, num_classes]), name="logits/kernel") 27 | new_var_list.append(re_var) 28 | re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name="logits/bias") 29 | new_var_list.append(re_var) 30 | saver = tf.compat.v1.train.Saver(new_var_list) 31 | sess.run(tf.compat.v1.global_variables_initializer()) 32 | saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False) 33 | 34 | 35 | def main(): 36 | except_list = ['global_step', 'resnet_v1_50/mean_rgb', 'resnet_v1_50/logits/biases', 'resnet_v1_50/logits/weights'] 37 | ckpt_path = './resnet_v1_50.ckpt' 38 | new_ckpt_path = './pretrain_weights.ckpt' 39 | num_classes = 5 40 | rename_var(ckpt_path, new_ckpt_path, num_classes, except_list) 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/read_h5.py: 
-------------------------------------------------------------------------------- 1 | import h5py 2 | 3 | f = h5py.File('./save_weights/resNet_1.h5', 'r') 4 | for root_name, g in f.items(): 5 | print(root_name) 6 | for _, weights_dirs in g.attrs.items(): 7 | for i in weights_dirs: 8 | name = root_name + "/" + str(i, encoding="utf-8") 9 | data = f[name] 10 | print(data[()]) 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tensorflow_classification/Test6_mobilenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | import tensorflow as tf 9 | 10 | from model_v2 import MobileNetV2 11 | 12 | 13 | def main(): 14 | im_height = 224 15 | im_width = 224 16 | num_classes = 5 17 | 18 | # load image 19 | img_path = "../tulip.jpg" 20 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 21 | img = Image.open(img_path) 22 | # resize image to 224x224 23 | img = img.resize((im_width, im_height)) 24 | plt.imshow(img) 25 | 26 | # scaling pixel value to (-1,1) 27 | img = np.array(img).astype(np.float32) 28 | img = ((img / 255.) - 0.5) * 2.0 29 | 30 | # Add the image to a batch where it's the only member. 31 | img = (np.expand_dims(img, 0)) 32 | 33 | # read class_indict 34 | json_path = './class_indices.json' 35 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 36 | 37 | json_file = open(json_path, "r") 38 | class_indict = json.load(json_file) 39 | 40 | # create model 41 | feature = MobileNetV2(include_top=False) 42 | model = tf.keras.Sequential([feature, 43 | tf.keras.layers.GlobalAvgPool2D(), 44 | tf.keras.layers.Dropout(rate=0.5), 45 | tf.keras.layers.Dense(num_classes), 46 | tf.keras.layers.Softmax()]) 47 | weights_path = './save_weights/resMobileNetV2.ckpt' 48 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 49 | model.load_weights(weights_path) 50 | 51 | result = np.squeeze(model.predict(img)) 52 | predict_class = np.argmax(result) 53 | 54 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 55 | result[predict_class]) 56 | plt.title(print_res) 57 | for i in range(len(result)): 58 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 59 | result[i])) 60 | plt.show() 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /tensorflow_classification/Test6_mobilenet/trans_v3_weights.py: -------------------------------------------------------------------------------- 1 | import re 2 | import tensorflow as tf 3 | from model_v3 import mobilenet_v3_large 4 | 5 | 6 | def change_word(word: str): 7 | word = word.replace("MobilenetV3/", "") 8 | 9 | if "weights" in word: 10 | word = word.replace("weights", "kernel") 11 | elif "Conv" in word and "biases" in word: 12 | word = word.replace("biases", "bias") 13 | 14 | return word 15 | 16 | 17 | def rename_var(ckpt_path, m_info): 18 | with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess: 19 | var_list = tf.train.list_variables(ckpt_path) 20 | pattern = "ExponentialMovingAverage|Momentum|global_step" 21 | 22 | var_dict = dict((change_word(name), [name, shape]) 23 | for name, shape in var_list 24 | if len(re.findall(pattern, name)) == 0) 25 | 26 | for k, v in m_info: 27 | assert k in 
var_dict, "{} not in var_dict".format(k) 28 | assert v == var_dict[k][1], "shape {} not equal {}".format(v, var_dict[k][1]) 29 | 30 | weights = [] 31 | for k, _ in m_info: 32 | var = tf.train.load_variable(ckpt_path, var_dict[k][0]) 33 | weights.append(var) 34 | 35 | return weights 36 | 37 | 38 | def main(): 39 | # https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz 40 | ckpt_path = './v3-large_224_1.0_float/pristine/model.ckpt-540000' 41 | save_path = './pre_mobilev3.h5' 42 | m = mobilenet_v3_large(input_shape=(224, 224, 3), num_classes=1001, include_top=True) 43 | m_info = [(i.name.replace(":0", ""), list(i.shape)) 44 | for i in m.weights] 45 | weights = rename_var(ckpt_path, m_info) 46 | m.set_weights(weights) 47 | m.save_weights(save_path) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /tensorflow_classification/Test7_shuffleNet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | 9 | from model import shufflenet_v2_x1_0 10 | 11 | 12 | def main(): 13 | im_height = 224 14 | im_width = 224 15 | num_classes = 5 16 | 17 | mean = [0.485, 0.456, 0.406] 18 | std = [0.229, 0.224, 0.225] 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | # resize image to 224x224 25 | img = img.resize((im_width, im_height)) 26 | plt.imshow(img) 27 | 28 | # scaling pixel value to (-1,1) 29 | img = np.array(img).astype(np.float32) 30 | img = (img / 255. - mean) / std 31 | 32 | # Add the image to a batch where it's the only member. 
33 | img = (np.expand_dims(img, 0)) 34 | 35 | # read class_indict 36 | json_path = './class_indices.json' 37 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 38 | 39 | json_file = open(json_path, "r") 40 | class_indict = json.load(json_file) 41 | 42 | # create model 43 | model = shufflenet_v2_x1_0(num_classes=num_classes) 44 | 45 | weights_path = './save_weights/shufflenetv2.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img)) 50 | predict_class = np.argmax(result) 51 | 52 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 53 | result[predict_class]) 54 | plt.title(print_res) 55 | for i in range(len(result)): 56 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 57 | result[i])) 58 | plt.show() 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /tensorflow_classification/Test9_efficientNet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | 9 | from model import efficientnet_b0 as create_model 10 | 11 | 12 | def main(): 13 | num_classes = 5 14 | 15 | img_size = {"B0": 224, 16 | "B1": 240, 17 | "B2": 260, 18 | "B3": 300, 19 | "B4": 380, 20 | "B5": 456, 21 | "B6": 528, 22 | "B7": 600} 23 | num_model = "B0" 24 | im_height = im_width = img_size[num_model] 25 | 26 | # load image 27 | img_path = "../tulip.jpg" 28 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 29 | img = Image.open(img_path) 30 | # resize image to 224x224 31 | img = img.resize((im_width, im_height)) 32 | plt.imshow(img) 33 | 34 | # read image 35 | img = np.array(img).astype(np.float32) 36 | 37 | # Add the image to a batch where it's the only member. 
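# (note: unlike the other predict scripts here, no mean/std normalization is applied; the image is fed as raw 0-255 floats)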
38 | img = (np.expand_dims(img, 0)) 39 | 40 | # read class_indict 41 | json_path = './class_indices.json' 42 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 43 | 44 | json_file = open(json_path, "r") 45 | class_indict = json.load(json_file) 46 | 47 | # create model 48 | model = create_model(num_classes=num_classes) 49 | 50 | weights_path = './save_weights/efficientnet.ckpt' 51 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 52 | model.load_weights(weights_path) 53 | 54 | result = np.squeeze(model.predict(img)) 55 | predict_class = np.argmax(result) 56 | 57 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 58 | result[predict_class]) 59 | plt.title(print_res) 60 | for i in range(len(result)): 61 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 62 | result[i])) 63 | plt.show() 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tensorflow_classification/analyze_weights_featuremap/analyze_feature_map.py: -------------------------------------------------------------------------------- 1 | from alexnet_model import AlexNet_v1, AlexNet_v2 2 | from PIL import Image 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from tensorflow.keras import Model, Input 6 | 7 | im_height = 224 8 | im_width = 224 9 | 10 | # load image 11 | img = Image.open("../tulip.jpg") 12 | # resize image to 224x224 13 | img = img.resize((im_width, im_height)) 14 | 15 | # scaling pixel value to (0-1) 16 | img = np.array(img) / 255. 17 | 18 | # Add the image to a batch where it's the only member. 19 | img = (np.expand_dims(img, 0)) 20 | 21 | 22 | model = AlexNet_v1(class_num=5) # functional api 23 | # model = AlexNet_v2(class_num=5) # subclass api 24 | # model.build((None, 224, 224, 3)) 25 | # If `by_name` is False weights are loaded based on the network's topology. 
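# (with by_name=True, weights would instead be matched to layers by their names, which helps when only part of the architecture lines up)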
26 | model.load_weights("./myAlex.h5") 27 | # model.load_weights("./submodel.h5") 28 | # for layer in model.layers: 29 | # print(layer.name) 30 | model.summary() 31 | layers_name = ["conv2d", "conv2d_1"] 32 | 33 | # functional API 34 | try: 35 | input_node = model.input 36 | output_node = [model.get_layer(name=layer_name).output for layer_name in layers_name] 37 | model1 = Model(inputs=input_node, outputs=output_node) 38 | outputs = model1.predict(img) 39 | for index, feature_map in enumerate(outputs): 40 | # [N, H, W, C] -> [H, W, C] 41 | im = np.squeeze(feature_map) 42 | 43 | # show top 12 feature maps 44 | plt.figure() 45 | for i in range(12): 46 | ax = plt.subplot(3, 4, i + 1) 47 | # [H, W, C] 48 | plt.imshow(im[:, :, i], cmap='gray') 49 | plt.suptitle(layers_name[index]) 50 | plt.show() 51 | except Exception as e: 52 | print(e) 53 | 54 | # subclasses API 55 | # outputs = model.receive_feature_map(img, layers_name) 56 | # for index, feature_maps in enumerate(outputs): 57 | # # [N, H, W, C] -> [H, W, C] 58 | # im = np.squeeze(feature_maps) 59 | # 60 | # # show top 12 feature maps 61 | # plt.figure() 62 | # for i in range(12): 63 | # ax = plt.subplot(3, 4, i + 1) 64 | # # [H, W, C] 65 | # plt.imshow(im[:, :, i], cmap='gray') 66 | # plt.suptitle(layers_name[index]) 67 | # plt.show() 68 | -------------------------------------------------------------------------------- /tensorflow_classification/analyze_weights_featuremap/analyze_kernel_weight.py: -------------------------------------------------------------------------------- 1 | from alexnet_model import AlexNet_v1, AlexNet_v2 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | model = AlexNet_v1(class_num=5) # functional api 6 | # model = AlexNet_v2(class_num=5) # subclass api 7 | # model.build((None, 224, 224, 3)) 8 | model.load_weights("./myAlex.h5") 9 | # model.load_weights("./submodel.h5") 10 | model.summary() 11 | for layer in model.layers: 12 | for index, weight in enumerate(layer.weights): 13 | # [kernel_height, kernel_width, kernel_channel, kernel_number] 14 | weight_t = weight.numpy() 15 | # read a kernel information 16 | # k = weight_t[:, :, :, 0] 17 | 18 | # calculate mean, std, min, max 19 | weight_mean = weight_t.mean() 20 | weight_std = weight_t.std(ddof=1) 21 | weight_min = weight_t.min() 22 | weight_max = weight_t.max() 23 | print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean, 24 | weight_std, 25 | weight_min, 26 | weight_max)) 27 | 28 | # plot hist image 29 | plt.close() 30 | weight_vec = np.reshape(weight_t, [-1]) 31 | plt.hist(weight_vec, bins=50) 32 | plt.title(weight.name) 33 | plt.show() -------------------------------------------------------------------------------- /tensorflow_classification/swin_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from model import swin_tiny_patch4_window7_224 as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | im_height = im_width = 224 16 | 17 | # load image 18 | img_path = "../tulip.jpg" 19 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 20 | img = Image.open(img_path) 21 | # resize image 22 | img = img.resize((im_width, im_height)) 23 | plt.imshow(img) 24 | 25 | # read image 26 | img = np.array(img).astype(np.float32) 27 | 28 | # preprocess 29 | img = (img / 255. 
- [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225] 30 | 31 | # Add the image to a batch where it's the only member. 32 | img = (np.expand_dims(img, 0)) 33 | 34 | # read class_indict 35 | json_path = './class_indices.json' 36 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 37 | 38 | json_file = open(json_path, "r") 39 | class_indict = json.load(json_file) 40 | 41 | # create model 42 | model = create_model(num_classes=num_classes) 43 | model.build([1, im_height, im_width, 3]) 44 | 45 | weights_path = './save_weights/model.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img, batch_size=1)) 50 | result = tf.keras.layers.Softmax()(result) 51 | predict_class = np.argmax(result) 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 54 | result[predict_class]) 55 | plt.title(print_res) 56 | for i in range(len(result)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | result[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /tensorflow_classification/vision_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from vit_model import vit_base_patch16_224_in21k as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | im_height = im_width = 224 16 | 17 | # load image 18 | img_path = "../tulip.jpg" 19 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 20 | img = Image.open(img_path) 21 | # resize image 22 | img = img.resize((im_width, im_height)) 23 | plt.imshow(img) 24 | 25 | # read image 26 | img = np.array(img).astype(np.float32) 27 | 28 | # preprocess 29 | img = (img / 255. - 0.5) / 0.5 30 | 31 | # Add the image to a batch where it's the only member. 32 | img = (np.expand_dims(img, 0)) 33 | 34 | # read class_indict 35 | json_path = './class_indices.json' 36 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 37 | 38 | json_file = open(json_path, "r") 39 | class_indict = json.load(json_file) 40 | 41 | # create model 42 | model = create_model(num_classes=num_classes, has_logits=False) 43 | model.build([1, 224, 224, 3]) 44 | 45 | weights_path = './save_weights/model.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img, batch_size=1)) 50 | result = tf.keras.layers.Softmax()(result) 51 | predict_class = np.argmax(result) 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 54 | result[predict_class]) 55 | plt.title(print_res) 56 | for i in range(len(result)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | result[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | --------------------------------------------------------------------------------
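The predict scripts above all follow the same single-image pattern. Below is a minimal batch-prediction sketch (an editorial example, not a file from the original repo) that reuses the exact preprocessing of vision_transformer/predict.py to classify every jpg in a folder; the test_images directory name is a placeholder, while the weights and class_indices.json paths are the ones the script above already assumes.
-------------------------------------------------------------------------------- /tensorflow_classification/vision_transformer/batch_predict_sketch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | 9 | from vit_model import vit_base_patch16_224_in21k as create_model 10 | 11 | 12 | def main(img_dir="./test_images"):  # placeholder directory 13 | with open("./class_indices.json", "r") as f: 14 | class_indict = json.load(f) 15 | 16 | # build the model once, then reuse it for every image 17 | model = create_model(num_classes=5, has_logits=False) 18 | model.build([1, 224, 224, 3]) 19 | model.load_weights('./save_weights/model.ckpt') 20 | 21 | for img_path in sorted(glob.glob(os.path.join(img_dir, "*.jpg"))): 22 | # same preprocessing as predict.py: resize, scale to [-1, 1], add the batch axis 23 | img = Image.open(img_path).convert('RGB').resize((224, 224)) 24 | img = (np.array(img).astype(np.float32) / 255. - 0.5) / 0.5 25 | img = np.expand_dims(img, 0) 26 | 27 | result = np.squeeze(model.predict(img, batch_size=1)) 28 | result = tf.nn.softmax(result).numpy() 29 | i = int(np.argmax(result)) 30 | print("{}: class: {} prob: {:.3}".format(os.path.basename(img_path), 31 | class_indict[str(i)], result[i])) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | --------------------------------------------------------------------------------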