├── .github └── ISSUE_TEMPLATE │ └── issue-template.md ├── .gitignore ├── LICENSE ├── README.md ├── article_link └── README.md ├── course_ppt └── README.md ├── data_set ├── README.md └── split_data.py ├── deploying_service ├── deploying_pytorch │ ├── convert_onnx_cls │ │ ├── class_indices.json │ │ ├── main.py │ │ └── model.py │ ├── convert_openvino │ │ ├── convert_resnet34 │ │ │ ├── README.md │ │ │ ├── compare_fps.py │ │ │ ├── compare_onnx_and_ir.py │ │ │ ├── convert_pytorch2onnx.py │ │ │ ├── model.py │ │ │ ├── quantization_int8.py │ │ │ ├── requirements.txt │ │ │ └── utils.py │ │ └── convert_yolov5 │ │ │ ├── README.md │ │ │ ├── compare_fps.py │ │ │ ├── compare_onnx_and_ir.py │ │ │ ├── draw_box_utils.py │ │ │ ├── evaluation.py │ │ │ ├── predict.py │ │ │ ├── quantization_int8.py │ │ │ ├── requirements.txt │ │ │ └── utils.py │ ├── convert_tensorrt │ │ └── convert_resnet34 │ │ │ ├── compare_onnx_and_trt.py │ │ │ ├── convert_pytorch2onnx.py │ │ │ ├── my_dataset.py │ │ │ ├── quantization.py │ │ │ └── utils.py │ └── pytorch_flask_service │ │ ├── class_indices.json │ │ ├── main.py │ │ ├── model.py │ │ ├── requirements.txt │ │ ├── static │ │ └── js │ │ │ └── jquery.min.js │ │ └── templates │ │ └── up.html └── pruning_model_pytorch │ ├── class_indices.json │ ├── main.py │ ├── model.py │ ├── predict.py │ └── train.py ├── others_project ├── draw_dilated_conv │ └── main.py ├── kmeans_anchors │ ├── main.py │ ├── plot_kmeans.py │ ├── read_voc.py │ └── yolo_kmeans.py ├── openvinotest │ └── openvino_cls_test │ │ ├── class_indices.json │ │ ├── create_imagenet_annotation.py │ │ ├── float32vsint8.py │ │ ├── main.py │ │ ├── model.py │ │ └── speed_test.py ├── readPbFile │ ├── README.md │ ├── export │ │ └── checkpoint │ ├── pascal_label_map.pbtxt │ ├── readPb.py │ ├── test_images │ │ └── image_info.txt │ └── using_function.py ├── textcnnKeras │ ├── dataGenerator.py │ ├── data_link.txt │ ├── main.py │ └── models.py └── trans_widerface_to_xml │ ├── create_xml.py │ └── main.py ├── pytorch_classification ├── ConfusionMatrix │ ├── class_indices.json │ ├── main.py │ └── model.py ├── ConvNeXt │ ├── README.md │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── MobileViT │ ├── README.md │ ├── model.py │ ├── model_config.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ ├── transformer.py │ ├── unfold_test.py │ └── utils.py ├── README.md ├── Test10_regnet │ ├── README.md │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── pretrain_weights.py │ ├── train.py │ └── utils.py ├── Test11_efficientnetV2 │ ├── README.md │ ├── class_indices.json │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ ├── trans_effv2_weights.py │ └── utils.py ├── Test1_official_demo │ ├── model.py │ ├── predict.py │ └── train.py ├── Test2_alexnet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test3_vggnet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test4_googlenet │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ └── train.py ├── Test5_resnet │ ├── README.md │ ├── batch_predict.py │ ├── class_indices.json │ ├── load_weights.py │ ├── model.py │ ├── predict.py │ └── train.py ├── Test6_mobilenet │ ├── class_indices.json │ ├── model_v2.py │ ├── model_v3.py │ ├── predict.py │ └── train.py ├── Test7_shufflenet │ ├── README.md │ ├── class_indices.json │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── Test8_densenet │ ├── README.md │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── 
train.py │ └── utils.py ├── Test9_efficientNet │ ├── README.md │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── requirements.txt │ ├── train.py │ ├── trans_weights_to_pytorch.py │ └── utils.py ├── analyze_weights_featuremap │ ├── alexnet_model.py │ ├── analyze_feature_map.py │ ├── analyze_kernel_weight.py │ └── resnet_model.py ├── custom_dataset │ ├── main.py │ ├── my_dataset.py │ └── utils.py ├── grad_cam │ ├── README.md │ ├── both.png │ ├── imagenet1k_classes.txt │ ├── imagenet21k_classes.txt │ ├── main_cnn.py │ ├── main_swin.py │ ├── main_vit.py │ ├── swin_model.py │ ├── utils.py │ └── vit_model.py ├── mini_imagenet │ ├── README.md │ ├── imagenet_class_index.json │ ├── model.py │ ├── multi_train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_eval_utils.py │ ├── my_dataset.py │ ├── restructure_csv.py │ ├── train_multi_gpu_using_launch.py │ └── train_single_gpu.py ├── model_complexity │ ├── main.py │ ├── model.py │ └── utils.py ├── swin_transformer │ ├── README.md │ ├── create_confusion_matrix.py │ ├── model.py │ ├── my_dataset.py │ ├── predict.py │ ├── select_incorrect_samples.py │ ├── train.py │ └── utils.py ├── tensorboard_test │ ├── data_utils.py │ ├── model.py │ ├── my_dataset.py │ ├── requirements.txt │ ├── train.py │ └── train_eval_utils.py ├── train_multi_GPU │ ├── README.md │ ├── accuracy.png │ ├── model.py │ ├── multi_train_utils │ │ ├── distributed_utils.py │ │ └── train_eval_utils.py │ ├── my_dataset.py │ ├── plot_results.py │ ├── requirements.txt │ ├── runs │ │ └── Nov07_18-58-35_wz │ │ │ └── events.out.tfevents.1604746311.localhost.41577.0 │ ├── syncbn.png │ ├── train_multi_gpu_using_launch.py │ ├── train_multi_gpu_using_spawn.py │ ├── train_single_gpu.py │ ├── training_time.png │ └── utils.py └── vision_transformer │ ├── README.md │ ├── flops.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ ├── utils.py │ └── vit_model.py ├── pytorch_keypoint ├── DeepPose │ ├── README.md │ ├── datasets.py │ ├── export_onnx.py │ ├── model.py │ ├── predict.jpg │ ├── predict.py │ ├── requirements.txt │ ├── test_img.jpg │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── distributed_utils.py │ │ ├── losses.py │ │ ├── metrics.py │ │ └── train_eval_utils.py │ ├── transforms.py │ ├── utils.py │ └── wflw_horizontal_flip_indices.py └── HRNet │ ├── HRNet.png │ ├── README.md │ ├── draw_utils.py │ ├── model │ ├── __init__.py │ └── hrnet.py │ ├── my_dataset_coco.py │ ├── person.png │ ├── person_keypoints.json │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ ├── __init__.py │ ├── coco_eval.py │ ├── coco_utils.py │ ├── distributed_utils.py │ ├── group_by_aspect_ratio.py │ ├── loss.py │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── pytorch_object_detection ├── faster_rcnn │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ ├── mobilenetv2_model.py │ │ ├── resnet50_fpn_model.py │ │ └── vgg_model.py │ ├── change_backbone_with_fpn.py │ ├── change_backbone_without_fpn.py │ ├── draw_box_utils.py │ ├── fasterRCNN.png │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── faster_rcnn_framework.py │ │ ├── image_list.py │ │ ├── roi_head.py │ │ ├── rpn_function.py │ │ └── transform.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict.py │ ├── record_mAP.txt │ ├── requirements.txt │ ├── split_data.py │ ├── train_mobilenetv2.py │ ├── train_multi_GPU.py │ ├── 
train_res50_fpn.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── mask_rcnn │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ └── resnet50_fpn_model.py │ ├── coco91_indices.json │ ├── det_results20220406-141544.txt │ ├── draw_box_utils.py │ ├── my_dataset_coco.py │ ├── my_dataset_voc.py │ ├── network_files │ │ ├── __init__.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── faster_rcnn_framework.py │ │ ├── image_list.py │ │ ├── mask_rcnn.py │ │ ├── roi_head.py │ │ ├── rpn_function.py │ │ └── transform.py │ ├── pascal_voc_indices.json │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── seg_results20220406-141544.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── retinaNet │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ └── resnet50_fpn_model.py │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── anchor_utils.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── image_list.py │ │ ├── losses.py │ │ ├── retinanet.py │ │ └── transform.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── results20210421-142632.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── ssd │ ├── README.md │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── pascal_voc_classes.json │ ├── plot_curve.py │ ├── predict_test.py │ ├── record_mAP.txt │ ├── requirements.txt │ ├── res50_ssd.png │ ├── src │ │ ├── __init__.py │ │ ├── res50_backbone.py │ │ ├── ssd_model.py │ │ └── utils.py │ ├── train_multi_GPU.py │ ├── train_ssd300.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── train_coco_dataset │ ├── README.md │ ├── backbone │ │ ├── __init__.py │ │ ├── feature_pyramid_network.py │ │ ├── mobilenetv2_model.py │ │ ├── resnet.py │ │ ├── resnet50_fpn_model.py │ │ └── vgg_model.py │ ├── change_backbone_with_fpn.py │ ├── coco91_indices.json │ ├── compute_receptive_field.py │ ├── draw_box_utils.py │ ├── my_dataset.py │ ├── network_files │ │ ├── __init__.py │ │ ├── boxes.py │ │ ├── det_utils.py │ │ ├── faster_rcnn_framework.py │ │ ├── image_list.py │ │ ├── roi_head.py │ │ ├── rpn_function.py │ │ └── transform.py │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── results20220408-201436.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py └── yolov3_spp │ ├── README.md │ ├── build_utils │ ├── __init__.py │ ├── datasets.py │ ├── img_utils.py │ ├── layers.py │ ├── parse_config.py │ ├── torch_utils.py │ └── utils.py │ ├── calculate_dataset.py │ ├── cfg │ ├── hyp.yaml │ └── yolov3-spp.cfg │ ├── data │ └── pascal_voc_classes.json │ ├── 
draw_box_utils.py │ ├── export_onnx.py │ ├── load_onnx_test.py │ ├── models.py │ ├── predict_test.py │ ├── requirements.txt │ ├── results20210515-152935.txt │ ├── runs │ └── Oct28_17-55-29_wz │ │ └── events.out.tfevents.1603791769.localhost.localdomain.178338.0 │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ ├── __init__.py │ ├── coco_eval.py │ ├── coco_utils.py │ ├── distributed_utils.py │ ├── group_by_aspect_ratio.py │ └── train_eval_utils.py │ ├── trans_voc2yolo.py │ ├── validation.py │ └── yolov3spp.png ├── pytorch_segmentation ├── deeplab_v3 │ ├── README.md │ ├── deeplabv3_resnet50.png │ ├── get_palette.py │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20211027-104607.txt │ ├── src │ │ ├── __init__.py │ │ ├── deeplabv3_model.py │ │ ├── mobilenet_backbone.py │ │ └── resnet_backbone.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py ├── fcn │ ├── README.md │ ├── get_palette.py │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20210918-122740.txt │ ├── src │ │ ├── __init__.py │ │ ├── backbone.py │ │ └── fcn_model.py │ ├── torch_fcn.png │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py ├── lraspp │ ├── README.md │ ├── get_palette.py │ ├── lraspp.png │ ├── my_dataset.py │ ├── palette.json │ ├── pascal_voc_classes.json │ ├── predict.py │ ├── requirements.txt │ ├── results20211028-105233.txt │ ├── src │ │ ├── __init__.py │ │ ├── lraspp_model.py │ │ └── mobilenet_backbone.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ ├── transforms.py │ └── validation.py ├── u2net │ ├── README.md │ ├── convert_weight.py │ ├── my_dataset.py │ ├── predict.py │ ├── requirements.txt │ ├── results20220723-123632.txt │ ├── src │ │ ├── __init__.py │ │ └── model.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ │ ├── __init__.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ ├── transforms.py │ ├── u2net.png │ └── validation.py └── unet │ ├── README.md │ ├── compute_mean_std.py │ ├── my_dataset.py │ ├── predict.py │ ├── requirements.txt │ ├── results20220109-165837.txt │ ├── src │ ├── __init__.py │ ├── mobilenet_unet.py │ ├── unet.py │ └── vgg_unet.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils │ ├── __init__.py │ ├── dice_coefficient_loss.py │ ├── distributed_utils.py │ └── train_and_eval.py │ ├── transforms.py │ └── unet.png ├── summary_problem.md └── tensorflow_classification ├── ConfusionMatrix ├── class_indices.json ├── main.py └── model.py ├── ConvNeXt ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── README.md ├── Test11_efficientnetV2 ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── Test1_official_demo ├── model.py └── train.py ├── Test2_alexnet ├── class_indices.json ├── fine_train_alexnet.py ├── model.py ├── predict.py ├── read_pth.py ├── train.py └── trainGPU.py ├── Test3_vgg ├── class_indices.json ├── fine_train_vgg16.py ├── model.py ├── predict.py ├── read_ckpt.py ├── train.py └── trainGPU.py ├── Test4_goolenet ├── class_indices.json ├── model.py ├── model_add_bn.py ├── predict.py ├── read_pth.py ├── train.py ├── 
trainGPU.py └── train_add_bn.py ├── Test5_resnet ├── batch_predict.py ├── class_indices.json ├── model.py ├── predict.py ├── read_ckpt.py ├── read_h5.py ├── subclassed_model.py ├── train.py └── trainGPU.py ├── Test6_mobilenet ├── model_v2.py ├── model_v3.py ├── predict.py ├── read_ckpt.py ├── trainGPU_mobilenet_v2.py ├── train_mobilenet_v2.py ├── train_mobilenet_v3.py ├── trans_v3_weights.py └── utils.py ├── Test7_shuffleNet ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── Test9_efficientNet ├── model.py ├── predict.py ├── train.py └── utils.py ├── analyze_weights_featuremap ├── alexnet_model.py ├── analyze_feature_map.py └── analyze_kernel_weight.py ├── custom_dataset ├── train_fit.py └── utils.py ├── swin_transformer ├── model.py ├── predict.py ├── train.py ├── trans_weights.py └── utils.py ├── tensorboard_test ├── train_fit.py └── train_not_fit.py └── vision_transformer ├── predict.py ├── train.py ├── trans_weights.py ├── utils.py └── vit_model.py

/.github/ISSUE_TEMPLATE/issue-template.md:
--------------------------------------------------------------------------------
---
name: Issue template
about: Use this template for reporting your problem
title: ''
labels: ''
assignees: ''

---

**System information**
* Have I written custom code:
* OS Platform (e.g., Windows 10 or Linux Ubuntu 16.04):
* Python version:
* Deep learning framework and version (e.g., TensorFlow 2.1 or PyTorch 1.3):
* Use GPU or not:
* CUDA/cuDNN version (if you use GPU):
* The network you trained (e.g., ResNet34):

**Describe the current behavior**

**Error info / logs**
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
##ignore this file##
*.idea
__pycache__
*.zip
flower_data
*.h5
*.pth
*.pt
*.jpg
*.ckpt.*
*.ckpt
*.config
*.gz
*.onnx
*.xml
*.bin
*.mapping
*.csv
checkpoint
data
VOCdevkit
ssd_resnet50_v1_fpn_shared_box_predictor
runs
--------------------------------------------------------------------------------
/data_set/README.md:
--------------------------------------------------------------------------------
## This folder stores the training data
### Usage steps:
* (1) Create a new folder "flower_data" inside the data_set folder
* (2) Download the flower classification dataset: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz)
* (3) Unzip the dataset into the flower_data folder
* (4) Run the "split_data.py" script to automatically split the dataset into a training set (train) and a validation set (val)

```
├── flower_data
       ├── flower_photos (the unzipped dataset folder, 3670 samples)
       ├── train (generated training set, 3306 samples)
       └── val (generated validation set, 364 samples)
```
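For reference, a minimal sketch of what such a split does (this is not the repo's `split_data.py` itself, whose options may differ; it assumes a roughly 9:1 train/val split, consistent with the sample counts above):

```python
import os
import random
import shutil

def split(src="flower_data/flower_photos", dst="flower_data", val_rate=0.1):
    random.seed(0)  # reproducible split
    classes = [d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d))]
    for cls in classes:
        images = os.listdir(os.path.join(src, cls))
        # randomly reserve val_rate of each class for validation
        val_imgs = set(random.sample(images, k=int(len(images) * val_rate)))
        for name in images:
            subset = "val" if name in val_imgs else "train"
            out_dir = os.path.join(dst, subset, cls)
            os.makedirs(out_dir, exist_ok=True)
            shutil.copy(os.path.join(src, cls, name), os.path.join(out_dir, name))

if __name__ == "__main__":
    split()
```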
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_onnx_cls/class_indices.json:
--------------------------------------------------------------------------------
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/README.md:
--------------------------------------------------------------------------------
This project shows how to convert the PyTorch ResNet34 network into OpenVINO's IR format and quantize it. Workflow:
1. Set up the environment according to `requirements.txt`
2. Download the pretrained ResNet34 weights (trained earlier on the flower classification dataset) and put them in the current folder. Baidu Cloud link: https://pan.baidu.com/s/1x4WFX1HynYcXLium3UaaFQ  password: qvi6
3. Convert ResNet34 to ONNX format with `convert_pytorch2onnx.py`
4. Convert the ONNX model to IR format from the command line:
```
mo --input_model resnet34.onnx \
   --input_shape "[1,3,224,224]" \
   --mean_values="[123.675,116.28,103.53]" \
   --scale_values="[58.395,57.12,57.375]" \
   --data_type FP32 \
   --output_dir ir_output
```
5. Download and unzip the flower classification dataset, then point `data_path` in `quantization_int8.py` to the unzipped `flower_photos` folder
6. Quantize the model with `quantization_int8.py`
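To check the speed benefit of step 6, OpenVINO's `benchmark_app` (installed with `openvino-dev`) can be run on the FP32 and INT8 IRs, as in the YOLOv5 example below; the quantized model path here is an assumption about where your quantization script writes its output:
```
benchmark_app -m ir_output/resnet34.xml -d CPU -api sync
benchmark_app -m quant_ir_output/quantized_resnet34.xml -d CPU -api sync
```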
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/convert_pytorch2onnx.py:
--------------------------------------------------------------------------------
import torch
import torch.onnx
import onnx
import onnxruntime
import numpy as np
from torchvision.models import resnet34

device = torch.device("cpu")


def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()


def main():
    weights_path = "resNet34(flower).pth"
    onnx_file_name = "resnet34.onnx"
    batch_size = 1
    img_h = 224
    img_w = 224
    img_channel = 3

    # create model and load pretrained weights
    model = resnet34(pretrained=False, num_classes=5)
    model.load_state_dict(torch.load(weights_path, map_location='cpu'))

    model.eval()
    # input to the model
    # [batch, channel, height, width]
    x = torch.rand(batch_size, img_channel, img_h, img_w, requires_grad=True)
    torch_out = model(x)

    # export the model
    torch.onnx.export(model,           # model being run
                      x,               # model input (or a tuple for multiple inputs)
                      onnx_file_name,  # where to save the model (can be a file or file-like object)
                      verbose=False)

    # check onnx model
    onnx_model = onnx.load(onnx_file_name)
    onnx.checker.check_model(onnx_model)

    ort_session = onnxruntime.InferenceSession(onnx_file_name)

    # compute ONNX Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
    ort_outs = ort_session.run(None, ort_inputs)

    # compare ONNX Runtime and PyTorch results
    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.
    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with ONNXRuntime, and the result looks good!")


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/requirements.txt:
--------------------------------------------------------------------------------
torch==1.11.0
torchvision==0.12.0
onnx==1.13.0
onnxruntime==1.8.0
protobuf==3.19.5
openvino-dev==2022.1.0
matplotlib
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/README.md:
--------------------------------------------------------------------------------
Quantizing YOLOv5 with OpenVINO

1. Set up the environment according to `requirements.txt`
2. Convert YOLOv5 to ONNX
   YOLOv5 officially provides export to ONNX as well as to OpenVINO, but here we only export to ONNX, taking YOLOv5s as an example:
```
python export.py --weights yolov5s.pt --include onnx
```

3. Convert the ONNX model to IR
   Use OpenVINO's `mo` tool to convert the ONNX model to OpenVINO's IR format:
```
mo --input_model yolov5s.onnx \
   --input_shape "[1,3,640,640]" \
   --scale 255 \
   --data_type FP32 \
   --output_dir ir_output
```

4. Quantize the model
   Use `quantization_int8.py` to quantize the model. Quantization requires the COCO2017 dataset; point `data_path` at the coco2017 directory:
```
├── coco2017: dataset root directory
     ├── train2017: all training images (118287 images)
     ├── val2017: all validation images (5000 images)
     └── annotations: annotation folder
          ├── instances_train2017.json: training-set annotations for detection/segmentation
          ├── instances_val2017.json: validation-set annotations for detection/segmentation
          ├── captions_train2017.json: training-set annotations for image captioning
          ├── captions_val2017.json: validation-set annotations for image captioning
          ├── person_keypoints_train2017.json: training-set annotations for human keypoint detection
          └── person_keypoints_val2017.json: validation-set annotations for human keypoint detection
```

5. Benchmark
   Use the `benchmark_app` tool to measure `Throughput` before and after quantization, here on a `CPU: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz`:
```
benchmark_app -m ir_output/yolov5s.xml -d CPU -api sync
```
output:
```
Latency:
    Median:  59.56 ms
    AVG:     63.30 ms
    MIN:     57.88 ms
    MAX:     99.89 ms
Throughput: 16.79 FPS
```

```
benchmark_app -m quant_ir_output/quantized_yolov5s.xml -d CPU -api sync
```
output:
```
Latency:
    Median:  42.97 ms
    AVG:     46.56 ms
    MIN:     41.18 ms
    MAX:     95.75 ms
Throughput: 23.27 FPS
```
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/compare_onnx_and_ir.py:
--------------------------------------------------------------------------------
import numpy as np
import onnxruntime
from openvino.runtime import Core


def normalize(image: np.ndarray) -> np.ndarray:
    """
    Scale the image values from [0, 255] down to [0, 1]
    """
    image = image.astype(np.float32)
    image /= 255.0
    return image


def onnx_inference(onnx_path: str, image: np.ndarray):
    # load onnx model
    ort_session = onnxruntime.InferenceSession(onnx_path)

    # compute onnx Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: image}
    res_onnx = ort_session.run(None, ort_inputs)[0]
    return res_onnx


def ir_inference(ir_path: str, image: np.ndarray):
    # Load the network in Inference Engine
    ie = Core()
    model_ir = ie.read_model(model=ir_path)
    compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")

    # Get input and output layers
    input_layer_ir = next(iter(compiled_model_ir.inputs))
    output_layer_ir = next(iter(compiled_model_ir.outputs))

    # Run inference on the input image
    res_ir = compiled_model_ir([image])[output_layer_ir]
    return res_ir


def main():
    image_h = 640
    image_w = 640
    onnx_path = "yolov5s.onnx"
    ir_path = "ir_output/yolov5s.xml"

    image = np.random.randn(image_h, image_w, 3)
    normalized_image = normalize(image)

    # Convert the resized images to network input shape
    # [h, w, c] -> [c, h, w] -> [1, c, h, w]
    input_image = np.expand_dims(np.transpose(image, (2, 0, 1)), 0)
    normalized_input_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0)

    # the IR was converted with --scale 255, so it takes the raw image,
    # while the ONNX model expects the normalized one
    onnx_res = onnx_inference(onnx_path, normalized_input_image)
    ir_res = ir_inference(ir_path, input_image)
    np.testing.assert_allclose(onnx_res, ir_res, rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with OpenvinoRuntime, and the result looks good!")


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/evaluation.py:
--------------------------------------------------------------------------------
from tqdm import tqdm
import torch
from openvino.runtime import Core
from utils import MyDataLoader, EvalCOCOMetric, non_max_suppression


def main():
    data_path = "/data/coco2017"
    ir_model_xml = "quant_ir_output/quantized_yolov5s.xml"
    img_size = (640, 640)  # h, w

    data_loader = MyDataLoader(data_path, "val", size=img_size)
    coco80_to_91 = data_loader.coco_id80_to_id91
    metrics = EvalCOCOMetric(coco=data_loader.coco, classes_mapping=coco80_to_91)

    # Load the network in Inference Engine
    ie = Core()
    model_ir = ie.read_model(model=ir_model_xml)
    compiled_model = ie.compile_model(model=model_ir, device_name="CPU")
    inputs_names = compiled_model.inputs
    outputs_names = compiled_model.outputs

    # inference
    request = compiled_model.create_infer_request()
    for i in tqdm(range(len(data_loader))):
        data = data_loader[i]
        ann, img, info = data
        ann = ann + (info,)

        request.infer(inputs={inputs_names[0]: img})
        result = request.get_output_tensor(outputs_names[0].index).data

        # post-process
        result = non_max_suppression(torch.Tensor(result), conf_thres=0.001, iou_thres=0.6, multi_label=True)[0]
        boxes = result[:, :4].numpy()
        scores = result[:, 4].numpy()
        cls = result[:, 5].numpy().astype(int)
        metrics.update(ann, [boxes, cls, scores])

    metrics.evaluate()


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/predict.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
from openvino.runtime import Core
from utils import letterbox, scale_coords, non_max_suppression, coco80_names
from draw_box_utils import draw_objs


def main():
    img_path = "test.jpg"
    ir_model_xml = "ir_output/yolov5s.xml"
    img_size = (640, 640)  # h, w

    origin_img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    reshape_img, ratio, pad = letterbox(origin_img, img_size, auto=False)
    input_img = np.expand_dims(np.transpose(reshape_img, [2, 0, 1]), 0).astype(np.float32)

    # Load the network in Inference Engine
    ie = Core()
    model_ir = ie.read_model(model=ir_model_xml)
    compiled_model = ie.compile_model(model=model_ir, device_name="CPU")
    inputs_names = compiled_model.inputs
    outputs_names = compiled_model.outputs

    # inference
    request = compiled_model.create_infer_request()
    request.infer(inputs={inputs_names[0]: input_img})
    result = request.get_output_tensor(outputs_names[0].index).data

    # post-process
    result = non_max_suppression(torch.Tensor(result))[0]
    boxes = result[:, :4].numpy()
    scores = result[:, 4].numpy()
    cls = result[:, 5].numpy().astype(int)
    boxes = scale_coords(reshape_img.shape, boxes, origin_img.shape, (ratio, pad))

    draw_img = draw_objs(Image.fromarray(origin_img),
                         boxes,
                         cls,
                         scores,
                         category_index=dict([(str(i), v) for i, v in enumerate(coco80_names)]))
    plt.imshow(draw_img)
    plt.show()
    draw_img.save("predict.jpg")


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/requirements.txt:
--------------------------------------------------------------------------------
torch==1.13.1
torchvision==0.12.0
onnx==1.13.0
onnxruntime==1.8.0
protobuf==3.19.5
openvino-dev==2022.1.0
matplotlib
torchmetrics==0.9.1
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/convert_pytorch2onnx.py:
--------------------------------------------------------------------------------
import torch
import torch.onnx
import onnx
import onnxruntime
import numpy as np
from torchvision.models import resnet34

device = torch.device("cpu")


def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()


def main():
    weights_path = "resNet34(flower).pth"
    onnx_file_name = "resnet34.onnx"
    batch_size = 1
    img_h = 224
    img_w = 224
    img_channel = 3

    # create model and load pretrained weights
    model = resnet34(pretrained=False, num_classes=5)
    model.load_state_dict(torch.load(weights_path, map_location='cpu'))

    model.eval()
    # input to the model
    # [batch, channel, height, width]
    x = torch.rand(batch_size, img_channel, img_h, img_w, requires_grad=True)
    torch_out = model(x)

    # export the model
    torch.onnx.export(model,           # model being run
                      x,               # model input (or a tuple for multiple inputs)
                      onnx_file_name,  # where to save the model (can be a file or file-like object)
                      input_names=["input"],
                      output_names=["output"],
                      verbose=False)

    # check onnx model
    onnx_model = onnx.load(onnx_file_name)
    onnx.checker.check_model(onnx_model)

    ort_session = onnxruntime.InferenceSession(onnx_file_name)

    # compute ONNX Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
    ort_outs = ort_session.run(None, ort_inputs)

    # compare ONNX Runtime and PyTorch results
    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.
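    # Compared with the OpenVINO variant of this script, the export above also sets
    # input_names=["input"] / output_names=["output"]; stable tensor names make it
    # easier to bind buffers by name once TensorRT parses the ONNX graph. The
    # rtol/atol values below only allow for small floating-point drift between runtimes.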
    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with ONNXRuntime, and the result looks good!")


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/my_dataset.py:
--------------------------------------------------------------------------------
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset"""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        img = Image.open(self.images_path[item])
        # mode 'RGB' is a color image, 'L' a grayscale image
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item]))
        label = self.images_class[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    @staticmethod
    def collate_fn(batch):
        # for the official default_collate implementation, see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = tuple(zip(*batch))

        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/pytorch_flask_service/class_indices.json:
--------------------------------------------------------------------------------
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}
--------------------------------------------------------------------------------
/deploying_service/deploying_pytorch/pytorch_flask_service/requirements.txt:
--------------------------------------------------------------------------------
Flask==2.2.5
Flask_Cors==3.0.9
Pillow
--------------------------------------------------------------------------------
/deploying_service/pruning_model_pytorch/class_indices.json:
--------------------------------------------------------------------------------
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}
--------------------------------------------------------------------------------
/deploying_service/pruning_model_pytorch/predict.py:
--------------------------------------------------------------------------------
import torch
from model import resnet34
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import json

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

data_transform = transforms.Compose(
    [transforms.Resize(256),
     transforms.CenterCrop(224),
     transforms.ToTensor(),
     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# load image
img = Image.open("../tulip.jpg")
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)

# read class_indict
try:
    json_file = open('./class_indices.json', 'r')
    class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# create model
model = resnet34(num_classes=5)
# load model weights
model_weight_path = "./resNet34.pth"
model.load_state_dict(torch.load(model_weight_path, map_location=device))
model.eval()
with torch.no_grad():
    # predict class
    output = torch.squeeze(model(img))
    predict = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].numpy())
plt.show()
--------------------------------------------------------------------------------
/others_project/kmeans_anchors/yolo_kmeans.py:
--------------------------------------------------------------------------------
import numpy as np


def wh_iou(wh1, wh2):
    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]  # [1,M,2]
    inter = np.minimum(wh1, wh2).prod(2)  # [N,M]
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)


def k_means(boxes, k, dist=np.median):
    """
    yolo k-means methods
    refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py
    Args:
        boxes: the bboxes to cluster
        k: number of clusters
        dist: method used to update the cluster coordinates (median by default, which works slightly better than the mean)
    """
    box_number = boxes.shape[0]
    last_nearest = np.zeros((box_number,))
    # np.random.seed(0)  # fix the random seed

    # init k clusters
    clusters = boxes[np.random.choice(box_number, k, replace=False)]

    while True:
        distances = 1 - wh_iou(boxes, clusters)
        current_nearest = np.argmin(distances, axis=1)
        if (last_nearest == current_nearest).all():
            break  # clusters won't change
        for cluster in range(k):
            # update clusters
            clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0)

        last_nearest = current_nearest

    return clusters
--------------------------------------------------------------------------------
/others_project/openvinotest/openvino_cls_test/class_indices.json:
--------------------------------------------------------------------------------
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}
--------------------------------------------------------------------------------
/others_project/openvinotest/openvino_cls_test/create_imagenet_annotation.py:
--------------------------------------------------------------------------------
import os
import glob

image_dir = "/home/w180662/my_project/my_github/data_set/flower_data/train"
assert os.path.exists(image_dir), "image dir does not exist..."

img_list = glob.glob(os.path.join(image_dir, "*", "*.jpg"))
assert len(img_list) > 0, "No images(.jpg) were found in image dir..."
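
# The two files written below are plain text, one entry per line:
#   my_labels.txt     -> "index:class_name"
#   my_annotation.txt -> "image_path label_index"  (an ImageNet-style annotation list)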

classes_info = os.listdir(image_dir)
classes_info.sort()
classes_dict = {}

# create label file
with open("my_labels.txt", "w") as lw:
    # note: when there is no background class, the index starts from 0
    for index, c in enumerate(classes_info, start=0):
        txt = "{}:{}".format(index, c)
        if index != len(classes_info) - 1:  # no trailing newline after the last class
            txt += "\n"
        lw.write(txt)
        classes_dict.update({c: str(index)})
print("create my_labels.txt successful...")

# create annotation file
with open("my_annotation.txt", "w") as aw:
    for index, img in enumerate(img_list):  # enumerate img_list itself, not a stale index
        img_classes = classes_dict[img.split("/")[-2]]
        txt = "{} {}".format(img, img_classes)
        if index != len(img_list) - 1:
            txt += "\n"
        aw.write(txt)
print("create my_annotation.txt successful...")
--------------------------------------------------------------------------------
/others_project/readPbFile/README.md:
--------------------------------------------------------------------------------
This project reads a frozen pb file and runs prediction with it.
Usage steps:
(1) Prepare the frozen pb file, the pbtxt label file, and the test images
(2) Edit the relevant fields in the info.config file

![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example1.jpg)
![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example2.jpg)
--------------------------------------------------------------------------------
/others_project/readPbFile/export/checkpoint:
--------------------------------------------------------------------------------
model_checkpoint_path: "model.ckpt"
all_model_checkpoint_paths: "model.ckpt"
--------------------------------------------------------------------------------
/others_project/readPbFile/pascal_label_map.pbtxt:
--------------------------------------------------------------------------------
item {
  id: 1
  name: 'aeroplane'
}

item {
  id: 2
  name: 'bicycle'
}

item {
  id: 3
  name: 'bird'
}

item {
  id: 4
  name: 'boat'
}

item {
  id: 5
  name: 'bottle'
}

item {
  id: 6
  name: 'bus'
}

item {
  id: 7
  name: 'car'
}

item {
  id: 8
  name: 'cat'
}

item {
  id: 9
  name: 'chair'
}

item {
  id: 10
  name: 'cow'
}

item {
  id: 11
  name: 'diningtable'
}

item {
  id: 12
  name: 'dog'
}

item {
  id: 13
  name: 'horse'
}

item {
  id: 14
  name: 'motorbike'
}

item {
  id: 15
  name: 'person'
}

item {
  id: 16
  name: 'pottedplant'
}

item {
  id: 17
  name: 'sheep'
}

item {
  id: 18
  name: 'sofa'
}

item {
  id: 19
  name: 'train'
}

item {
  id: 20
  name: 'tvmonitor'
}
--------------------------------------------------------------------------------
/others_project/readPbFile/test_images/image_info.txt:
--------------------------------------------------------------------------------

Image provenance:
image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg
image2.jpg: Michael Miley,
https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4
--------------------------------------------------------------------------------
/others_project/textcnnKeras/dataGenerator.py:
--------------------------------------------------------------------------------
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
import random


def content2idList(content, word2id_dict):
    """
    Convert a piece of text into the corresponding list of character ids
    content: the input text
    word2id_dict: the dictionary used for the lookup
    """
    idList = []
    for word in content:  # iterate over every character
        if word in word2id_dict:  # convert the character only if it is in the dictionary, otherwise drop it
            idList.append(word2id_dict[word])
    return idList


def generatorInfo(batch_size, seq_length, num_classes, file_name):
    """
    batch_size: batch size of the generated data
    seq_length: length of the input text sequence
    num_classes: number of text classes
    file_name: path of the file to read
    """
    # read the vocabulary file
    with open('./cnews/cnews.vocab.txt', encoding='utf-8') as file:
        vocabulary_list = [k.strip() for k in file.readlines()]
    word2id_dict = dict([(b, a) for a, b in enumerate(vocabulary_list)])

    # read the text file
    with open(file_name, encoding='utf-8') as file:
        line_list = [k.strip() for k in file.readlines()]
    data_label_list = []  # list of labels
    data_content_list = []  # list of text contents
    for k in line_list:
        t = k.split(maxsplit=1)
        data_label_list.append(t[0])
        data_content_list.append(t[1])

    data_id_list = [content2idList(content, word2id_dict) for content in data_content_list]  # convert the texts into id sequences
    # Convert the lists to ndarrays and unify the sequence length to seq_length:
    # sequences longer than seq_length are truncated from the front (keeping the tail),
    # shorter ones are zero-padded at the front
    data_X = keras.preprocessing.sequence.pad_sequences(data_id_list, seq_length, truncating='pre')
    labelEncoder = LabelEncoder()
    data_y = labelEncoder.fit_transform(data_label_list)  # convert text labels to numeric labels
    data_Y = keras.utils.to_categorical(data_y, num_classes)  # convert numeric labels to one-hot labels

    while True:
        selected_index = random.sample(list(range(len(data_y))), k=batch_size)  # randomly sample batch_size indices over the dataset
        batch_X = data_X[selected_index]  # sampled text info (id sequences)
        batch_Y = data_Y[selected_index]  # sampled label info (one-hot encoded)
        yield (batch_X, batch_Y)
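
# Example usage, matching the call in main.py:
#   gen = generatorInfo(batch_size=64, seq_length=600, num_classes=10,
#                       file_name='./cnews/cnews.train.txt')
#   batch_X, batch_Y = next(gen)  # batch_X: (64, 600) id matrix, batch_Y: (64, 10) one-hot labels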
--------------------------------------------------------------------------------
/others_project/textcnnKeras/data_link.txt:
--------------------------------------------------------------------------------
baidupan_url = "https://pan.baidu.com/s/1w452Z5eXbQSDQfgEBNUdlg"
extract_code = "8cwv"
--------------------------------------------------------------------------------
/others_project/textcnnKeras/main.py:
--------------------------------------------------------------------------------
from models import text_cnn, simpleNet, text_cnn_V2
from dataGenerator import generatorInfo
from tensorflow import keras

vocab_size = 5000
seq_length = 600
embedding_dim = 64
num_classes = 10
trainBatchSize = 64
evalBatchSize = 200
steps_per_epoch = 50000 // trainBatchSize
epoch = 2
workers = 4
logdir = './log/'
trainFileName = './cnews/cnews.train.txt'
evalFileName = './cnews/cnews.test.txt'

model = text_cnn(seq_length=seq_length,
                 vocab_size=vocab_size,
                 embedding_dim=embedding_dim,
                 num_cla=num_classes,
                 kernelNum=64)

trainGenerator = generatorInfo(trainBatchSize, seq_length, num_classes, trainFileName)
evalGenerator = generatorInfo(evalBatchSize, seq_length, num_classes, evalFileName)


def lrSchedule(epoch):
    lr = keras.backend.get_value(model.optimizer.lr)
    if epoch % 1 == 0 and epoch != 0:
        lr = lr * 0.5
    return lr


log = keras.callbacks.TensorBoard(log_dir=logdir, update_freq=500)
reduceLr = keras.callbacks.LearningRateScheduler(lrSchedule, verbose=1)

model.fit_generator(generator=trainGenerator,
                    steps_per_epoch=steps_per_epoch,
                    epochs=epoch,
                    validation_data=evalGenerator,
                    validation_steps=10,
                    workers=1,
                    callbacks=[log, reduceLr])
model.save_weights(logdir + 'train_weight.h5')
--------------------------------------------------------------------------------
/pytorch_classification/ConfusionMatrix/class_indices.json:
--------------------------------------------------------------------------------
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}
--------------------------------------------------------------------------------
/pytorch_classification/ConvNeXt/README.md:
--------------------------------------------------------------------------------
## Code usage

1. Download the dataset. The code uses the flower classification dataset by default, from [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz);
   if that fails, it is also available via Baidu Cloud: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg  access code: 58p0
2. In `train.py`, set `--data-path` to the absolute path of the unzipped `flower_photos` folder
3. Download the pretrained weights; `model.py` provides a download link for each model's pretrained weights, so download the one matching the model you use
4. In `train.py`, set `--weights` to the path of the downloaded pretrained weights
5. With the dataset path `--data-path` and the pretrained weights path `--weights` set, you can start training with `train.py` (a `class_indices.json` file is generated automatically during training)
6. In `predict.py`, import the same model as in the training script and set `model_weight_path` to the trained weights path (saved in the weights folder by default)
7. In `predict.py`, set `img_path` to the absolute path of the image you want to predict
8. With the weights path `model_weight_path` and the image path `img_path` set, you can run `predict.py` to predict
9. If you want to use your own dataset, arrange it following the flower dataset's file structure (one folder per class), and set `num_classes` in the training and prediction scripts to the number of classes in your data
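For reference, a typical run after steps 1-4 looks like the following; both paths are placeholders for your own (`predict.py` takes no arguments and reads the paths set in steps 6-7):
```
python train.py --data-path /home/user/data/flower_photos --weights ./pre_weights.pth
python predict.py
```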
--------------------------------------------------------------------------------
/pytorch_classification/ConvNeXt/my_dataset.py:
--------------------------------------------------------------------------------
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset"""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        img = Image.open(self.images_path[item])
        # mode 'RGB' is a color image, 'L' a grayscale image
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item]))
        label = self.images_class[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    @staticmethod
    def collate_fn(batch):
        # for the official default_collate implementation, see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = tuple(zip(*batch))

        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels
--------------------------------------------------------------------------------
/pytorch_classification/ConvNeXt/predict.py:
--------------------------------------------------------------------------------
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import convnext_tiny as create_model


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"using {device} device.")

    num_classes = 5
    img_size = 224
    data_transform = transforms.Compose(
        [transforms.Resize(int(img_size * 1.14)),
         transforms.CenterCrop(img_size),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=num_classes).to(device)
    # load model weights
    model_weight_path = "./weights/best_model.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/pytorch_classification/MobileViT/README.md:
--------------------------------------------------------------------------------
## Code usage

1. Download the dataset. The code uses the flower classification dataset by default, from [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz);
   if that fails, it is also available via Baidu Cloud: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg  access code: 58p0
2. In `train.py`, set `--data-path` to the absolute path of the unzipped `flower_photos` folder
3. Download the pretrained weights; `model.py` provides a download link for each model's pretrained weights, so download the one matching the model you use
4. In `train.py`, set `--weights` to the path of the downloaded pretrained weights
5. With the dataset path `--data-path` and the pretrained weights path `--weights` set, you can start training with `train.py` (a `class_indices.json` file is generated automatically during training)
6. In `predict.py`, import the same model as in the training script and set `model_weight_path` to the trained weights path (saved in the weights folder by default)
7. In `predict.py`, set `img_path` to the absolute path of the image you want to predict
8. With the weights path `model_weight_path` and the image path `img_path` set, you can run `predict.py` to predict
9. If you want to use your own dataset, arrange it following the flower dataset's file structure (one folder per class), and set `num_classes` in the training and prediction scripts to the number of classes in your data
--------------------------------------------------------------------------------
/pytorch_classification/MobileViT/my_dataset.py:
--------------------------------------------------------------------------------
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset"""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        img = Image.open(self.images_path[item])
        # mode 'RGB' is a color image, 'L' a grayscale image
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item]))
        label = self.images_class[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    @staticmethod
    def collate_fn(batch):
        # for the official default_collate implementation, see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = tuple(zip(*batch))

        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels
--------------------------------------------------------------------------------
/pytorch_classification/MobileViT/predict.py:
--------------------------------------------------------------------------------
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import mobile_vit_xx_small as create_model


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    img_size = 224
    data_transform = transforms.Compose(
        [transforms.Resize(int(img_size * 1.14)),
         transforms.CenterCrop(img_size),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=5).to(device)
    # load model weights
    model_weight_path = "./weights/best_model.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/pytorch_classification/MobileViT/unfold_test.py:
--------------------------------------------------------------------------------
import time
import torch

batch_size = 8
in_channels = 32
patch_h = 2
patch_w = 2
num_patch_h = 16
num_patch_w = 16
num_patches = num_patch_h * num_patch_w
patch_area = patch_h * patch_w


def official(x: torch.Tensor):
    # [B, C, H, W] -> [B * C * n_h, p_h, n_w, p_w]
    x = x.reshape(batch_size * in_channels * num_patch_h, patch_h, num_patch_w, patch_w)
    # [B * C * n_h, p_h, n_w, p_w] -> [B * C * n_h, n_w, p_h, p_w]
    x = x.transpose(1, 2)
    # [B * C * n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w
    x = x.reshape(batch_size, in_channels, num_patches, patch_area)
    # [B, C, N, P] -> [B, P, N, C]
    x = x.transpose(1, 3)
    # [B, P, N, C] -> [BP, N, C]
    x = x.reshape(batch_size * patch_area, num_patches, -1)

    return x


def my_self(x: torch.Tensor):
    # [B, C, H, W] -> [B, C, n_h, p_h, n_w, p_w]
    x = x.reshape(batch_size, in_channels, num_patch_h, patch_h, num_patch_w, patch_w)
    # [B, C, n_h, p_h, n_w, p_w] -> [B, C, n_h, n_w, p_h, p_w]
    x = x.transpose(3, 4)
    # [B, C, n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w
    x = x.reshape(batch_size, in_channels, num_patches, patch_area)
    # [B, C, N, P] -> [B, P, N, C]
    x = x.transpose(1, 3)
    # [B, P, N, C] -> [BP, N, C]
    x = x.reshape(batch_size * patch_area, num_patches, -1)

    return x


if __name__ == '__main__':
    t = torch.randn(batch_size, in_channels, num_patch_h * patch_h, num_patch_w * patch_w)
    print(torch.equal(official(t), my_self(t)))

    t1 = time.time()
    for _ in range(1000):
        official(t)
    print(f"official time: {time.time() - t1}")

    t1 = time.time()
    for _ in range(1000):
        my_self(t)
    print(f"self time: {time.time() - t1}")
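
# For reference only: einops is not a dependency of this repo, but the same
# [B, C, H, W] -> [B*P, N, C] rearrangement computed by both functions above
# can be expressed in a single line:
#   from einops import rearrange
#   x = rearrange(x, 'b c (nh ph) (nw pw) -> (b ph pw) (nh nw) c',
#                 ph=patch_h, pw=patch_w)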
--------------------------------------------------------------------------------
/pytorch_classification/README.md:
--------------------------------------------------------------------------------
## This folder holds the code versions implemented with PyTorch
**model.py**: the model definition
**train.py**: the script that trains the model
**predict.py**: the script that runs prediction with the trained model
**class_indices.json**: the label file for the training dataset

------
To use the training scripts, download the flower classification dataset and split it into a training set and a validation set.
[Click here](../data_set/README.md) for how to download the dataset; a ready-made splitting script is also provided there.
--------------------------------------------------------------------------------
/pytorch_classification/Test10_regnet/README.md:
--------------------------------------------------------------------------------
## Code usage

1. Download the dataset. The code uses the flower classification dataset by default, from [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz);
   if that fails, it is also available via Baidu Cloud: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg  access code: 58p0
2. In `train.py`, set `--data-path` to the absolute path of the unzipped `flower_photos` folder
3. Download the pretrained weights matching the model you use: https://pan.baidu.com/s/1XTo3walj9ai7ZhWz7jh-YA  password: 8lmu
4. In `train.py`, set `--weights` to the path of the downloaded pretrained weights
5. With the dataset path `--data-path` and the pretrained weights path `--weights` set, you can start training with `train.py` (a `class_indices.json` file is generated automatically during training)
6. In `predict.py`, import the same model as in the training script and set `model_weight_path` to the trained weights path (saved in the weights folder by default)
7. In `predict.py`, set `img_path` to the absolute path of the image you want to predict
8. With the weights path `model_weight_path` and the image path `img_path` set, you can run `predict.py` to predict
9. If you want to use your own dataset, arrange it following the flower dataset's file structure (one folder per class), and set `num_classes` in the training and prediction scripts to the number of classes in your data
--------------------------------------------------------------------------------
/pytorch_classification/Test10_regnet/my_dataset.py:
--------------------------------------------------------------------------------
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset"""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        img = Image.open(self.images_path[item])
        # mode 'RGB' is a color image, 'L' a grayscale image
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item]))
        label = self.images_class[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    @staticmethod
    def collate_fn(batch):
        # for the official default_collate implementation, see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = tuple(zip(*batch))

        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels
--------------------------------------------------------------------------------
/pytorch_classification/Test10_regnet/predict.py:
--------------------------------------------------------------------------------
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import create_regnet


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_regnet(model_name="RegNetY_400MF", num_classes=5).to(device)
    # load model weights
    model_weight_path = "./weights/model-29.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()
batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | with open(json_path, "r") as f: 36 | class_indict = json.load(f) 37 | 38 | # create model 39 | model = create_regnet(model_name="RegNetY_400MF", num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-29.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /pytorch_classification/Test11_efficientnetV2/README.md: -------------------------------------------------------------------------------- 1 | ## 代码使用简介 2 | 3 | 1. 下载好数据集,代码中默认使用的是花分类数据集,下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz), 4 | 如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0 5 | 2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径 6 | 3. 下载预训练权重,根据自己使用的模型下载对应预训练权重: https://pan.baidu.com/s/1uZX36rvrfEss-JGj4yfzbQ 密码: 5gu1 7 | 4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径 8 | 5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件) 9 | 6. 在`predict.py`脚本中导入和训练脚本中同样的模型,并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下) 10 | 7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径 11 | 8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了 12 | 9. 
如果要使用自己的数据集,请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹),并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数 13 | -------------------------------------------------------------------------------- /pytorch_classification/Test11_efficientnetV2/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test11_efficientnetV2/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test1_official_demo/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class LeNet(nn.Module): 6 | def __init__(self): 7 | super(LeNet, self).__init__() 8 | self.conv1 = nn.Conv2d(3, 16, 5) 9 | self.pool1 = nn.MaxPool2d(2, 2) 10 | self.conv2 = nn.Conv2d(16, 32, 5) 11 | self.pool2 = nn.MaxPool2d(2, 2) 12 | self.fc1 = nn.Linear(32*5*5, 120) 13 | self.fc2 = nn.Linear(120, 84) 14 | self.fc3 = nn.Linear(84, 10) 15 | 16 | def forward(self, x): 17 | x = F.relu(self.conv1(x)) # input(3, 32, 32) output(16, 28, 28) 18 | x = self.pool1(x) # output(16, 14, 14) 19 | x = F.relu(self.conv2(x)) # output(32, 10, 10) 20 | x = self.pool2(x) # output(32, 5, 5) 21 | x = x.view(-1, 32*5*5) # output(32*5*5) 22 | x = F.relu(self.fc1(x)) # output(120) 23 | x = F.relu(self.fc2(x)) # output(84) 24 | x = self.fc3(x) # output(10) 25 | return x 26 | 27 | 28 | -------------------------------------------------------------------------------- /pytorch_classification/Test1_official_demo/predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | from PIL import Image 4 | 5 | from model import LeNet 6 | 7 | 8 | def main(): 9 | transform = transforms.Compose( 10 | [transforms.Resize((32, 32)), 11 | transforms.ToTensor(), 12 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 13 | 14 | classes = ('plane', 'car', 'bird', 'cat', 15 | 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 16 | 17 | net = LeNet() 18 | net.load_state_dict(torch.load('Lenet.pth')) 19 | 20 | im = 
Image.open('1.jpg') 21 | im = transform(im) # [C, H, W] 22 | im = torch.unsqueeze(im, dim=0) # [N, C, H, W] 23 | 24 | with torch.no_grad(): 25 | outputs = net(im) 26 | predict = torch.max(outputs, dim=1)[1].numpy() 27 | print(classes[int(predict)]) 28 | 29 | 30 | if __name__ == '__main__': 31 | main() 32 | -------------------------------------------------------------------------------- /pytorch_classification/Test2_alexnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test2_alexnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import AlexNet 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize((224, 224)), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 34 | 35 | with open(json_path, "r") as f: 36 | class_indict = json.load(f) 37 | 38 | # create model 39 | model = AlexNet(num_classes=5).to(device) 40 | 41 | # load model weights 42 | weights_path = "./AlexNet.pth" 43 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 44 | model.load_state_dict(torch.load(weights_path, map_location=device)) 45 | 46 | model.eval() 47 | with torch.no_grad(): 48 | # predict class 49 | output = torch.squeeze(model(img.to(device))).cpu() 50 | predict = torch.softmax(output, dim=0) 51 | predict_cla = torch.argmax(predict).numpy() 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 54 | predict[predict_cla].numpy()) 55 | plt.title(print_res) 56 | for i in range(len(predict)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | predict[i].numpy())) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /pytorch_classification/Test3_vggnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test3_vggnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import vgg 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 |
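# NOTE: the evaluation transform below must match the preprocessing used when this
# model was trained. Normalize rescales each channel c as img[c] = (img[c] - mean[c]) / std[c],
# so using different mean/std values at inference time than at training time will hurt accuracy.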
data_transform = transforms.Compose( 16 | [transforms.Resize((224, 224)), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | plt.imshow(img) 25 | # [N, C, H, W] 26 | img = data_transform(img) 27 | # expand batch dimension 28 | img = torch.unsqueeze(img, dim=0) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 33 | 34 | with open(json_path, "r") as f: 35 | class_indict = json.load(f) 36 | 37 | # create model 38 | model = vgg(model_name="vgg16", num_classes=5).to(device) 39 | # load model weights 40 | weights_path = "./vgg16Net.pth" 41 | assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path) 42 | model.load_state_dict(torch.load(weights_path, map_location=device)) 43 | 44 | model.eval() 45 | with torch.no_grad(): 46 | # predict class 47 | output = torch.squeeze(model(img.to(device))).cpu() 48 | predict = torch.softmax(output, dim=0) 49 | predict_cla = torch.argmax(predict).numpy() 50 | 51 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 52 | predict[predict_cla].numpy()) 53 | plt.title(print_res) 54 | for i in range(len(predict)): 55 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 56 | predict[i].numpy())) 57 | plt.show() 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /pytorch_classification/Test4_googlenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/README.md: -------------------------------------------------------------------------------- 1 | ## 文件结构: 2 | ``` 3 | ├── model.py: ResNet模型搭建 4 | ├── train.py: 训练脚本 5 | ├── predict.py: 单张图像预测脚本 6 | └── batch_predict.py: 批量图像预测脚本 7 | ``` -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/load_weights.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | from model import resnet34 5 | 6 | 7 | def main(): 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | # load pretrain weights 11 | # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth 12 | model_weight_path = "./resnet34-pre.pth" 13 | assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path) 14 | 15 | # option1 16 | net = resnet34() 17 | net.load_state_dict(torch.load(model_weight_path, map_location=device)) 18 | # change fc layer structure 19 | in_channel = net.fc.in_features 20 | net.fc = nn.Linear(in_channel, 5) 21 | 22 | # option2 23 | # net = 
resnet34(num_classes=5) 24 | # pre_weights = torch.load(model_weight_path, map_location=device) 25 | # del_key = [] 26 | # for key, _ in pre_weights.items(): 27 | # if "fc" in key: 28 | # del_key.append(key) 29 | # 30 | # for key in del_key: 31 | # del pre_weights[key] 32 | # 33 | # missing_keys, unexpected_keys = net.load_state_dict(pre_weights, strict=False) 34 | # print("[missing_keys]:", *missing_keys, sep="\n") 35 | # print("[unexpected_keys]:", *unexpected_keys, sep="\n") 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /pytorch_classification/Test5_resnet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import resnet34 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | with open(json_path, "r") as f: 36 | class_indict = json.load(f) 37 | 38 | # create model 39 | model = resnet34(num_classes=5).to(device) 40 | 41 | # load model weights 42 | weights_path = "./resNet34.pth" 43 | assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path) 44 | model.load_state_dict(torch.load(weights_path, map_location=device)) 45 | 46 | # prediction 47 | model.eval() 48 | with torch.no_grad(): 49 | # predict class 50 | output = torch.squeeze(model(img.to(device))).cpu() 51 | predict = torch.softmax(output, dim=0) 52 | predict_cla = torch.argmax(predict).numpy() 53 | 54 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 55 | predict[predict_cla].numpy()) 56 | plt.title(print_res) 57 | for i in range(len(predict)): 58 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 59 | predict[i].numpy())) 60 | plt.show() 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /pytorch_classification/Test6_mobilenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test6_mobilenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model_v2 import MobileNetV2 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | 
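# Standard ImageNet-style evaluation preprocessing: resize the shorter edge to 256,
# take the central 224x224 crop, then normalize with the ImageNet channel statistics
# ([0.485, 0.456, 0.406] / [0.229, 0.224, 0.225]) that the pretrained backbone expects.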
data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | with open(json_path, "r") as f: 36 | class_indict = json.load(f) 37 | 38 | # create model 39 | model = MobileNetV2(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./MobileNetV2.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /pytorch_classification/Test7_shufflenet/README.md: -------------------------------------------------------------------------------- 1 | ## 代码使用简介 2 | 3 | 1. 下载好数据集,代码中默认使用的是花分类数据集,下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz), 4 | 如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0 5 | 2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径 6 | 3. 下载预训练权重,在`model.py`文件中每个模型都有提供预训练权重的下载地址,根据自己使用的模型下载对应预训练权重 7 | 4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径 8 | 5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件) 9 | 6. 在`predict.py`脚本中导入和训练脚本中同样的模型,并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下) 10 | 7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径 11 | 8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了 12 | 9. 
如果要使用自己的数据集,请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹),并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数 13 | -------------------------------------------------------------------------------- /pytorch_classification/Test7_shufflenet/class_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | } -------------------------------------------------------------------------------- /pytorch_classification/Test7_shufflenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test7_shufflenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import shufflenet_v2_x1_0 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | with open(json_path, "r") as f: 36 | class_indict = json.load(f) 37 | 38 | # create model 39 | model = shufflenet_v2_x1_0(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-29.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: 
{:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /pytorch_classification/Test8_densenet/README.md: -------------------------------------------------------------------------------- 1 | ## 代码使用简介 2 | 3 | 1. 下载好数据集,代码中默认使用的是花分类数据集,下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz), 4 | 如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0 5 | 2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径 6 | 3. 下载预训练权重,在`model.py`文件中每个模型都有提供预训练权重的下载地址,根据自己使用的模型下载对应预训练权重 7 | 4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径 8 | 5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件) 9 | 6. 在`predict.py`脚本中导入和训练脚本中同样的模型,并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下) 10 | 7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径 11 | 8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了 12 | 9. 如果要使用自己的数据集,请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹),并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数 13 | -------------------------------------------------------------------------------- /pytorch_classification/Test8_densenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test8_densenet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import densenet121 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert 
os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 34 | 35 | with open(json_path, "r") as f: 36 | class_indict = json.load(f) 37 | 38 | # create model 39 | model = densenet121(num_classes=5).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-3.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/README.md: -------------------------------------------------------------------------------- 1 | ## 代码使用简介 2 | 3 | 1. 下载好数据集,代码中默认使用的是花分类数据集,下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz), 4 | 如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0 5 | 2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径 6 | 3. 下载预训练权重,根据自己使用的模型下载对应预训练权重: https://pan.baidu.com/s/1ouX0UmjCsmSx3ZrqXbowjw 密码: 090i 7 | 4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径 8 | 5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件) 9 | 6. 在`predict.py`脚本中导入和训练脚本中同样的模型,并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下) 10 | 7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径 11 | 8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了 12 | 9. 
如果要使用自己的数据集,请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹),并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数 13 | -------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/Test9_efficientNet/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | tqdm==4.56.0 4 | torch>=1.7.1 5 | torchvision>=0.8.2 6 | -------------------------------------------------------------------------------- /pytorch_classification/analyze_weights_featuremap/analyze_feature_map.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from alexnet_model import AlexNet 3 | from resnet_model import resnet34 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | from PIL import Image 7 | from torchvision import transforms 8 | 9 | data_transform = transforms.Compose( 10 | [transforms.Resize((224, 224)), 11 | transforms.ToTensor(), 12 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 13 | 14 | # data_transform = transforms.Compose( 15 | # [transforms.Resize(256), 16 | # transforms.CenterCrop(224), 17 | # transforms.ToTensor(), 18 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 19 | 20 | # create model 21 | model = AlexNet(num_classes=5) 22 | # model = resnet34(num_classes=5) 23 | # load model weights 24 | model_weight_path = "./AlexNet.pth" # "./resNet34.pth" 25 | model.load_state_dict(torch.load(model_weight_path)) 26 | print(model) 27 | 28 | # load image 29 | img = Image.open("../tulip.jpg") 30 | # [N, C, H, W] 31 | img = data_transform(img) 32 | # expand batch dimension 33 | img = torch.unsqueeze(img, dim=0) 34 | 35 | # forward 36 | out_put = model(img) 37 | for feature_map in out_put: 38 | # [N, C, H, W] -> [C, H, W] 39 | im = np.squeeze(feature_map.detach().numpy()) 40 | # [C, H, W] -> [H, W, C] 41 | im = np.transpose(im, [1, 2, 0]) 42 | 43 | # show top 12 feature maps 44 | plt.figure() 45 | for i in range(12): 46 | ax = plt.subplot(3, 4, i+1) 47 | # [H, W, C] 48 | plt.imshow(im[:, :, i], cmap='gray') 49 | plt.show() 50 | 51 | -------------------------------------------------------------------------------- 
/pytorch_classification/analyze_weights_featuremap/analyze_kernel_weight.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from alexnet_model import AlexNet 3 | from resnet_model import resnet34 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | 8 | # create model 9 | model = AlexNet(num_classes=5) 10 | # model = resnet34(num_classes=5) 11 | # load model weights 12 | model_weight_path = "./AlexNet.pth" # "resNet34.pth" 13 | model.load_state_dict(torch.load(model_weight_path)) 14 | print(model) 15 | 16 | weights_keys = model.state_dict().keys() 17 | for key in weights_keys: 18 | # skip the num_batches_tracked parameters (from BN layers) 19 | if "num_batches_tracked" in key: 20 | continue 21 | # [kernel_number, kernel_channel, kernel_height, kernel_width] 22 | weight_t = model.state_dict()[key].numpy() 23 | 24 | # read a single kernel's weights 25 | # k = weight_t[0, :, :, :] 26 | 27 | # calculate mean, std, min, max 28 | weight_mean = weight_t.mean() 29 | weight_std = weight_t.std(ddof=1) 30 | weight_min = weight_t.min() 31 | weight_max = weight_t.max() 32 | print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean, 33 | weight_std, 34 | weight_min, 35 | weight_max)) 36 | 37 | # plot a histogram of the weights 38 | plt.close() 39 | weight_vec = np.reshape(weight_t, [-1]) 40 | plt.hist(weight_vec, bins=50) 41 | plt.title(key) 42 | plt.show() 43 | 44 | -------------------------------------------------------------------------------- /pytorch_classification/custom_dataset/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from torchvision import transforms 5 | 6 | from my_dataset import MyDataSet 7 | from utils import read_split_data, plot_data_loader_image 8 | 9 | # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz 10 | root = "/home/wz/my_github/data_set/flower_data/flower_photos" # root directory of the dataset 11 | 12 | 13 | def main(): 14 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 15 | print("using {} device.".format(device)) 16 | 17 | train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(root) 18 | 19 | data_transform = { 20 | "train": transforms.Compose([transforms.RandomResizedCrop(224), 21 | transforms.RandomHorizontalFlip(), 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]), 24 | "val": transforms.Compose([transforms.Resize(256), 25 | transforms.CenterCrop(224), 26 | transforms.ToTensor(), 27 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])} 28 | 29 | train_data_set = MyDataSet(images_path=train_images_path, 30 | images_class=train_images_label, 31 | transform=data_transform["train"]) 32 | 33 | batch_size = 8 34 | nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers 35 | print('Using {} dataloader workers'.format(nw)) 36 | train_loader = torch.utils.data.DataLoader(train_data_set, 37 | batch_size=batch_size, 38 | shuffle=True, 39 | num_workers=nw, 40 | collate_fn=train_data_set.collate_fn) 41 | 42 | # plot_data_loader_image(train_loader) 43 | 44 | for step, data in enumerate(train_loader): 45 | images, labels = data 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /pytorch_classification/custom_dataset/my_dataset.py: 
-------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | 39 | -------------------------------------------------------------------------------- /pytorch_classification/grad_cam/README.md: -------------------------------------------------------------------------------- 1 | ## Grad-CAM 2 | - Original Impl: [https://github.com/jacobgil/pytorch-grad-cam](https://github.com/jacobgil/pytorch-grad-cam) 3 | - Grad-CAM简介: [https://b23.tv/1kccjmb](https://b23.tv/1kccjmb) 4 | - 使用Pytorch实现Grad-CAM并绘制热力图: [https://b23.tv/n1e60vN](https://b23.tv/n1e60vN) 5 | 6 | ## 使用流程(替换成自己的网络) 7 | 1. 将创建模型部分代码替换成自己创建模型的代码,并载入自己训练好的权重 8 | 2. 根据自己网络设置合适的`target_layers` 9 | 3. 根据自己的网络设置合适的预处理方法 10 | 4. 将要预测的图片路径赋值给`img_path` 11 | 5. 
将感兴趣的类别id赋值给`target_category` 12 | 13 | -------------------------------------------------------------------------------- /pytorch_classification/grad_cam/both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_classification/grad_cam/both.png -------------------------------------------------------------------------------- /pytorch_classification/grad_cam/main_cnn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from PIL import Image 5 | import matplotlib.pyplot as plt 6 | from torchvision import models 7 | from torchvision import transforms 8 | from utils import GradCAM, show_cam_on_image, center_crop_img 9 | 10 | 11 | def main(): 12 | model = models.mobilenet_v3_large(pretrained=True) 13 | target_layers = [model.features[-1]] 14 | 15 | # model = models.vgg16(pretrained=True) 16 | # target_layers = [model.features] 17 | 18 | # model = models.resnet34(pretrained=True) 19 | # target_layers = [model.layer4] 20 | 21 | # model = models.regnet_y_800mf(pretrained=True) 22 | # target_layers = [model.trunk_output] 23 | 24 | # model = models.efficientnet_b0(pretrained=True) 25 | # target_layers = [model.features] 26 | 27 | data_transform = transforms.Compose([transforms.ToTensor(), 28 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 29 | # load image 30 | img_path = "both.png" 31 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 32 | img = Image.open(img_path).convert('RGB') 33 | img = np.array(img, dtype=np.uint8) 34 | # img = center_crop_img(img, 224) 35 | 36 | # [C, H, W] 37 | img_tensor = data_transform(img) 38 | # expand batch dimension 39 | # [C, H, W] -> [N, C, H, W] 40 | input_tensor = torch.unsqueeze(img_tensor, dim=0) 41 | 42 | cam = GradCAM(model=model, target_layers=target_layers, use_cuda=False) 43 | target_category = 281 # tabby, tabby cat 44 | # target_category = 254 # pug, pug-dog 45 | 46 | grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category) 47 | 48 | grayscale_cam = grayscale_cam[0, :] 49 | visualization = show_cam_on_image(img.astype(dtype=np.float32) / 255., 50 | grayscale_cam, 51 | use_rgb=True) 52 | plt.imshow(visualization) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/README.md: -------------------------------------------------------------------------------- 1 | ## download mini-imagenet 2 | link: [https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ](https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ) password: hl31 3 | 4 | ## dataset path structure 5 | ``` 6 | ├── mini-imagenet: total 100 classes, 60000 images 7 | ├── images: 60000 images 8 | ├── train.csv: 64 classes, 38400 images 9 | ├── val.csv: 16 classes, 9600 images 10 | └── test.csv: 20 classes, 12000 images 11 | ``` -------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/multi_train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .train_eval_utils import train_one_epoch, evaluate 2 | from .distributed_utils import init_distributed_mode, dist, cleanup 3 | 
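# These re-exports let the training scripts in this folder import every helper from
# the package root, e.g.
#   from multi_train_utils import train_one_epoch, evaluate, init_distributed_mode, cleanup
# rather than reaching into each submodule individually.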
-------------------------------------------------------------------------------- /pytorch_classification/mini_imagenet/my_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from PIL import Image 4 | import pandas as pd 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | class MyDataSet(Dataset): 10 | """自定义数据集""" 11 | 12 | def __init__(self, 13 | root_dir: str, 14 | csv_name: str, 15 | json_path: str, 16 | transform=None): 17 | images_dir = os.path.join(root_dir, "images") 18 | assert os.path.exists(images_dir), "dir:'{}' not found.".format(images_dir) 19 | 20 | assert os.path.exists(json_path), "file:'{}' not found.".format(json_path) 21 | self.label_dict = json.load(open(json_path, "r")) 22 | 23 | csv_path = os.path.join(root_dir, csv_name) 24 | assert os.path.exists(csv_path), "file:'{}' not found.".format(csv_path) 25 | csv_data = pd.read_csv(csv_path) 26 | self.total_num = csv_data.shape[0] 27 | self.img_paths = [os.path.join(images_dir, i)for i in csv_data["filename"].values] 28 | self.img_label = [self.label_dict[i][0] for i in csv_data["label"].values] 29 | self.labels = set(csv_data["label"].values) 30 | 31 | self.transform = transform 32 | 33 | def __len__(self): 34 | return self.total_num 35 | 36 | def __getitem__(self, item): 37 | img = Image.open(self.img_paths[item]) 38 | # RGB为彩色图片,L为灰度图片 39 | if img.mode != 'RGB': 40 | raise ValueError("image: {} isn't RGB mode.".format(self.img_paths[item])) 41 | label = self.img_label[item] 42 | 43 | if self.transform is not None: 44 | img = self.transform(img) 45 | 46 | return img, label 47 | 48 | @staticmethod 49 | def collate_fn(batch): 50 | # 官方实现的default_collate可以参考 51 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 52 | images, labels = tuple(zip(*batch)) 53 | 54 | images = torch.stack(images, dim=0) 55 | labels = torch.as_tensor(labels) 56 | return images, labels 57 | -------------------------------------------------------------------------------- /pytorch_classification/model_complexity/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from fvcore.nn import FlopCountAnalysis, parameter_count_table 3 | from prettytable import PrettyTable 4 | from model import efficientnetv2_s 5 | 6 | 7 | def main(): 8 | model = efficientnetv2_s() 9 | 10 | # option1 11 | for name, para in model.named_parameters(): 12 | # 除head外,其他权重全部冻结 13 | if "head" not in name: 14 | para.requires_grad_(False) 15 | else: 16 | print("training {}".format(name)) 17 | 18 | complexity = model.complexity(224, 224, 3) 19 | table = PrettyTable() 20 | table.field_names = ["params", "freeze-params", "train-params", "FLOPs", "acts"] 21 | table.add_row([complexity["params"], 22 | complexity["freeze"], 23 | complexity["params"] - complexity["freeze"], 24 | complexity["flops"], 25 | complexity["acts"]]) 26 | print(table) 27 | 28 | # option2 29 | tensor = (torch.rand(1, 3, 224, 224),) 30 | flops = FlopCountAnalysis(model, tensor) 31 | print(flops.total()) 32 | 33 | print(parameter_count_table(model)) 34 | 35 | 36 | if __name__ == '__main__': 37 | main() 38 | -------------------------------------------------------------------------------- /pytorch_classification/swin_transformer/README.md: -------------------------------------------------------------------------------- 1 | ## 代码使用简介 2 | 3 | 1. 
下载好数据集,代码中默认使用的是花分类数据集,下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz), 4 | 如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0 5 | 2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径 6 | 3. 下载预训练权重,在`model.py`文件中每个模型都有提供预训练权重的下载地址,根据自己使用的模型下载对应预训练权重 7 | 4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径 8 | 5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件) 9 | 6. 在`predict.py`脚本中导入和训练脚本中同样的模型,并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下) 10 | 7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径 11 | 8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了 12 | 9. 如果要使用自己的数据集,请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹),并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数 13 | -------------------------------------------------------------------------------- /pytorch_classification/swin_transformer/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/swin_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from model import swin_tiny_patch4_window7_224 as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | img_size = 224 16 | data_transform = transforms.Compose( 17 | [transforms.Resize(int(img_size * 1.14)), 18 | transforms.CenterCrop(img_size), 19 | transforms.ToTensor(), 20 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 21 | 22 | # load image 23 | img_path = "../tulip.jpg" 24 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 25 | img = Image.open(img_path) 26 | plt.imshow(img) 27 | # [N, C, H, W] 28 | img = data_transform(img) 29 | # expand batch dimension 30 | img = torch.unsqueeze(img, dim=0) 31 | 32 | # read class_indict 33 | json_path = './class_indices.json' 34 | assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) 35 | 36 | with open(json_path, "r") as f: 37 | class_indict = 
json.load(f) 38 | 39 | # create model 40 | model = create_model(num_classes=5).to(device) 41 | # load model weights 42 | model_weight_path = "./weights/model-9.pth" 43 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 44 | model.eval() 45 | with torch.no_grad(): 46 | # predict class 47 | output = torch.squeeze(model(img.to(device))).cpu() 48 | predict = torch.softmax(output, dim=0) 49 | predict_cla = torch.argmax(predict).numpy() 50 | 51 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 52 | predict[predict_cla].numpy()) 53 | plt.title(print_res) 54 | for i in range(len(predict)): 55 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 56 | predict[i].numpy())) 57 | plt.show() 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/my_dataset.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from PIL import Image 3 | import torch 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class MyDataSet(Dataset): 8 | """自定义数据集""" 9 | 10 | def __init__(self, images_path: list, images_class: list, transform=None): 11 | self.images_path = images_path 12 | self.images_class = images_class 13 | self.transform = transform 14 | 15 | delete_img = [] 16 | for index, img_path in tqdm(enumerate(images_path)): 17 | img = Image.open(img_path) 18 | w, h = img.size 19 | ratio = w / h 20 | if ratio > 10 or ratio < 0.1: 21 | delete_img.append(index) 22 | # print(img_path, ratio) 23 | 24 | for index in delete_img[::-1]: 25 | self.images_path.pop(index) 26 | self.images_class.pop(index) 27 | 28 | def __len__(self): 29 | return len(self.images_path) 30 | 31 | def __getitem__(self, item): 32 | img = Image.open(self.images_path[item]) 33 | # RGB为彩色图片,L为灰度图片 34 | if img.mode != 'RGB': 35 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 36 | label = self.images_class[item] 37 | 38 | if self.transform is not None: 39 | img = self.transform(img) 40 | 41 | return img, label 42 | 43 | @staticmethod 44 | def collate_fn(batch): 45 | # 官方实现的default_collate可以参考 46 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 47 | images, labels = tuple(zip(*batch)) 48 | 49 | images = torch.stack(images, dim=0) 50 | labels = torch.as_tensor(labels) 51 | return images, labels 52 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/requirements.txt: -------------------------------------------------------------------------------- 1 | torchvision==0.7.0 2 | tqdm==4.42.1 3 | matplotlib==3.2.1 4 | torch==1.13.1 5 | Pillow 6 | tensorboard 7 | -------------------------------------------------------------------------------- /pytorch_classification/tensorboard_test/train_eval_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from tqdm import tqdm 4 | import torch 5 | 6 | 7 | def train_one_epoch(model, optimizer, data_loader, device, epoch): 8 | model.train() 9 | loss_function = torch.nn.CrossEntropyLoss() 10 | mean_loss = torch.zeros(1).to(device) 11 | optimizer.zero_grad() 12 | 13 | data_loader = tqdm(data_loader, file=sys.stdout) 14 | for step, data in enumerate(data_loader): 15 | images, labels = data 16 | pred = model(images.to(device)) 17 | 18 | loss = 
loss_function(pred, labels.to(device)) 19 | loss.backward() 20 | mean_loss = (mean_loss * step + loss.detach()) / (step + 1) # update mean losses 21 | 22 | # 打印平均loss 23 | data_loader.desc = "[epoch {}] mean loss {}".format(epoch, round(mean_loss.item(), 3)) 24 | 25 | if not torch.isfinite(loss): 26 | print('WARNING: non-finite loss, ending training ', loss) 27 | sys.exit(1) 28 | 29 | optimizer.step() 30 | optimizer.zero_grad() 31 | 32 | return mean_loss.item() 33 | 34 | 35 | @torch.no_grad() 36 | def evaluate(model, data_loader, device): 37 | model.eval() 38 | 39 | # 用于存储预测正确的样本个数 40 | sum_num = torch.zeros(1).to(device) 41 | # 统计验证集样本总数目 42 | num_samples = len(data_loader.dataset) 43 | 44 | # 打印验证进度 45 | data_loader = tqdm(data_loader, desc="validation...", file=sys.stdout) 46 | 47 | for step, data in enumerate(data_loader): 48 | images, labels = data 49 | pred = model(images.to(device)) 50 | pred = torch.max(pred, dim=1)[1] 51 | sum_num += torch.eq(pred, labels.to(device)).sum() 52 | 53 | # 计算预测正确的比例 54 | acc = sum_num.item() / num_samples 55 | 56 | return acc 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/README.md: -------------------------------------------------------------------------------- 1 | ## 多GPU启动指令 2 | - 如果要使用```train_multi_gpu_using_launch.py```脚本,使用以下指令启动 3 | - ```python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_gpu_using_launch.py``` 4 | - 其中```nproc_per_node```为并行GPU的数量 5 | - 如果要指定使用某几块GPU可使用如下指令,例如使用第1块和第4块GPU进行训练: 6 | - ```CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_gpu_using_launch.py``` 7 | 8 | ----- 9 | 10 | - 如果要使用```train_multi_gpu_using_spawn.py```脚本,使用以下指令启动 11 | - ```python train_multi_gpu_using_spawn.py``` 12 | 13 | ## 训练时间对比 14 | ![training time](training_time.png) 15 | 16 | ## 是否使用SyncBatchNorm 17 | ![syncbn](syncbn.png) 18 | 19 | ## 单GPU与多GPU训练曲线 20 | ![accuracy](accuracy.png) 21 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_classification/train_multi_GPU/accuracy.png -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/multi_train_utils/distributed_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def init_distributed_mode(args): 8 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 9 | args.rank = int(os.environ["RANK"]) 10 | args.world_size = int(os.environ['WORLD_SIZE']) 11 | args.gpu = int(os.environ['LOCAL_RANK']) 12 | elif 'SLURM_PROCID' in os.environ: 13 | args.rank = int(os.environ['SLURM_PROCID']) 14 | args.gpu = args.rank % torch.cuda.device_count() 15 | else: 16 | print('Not using distributed mode') 17 | args.distributed = False 18 | return 19 | 20 | args.distributed = True 21 | 22 | torch.cuda.set_device(args.gpu) 23 | args.dist_backend = 'nccl' # 通信后端,nvidia GPU推荐使用NCCL 24 | print('| distributed init (rank {}): {}'.format( 25 | args.rank, args.dist_url), flush=True) 26 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 27 | 
world_size=args.world_size, rank=args.rank) 28 | dist.barrier() 29 | 30 | 31 | def cleanup(): 32 | dist.destroy_process_group() 33 | 34 | 35 | def is_dist_avail_and_initialized(): 36 | """检查是否支持分布式环境""" 37 | if not dist.is_available(): 38 | return False 39 | if not dist.is_initialized(): 40 | return False 41 | return True 42 | 43 | 44 | def get_world_size(): 45 | if not is_dist_avail_and_initialized(): 46 | return 1 47 | return dist.get_world_size() 48 | 49 | 50 | def get_rank(): 51 | if not is_dist_avail_and_initialized(): 52 | return 0 53 | return dist.get_rank() 54 | 55 | 56 | def is_main_process(): 57 | return get_rank() == 0 58 | 59 | 60 | def reduce_value(value, average=True): 61 | world_size = get_world_size() 62 | if world_size < 2: # 单GPU的情况 63 | return value 64 | 65 | with torch.no_grad(): 66 | dist.all_reduce(value) 67 | if average: 68 | value /= world_size 69 | 70 | return value 71 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/multi_train_utils/train_eval_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from tqdm import tqdm 4 | import torch 5 | 6 | from multi_train_utils.distributed_utils import reduce_value, is_main_process 7 | 8 | 9 | def train_one_epoch(model, optimizer, data_loader, device, epoch): 10 | model.train() 11 | loss_function = torch.nn.CrossEntropyLoss() 12 | mean_loss = torch.zeros(1).to(device) 13 | optimizer.zero_grad() 14 | 15 | # 在进程0中打印训练进度 16 | if is_main_process(): 17 | data_loader = tqdm(data_loader, file=sys.stdout) 18 | 19 | for step, data in enumerate(data_loader): 20 | images, labels = data 21 | 22 | pred = model(images.to(device)) 23 | 24 | loss = loss_function(pred, labels.to(device)) 25 | loss.backward() 26 | loss = reduce_value(loss, average=True) 27 | mean_loss = (mean_loss * step + loss.detach()) / (step + 1) # update mean losses 28 | 29 | # 在进程0中打印平均loss 30 | if is_main_process(): 31 | data_loader.desc = "[epoch {}] mean loss {}".format(epoch, round(mean_loss.item(), 3)) 32 | 33 | if not torch.isfinite(loss): 34 | print('WARNING: non-finite loss, ending training ', loss) 35 | sys.exit(1) 36 | 37 | optimizer.step() 38 | optimizer.zero_grad() 39 | 40 | # 等待所有进程计算完毕 41 | if device != torch.device("cpu"): 42 | torch.cuda.synchronize(device) 43 | 44 | return mean_loss.item() 45 | 46 | 47 | @torch.no_grad() 48 | def evaluate(model, data_loader, device): 49 | model.eval() 50 | 51 | # 用于存储预测正确的样本个数 52 | sum_num = torch.zeros(1).to(device) 53 | 54 | # 在进程0中打印验证进度 55 | if is_main_process(): 56 | data_loader = tqdm(data_loader, file=sys.stdout) 57 | 58 | for step, data in enumerate(data_loader): 59 | images, labels = data 60 | pred = model(images.to(device)) 61 | pred = torch.max(pred, dim=1)[1] 62 | sum_num += torch.eq(pred, labels.to(device)).sum() 63 | 64 | # 等待所有进程计算完毕 65 | if device != torch.device("cpu"): 66 | torch.cuda.synchronize(device) 67 | 68 | sum_num = reduce_value(sum_num, average=False) 69 | 70 | return sum_num.item() 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | 
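# images_path:  list of paths to the image files
# images_class: list of integer class labels, aligned one-to-one with images_path
# transform:    optional torchvision transform applied to each PIL image in __getitem__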
self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.2.1 2 | tqdm==4.42.1 3 | torchvision==0.14.1 4 | torch==1.13.1 5 | -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/runs/Nov07_18-58-35_wz/events.out.tfevents.1604746311.localhost.41577.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_classification/train_multi_GPU/runs/Nov07_18-58-35_wz/events.out.tfevents.1604746311.localhost.41577.0 -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/syncbn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_classification/train_multi_GPU/syncbn.png -------------------------------------------------------------------------------- /pytorch_classification/train_multi_GPU/training_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_classification/train_multi_GPU/training_time.png -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/README.md: -------------------------------------------------------------------------------- 1 | ## 代码使用简介 2 | 3 | 1. 下载好数据集,代码中默认使用的是花分类数据集,下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz), 4 | 如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0 5 | 2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径 6 | 3. 下载预训练权重,在`vit_model.py`文件中每个模型都有提供预训练权重的下载地址,根据自己使用的模型下载对应预训练权重 7 | 4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径 8 | 5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件) 9 | 6. 在`predict.py`脚本中导入和训练脚本中同样的模型,并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下) 10 | 7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径 11 | 8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了 12 | 9.
如果要使用自己的数据集,请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹),并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数 13 | -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from fvcore.nn import FlopCountAnalysis 3 | 4 | from vit_model import Attention 5 | 6 | 7 | def main(): 8 | # Self-Attention 9 | a1 = Attention(dim=512, num_heads=1) 10 | a1.proj = torch.nn.Identity() # remove Wo 11 | 12 | # Multi-Head Attention 13 | a2 = Attention(dim=512, num_heads=8) 14 | 15 | # [batch_size, num_tokens, total_embed_dim] 16 | t = (torch.rand(32, 1024, 512),) 17 | 18 | flops1 = FlopCountAnalysis(a1, t) 19 | print("Self-Attention FLOPs:", flops1.total()) 20 | 21 | flops2 = FlopCountAnalysis(a2, t) 22 | print("Multi-Head Attention FLOPs:", flops2.total()) 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | 28 | -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/my_dataset.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class MyDataSet(Dataset): 7 | """自定义数据集""" 8 | 9 | def __init__(self, images_path: list, images_class: list, transform=None): 10 | self.images_path = images_path 11 | self.images_class = images_class 12 | self.transform = transform 13 | 14 | def __len__(self): 15 | return len(self.images_path) 16 | 17 | def __getitem__(self, item): 18 | img = Image.open(self.images_path[item]) 19 | # RGB为彩色图片,L为灰度图片 20 | if img.mode != 'RGB': 21 | raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) 22 | label = self.images_class[item] 23 | 24 | if self.transform is not None: 25 | img = self.transform(img) 26 | 27 | return img, label 28 | 29 | @staticmethod 30 | def collate_fn(batch): 31 | # 官方实现的default_collate可以参考 32 | # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py 33 | images, labels = tuple(zip(*batch)) 34 | 35 | images = torch.stack(images, dim=0) 36 | labels = torch.as_tensor(labels) 37 | return images, labels 38 | -------------------------------------------------------------------------------- /pytorch_classification/vision_transformer/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | from PIL import Image 6 | from torchvision import transforms 7 | import matplotlib.pyplot as plt 8 | 9 | from vit_model import vit_base_patch16_224_in21k as create_model 10 | 11 | 12 | def main(): 13 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 14 | 15 | data_transform = transforms.Compose( 16 | [transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]) 20 | 21 | # load image 22 | img_path = "../tulip.jpg" 23 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 24 | img = Image.open(img_path) 25 | plt.imshow(img) 26 | # [N, C, H, W] 27 | img = data_transform(img) 28 | # expand batch dimension 29 | img = torch.unsqueeze(img, dim=0) 30 | 31 | # read class_indict 32 | json_path = './class_indices.json' 33 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 34 | 35 | with open(json_path, "r") as f: 36
| class_indict = json.load(f) 37 | 38 | # create model 39 | model = create_model(num_classes=5, has_logits=False).to(device) 40 | # load model weights 41 | model_weight_path = "./weights/model-9.pth" 42 | model.load_state_dict(torch.load(model_weight_path, map_location=device)) 43 | model.eval() 44 | with torch.no_grad(): 45 | # predict class 46 | output = torch.squeeze(model(img.to(device))).cpu() 47 | predict = torch.softmax(output, dim=0) 48 | predict_cla = torch.argmax(predict).numpy() 49 | 50 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], 51 | predict[predict_cla].numpy()) 52 | plt.title(print_res) 53 | for i in range(len(predict)): 54 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 55 | predict[i].numpy())) 56 | plt.show() 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/README.md: -------------------------------------------------------------------------------- 1 | # DeepPose 2 | ## 对应论文 3 | 论文名称:`DeepPose: Human Pose Estimation via Deep Neural Networks` 4 | 论文arxiv链接:[https://arxiv.org/abs/1312.4659](https://arxiv.org/abs/1312.4659) 5 | 6 | ## 开发环境 7 | 开发环境主要信息如下,其他Python依赖详情可见`requirements.txt`文件 8 | - Python3.10 9 | - torch2.0.1+cu118(建议大于等于此版本) 10 | - torchvision0.15.2+cu118(建议大于等于此版本) 11 | 12 | ## 训练数据集准备 13 | 该项目采用的训练数据是WFLW数据集(人脸98点检测),官方链接:[https://wywu.github.io/projects/LAB/WFLW.html](https://wywu.github.io/projects/LAB/WFLW.html) 14 | 15 | 在官方网页下载数据集后解压并组织成如下目录形式: 16 | ``` 17 | WFLW 18 | ├── WFLW_annotations 19 | │ ├── list_98pt_rect_attr_train_test 20 | │ └── list_98pt_test 21 | └── WFLW_images 22 | ├── 0--Parade 23 | ├── 1--Handshaking 24 | ├── 10--People_Marching 25 | ├── 11--Meeting 26 | ├── 12--Group 27 | └── ...... 
28 | ``` 29 | 30 | ## 预训练权重准备 31 | 由于该项目默认使用的backbone是torchvision中的resnet50,在实例化模型时会自动下载在imagenet上的预训练权重。 32 | - 若训练环境可正常联网,则会自动下载预训练权重 33 | - 若训练环境无法正常连接网络,可预先在联网的机器上手动下载,下载链接:[https://download.pytorch.org/models/resnet50-11ad3fa6.pth](https://download.pytorch.org/models/resnet50-11ad3fa6.pth) 下载完成后将权重拷贝至训练服务器的`~/.cache/torch/hub/checkpoints`目录下即可 34 | 35 | ## 启动训练 36 | 将训练脚本中的`--dataset_dir`设置成自己构建的`WFLW`数据集绝对路径,例如`/home/wz/datasets/WFLW` 37 | ### 单卡训练 38 | 使用`train.py`脚本: 39 | ```bash 40 | python train.py 41 | ``` 42 | ### 多卡训练 43 | 使用`train_multi_GPU.py`脚本: 44 | ``` 45 | torchrun --nproc_per_node=8 train_multi_GPU.py 46 | ``` 47 | 若要单独指定使用某些卡可在启动指令前加入`CUDA_VISIBLE_DEVICES`参数,例如: 48 | ``` 49 | CUDA_VISIBLE_DEVICES=4,5,6,7 torchrun --nproc_per_node=4 train_multi_GPU.py 50 | ``` 51 | 52 | ## 训练好的权重下载地址 53 | 若没有训练条件或者只想简单体验下,可使用本人训练好的模型权重(包含optimizer等信息故文件会略大),该权重在WFLW验证集上的NME指标为`0.048`,百度网盘下载地址:[https://pan.baidu.com/s/1L_zg-fmocEyzhSTxj8IDJw](https://pan.baidu.com/s/1L_zg-fmocEyzhSTxj8IDJw) 54 | 提取码:8fux 55 | 56 | 下载完成后在当前项目下创建一个`weights`文件夹,并将权重放置该文件夹内。 57 | 58 | ## 测试图片 59 | 可参考`predict.py`文件,将`img_path`设置成自己要预测的人脸图片(注意这里只支持单人脸的关键点检测,故需要提供单独的人脸图片,具体使用时可配合一个人脸检测器联合使用),例如输入图片: 60 | 61 | ![test.jpg](./test_img.jpg) 62 | 63 | 网络预测可视化结果为: 64 | 65 | ![predict.jpg](./predict.jpg) 66 | 67 | ## 导出ONNX模型(可选) 68 | 若需要导出ONNX模型可使用`export_onnx.py`脚本。 -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/export_onnx.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from model import create_deep_pose_model 4 | 5 | 6 | def main(): 7 | img_hw = [256, 256] 8 | num_keypoints = 98 9 | weights_path = "./weights/model_weights_209.pth" 10 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 11 | 12 | # create model 13 | model = create_deep_pose_model(num_keypoints=num_keypoints) 14 | 15 | # load model weights 16 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 17 | model.load_state_dict(torch.load(weights_path, map_location="cpu")["model"]) 18 | model.to(device) 19 | 20 | model.eval() 21 | with torch.inference_mode(): 22 | x = torch.randn(size=(1, 3, img_hw[0], img_hw[1]), device=device) 23 | torch.onnx.export(model=model, 24 | args=(x,), 25 | f="deeppose.onnx") 26 | 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.models import resnet50, ResNet50_Weights 4 | 5 | 6 | def create_deep_pose_model(num_keypoints: int) -> nn.Module: 7 | res50 = resnet50(ResNet50_Weights.IMAGENET1K_V2) 8 | in_features = res50.fc.in_features 9 | res50.fc = nn.Linear(in_features=in_features, out_features=num_keypoints * 2) 10 | 11 | return res50 12 | 13 | 14 | if __name__ == '__main__': 15 | torch.manual_seed(1234) 16 | model = create_deep_pose_model(98) 17 | model.eval() 18 | with torch.inference_mode(): 19 | x = torch.randn(1, 3, 224, 224) 20 | res = model(x) 21 | print(res.shape) 22 | -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/predict.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_keypoint/DeepPose/predict.jpg -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | 7 | import transforms 8 | from model import create_deep_pose_model 9 | from utils import draw_keypoints 10 | 11 | 12 | def main(): 13 | img_hw = [256, 256] 14 | num_keypoints = 98 15 | img_path = "./test_img.jpg" 16 | weights_path = "./weights/model_weights_209.pth" 17 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 18 | 19 | transform = transforms.Compose([ 20 | transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw), 21 | transforms.ToTensor(), 22 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 23 | ]) 24 | 25 | # load image 26 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 27 | img = np.array(Image.open(img_path)) 28 | h, w, c = img.shape 29 | target = {"box": [0, 0, w, h]} 30 | img_tensor, target = transform(img, target=target) 31 | # expand batch dimension 32 | img_tensor = img_tensor.unsqueeze(0) 33 | 34 | # create model 35 | model = create_deep_pose_model(num_keypoints=num_keypoints) 36 | 37 | # load model weights 38 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 39 | model.load_state_dict(torch.load(weights_path, map_location="cpu")["model"]) 40 | model.to(device) 41 | 42 | # prediction 43 | model.eval() 44 | with torch.inference_mode(): 45 | with torch.autocast(device_type=device.type): 46 | pred = torch.squeeze(model(img_tensor.to(device))).reshape([-1, 2]).cpu().numpy() 47 | 48 | wh_tensor = np.array(img_hw[::-1], dtype=np.float32).reshape([1, 2]) 49 | pred = pred * wh_tensor # rel coord to abs coord 50 | pred = transforms.affine_points_np(pred, target["m_inv"].numpy()) 51 | draw_keypoints(img, coordinate=pred, save_path="predict.jpg", radius=2) 52 | 53 | 54 | if __name__ == '__main__': 55 | main() 56 | -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=2.0.1 2 | torchvision>=0.15.2 3 | opencv-python 4 | tqdm 5 | tensorboard -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/test_img.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_keypoint/DeepPose/test_img.jpg -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def draw_keypoints(img: np.ndarray, coordinate: np.ndarray, save_path: str, radius: int = 3, is_rel: bool = False): 6 | coordinate_ = coordinate.copy() 7 | if is_rel: 8 | h, w, c = img.shape 9 | coordinate_[:, 0] *= w 10 | coordinate_[:, 1] *= h 11 | coordinate_ = coordinate_.astype(np.int64).tolist() 12 | 13 | img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 14 | for x, y in coordinate_: 15
| cv2.circle(img_bgr, center=(x, y), radius=radius, color=(255, 0, 0), thickness=-1) 16 | 17 | cv2.imwrite(save_path, img_bgr) 18 | -------------------------------------------------------------------------------- /pytorch_keypoint/DeepPose/wflw_horizontal_flip_indices.py: -------------------------------------------------------------------------------- 1 | wflw_flip_indices_dict = { 2 | 0: 32, 3 | 1: 31, 4 | 2: 30, 5 | 3: 29, 6 | 4: 28, 7 | 5: 27, 8 | 6: 26, 9 | 7: 25, 10 | 8: 24, 11 | 9: 23, 12 | 10: 22, 13 | 11: 21, 14 | 12: 20, 15 | 13: 19, 16 | 14: 18, 17 | 15: 17, 18 | 16: 16, 19 | 17: 15, 20 | 18: 14, 21 | 19: 13, 22 | 20: 12, 23 | 21: 11, 24 | 22: 10, 25 | 23: 9, 26 | 24: 8, 27 | 25: 7, 28 | 26: 6, 29 | 27: 5, 30 | 28: 4, 31 | 29: 3, 32 | 30: 2, 33 | 31: 1, 34 | 32: 0, 35 | 33: 46, 36 | 34: 45, 37 | 35: 44, 38 | 36: 43, 39 | 37: 42, 40 | 38: 50, 41 | 39: 49, 42 | 40: 48, 43 | 41: 47, 44 | 42: 37, 45 | 43: 36, 46 | 44: 35, 47 | 45: 34, 48 | 46: 33, 49 | 47: 41, 50 | 48: 40, 51 | 49: 39, 52 | 50: 38, 53 | 51: 51, 54 | 52: 52, 55 | 53: 53, 56 | 54: 54, 57 | 55: 59, 58 | 56: 58, 59 | 57: 57, 60 | 58: 56, 61 | 59: 55, 62 | 60: 72, 63 | 61: 71, 64 | 62: 70, 65 | 63: 69, 66 | 64: 68, 67 | 65: 75, 68 | 66: 74, 69 | 67: 73, 70 | 68: 64, 71 | 69: 63, 72 | 70: 62, 73 | 71: 61, 74 | 72: 60, 75 | 73: 67, 76 | 74: 66, 77 | 75: 65, 78 | 76: 82, 79 | 77: 81, 80 | 78: 80, 81 | 79: 79, 82 | 80: 78, 83 | 81: 77, 84 | 82: 76, 85 | 83: 87, 86 | 84: 86, 87 | 85: 85, 88 | 86: 84, 89 | 87: 83, 90 | 88: 92, 91 | 89: 91, 92 | 90: 90, 93 | 91: 89, 94 | 92: 88, 95 | 93: 95, 96 | 94: 94, 97 | 95: 93, 98 | 96: 97, 99 | 97: 96, 100 | } 101 | -------------------------------------------------------------------------------- /pytorch_keypoint/HRNet/HRNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_keypoint/HRNet/HRNet.png -------------------------------------------------------------------------------- /pytorch_keypoint/HRNet/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .hrnet import HighResolutionNet 2 | -------------------------------------------------------------------------------- /pytorch_keypoint/HRNet/person.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_keypoint/HRNet/person.png -------------------------------------------------------------------------------- /pytorch_keypoint/HRNet/person_keypoints.json: -------------------------------------------------------------------------------- 1 | { 2 | "keypoints": ["nose","left_eye","right_eye","left_ear","right_ear","left_shoulder","right_shoulder","left_elbow","right_elbow","left_wrist","right_wrist","left_hip","right_hip","left_knee","right_knee","left_ankle","right_ankle"], 3 | "skeleton": [[16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13],[6,7],[6,8],[7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]], 4 | "flip_pairs": [[1,2], [3,4], [5,6], [7,8], [9,10], [11,12], [13,14], [15,16]], 5 | "kps_weights": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5], 6 | "upper_body_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 7 | "lower_body_ids": [11, 12, 13, 14, 15, 16] 8 | } 
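
Editor's note: the `flip_pairs`, `kps_weights` and body-part index fields in `person_keypoints.json` above are the dataset metadata that drive HRNet's horizontal-flip augmentation and its weighted heatmap loss (`train_utils/loss.py` below multiplies the per-keypoint MSE by `kps_weights`). As a rough illustration of how `flip_pairs` is consumed, here is a minimal sketch; it is not a file from this repository, and the helper name `flip_keypoints` is hypothetical:

```python
# Hedged sketch: swap left/right keypoints after a horizontal flip,
# using the 0-based index pairs from person_keypoints.json above.
import json

import numpy as np

with open("person_keypoints.json", "r") as f:
    meta = json.load(f)


def flip_keypoints(kps: np.ndarray, img_width: int) -> np.ndarray:
    """Mirror [num_kps, 2] (x, y) coordinates so that after the flip,
    index 1 ('left_eye') still indexes the subject's left eye."""
    flipped = kps.copy()
    flipped[:, 0] = img_width - 1 - flipped[:, 0]  # mirror the x coordinate
    for i, j in meta["flip_pairs"]:
        flipped[[i, j]] = flipped[[j, i]]  # swap the left/right channels
    return flipped
```

Without the pair swap, a flipped training sample would teach the network to predict the left shoulder at the right shoulder's channel, which is why the same metadata also appears as `wflw_horizontal_flip_indices.py` in the DeepPose project above.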
-------------------------------------------------------------------------------- /pytorch_keypoint/HRNet/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("step") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 | handles2, labels2 = ax2.get_legend_handles_labels() 23 | plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! ") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_keypoint/HRNet/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv_python==4.5.4.60 3 | lxml 4 | torch==1.10.1 5 | torchvision==0.11.1 6 | pycocotools 7 | matplotlib 8 | tqdm -------------------------------------------------------------------------------- /pytorch_keypoint/HRNet/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | from .coco_eval import EvalCOCOMetric 4 | from .coco_utils import coco_remove_images_without_annotations, convert_coco_poly_mask, convert_to_coco_api 5 | -------------------------------------------------------------------------------- /pytorch_keypoint/HRNet/train_utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class KpLoss(object): 5 | def __init__(self): 6 | self.criterion = torch.nn.MSELoss(reduction='none') 7 | 8 | def __call__(self, logits, targets): 9 | assert len(logits.shape) == 4, 'logits should be 4-ndim' 10 | device = logits.device 11 | bs = logits.shape[0] 12 | # [num_kps, H, W] -> [B, num_kps, H, W] 13 | heatmaps = torch.stack([t["heatmap"].to(device) for t in targets]) 14 | # [num_kps] -> [B, num_kps] 15 | kps_weights = torch.stack([t["kps_weights"].to(device) for t in targets]) 16 | 17 | # [B, num_kps, H, W] -> [B, num_kps] 18 | loss = self.criterion(logits, heatmaps).mean(dim=[2, 3]) 19 | loss = torch.sum(loss * kps_weights) / bs 20 | return loss 21 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/backbone/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .resnet50_fpn_model import resnet50_fpn_backbone 2 | from .mobilenetv2_model import MobileNetV2 3 | from .vgg_model import vgg 4 | from .feature_pyramid_network import LastLevelMaxPool, BackboneWithFPN 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/fasterRCNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_object_detection/faster_rcnn/fasterRCNN.png -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/network_files/__init__.py: -------------------------------------------------------------------------------- 1 | from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor 2 | from .rpn_function import AnchorsGenerator 3 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/network_files/image_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch import Tensor 3 | 4 | 5 | class ImageList(object): 6 | """ 7 | Structure that holds a list of images (of possibly 8 | varying sizes) as a single tensor. 9 | This works by padding the images to the same size, 10 | and storing in a field the original sizes of each image 11 | """ 12 | 13 | def __init__(self, tensors, image_sizes): 14 | # type: (Tensor, List[Tuple[int, int]]) -> None 15 | """ 16 | Arguments: 17 | tensors (tensor) padding后的图像数据 18 | image_sizes (list[tuple[int, int]]) padding前的图像尺寸 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, device): 24 | # type: (Device) -> ImageList # noqa 25 | cast_tensor = self.tensors.to(device) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("step") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 | handles2, labels2 = ax2.get_legend_handles_labels() 23 | 
plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! ") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/record_mAP.txt: -------------------------------------------------------------------------------- 1 | COCO results: 2 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.526 3 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.804 4 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.586 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.211 6 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.403 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.580 8 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.454 9 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.639 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.646 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.347 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.540 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.693 14 | 15 | mAP(IoU=0.5) for each category: 16 | aeroplane : 0.8759546352558178 17 | bicycle : 0.8554609242543677 18 | bird : 0.8434943725365999 19 | boat : 0.6753024837855667 20 | bottle : 0.7185899054232459 21 | bus : 0.8691082170432654 22 | car : 0.8771002682431779 23 | cat : 0.9169138943375639 24 | chair : 0.6403466317122392 25 | cow : 0.8285552434280278 26 | diningtable : 0.6437938565684241 27 | dog : 0.8745793980119227 28 | horse : 0.8718238708874728 29 | motorbike : 0.8910672301923952 30 | person : 0.9047338725598096 31 | pottedplant : 0.5808810399193133 32 | sheep : 0.86045368568359 33 | sofa : 0.7239390963388067 34 | train : 0.8652277764020805 35 | tvmonitor : 0.7683550206571649 -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | numpy 4 | tqdm 5 | torch==1.7.1 6 | torchvision==0.8.2 7 | pycocotools 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | 5 | def main(): 6 | random.seed(0) # 设置随机种子,保证随机结果可复现 7 | 8 | files_path = "./VOCdevkit/VOC2012/Annotations" 9 | assert os.path.exists(files_path), "path: '{}' does not exist.".format(files_path) 10 | 11 | val_rate = 0.5 12 | 13 | files_name = sorted([file.split(".")[0] for file in os.listdir(files_path)]) 14 | files_num = len(files_name) 15 | val_index = random.sample(range(0, 
files_num), k=int(files_num*val_rate)) 16 | train_files = [] 17 | val_files = [] 18 | for index, file_name in enumerate(files_name): 19 | if index in val_index: 20 | val_files.append(file_name) 21 | else: 22 | train_files.append(file_name) 23 | 24 | try: 25 | train_f = open("train.txt", "x") 26 | eval_f = open("val.txt", "x") 27 | train_f.write("\n".join(train_files)) 28 | eval_f.write("\n".join(val_files)) 29 | except FileExistsError as e: 30 | print(e) 31 | exit(1) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | from .coco_utils import get_coco_api_from_dataset 4 | from .coco_eval import CocoEvaluator 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data 4 | from pycocotools.coco import COCO 5 | 6 | 7 | def convert_to_coco_api(ds): 8 | coco_ds = COCO() 9 | # annotation IDs need to start at 1, not 0 10 | ann_id = 1 11 | dataset = {'images': [], 'categories': [], 'annotations': []} 12 | categories = set() 13 | for img_idx in range(len(ds)): 14 | # find better way to get target 15 | hw, targets = ds.coco_index(img_idx) 16 | image_id = targets["image_id"].item() 17 | img_dict = {} 18 | img_dict['id'] = image_id 19 | img_dict['height'] = hw[0] 20 | img_dict['width'] = hw[1] 21 | dataset['images'].append(img_dict) 22 | bboxes = targets["boxes"] 23 | bboxes[:, 2:] -= bboxes[:, :2] 24 | bboxes = bboxes.tolist() 25 | labels = targets['labels'].tolist() 26 | areas = targets['area'].tolist() 27 | iscrowd = targets['iscrowd'].tolist() 28 | num_objs = len(bboxes) 29 | for i in range(num_objs): 30 | ann = {} 31 | ann['image_id'] = image_id 32 | ann['bbox'] = bboxes[i] 33 | ann['category_id'] = labels[i] 34 | categories.add(labels[i]) 35 | ann['area'] = areas[i] 36 | ann['iscrowd'] = iscrowd[i] 37 | ann['id'] = ann_id 38 | dataset['annotations'].append(ann) 39 | ann_id += 1 40 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 41 | coco_ds.dataset = dataset 42 | coco_ds.createIndex() 43 | return coco_ds 44 | 45 | 46 | def get_coco_api_from_dataset(dataset): 47 | for _ in range(10): 48 | if isinstance(dataset, torchvision.datasets.CocoDetection): 49 | break 50 | if isinstance(dataset, torch.utils.data.Subset): 51 | dataset = dataset.dataset 52 | if isinstance(dataset, torchvision.datasets.CocoDetection): 53 | return dataset.coco 54 | return convert_to_coco_api(dataset) 55 | -------------------------------------------------------------------------------- /pytorch_object_detection/faster_rcnn/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torchvision.transforms import functional as F 3 | 4 | 5 | class Compose(object): 6 | """组合多个transform函数""" 7 | def __init__(self, transforms): 8 | self.transforms = transforms 9 | 10 | def __call__(self, image, target): 11 | for t in self.transforms: 12 | image, target = t(image, target) 13 | return image, target 14 | 15 | 16 | class 
ToTensor(object): 17 | """将PIL图像转为Tensor""" 18 | def __call__(self, image, target): 19 | image = F.to_tensor(image) 20 | return image, target 21 | 22 | 23 | class RandomHorizontalFlip(object): 24 | """随机水平翻转图像以及bboxes""" 25 | def __init__(self, prob=0.5): 26 | self.prob = prob 27 | 28 | def __call__(self, image, target): 29 | if random.random() < self.prob: 30 | height, width = image.shape[-2:] 31 | image = image.flip(-1) # 水平翻转图片 32 | bbox = target["boxes"] 33 | # bbox: xmin, ymin, xmax, ymax 34 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] # 翻转对应bbox坐标信息 35 | target["boxes"] = bbox 36 | return image, target 37 | -------------------------------------------------------------------------------- /pytorch_object_detection/mask_rcnn/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet50_fpn_model import resnet50_fpn_backbone 2 | -------------------------------------------------------------------------------- /pytorch_object_detection/mask_rcnn/coco91_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": "person", 3 | "2": "bicycle", 4 | "3": "car", 5 | "4": "motorcycle", 6 | "5": "airplane", 7 | "6": "bus", 8 | "7": "train", 9 | "8": "truck", 10 | "9": "boat", 11 | "10": "traffic light", 12 | "11": "fire hydrant", 13 | "12": "N/A", 14 | "13": "stop sign", 15 | "14": "parking meter", 16 | "15": "bench", 17 | "16": "bird", 18 | "17": "cat", 19 | "18": "dog", 20 | "19": "horse", 21 | "20": "sheep", 22 | "21": "cow", 23 | "22": "elephant", 24 | "23": "bear", 25 | "24": "zebra", 26 | "25": "giraffe", 27 | "26": "N/A", 28 | "27": "backpack", 29 | "28": "umbrella", 30 | "29": "N/A", 31 | "30": "N/A", 32 | "31": "handbag", 33 | "32": "tie", 34 | "33": "suitcase", 35 | "34": "frisbee", 36 | "35": "skis", 37 | "36": "snowboard", 38 | "37": "sports ball", 39 | "38": "kite", 40 | "39": "baseball bat", 41 | "40": "baseball glove", 42 | "41": "skateboard", 43 | "42": "surfboard", 44 | "43": "tennis racket", 45 | "44": "bottle", 46 | "45": "N/A", 47 | "46": "wine glass", 48 | "47": "cup", 49 | "48": "fork", 50 | "49": "knife", 51 | "50": "spoon", 52 | "51": "bowl", 53 | "52": "banana", 54 | "53": "apple", 55 | "54": "sandwich", 56 | "55": "orange", 57 | "56": "broccoli", 58 | "57": "carrot", 59 | "58": "hot dog", 60 | "59": "pizza", 61 | "60": "donut", 62 | "61": "cake", 63 | "62": "chair", 64 | "63": "couch", 65 | "64": "potted plant", 66 | "65": "bed", 67 | "66": "N/A", 68 | "67": "dining table", 69 | "68": "N/A", 70 | "69": "N/A", 71 | "70": "toilet", 72 | "71": "N/A", 73 | "72": "tv", 74 | "73": "laptop", 75 | "74": "mouse", 76 | "75": "remote", 77 | "76": "keyboard", 78 | "77": "cell phone", 79 | "78": "microwave", 80 | "79": "oven", 81 | "80": "toaster", 82 | "81": "sink", 83 | "82": "refrigerator", 84 | "83": "N/A", 85 | "84": "book", 86 | "85": "clock", 87 | "86": "vase", 88 | "87": "scissors", 89 | "88": "teddy bear", 90 | "89": "hair drier", 91 | "90": "toothbrush" 92 | } -------------------------------------------------------------------------------- /pytorch_object_detection/mask_rcnn/network_files/__init__.py: -------------------------------------------------------------------------------- 1 | from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor 2 | from .rpn_function import AnchorsGenerator 3 | from .mask_rcnn import MaskRCNN 4 | -------------------------------------------------------------------------------- 
/pytorch_object_detection/mask_rcnn/network_files/image_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch import Tensor 3 | 4 | 5 | class ImageList(object): 6 | """ 7 | Structure that holds a list of images (of possibly 8 | varying sizes) as a single tensor. 9 | This works by padding the images to the same size, 10 | and storing in a field the original sizes of each image 11 | """ 12 | 13 | def __init__(self, tensors, image_sizes): 14 | # type: (Tensor, List[Tuple[int, int]]) -> None 15 | """ 16 | Arguments: 17 | tensors (tensor) padding后的图像数据 18 | image_sizes (list[tuple[int, int]]) padding前的图像尺寸 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, device): 24 | # type: (Device) -> ImageList # noqa 25 | cast_tensor = self.tensors.to(device) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | -------------------------------------------------------------------------------- /pytorch_object_detection/mask_rcnn/pascal_voc_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": "aeroplane", 3 | "2": "bicycle", 4 | "3": "bird", 5 | "4": "boat", 6 | "5": "bottle", 7 | "6": "bus", 8 | "7": "car", 9 | "8": "cat", 10 | "9": "chair", 11 | "10": "cow", 12 | "11": "diningtable", 13 | "12": "dog", 14 | "13": "horse", 15 | "14": "motorbike", 16 | "15": "person", 17 | "16": "pottedplant", 18 | "17": "sheep", 19 | "18": "sofa", 20 | "19": "train", 21 | "20": "tvmonitor" 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/mask_rcnn/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("step") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 | handles2, labels2 = ax2.get_legend_handles_labels() 23 | plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! 
") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/mask_rcnn/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | numpy 4 | tqdm 5 | pycocotools 6 | Pillow 7 | torch==1.13.1 8 | torchvision==0.11.1 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/mask_rcnn/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | from .coco_eval import EvalCOCOMetric 4 | from .coco_utils import coco_remove_images_without_annotations, convert_coco_poly_mask, convert_to_coco_api 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/mask_rcnn/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torchvision.transforms import functional as F 3 | 4 | 5 | class Compose(object): 6 | """组合多个transform函数""" 7 | def __init__(self, transforms): 8 | self.transforms = transforms 9 | 10 | def __call__(self, image, target): 11 | for t in self.transforms: 12 | image, target = t(image, target) 13 | return image, target 14 | 15 | 16 | class ToTensor(object): 17 | """将PIL图像转为Tensor""" 18 | def __call__(self, image, target): 19 | image = F.to_tensor(image) 20 | return image, target 21 | 22 | 23 | class RandomHorizontalFlip(object): 24 | """随机水平翻转图像以及bboxes""" 25 | def __init__(self, prob=0.5): 26 | self.prob = prob 27 | 28 | def __call__(self, image, target): 29 | if random.random() < self.prob: 30 | height, width = image.shape[-2:] 31 | image = image.flip(-1) # 水平翻转图片 32 | bbox = target["boxes"] 33 | # bbox: xmin, ymin, xmax, ymax 34 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] # 翻转对应bbox坐标信息 35 | target["boxes"] = bbox 36 | if "masks" in target: 37 | target["masks"] = target["masks"].flip(-1) 38 | return image, target 39 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .feature_pyramid_network import FeaturePyramidNetwork, LastLevelP6P7, LastLevelMaxPool 2 | from .resnet50_fpn_model import resnet50_fpn_backbone 3 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/__init__.py: -------------------------------------------------------------------------------- 1 | from .retinanet import RetinaNet 2 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/image_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch import Tensor 3 | 4 | 5 | class ImageList(object): 6 | """ 7 | 
Structure that holds a list of images (of possibly 8 | varying sizes) as a single tensor. 9 | This works by padding the images to the same size, 10 | and storing in a field the original sizes of each image 11 | """ 12 | 13 | def __init__(self, tensors, image_sizes): 14 | # type: (Tensor, List[Tuple[int, int]]) -> None 15 | """ 16 | Arguments: 17 | tensors (tensor) padding后的图像数据 18 | image_sizes (list[tuple[int, int]]) padding前的图像尺寸 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, device): 24 | # type: (Device) -> ImageList # noqa 25 | cast_tensor = self.tensors.to(device) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/network_files/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def sigmoid_focal_loss( 6 | inputs: torch.Tensor, 7 | targets: torch.Tensor, 8 | alpha: float = 0.25, 9 | gamma: float = 2, 10 | reduction: str = "none", 11 | ): 12 | """ 13 | Original implementation from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py . 14 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 15 | 16 | Args: 17 | inputs: A float tensor of arbitrary shape. 18 | The predictions for each example. 19 | targets: A float tensor with the same shape as inputs. Stores the binary 20 | classification label for each element in inputs 21 | (0 for the negative class and 1 for the positive class). 22 | alpha: (optional) Weighting factor in range (0,1) to balance 23 | positive vs negative examples or -1 for ignore. Default = 0.25 24 | gamma: Exponent of the modulating factor (1 - p_t) to 25 | balance easy vs hard examples. 26 | reduction: 'none' | 'mean' | 'sum' 27 | 'none': No reduction will be applied to the output. 28 | 'mean': The output will be averaged. 29 | 'sum': The output will be summed. 30 | Returns: 31 | Loss tensor with the reduction option applied. 
32 | """ 33 | p = torch.sigmoid(inputs) 34 | ce_loss = F.binary_cross_entropy_with_logits( 35 | inputs, targets, reduction="none" 36 | ) 37 | p_t = p * targets + (1 - p) * (1 - targets) 38 | loss = ce_loss * ((1 - p_t) ** gamma) 39 | 40 | if alpha >= 0: 41 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 42 | loss = alpha_t * loss 43 | 44 | if reduction == "mean": 45 | loss = loss.mean() 46 | elif reduction == "sum": 47 | loss = loss.sum() 48 | 49 | return loss 50 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 0, 3 | "bicycle": 1, 4 | "bird": 2, 5 | "boat": 3, 6 | "bottle": 4, 7 | "bus": 5, 8 | "car": 6, 9 | "cat": 7, 10 | "chair": 8, 11 | "cow": 9, 12 | "diningtable": 10, 13 | "dog": 11, 14 | "horse": 12, 15 | "motorbike": 13, 16 | "person": 14, 17 | "pottedplant": 15, 18 | "sheep": 16, 19 | "sofa": 17, 20 | "train": 18, 21 | "tvmonitor": 19 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("step") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 | handles2, labels2 = ax2.get_legend_handles_labels() 23 | plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! 
") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | numpy 4 | tqdm 5 | torch==1.7.1 6 | torchvision==0.8.2 7 | pycocotools 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | from .coco_utils import get_coco_api_from_dataset 4 | from .coco_eval import CocoEvaluator 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/train_utils/coco_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data 4 | from pycocotools.coco import COCO 5 | 6 | 7 | def convert_to_coco_api(ds): 8 | coco_ds = COCO() 9 | # annotation IDs need to start at 1, not 0 10 | ann_id = 1 11 | dataset = {'images': [], 'categories': [], 'annotations': []} 12 | categories = set() 13 | for img_idx in range(len(ds)): 14 | # find better way to get target 15 | hw, targets = ds.coco_index(img_idx) 16 | image_id = targets["image_id"].item() 17 | img_dict = {} 18 | img_dict['id'] = image_id 19 | img_dict['height'] = hw[0] 20 | img_dict['width'] = hw[1] 21 | dataset['images'].append(img_dict) 22 | bboxes = targets["boxes"] 23 | bboxes[:, 2:] -= bboxes[:, :2] 24 | bboxes = bboxes.tolist() 25 | labels = targets['labels'].tolist() 26 | areas = targets['area'].tolist() 27 | iscrowd = targets['iscrowd'].tolist() 28 | num_objs = len(bboxes) 29 | for i in range(num_objs): 30 | ann = {} 31 | ann['image_id'] = image_id 32 | ann['bbox'] = bboxes[i] 33 | ann['category_id'] = labels[i] 34 | categories.add(labels[i]) 35 | ann['area'] = areas[i] 36 | ann['iscrowd'] = iscrowd[i] 37 | ann['id'] = ann_id 38 | dataset['annotations'].append(ann) 39 | ann_id += 1 40 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 41 | coco_ds.dataset = dataset 42 | coco_ds.createIndex() 43 | return coco_ds 44 | 45 | 46 | def get_coco_api_from_dataset(dataset): 47 | for _ in range(10): 48 | if isinstance(dataset, torchvision.datasets.CocoDetection): 49 | break 50 | if isinstance(dataset, torch.utils.data.Subset): 51 | dataset = dataset.dataset 52 | if isinstance(dataset, torchvision.datasets.CocoDetection): 53 | return dataset.coco 54 | return convert_to_coco_api(dataset) 55 | -------------------------------------------------------------------------------- /pytorch_object_detection/retinaNet/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torchvision.transforms import functional as F 3 | 4 | 5 | class Compose(object): 6 | """组合多个transform函数""" 7 | def __init__(self, 
transforms): 8 | self.transforms = transforms 9 | 10 | def __call__(self, image, target): 11 | for t in self.transforms: 12 | image, target = t(image, target) 13 | return image, target 14 | 15 | 16 | class ToTensor(object): 17 | """将PIL图像转为Tensor""" 18 | def __call__(self, image, target): 19 | image = F.to_tensor(image) 20 | return image, target 21 | 22 | 23 | class RandomHorizontalFlip(object): 24 | """随机水平翻转图像以及bboxes""" 25 | def __init__(self, prob=0.5): 26 | self.prob = prob 27 | 28 | def __call__(self, image, target): 29 | if random.random() < self.prob: 30 | height, width = image.shape[-2:] 31 | image = image.flip(-1) # 水平翻转图片 32 | bbox = target["boxes"] 33 | # bbox: xmin, ymin, xmax, ymax 34 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] # 翻转对应bbox坐标信息 35 | target["boxes"] = bbox 36 | return image, target 37 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/README.md: -------------------------------------------------------------------------------- 1 | # SSD: Single Shot MultiBox Detector 2 | 3 | ## 环境配置: 4 | * Python 3.6/3.7/3.8 5 | * Pytorch 1.7.1 6 | * pycocotools(Linux:```pip install pycocotools```; Windows:```pip install pycocotools-windows```(不需要额外安装vs)) 7 | * Ubuntu或Centos(不建议Windows) 8 | * 最好使用GPU训练 9 | 10 | ## 文件结构: 11 | ``` 12 | ├── src: 实现SSD模型的相关模块 13 | │ ├── resnet50_backbone.py 使用resnet50网络作为SSD的backbone 14 | │ ├── ssd_model.py SSD网络结构文件 15 | │ └── utils.py 训练过程中使用到的一些功能实现 16 | ├── train_utils: 训练验证相关模块(包括cocotools) 17 | ├── my_dataset.py: 自定义dataset用于读取VOC数据集 18 | ├── train_ssd300.py: 以resnet50做为backbone的SSD网络进行训练 19 | ├── train_multi_GPU.py: 针对使用多GPU的用户使用 20 | ├── predict_test.py: 简易的预测脚本,使用训练好的权重进行预测测试 21 | ├── pascal_voc_classes.json: pascal_voc标签文件 22 | ├── plot_curve.py: 用于绘制训练过程的损失以及验证集的mAP 23 | └── validation.py: 利用训练好的权重验证/测试数据的COCO指标,并生成record_mAP.txt文件 24 | ``` 25 | 26 | ## 预训练权重下载地址(下载后放入src文件夹中): 27 | * ResNet50+SSD: https://ngc.nvidia.com/catalog/models 28 | `搜索ssd -> 找到SSD for PyTorch(FP32) -> download FP32 -> 解压文件` 29 | * 如果找不到可通过百度网盘下载,链接:https://pan.baidu.com/s/1byOnoNuqmBLZMDA0-lbCMQ 提取码:iggj 30 | 31 | ## 数据集,本例程使用的是PASCAL VOC2012数据集(下载后放入项目当前文件夹中) 32 | * Pascal VOC2012 train/val数据集下载地址:http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 33 | * Pascal VOC2007 test数据集请参考:http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 34 | * 如果不了解数据集或者想使用自己的数据集进行训练,请参考我的bilibili:https://b23.tv/F1kSCK 35 | 36 | ## 训练方法 37 | * 确保提前准备好数据集 38 | * 确保提前下载好对应预训练模型权重 39 | * 单GPU训练或CPU,直接使用train_ssd300.py训练脚本 40 | * 若要使用多GPU训练,使用 "python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py" 指令,nproc_per_node参数为使用GPU数量 41 | * 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标,前12个值是COCO指标,后面两个值是训练平均损失以及学习率 42 | 43 | ## 如果对SSD算法原理不是很理解可参考我的bilibili 44 | * https://www.bilibili.com/video/BV1fT4y1L7Gi 45 | 46 | ## 进一步了解该项目,以及对SSD算法代码的分析可参考我的bilibili 47 | * https://www.bilibili.com/video/BV1vK411H771/ 48 | 49 | ## Resnet50 + SSD算法框架图 50 | ![Resnet50 SSD](res50_ssd.png) 51 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/pascal_voc_classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 
16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/plot_curve.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_loss_and_lr(train_loss, learning_rate): 6 | try: 7 | x = list(range(len(train_loss))) 8 | fig, ax1 = plt.subplots(1, 1) 9 | ax1.plot(x, train_loss, 'r', label='loss') 10 | ax1.set_xlabel("epoch") 11 | ax1.set_ylabel("loss") 12 | ax1.set_title("Train Loss and lr") 13 | plt.legend(loc='best') 14 | 15 | ax2 = ax1.twinx() 16 | ax2.plot(x, learning_rate, label='lr') 17 | ax2.set_ylabel("learning rate") 18 | ax2.set_xlim(0, len(train_loss)) # 设置横坐标整数间隔 19 | plt.legend(loc='best') 20 | 21 | handles1, labels1 = ax1.get_legend_handles_labels() 22 | handles2, labels2 = ax2.get_legend_handles_labels() 23 | plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right') 24 | 25 | fig.subplots_adjust(right=0.8) # 防止出现保存图片显示不全的情况 26 | fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))) 27 | plt.close() 28 | print("successful save loss curve! ") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/record_mAP.txt: -------------------------------------------------------------------------------- 1 | COCO results: 2 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.448 3 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.721 4 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.482 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.099 6 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.280 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521 8 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.418 9 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.565 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.573 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.419 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641 14 | 15 | mAP(IoU=0.5) for each category: 16 | aeroplane : 0.8532360243584314 17 | bicycle : 0.7496603797780927 18 | bird : 0.7658478672087958 19 | boat : 0.6079142920471263 20 | bottle : 0.4986565020053691 21 | bus : 0.8229568428349553 22 | car : 0.7940868387465018 23 | cat : 0.8800145761338203 24 | chair : 0.5090524550010037 25 | cow : 0.7344958411899583 26 | diningtable : 0.5379541883401677 27 | dog : 0.8230037525430133 28 | horse : 0.7880475852689804 29 | motorbike : 0.7879788462924051 30 | person : 0.8351553291238482 31 | pottedplant : 0.4420858247895347 32 | sheep : 0.7466344247593008 33 | sofa : 0.6627392793997164 34 | train : 0.8380502070312741 35 | tvmonitor : 0.7445168617489237 
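
Editor's note: as a sanity check on the table above, the summary `mAP(IoU=0.5)` is simply the arithmetic mean of the twenty per-class values, which works out to about 0.721 and matches the `AP @[ IoU=0.50 | area= all | maxDets=100 ]` line of the COCO results. The sketch below recovers it; it is not a repository file, the function name `mean_ap_from_record` is made up, and the parsing assumes the exact `name : value` layout of the `record_mAP.txt` shown above (as generated by `validation.py`):

```python
# Hedged sketch: average the per-class AP@0.5 entries of a record_mAP.txt.
def mean_ap_from_record(path: str = "record_mAP.txt") -> float:
    values = []
    with open(path, "r") as f:
        for line in f:
            parts = line.split(":")
            if len(parts) != 2:
                continue  # not a single "name : value" pair
            try:
                values.append(float(parts[1]))
            except ValueError:
                continue  # e.g. "COCO results:" or "IoU=0.50:0.95" lines
    return sum(values) / len(values) if values else 0.0


if __name__ == "__main__":
    print(round(mean_ap_from_record(), 3))  # ~0.721 for the file above
```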
-------------------------------------------------------------------------------- /pytorch_object_detection/ssd/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | tqdm 4 | pycocotools 5 | torch==1.7.1 6 | torchvision==0.8.2 7 | lxml 8 | Pillow 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/res50_ssd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_object_detection/ssd/res50_ssd.png -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .res50_backbone import resnet50 2 | from .ssd_model import SSD300, Backbone 3 | from .utils import dboxes300_coco, calc_iou_tensor, Encoder, PostProcess 4 | -------------------------------------------------------------------------------- /pytorch_object_detection/ssd/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_utils import get_coco_api_from_dataset 2 | from .coco_eval import CocoEvaluator 3 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 4 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 5 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet50_fpn_model import resnet50_fpn_backbone 2 | from .mobilenetv2_model import MobileNetV2 3 | from .vgg_model import vgg 4 | from .resnet import * 5 | from .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool 6 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/coco91_indices.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": "person", 3 | "2": "bicycle", 4 | "3": "car", 5 | "4": "motorcycle", 6 | "5": "airplane", 7 | "6": "bus", 8 | "7": "train", 9 | "8": "truck", 10 | "9": "boat", 11 | "10": "traffic light", 12 | "11": "fire hydrant", 13 | "12": "N/A", 14 | "13": "stop sign", 15 | "14": "parking meter", 16 | "15": "bench", 17 | "16": "bird", 18 | "17": "cat", 19 | "18": "dog", 20 | "19": "horse", 21 | "20": "sheep", 22 | "21": "cow", 23 | "22": "elephant", 24 | "23": "bear", 25 | "24": "zebra", 26 | "25": "giraffe", 27 | "26": "N/A", 28 | "27": "backpack", 29 | "28": "umbrella", 30 | "29": "N/A", 31 | "30": "N/A", 32 | "31": "handbag", 33 | "32": "tie", 34 | "33": "suitcase", 35 | "34": "frisbee", 36 | "35": "skis", 37 | "36": "snowboard", 38 | "37": "sports ball", 39 | "38": "kite", 40 | "39": "baseball bat", 41 | "40": "baseball glove", 42 | "41": "skateboard", 43 | "42": "surfboard", 44 | "43": "tennis racket", 45 | "44": "bottle", 46 | "45": "N/A", 47 | "46": "wine glass", 48 | "47": "cup", 49 | "48": "fork", 50 | "49": "knife", 51 | "50": "spoon", 52 | "51": "bowl", 53 | "52": "banana", 54 | "53": "apple", 55 | "54": "sandwich", 56 | "55": "orange", 57 | "56": "broccoli", 58 | "57": "carrot", 59 | "58": "hot dog", 60 | "59": "pizza", 61 | "60": "donut", 62 | "61": "cake", 63 | "62": 
"chair", 64 | "63": "couch", 65 | "64": "potted plant", 66 | "65": "bed", 67 | "66": "N/A", 68 | "67": "dining table", 69 | "68": "N/A", 70 | "69": "N/A", 71 | "70": "toilet", 72 | "71": "N/A", 73 | "72": "tv", 74 | "73": "laptop", 75 | "74": "mouse", 76 | "75": "remote", 77 | "76": "keyboard", 78 | "77": "cell phone", 79 | "78": "microwave", 80 | "79": "oven", 81 | "80": "toaster", 82 | "81": "sink", 83 | "82": "refrigerator", 84 | "83": "N/A", 85 | "84": "book", 86 | "85": "clock", 87 | "86": "vase", 88 | "87": "scissors", 89 | "88": "teddy bear", 90 | "89": "hair drier", 91 | "90": "toothbrush" 92 | } -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/compute_receptive_field.py: -------------------------------------------------------------------------------- 1 | # vgg16(D) 2 | model = [[3, 1], 3 | [3, 1], 4 | [2, 2], # maxpool 5 | [3, 1], 6 | [3, 1], 7 | [2, 2], # maxpool 8 | [3, 1], 9 | [3, 1], 10 | [3, 1], 11 | [2, 2], # maxpool 12 | [3, 1], 13 | [3, 1], 14 | [3, 1], 15 | [2, 2], # maxpool 16 | [3, 1], 17 | [3, 1], 18 | [3, 1]] 19 | 20 | field = model[-1][0] 21 | for kernel, stride in model[::-1]: 22 | field = (field - 1) * stride + kernel 23 | print(field) # 228 24 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/network_files/__init__.py: -------------------------------------------------------------------------------- 1 | from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor 2 | from .rpn_function import AnchorsGenerator 3 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/network_files/image_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch import Tensor 3 | 4 | 5 | class ImageList(object): 6 | """ 7 | Structure that holds a list of images (of possibly 8 | varying sizes) as a single tensor. 
9 | This works by padding the images to the same size, 10 | and storing in a field the original sizes of each image 11 | """ 12 | 13 | def __init__(self, tensors, image_sizes): 14 | # type: (Tensor, List[Tuple[int, int]]) -> None 15 | """ 16 | Arguments: 17 | tensors (tensor): the image data after padding 18 | image_sizes (list[tuple[int, int]]): the image sizes before padding 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, device): 24 | # type: (Device) -> ImageList # noqa 25 | cast_tensor = self.tensors.to(device) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 |
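A minimal sketch of how such a structure is typically populated (toy tensors; the repo's actual batching happens in its transform module, not here):

```python
import torch

# two images of different sizes, zero-padded into one (2, 3, 32, 48) tensor
imgs = [torch.rand(3, 30, 40), torch.rand(3, 32, 48)]
batch = torch.zeros(2, 3, 32, 48)
for i, img in enumerate(imgs):
    batch[i, :, :img.shape[-2], :img.shape[-1]].copy_(img)

# ImageList is the class defined in the file above
image_list = ImageList(batch, [(30, 40), (32, 48)])
print(image_list.tensors.shape, image_list.image_sizes)
```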
") 29 | except Exception as e: 30 | print(e) 31 | 32 | 33 | def plot_map(mAP): 34 | try: 35 | x = list(range(len(mAP))) 36 | plt.plot(x, mAP, label='mAp') 37 | plt.xlabel('epoch') 38 | plt.ylabel('mAP') 39 | plt.title('Eval mAP') 40 | plt.xlim(0, len(mAP)) 41 | plt.legend(loc='best') 42 | plt.savefig('./mAP.png') 43 | plt.close() 44 | print("successful save mAP curve!") 45 | except Exception as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | numpy 4 | tqdm 5 | pycocotools 6 | Pillow 7 | torch==1.10 8 | torchvision==0.11.1 9 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/train_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 | from .coco_eval import EvalCOCOMetric 4 | -------------------------------------------------------------------------------- /pytorch_object_detection/train_coco_dataset/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torchvision.transforms import functional as F 3 | 4 | 5 | class Compose(object): 6 | """组合多个transform函数""" 7 | def __init__(self, transforms): 8 | self.transforms = transforms 9 | 10 | def __call__(self, image, target): 11 | for t in self.transforms: 12 | image, target = t(image, target) 13 | return image, target 14 | 15 | 16 | class ToTensor(object): 17 | """将PIL图像转为Tensor""" 18 | def __call__(self, image, target): 19 | image = F.to_tensor(image) 20 | return image, target 21 | 22 | 23 | class RandomHorizontalFlip(object): 24 | """随机水平翻转图像以及bboxes""" 25 | def __init__(self, prob=0.5): 26 | self.prob = prob 27 | 28 | def __call__(self, image, target): 29 | if random.random() < self.prob: 30 | height, width = image.shape[-2:] 31 | image = image.flip(-1) # 水平翻转图片 32 | bbox = target["boxes"] 33 | # bbox: xmin, ymin, xmax, ymax 34 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] # 翻转对应bbox坐标信息 35 | target["boxes"] = bbox 36 | return image, target 37 | -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/build_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_object_detection/yolov3_spp/build_utils/__init__.py -------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/build_utils/img_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def letterbox(img: np.ndarray, 6 | new_shape=(416, 416), 7 | color=(114, 114, 114), 8 | auto=True, 9 | scale_fill=False, 10 | scale_up=True): 11 | """ 12 | 将图片缩放调整到指定大小 13 | :param img: 输入的图像numpy格式 14 | :param new_shape: 输入网络的shape 15 | :param color: padding用什么颜色填充 16 | :param auto: 17 | :param scale_fill: 简单粗暴缩放到指定大小 18 | :param scale_up: 只缩小,不放大 19 | :return: 20 | """ 21 | 22 | shape = img.shape[:2] # [h, w] 23 | if isinstance(new_shape, int): 24 | new_shape 
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/build_utils/img_utils.py: --------------------------------------------------------------------------------
1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def letterbox(img: np.ndarray, 6 | new_shape=(416, 416), 7 | color=(114, 114, 114), 8 | auto=True, 9 | scale_fill=False, 10 | scale_up=True): 11 | """ 12 | Resize the image to the given size 13 | :param img: input image as a numpy array 14 | :param new_shape: target shape fed to the network 15 | :param color: fill color used for the padding 16 | :param auto: 17 | :param scale_fill: simply stretch the image to the target size 18 | :param scale_up: whether scaling up is allowed (False: only scale down, never up) 19 | :return: 20 | """ 21 | 22 | shape = img.shape[:2] # [h, w] 23 | if isinstance(new_shape, int): 24 | new_shape = (new_shape, new_shape) 25 | 26 | # scale ratio (new / old) 27 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 28 | if not scale_up: # only scale down, do not scale up (for better test mAP): larger images are shrunk, smaller ones stay unchanged 29 | r = min(r, 1.0) 30 | 31 | # compute padding 32 | ratio = r, r # width, height ratios 33 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 34 | dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding 35 | if auto: # minimum rectangle: keep the original aspect ratio and scale the longer side to the target size 36 | # the modulo here keeps the padded image a multiple of 32 (for 416x416) or 64 (for 512x512) 37 | dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding 38 | elif scale_fill: # stretch: brute-force resize to the target size 39 | dw, dh = 0, 0 40 | new_unpad = new_shape[::-1] # [h, w] -> [w, h] 41 | ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # wh ratios 42 | 43 | dw /= 2 # divide padding into 2 sides (top/bottom, left/right) 44 | dh /= 2 45 | 46 | # shape:[h, w] new_unpad:[w, h] 47 | if shape[::-1] != new_unpad: 48 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 49 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) # padding for the top and bottom sides 50 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) # padding for the left and right sides 51 | 52 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 53 | return img, ratio, (dw, dh) 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
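A minimal usage sketch of letterbox (the input image here is a dummy array; the shapes are only illustrative):

```python
import numpy as np

img = np.zeros((360, 640, 3), dtype=np.uint8)  # dummy h=360, w=640 image
padded, ratio, (dw, dh) = letterbox(img, new_shape=(416, 416), auto=True)
print(padded.shape, ratio, (dw, dh))

# ratio and (dw, dh) are exactly what is needed later to map boxes
# predicted on the padded image back to the original image:
#   x_orig = (x_padded - dw) / ratio[0]
#   y_orig = (y_padded - dh) / ratio[1]
```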
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/cfg/hyp.yaml: --------------------------------------------------------------------------------
1 | # Hyperparameters for training 2 | 3 | giou: 3.54 # giou loss gain 4 | cls: 37.4 # cls loss gain 5 | cls_pw: 1.0 # cls BCELoss positive_weight 6 | obj: 64.3 # obj loss gain (*=img_size/320 if img_size != 320) 7 | obj_pw: 1.0 # obj BCELoss positive_weight 8 | iou_t: 0.20 # iou training threshold 9 | lr0: 0.001 # initial learning rate (SGD=5E-3 Adam=5E-4) 10 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 11 | momentum: 0.937 # SGD momentum 12 | weight_decay: 0.0005 # optimizer weight decay 13 | fl_gamma: 0.0 # focal loss gamma (efficientDet default is gamma=1.5) 14 | hsv_h: 0.0138 # image HSV-Hue augmentation (fraction) 15 | hsv_s: 0.678 # image HSV-Saturation augmentation (fraction) 16 | hsv_v: 0.36 # image HSV-Value augmentation (fraction) 17 | degrees: 0. # image rotation (+/- deg) 18 | translate: 0. # image translation (+/- fraction) 19 | scale: 0. # image scale (+/- gain) 20 | shear: 0. # image shear (+/- deg)
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/data/pascal_voc_classes.json: --------------------------------------------------------------------------------
1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | }
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/requirements.txt: --------------------------------------------------------------------------------
1 | numpy 2 | opencv_python==4.3.0.36 3 | lxml 4 | torch==1.7.1 5 | torchvision==0.8.2 6 | scipy 7 | pycocotools 8 | matplotlib 9 | tqdm 10 | tensorboard==2.1.0 11 | PyYAML 12 |
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/runs/Oct28_17-55-29_wz/events.out.tfevents.1603791769.localhost.localdomain.178338.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_object_detection/yolov3_spp/runs/Oct28_17-55-29_wz/events.out.tfevents.1603791769.localhost.localdomain.178338.0
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/train_utils/__init__.py: --------------------------------------------------------------------------------
1 | from .coco_utils import get_coco_api_from_dataset 2 | from .coco_eval import CocoEvaluator 3 | from .distributed_utils import init_distributed_mode, torch_distributed_zero_first 4 |
-------------------------------------------------------------------------------- /pytorch_object_detection/yolov3_spp/yolov3spp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_object_detection/yolov3_spp/yolov3spp.png
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/deeplabv3_resnet50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_segmentation/deeplab_v3/deeplabv3_resnet50.png
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/get_palette.py: --------------------------------------------------------------------------------
1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # read the mask label 6 | target = Image.open("./2007_001288.png") 7 | # get the palette 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # convert to a dict 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 |
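The inverse operation, re-applying the saved palette to a predicted mask, looks roughly like this (a sketch assuming the palette.json layout produced above; the mask values here are dummy class ids):

```python
import json
import numpy as np
from PIL import Image

with open("palette.json", "r") as f:
    palette_dict = json.load(f)  # {"0": [r, g, b], "1": [r, g, b], ...}

# flatten back into the [r0, g0, b0, r1, g1, b1, ...] list that PIL expects
palette = [v for i in range(len(palette_dict)) for v in palette_dict[str(i)]]

mask = np.random.randint(0, 21, (256, 256), dtype=np.uint8)  # dummy prediction
out = Image.fromarray(mask, mode="P")
out.putpalette(palette)
out.save("pred_mask.png")
```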
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/pascal_voc_classes.json: --------------------------------------------------------------------------------
1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | }
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/requirements.txt: --------------------------------------------------------------------------------
1 | numpy==1.22.0 2 | torch==1.10.0 3 | torchvision==0.11.1 4 | Pillow 5 |
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/src/__init__.py: --------------------------------------------------------------------------------
1 | from .deeplabv3_model import deeplabv3_resnet50, deeplabv3_resnet101, deeplabv3_mobilenetv3_large 2 |
-------------------------------------------------------------------------------- /pytorch_segmentation/deeplab_v3/train_utils/__init__.py: --------------------------------------------------------------------------------
1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 |
-------------------------------------------------------------------------------- /pytorch_segmentation/fcn/get_palette.py: --------------------------------------------------------------------------------
1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # read the mask label 6 | target = Image.open("./2007_001288.png") 7 | # get the palette 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # convert to a dict 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 |
-------------------------------------------------------------------------------- /pytorch_segmentation/fcn/pascal_voc_classes.json: --------------------------------------------------------------------------------
1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | }
-------------------------------------------------------------------------------- /pytorch_segmentation/fcn/requirements.txt: --------------------------------------------------------------------------------
1 | numpy==1.22.0 2 | torch==1.13.1 3 | torchvision==0.11.1 4 | Pillow 5 |
-------------------------------------------------------------------------------- /pytorch_segmentation/fcn/src/__init__.py: --------------------------------------------------------------------------------
1 | from .fcn_model import fcn_resnet50, fcn_resnet101 2 |
-------------------------------------------------------------------------------- /pytorch_segmentation/fcn/torch_fcn.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_segmentation/fcn/torch_fcn.png
-------------------------------------------------------------------------------- /pytorch_segmentation/fcn/train_utils/__init__.py: --------------------------------------------------------------------------------
1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 |
-------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/get_palette.py: --------------------------------------------------------------------------------
1 | import json 2 | import numpy as np 3 | from PIL import Image 4 | 5 | # read the mask label 6 | target = Image.open("./2007_001288.png") 7 | # get the palette 8 | palette = target.getpalette() 9 | palette = np.reshape(palette, (-1, 3)).tolist() 10 | # convert to a dict 11 | pd = dict((i, color) for i, color in enumerate(palette)) 12 | 13 | json_str = json.dumps(pd) 14 | with open("palette.json", "w") as f: 15 | f.write(json_str) 16 | 17 | # target = np.array(target) 18 | # print(target) 19 |
-------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/lraspp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_segmentation/lraspp/lraspp.png
-------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/pascal_voc_classes.json: --------------------------------------------------------------------------------
1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | }
-------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/requirements.txt: --------------------------------------------------------------------------------
1 | numpy==1.22.0 2 | torch==1.10.0 3 | torchvision==0.11.1 4 | Pillow 5 |
-------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/src/__init__.py: --------------------------------------------------------------------------------
1 | from .lraspp_model import lraspp_mobilenetv3_large 2 |
-------------------------------------------------------------------------------- /pytorch_segmentation/lraspp/train_utils/__init__.py: --------------------------------------------------------------------------------
1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 |
-------------------------------------------------------------------------------- /pytorch_segmentation/u2net/requirements.txt: --------------------------------------------------------------------------------
1 | numpy==1.22.0 2 | torch==1.13.1 3 | torchvision==0.11.1 4 | opencv_python==4.5.4.60 5 |
-------------------------------------------------------------------------------- /pytorch_segmentation/u2net/src/__init__.py: 
--------------------------------------------------------------------------------
1 | from .model import u2net_full, u2net_lite 2 |
-------------------------------------------------------------------------------- /pytorch_segmentation/u2net/train_utils/__init__.py: --------------------------------------------------------------------------------
1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler, get_params_groups 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 |
-------------------------------------------------------------------------------- /pytorch_segmentation/u2net/u2net.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_segmentation/u2net/u2net.png
-------------------------------------------------------------------------------- /pytorch_segmentation/unet/README.md: --------------------------------------------------------------------------------
1 | # U-Net(Convolutional Networks for Biomedical Image Segmentation) 2 | 3 | ## This project is mainly based on the following open-source repositories 4 | * [https://github.com/milesial/Pytorch-UNet](https://github.com/milesial/Pytorch-UNet) 5 | * [https://github.com/pytorch/vision](https://github.com/pytorch/vision) 6 | 7 | ## Environment: 8 | * Python3.6/3.7/3.8 9 | * Pytorch1.10 10 | * Ubuntu or CentOS (multi-GPU training is not yet supported on Windows) 11 | * Training on a GPU is strongly recommended 12 | * See `requirements.txt` for the detailed environment 13 | 14 | ## File structure: 15 | ``` 16 | ├── src: U-Net model code 17 | ├── train_utils: training, validation and multi-GPU training utilities 18 | ├── my_dataset.py: custom dataset for reading the DRIVE dataset (retinal vessel segmentation) 19 | ├── train.py: single-GPU training example 20 | ├── train_multi_GPU.py: for users training with multiple GPUs 21 | ├── predict.py: simple prediction script that runs inference with trained weights 22 | └── compute_mean_std.py: computes the per-channel mean and standard deviation of the dataset 23 | ``` 24 | 25 | ## DRIVE dataset download: 26 | * Official site: [https://drive.grand-challenge.org/](https://drive.grand-challenge.org/) 27 | * Baidu Netdisk: [https://pan.baidu.com/s/1Tjkrx2B9FgoJk0KviA-rDw](https://pan.baidu.com/s/1Tjkrx2B9FgoJk0KviA-rDw) access code: 8no8 28 | 29 | 30 | ## Training 31 | * Make sure the dataset is prepared in advance 32 | * For single-GPU or CPU training, use the train.py script directly 33 | * For multi-GPU training, run `torchrun --nproc_per_node=8 train_multi_GPU.py`, where `nproc_per_node` is the number of GPUs to use 34 | * To restrict which GPU devices are used, prefix the command with `CUDA_VISIBLE_DEVICES=0,3` (e.g. to use only the 1st and 4th GPUs of the machine) 35 | * `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py` 36 | 37 | ## Notes 38 | * When using the training scripts, set `--data-path` to the **root directory** that contains your `DRIVE` folder 39 | * When using the prediction script, set `weights_path` to the path of the weights you generated. 40 | * When using the validation file, make sure your validation/test set contains objects of every class, and only modify `--num-classes`, `--data-path` and `--weights`; try not to change the rest of the code 41 | 42 | ## U-Net weights trained on the DRIVE dataset (for testing only) 43 | - Link: https://pan.baidu.com/s/1BOqkEpgt1XRqziyc941Hcw access code: p50a 44 | 45 | ## If you are unfamiliar with the U-Net network, see my bilibili 46 | * [https://www.bilibili.com/video/BV1Vq4y127fB/](https://www.bilibili.com/video/BV1Vq4y127fB/) 47 | 48 | 49 | ## For more on this project and a walkthrough of the U-Net code, see my bilibili 50 | * [https://b23.tv/PCJJmqN](https://b23.tv/PCJJmqN) 51 | 52 | ## By default this U-Net uses bilinear interpolation for upsampling; the architecture is shown below 53 | ![u-net](unet.png) 54 |
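The README's compute_mean_std.py entry computes per-channel statistics over the ROI; they are typically plugged into the training transforms roughly like this (a sketch: the numbers below are placeholders, use whatever the script prints for your data):

```python
import torchvision.transforms as T

mean = (0.709, 0.381, 0.224)  # placeholder values; take them from compute_mean_std.py
std = (0.127, 0.079, 0.043)

transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])
```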
-------------------------------------------------------------------------------- /pytorch_segmentation/unet/compute_mean_std.py: --------------------------------------------------------------------------------
1 | import os 2 | from PIL import Image 3 | import numpy as np 4 | 5 | 6 | def main(): 7 | img_channels = 3 8 | img_dir = "./DRIVE/training/images" 9 | roi_dir = "./DRIVE/training/mask" 10 | assert os.path.exists(img_dir), f"image dir: '{img_dir}' does not exist." 11 | assert os.path.exists(roi_dir), f"roi dir: '{roi_dir}' does not exist." 12 | 13 | img_name_list = [i for i in os.listdir(img_dir) if i.endswith(".tif")] 14 | cumulative_mean = np.zeros(img_channels) 15 | cumulative_std = np.zeros(img_channels) 16 | for img_name in img_name_list: 17 | img_path = os.path.join(img_dir, img_name) 18 | ori_path = os.path.join(roi_dir, img_name.replace(".tif", "_mask.gif")) 19 | img = np.array(Image.open(img_path)) / 255. 20 | roi_img = np.array(Image.open(ori_path).convert('L')) 21 | 22 | img = img[roi_img == 255] 23 | cumulative_mean += img.mean(axis=0) 24 | cumulative_std += img.std(axis=0) 25 | 26 | mean = cumulative_mean / len(img_name_list) 27 | std = cumulative_std / len(img_name_list) 28 | print(f"mean: {mean}") 29 | print(f"std: {std}") 30 | 31 | 32 | if __name__ == '__main__': 33 | main() 34 |
-------------------------------------------------------------------------------- /pytorch_segmentation/unet/requirements.txt: --------------------------------------------------------------------------------
1 | numpy==1.22.0 2 | torch==1.13.1 3 | torchvision==0.11.1 4 | Pillow 5 |
-------------------------------------------------------------------------------- /pytorch_segmentation/unet/src/__init__.py: --------------------------------------------------------------------------------
1 | from .unet import UNet 2 | from .mobilenet_unet import MobileV3Unet 3 | from .vgg_unet import VGG16UNet 4 |
-------------------------------------------------------------------------------- /pytorch_segmentation/unet/train_utils/__init__.py: --------------------------------------------------------------------------------
1 | from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler 2 | from .distributed_utils import init_distributed_mode, save_on_master, mkdir 3 |
-------------------------------------------------------------------------------- /pytorch_segmentation/unet/unet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WZMIAOMIAO/deep-learning-for-image-processing/481081c19fb51a94df6533775f4499fbafe1e0ac/pytorch_segmentation/unet/unet.png
-------------------------------------------------------------------------------- /summary_problem.md: --------------------------------------------------------------------------------
1 | ## Installing Tensorflow2.1 GPU and Pytorch1.3 GPU 2 | See my earlier blog post: [Centos7 安装Tensorflow2.1 GPU以及Pytorch1.3 GPU(CUDA10.1)](https://blog.csdn.net/qq_37541097/article/details/103933366) 3 | 4 | 5 | ## Can model weights trained with the keras functional API be mixed with weights trained with a subclassed model? [tensorflow2.0.0] 6 | Mixing them is strongly discouraged. Even if the two models have exactly the same names and structure, do not mix the weights — there are pitfalls. Load the weights with the same kind of model that was used to train them. 7 | 8 | 9 | ## model.summary() cannot be used with a subclassed model [tensorflow2.0.0] 10 | A subclassed model is not built automatically at instantiation (it is only built automatically once training starts). To use summary, you must build it manually first: 11 | model.build((batch_size, height, width, channel)) 12 | 13 | 14 | ## Cannot use keras plot_model(model, 'my_model.png') [tensorflow2.0.0] 15 | #### On Linux you need to install some packages: 16 | * pip install pydot==1.2.3 17 | * sudo apt-get install graphviz 18 | #### On Windows you likewise need some packages (Windows is more troublesome): 19 | * pip install pydot==1.2.3 20 | * install graphviz and add the relevant environment variables 21 | Reference: https://github.com/XifengGuo/CapsNet-Keras/issues/7 22 | 23 | ## Why does optimizer.zero_grad() need to be called for every batch [Pytorch1.3] 24 | If the historical gradients are not cleared, the newly computed gradients are accumulated on top of them (through this behavior you can emulate training with a much larger effective batch size). 25 | Reference: https://www.zhihu.com/question/303070254 26 | 27 | ## Pytorch1.3 ImportError: cannot import name 'PILLOW_VERSION' [Pytorch1.3] 28 | Caused by a too-recent pillow version; install a version below 7.0.0
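To make the accumulation trick from the optimizer.zero_grad() answer concrete, a minimal sketch (model, loss_fn and data_loader are hypothetical names) that emulates a 4x larger batch:

```python
accum_steps = 4
optimizer.zero_grad()
for step, (images, labels) in enumerate(data_loader):
    loss = loss_fn(model(images), labels) / accum_steps  # scale so grads average
    loss.backward()                       # gradients accumulate across iterations
    if (step + 1) % accum_steps == 0:
        optimizer.step()                  # update once per accumulated group
        optimizer.zero_grad()             # clear the history for the next group
```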
-------------------------------------------------------------------------------- /tensorflow_classification/ConfusionMatrix/class_indices.json: --------------------------------------------------------------------------------
1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | }
-------------------------------------------------------------------------------- /tensorflow_classification/ConvNeXt/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from model import convnext_tiny as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | im_height = im_width = 224 16 | 17 | # load image 18 | img_path = "../tulip.jpg" 19 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 20 | img = Image.open(img_path) 21 | # resize image 22 | img = img.resize((im_width, im_height)) 23 | plt.imshow(img) 24 | 25 | # read image 26 | img = np.array(img).astype(np.float32) 27 | 28 | # preprocess 29 | img = (img / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225] 30 | 31 | # Add the image to a batch where it's the only member. 32 | img = (np.expand_dims(img, 0)) 33 | 34 | # read class_indict 35 | json_path = './class_indices.json' 36 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 37 | 38 | with open(json_path, "r") as f: 39 | class_indict = json.load(f) 40 | 41 | # create model 42 | model = create_model(num_classes=num_classes) 43 | model.build([1, 224, 224, 3]) 44 | 45 | weights_path = './save_weights/model.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img, batch_size=1)) 50 | result = tf.keras.layers.Softmax()(result) 51 | predict_class = np.argmax(result) 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 54 | result[predict_class]) 55 | plt.title(print_res) 56 | for i in range(len(result)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | result[i])) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 |
-------------------------------------------------------------------------------- /tensorflow_classification/README.md: --------------------------------------------------------------------------------
1 | ## This folder holds the tensorflow implementations of the code 2 | **model.py**: the model definition 3 | **train.py**: the script that trains the model 4 | **predict.py**: the script that runs prediction with the model 5 | **class_indices.json**: the label file for the training dataset 6 | 7 | ------ 8 | To use the training scripts, download the flower classification dataset and split it into a training set and a validation set. 9 | [Click here](../data_set/README.md) for how to download the dataset; a ready-made split script is provided as well
-------------------------------------------------------------------------------- /tensorflow_classification/Test11_efficientnetV2/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from model import efficientnetv2_s as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | 16 | img_size = {"s": 384, 17 | "m": 480, 18 | "l": 480} 19 | num_model = "s" 20 | im_height = im_width = img_size[num_model] 21 | 22 | # load image 23 | img_path = "../tulip.jpg" 24 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 25 | img = Image.open(img_path) 26 | # resize image 27 | img = img.resize((im_width, im_height)) 28 | plt.imshow(img) 29 | 30 | # read image 31 | img = np.array(img).astype(np.float32) 32 | 33 | # preprocess 34 | img = (img / 255. - 0.5) / 0.5 35 | 36 | # Add the image to a batch where it's the only member. 37 | img = (np.expand_dims(img, 0)) 38 | 39 | # read class_indict 40 | json_path = './class_indices.json' 41 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 42 | 43 | with open(json_path, "r") as f: 44 | class_indict = json.load(f) 45 | 46 | # create model 47 | model = create_model(num_classes=num_classes) 48 | 49 | weights_path = './save_weights/efficientnetv2.ckpt' 50 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 51 | model.load_weights(weights_path) 52 | 53 | result = np.squeeze(model.predict(img)) 54 | result = tf.keras.layers.Softmax()(result) 55 | predict_class = np.argmax(result) 56 | 57 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 58 | result[predict_class]) 59 | plt.title(print_res) 60 | for i in range(len(result)): 61 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 62 | result[i])) 63 | plt.show() 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test11_efficientnetV2/trans_weights.py: --------------------------------------------------------------------------------
1 | from model import * 2 | 3 | 4 | def main(ckpt_path: str, 5 | model_name: str, 6 | model: tf.keras.Model): 7 | var_dict = {v.name.split(':')[0]: v for v in model.weights} 8 | 9 | reader = tf.train.load_checkpoint(ckpt_path) 10 | var_shape_map = reader.get_variable_to_shape_map() 11 | 12 | for key, var in var_dict.items(): 13 | key_ = model_name + "/" + key 14 | key_ = key_.replace("batch_normalization", "tpu_batch_normalization") 15 | if key_ in var_shape_map: 16 | if var_shape_map[key_] != var.shape: 17 | msg = "shape mismatch: {}".format(key) 18 | print(msg) 19 | else: 20 | var.assign(reader.get_tensor(key_), read_value=False) 21 | else: 22 | msg = "Not found {} in {}".format(key, ckpt_path) 23 | print(msg) 24 | 25 | model.save_weights("./{}.h5".format(model_name)) 26 | 27 | 28 | if __name__ == '__main__': 29 | model = efficientnetv2_s() 30 | model.build((1, 224, 224, 3)) 31 | main(ckpt_path="./efficientnetv2-s-21k-ft1k/model", 32 | model_name="efficientnetv2-s", 33 | model=model) 34 | 35 | # model = efficientnetv2_m() 36 | # model.build((1, 224, 224, 3)) 37 | # main(ckpt_path="./efficientnetv2-m-21k-ft1k/model", 38 | # model_name="efficientnetv2-m", 39 | # model=model) 40 | 41 | # model = efficientnetv2_l() 42 | # model.build((1, 224, 224, 3)) 43 | # main(ckpt_path="./efficientnetv2-l-21k-ft1k/model", 44 | # model_name="efficientnetv2-l", 45 | # model=model) 46 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test1_official_demo/model.py: --------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Dense, Flatten, Conv2D 2 | from tensorflow.keras import Model 3 | 4 | 5 | class MyModel(Model): 6 | def __init__(self): 7 | super(MyModel, self).__init__() 8 | self.conv1 = Conv2D(32, 3, activation='relu') 9 | self.flatten = Flatten() 10 | self.d1 = Dense(128, activation='relu') 11 | self.d2 = Dense(10, activation='softmax') 12 | 13 | def call(self, x, **kwargs): 14 | x = self.conv1(x) # input[batch, 28, 28, 1] output[batch, 26, 26, 32] 15 | x = self.flatten(x) # output [batch, 21632] 16 | x = self.d1(x) # output [batch, 128] 17 | return self.d2(x) # output [batch, 10] 18 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test2_alexnet/class_indices.json: --------------------------------------------------------------------------------
1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | }
-------------------------------------------------------------------------------- /tensorflow_classification/Test2_alexnet/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import json 3 | 4 | from PIL import Image 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from model import AlexNet_v1, AlexNet_v2 9 | 10 | 11 | def main(): 12 | im_height = 224 13 | im_width = 224 14 | 15 | # load image 16 | img_path = "../tulip.jpg" 17 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 18 | img = Image.open(img_path) 19 | 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value to (0-1) 25 | img = np.array(img) / 255. 26 | 27 | # Add the image to a batch where it's the only member. 28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | with open(json_path, "r") as f: 35 | class_indict = json.load(f) 36 | 37 | # create model 38 | model = AlexNet_v1(num_classes=5) 39 | weights_path = "./save_weights/myAlex.h5" 40 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 41 | model.load_weights(weights_path) 42 | 43 | # prediction 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test3_vgg/class_indices.json: --------------------------------------------------------------------------------
1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | }
-------------------------------------------------------------------------------- /tensorflow_classification/Test3_vgg/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import json 3 | 4 | from PIL import Image 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from model import vgg 9 | 10 | 11 | def main(): 12 | im_height = 224 13 | im_width = 224 14 | num_classes = 5 15 | 16 | # load image 17 | img_path = "../tulip.jpg" 18 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 19 | img = Image.open(img_path) 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value to (0-1) 25 | img = np.array(img) / 255. 26 | 27 | # Add the image to a batch where it's the only member. 28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | with open(json_path, "r") as f: 35 | class_indict = json.load(f) 36 | 37 | # create model 38 | model = vgg("vgg16", im_height=im_height, im_width=im_width, num_classes=num_classes) 39 | weights_path = "./save_weights/myVGG.h5" 40 | assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path) 41 | model.load_weights(weights_path) 42 | 43 | # prediction 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test4_goolenet/class_indices.json: --------------------------------------------------------------------------------
1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | }
-------------------------------------------------------------------------------- /tensorflow_classification/Test4_goolenet/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import glob 3 | import json 4 | 5 | from PIL import Image 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | from model import GoogLeNet 10 | 11 | 12 | def main(): 13 | im_height = 224 14 | im_width = 224 15 | 16 | # load image 17 | img_path = "../tulip.jpg" 18 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 19 | img = Image.open(img_path) 20 | # resize image to 224x224 21 | img = img.resize((im_width, im_height)) 22 | plt.imshow(img) 23 | 24 | # scaling pixel value and normalize 25 | img = ((np.array(img) / 255.) - 0.5) / 0.5 26 | 27 | # Add the image to a batch where it's the only member. 28 | img = (np.expand_dims(img, 0)) 29 | 30 | # read class_indict 31 | json_path = './class_indices.json' 32 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 33 | 34 | with open(json_path, "r") as f: 35 | class_indict = json.load(f) 36 | 37 | model = GoogLeNet(class_num=5, aux_logits=False) 38 | model.summary() 39 | # model.load_weights("./save_weights/myGoogLenet.h5", by_name=True) # h5 format 40 | weights_path = "./save_weights/myGoogLeNet.ckpt" 41 | assert len(glob.glob(weights_path + "*")), "cannot find {}".format(weights_path) 42 | model.load_weights(weights_path) 43 | 44 | result = np.squeeze(model.predict(img)) 45 | predict_class = np.argmax(result) 46 | 47 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 48 | result[predict_class]) 49 | plt.title(print_res) 50 | for i in range(len(result)): 51 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 52 | result[i])) 53 | plt.show() 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/class_indices.json: --------------------------------------------------------------------------------
1 | { 2 | "0": "daisy", 3 | "1": "dandelion", 4 | "2": "roses", 5 | "3": "sunflowers", 6 | "4": "tulips" 7 | }
-------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/read_ckpt.py: --------------------------------------------------------------------------------
1 | """ 2 | You can directly download the weights I have already converted 3 | Link: https://pan.baidu.com/s/1tLe9ahTMIwQAX7do_S59Zg access code: u199 4 | """ 5 | import tensorflow as tf 6 | 7 | 8 | def rename_var(ckpt_path, new_ckpt_path, num_classes, except_list): 9 | with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess: 10 | var_list = tf.train.list_variables(ckpt_path) 11 | new_var_list = [] 12 | 13 | for var_name, shape in var_list: 14 | print(var_name) 15 | if var_name in except_list: 16 | continue 17 | var = tf.train.load_variable(ckpt_path, var_name) 18 | new_var_name = var_name.replace('resnet_v1_50/', "") 19 | new_var_name = new_var_name.replace("bottleneck_v1/", "") 20 | new_var_name = new_var_name.replace("shortcut/weights", "shortcut/conv1/kernel") 21 | new_var_name = new_var_name.replace("weights", "kernel") 22 | new_var_name = new_var_name.replace("biases", "bias") 23 | re_var = tf.Variable(var, name=new_var_name) 24 | new_var_list.append(re_var) 25 | 26 | re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048, num_classes]), name="logits/kernel") 27 | new_var_list.append(re_var) 28 | re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name="logits/bias") 29 | new_var_list.append(re_var) 30 | saver = tf.compat.v1.train.Saver(new_var_list) 31 | sess.run(tf.compat.v1.global_variables_initializer()) 32 | saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False) 33 | 34 | 35 | def main(): 36 | except_list = ['global_step', 'resnet_v1_50/mean_rgb', 'resnet_v1_50/logits/biases', 'resnet_v1_50/logits/weights'] 37 | ckpt_path = './resnet_v1_50.ckpt' 38 | new_ckpt_path = './pretrain_weights.ckpt' 39 | num_classes = 5 40 | rename_var(ckpt_path, new_ckpt_path, num_classes, except_list) 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test5_resnet/read_h5.py: 
--------------------------------------------------------------------------------
1 | import h5py 2 | 3 | f = h5py.File('./save_weights/resNet_1.h5', 'r') 4 | for root_name, g in f.items(): 5 | print(root_name) 6 | for _, weights_dirs in g.attrs.items(): 7 | for i in weights_dirs: 8 | name = root_name + "/" + str(i, encoding="utf-8") 9 | data = f[name] 10 | print(data[()]) # Dataset.value was removed in h5py 3.x; index with [()] instead 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test6_mobilenet/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | import tensorflow as tf 9 | 10 | from model_v2 import MobileNetV2 11 | 12 | 13 | def main(): 14 | im_height = 224 15 | im_width = 224 16 | num_classes = 5 17 | 18 | # load image 19 | img_path = "../tulip.jpg" 20 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 21 | img = Image.open(img_path) 22 | # resize image to 224x224 23 | img = img.resize((im_width, im_height)) 24 | plt.imshow(img) 25 | 26 | # scaling pixel value to (-1,1) 27 | img = np.array(img).astype(np.float32) 28 | img = ((img / 255.) - 0.5) * 2.0 29 | 30 | # Add the image to a batch where it's the only member. 31 | img = (np.expand_dims(img, 0)) 32 | 33 | # read class_indict 34 | json_path = './class_indices.json' 35 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 36 | 37 | with open(json_path, "r") as f: 38 | class_indict = json.load(f) 39 | 40 | # create model 41 | feature = MobileNetV2(include_top=False) 42 | model = tf.keras.Sequential([feature, 43 | tf.keras.layers.GlobalAvgPool2D(), 44 | tf.keras.layers.Dropout(rate=0.5), 45 | tf.keras.layers.Dense(num_classes), 46 | tf.keras.layers.Softmax()]) 47 | weights_path = './save_weights/resMobileNetV2.ckpt' 48 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 49 | model.load_weights(weights_path) 50 | 51 | result = np.squeeze(model.predict(img)) 52 | predict_class = np.argmax(result) 53 | 54 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 55 | result[predict_class]) 56 | plt.title(print_res) 57 | for i in range(len(result)): 58 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 59 | result[i])) 60 | plt.show() 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 |
-------------------------------------------------------------------------------- /tensorflow_classification/Test6_mobilenet/trans_v3_weights.py: --------------------------------------------------------------------------------
1 | import re 2 | import tensorflow as tf 3 | from model_v3 import mobilenet_v3_large 4 | 5 | 6 | def change_word(word: str): 7 | word = word.replace("MobilenetV3/", "") 8 | 9 | if "weights" in word: 10 | word = word.replace("weights", "kernel") 11 | elif "Conv" in word and "biases" in word: 12 | word = word.replace("biases", "bias") 13 | 14 | return word 15 | 16 | 17 | def rename_var(ckpt_path, m_info): 18 | with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess: 19 | var_list = tf.train.list_variables(ckpt_path) 20 | pattern = "ExponentialMovingAverage|Momentum|global_step" 21 | 22 | var_dict = dict((change_word(name), [name, shape]) 23 | for name, shape in var_list 24 | if len(re.findall(pattern, name)) == 0) 25 | 26 | for k, v in m_info: 27 | assert k in var_dict, "{} not in var_dict".format(k) 28 | assert v == var_dict[k][1], "shape {} not equal {}".format(v, var_dict[k][1]) 29 | 30 | weights = [] 31 | for k, _ in m_info: 32 | var = tf.train.load_variable(ckpt_path, var_dict[k][0]) 33 | weights.append(var) 34 | 35 | return weights 36 | 37 | 38 | def main(): 39 | # https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz 40 | ckpt_path = './v3-large_224_1.0_float/pristine/model.ckpt-540000' 41 | save_path = './pre_mobilev3.h5' 42 | m = mobilenet_v3_large(input_shape=(224, 224, 3), num_classes=1001, include_top=True) 43 | m_info = [(i.name.replace(":0", ""), list(i.shape)) 44 | for i in m.weights] 45 | weights = rename_var(ckpt_path, m_info) 46 | m.set_weights(weights) 47 | m.save_weights(save_path) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 |
in var_dict".format(k) 28 | assert v == var_dict[k][1], "shape {} not equal {}".format(v, var_dict[k][1]) 29 | 30 | weights = [] 31 | for k, _ in m_info: 32 | var = tf.train.load_variable(ckpt_path, var_dict[k][0]) 33 | weights.append(var) 34 | 35 | return weights 36 | 37 | 38 | def main(): 39 | # https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz 40 | ckpt_path = './v3-large_224_1.0_float/pristine/model.ckpt-540000' 41 | save_path = './pre_mobilev3.h5' 42 | m = mobilenet_v3_large(input_shape=(224, 224, 3), num_classes=1001, include_top=True) 43 | m_info = [(i.name.replace(":0", ""), list(i.shape)) 44 | for i in m.weights] 45 | weights = rename_var(ckpt_path, m_info) 46 | m.set_weights(weights) 47 | m.save_weights(save_path) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /tensorflow_classification/Test7_shuffleNet/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | 9 | from model import shufflenet_v2_x1_0 10 | 11 | 12 | def main(): 13 | im_height = 224 14 | im_width = 224 15 | num_classes = 5 16 | 17 | mean = [0.485, 0.456, 0.406] 18 | std = [0.229, 0.224, 0.225] 19 | 20 | # load image 21 | img_path = "../tulip.jpg" 22 | assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) 23 | img = Image.open(img_path) 24 | # resize image to 224x224 25 | img = img.resize((im_width, im_height)) 26 | plt.imshow(img) 27 | 28 | # scaling pixel value to (-1,1) 29 | img = np.array(img).astype(np.float32) 30 | img = (img / 255. - mean) / std 31 | 32 | # Add the image to a batch where it's the only member. 
-------------------------------------------------------------------------------- /tensorflow_classification/Test9_efficientNet/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | 9 | from model import efficientnet_b0 as create_model 10 | 11 | 12 | def main(): 13 | num_classes = 5 14 | 15 | img_size = {"B0": 224, 16 | "B1": 240, 17 | "B2": 260, 18 | "B3": 300, 19 | "B4": 380, 20 | "B5": 456, 21 | "B6": 528, 22 | "B7": 600} 23 | num_model = "B0" 24 | im_height = im_width = img_size[num_model] 25 | 26 | # load image 27 | img_path = "../tulip.jpg" 28 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 29 | img = Image.open(img_path) 30 | # resize image to 224x224 31 | img = img.resize((im_width, im_height)) 32 | plt.imshow(img) 33 | 34 | # read image 35 | img = np.array(img).astype(np.float32) 36 | 37 | # Add the image to a batch where it's the only member. 38 | img = (np.expand_dims(img, 0)) 39 | 40 | # read class_indict 41 | json_path = './class_indices.json' 42 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 43 | 44 | with open(json_path, "r") as f: 45 | class_indict = json.load(f) 46 | 47 | # create model 48 | model = create_model(num_classes=num_classes) 49 | 50 | weights_path = './save_weights/efficientnet.ckpt' 51 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 52 | model.load_weights(weights_path) 53 | 54 | result = np.squeeze(model.predict(img)) 55 | predict_class = np.argmax(result) 56 | 57 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 58 | result[predict_class]) 59 | plt.title(print_res) 60 | for i in range(len(result)): 61 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 62 | result[i])) 63 | plt.show() 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 |
-------------------------------------------------------------------------------- /tensorflow_classification/analyze_weights_featuremap/analyze_kernel_weight.py: --------------------------------------------------------------------------------
1 | from alexnet_model import AlexNet_v1, AlexNet_v2 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | model = AlexNet_v1(class_num=5) # functional api 6 | # model = AlexNet_v2(class_num=5) # subclass api 7 | # model.build((None, 224, 224, 3)) 8 | model.load_weights("./myAlex.h5") 9 | # model.load_weights("./submodel.h5") 10 | model.summary() 11 | for layer in model.layers: 12 | for index, weight in enumerate(layer.weights): 13 | # [kernel_height, kernel_width, kernel_channel, kernel_number] 14 | weight_t = weight.numpy() 15 | # read a kernel information 16 | # k = weight_t[:, :, :, 0] 17 | 18 | # calculate mean, std, min, max 19 | weight_mean = weight_t.mean() 20 | weight_std = weight_t.std(ddof=1) 21 | weight_min = weight_t.min() 22 | weight_max = weight_t.max() 23 | print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean, 24 | weight_std, 25 | weight_min, 26 | weight_max)) 27 | 28 | # plot hist image 29 | plt.close() 30 | weight_vec = np.reshape(weight_t, [-1]) 31 | plt.hist(weight_vec, bins=50) 32 | plt.title(weight.name) 33 | plt.show()
-------------------------------------------------------------------------------- /tensorflow_classification/swin_transformer/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from model import swin_tiny_patch4_window7_224 as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | im_height = im_width = 224 16 | 17 | # load image 18 | img_path = "../tulip.jpg" 19 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 20 | img = Image.open(img_path) 21 | # resize image 22 | img = img.resize((im_width, im_height)) 23 | plt.imshow(img) 24 | 25 | # read image 26 | img = np.array(img).astype(np.float32) 27 | 28 | # preprocess 29 | img = (img / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225] 30 | 31 | # Add the image to a batch where it's the only member. 32 | img = (np.expand_dims(img, 0)) 33 | 34 | # read class_indict 35 | json_path = './class_indices.json' 36 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 37 | 38 | with open(json_path, "r") as f: 39 | class_indict = json.load(f) 40 | 41 | # create model 42 | model = create_model(num_classes=num_classes) 43 | model.build([1, im_height, im_width, 3]) 44 | 45 | weights_path = './save_weights/model.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img, batch_size=1)) 50 | result = tf.keras.layers.Softmax()(result) 51 | predict_class = np.argmax(result) 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 54 | result[predict_class]) 55 | plt.title(print_res) 56 | for i in range(len(result)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | result[i])) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 |
-------------------------------------------------------------------------------- /tensorflow_classification/vision_transformer/predict.py: --------------------------------------------------------------------------------
1 | import os 2 | import json 3 | import glob 4 | import numpy as np 5 | 6 | from PIL import Image 7 | import tensorflow as tf 8 | import matplotlib.pyplot as plt 9 | 10 | from vit_model import vit_base_patch16_224_in21k as create_model 11 | 12 | 13 | def main(): 14 | num_classes = 5 15 | im_height = im_width = 224 16 | 17 | # load image 18 | img_path = "../tulip.jpg" 19 | assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path) 20 | img = Image.open(img_path) 21 | # resize image 22 | img = img.resize((im_width, im_height)) 23 | plt.imshow(img) 24 | 25 | # read image 26 | img = np.array(img).astype(np.float32) 27 | 28 | # preprocess 29 | img = (img / 255. - 0.5) / 0.5 30 | 31 | # Add the image to a batch where it's the only member. 32 | img = (np.expand_dims(img, 0)) 33 | 34 | # read class_indict 35 | json_path = './class_indices.json' 36 | assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path) 37 | 38 | with open(json_path, "r") as f: 39 | class_indict = json.load(f) 40 | 41 | # create model 42 | model = create_model(num_classes=num_classes, has_logits=False) 43 | model.build([1, 224, 224, 3]) 44 | 45 | weights_path = './save_weights/model.ckpt' 46 | assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) 47 | model.load_weights(weights_path) 48 | 49 | result = np.squeeze(model.predict(img, batch_size=1)) 50 | result = tf.keras.layers.Softmax()(result) 51 | predict_class = np.argmax(result) 52 | 53 | print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], 54 | result[predict_class]) 55 | plt.title(print_res) 56 | for i in range(len(result)): 57 | print("class: {:10} prob: {:.3}".format(class_indict[str(i)], 58 | result[i])) 59 | plt.show() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | --------------------------------------------------------------------------------