├── readme.md ├── ssd ├── DEVELOP_GUIDE.md ├── README.md ├── TROUBLESHOOTING.md ├── configs │ ├── efficient_net_b3_ssd300_voc0712.yaml │ ├── mobilenet_v2_ssd320_voc0712.yaml │ ├── vgg_ssd300_coco_trainval35k.yaml │ ├── vgg_ssd300_visdrone0413.yaml │ ├── vgg_ssd300_voc0712.yaml │ ├── vgg_ssd512_coco_trainval35k.yaml │ └── vgg_ssd512_voc0712.yaml ├── data_loading.py ├── demo.py ├── demo │ ├── 000342.jpg │ ├── 000542.jpg │ ├── 003123.jpg │ ├── 004101.jpg │ ├── 008591.jpg │ └── result │ │ ├── 000342.jpg │ │ ├── 000542.jpg │ │ ├── 003123.jpg │ │ ├── 004101.jpg │ │ └── 008591.jpg ├── ext │ ├── __init__.py │ ├── build.py │ ├── cpu │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── nms.cu │ │ └── vision.h │ ├── nms.h │ └── vision.cpp ├── figures │ ├── 004545.jpg │ ├── losses.png │ ├── lr.png │ └── metrics.png ├── seesee │ ├── Main │ │ ├── test.txt │ │ ├── train.txt │ │ ├── train_val.txt │ │ └── val.txt │ ├── draw_region.py │ ├── files │ │ ├── 0000002_00005_d_0000014.jpg │ │ ├── 0000002_00005_d_0000014.txt │ │ ├── added_customer.xml │ │ └── test.xml │ ├── txt2xml.py │ ├── write_txt.py │ └── write_xml.py ├── setup.py ├── ssd │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ ├── config │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── defaults.cpython-36.pyc │ │ │ └── path_catlog.cpython-36.pyc │ │ ├── defaults.py │ │ └── path_catlog.py │ ├── data │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── build.cpython-36.pyc │ │ ├── build.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ ├── coco.cpython-36.pyc │ │ │ │ ├── visdrone.cpython-36.pyc │ │ │ │ └── voc.cpython-36.pyc │ │ │ ├── coco.py │ │ │ ├── evaluation │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ └── __init__.cpython-36.pyc │ │ │ │ ├── coco │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── __pycache__ │ │ │ │ │ │ └── __init__.cpython-36.pyc │ │ │ │ ├── visdrone │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ │ │ └── eval_detection_voc.cpython-36.pyc │ │ │ │ │ └── eval_detection_voc.py │ │ │ │ └── voc │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ │ └── eval_detection_voc.cpython-36.pyc │ │ │ │ │ └── eval_detection_voc.py │ │ │ ├── visdrone.py │ │ │ └── voc.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ ├── distributed.cpython-36.pyc │ │ │ │ └── iteration_based_batch_sampler.cpython-36.pyc │ │ │ ├── distributed.py │ │ │ └── iteration_based_batch_sampler.py │ │ └── transforms │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── target_transform.cpython-36.pyc │ │ │ └── transforms.cpython-36.pyc │ │ │ ├── target_transform.py │ │ │ └── transforms.py │ ├── engine │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── inference.cpython-36.pyc │ │ │ └── trainer.cpython-36.pyc │ │ ├── inference.py │ │ └── trainer.py │ ├── layers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── separable_conv.cpython-36.pyc │ │ └── separable_conv.py │ ├── modeling │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── registry.cpython-36.pyc │ │ ├── anchors │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ └── prior_box.cpython-36.pyc │ │ │ └── prior_box.py │ │ ├── backbone │ │ │ ├── __init__.py 
│ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ ├── mobilenet.cpython-36.pyc │ │ │ │ └── vgg.cpython-36.pyc │ │ │ ├── efficient_net │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ │ ├── efficient_net.cpython-36.pyc │ │ │ │ │ └── utils.cpython-36.pyc │ │ │ │ ├── efficient_net.py │ │ │ │ └── utils.py │ │ │ ├── mobilenet.py │ │ │ └── vgg.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ ├── box_head.cpython-36.pyc │ │ │ │ ├── box_predictor.cpython-36.pyc │ │ │ │ ├── inference.cpython-36.pyc │ │ │ │ └── loss.cpython-36.pyc │ │ │ ├── box_head.py │ │ │ ├── box_predictor.py │ │ │ ├── inference.py │ │ │ └── loss.py │ │ ├── detector │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ └── ssd_detector.cpython-36.pyc │ │ │ └── ssd_detector.py │ │ └── registry.py │ ├── solver │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── build.cpython-36.pyc │ │ │ └── lr_scheduler.cpython-36.pyc │ │ ├── build.py │ │ └── lr_scheduler.py │ ├── structures │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── container.cpython-36.pyc │ │ └── container.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── box_utils.cpython-36.pyc │ │ ├── checkpoint.cpython-36.pyc │ │ ├── dist_util.cpython-36.pyc │ │ ├── logger.cpython-36.pyc │ │ ├── metric_logger.cpython-36.pyc │ │ ├── misc.cpython-36.pyc │ │ ├── model_zoo.cpython-36.pyc │ │ ├── nms.cpython-36.pyc │ │ └── registry.cpython-36.pyc │ │ ├── box_utils.py │ │ ├── checkpoint.py │ │ ├── dist_util.py │ │ ├── logger.py │ │ ├── metric_logger.py │ │ ├── misc.py │ │ ├── model_zoo.py │ │ ├── nms.py │ │ └── registry.py ├── test.py ├── train.py ├── train_visdrone.py ├── video_demo.py ├── visdrone_demo.py ├── visdrone_test.py └── visdrone_train.py ├── utils ├── .idea │ ├── .gitignore │ ├── data_transformer.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ └── modules.xml ├── ssd │ ├── test.py │ ├── visdrone_demo.py │ └── visdrone_test.py ├── txts │ ├── test.txt │ ├── train.txt │ ├── trainval.txt │ └── val.txt ├── voc2yolo.py └── yolo │ ├── detect.py │ ├── move_imgs.py │ ├── test.txt │ ├── train.py │ ├── train.txt │ ├── train_val_go.py │ └── valid.txt ├── yolo ├── README.md ├── config │ ├── coco.data │ ├── create_custom_model.sh │ ├── custom.data │ ├── yolov3-custom.cfg │ ├── yolov3-tiny.cfg │ └── yolov3.cfg ├── data │ ├── coco.names │ ├── custom │ │ ├── classes.names │ │ ├── images │ │ │ └── 0000001_02999_d_0000005.jpg │ │ ├── labels │ │ │ └── 0000001_02999_d_0000005.txt │ │ ├── train.txt │ │ └── valid.txt │ ├── get_coco_dataset.sh │ └── samples │ │ ├── dog.jpg │ │ └── visdrone │ │ └── 0000006_00159_d_0000001.jpg ├── detect.py ├── detect_vis.py ├── models.py ├── nohup.out ├── output │ └── dog.png ├── requirements.txt ├── test.py ├── train.py ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── augmentations.cpython-36.pyc │ │ ├── datasets.cpython-36.pyc │ │ ├── logger.cpython-36.pyc │ │ ├── parse_config.cpython-36.pyc │ │ └── utils.cpython-36.pyc │ ├── augmentations.py │ ├── datasets.py │ ├── logger.py │ ├── parse_config.py │ └── utils.py └── weights │ └── download_weights.sh └── 人工智能大作业_流程.md /readme.md: -------------------------------------------------------------------------------- 1 | # Visdrone无人机图像目标检测 2 | 3 | 
This repository holds our course project for the Artificial Intelligence class: object detection on drone (UAV) imagery. We preprocessed the VisDrone dataset, trained and tested models under both the YOLO and SSD frameworks, and wrote demos for real-time drone-image object detection.
4 | 
5 | > Group members: 宋晨明、王有发、刘竹风、王伟业
6 | >
7 | > For the Artificial Intelligence course project at Xi'an Jiaotong University only
8 | >
9 | > Model and dataset downloads: [Baidu Netdisk](https://pan.baidu.com/s/1voGZhYyvEHal_uRPxUownQ) password: c7z2
10 | 
11 | ## requirements
12 | 
13 | ssd: PyTorch 1.4
14 | 
15 | yolo: PyTorch 1.0, TensorFlow 1.14
16 | 
17 | ## Structure
18 | 
19 | ```
20 | visdrone_detection
21 | ├─ readme.md
22 | ├─ ssd
23 | ├─ yolo
24 | └─ 人工智能大作业_流程.md
25 | ```
26 | 
27 | ## Dataset
28 | 
29 | This project uses the VisDrone dataset, which covers 10 classes (pedestrian, people, car, van, bus, truck, motor, bicycle, awning-tricycle, and tricycle). Download: [VisDrone dataset](http://aiskyeye.com/challenge/object-detection/)
30 | 
31 | Because we use the SSD and YOLO frameworks for the detection task, the original annotation format has to be converted: VisDrone ships plain-text (txt) annotations, and we converted them into the common VOC (xml) and YOLO formats. You can process the data yourself with the scripts under `utils`, or contact us for the converted files.
32 | 
33 | ## Training
34 | 
35 | ### ssd training
36 | 
37 | First cd into the `ssd` directory and adjust the dataset and config-file paths used by `visdrone_train.py`; the config file for this project is `configs/vgg_ssd300_visdrone0413.yaml`. Then run:
38 | 
39 | ```
40 | python visdrone_train.py
41 | ```
42 | 
43 | The trained model files are saved under the `outputs` directory.
44 | 
45 | ### yolo training
46 | 
47 | First cd into the `yolo` directory and generate the cfg file and custom.data, then run:
48 | 
49 | ```
50 | python train.py --model_def config/yolov3-custom.cfg --data_config config/custom.data --pretrained_weights weights/darknet53.conv.74
51 | ```
52 | 
53 | The trained model files are saved under the `checkpoints` directory.
54 | 
55 | ## Testing
56 | 
57 | ### ssd testing
58 | 
59 | First cd into the `ssd` directory, then run:
60 | 
61 | ```
62 | python visdrone_test.py
63 | python visdrone_demo.py
64 | ```
65 | 
66 | ### yolo testing
67 | 
68 | First cd into the `yolo` directory, then run:
69 | 
70 | ```
71 | python test.py --weights_path weights/yolov3.weights
72 | python3 detect.py --image_folder data/samples/
73 | ```
74 | 
75 | ## Results
76 | 
77 | * mAP
78 | 
79 | ```
80 | # yolo
81 | +-------+-----------------+---------+
82 | | Index | Class name      | AP      |
83 | +-------+-----------------+---------+
84 | | 0     | pedestrian      | 0.08822 |
85 | | 1     | people          | 0.02340 |
86 | | 2     | bicycle         | 0.00165 |
87 | | 3     | car             | 0.43279 |
88 | | 4     | van             | 0.07407 |
89 | | 5     | truck           | 0.07747 |
90 | | 6     | tricycle        | 0.00995 |
91 | | 7     | awning-tricycle | 0.01727 |
92 | | 8     | bus             | 0.25008 |
93 | | 9     | motor           | 0.05405 |
94 | | 10    | others          | 0.00366 |
95 | +-------+-----------------+---------+
96 | ---- mAP 0.09387386786609676
97 | # ssd
98 | 2020-11-04 14:56:20,698 SSD.inference INFO: mAP: 0.1524
99 | pedestrian      : 0.1170
100 | people          : 0.0909
101 | bicycle         : 0.0909
102 | car             : 0.4377
103 | van             : 0.1740
104 | truck           : 0.2258
105 | tricycle        : 0.1048
106 | awning-tricycle : 0.0413
107 | bus             : 0.3754
108 | motor           : 0.0800
109 | others          : 0.0909
110 | ```
111 | 
112 | * FPS
113 | 
114 | ```
115 | # yolo
116 | fps: 19.17227602398063
117 | 83.97542357444763s / 1610imgs
118 | # ssd
119 | FPS:64.44748916337679
120 | 24.98157835006714s / 1610imgs
121 | ```
122 | 
123 | * Sample results
124 | 
125 | **yolo**
126 | 
127 | ![0000006_00159_d_0000001](https://vehicle4cm.oss-cn-beijing.aliyuncs.com/typoraimgs/0000006_00159_d_0000001.png)
128 | 
129 | **ssd**
130 | 
131 | ![0000006_00159_d_0000001](https://vehicle4cm.oss-cn-beijing.aliyuncs.com/typoraimgs/0000006_00159_d_0000001.jpg)
132 | 
133 | ## References
134 | 
135 | * [Directory tree generator](http://dir.yardtea.cc/)
136 | * [YOLOv3: An Incremental Improvement](https://github.com/eriklindernoren/PyTorch-YOLOv3)
137 | 
138 | * [SSD: Single Shot MultiBox Object Detector, in PyTorch](https://github.com/amdegroot/ssd.pytorch)
139 | 
140 | 
141 | 
142 | ## TODO
143 | 
144 | Plenty still to do: polish the pipeline
145 | and fix the image display issue
--------------------------------------------------------------------------------
/ssd/DEVELOP_GUIDE.md:
--------------------------------------------------------------------------------
1 | # Develop Guide
2 | 
3 | ## Custom Dataset
4 | Adding your custom dataset is simple and flexible.
5 | For example, create `ssd/data/datasets/my_dataset.py`:
6 | ```python
7 | import numpy as np
8 | import torch.utils.data
9 | 
10 | from ssd.structures.container import Container
11 | 
12 | class MyDataset(torch.utils.data.Dataset):
13 |     def __init__(self, ..., transform=None, target_transform=None):
14 |         # as you would do normally
15 |         ...
16 |         self.transform = transform
17 |         self.target_transform = target_transform
18 | 
19 |     def __getitem__(self, index):
20 |         # load the image as a PIL Image
21 |         image = ...
22 | 
23 |         # load the bounding boxes in x1, y1, x2, y2 order.
24 |         boxes = np.zeros((N, 4), dtype=np.float32)  # placeholder; fill with your N boxes
25 |         # and labels
26 |         labels = np.zeros((N, ), dtype=np.int64)
27 | 
28 |         if self.transform:
29 |             image, boxes, labels = self.transform(image, boxes, labels)
30 |         if self.target_transform:
31 |             boxes, labels = self.target_transform(boxes, labels)
32 |         targets = Container(
33 |             boxes=boxes,
34 |             labels=labels,
35 |         )
36 |         # return the image, the targets and the index in your dataset
37 |         return image, targets, index
38 | ```
39 | 
40 | in `ssd/data/datasets/__init__.py`
41 | ```python
42 | from .my_dataset import MyDataset
43 | 
44 | _DATASETS = {
45 |     'VOCDataset': VOCDataset,
46 |     'COCODataset': COCODataset,
47 |     'MyDataset': MyDataset,
48 | }
49 | ```
50 | 
51 | in `ssd/config/path_catlog.py`:
52 | ```python
53 | DATASETS = {
54 |     ...
55 |     'my_custom_dataset': {
56 |         "arg1": "your/arg",
57 |         "arg2": "your/arg",
58 |     },
59 |     ...
60 | }
61 | 
62 | @staticmethod
63 | def get(name):
64 |     ...
65 |     if name == 'my_custom_dataset':
66 |         attrs = DatasetCatalog.DATASETS[name]
67 |         return dict(factory="MyDataset", args=attrs)
68 |     ...
69 | ```
70 | 
71 | in your `config.yaml`:
72 | ```yaml
73 | DATASETS:
74 |     TRAIN: ("my_custom_dataset", )
75 |     TEST: ("my_custom_test_dataset", )
76 | ```
77 | 
78 | ### Test
79 | While the aforementioned example should work for training, it's also easy to add your custom test code:
80 | in `ssd/data/datasets/evaluation/__init__.py`
81 | ```python
82 | if isinstance(dataset, MyDataset):
83 |     return my_own_evaluation(**args)
84 | ```
85 | 
86 | ## Custom Backbone
87 | 
88 | It is very simple to add your own backbone for SSD.
89 | For example, create `ssd/modeling/backbone/my_backbone.py`:
90 | ```python
91 | import torch.nn as nn
92 | 
93 | from ssd.modeling import registry
94 | from ssd.utils.model_zoo import load_state_dict_from_url
95 | 
96 | 
97 | class MyBackbone(nn.Module):
98 |     def __init__(self, cfg):
99 |         super().__init__()
100 |         ...
101 | 
102 |     def forward(self, x):
103 |         features = []
104 | 
105 |         # forward your network
106 | 
107 |         # add arbitrary features you want to run prediction on.
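        # (clarifying note, not in the original guide) each feature appended
        # below is assumed to be a tensor of shape (batch, C_i, H_i, W_i);
        # the C_i values must line up with MODEL.BACKBONE.OUT_CHANNELS and the
        # spatial sizes/strides with MODEL.PRIORS.FEATURE_MAPS / STRIDES in
        # the yaml config shown below.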
108 | 
109 |         features.append(feature1)
110 |         features.append(feature2)
111 |         features.append(feature3)
112 |         features.append(feature4)
113 | 
114 |         # return them as a tuple
115 |         return tuple(features)
116 | 
117 | @registry.BACKBONES.register('my_backbone')
118 | def my_backbone(cfg, pretrained=True):
119 |     model = MyBackbone(cfg)
120 |     model_url = 'your_model_url'
121 |     if pretrained:
122 |         model.init_from_pretrain(load_state_dict_from_url(model_url))
123 |     return model
124 | ```
125 | in `ssd/modeling/backbone/__init__.py`:
126 | ```python
127 | from .my_backbone import MyBackbone
128 | ```
129 | 
130 | in your `config.yaml`:
131 | ```yaml
132 | MODEL:
133 |     BACKBONE:
134 |         NAME: 'my_backbone'
135 |         OUT_CHANNELS: (-, -, -, -) # should match feature1 - feature4's out_channels in MyBackbone
136 |     PRIORS:
137 |         FEATURE_MAPS: [-, -, -, -] # feature1 - feature4's size
138 |         STRIDES: [-, -, -, -] # feature1 - feature4's output stride
139 |         MIN_SIZES: [21, 45, 99, 153] # your custom anchor settings
140 |         MAX_SIZES: [45, 99, 153, 207]
141 |         ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3]]
142 |         BOXES_PER_LOCATION: [6, 6, 6, 6]
143 | ```
--------------------------------------------------------------------------------
/ssd/TROUBLESHOOTING.md:
--------------------------------------------------------------------------------
1 | # Troubleshooting
2 | 
3 | ## RuntimeError: merge_sort: failed to synchronize: an illegal memory access was encountered
4 | 
5 | This error is raised in the multi-box loss: the sort fails because of NaN values. It may be a bug in `log_softmax`: https://github.com/pytorch/pytorch/issues/14335 . Three ways to solve it:
6 | 1. Use a smaller warmup factor, like 0.1. (append `SOLVER.WARMUP_FACTOR 0.1` to the end of your train cmd).
7 | 1. Use more warmup iterations, like 1000. (append `SOLVER.WARMUP_ITERS 1000` to the end of your train cmd).
8 | 1.
[Described in the forums by Jinserk Baik](https://discuss.pytorch.org/t/ctcloss-performance-of-pytorch-1-0-0/27524/29) -------------------------------------------------------------------------------- /ssd/configs/efficient_net_b3_ssd300_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 21 3 | BACKBONE: 4 | NAME: 'efficient_net-b3' 5 | OUT_CHANNELS: (48, 136, 384, 256, 256, 256) 6 | INPUT: 7 | IMAGE_SIZE: 300 8 | DATASETS: 9 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 10 | TEST: ("voc_2007_test", ) 11 | SOLVER: 12 | MAX_ITER: 160000 13 | LR_STEPS: [105000, 135000] 14 | GAMMA: 0.1 15 | BATCH_SIZE: 24 16 | LR: 1e-3 17 | 18 | OUTPUT_DIR: 'outputs/efficient_net_b3_ssd300_voc0712' -------------------------------------------------------------------------------- /ssd/configs/mobilenet_v2_ssd320_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 21 3 | BOX_HEAD: 4 | PREDICTOR: 'SSDLiteBoxPredictor' 5 | BACKBONE: 6 | NAME: 'mobilenet_v2' 7 | OUT_CHANNELS: (96, 1280, 512, 256, 256, 64) 8 | PRIORS: 9 | FEATURE_MAPS: [20, 10, 5, 3, 2, 1] 10 | STRIDES: [16, 32, 64, 107, 160, 320] 11 | MIN_SIZES: [60, 105, 150, 195, 240, 285] 12 | MAX_SIZES: [105, 150, 195, 240, 285, 330] 13 | ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]] 14 | BOXES_PER_LOCATION: [6, 6, 6, 6, 6, 6] 15 | INPUT: 16 | IMAGE_SIZE: 320 17 | DATASETS: 18 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 19 | TEST: ("voc_2007_test", ) 20 | SOLVER: 21 | MAX_ITER: 120000 22 | LR_STEPS: [80000, 100000] 23 | GAMMA: 0.1 24 | BATCH_SIZE: 32 25 | LR: 1e-3 26 | 27 | OUTPUT_DIR: 'outputs/mobilenet_v2_ssd320_voc0712' -------------------------------------------------------------------------------- /ssd/configs/vgg_ssd300_coco_trainval35k.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 81 3 | PRIORS: 4 | FEATURE_MAPS: [38, 19, 10, 5, 3, 1] 5 | STRIDES: [8, 16, 32, 64, 100, 300] 6 | MIN_SIZES: [21, 45, 99, 153, 207, 261] 7 | MAX_SIZES: [45, 99, 153, 207, 261, 315] 8 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2], [2]] 9 | BOXES_PER_LOCATION: [4, 6, 6, 6, 4, 4] 10 | INPUT: 11 | IMAGE_SIZE: 300 12 | DATASETS: 13 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 14 | TEST: ("coco_2014_minival", ) 15 | SOLVER: 16 | MAX_ITER: 400000 17 | LR_STEPS: [280000, 360000] 18 | GAMMA: 0.1 19 | BATCH_SIZE: 32 20 | LR: 1e-3 21 | 22 | OUTPUT_DIR: 'outputs/vgg_ssd300_coco_trainval35k' -------------------------------------------------------------------------------- /ssd/configs/vgg_ssd300_visdrone0413.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 12 3 | INPUT: 4 | IMAGE_SIZE: 300 5 | DATASETS: 6 | TRAIN: ("VisDrone_2019__trainval",) 7 | TEST: ("VisDrone_2019__test", ) 8 | SOLVER: 9 | MAX_ITER: 120000 10 | LR_STEPS: [80000, 100000] 11 | GAMMA: 0.1 12 | BATCH_SIZE: 32 13 | LR: 1e-3 14 | 15 | OUTPUT_DIR: 'outputs/vgg_ssd300_visdrone0413' 16 | -------------------------------------------------------------------------------- /ssd/configs/vgg_ssd300_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 21 3 | INPUT: 4 | IMAGE_SIZE: 300 5 | DATASETS: 6 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 7 | TEST: ("voc_2007_test", ) 8 | SOLVER: 9 | MAX_ITER: 120000 10 | LR_STEPS: [80000, 100000] 11 | 
GAMMA: 0.1 12 | BATCH_SIZE: 16 13 | LR: 1e-3 14 | 15 | OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712' -------------------------------------------------------------------------------- /ssd/configs/vgg_ssd512_coco_trainval35k.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 81 3 | BACKBONE: 4 | OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256) 5 | PRIORS: 6 | FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1] 7 | STRIDES: [8, 16, 32, 64, 128, 256, 512] 8 | MIN_SIZES: [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8] 9 | MAX_SIZES: [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72] 10 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]] 11 | BOXES_PER_LOCATION: [4, 6, 6, 6, 6, 4, 4] 12 | INPUT: 13 | IMAGE_SIZE: 512 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | TEST: ("coco_2014_minival", ) 17 | SOLVER: 18 | MAX_ITER: 520000 19 | LR_STEPS: [360000, 480000] 20 | GAMMA: 0.1 21 | BATCH_SIZE: 24 22 | LR: 1e-3 23 | 24 | OUTPUT_DIR: 'outputs/vgg_ssd512_coco_trainval35k' -------------------------------------------------------------------------------- /ssd/configs/vgg_ssd512_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 21 3 | BACKBONE: 4 | OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256) 5 | PRIORS: 6 | FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1] 7 | STRIDES: [8, 16, 32, 64, 128, 256, 512] 8 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8] 9 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.65] 10 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]] 11 | BOXES_PER_LOCATION: [4, 6, 6, 6, 6, 4, 4] 12 | INPUT: 13 | IMAGE_SIZE: 512 14 | DATASETS: 15 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 16 | TEST: ("voc_2007_test", ) 17 | SOLVER: 18 | MAX_ITER: 120000 19 | LR_STEPS: [80000, 100000] 20 | GAMMA: 0.1 21 | BATCH_SIZE: 24 22 | LR: 1e-3 23 | 24 | OUTPUT_DIR: 'outputs/vgg_ssd512_voc0712' -------------------------------------------------------------------------------- /ssd/demo.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import time 4 | 5 | import torch 6 | from PIL import Image 7 | from vizer.draw import draw_boxes 8 | 9 | from ssd.config import cfg 10 | from ssd.data.datasets import COCODataset, VOCDataset 11 | import argparse 12 | import numpy as np 13 | 14 | from ssd.data.transforms import build_transforms 15 | from ssd.modeling.detector import build_detection_model 16 | from ssd.utils import mkdir 17 | from ssd.utils.checkpoint import CheckPointer 18 | 19 | 20 | @torch.no_grad() 21 | def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): 22 | if dataset_type == "voc": 23 | class_names = VOCDataset.class_names 24 | elif dataset_type == 'coco': 25 | class_names = COCODataset.class_names 26 | else: 27 | raise NotImplementedError('Not implemented now.') 28 | device = torch.device(cfg.MODEL.DEVICE) 29 | 30 | model = build_detection_model(cfg) 31 | model = model.to(device) 32 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR) 33 | checkpointer.load(ckpt, use_latest=ckpt is None) 34 | weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file() 35 | print('Loaded weights from {}'.format(weight_file)) 36 | 37 | image_paths = glob.glob(os.path.join(images_dir, '*.jpg')) 38 | mkdir(output_dir) 39 | 40 | cpu_device = torch.device("cpu") 41 | transforms = build_transforms(cfg, 
is_train=False)
42 |     model.eval()
43 |     for i, image_path in enumerate(image_paths):
44 |         start = time.time()
45 |         image_name = os.path.basename(image_path)
46 | 
47 |         image = np.array(Image.open(image_path).convert("RGB"))
48 |         height, width = image.shape[:2]
49 |         images = transforms(image)[0].unsqueeze(0)
50 |         load_time = time.time() - start
51 | 
52 |         start = time.time()
53 |         result = model(images.to(device))[0]
54 |         inference_time = time.time() - start
55 | 
56 |         result = result.resize((width, height)).to(cpu_device).numpy()
57 |         boxes, labels, scores = result['boxes'], result['labels'], result['scores']
58 | 
59 |         indices = scores > score_threshold
60 |         boxes = boxes[indices]
61 |         labels = labels[indices]
62 |         scores = scores[indices]
63 |         meters = ' | '.join(
64 |             [
65 |                 'objects {:02d}'.format(len(boxes)),
66 |                 'load {:03d}ms'.format(round(load_time * 1000)),
67 |                 'inference {:03d}ms'.format(round(inference_time * 1000)),
68 |                 'FPS {}'.format(round(1.0 / inference_time))
69 |             ]
70 |         )
71 |         print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters))
72 | 
73 |         drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8)
74 |         Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name))
75 | 
76 | 
77 | def main():
78 |     parser = argparse.ArgumentParser(description="SSD Demo.")
79 |     parser.add_argument(
80 |         "--config-file",
81 |         default="",
82 |         metavar="FILE",
83 |         help="path to config file",
84 |         type=str,
85 |     )
86 |     parser.add_argument("--ckpt", type=str, default=None, help="Trained weights.")
87 |     parser.add_argument("--score_threshold", type=float, default=0.7)
88 |     parser.add_argument("--images_dir", default='demo', type=str, help='Specify an image dir to run prediction on.')
89 |     parser.add_argument("--output_dir", default='demo/result', type=str, help='Specify an image dir to save predicted images.')
90 |     parser.add_argument("--dataset_type", default="voc", type=str, help='Specify dataset type.
Currently supports voc and coco.')
91 | 
92 |     parser.add_argument(
93 |         "opts",
94 |         help="Modify config options using the command-line",
95 |         default=None,
96 |         nargs=argparse.REMAINDER,
97 |     )
98 |     args = parser.parse_args()
99 |     print(args)
100 | 
101 |     cfg.merge_from_file(args.config_file)
102 |     cfg.merge_from_list(args.opts)
103 |     cfg.freeze()
104 | 
105 |     print("Loaded configuration file {}".format(args.config_file))
106 |     with open(args.config_file, "r") as cf:
107 |         config_str = "\n" + cf.read()
108 |         print(config_str)
109 |     print("Running with config:\n{}".format(cfg))
110 | 
111 |     run_demo(cfg=cfg,
112 |              ckpt=args.ckpt,
113 |              score_threshold=args.score_threshold,
114 |              images_dir=args.images_dir,
115 |              output_dir=args.output_dir,
116 |              dataset_type=args.dataset_type)
117 | 
118 | 
119 | if __name__ == '__main__':
120 |     main()
121 | 
--------------------------------------------------------------------------------
/ssd/demo/000342.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/000342.jpg
--------------------------------------------------------------------------------
/ssd/demo/000542.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/000542.jpg
--------------------------------------------------------------------------------
/ssd/demo/003123.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/003123.jpg
--------------------------------------------------------------------------------
/ssd/demo/004101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/004101.jpg
--------------------------------------------------------------------------------
/ssd/demo/008591.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/008591.jpg
--------------------------------------------------------------------------------
/ssd/demo/result/000342.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/result/000342.jpg
--------------------------------------------------------------------------------
/ssd/demo/result/000542.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/result/000542.jpg
--------------------------------------------------------------------------------
/ssd/demo/result/003123.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/result/003123.jpg
--------------------------------------------------------------------------------
/ssd/demo/result/004101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/result/004101.jpg -------------------------------------------------------------------------------- /ssd/demo/result/008591.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/demo/result/008591.jpg -------------------------------------------------------------------------------- /ssd/ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ext/__init__.py -------------------------------------------------------------------------------- /ssd/ext/build.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import torch 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import CUDA_HOME 7 | from torch.utils.cpp_extension import CppExtension 8 | from torch.utils.cpp_extension import CUDAExtension 9 | 10 | requirements = ["torch"] 11 | 12 | 13 | def get_extensions(): 14 | extensions_dir = os.path.dirname(os.path.abspath(__file__)) 15 | 16 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 17 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 18 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 19 | 20 | sources = main_file + source_cpu 21 | extension = CppExtension 22 | 23 | extra_compile_args = {"cxx": []} 24 | define_macros = [] 25 | 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | extension = CUDAExtension 28 | sources += source_cuda 29 | define_macros += [("WITH_CUDA", None)] 30 | extra_compile_args["nvcc"] = [ 31 | "-DCUDA_HAS_FP16=1", 32 | "-D__CUDA_NO_HALF_OPERATORS__", 33 | "-D__CUDA_NO_HALF_CONVERSIONS__", 34 | "-D__CUDA_NO_HALF2_OPERATORS__", 35 | ] 36 | 37 | sources = [os.path.join(extensions_dir, s) for s in sources] 38 | 39 | include_dirs = [extensions_dir] 40 | 41 | ext_modules = [ 42 | extension( 43 | "torch_extension", 44 | sources, 45 | include_dirs=include_dirs, 46 | define_macros=define_macros, 47 | extra_compile_args=extra_compile_args, 48 | ) 49 | ] 50 | 51 | return ext_modules 52 | 53 | 54 | setup( 55 | name="torch_extension", 56 | version="0.1", 57 | ext_modules=get_extensions(), 58 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}) 59 | -------------------------------------------------------------------------------- /ssd/ext/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
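// Overview (clarifying comment; the algorithm below is the original code):
// classic greedy NMS on the CPU. Boxes are sorted by score in descending
// order; walking down that ranking, any box whose IoU with an already-kept
// box is >= threshold is marked as suppressed, and the indices of the
// surviving boxes are returned as a LongTensor.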
2 | #include "cpu/vision.h"
3 | 
4 | 
5 | template <typename scalar_t>
6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
7 |                           const at::Tensor& scores,
8 |                           const float threshold) {
9 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 |   AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
11 |   AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
12 | 
13 |   if (dets.numel() == 0) {
14 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
15 |   }
16 | 
17 |   auto x1_t = dets.select(1, 0).contiguous();
18 |   auto y1_t = dets.select(1, 1).contiguous();
19 |   auto x2_t = dets.select(1, 2).contiguous();
20 |   auto y2_t = dets.select(1, 3).contiguous();
21 | 
22 |   at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t);
23 | 
24 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
25 | 
26 |   auto ndets = dets.size(0);
27 |   at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
28 | 
29 |   auto suppressed = suppressed_t.data<uint8_t>();
30 |   auto order = order_t.data<int64_t>();
31 |   auto x1 = x1_t.data<scalar_t>();
32 |   auto y1 = y1_t.data<scalar_t>();
33 |   auto x2 = x2_t.data<scalar_t>();
34 |   auto y2 = y2_t.data<scalar_t>();
35 |   auto areas = areas_t.data<scalar_t>();
36 | 
37 |   for (int64_t _i = 0; _i < ndets; _i++) {
38 |     auto i = order[_i];
39 |     if (suppressed[i] == 1)
40 |       continue;
41 |     auto ix1 = x1[i];
42 |     auto iy1 = y1[i];
43 |     auto ix2 = x2[i];
44 |     auto iy2 = y2[i];
45 |     auto iarea = areas[i];
46 | 
47 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
48 |       auto j = order[_j];
49 |       if (suppressed[j] == 1)
50 |         continue;
51 |       auto xx1 = std::max(ix1, x1[j]);
52 |       auto yy1 = std::max(iy1, y1[j]);
53 |       auto xx2 = std::min(ix2, x2[j]);
54 |       auto yy2 = std::min(iy2, y2[j]);
55 | 
56 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1);
57 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1);
58 |       auto inter = w * h;
59 |       auto ovr = inter / (iarea + areas[j] - inter);
60 |       if (ovr >= threshold)
61 |         suppressed[j] = 1;
62 |     }
63 |   }
64 |   return at::nonzero(suppressed_t == 0).squeeze(1);
65 | }
66 | 
67 | at::Tensor nms_cpu(const at::Tensor& dets,
68 |                    const at::Tensor& scores,
69 |                    const float threshold) {
70 |   at::Tensor result;
71 |   AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
72 |     result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
73 |   });
74 |   return result;
75 | }
--------------------------------------------------------------------------------
/ssd/ext/cpu/vision.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include <torch/extension.h>
4 | 
5 | at::Tensor nms_cpu(const at::Tensor& dets,
6 |                    const at::Tensor& scores,
7 |                    const float threshold);
8 | 
--------------------------------------------------------------------------------
/ssd/ext/cuda/nms.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
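// Overview (clarifying comment): bitmask-parallel NMS. Boxes are tiled into
// groups of threadsPerBlock = 64 = 8 * sizeof(unsigned long long) bits; each
// CUDA thread compares one box against a 64-box column tile and records the
// over-threshold overlaps as one 64-bit word in dev_mask. The host then runs
// the cheap greedy scan over those masks to decide which boxes to keep.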
2 | #include <ATen/ATen.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | 
5 | #include <THC/THC.h>
6 | #include <THC/THCDeviceUtils.cuh>
7 | 
8 | #include <vector>
9 | #include <iostream>
10 | 
11 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
12 | 
13 | __device__ inline float devIoU(float const * const a, float const * const b) {
14 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
15 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
16 |   float width = max(right - left, 0.f), height = max(bottom - top, 0.f);
17 |   float interS = width * height;
18 |   float Sa = (a[2] - a[0]) * (a[3] - a[1]);
19 |   float Sb = (b[2] - b[0]) * (b[3] - b[1]);
20 |   return interS / (Sa + Sb - interS);
21 | }
22 | 
23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
24 |                            const float *dev_boxes, unsigned long long *dev_mask) {
25 |   const int row_start = blockIdx.y;
26 |   const int col_start = blockIdx.x;
27 | 
28 |   // if (row_start > col_start) return;
29 | 
30 |   const int row_size =
31 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
32 |   const int col_size =
33 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
34 | 
35 |   __shared__ float block_boxes[threadsPerBlock * 5];
36 |   if (threadIdx.x < col_size) {
37 |     block_boxes[threadIdx.x * 5 + 0] =
38 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
39 |     block_boxes[threadIdx.x * 5 + 1] =
40 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
41 |     block_boxes[threadIdx.x * 5 + 2] =
42 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
43 |     block_boxes[threadIdx.x * 5 + 3] =
44 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
45 |     block_boxes[threadIdx.x * 5 + 4] =
46 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
47 |   }
48 |   __syncthreads();
49 | 
50 |   if (threadIdx.x < row_size) {
51 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
52 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
53 |     int i = 0;
54 |     unsigned long long t = 0;
55 |     int start = 0;
56 |     if (row_start == col_start) {
57 |       start = threadIdx.x + 1;
58 |     }
59 |     for (i = start; i < col_size; i++) {
60 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
61 |         t |= 1ULL << i;
62 |       }
63 |     }
64 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
65 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
66 |   }
67 | }
68 | 
69 | // boxes is a N x 5 tensor
70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
71 |   using scalar_t = float;
72 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
73 |   auto scores = boxes.select(1, 4);
74 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
75 |   auto boxes_sorted = boxes.index_select(0, order_t);
76 | 
77 |   int boxes_num = boxes.size(0);
78 | 
79 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
80 | 
81 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
82 | 
83 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
84 | 
85 |   unsigned long long* mask_dev = NULL;
86 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
87 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
88 | 
89 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
90 | 
91 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
92 |               THCCeilDiv(boxes_num, threadsPerBlock));
93 |   dim3 threads(threadsPerBlock);
94 |   nms_kernel<<<blocks, threads>>>(boxes_num,
95 |                                   nms_overlap_thresh,
96 |                                   boxes_dev,
97 |                                   mask_dev);
98 | 
99 |   std::vector<unsigned long long> mask_host(boxes_num *
col_blocks);
100 |   THCudaCheck(cudaMemcpy(&mask_host[0],
101 |                         mask_dev,
102 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
103 |                         cudaMemcpyDeviceToHost));
104 | 
105 |   std::vector<unsigned long long> remv(col_blocks);
106 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
107 | 
108 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
109 |   int64_t* keep_out = keep.data<int64_t>();
110 | 
111 |   int num_to_keep = 0;
112 |   for (int i = 0; i < boxes_num; i++) {
113 |     int nblock = i / threadsPerBlock;
114 |     int inblock = i % threadsPerBlock;
115 | 
116 |     if (!(remv[nblock] & (1ULL << inblock))) {
117 |       keep_out[num_to_keep++] = i;
118 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
119 |       for (int j = nblock; j < col_blocks; j++) {
120 |         remv[j] |= p[j];
121 |       }
122 |     }
123 |   }
124 | 
125 |   THCudaFree(state, mask_dev);
126 |   // TODO improve this part
127 |   return std::get<0>(order_t.index({
128 |                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
129 |                          order_t.device(), keep.scalar_type())
130 |                      }).sort(0, false));
131 | }
--------------------------------------------------------------------------------
/ssd/ext/cuda/vision.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include <torch/extension.h>
4 | 
5 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
6 | 
7 | 
8 | 
--------------------------------------------------------------------------------
/ssd/ext/nms.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include "cpu/vision.h"
4 | 
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 | 
9 | 
10 | at::Tensor nms(const at::Tensor& dets,
11 |                const at::Tensor& scores,
12 |                const float threshold) {
13 | 
14 |   if (dets.type().is_cuda()) {
15 | #ifdef WITH_CUDA
16 |     // TODO raise error if not compiled with CUDA
17 |     if (dets.numel() == 0)
18 |       return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
19 |     auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
20 |     return nms_cuda(b, threshold);
21 | #else
22 |     AT_ERROR("Not compiled with GPU support");
23 | #endif
24 |   }
25 | 
26 |   at::Tensor result = nms_cpu(dets, scores, threshold);
27 |   return result;
28 | }
29 | 
--------------------------------------------------------------------------------
/ssd/ext/vision.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
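// Overview (clarifying comment): this translation unit is the Python entry
// point. build.py compiles it together with the cpu/cuda kernels into an
// extension module named "torch_extension", so from Python the op is called
// as torch_extension.nms(dets, scores, threshold).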
2 | #include "nms.h"
3 | 
4 | 
5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
6 |   m.def("nms", &nms, "non-maximum suppression");
7 | }
8 | 
--------------------------------------------------------------------------------
/ssd/figures/004545.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/figures/004545.jpg
--------------------------------------------------------------------------------
/ssd/figures/losses.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/figures/losses.png
--------------------------------------------------------------------------------
/ssd/figures/lr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/figures/lr.png
--------------------------------------------------------------------------------
/ssd/figures/metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/figures/metrics.png
--------------------------------------------------------------------------------
/ssd/seesee/draw_region.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | 
3 | # cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)  # args: frame image, top-left corner, bottom-right corner (x+w, y+h, since w and h are the box width and height), color, line width
4 | 
5 | # regions = [[232, 385, 13, 31, 0], [234, 391, 8, 17, 1]]
6 | 
7 | 
8 | def draw(img_path, regions):
9 |     img = cv2.imread(img_path)
10 |     for x, y, w, h, score in regions:
11 |         cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
12 |         cv2.putText(img, str(score), (int(w / 2 + x), int(y - h / 5)), cv2.FONT_HERSHEY_PLAIN, 2.0, (255, 255, 255), 2, 1)  # note: the image must be the first argument to cv2.putText; each box is labeled with its score field
13 |     cv2.imshow('result', img)
14 |     cv2.waitKey(0)
15 |     cv2.destroyAllWindows()
16 | 
17 | 
18 | if __name__ == '__main__':
19 |     regions = [[232, 385, 13, 31, 0], [234, 391, 8, 17, 1]]
20 |     region1 = [[232, 385, 13, 31, 0]]
21 |     draw('files/0000002_00005_d_0000014.jpg', region1)
22 | 
--------------------------------------------------------------------------------
/ssd/seesee/files/0000002_00005_d_0000014.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/seesee/files/0000002_00005_d_0000014.jpg
--------------------------------------------------------------------------------
/ssd/seesee/files/0000002_00005_d_0000014.txt:
--------------------------------------------------------------------------------
1 | 684,8,273,116,0,0,0,0
2 | 406,119,265,70,0,0,0,0
3 | 255,22,119,128,0,0,0,0
4 | 1,3,209,78,0,0,0,0
5 | 708,471,74,33,1,4,0,1
6 | 639,425,61,46,1,4,0,0
7 | 594,399,64,51,1,4,0,0
8 | 562,390,61,38,1,4,0,0
9 | 540,372,65,33,1,4,0,1
10 | 514,333,68,35,1,4,0,0
11 | 501,317,64,31,1,4,0,1
12 | 501,299,45,28,1,4,0,1
13 | 489,284,48,27,1,4,0,1
14 | 463,262,48,29,1,4,0,0
15 | 458,252,49,22,1,4,0,1
16 | 448,242,45,20,1,4,0,1
17 | 442,230,49,19,1,4,0,1
18 | 439,214,45,21,1,4,0,1
19 | 429,208,42,19,1,4,0,1
20 | 420,199,43,20,1,4,0,1
21 | 398,188,41,18,1,4,0,1
22 | 46,391,14,26,1,2,0,0
23 | 421,433,74,44,1,4,0,1
24 | 369,346,64,34,1,4,0,0
25 | 398,410,72,46,1,4,0,1
26 | 394,393,70,36,1,4,0,1
27 | 377,364,71,38,1,4,0,0
28 | 357,312,58,31,1,4,0,0
29 | 359,298,54,22,1,4,0,2
30 | 348,283,43,28,1,5,0,1
31 | 345,271,52,19,1,4,0,1
32 | 340,260,60,18,1,5,0,1
33 | 340,250,52,16,1,4,0,1
34 | 332,231,54,22,1,5,0,1
35 | 323,213,45,25,1,5,0,0
36 | 317,195,45,31,1,6,0,1
37 | 316,188,36,15,1,4,0,2
38 | 308,179,44,17,1,4,0,1
39 | 345,163,37,18,1,4,0,0
40 | 384,164,26,22,1,4,0,1
41 | 43,398,18,17,1,10,0,1
42 | 324,167,14,6,1,3,0,0
43 | 362,143,30,18,1,4,1,0
44 | 310,150,36,15,1,4,0,0
45 | 258,375,30,43,1,4,0,2
46 | 218,348,18,28,1,7,0,0
47 | 249,297,23,30,1,4,0,2
48 | 0,298,26,34,1,8,1,0
49 | 33,279,24,30,1,4,0,0
50 | 47,243,26,28,1,4,0,0
51 | 72,202,28,28,1,5,0,0
52 | 87,125,15,15,1,4,0,0
53 | 170,113,15,13,1,4,0,0
54 | 102,81,13,13,1,4,0,0
55 | 415,207,5,9,1,1,0,0
56 | 428,197,8,13,1,1,0,1
57 | 232,385,13,31,0,11,0,0
58 | 234,391,8,17,1,2,0,2
59 | 879,256,12,19,1,1,0,0
60 | 897,225,8,17,1,1,0,0
61 | 57,341,11,20,1,2,0,0
62 | 55,345,13,19,1,3,0,1
63 | 55,313,10,20,1,1,0,0
64 | 53,320,14,13,1,3,0,1
65 | 232,236,7,14,1,1,0,1
66 | 237,236,6,14,1,1,0,1
67 | 193,235,7,13,1,2,0,1
68 | 192,241,11,11,1,10,0,1
69 | 178,234,6,10,1,2,0,2
70 | 177,237,8,12,1,10,0,1
71 | 117,202,7,11,1,2,0,1
72 | 118,206,7,12,1,10,0,1
73 | 59,211,9,13,1,1,0,0
74 | 86,194,8,17,1,1,0,2
75 | 219,118,7,10,1,1,0,0
76 | 193,99,6,10,1,10,0,2
77 | 137,115,6,9,1,2,0,1
78 | 109,107,4,7,1,2,0,1
79 | 109,110,5,8,1,10,0,1
80 | 87,102,9,13,1,8,0,1
81 | 112,100,4,9,1,2,0,1
82 | 59,280,11,18,1,1,0,0
83 | 89,81,10,10,1,5,1,0
84 | 136,119,6,8,1,10,0,1
85 | 841,487,21,26,1,10,0,0
86 | 912,139,34,73,0,0,0,0
87 | 223,345,8,11,1,2,0,2
88 | 329,161,7,11,1,2,0,0
89 | 
--------------------------------------------------------------------------------
/ssd/seesee/files/added_customer.xml:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/ssd/seesee/files/test.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <managers company="中体彩">
3 | 	<gloryroad>
4 | 		<name>公司名称</name>
5 | 		<ceo>吴总</ceo>
6 | 	</gloryroad>
7 | </managers>
8 | 
--------------------------------------------------------------------------------
/ssd/seesee/write_txt.py:
--------------------------------------------------------------------------------
1 | '''
2 | Builds the train/val/test split text files by walking a folder of annotation files.
3 | 
4 | '''
5 | import os
6 | 
7 | 
8 | def files2txt(folder_path, txt_save_path):
9 |     # txt_annotations_path = 'F:\\datas\\VOC\\DET\\VisDrone2019-DET-train\\annotations_txt\\'
10 |     names = os.listdir(folder_path)
11 |     with open(txt_save_path, 'w') as f:
12 |         for name in names:
13 |             x = os.path.splitext(name)[0]  # drop the .xml extension; str.strip('.xml') would also eat leading/trailing 'x'/'m'/'l' characters
14 |             f.writelines(x)
15 |             f.write('\n')
16 | 
17 | 
18 | if __name__ == '__main__':
19 |     # folder_path = 'F:\\datas\\VOC\\VisDrone_ROOT\\VisDrone2019\\annotations_xml'
20 |     # folder_path = 'F:\\datas\\VOC\\DET\\VisDrone2019-DET-val\\annotations_xml'
21 |     folder_path = 'F:\\datas\\VOC\\DET\\VisDrone2019-DET-test_dev\\annotations_xml'
22 |     txt_save_path = 'Main/test.txt'
23 | 
24 |     files2txt(folder_path, txt_save_path)
25 | 
26 | 
27 | # file_path = "te.txt"
28 | # mylist = ["100", "200", "300"]
29 | # file_write_obj = open(file_path, 'w')  # open the file for writing; it is created automatically if it does not exist
30 | # for var in mylist:
31 | #     file_write_obj.writelines(var)
32 | #     file_write_obj.write('\n')
33 | # file_write_obj.close()
34 | 
--------------------------------------------------------------------------------
/ssd/seesee/write_xml.py:
--------------------------------------------------------------------------------
1 | 
# Write an XML file with minidom
2 | 
3 | from xml.dom.minidom import *
4 | 
5 | # Create a document object
6 | doc = Document()
7 | 
8 | # Create the root node
9 | root = doc.createElement('managers')
10 | 
11 | # Add an attribute to the root node
12 | root.setAttribute('company', '中体彩')
13 | print(root.getAttribute('company'))
14 | 
15 | # Attach the root node to the document tree
16 | doc.appendChild(root)
17 | 
18 | # Create the second-level nodes
19 | company = doc.createElement('gloryroad')
20 | name = doc.createElement('name')
21 | name.appendChild(doc.createTextNode('公司名称'))  # add a text node
22 | 
23 | # Create a child node that carries a text node
24 | ceo = doc.createElement('ceo')
25 | ceo.appendChild(doc.createTextNode('吴总'))  # the CEO's name
26 | 
27 | company.appendChild(name)  # attach name to company
28 | company.appendChild(ceo)
29 | root.appendChild(company)  # attach company to the root node
30 | 
31 | print(ceo.tagName)
32 | 
33 | print(doc.toxml())
34 | 
35 | # Save as an XML file
36 | fp = open('files/test.xml', 'w', encoding='utf-8')
37 | doc.writexml(fp, indent='', addindent='\t', newl='\n', encoding='utf-8')
38 | fp.close()
39 | 
--------------------------------------------------------------------------------
/ssd/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | with open("README.md", "r") as fh:
4 |     long_description = fh.read()
5 | 
6 | setup(
7 |     name="torch-ssd",
8 |     version="1.2.0",
9 |     packages=find_packages(exclude=['ext']),
10 |     install_requires=[
11 |         "torch~=1.0",
12 |         "torchvision~=0.3",
13 |         "opencv-python~=4.0",
14 |         "yacs==0.1.6",
15 |         "Vizer~=0.1.4",
16 |     ],
17 |     author="Congcong Li",
18 |     author_email="luffy.lcc@gmail.com",
19 |     description="High quality, fast, modular reference implementation of SSD in PyTorch",
20 |     long_description=long_description,
21 |     long_description_content_type="text/markdown",
22 |     url="https://github.com/lufficc/SSD",
23 |     classifiers=[
24 |         "Programming Language :: Python :: 3",
25 |         "License :: OSI Approved :: MIT License",
26 |         "Operating System :: OS Independent",
27 |         "Topic :: Scientific/Engineering :: Artificial Intelligence",
28 |     ],
29 |     license="MIT",
30 |     python_requires=">=3.6",
31 |     include_package_data=True,
32 | )
33 | 
--------------------------------------------------------------------------------
/ssd/ssd/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/__init__.py
--------------------------------------------------------------------------------
/ssd/ssd/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/ssd/ssd/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .defaults import _C as cfg
2 | 
--------------------------------------------------------------------------------
/ssd/ssd/config/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/config/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/ssd/ssd/config/__pycache__/defaults.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/config/__pycache__/defaults.cpython-36.pyc
--------------------------------------------------------------------------------
/ssd/ssd/config/__pycache__/path_catlog.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/config/__pycache__/path_catlog.cpython-36.pyc
--------------------------------------------------------------------------------
/ssd/ssd/config/defaults.py:
--------------------------------------------------------------------------------
1 | from yacs.config import CfgNode as CN
2 | 
3 | _C = CN()
4 | 
5 | _C.MODEL = CN()
6 | _C.MODEL.META_ARCHITECTURE = 'SSDDetector'
7 | _C.MODEL.DEVICE = "cuda"
8 | # match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5)
9 | _C.MODEL.THRESHOLD = 0.5
10 | _C.MODEL.NUM_CLASSES = 21
11 | # Hard negative mining
12 | _C.MODEL.NEG_POS_RATIO = 3
13 | _C.MODEL.CENTER_VARIANCE = 0.1
14 | _C.MODEL.SIZE_VARIANCE = 0.2
15 | 
16 | # ---------------------------------------------------------------------------- #
17 | # Backbone
18 | # ---------------------------------------------------------------------------- #
19 | _C.MODEL.BACKBONE = CN()
20 | _C.MODEL.BACKBONE.NAME = 'vgg'
21 | _C.MODEL.BACKBONE.OUT_CHANNELS = (512, 1024, 512, 256, 256, 256)
22 | _C.MODEL.BACKBONE.PRETRAINED = True
23 | 
24 | # -----------------------------------------------------------------------------
25 | # PRIORS
26 | # -----------------------------------------------------------------------------
27 | _C.MODEL.PRIORS = CN()
28 | _C.MODEL.PRIORS.FEATURE_MAPS = [38, 19, 10, 5, 3, 1]
29 | _C.MODEL.PRIORS.STRIDES = [8, 16, 32, 64, 100, 300]
30 | _C.MODEL.PRIORS.MIN_SIZES = [30, 60, 111, 162, 213, 264]
31 | _C.MODEL.PRIORS.MAX_SIZES = [60, 111, 162, 213, 264, 315]
32 | _C.MODEL.PRIORS.ASPECT_RATIOS = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
33 | # With 1 aspect ratio, every location has 4 boxes; with 2 aspect ratios, 6 boxes.
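# worked example for the formula on the next line (comment added for clarity):
#   ASPECT_RATIOS [2]    -> 2 + 1 * 2 = 4 boxes per location
#   ASPECT_RATIOS [2, 3] -> 2 + 2 * 2 = 6 boxes per location
# which is exactly the BOXES_PER_LOCATION list below.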
34 | # #boxes = 2 + #ratio * 2 35 | _C.MODEL.PRIORS.BOXES_PER_LOCATION = [4, 6, 6, 6, 4, 4] # number of boxes per feature map location 36 | _C.MODEL.PRIORS.CLIP = True 37 | 38 | # ----------------------------------------------------------------------------- 39 | # Box Head 40 | # ----------------------------------------------------------------------------- 41 | _C.MODEL.BOX_HEAD = CN() 42 | _C.MODEL.BOX_HEAD.NAME = 'SSDBoxHead' 43 | _C.MODEL.BOX_HEAD.PREDICTOR = 'SSDBoxPredictor' 44 | 45 | # ----------------------------------------------------------------------------- 46 | # INPUT 47 | # ----------------------------------------------------------------------------- 48 | _C.INPUT = CN() 49 | # Image size 50 | _C.INPUT.IMAGE_SIZE = 300 51 | # Values to be used for image normalization, RGB layout 52 | _C.INPUT.PIXEL_MEAN = [123, 117, 104] 53 | 54 | # ----------------------------------------------------------------------------- 55 | # Dataset 56 | # ----------------------------------------------------------------------------- 57 | _C.DATASETS = CN() 58 | # List of the dataset names for training, as present in paths_catalog.py 59 | _C.DATASETS.TRAIN = () 60 | # List of the dataset names for testing, as present in paths_catalog.py 61 | _C.DATASETS.TEST = () 62 | 63 | # ----------------------------------------------------------------------------- 64 | # DataLoader 65 | # ----------------------------------------------------------------------------- 66 | _C.DATA_LOADER = CN() 67 | # Number of data loading threads 68 | _C.DATA_LOADER.NUM_WORKERS = 8 69 | _C.DATA_LOADER.PIN_MEMORY = True 70 | 71 | # ---------------------------------------------------------------------------- # 72 | # Solver 73 | # ---------------------------------------------------------------------------- # 74 | _C.SOLVER = CN() 75 | # train configs 76 | _C.SOLVER.MAX_ITER = 120000 77 | _C.SOLVER.LR_STEPS = [80000, 100000] 78 | _C.SOLVER.GAMMA = 0.1 79 | _C.SOLVER.BATCH_SIZE = 32 80 | _C.SOLVER.LR = 1e-3 81 | _C.SOLVER.MOMENTUM = 0.9 82 | _C.SOLVER.WEIGHT_DECAY = 5e-4 83 | _C.SOLVER.WARMUP_FACTOR = 1.0 / 3 84 | _C.SOLVER.WARMUP_ITERS = 500 85 | 86 | # ---------------------------------------------------------------------------- # 87 | # Specific test options 88 | # ---------------------------------------------------------------------------- # 89 | _C.TEST = CN() 90 | _C.TEST.NMS_THRESHOLD = 0.45 91 | _C.TEST.CONFIDENCE_THRESHOLD = 0.01 92 | _C.TEST.MAX_PER_CLASS = -1 93 | _C.TEST.MAX_PER_IMAGE = 100 94 | _C.TEST.BATCH_SIZE = 10 95 | 96 | _C.OUTPUT_DIR = 'outputs' 97 | -------------------------------------------------------------------------------- /ssd/ssd/config/path_catlog.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class DatasetCatalog: 5 | # DATA_DIR = 'datasets' 6 | # DATA_DIR = 'F:/datas/VOC/VOC_ROOT' 7 | # DATA_DIR = 'F:/datas/VOC/VisDrone_ROOT' 8 | DATA_DIR = '/mnt/data/vehicle_reid/ssd/data/voc/VisDrone_ROOT/' 9 | # DATA_DIR = '/home/chenmingsong/coding_data/coco/VOC_ROOT/VOC2012' 10 | DATASETS = { 11 | 'VisDrone_2019_train': { 12 | "data_dir": "DET2019", 13 | "split": "train" 14 | }, 15 | 'VisDrone_2019__val': { 16 | "data_dir": "DET2019", 17 | "split": "val" 18 | }, 19 | 'VisDrone_2019__trainval': { 20 | "data_dir": "DET2019", 21 | "split": "trainval" 22 | }, 23 | 'VisDrone_2019__test': { 24 | "data_dir": "DET2019", 25 | "split": "test" 26 | }, 27 | 'voc_2007_train': { 28 | "data_dir": "VOC2007", 29 | "split": "train" 30 | }, 31 | 'voc_2007_val': { 32 | 
"data_dir": "VOC2007", 33 | "split": "val" 34 | }, 35 | 'voc_2007_trainval': { 36 | "data_dir": "VOC2007", 37 | "split": "trainval" 38 | }, 39 | 'voc_2007_test': { 40 | "data_dir": "VOC2007", 41 | "split": "test" 42 | }, 43 | 'voc_2012_train': { 44 | "data_dir": "VOC2012", 45 | "split": "train" 46 | }, 47 | 'voc_2012_val': { 48 | "data_dir": "VOC2012", 49 | "split": "val" 50 | }, 51 | 'voc_2012_trainval': { 52 | "data_dir": "VOC2012", 53 | "split": "trainval" 54 | }, 55 | 'voc_2012_test': { 56 | "data_dir": "VOC2012", 57 | "split": "test" 58 | }, 59 | 'coco_2014_valminusminival': { 60 | "data_dir": "val2014", 61 | "ann_file": "annotations/instances_valminusminival2014.json" 62 | }, 63 | 'coco_2014_minival': { 64 | "data_dir": "val2014", 65 | "ann_file": "annotations/instances_minival2014.json" 66 | }, 67 | 'coco_2014_train': { 68 | "data_dir": "train2014", 69 | "ann_file": "annotations/instances_train2014.json" 70 | }, 71 | 'coco_2014_val': { 72 | "data_dir": "val2014", 73 | "ann_file": "annotations/instances_val2014.json" 74 | }, 75 | } 76 | 77 | @staticmethod 78 | def get(name): 79 | if 'VisDrone' in name: 80 | visdron_root = DatasetCatalog.DATA_DIR 81 | # voc_root = DatasetCatalog.DATA_DIR 82 | if 'VisDrone_ROOT' in os.environ: 83 | voc_root = os.environ['VisDrone_ROOT'] 84 | attrs = DatasetCatalog.DATASETS[name] 85 | args = dict( 86 | data_dir=os.path.join(visdron_root, attrs["data_dir"]), 87 | split=attrs["split"], 88 | ) 89 | return dict(factory="VisDroneDataset", args=args) 90 | 91 | elif "voc" in name: 92 | voc_root = DatasetCatalog.DATA_DIR 93 | if 'VOC_ROOT' in os.environ: 94 | voc_root = os.environ['VOC_ROOT'] 95 | attrs = DatasetCatalog.DATASETS[name] 96 | args = dict( 97 | data_dir=os.path.join(voc_root, attrs["data_dir"]), 98 | split=attrs["split"], 99 | ) 100 | return dict(factory="VOCDataset", args=args) 101 | 102 | elif "coco" in name: 103 | coco_root = DatasetCatalog.DATA_DIR 104 | if 'COCO_ROOT' in os.environ: 105 | coco_root = os.environ['COCO_ROOT'] 106 | 107 | attrs = DatasetCatalog.DATASETS[name] 108 | args = dict( 109 | data_dir=os.path.join(coco_root, attrs["data_dir"]), 110 | ann_file=os.path.join(coco_root, attrs["ann_file"]), 111 | ) 112 | return dict(factory="COCODataset", args=args) 113 | 114 | raise RuntimeError("Dataset not available: {}".format(name)) 115 | -------------------------------------------------------------------------------- /ssd/ssd/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/__init__.py -------------------------------------------------------------------------------- /ssd/ssd/data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/__pycache__/build.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/__pycache__/build.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/build.py: 
/ssd/ssd/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/__init__.py -------------------------------------------------------------------------------- /ssd/ssd/data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/__pycache__/build.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/__pycache__/build.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/build.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | from torch.utils.data.dataloader import default_collate 4 | 5 | from ssd.data import samplers 6 | from ssd.data.datasets import build_dataset 7 | from ssd.data.transforms import build_transforms, build_target_transform 8 | from ssd.structures.container import Container 9 | 10 | 11 | class BatchCollator: 12 | def __init__(self, is_train=True): 13 | self.is_train = is_train 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = default_collate(transposed_batch[0]) 18 | img_ids = default_collate(transposed_batch[2]) 19 | 20 | if self.is_train: 21 | list_targets = transposed_batch[1] 22 | targets = Container( 23 | {key: default_collate([d[key] for d in list_targets]) for key in list_targets[0]} 24 | ) 25 | else: 26 | targets = None 27 | return images, targets, img_ids 28 | 29 | 30 | def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start_iter=0): 31 | train_transform = build_transforms(cfg, is_train=is_train) 32 | target_transform = build_target_transform(cfg) if is_train else None 33 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 34 | print('Datasets:') 35 | print(dataset_list) 36 | # 1. First, build the dataset(s) 37 | 38 | datasets = build_dataset(dataset_list, transform=train_transform, target_transform=target_transform, is_train=is_train) 39 | 40 | shuffle = is_train or distributed 41 | 42 | data_loaders = [] 43 | 44 | for dataset in datasets: 45 | if distributed: 46 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 47 | elif shuffle: 48 | sampler = torch.utils.data.RandomSampler(dataset) 49 | else: 50 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 51 | 52 | batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATCH_SIZE 53 | # training uses SOLVER.BATCH_SIZE (32 by default), testing uses TEST.BATCH_SIZE 54 | batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False) 55 | if max_iter is not None: 56 | batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iterations=max_iter, start_iter=start_iter) 57 | 58 | # 2. Then build the DataLoader, specifying the number of CPU workers and the batch_sampler 59 | data_loader = DataLoader(dataset, num_workers=cfg.DATA_LOADER.NUM_WORKERS, batch_sampler=batch_sampler, 60 | pin_memory=cfg.DATA_LOADER.PIN_MEMORY, collate_fn=BatchCollator(is_train)) 61 | data_loaders.append(data_loader) 62 | 63 | if is_train: 64 | # during training, a single (possibly concatenated) data_loader is returned 65 | assert len(data_loaders) == 1 66 | return data_loaders[0] 67 | return data_loaders 68 | --------------------------------------------------------------------------------
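A sketch of the training-side call, assuming a merged `cfg` whose DATASETS.TRAIN names an entry in the catalog; the unpacking follows the BatchCollator contract above:

    from ssd.data.build import make_data_loader

    loader = make_data_loader(cfg, is_train=True, max_iter=cfg.SOLVER.MAX_ITER)
    for images, targets, img_ids in loader:
        # images: [N, 3, IMAGE_SIZE, IMAGE_SIZE] float tensor;
        # targets['boxes']: [N, num_priors, 4] and targets['labels']: [N, num_priors]
        # once SSDTargetTransform has matched ground truth to the priors.
        break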
/ssd/ssd/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import ConcatDataset 2 | 3 | from ssd.config.path_catlog import DatasetCatalog 4 | from .voc import VOCDataset 5 | from .coco import COCODataset 6 | from .visdrone import VisDroneDataset 7 | 8 | _DATASETS = { 9 | 'VOCDataset': VOCDataset, 10 | 'COCODataset': COCODataset, 11 | 'VisDroneDataset': VisDroneDataset 12 | } 13 | 14 | 15 | def build_dataset(dataset_list, transform=None, target_transform=None, is_train=True): 16 | # build each dataset through the factory mapping above 17 | assert len(dataset_list) > 0 18 | datasets = [] 19 | for dataset_name in dataset_list: 20 | data = DatasetCatalog.get(dataset_name) 21 | args = data['args'] 22 | factory = _DATASETS[data['factory']] 23 | args['transform'] = transform 24 | args['target_transform'] = target_transform 25 | if factory == VOCDataset: 26 | args['keep_difficult'] = not is_train 27 | elif factory == COCODataset: 28 | args['remove_empty'] = is_train 29 | dataset = factory(**args) 30 | datasets.append(dataset) 31 | # for testing, return a list of datasets 32 | if not is_train: 33 | return datasets 34 | dataset = datasets[0] 35 | if len(datasets) > 1: 36 | dataset = ConcatDataset(datasets) 37 | 38 | return [dataset] 39 | -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/__pycache__/coco.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/__pycache__/coco.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/__pycache__/visdrone.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/__pycache__/visdrone.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/__pycache__/voc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/__pycache__/voc.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/coco.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import torch.utils.data 3 | import numpy as np 4 | from PIL import Image 5 | 6 | from ssd.structures.container import Container 7 | 8 | 9 | class COCODataset(torch.utils.data.Dataset): 10 | class_names = ('__background__', 11 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 12 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 13 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 14 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 15 | 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 16 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 17 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 18 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 19 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 20 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 21 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 22 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 23 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 24 | 'refrigerator', 'book', 'clock', 'vase', 'scissors', 25 | 'teddy bear', 'hair drier', 'toothbrush') 26 | 27 | def __init__(self, data_dir, ann_file, transform=None, target_transform=None, remove_empty=False): 28 | from pycocotools.coco import COCO 29 | self.coco = COCO(ann_file) 30 | self.data_dir = data_dir 31 | self.transform = transform 32 | self.target_transform = target_transform 33 | self.remove_empty = remove_empty 34 | if self.remove_empty: 35 | # when training, images without annotations are removed. 36 | self.ids = list(self.coco.imgToAnns.keys()) 37 | else: 38 | # when testing, all images used. 39 | self.ids = list(self.coco.imgs.keys()) 40 | coco_categories = sorted(self.coco.getCatIds()) 41 | self.coco_id_to_contiguous_id = {coco_id: i + 1 for i, coco_id in enumerate(coco_categories)} 42 | self.contiguous_id_to_coco_id = {v: k for k, v in self.coco_id_to_contiguous_id.items()} 43 | 44 | def __getitem__(self, index): 45 | image_id = self.ids[index] 46 | boxes, labels = self._get_annotation(image_id) 47 | image = self._read_image(image_id) 48 | if self.transform: 49 | image, boxes, labels = self.transform(image, boxes, labels) 50 | if self.target_transform: 51 | boxes, labels = self.target_transform(boxes, labels) 52 | targets = Container( 53 | boxes=boxes, 54 | labels=labels, 55 | ) 56 | return image, targets, index 57 | 58 | def get_annotation(self, index): 59 | image_id = self.ids[index] 60 | return image_id, self._get_annotation(image_id) 61 | 62 | def __len__(self): 63 | return len(self.ids) 64 | 65 | def _get_annotation(self, image_id): 66 | ann_ids = self.coco.getAnnIds(imgIds=image_id) 67 | ann = self.coco.loadAnns(ann_ids) 68 | # filter crowd annotations 69 | ann = [obj for obj in ann if obj["iscrowd"] == 0] 70 | boxes = np.array([self._xywh2xyxy(obj["bbox"]) for obj in ann], np.float32).reshape((-1, 4)) 71 | labels = np.array([self.coco_id_to_contiguous_id[obj["category_id"]] for obj in ann], np.int64).reshape((-1,)) 72 | # remove invalid boxes 73 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 74 | boxes = boxes[keep] 75 | labels = labels[keep] 76 | return boxes, labels 77 | 78 | def _xywh2xyxy(self, box): 79 | x1, y1, w, h = box 80 | return [x1, y1, x1 + w, y1 + h] 81 | 82 | def get_img_info(self, index): 83 | image_id = self.ids[index] 84 | img_data = self.coco.imgs[image_id] 85 | return img_data 86 | 87 | def 
_read_image(self, image_id): 88 | file_name = self.coco.loadImgs(image_id)[0]['file_name'] 89 | image_file = os.path.join(self.data_dir, file_name) 90 | image = Image.open(image_file).convert("RGB") 91 | image = np.array(image) 92 | return image 93 | -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.data.datasets import VOCDataset, COCODataset, VisDroneDataset 2 | from .coco import coco_evaluation 3 | from .voc import voc_evaluation 4 | from .visdrone import visdrone_evaluation 5 | 6 | 7 | def evaluate(dataset, predictions, output_dir, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[(boxes, labels, scores)]): Each item in the list represents the 12 | prediction results for one image. And the index should match the dataset index. 13 | output_dir: output folder, to save evaluation files or results. 14 | Returns: 15 | evaluation result 16 | """ 17 | args = dict( 18 | dataset=dataset, predictions=predictions, output_dir=output_dir, **kwargs, 19 | ) 20 | if isinstance(dataset, VOCDataset): 21 | return voc_evaluation(**args) 22 | elif isinstance(dataset, COCODataset): 23 | return coco_evaluation(**args) 24 | elif isinstance(dataset, VisDroneDataset): 25 | return visdrone_evaluation(**args) 26 | else: 27 | raise NotImplementedError 28 | -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/evaluation/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from datetime import datetime 5 | 6 | 7 | def coco_evaluation(dataset, predictions, output_dir, iteration=None): 8 | coco_results = [] 9 | for i, prediction in enumerate(predictions): 10 | img_info = dataset.get_img_info(i) 11 | prediction = prediction.resize((img_info['width'], img_info['height'])).numpy() 12 | boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores'] 13 | 14 | image_id, annotation = dataset.get_annotation(i) 15 | class_mapper = dataset.contiguous_id_to_coco_id 16 | if labels.shape[0] == 0: 17 | continue 18 | 19 | boxes = boxes.tolist() 20 | labels = labels.tolist() 21 | scores = scores.tolist() 22 | coco_results.extend( 23 | [ 24 | { 25 | "image_id": image_id, 26 | "category_id": class_mapper[labels[k]], 27 | "bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]], # to xywh format 28 | "score": scores[k], 29 | } 30 | for k, box in enumerate(boxes) 31 | ] 32 | ) 33 | iou_type = 'bbox' 34 | json_result_file = os.path.join(output_dir, iou_type + ".json") 35 | logger = logging.getLogger("SSD.inference") 36 | logger.info('Writing results to {}...'.format(json_result_file)) 37 | with open(json_result_file, "w") as f: 38 | json.dump(coco_results, f) 39 | from pycocotools.cocoeval import COCOeval 40 | coco_gt = dataset.coco 41 | coco_dt = coco_gt.loadRes(json_result_file) 42 | coco_eval = 
COCOeval(coco_gt, coco_dt, iou_type) 43 | coco_eval.evaluate() 44 | coco_eval.accumulate() 45 | coco_eval.summarize() 46 | 47 | result_strings = [] 48 | keys = ["AP", "AP50", "AP75", "APs", "APm", "APl"] 49 | metrics = {} 50 | for i, key in enumerate(keys): 51 | metrics[key] = coco_eval.stats[i] 52 | logger.info('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3))) 53 | result_strings.append('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3))) 54 | 55 | if iteration is not None: 56 | result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration)) 57 | else: 58 | result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) 59 | with open(result_path, "w") as f: 60 | f.write('\n'.join(result_strings)) 61 | 62 | return dict(metrics=metrics) 63 | -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/coco/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/evaluation/coco/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/visdrone/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from datetime import datetime 4 | 5 | import numpy as np 6 | 7 | from .eval_detection_voc import eval_detection_voc 8 | 9 | 10 | def visdrone_evaluation(dataset, predictions, output_dir, iteration=None): 11 | class_names = dataset.class_names 12 | 13 | pred_boxes_list = [] 14 | pred_labels_list = [] 15 | pred_scores_list = [] 16 | gt_boxes_list = [] 17 | gt_labels_list = [] 18 | gt_difficults = [] 19 | 20 | for i in range(len(dataset)): 21 | image_id, annotation = dataset.get_annotation(i) 22 | gt_boxes, gt_labels, is_difficult = annotation 23 | gt_boxes_list.append(gt_boxes) 24 | gt_labels_list.append(gt_labels) 25 | gt_difficults.append(is_difficult.astype(bool)) 26 | 27 | img_info = dataset.get_img_info(i) 28 | prediction = predictions[i] 29 | prediction = prediction.resize((img_info['width'], img_info['height'])).numpy() 30 | boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores'] 31 | 32 | pred_boxes_list.append(boxes) 33 | pred_labels_list.append(labels) 34 | pred_scores_list.append(scores) 35 | result = eval_detection_voc(pred_bboxes=pred_boxes_list, 36 | pred_labels=pred_labels_list, 37 | pred_scores=pred_scores_list, 38 | gt_bboxes=gt_boxes_list, 39 | gt_labels=gt_labels_list, 40 | gt_difficults=gt_difficults, 41 | iou_thresh=0.5, 42 | use_07_metric=True) 43 | logger = logging.getLogger("SSD.inference") 44 | result_str = "mAP: {:.4f}\n".format(result["map"]) 45 | metrics = {'mAP': result["map"]} 46 | for i, ap in enumerate(result["ap"]): 47 | if i == 0: # skip background 48 | continue 49 | metrics[class_names[i]] = ap 50 | result_str += "{:<16}: {:.4f}\n".format(class_names[i], ap) 51 | logger.info(result_str) 52 | 53 | if iteration is not None: 54 | result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration)) 55 | else: 56 | result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) 57 | with open(result_path, "w") as f: 58 | f.write(result_str) 59 | 60 | return dict(metrics=metrics) 61 | 
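The VisDrone evaluator simply reuses the PASCAL VOC protocol through eval_detection_voc. A toy call with hand-made arrays (illustrative values; one image, two detections, one ground-truth box in corner form):

    import numpy as np
    from ssd.data.datasets.evaluation.visdrone.eval_detection_voc import eval_detection_voc

    result = eval_detection_voc(
        pred_bboxes=[np.array([[10, 10, 50, 50], [60, 60, 90, 90]], np.float32)],
        pred_labels=[np.array([1, 1])],
        pred_scores=[np.array([0.9, 0.3])],
        gt_bboxes=[np.array([[12, 12, 48, 48]], np.float32)],
        gt_labels=[np.array([1])],
        gt_difficults=[np.array([False])],
        iou_thresh=0.5,
        use_07_metric=True)
    print(result['map'], result['ap'])  # 'ap' is indexed by class id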
-------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/visdrone/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/evaluation/visdrone/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/visdrone/__pycache__/eval_detection_voc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/evaluation/visdrone/__pycache__/eval_detection_voc.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from datetime import datetime 4 | 5 | import numpy as np 6 | 7 | from .eval_detection_voc import eval_detection_voc 8 | 9 | 10 | def voc_evaluation(dataset, predictions, output_dir, iteration=None): 11 | class_names = dataset.class_names 12 | 13 | pred_boxes_list = [] 14 | pred_labels_list = [] 15 | pred_scores_list = [] 16 | gt_boxes_list = [] 17 | gt_labels_list = [] 18 | gt_difficults = [] 19 | 20 | for i in range(len(dataset)): 21 | image_id, annotation = dataset.get_annotation(i) 22 | gt_boxes, gt_labels, is_difficult = annotation 23 | gt_boxes_list.append(gt_boxes) 24 | gt_labels_list.append(gt_labels) 25 | gt_difficults.append(is_difficult.astype(bool)) 26 | 27 | img_info = dataset.get_img_info(i) 28 | prediction = predictions[i] 29 | prediction = prediction.resize((img_info['width'], img_info['height'])).numpy() 30 | boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores'] 31 | 32 | pred_boxes_list.append(boxes) 33 | pred_labels_list.append(labels) 34 | pred_scores_list.append(scores) 35 | result = eval_detection_voc(pred_bboxes=pred_boxes_list, 36 | pred_labels=pred_labels_list, 37 | pred_scores=pred_scores_list, 38 | gt_bboxes=gt_boxes_list, 39 | gt_labels=gt_labels_list, 40 | gt_difficults=gt_difficults, 41 | iou_thresh=0.5, 42 | use_07_metric=True) 43 | logger = logging.getLogger("SSD.inference") 44 | result_str = "mAP: {:.4f}\n".format(result["map"]) 45 | metrics = {'mAP': result["map"]} 46 | for i, ap in enumerate(result["ap"]): 47 | if i == 0: # skip background 48 | continue 49 | metrics[class_names[i]] = ap 50 | result_str += "{:<16}: {:.4f}\n".format(class_names[i], ap) 51 | logger.info(result_str) 52 | 53 | if iteration is not None: 54 | result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration)) 55 | else: 56 | result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) 57 | with open(result_path, "w") as f: 58 | f.write(result_str) 59 | 60 | return dict(metrics=metrics) 61 | -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/voc/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/evaluation/voc/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/datasets/evaluation/voc/__pycache__/eval_detection_voc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/datasets/evaluation/voc/__pycache__/eval_detection_voc.cpython-36.pyc --------------------------------------------------------------------------------
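Both evaluators pass use_07_metric=True, i.e. the VOC2007 11-point interpolated AP. A self-contained restatement of that rule (a sketch of the definition, not a copy of eval_detection_voc):

    import numpy as np

    def voc07_ap(recall, precision):
        # average, over r in {0.0, 0.1, ..., 1.0}, of the best precision
        # achieved at any recall >= r
        ap = 0.0
        for t in np.linspace(0.0, 1.0, 11):
            mask = recall >= t
            ap += (precision[mask].max() if mask.any() else 0.0) / 11.0
        return ap

    print(voc07_ap(np.array([0.5, 1.0]), np.array([1.0, 0.5])))  # ~0.7727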
/ssd/ssd/data/datasets/visdrone.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch.utils.data 3 | import numpy as np 4 | import xml.etree.ElementTree as ET 5 | from PIL import Image 6 | 7 | from ssd.structures.container import Container 8 | 9 | 10 | class VisDroneDataset(torch.utils.data.Dataset): 11 | # dataset selection is driven by cfg.DATASETS through DatasetCatalog 12 | # 11 object categories plus 'ignored regions' at index 0, standing in for background 13 | # loading mirrors VOCDataset: read the converted files, adjust the class names, then check the main entry script 14 | class_names = ('ignored regions', 'pedestrian', 'people', 'bicycle', 'car', 'van', 15 | 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor', 'others') 16 | 17 | def __init__(self, data_dir, split, transform=None, target_transform=None, keep_difficult=False): 18 | """Dataset for VisDrone data converted to VOC layout. 19 | Args: 20 | data_dir: the root of the converted dataset; the directory contains the following sub-directories: 21 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject. 22 | Note: mapping occlusion == 1 to VOC's 'difficult' flag broke training, so 'difficult' defaults to 0 here, which trains fine. Known quirk: a single training process can occupy half the memory of two GPUs at once. 23 | """ 24 | self.data_dir = data_dir 25 | # self.data_dir = "/home/chenmingsong/coding_data/coco/VOC_ROOT" 26 | self.split = split 27 | self.transform = transform 28 | self.target_transform = target_transform 29 | image_sets_file = os.path.join(self.data_dir, "ImageSets", "Main", "%s.txt" % self.split) 30 | # paths involved 31 | print('Paths used for training:') 32 | print('image_sets_file:{}'.format(image_sets_file)) 33 | self.ids = VisDroneDataset._read_image_ids(image_sets_file) 34 | self.keep_difficult = keep_difficult 35 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} 36 | 37 | def __getitem__(self, index): 38 | image_id = self.ids[index] 39 | boxes, labels, is_difficult = self._get_annotation(image_id) 40 | if not self.keep_difficult: 41 | boxes = boxes[is_difficult == 0] 42 | labels = labels[is_difficult == 0] 43 | image = self._read_image(image_id) 44 | if self.transform: 45 | image, boxes, labels = self.transform(image, boxes, labels) 46 | if self.target_transform: 47 | boxes, labels = self.target_transform(boxes, labels) 48 | targets = Container( 49 | boxes=boxes, 50 | labels=labels, 51 | ) 52 | return image, targets, index 53 | 54 | def get_annotation(self, index): 55 | image_id = self.ids[index] 56 | return image_id, self._get_annotation(image_id) 57 | 58 | def __len__(self): 59 | return len(self.ids) 60 | 61 | @staticmethod 62 | def _read_image_ids(image_sets_file): 63 | ids = [] 64 | with open(image_sets_file) as f: 65 | for line in f: 66 | ids.append(line.rstrip()) 67 | return ids 68 | 69 | def _get_annotation(self, image_id): 70 | annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % image_id) 71 | # print('annotation_file1:{}'.format(annotation_file)) 72 | objects = ET.parse(annotation_file).findall("object") 73 | boxes = [] 74 | labels = [] 75 | is_difficult = [] 76 | for obj in objects: 77 | class_name = obj.find('name').text.lower().strip() 78 | bbox = obj.find('bndbox') 79 | # VOC dataset format follows Matlab, in which indexes start from 1 80 | x1 = float(bbox.find('xmin').text) - 1 81 | y1 = float(bbox.find('ymin').text) - 1 82 | x2 = float(bbox.find('xmax').text) - 1 83 | y2 = float(bbox.find('ymax').text) - 1 84 | boxes.append([x1, y1, x2, y2]) 85 | labels.append(self.class_dict[class_name]) 86 | # is_difficult_str = obj.find('difficult').text 87 | is_difficult_str = '0'  # always non-difficult; see the class docstring 88 | is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) 89 | 90 | return (np.array(boxes, dtype=np.float32), 91 | np.array(labels, dtype=np.int64), 92 | np.array(is_difficult, dtype=np.uint8)) 93 | 94 | def get_img_info(self, index): 95 | img_id = self.ids[index] 96 | annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % img_id) 97 | # print('annotation_file2:{}'.format(annotation_file)) 98 | anno = ET.parse(annotation_file).getroot() 99 | size = anno.find("size") 100 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 101 | return {"height": im_info[0], "width": im_info[1]} 102 | 103 | def _read_image(self, image_id): 104 | image_file = os.path.join(self.data_dir, "JPEGImages", "%s.jpg" % image_id) 105 | # print('image_file:{}'.format(image_file)) 106 | image = Image.open(image_file).convert("RGB") 107 | image = np.array(image) 108 | return image 109 | --------------------------------------------------------------------------------
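A minimal sketch of loading one VisDrone sample directly (hypothetical root path; with no transforms, the raw HxWx3 numpy image and corner-form [x1, y1, x2, y2] boxes come back):

    from ssd.data.datasets.visdrone import VisDroneDataset

    ds = VisDroneDataset(data_dir='/data/VisDrone_ROOT/DET2019', split='train')
    image_id, (boxes, labels, is_difficult) = ds.get_annotation(0)
    image, targets, index = ds[0]
    print(len(ds), image.shape, boxes.shape, labels[:5])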
/ssd/ssd/data/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch.utils.data 3 | import numpy as np 4 | import xml.etree.ElementTree as ET 5 | from PIL import Image 6 | 7 | from ssd.structures.container import Container 8 | 9 | 10 | class VOCDataset(torch.utils.data.Dataset): 11 | # dataset selection is driven by cfg.DATASETS through DatasetCatalog 12 | # 20 VOC classes plus '__background__' 13 | class_names = ('__background__', 14 | 'aeroplane', 'bicycle', 'bird', 'boat', 15 | 'bottle', 'bus', 'car', 'cat', 'chair', 16 | 'cow', 'diningtable', 'dog', 'horse', 17 | 'motorbike', 'person', 'pottedplant', 18 | 'sheep', 'sofa', 'train', 'tvmonitor') 19 | 20 | def __init__(self, data_dir, split, transform=None, target_transform=None, keep_difficult=False): 21 | """Dataset for VOC data. 22 | Args: 23 | data_dir: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories: 24 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
25 | """ 26 | self.data_dir = data_dir 27 | # print("*******") 28 | # print(data_dir) 29 | # self.data_dir = "/home/chenmingsong/coding_data/coco/VOC_ROOT" 30 | self.split = split 31 | self.transform = transform 32 | self.target_transform = target_transform 33 | image_sets_file = os.path.join(self.data_dir, "ImageSets", "Main", "%s.txt" % self.split) 34 | # 涉及到的路径信息 35 | print('训练涉及到的路径信息') 36 | print('image_sets_file:{}'.format(image_sets_file)) 37 | self.ids = VOCDataset._read_image_ids(image_sets_file) 38 | self.keep_difficult = keep_difficult 39 | 40 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} 41 | 42 | def __getitem__(self, index): 43 | image_id = self.ids[index] 44 | boxes, labels, is_difficult = self._get_annotation(image_id) 45 | if not self.keep_difficult: 46 | boxes = boxes[is_difficult == 0] 47 | labels = labels[is_difficult == 0] 48 | image = self._read_image(image_id) 49 | if self.transform: 50 | image, boxes, labels = self.transform(image, boxes, labels) 51 | if self.target_transform: 52 | boxes, labels = self.target_transform(boxes, labels) 53 | targets = Container( 54 | boxes=boxes, 55 | labels=labels, 56 | ) 57 | return image, targets, index 58 | 59 | def get_annotation(self, index): 60 | image_id = self.ids[index] 61 | return image_id, self._get_annotation(image_id) 62 | 63 | def __len__(self): 64 | return len(self.ids) 65 | 66 | @staticmethod 67 | def _read_image_ids(image_sets_file): 68 | ids = [] 69 | with open(image_sets_file) as f: 70 | for line in f: 71 | ids.append(line.rstrip()) 72 | return ids 73 | 74 | def _get_annotation(self, image_id): 75 | annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % image_id) 76 | # print('annotation_file1:{}'.format(annotation_file)) 77 | objects = ET.parse(annotation_file).findall("object") 78 | boxes = [] 79 | labels = [] 80 | is_difficult = [] 81 | for obj in objects: 82 | class_name = obj.find('name').text.lower().strip() 83 | bbox = obj.find('bndbox') 84 | # VOC dataset format follows Matlab, in which indexes start from 0 85 | x1 = float(bbox.find('xmin').text) - 1 86 | y1 = float(bbox.find('ymin').text) - 1 87 | x2 = float(bbox.find('xmax').text) - 1 88 | y2 = float(bbox.find('ymax').text) - 1 89 | boxes.append([x1, y1, x2, y2]) 90 | labels.append(self.class_dict[class_name]) 91 | is_difficult_str = obj.find('difficult').text 92 | is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) 93 | 94 | return (np.array(boxes, dtype=np.float32), 95 | np.array(labels, dtype=np.int64), 96 | np.array(is_difficult, dtype=np.uint8)) 97 | 98 | def get_img_info(self, index): 99 | img_id = self.ids[index] 100 | annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % img_id) 101 | # print('annotation_file2:{}'.format(annotation_file)) 102 | anno = ET.parse(annotation_file).getroot() 103 | size = anno.find("size") 104 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 105 | return {"height": im_info[0], "width": im_info[1]} 106 | 107 | def _read_image(self, image_id): 108 | image_file = os.path.join(self.data_dir, "JPEGImages", "%s.jpg" % image_id) 109 | # print('image_file:{}'.format(image_file)) 110 | image = Image.open(image_file).convert("RGB") 111 | image = np.array(image) 112 | return image 113 | -------------------------------------------------------------------------------- /ssd/ssd/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.iteration_based_batch_sampler import IterationBasedBatchSampler 2 | from .distributed import DistributedSampler 3 | 4 | __all__ = ['IterationBasedBatchSampler', 'DistributedSampler'] 5 | -------------------------------------------------------------------------------- /ssd/ssd/data/samplers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/samplers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/samplers/__pycache__/distributed.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/samplers/__pycache__/distributed.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/samplers/__pycache__/iteration_based_batch_sampler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/samplers/__pycache__/iteration_based_batch_sampler.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 
23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset: offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /ssd/ssd/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.sampler import BatchSampler 2 | 3 | 4 | class IterationBasedBatchSampler(BatchSampler): 5 | """ 6 | Wraps a BatchSampler, re-sampling from it until 7 | a specified number of iterations have been sampled 8 | """ 9 | 10 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 11 | self.batch_sampler = batch_sampler 12 | self.num_iterations = num_iterations 13 | self.start_iter = start_iter 14 | 15 | def __iter__(self): 16 | iteration = self.start_iter 17 | while iteration <= self.num_iterations: 18 | # if the underlying sampler has a set_epoch method, like 19 | # DistributedSampler, used for making each process see 20 | # a different split of the dataset, then set it 21 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 22 | self.batch_sampler.sampler.set_epoch(iteration) 23 | for batch in self.batch_sampler: 24 | iteration += 1 25 | if iteration > self.num_iterations: 26 | break 27 | yield batch 28 | 29 | def __len__(self): 30 | return self.num_iterations 31 | -------------------------------------------------------------------------------- /ssd/ssd/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.modeling.anchors.prior_box import PriorBox 2 | from .target_transform import SSDTargetTransform 3 | from .transforms import * 4 | 5 | 6 | def build_transforms(cfg, is_train=True): 7 | if is_train: 8 | transform = [ 9 | ConvertFromInts(), 10 | PhotometricDistort(), 11 | Expand(cfg.INPUT.PIXEL_MEAN), 12 | RandomSampleCrop(), 13 | RandomMirror(), 14 | ToPercentCoords(), 15 | Resize(cfg.INPUT.IMAGE_SIZE), 16 | SubtractMeans(cfg.INPUT.PIXEL_MEAN), 17 | ToTensor(), 18 | ] 19 | else: 20 | transform = [ 21 | Resize(cfg.INPUT.IMAGE_SIZE), 22 | SubtractMeans(cfg.INPUT.PIXEL_MEAN), 23 | ToTensor() 24 | ] 25 | transform = 
Compose(transform) 26 | return transform 27 | 28 | 29 | def build_target_transform(cfg): 30 | transform = SSDTargetTransform(PriorBox(cfg)(), 31 | cfg.MODEL.CENTER_VARIANCE, 32 | cfg.MODEL.SIZE_VARIANCE, 33 | cfg.MODEL.THRESHOLD) 34 | return transform 35 | -------------------------------------------------------------------------------- /ssd/ssd/data/transforms/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/transforms/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/transforms/__pycache__/target_transform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/transforms/__pycache__/target_transform.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/transforms/__pycache__/transforms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/data/transforms/__pycache__/transforms.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/data/transforms/target_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from ssd.utils import box_utils 5 | 6 | 7 | class SSDTargetTransform: 8 | def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold): 9 | self.center_form_priors = center_form_priors 10 | self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors) 11 | self.center_variance = center_variance 12 | self.size_variance = size_variance 13 | self.iou_threshold = iou_threshold 14 | 15 | def __call__(self, gt_boxes, gt_labels): 16 | if type(gt_boxes) is np.ndarray: 17 | gt_boxes = torch.from_numpy(gt_boxes) 18 | if type(gt_labels) is np.ndarray: 19 | gt_labels = torch.from_numpy(gt_labels) 20 | boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels, 21 | self.corner_form_priors, self.iou_threshold) 22 | boxes = box_utils.corner_form_to_center_form(boxes) 23 | locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance) 24 | 25 | return locations, labels 26 | -------------------------------------------------------------------------------- /ssd/ssd/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/engine/__init__.py -------------------------------------------------------------------------------- /ssd/ssd/engine/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/engine/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/engine/__pycache__/inference.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/engine/__pycache__/inference.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/engine/__pycache__/trainer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/engine/__pycache__/trainer.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/engine/inference.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import torch 5 | import torch.utils.data 6 | from tqdm import tqdm 7 | 8 | from ssd.data.build import make_data_loader 9 | from ssd.data.datasets.evaluation import evaluate 10 | 11 | from ssd.utils import dist_util, mkdir 12 | from ssd.utils.dist_util import synchronize, is_main_process 13 | 14 | 15 | def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu): 16 | all_predictions = dist_util.all_gather(predictions_per_gpu) 17 | if not dist_util.is_main_process(): 18 | return 19 | # merge the list of dicts 20 | predictions = {} 21 | for p in all_predictions: 22 | predictions.update(p) 23 | # convert a dict where the key is the index in a list 24 | image_ids = list(sorted(predictions.keys())) 25 | if len(image_ids) != image_ids[-1] + 1: 26 | logger = logging.getLogger("SSD.inference") 27 | logger.warning( 28 | "Number of images that were gathered from multiple processes is not " 29 | "a contiguous set. Some images might be missing from the evaluation" 30 | ) 31 | 32 | # convert to a list 33 | predictions = [predictions[i] for i in image_ids] 34 | return predictions 35 | 36 | 37 | def compute_on_dataset(model, data_loader, device): 38 | results_dict = {} 39 | for batch in tqdm(data_loader): 40 | images, targets, image_ids = batch 41 | cpu_device = torch.device("cpu") 42 | with torch.no_grad(): 43 | outputs = model(images.to(device)) 44 | 45 | outputs = [o.to(cpu_device) for o in outputs] 46 | results_dict.update( 47 | {img_id: result for img_id, result in zip(image_ids, outputs)} 48 | ) 49 | return results_dict 50 | 51 | 52 | def inference(model, data_loader, dataset_name, device, output_folder=None, use_cached=False, **kwargs): 53 | dataset = data_loader.dataset 54 | logger = logging.getLogger("SSD.inference") 55 | logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(dataset))) 56 | predictions_path = os.path.join(output_folder, 'predictions.pth') 57 | if use_cached and os.path.exists(predictions_path): 58 | predictions = torch.load(predictions_path, map_location='cpu') 59 | else: 60 | predictions = compute_on_dataset(model, data_loader, device) 61 | synchronize() 62 | predictions = _accumulate_predictions_from_multiple_gpus(predictions) 63 | if not is_main_process(): 64 | return 65 | if output_folder: 66 | torch.save(predictions, predictions_path) 67 | return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder, **kwargs) 68 | 69 | 70 | @torch.no_grad() 71 | def do_evaluation(cfg, model, distributed, **kwargs): 72 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 73 | model = model.module 74 | model.eval() 75 | device = torch.device(cfg.MODEL.DEVICE) 76 | data_loaders_val = make_data_loader(cfg, 
is_train=False, distributed=distributed) 77 | eval_results = [] 78 | for dataset_name, data_loader in zip(cfg.DATASETS.TEST, data_loaders_val): 79 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 80 | if not os.path.exists(output_folder): 81 | mkdir(output_folder) 82 | eval_result = inference(model, data_loader, dataset_name, device, output_folder, **kwargs) 83 | eval_results.append(eval_result) 84 | return eval_results 85 | -------------------------------------------------------------------------------- /ssd/ssd/layers/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | from .separable_conv import SeparableConv2d 5 | 6 | __all__ = ['L2Norm', 'SeparableConv2d'] 7 | 8 | 9 | class L2Norm(nn.Module): 10 | def __init__(self, n_channels, scale): 11 | super(L2Norm, self).__init__() 12 | self.n_channels = n_channels 13 | self.gamma = scale or None 14 | self.eps = 1e-10 15 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 16 | self.reset_parameters() 17 | 18 | def reset_parameters(self): 19 | init.constant_(self.weight, self.gamma) 20 | 21 | def forward(self, x): 22 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps 23 | x = torch.div(x, norm) 24 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 25 | return out 26 | -------------------------------------------------------------------------------- /ssd/ssd/layers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/layers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/layers/__pycache__/separable_conv.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/layers/__pycache__/separable_conv.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/layers/separable_conv.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class SeparableConv2d(nn.Module): 5 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, onnx_compatible=False): 6 | super().__init__() 7 | ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 8 | self.conv = nn.Sequential( 9 | nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 10 | groups=in_channels, stride=stride, padding=padding), 11 | nn.BatchNorm2d(in_channels), 12 | ReLU(), 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 14 | ) 15 | 16 | def forward(self, x): 17 | return self.conv(x) 18 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/__init__.py -------------------------------------------------------------------------------- /ssd/ssd/modeling/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/__pycache__/registry.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/__pycache__/registry.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/anchors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/anchors/__init__.py -------------------------------------------------------------------------------- /ssd/ssd/modeling/anchors/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/anchors/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/anchors/__pycache__/prior_box.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/anchors/__pycache__/prior_box.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/anchors/prior_box.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | import torch 4 | from math import sqrt 5 | 6 | 7 | class PriorBox: 8 | def __init__(self, cfg): 9 | self.image_size = cfg.INPUT.IMAGE_SIZE 10 | prior_config = cfg.MODEL.PRIORS 11 | self.feature_maps = prior_config.FEATURE_MAPS 12 | self.min_sizes = prior_config.MIN_SIZES 13 | self.max_sizes = prior_config.MAX_SIZES 14 | self.strides = prior_config.STRIDES 15 | self.aspect_ratios = prior_config.ASPECT_RATIOS 16 | self.clip = prior_config.CLIP 17 | 18 | def __call__(self): 19 | """Generate SSD Prior Boxes. 20 | It returns the center, height and width of the priors. The values are relative to the image size 21 | Returns: 22 | priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values 23 | are relative to the image size. 
24 | """ 25 | priors = [] 26 | for k, f in enumerate(self.feature_maps): 27 | scale = self.image_size / self.strides[k] 28 | for i, j in product(range(f), repeat=2): 29 | # unit center x,y 30 | cx = (j + 0.5) / scale 31 | cy = (i + 0.5) / scale 32 | 33 | # small sized square box 34 | size = self.min_sizes[k] 35 | h = w = size / self.image_size 36 | priors.append([cx, cy, w, h]) 37 | 38 | # big sized square box 39 | size = sqrt(self.min_sizes[k] * self.max_sizes[k]) 40 | h = w = size / self.image_size 41 | priors.append([cx, cy, w, h]) 42 | 43 | # change h/w ratio of the small sized box 44 | size = self.min_sizes[k] 45 | h = w = size / self.image_size 46 | for ratio in self.aspect_ratios[k]: 47 | ratio = sqrt(ratio) 48 | priors.append([cx, cy, w * ratio, h / ratio]) 49 | priors.append([cx, cy, w / ratio, h * ratio]) 50 | 51 | priors = torch.tensor(priors) 52 | if self.clip: 53 | priors.clamp_(max=1, min=0) 54 | return priors 55 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.modeling import registry 2 | from .vgg import VGG 3 | from .mobilenet import MobileNetV2 4 | from .efficient_net import EfficientNet 5 | 6 | __all__ = ['build_backbone', 'VGG', 'MobileNetV2', 'EfficientNet'] 7 | 8 | 9 | def build_backbone(cfg): 10 | return registry.BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg, cfg.MODEL.BACKBONE.PRETRAINED) 11 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/backbone/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/__pycache__/mobilenet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/backbone/__pycache__/mobilenet.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/__pycache__/vgg.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/backbone/__pycache__/vgg.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/efficient_net/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.modeling import registry 2 | from .efficient_net import EfficientNet 3 | 4 | __all__ = ['efficient_net_b3', 'EfficientNet'] 5 | 6 | 7 | @registry.BACKBONES.register('efficient_net-b3') 8 | def efficient_net_b3(cfg, pretrained=True): 9 | if pretrained: 10 | model = EfficientNet.from_pretrained('efficientnet-b3') 11 | else: 12 | model = EfficientNet.from_name('efficientnet-b3') 13 | 14 | return model 15 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/efficient_net/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/backbone/efficient_net/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/efficient_net/__pycache__/efficient_net.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/backbone/efficient_net/__pycache__/efficient_net.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/efficient_net/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/backbone/efficient_net/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/mobilenet.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ssd.modeling import registry 4 | from ssd.utils.model_zoo import load_state_dict_from_url 5 | 6 | model_urls = { 7 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', 8 | } 9 | 10 | 11 | class ConvBNReLU(nn.Sequential): 12 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): 13 | padding = (kernel_size - 1) // 2 14 | super(ConvBNReLU, self).__init__( 15 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 16 | nn.BatchNorm2d(out_planes), 17 | nn.ReLU6(inplace=True) 18 | ) 19 | 20 | 21 | class InvertedResidual(nn.Module): 22 | def __init__(self, inp, oup, stride, expand_ratio): 23 | super(InvertedResidual, self).__init__() 24 | self.stride = stride 25 | assert stride in [1, 2] 26 | 27 | hidden_dim = int(round(inp * expand_ratio)) 28 | self.use_res_connect = self.stride == 1 and inp == oup 29 | 30 | layers = [] 31 | if expand_ratio != 1: 32 | # pw 33 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) 34 | layers.extend([ 35 | # dw 36 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), 37 | # pw-linear 38 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 39 | nn.BatchNorm2d(oup), 40 | ]) 41 | self.conv = nn.Sequential(*layers) 42 | 43 | def forward(self, x): 44 | if self.use_res_connect: 45 | return x + self.conv(x) 46 | else: 47 | return self.conv(x) 48 | 49 | 50 | class MobileNetV2(nn.Module): 51 | def __init__(self, width_mult=1.0, inverted_residual_setting=None): 52 | super(MobileNetV2, self).__init__() 53 | block = InvertedResidual 54 | input_channel = 32 55 | last_channel = 1280 56 | 57 | if inverted_residual_setting is None: 58 | inverted_residual_setting = [ 59 | # t, c, n, s 60 | [1, 16, 1, 1], 61 | [6, 24, 2, 2], 62 | [6, 32, 3, 2], 63 | [6, 64, 4, 2], 64 | [6, 96, 3, 1], 65 | [6, 160, 3, 2], 66 | [6, 320, 1, 1], 67 | ] 68 | 69 | # only check the first element, assuming user knows t,c,n,s are required 70 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 71 | raise ValueError("inverted_residual_setting should be non-empty " 72 | "or a 4-element list, got {}".format(inverted_residual_setting)) 73 | 74 | # building first layer 75 | input_channel = int(input_channel * width_mult) 76 | 
self.last_channel = int(last_channel * max(1.0, width_mult)) 77 | features = [ConvBNReLU(3, input_channel, stride=2)] 78 | # building inverted residual blocks 79 | for t, c, n, s in inverted_residual_setting: 80 | output_channel = int(c * width_mult) 81 | for i in range(n): 82 | stride = s if i == 0 else 1 83 | features.append(block(input_channel, output_channel, stride, expand_ratio=t)) 84 | input_channel = output_channel 85 | # building last several layers 86 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) 87 | # make it nn.Sequential 88 | self.features = nn.Sequential(*features) 89 | self.extras = nn.ModuleList([ 90 | InvertedResidual(1280, 512, 2, 0.2), 91 | InvertedResidual(512, 256, 2, 0.25), 92 | InvertedResidual(256, 256, 2, 0.5), 93 | InvertedResidual(256, 64, 2, 0.25) 94 | ]) 95 | 96 | self.reset_parameters() 97 | 98 | def reset_parameters(self): 99 | # weight initialization 100 | for m in self.modules(): 101 | if isinstance(m, nn.Conv2d): 102 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 103 | if m.bias is not None: 104 | nn.init.zeros_(m.bias) 105 | elif isinstance(m, nn.BatchNorm2d): 106 | nn.init.ones_(m.weight) 107 | nn.init.zeros_(m.bias) 108 | elif isinstance(m, nn.Linear): 109 | nn.init.normal_(m.weight, 0, 0.01) 110 | nn.init.zeros_(m.bias) 111 | 112 | def forward(self, x): 113 | features = [] 114 | for i in range(14): 115 | x = self.features[i](x) 116 | features.append(x) 117 | 118 | for i in range(14, len(self.features)): 119 | x = self.features[i](x) 120 | features.append(x) 121 | 122 | for i in range(len(self.extras)): 123 | x = self.extras[i](x) 124 | features.append(x) 125 | 126 | return tuple(features) 127 | 128 | 129 | @registry.BACKBONES.register('mobilenet_v2') 130 | def mobilenet_v2(cfg, pretrained=True): 131 | model = MobileNetV2() 132 | if pretrained: 133 | model.load_state_dict(load_state_dict_from_url(model_urls['mobilenet_v2']), strict=False) 134 | return model 135 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/backbone/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ssd.layers import L2Norm 5 | from ssd.modeling import registry 6 | from ssd.utils.model_zoo import load_state_dict_from_url 7 | 8 | model_urls = { 9 | 'vgg': 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth', 10 | } 11 | 12 | 13 | # borrowed from https://github.com/amdegroot/ssd.pytorch/blob/master/ssd.py 14 | def add_vgg(cfg, batch_norm=False): 15 | layers = [] 16 | in_channels = 3 17 | for v in cfg: 18 | if v == 'M': 19 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 20 | elif v == 'C': 21 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] 22 | else: 23 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 24 | if batch_norm: 25 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 26 | else: 27 | layers += [conv2d, nn.ReLU(inplace=True)] 28 | in_channels = v 29 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 30 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) 31 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 32 | layers += [pool5, conv6, 33 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] 34 | return layers 35 | 36 | 37 | def add_extras(cfg, i, size=300): 38 | # Extra layers added to VGG for feature scaling 39 | layers = [] 40 | in_channels = i 41 | flag = False 42 | for k, v in enumerate(cfg): 43 
| if in_channels != 'S': 44 | if v == 'S': 45 | layers += [nn.Conv2d(in_channels, cfg[k + 1], kernel_size=(1, 3)[flag], stride=2, padding=1)] 46 | else: 47 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] 48 | flag = not flag 49 | in_channels = v 50 | if size == 512: 51 | layers.append(nn.Conv2d(in_channels, 128, kernel_size=1, stride=1)) 52 | layers.append(nn.Conv2d(128, 256, kernel_size=4, stride=1, padding=1)) 53 | return layers 54 | 55 | 56 | vgg_base = { 57 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 58 | 512, 512, 512], 59 | '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 60 | 512, 512, 512], 61 | } 62 | extras_base = { 63 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 64 | '512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256], 65 | } 66 | 67 | 68 | class VGG(nn.Module): 69 | def __init__(self, cfg): 70 | super().__init__() 71 | size = cfg.INPUT.IMAGE_SIZE 72 | vgg_config = vgg_base[str(size)] 73 | extras_config = extras_base[str(size)] 74 | 75 | self.vgg = nn.ModuleList(add_vgg(vgg_config)) 76 | self.extras = nn.ModuleList(add_extras(extras_config, i=1024, size=size)) 77 | self.l2_norm = L2Norm(512, scale=20) 78 | self.reset_parameters() 79 | 80 | def reset_parameters(self): 81 | for m in self.extras.modules(): 82 | if isinstance(m, nn.Conv2d): 83 | nn.init.xavier_uniform_(m.weight) 84 | nn.init.zeros_(m.bias) 85 | 86 | def init_from_pretrain(self, state_dict): 87 | self.vgg.load_state_dict(state_dict) 88 | 89 | def forward(self, x): 90 | features = [] 91 | for i in range(23): 92 | x = self.vgg[i](x) 93 | s = self.l2_norm(x) # Conv4_3 L2 normalization 94 | features.append(s) 95 | 96 | # apply vgg up to fc7 97 | for i in range(23, len(self.vgg)): 98 | x = self.vgg[i](x) 99 | features.append(x) 100 | 101 | for k, v in enumerate(self.extras): 102 | x = F.relu(v(x), inplace=True) 103 | if k % 2 == 1: 104 | features.append(x) 105 | 106 | return tuple(features) 107 | 108 | 109 | @registry.BACKBONES.register('vgg') 110 | def vgg(cfg, pretrained=True): 111 | model = VGG(cfg) 112 | if pretrained: 113 | model.init_from_pretrain(load_state_dict_from_url(model_urls['vgg'])) 114 | return model 115 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.modeling import registry 2 | from .box_head import SSDBoxHead 3 | 4 | __all__ = ['build_box_head', 'SSDBoxHead'] 5 | 6 | 7 | def build_box_head(cfg): 8 | return registry.BOX_HEADS[cfg.MODEL.BOX_HEAD.NAME](cfg) 9 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/box_head/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/__pycache__/box_head.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/box_head/__pycache__/box_head.cpython-36.pyc -------------------------------------------------------------------------------- 
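As a quick sanity check on the feature pyramid that the `VGG` backbone above produces, one can push a dummy image through it. This is a minimal sketch, assuming the `VGG` class above and a config with `INPUT.IMAGE_SIZE: 300`; the shapes in the comments are the usual SSD300 values rather than anything read from this repo's config files.

# Minimal sketch (assumes the VGG class above and a cfg with INPUT.IMAGE_SIZE = 300).
import torch

def inspect_backbone(backbone):
    dummy = torch.randn(1, 3, 300, 300)  # one fake 300x300 RGB image, NCHW
    with torch.no_grad():
        features = backbone(dummy)
    for idx, feature in enumerate(features):
        print(idx, tuple(feature.shape))

# For SSD300 the six printed shapes are typically:
# 0 (1, 512, 38, 38)   conv4_3 output, after L2Norm
# 1 (1, 1024, 19, 19)  fc7 output
# 2 (1, 512, 10, 10)   first extra layer
# 3 (1, 256, 5, 5)
# 4 (1, 256, 3, 3)
# 5 (1, 256, 1, 1)
# inspect_backbone(VGG(cfg))  # cfg is assumed to be a frozen yacs config
--------------------------------------------------------------------------------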
/ssd/ssd/modeling/box_head/__pycache__/box_predictor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/box_head/__pycache__/box_predictor.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/__pycache__/inference.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/box_head/__pycache__/inference.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/__pycache__/loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/box_head/__pycache__/loss.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | 4 | from ssd.modeling import registry 5 | from ssd.modeling.anchors.prior_box import PriorBox 6 | from ssd.modeling.box_head.box_predictor import make_box_predictor 7 | from ssd.utils import box_utils 8 | from .inference import PostProcessor 9 | from .loss import MultiBoxLoss 10 | 11 | 12 | @registry.BOX_HEADS.register('SSDBoxHead') 13 | class SSDBoxHead(nn.Module): 14 | def __init__(self, cfg): 15 | super().__init__() 16 | self.cfg = cfg 17 | self.predictor = make_box_predictor(cfg) 18 | self.loss_evaluator = MultiBoxLoss(neg_pos_ratio=cfg.MODEL.NEG_POS_RATIO) 19 | self.post_processor = PostProcessor(cfg) 20 | self.priors = None 21 | 22 | def forward(self, features, targets=None): 23 | cls_logits, bbox_pred = self.predictor(features) 24 | if self.training: 25 | return self._forward_train(cls_logits, bbox_pred, targets) 26 | else: 27 | return self._forward_test(cls_logits, bbox_pred) 28 | 29 | def _forward_train(self, cls_logits, bbox_pred, targets): 30 | gt_boxes, gt_labels = targets['boxes'], targets['labels'] 31 | reg_loss, cls_loss = self.loss_evaluator(cls_logits, bbox_pred, gt_labels, gt_boxes) 32 | loss_dict = dict( 33 | reg_loss=reg_loss, 34 | cls_loss=cls_loss, 35 | ) 36 | detections = (cls_logits, bbox_pred) 37 | return detections, loss_dict 38 | 39 | def _forward_test(self, cls_logits, bbox_pred): 40 | if self.priors is None: 41 | self.priors = PriorBox(self.cfg)().to(bbox_pred.device) 42 | scores = F.softmax(cls_logits, dim=2) 43 | boxes = box_utils.convert_locations_to_boxes( 44 | bbox_pred, self.priors, self.cfg.MODEL.CENTER_VARIANCE, self.cfg.MODEL.SIZE_VARIANCE 45 | ) 46 | boxes = box_utils.center_form_to_corner_form(boxes) 47 | detections = (scores, boxes) 48 | detections = self.post_processor(detections) 49 | return detections, {} 50 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/box_predictor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from ssd.layers import SeparableConv2d 5 | from ssd.modeling import registry 6 | 7 | 8 | class BoxPredictor(nn.Module): 9 | def __init__(self, 
cfg): 10 | super().__init__() 11 | self.cfg = cfg 12 | self.cls_headers = nn.ModuleList() 13 | self.reg_headers = nn.ModuleList() 14 | for level, (boxes_per_location, out_channels) in enumerate(zip(cfg.MODEL.PRIORS.BOXES_PER_LOCATION, cfg.MODEL.BACKBONE.OUT_CHANNELS)): 15 | self.cls_headers.append(self.cls_block(level, out_channels, boxes_per_location)) 16 | self.reg_headers.append(self.reg_block(level, out_channels, boxes_per_location)) 17 | self.reset_parameters() 18 | 19 | def cls_block(self, level, out_channels, boxes_per_location): 20 | raise NotImplementedError 21 | 22 | def reg_block(self, level, out_channels, boxes_per_location): 23 | raise NotImplementedError 24 | 25 | def reset_parameters(self): 26 | for m in self.modules(): 27 | if isinstance(m, nn.Conv2d): 28 | nn.init.xavier_uniform_(m.weight) 29 | nn.init.zeros_(m.bias) 30 | 31 | def forward(self, features): 32 | cls_logits = [] 33 | bbox_pred = [] 34 | for feature, cls_header, reg_header in zip(features, self.cls_headers, self.reg_headers): 35 | cls_logits.append(cls_header(feature).permute(0, 2, 3, 1).contiguous()) 36 | bbox_pred.append(reg_header(feature).permute(0, 2, 3, 1).contiguous()) 37 | 38 | batch_size = features[0].shape[0] 39 | cls_logits = torch.cat([c.view(c.shape[0], -1) for c in cls_logits], dim=1).view(batch_size, -1, self.cfg.MODEL.NUM_CLASSES) 40 | bbox_pred = torch.cat([l.view(l.shape[0], -1) for l in bbox_pred], dim=1).view(batch_size, -1, 4) 41 | 42 | return cls_logits, bbox_pred 43 | 44 | 45 | @registry.BOX_PREDICTORS.register('SSDBoxPredictor') 46 | class SSDBoxPredictor(BoxPredictor): 47 | def cls_block(self, level, out_channels, boxes_per_location): 48 | return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) 49 | 50 | def reg_block(self, level, out_channels, boxes_per_location): 51 | return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) 52 | 53 | 54 | @registry.BOX_PREDICTORS.register('SSDLiteBoxPredictor') 55 | class SSDLiteBoxPredictor(BoxPredictor): 56 | def cls_block(self, level, out_channels, boxes_per_location): 57 | num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS) 58 | if level == num_levels - 1: 59 | return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=1) 60 | return SeparableConv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) 61 | 62 | def reg_block(self, level, out_channels, boxes_per_location): 63 | num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS) 64 | if level == num_levels - 1: 65 | return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=1) 66 | return SeparableConv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) 67 | 68 | 69 | def make_box_predictor(cfg): 70 | return registry.BOX_PREDICTORS[cfg.MODEL.BOX_HEAD.PREDICTOR](cfg) 71 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ssd.structures.container import Container 4 | from ssd.utils.nms import batched_nms 5 | 6 | 7 | class PostProcessor: 8 | def __init__(self, cfg): 9 | super().__init__() 10 | self.cfg = cfg 11 | self.width = cfg.INPUT.IMAGE_SIZE 12 | self.height = cfg.INPUT.IMAGE_SIZE 13 | 14 | def __call__(self, detections): 15 | batches_scores, batches_boxes = detections 16 | device = batches_scores.device 17 | 
batch_size = batches_scores.size(0) 18 | results = [] 19 | for batch_id in range(batch_size): 20 | scores, boxes = batches_scores[batch_id], batches_boxes[batch_id] # (N, #CLS) (N, 4) 21 | num_boxes = scores.shape[0] 22 | num_classes = scores.shape[1] 23 | 24 | boxes = boxes.view(num_boxes, 1, 4).expand(num_boxes, num_classes, 4) 25 | labels = torch.arange(num_classes, device=device) 26 | labels = labels.view(1, num_classes).expand_as(scores) 27 | 28 | # remove predictions with the background label 29 | boxes = boxes[:, 1:] 30 | scores = scores[:, 1:] 31 | labels = labels[:, 1:] 32 | 33 | # batch everything by making every class prediction a separate instance 34 | boxes = boxes.reshape(-1, 4) 35 | scores = scores.reshape(-1) 36 | labels = labels.reshape(-1) 37 | 38 | # remove low scoring boxes 39 | indices = torch.nonzero(scores > self.cfg.TEST.CONFIDENCE_THRESHOLD).squeeze(1) 40 | boxes, scores, labels = boxes[indices], scores[indices], labels[indices] 41 | 42 | boxes[:, 0::2] *= self.width 43 | boxes[:, 1::2] *= self.height 44 | 45 | keep = batched_nms(boxes, scores, labels, self.cfg.TEST.NMS_THRESHOLD) 46 | # keep only the top-k scoring predictions 47 | keep = keep[:self.cfg.TEST.MAX_PER_IMAGE] 48 | boxes, scores, labels = boxes[keep], scores[keep], labels[keep] 49 | 50 | container = Container(boxes=boxes, labels=labels, scores=scores) 51 | container.img_width = self.width 52 | container.img_height = self.height 53 | results.append(container) 54 | return results 55 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/box_head/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | 5 | from ssd.utils import box_utils 6 | 7 | 8 | class MultiBoxLoss(nn.Module): 9 | def __init__(self, neg_pos_ratio): 10 | """Implement SSD MultiBox Loss. 11 | 12 | Basically, MultiBox loss combines classification loss 13 | and Smooth L1 regression loss. 14 | """ 15 | super(MultiBoxLoss, self).__init__() 16 | self.neg_pos_ratio = neg_pos_ratio 17 | 18 | def forward(self, confidence, predicted_locations, labels, gt_locations): 19 | """Compute classification loss and smooth l1 loss. 20 | 21 | Args: 22 | confidence (batch_size, num_priors, num_classes): class predictions. 23 | predicted_locations (batch_size, num_priors, 4): predicted locations. 24 | labels (batch_size, num_priors): real labels of all the priors. 25 | gt_locations (batch_size, num_priors, 4): real boxes corresponding to all the priors.
26 | """ 27 | num_classes = confidence.size(2) 28 | with torch.no_grad(): 29 | # derived from cross_entropy=sum(log(p)) 30 | loss = -F.log_softmax(confidence, dim=2)[:, :, 0] 31 | mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio) 32 | 33 | confidence = confidence[mask, :] 34 | classification_loss = F.cross_entropy(confidence.view(-1, num_classes), labels[mask], reduction='sum') 35 | 36 | pos_mask = labels > 0 37 | predicted_locations = predicted_locations[pos_mask, :].view(-1, 4) 38 | gt_locations = gt_locations[pos_mask, :].view(-1, 4) 39 | smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum') 40 | num_pos = gt_locations.size(0) 41 | return smooth_l1_loss / num_pos, classification_loss / num_pos 42 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd_detector import SSDDetector 2 | 3 | _DETECTION_META_ARCHITECTURES = { 4 | "SSDDetector": SSDDetector 5 | } 6 | 7 | 8 | def build_detection_model(cfg): 9 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 10 | return meta_arch(cfg) 11 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/detector/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/detector/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/detector/__pycache__/ssd_detector.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/modeling/detector/__pycache__/ssd_detector.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/modeling/detector/ssd_detector.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ssd.modeling.backbone import build_backbone 4 | from ssd.modeling.box_head import build_box_head 5 | 6 | 7 | class SSDDetector(nn.Module): 8 | def __init__(self, cfg): 9 | super().__init__() 10 | self.cfg = cfg 11 | self.backbone = build_backbone(cfg) 12 | self.box_head = build_box_head(cfg) 13 | 14 | def forward(self, images, targets=None): 15 | # 前向传播一次,返回loss和detections 16 | features = self.backbone(images) 17 | detections, detector_losses = self.box_head(features, targets) 18 | if self.training: 19 | return detector_losses 20 | return detections 21 | -------------------------------------------------------------------------------- /ssd/ssd/modeling/registry.py: -------------------------------------------------------------------------------- 1 | from ssd.utils.registry import Registry 2 | 3 | BACKBONES = Registry() 4 | BOX_HEADS = Registry() 5 | BOX_PREDICTORS = Registry() 6 | -------------------------------------------------------------------------------- /ssd/ssd/solver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/solver/__init__.py 
-------------------------------------------------------------------------------- /ssd/ssd/solver/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/solver/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/solver/__pycache__/build.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/solver/__pycache__/build.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/solver/__pycache__/lr_scheduler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/solver/__pycache__/lr_scheduler.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/solver/build.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .lr_scheduler import WarmupMultiStepLR 4 | 5 | 6 | def make_optimizer(cfg, model, lr=None): 7 | lr = cfg.SOLVER.BASE_LR if lr is None else lr 8 | return torch.optim.SGD(model.parameters(), lr=lr, momentum=cfg.SOLVER.MOMENTUM, weight_decay=cfg.SOLVER.WEIGHT_DECAY) 9 | 10 | 11 | def make_lr_scheduler(cfg, optimizer, milestones=None): 12 | return WarmupMultiStepLR(optimizer=optimizer, 13 | milestones=cfg.SOLVER.LR_STEPS if milestones is None else milestones, 14 | gamma=cfg.SOLVER.GAMMA, 15 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 16 | warmup_iters=cfg.SOLVER.WARMUP_ITERS) 17 | -------------------------------------------------------------------------------- /ssd/ssd/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from bisect import bisect_right 2 | 3 | from torch.optim.lr_scheduler import _LRScheduler 4 | 5 | 6 | class WarmupMultiStepLR(_LRScheduler): 7 | def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3, 8 | warmup_iters=500, last_epoch=-1): 9 | if not list(milestones) == sorted(milestones): 10 | raise ValueError( 11 | "Milestones should be a list of" " increasing integers. 
Got {}", 12 | milestones, 13 | ) 14 | 15 | self.milestones = milestones 16 | self.gamma = gamma 17 | self.warmup_factor = warmup_factor 18 | self.warmup_iters = warmup_iters 19 | super().__init__(optimizer, last_epoch) 20 | 21 | def get_lr(self): 22 | warmup_factor = 1 23 | if self.last_epoch < self.warmup_iters: 24 | alpha = float(self.last_epoch) / self.warmup_iters 25 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 26 | return [ 27 | base_lr 28 | * warmup_factor 29 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 30 | for base_lr in self.base_lrs 31 | ] 32 | -------------------------------------------------------------------------------- /ssd/ssd/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/structures/__init__.py -------------------------------------------------------------------------------- /ssd/ssd/structures/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/structures/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/structures/__pycache__/container.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/structures/__pycache__/container.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/structures/container.py: -------------------------------------------------------------------------------- 1 | class Container: 2 | """ 3 | Help class for manage boxes, labels, etc... 4 | Not inherit dict due to `default_collate` will change dict's subclass to dict. 
5 | """ 6 | 7 | def __init__(self, *args, **kwargs): 8 | self._data_dict = dict(*args, **kwargs) 9 | 10 | def __setattr__(self, key, value): 11 | object.__setattr__(self, key, value) 12 | 13 | def __getitem__(self, key): 14 | return self._data_dict[key] 15 | 16 | def __iter__(self): 17 | return self._data_dict.__iter__() 18 | 19 | def __setitem__(self, key, value): 20 | self._data_dict[key] = value 21 | 22 | def _call(self, name, *args, **kwargs): 23 | keys = list(self._data_dict.keys()) 24 | for key in keys: 25 | value = self._data_dict[key] 26 | if hasattr(value, name): 27 | self._data_dict[key] = getattr(value, name)(*args, **kwargs) 28 | return self 29 | 30 | def to(self, *args, **kwargs): 31 | return self._call('to', *args, **kwargs) 32 | 33 | def numpy(self): 34 | return self._call('numpy') 35 | 36 | def resize(self, size): 37 | """resize boxes 38 | Args: 39 | size: (width, height) 40 | Returns: 41 | self 42 | """ 43 | img_width = getattr(self, 'img_width', -1) 44 | img_height = getattr(self, 'img_height', -1) 45 | assert img_width > 0 and img_height > 0 46 | assert 'boxes' in self._data_dict 47 | boxes = self._data_dict['boxes'] 48 | new_width, new_height = size 49 | boxes[:, 0::2] *= (new_width / img_width) 50 | boxes[:, 1::2] *= (new_height / img_height) 51 | return self 52 | 53 | def __repr__(self): 54 | return self._data_dict.__repr__() 55 | -------------------------------------------------------------------------------- /ssd/ssd/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import * 2 | -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/box_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/box_utils.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/checkpoint.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/checkpoint.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/dist_util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/dist_util.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/logger.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/logger.cpython-36.pyc -------------------------------------------------------------------------------- 
/ssd/ssd/utils/__pycache__/metric_logger.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/metric_logger.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/misc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/misc.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/model_zoo.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/model_zoo.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/nms.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/__pycache__/registry.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/ssd/ssd/utils/__pycache__/registry.cpython-36.pyc -------------------------------------------------------------------------------- /ssd/ssd/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import torch 5 | from torch.nn.parallel import DistributedDataParallel 6 | 7 | from ssd.utils.model_zoo import cache_url 8 | 9 | 10 | class CheckPointer: 11 | _last_checkpoint_name = 'last_checkpoint.txt' 12 | 13 | def __init__(self, 14 | model, 15 | optimizer=None, 16 | scheduler=None, 17 | save_dir="", 18 | save_to_disk=None, 19 | logger=None): 20 | self.model = model 21 | self.optimizer = optimizer 22 | self.scheduler = scheduler 23 | self.save_dir = save_dir 24 | self.save_to_disk = save_to_disk 25 | if logger is None: 26 | logger = logging.getLogger(__name__) 27 | self.logger = logger 28 | 29 | def save(self, name, **kwargs): 30 | if not self.save_dir: 31 | return 32 | 33 | if not self.save_to_disk: 34 | return 35 | 36 | data = {} 37 | if isinstance(self.model, DistributedDataParallel): 38 | data['model'] = self.model.module.state_dict() 39 | else: 40 | data['model'] = self.model.state_dict() 41 | if self.optimizer is not None: 42 | data["optimizer"] = self.optimizer.state_dict() 43 | if self.scheduler is not None: 44 | data["scheduler"] = self.scheduler.state_dict() 45 | data.update(kwargs) 46 | 47 | save_file = os.path.join(self.save_dir, "{}.pth".format(name)) 48 | self.logger.info("Saving checkpoint to {}".format(save_file)) 49 | torch.save(data, save_file) 50 | 51 | self.tag_last_checkpoint(save_file) 52 | 53 | def load(self, f=None, use_latest=True): 54 | if self.has_checkpoint() and use_latest: 55 | # override argument with existing checkpoint 56 | f = self.get_checkpoint_file() 57 | if not f: 58 | # no checkpoint could be 
found 59 | self.logger.info("No checkpoint found.") 60 | return {} 61 | 62 | self.logger.info("Loading checkpoint from {}".format(f)) 63 | checkpoint = self._load_file(f) 64 | model = self.model 65 | if isinstance(model, DistributedDataParallel): 66 | model = self.model.module 67 | 68 | model.load_state_dict(checkpoint.pop("model")) 69 | if "optimizer" in checkpoint and self.optimizer: 70 | self.logger.info("Loading optimizer from {}".format(f)) 71 | self.optimizer.load_state_dict(checkpoint.pop("optimizer")) 72 | if "scheduler" in checkpoint and self.scheduler: 73 | self.logger.info("Loading scheduler from {}".format(f)) 74 | self.scheduler.load_state_dict(checkpoint.pop("scheduler")) 75 | 76 | # return any further checkpoint data 77 | return checkpoint 78 | 79 | def get_checkpoint_file(self): 80 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name) 81 | try: 82 | with open(save_file, "r") as f: 83 | last_saved = f.read() 84 | last_saved = last_saved.strip() 85 | except IOError: 86 | # if file doesn't exist, maybe because it has just been 87 | # deleted by a separate process 88 | last_saved = "" 89 | return last_saved 90 | 91 | def has_checkpoint(self): 92 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name) 93 | return os.path.exists(save_file) 94 | 95 | def tag_last_checkpoint(self, last_filename): 96 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name) 97 | with open(save_file, "w") as f: 98 | f.write(last_filename) 99 | 100 | def _load_file(self, f): 101 | # download url files 102 | if f.startswith("http"): 103 | # if the file is a url path, download it and cache it 104 | cached_f = cache_url(f) 105 | self.logger.info("url {} cached in {}".format(f, cached_f)) 106 | f = cached_f 107 | return torch.load(f, map_location=torch.device("cpu")) 108 | -------------------------------------------------------------------------------- /ssd/ssd/utils/dist_util.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def get_world_size(): 8 | if not dist.is_available(): 9 | return 1 10 | if not dist.is_initialized(): 11 | return 1 12 | return dist.get_world_size() 13 | 14 | 15 | def get_rank(): 16 | if not dist.is_available(): 17 | return 0 18 | if not dist.is_initialized(): 19 | return 0 20 | return dist.get_rank() 21 | 22 | 23 | def is_main_process(): 24 | return get_rank() == 0 25 | 26 | 27 | def synchronize(): 28 | """ 29 | Helper function to synchronize (barrier) among all processes when 30 | using distributed training 31 | """ 32 | if not dist.is_available(): 33 | return 34 | if not dist.is_initialized(): 35 | return 36 | world_size = dist.get_world_size() 37 | if world_size == 1: 38 | return 39 | dist.barrier() 40 | 41 | 42 | def _encode(encoded_data, data): 43 | # gets a byte representation for the data 44 | encoded_bytes = pickle.dumps(data) 45 | # convert this byte string into a byte tensor 46 | storage = torch.ByteStorage.from_buffer(encoded_bytes) 47 | tensor = torch.ByteTensor(storage).to("cuda") 48 | # encoding: first byte is the size and then rest is the data 49 | s = tensor.numel() 50 | assert s <= 255, "Can't encode data greater than 255 bytes" 51 | # put the encoded data in encoded_data 52 | encoded_data[0] = s 53 | encoded_data[1: (s + 1)] = tensor 54 | 55 | 56 | def all_gather(data): 57 | """ 58 | Run all_gather on arbitrary picklable data (not necessarily tensors) 59 | Args: 60 | data: any picklable object 61 | 
Returns: 62 | list[data]: list of data gathered from each rank 63 | """ 64 | world_size = get_world_size() 65 | if world_size == 1: 66 | return [data] 67 | 68 | # serialized to a Tensor 69 | buffer = pickle.dumps(data) 70 | storage = torch.ByteStorage.from_buffer(buffer) 71 | tensor = torch.ByteTensor(storage).to("cuda") 72 | 73 | # obtain Tensor size of each rank 74 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 75 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 76 | dist.all_gather(size_list, local_size) 77 | size_list = [int(size.item()) for size in size_list] 78 | max_size = max(size_list) 79 | 80 | # receiving Tensor from all ranks 81 | # we pad the tensor because torch all_gather does not support 82 | # gathering tensors of different shapes 83 | tensor_list = [] 84 | for _ in size_list: 85 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 86 | if local_size != max_size: 87 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 88 | tensor = torch.cat((tensor, padding), dim=0) 89 | dist.all_gather(tensor_list, tensor) 90 | 91 | data_list = [] 92 | for size, tensor in zip(size_list, tensor_list): 93 | buffer = tensor.cpu().numpy().tobytes()[:size] 94 | data_list.append(pickle.loads(buffer)) 95 | 96 | return data_list 97 | -------------------------------------------------------------------------------- /ssd/ssd/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 6 | def setup_logger(name, distributed_rank, save_dir=None): 7 | logger = logging.getLogger(name) 8 | logger.setLevel(logging.DEBUG) 9 | # don't log results for the non-master process 10 | if distributed_rank > 0: 11 | return logger 12 | stream_handler = logging.StreamHandler(stream=sys.stdout) 13 | stream_handler.setLevel(logging.DEBUG) 14 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 15 | stream_handler.setFormatter(formatter) 16 | logger.addHandler(stream_handler) 17 | if save_dir: 18 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt')) 19 | fh.setLevel(logging.DEBUG) 20 | fh.setFormatter(formatter) 21 | logger.addHandler(fh) 22 | return logger 23 | -------------------------------------------------------------------------------- /ssd/ssd/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | from collections import deque, defaultdict 2 | import numpy as np 3 | import torch 4 | 5 | 6 | class SmoothedValue: 7 | """Track a series of values and provide access to smoothed values over a 8 | window or the global series average. 
9 | """ 10 | 11 | def __init__(self, window_size=10): 12 | self.deque = deque(maxlen=window_size) 13 | self.value = np.nan 14 | self.series = [] 15 | self.total = 0.0 16 | self.count = 0 17 | 18 | def update(self, value): 19 | self.deque.append(value) 20 | self.series.append(value) 21 | self.count += 1 22 | self.total += value 23 | self.value = value 24 | 25 | @property 26 | def median(self): 27 | values = np.array(self.deque) 28 | return np.median(values) 29 | 30 | @property 31 | def avg(self): 32 | values = np.array(self.deque) 33 | return np.mean(values) 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger: 41 | def __init__(self, delimiter=", "): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.3f} ({:.3f})".format(name, meter.avg, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /ssd/ssd/utils/misc.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | 4 | 5 | def str2bool(s): 6 | return s.lower() in ('true', '1') 7 | 8 | 9 | def mkdir(path): 10 | try: 11 | os.makedirs(path) 12 | except OSError as e: 13 | if e.errno != errno.EEXIST: 14 | raise 15 | -------------------------------------------------------------------------------- /ssd/ssd/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | import torch 6 | 7 | from ssd.utils.dist_util import is_main_process, synchronize 8 | 9 | try: 10 | from torch.hub import _download_url_to_file 11 | from torch.hub import urlparse 12 | from torch.hub import HASH_REGEX 13 | except ImportError: 14 | from torch.utils.model_zoo import _download_url_to_file 15 | from torch.utils.model_zoo import urlparse 16 | from torch.utils.model_zoo import HASH_REGEX 17 | 18 | 19 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 20 | # but with a few improvements and modifications 21 | def cache_url(url, model_dir=None, progress=True): 22 | r"""Loads the Torch serialized object at the given URL. 23 | If the object is already present in `model_dir`, it's deserialized and 24 | returned. The filename part of the URL should follow the naming convention 25 | ``filename-.ext`` where ```` is the first eight or more 26 | digits of the SHA256 hash of the contents of the file. The hash is used to 27 | ensure unique names and to verify the contents of the file. 28 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 29 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be 30 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 
31 | Args: 32 | url (string): URL of the object to download 33 | model_dir (string, optional): directory in which to save the object 34 | progress (bool, optional): whether or not to display a progress bar to stderr 35 | Example: 36 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 37 | """ 38 | if model_dir is None: 39 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) 40 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) 41 | if not os.path.exists(model_dir): 42 | os.makedirs(model_dir) 43 | parts = urlparse(url) 44 | filename = os.path.basename(parts.path) 45 | if filename == "model_final.pkl": 46 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 47 | # so make the full path the filename by replacing / with _ 48 | filename = parts.path.replace("/", "_") 49 | cached_file = os.path.join(model_dir, filename) 50 | if not os.path.exists(cached_file) and is_main_process(): 51 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 52 | hash_prefix = HASH_REGEX.search(filename) 53 | if hash_prefix is not None: 54 | hash_prefix = hash_prefix.group(1) 55 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 56 | # which matches the hash PyTorch uses. So we skip the hash matching 57 | # if the hash_prefix is less than 6 characters 58 | if len(hash_prefix) < 6: 59 | hash_prefix = None 60 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 61 | synchronize() 62 | return cached_file 63 | 64 | 65 | def load_state_dict_from_url(url, map_location='cpu'): 66 | cached_file = cache_url(url) 67 | return torch.load(cached_file, map_location=map_location) 68 | -------------------------------------------------------------------------------- /ssd/ssd/utils/nms.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import warnings 3 | import torchvision 4 | 5 | try: 6 | import torch 7 | import torch_extension 8 | 9 | _nms = torch_extension.nms 10 | except ImportError: 11 | if tuple(map(int, torchvision.__version__.split('.')[:2])) >= (0, 3):  # compare versions numerically, not as strings 12 | _nms = torchvision.ops.nms 13 | else: 14 | warnings.warn('No NMS is available. Please upgrade torchvision to 0.3.0+ or compile c++ NMS ' 15 | 'using `cd ext & python build.py build_ext develop`') 16 | sys.exit(-1) 17 | 18 | 19 | def nms(boxes, scores, nms_thresh): 20 | """ Performs non-maximum suppression; runs on GPU or CPU according to 21 | the boxes' device. 22 | Args: 23 | boxes(Tensor[N, 4]): boxes in (x1, y1, x2, y2) format, in absolute (or relative) coordinates 24 | scores(Tensor[N]): scores 25 | nms_thresh(float): IoU threshold 26 | Returns: 27 | indices of the kept boxes. 28 | """ 29 | keep = _nms(boxes, scores, nms_thresh) 30 | return keep 31 | 32 | 33 | def batched_nms(boxes, scores, idxs, iou_threshold): 34 | """ 35 | Performs non-maximum suppression in a batched fashion. 36 | 37 | Each index value corresponds to a category, and NMS 38 | will not be applied between elements of different categories. 39 | 40 | Parameters 41 | ---------- 42 | boxes : Tensor[N, 4] 43 | boxes where NMS will be performed. They 44 | are expected to be in (x1, y1, x2, y2) format 45 | scores : Tensor[N] 46 | scores for each one of the boxes 47 | idxs : Tensor[N] 48 | indices of the categories for each one of the boxes.
49 | iou_threshold : float 50 | discards all overlapping boxes 51 | with IoU > iou_threshold 52 | 53 | Returns 54 | ------- 55 | keep : Tensor 56 | int64 tensor with the indices of 57 | the elements that have been kept by NMS, sorted 58 | in decreasing order of scores 59 | """ 60 | if boxes.numel() == 0: 61 | return torch.empty((0,), dtype=torch.int64, device=boxes.device) 62 | # strategy: in order to perform NMS independently per class, 63 | # we add an offset to all the boxes. The offset is dependent 64 | # only on the class idx, and is large enough so that boxes 65 | # from different classes do not overlap 66 | max_coordinate = boxes.max() 67 | offsets = idxs.to(boxes) * (max_coordinate + 1) 68 | boxes_for_nms = boxes + offsets[:, None] 69 | keep = nms(boxes_for_nms, scores, iou_threshold) 70 | return keep 71 | -------------------------------------------------------------------------------- /ssd/ssd/utils/registry.py: -------------------------------------------------------------------------------- 1 | def _register_generic(module_dict, module_name, module): 2 | assert module_name not in module_dict 3 | module_dict[module_name] = module 4 | 5 | 6 | class Registry(dict): 7 | """ 8 | A helper class for registering and managing modules; it extends a dictionary 9 | and provides register functions. 10 | E.g. creating a registry: 11 | some_registry = Registry({"default": default_module}) 12 | There are two ways of registering new modules: 13 | 1): the normal way is just calling the register function: 14 | def foo(): 15 | ... 16 | some_registry.register("foo_module", foo) 17 | 2): used as a decorator when declaring the module: 18 | @some_registry.register("foo_module") 19 | @some_registry.register("foo_module_nickname") 20 | def foo(): 21 | ... 22 | Accessing a module is just like using a dictionary, e.g.: 23 | f = some_registry["foo_module"] 24 | """ 25 | 26 | def __init__(self, *args, **kwargs): 27 | super(Registry, self).__init__(*args, **kwargs) 28 | 29 | def register(self, module_name, module=None): 30 | # used as a function call 31 | if module is not None: 32 | _register_generic(self, module_name, module) 33 | return 34 | 35 | # used as a decorator 36 | def register_fn(fn): 37 | _register_generic(self, module_name, fn) 38 | return fn 39 | 40 | return register_fn 41 | -------------------------------------------------------------------------------- /ssd/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import torch 6 | import torch.utils.data 7 | 8 | from ssd.config import cfg 9 | from ssd.engine.inference import do_evaluation 10 | from ssd.modeling.detector import build_detection_model 11 | from ssd.utils import dist_util 12 | from ssd.utils.checkpoint import CheckPointer 13 | from ssd.utils.dist_util import synchronize 14 | from ssd.utils.logger import setup_logger 15 | 16 | 17 | def evaluation(cfg, ckpt, distributed): 18 | logger = logging.getLogger("SSD.inference") 19 | 20 | model = build_detection_model(cfg) 21 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger) 22 | device = torch.device(cfg.MODEL.DEVICE) 23 | model.to(device) 24 | checkpointer.load(ckpt, use_latest=ckpt is None) 25 | do_evaluation(cfg, model, distributed) 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='SSD Evaluation on VOC and COCO datasets.') 30 | parser.add_argument( 31 | "--config-file", 32 | default="", 33 | metavar="FILE", 34 | help="path to config file", 35 | type=str, 36 | ) 37
| parser.add_argument("--local_rank", type=int, default=0) 38 | parser.add_argument( 39 | "--ckpt", 40 | help="The path to the checkpoint for test, default is the latest checkpoint.", 41 | default=None, 42 | type=str, 43 | ) 44 | 45 | parser.add_argument("--output_dir", default="eval_results", type=str, help="The directory to store evaluation results.") 46 | 47 | parser.add_argument( 48 | "opts", 49 | help="Modify config options using the command-line", 50 | default=None, 51 | nargs=argparse.REMAINDER, 52 | ) 53 | args = parser.parse_args() 54 | 55 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 56 | distributed = num_gpus > 1 57 | 58 | if torch.cuda.is_available(): 59 | # This flag allows you to enable the inbuilt cudnn auto-tuner to 60 | # find the best algorithm to use for your hardware. 61 | torch.backends.cudnn.benchmark = True 62 | if distributed: 63 | torch.cuda.set_device(args.local_rank) 64 | torch.distributed.init_process_group(backend="nccl", init_method="env://") 65 | synchronize() 66 | 67 | cfg.merge_from_file(args.config_file) 68 | cfg.merge_from_list(args.opts) 69 | cfg.freeze() 70 | 71 | logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR) 72 | logger.info("Using {} GPUs".format(num_gpus)) 73 | logger.info(args) 74 | 75 | logger.info("Loaded configuration file {}".format(args.config_file)) 76 | with open(args.config_file, "r") as cf: 77 | config_str = "\n" + cf.read() 78 | logger.info(config_str) 79 | logger.info("Running with config:\n{}".format(cfg)) 80 | evaluation(cfg, ckpt=args.ckpt, distributed=distributed) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /ssd/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | from ssd.engine.inference import do_evaluation 9 | from ssd.config import cfg 10 | from ssd.data.build import make_data_loader 11 | from ssd.engine.trainer import do_train 12 | # 这里是比较关键的一步,通过这步来构建目标检测模型 13 | from ssd.modeling.detector import build_detection_model 14 | from ssd.solver.build import make_optimizer, make_lr_scheduler 15 | from ssd.utils import dist_util, mkdir 16 | from ssd.utils.checkpoint import CheckPointer 17 | from ssd.utils.dist_util import synchronize 18 | from ssd.utils.logger import setup_logger 19 | from ssd.utils.misc import str2bool 20 | 21 | 22 | # 训练主函数 23 | def train(cfg, args): 24 | # 工厂模式,加载日志文件设置,这里暂时不同管 25 | logger = logging.getLogger('SSD.trainer') 26 | # 建立目标检测模型 27 | model = build_detection_model(cfg) 28 | # 设置Device并且把模型部署到设备上 29 | device = torch.device(cfg.MODEL.DEVICE) 30 | model.to(device) 31 | if args.distributed: 32 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank) 33 | 34 | # 设置学习率、优化器还有学习率变化步长,可以理解为模拟退火这种,前面的步长比较大,后面的步长比较小 35 | lr = cfg.SOLVER.LR * args.num_gpus # scale by num gpus 36 | optimizer = make_optimizer(cfg, model, lr) 37 | 38 | milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS] 39 | scheduler = make_lr_scheduler(cfg, optimizer, milestones) 40 | 41 | arguments = {"iteration": 0} 42 | save_to_disk = dist_util.get_rank() == 0 43 | # **** 这里应该是从断点开始对模型进行训练 **** 44 | checkpointer = CheckPointer(model, optimizer, scheduler, cfg.OUTPUT_DIR, save_to_disk, logger) 45 | extra_checkpoint_data = checkpointer.load() 46 | 
arguments.update(extra_checkpoint_data) 47 | 48 | # Important: load the dataset through torch-style data loaders 49 | # The key is how the dataset is loaded; building the model can simply be treated as a black box 50 | max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus 51 | train_loader = make_data_loader(cfg, is_train=True, distributed=args.distributed, max_iter=max_iter, start_iter=arguments['iteration']) 52 | 53 | # Training starts here 54 | model = do_train(cfg, model, train_loader, optimizer, scheduler, checkpointer, device, arguments, args) 55 | return model 56 | 57 | 58 | def main(): 59 | # Parse the command line and read the config file 60 | ''' 61 | Specifies the model's basic parameters: the training classes, 20 classes plus background, hence 21; 62 | the model input size, where images are generally padded to 300*300 so the original image is not distorted; 63 | the training folder paths (VOC2007 and VOC2012) and the test folder path (VOC2007); 64 | the maximum number of iterations, 120000, plus the learning rate and gamma, in short a series of hyper-parameters; 65 | and the output directory. 66 | MODEL: 67 | NUM_CLASSES: 21 68 | INPUT: 69 | IMAGE_SIZE: 300 70 | DATASETS: 71 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 72 | TEST: ("voc_2007_test", ) 73 | SOLVER: 74 | MAX_ITER: 120000 75 | LR_STEPS: [80000, 100000] 76 | GAMMA: 0.1 77 | BATCH_SIZE: 32 78 | LR: 1e-3 79 | OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712' 80 | Returns: 81 | ''' 82 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch') 83 | parser.add_argument( 84 | "--config-file", 85 | default="configs/vgg_ssd300_voc0712.yaml", 86 | metavar="FILE", 87 | help="path to config file", 88 | type=str, 89 | ) 90 | # Save and evaluate every 2500 steps, log every 10 steps; TensorBoard logging (via tensorboardX) can be turned off if the tensor records are not wanted 91 | parser.add_argument("--local_rank", type=int, default=0) 92 | parser.add_argument('--log_step', default=10, type=int, help='Print logs every log_step') 93 | parser.add_argument('--save_step', default=2500, type=int, help='Save checkpoint every save_step') 94 | parser.add_argument('--eval_step', default=2500, type=int, help='Evaluate dataset every eval_step, disabled when eval_step < 0') 95 | parser.add_argument('--use_tensorboard', default=True, type=str2bool) 96 | parser.add_argument( 97 | "--skip-test", 98 | dest="skip_test", 99 | help="Do not test the final model", 100 | action="store_true", 101 | ) 102 | parser.add_argument( 103 | "opts", 104 | help="Modify config options using the command-line", 105 | default=None, 106 | nargs=argparse.REMAINDER, 107 | ) 108 | # Parse the arguments; multi-GPU training is supported 109 | args = parser.parse_args() 110 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 111 | args.distributed = num_gpus > 1 112 | args.num_gpus = num_gpus 113 | 114 | # Run some necessary checks before starting 115 | if torch.cuda.is_available(): 116 | # This flag allows you to enable the inbuilt cudnn auto-tuner to 117 | # find the best algorithm to use for your hardware.
118 | torch.backends.cudnn.benchmark = True 119 | if args.distributed: 120 | torch.cuda.set_device(args.local_rank) 121 | torch.distributed.init_process_group(backend="nccl", init_method="env://") 122 | synchronize() 123 | 124 | cfg.merge_from_file(args.config_file) 125 | cfg.merge_from_list(args.opts) 126 | cfg.freeze() 127 | 128 | # Create the model output directory 129 | if cfg.OUTPUT_DIR: 130 | mkdir(cfg.OUTPUT_DIR) 131 | 132 | # Use a logger for recording 133 | logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR) 134 | logger.info("Using {} GPUs".format(num_gpus)) 135 | logger.info(args) 136 | 137 | # Load the configuration file 138 | logger.info("Loaded configuration file {}".format(args.config_file)) 139 | with open(args.config_file, "r") as cf: 140 | config_str = "\n" + cf.read() 141 | logger.info(config_str) 142 | logger.info("Running with config:\n{}".format(cfg)) 143 | 144 | # Train the model 145 | model = train(cfg, args) 146 | 147 | # Start evaluation 148 | if not args.skip_test: 149 | logger.info('Start evaluating...') 150 | torch.cuda.empty_cache() # speed up evaluating after training finished 151 | do_evaluation(cfg, model, distributed=args.distributed) 152 | 153 | 154 | if __name__ == '__main__': 155 | main() 156 | -------------------------------------------------------------------------------- /ssd/video_demo.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import time 4 | 5 | import torch 6 | from PIL import Image 7 | from vizer.draw import draw_boxes 8 | 9 | from ssd.config import cfg 10 | from ssd.data.datasets import COCODataset, VOCDataset 11 | import argparse 12 | import numpy as np 13 | 14 | from ssd.data.transforms import build_transforms 15 | from ssd.modeling.detector import build_detection_model 16 | from ssd.utils import mkdir 17 | from ssd.utils.checkpoint import CheckPointer 18 | 19 | 20 | # Runs detection over every image in a directory 21 | @torch.no_grad() 22 | def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): 23 | if dataset_type == "voc": 24 | class_names = VOCDataset.class_names 25 | elif dataset_type == 'coco': 26 | class_names = COCODataset.class_names 27 | else: 28 | raise NotImplementedError('Not implemented now.') 29 | device = torch.device(cfg.MODEL.DEVICE) 30 | 31 | model = build_detection_model(cfg) 32 | model = model.to(device) 33 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR) 34 | checkpointer.load(ckpt, use_latest=ckpt is None) 35 | weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file() 36 | print('Loaded weights from {}'.format(weight_file)) 37 | 38 | image_paths = glob.glob(os.path.join(images_dir, '*.jpg')) 39 | mkdir(output_dir) 40 | 41 | cpu_device = torch.device("cpu") 42 | transforms = build_transforms(cfg, is_train=False) 43 | model.eval() 44 | # Each image is detected one at a time; adapt this to take a single image and return the annotated result 45 | # As in the earlier example, what comes back is a Pillow-processed image, which is neat 46 | for i, image_path in enumerate(image_paths): 47 | start = time.time() 48 | image_name = os.path.basename(image_path) 49 | 50 | image = np.array(Image.open(image_path).convert("RGB")) 51 | height, width = image.shape[:2] 52 | images = transforms(image)[0].unsqueeze(0) 53 | load_time = time.time() - start 54 | 55 | start = time.time() 56 | result = model(images.to(device))[0] 57 | inference_time = time.time() - start 58 | 59 | result = result.resize((width, height)).to(cpu_device).numpy() 60 | boxes, labels, scores = result['boxes'], result['labels'], result['scores'] 61 | 62 | indices = scores > score_threshold 63 | boxes = boxes[indices] 64 | labels = labels[indices] 65 | scores =
scores[indices] 66 | meters = ' | '.join( 67 | [ 68 | 'objects {:02d}'.format(len(boxes)), 69 | 'load {:03d}ms'.format(round(load_time * 1000)), 70 | 'inference {:03d}ms'.format(round(inference_time * 1000)), 71 | 'FPS {}'.format(round(1.0 / inference_time)) 72 | ] 73 | ) 74 | print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters)) 75 | 76 | drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8) 77 | Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name)) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description="SSD Demo.") 82 | parser.add_argument( 83 | "--config-file", 84 | default="", 85 | metavar="FILE", 86 | help="path to config file", 87 | type=str, 88 | ) 89 | parser.add_argument("--ckpt", type=str, default=None, help="Trained weights.") 90 | parser.add_argument("--score_threshold", type=float, default=0.7) 91 | parser.add_argument("--images_dir", default='demo', type=str, help='Specify an image dir to do prediction.') 92 | parser.add_argument("--output_dir", default='demo/result', type=str, help='Specify an image dir to save predicted images.') 93 | parser.add_argument("--dataset_type", default="voc", type=str, help='Specify dataset type. Currently supports voc and coco.') 94 | 95 | parser.add_argument( 96 | "opts", 97 | help="Modify config options using the command-line", 98 | default=None, 99 | nargs=argparse.REMAINDER, 100 | ) 101 | args = parser.parse_args() 102 | print(args) 103 | 104 | cfg.merge_from_file(args.config_file) 105 | cfg.merge_from_list(args.opts) 106 | cfg.freeze() 107 | 108 | print("Loaded configuration file {}".format(args.config_file)) 109 | with open(args.config_file, "r") as cf: 110 | config_str = "\n" + cf.read() 111 | print(config_str) 112 | print("Running with config:\n{}".format(cfg)) 113 | 114 | run_demo(cfg=cfg, 115 | ckpt=args.ckpt, 116 | score_threshold=args.score_threshold, 117 | images_dir=args.images_dir, 118 | output_dir=args.output_dir, 119 | dataset_type=args.dataset_type) 120 | 121 | 122 | if __name__ == '__main__': 123 | main() 124 | -------------------------------------------------------------------------------- /ssd/visdrone_demo.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import time 4 | 5 | import torch 6 | from PIL import Image 7 | from vizer.draw import draw_boxes 8 | 9 | from ssd.config import cfg 10 | from ssd.data.datasets import COCODataset, VOCDataset, VisDroneDataset 11 | import argparse 12 | import numpy as np 13 | 14 | from ssd.data.transforms import build_transforms 15 | from ssd.modeling.detector import build_detection_model 16 | from ssd.utils import mkdir 17 | from ssd.utils.checkpoint import CheckPointer 18 | 19 | 20 | # The original dataset could be loaded directly; here it needs to be converted first 21 | @torch.no_grad() 22 | def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): 23 | if dataset_type == "voc": 24 | class_names = VOCDataset.class_names 25 | elif dataset_type == 'coco': 26 | class_names = COCODataset.class_names 27 | elif dataset_type == 'visdrone': 28 | class_names = VisDroneDataset.class_names 29 | else: 30 | raise NotImplementedError('Not implemented now.') 31 | device = torch.device(cfg.MODEL.DEVICE) 32 | 33 | model = build_detection_model(cfg) 34 | model = model.to(device) 35 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR) 36 | checkpointer.load(ckpt, use_latest=ckpt is None) 37 | weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file() 38 |
print('Loaded weights from {}'.format(weight_file)) 39 | 40 | image_paths = glob.glob(os.path.join(images_dir, '*.jpg')) 41 | mkdir(output_dir) 42 | 43 | cpu_device = torch.device("cpu") 44 | transforms = build_transforms(cfg, is_train=False) 45 | model.eval() 46 | for i, image_path in enumerate(image_paths): 47 | start = time.time() 48 | image_name = os.path.basename(image_path) 49 | 50 | image = np.array(Image.open(image_path).convert("RGB")) 51 | height, width = image.shape[:2] 52 | images = transforms(image)[0].unsqueeze(0) 53 | load_time = time.time() - start 54 | 55 | start = time.time() 56 | result = model(images.to(device))[0] 57 | inference_time = time.time() - start 58 | 59 | result = result.resize((width, height)).to(cpu_device).numpy() 60 | boxes, labels, scores = result['boxes'], result['labels'], result['scores'] 61 | 62 | indices = scores > score_threshold 63 | boxes = boxes[indices] 64 | labels = labels[indices] 65 | scores = scores[indices] 66 | meters = ' | '.join( 67 | [ 68 | 'objects {:02d}'.format(len(boxes)), 69 | 'load {:03d}ms'.format(round(load_time * 1000)), 70 | 'inference {:03d}ms'.format(round(inference_time * 1000)), 71 | 'FPS {}'.format(round(1.0 / inference_time)) 72 | ] 73 | ) 74 | print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters)) 75 | 76 | drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8) 77 | Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name)) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description="SSD Demo.") 82 | parser.add_argument( 83 | "--config-file", 84 | default="configs/vgg_ssd300_visdrone0413.yaml", 85 | metavar="FILE", 86 | help="path to config file", 87 | type=str, 88 | ) 89 | parser.add_argument("--ckpt", type=str, default='F:/服务器记录/无人机记录/model_final.pth', help="Trained weights.") 90 | parser.add_argument("--score_threshold", type=float, default=0.7) 91 | parser.add_argument("--images_dir", default='F:/datas/VOC/VisDrone_ROOT/DET2019/demo', type=str, help='Specify an image dir to do prediction.') 92 | parser.add_argument("--output_dir", default='F:/datas/VOC/VisDrone_ROOT/DET2019/result', type=str, help='Specify an image dir to save predicted images.') 93 | parser.add_argument("--dataset_type", default="visdrone", type=str, help='Specify dataset type.
Currently supports voc, coco and visdrone.') 94 | 95 | parser.add_argument( 96 | "opts", 97 | help="Modify config options using the command-line", 98 | default=None, 99 | nargs=argparse.REMAINDER, 100 | ) 101 | args = parser.parse_args() 102 | print(args) 103 | 104 | cfg.merge_from_file(args.config_file) 105 | cfg.merge_from_list(args.opts) 106 | cfg.freeze() 107 | 108 | print("Loaded configuration file {}".format(args.config_file)) 109 | with open(args.config_file, "r") as cf: 110 | config_str = "\n" + cf.read() 111 | print(config_str) 112 | print("Running with config:\n{}".format(cfg)) 113 | 114 | run_demo(cfg=cfg, 115 | ckpt=args.ckpt, 116 | score_threshold=args.score_threshold, 117 | images_dir=args.images_dir, 118 | output_dir=args.output_dir, 119 | dataset_type=args.dataset_type) 120 | 121 | 122 | if __name__ == '__main__': 123 | main() 124 | -------------------------------------------------------------------------------- /ssd/visdrone_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import torch 6 | import torch.utils.data 7 | 8 | from ssd.config import cfg 9 | from ssd.engine.inference import do_evaluation 10 | from ssd.modeling.detector import build_detection_model 11 | from ssd.utils import dist_util 12 | from ssd.utils.checkpoint import CheckPointer 13 | from ssd.utils.dist_util import synchronize 14 | from ssd.utils.logger import setup_logger 15 | 16 | 17 | def evaluation(cfg, ckpt, distributed): 18 | # Testing needs to pick out all the files, and device placement has to be handled as well 19 | logger = logging.getLogger("SSD.inference") 20 | 21 | model = build_detection_model(cfg) 22 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger) 23 | device = torch.device(cfg.MODEL.DEVICE) 24 | model.to(device) 25 | checkpointer.load(ckpt, use_latest=ckpt is None) 26 | do_evaluation(cfg, model, distributed) 27 | 28 | 29 | def main(): 30 | parser = argparse.ArgumentParser(description='SSD Evaluation on VOC and COCO dataset.') 31 | parser.add_argument( 32 | "--config-file", 33 | default="configs/vgg_ssd300_visdrone0413.yaml", 34 | metavar="FILE", 35 | help="path to config file", 36 | type=str, 37 | ) 38 | parser.add_argument("--local_rank", type=int, default=0) 39 | parser.add_argument( 40 | "--ckpt", 41 | help="The path to the checkpoint for test, default is the latest checkpoint.", 42 | default="F:/服务器记录/无人机记录/model_final.pth", 43 | type=str, 44 | ) 45 | 46 | parser.add_argument("--output_dir", default="eval_results", type=str, help="The directory to store evaluation results.") 47 | 48 | parser.add_argument( 49 | "opts", 50 | help="Modify config options using the command-line", 51 | default=None, 52 | nargs=argparse.REMAINDER, 53 | ) 54 | args = parser.parse_args() 55 | 56 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 57 | distributed = num_gpus > 1 58 | 59 | if torch.cuda.is_available(): 60 | # This flag allows you to enable the inbuilt cudnn auto-tuner to 61 | # find the best algorithm to use for your hardware.
62 | torch.backends.cudnn.benchmark = True 63 | if distributed: 64 | torch.cuda.set_device(args.local_rank) 65 | torch.distributed.init_process_group(backend="nccl", init_method="env://") 66 | synchronize() 67 | 68 | cfg.merge_from_file(args.config_file) 69 | cfg.merge_from_list(args.opts) 70 | cfg.freeze() 71 | 72 | logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR) 73 | logger.info("Using {} GPUs".format(num_gpus)) 74 | logger.info(args) 75 | 76 | logger.info("Loaded configuration file {}".format(args.config_file)) 77 | with open(args.config_file, "r") as cf: 78 | config_str = "\n" + cf.read() 79 | logger.info(config_str) 80 | logger.info("Running with config:\n{}".format(cfg)) 81 | evaluation(cfg, ckpt=args.ckpt, distributed=distributed) 82 | 83 | 84 | if __name__ == '__main__': 85 | # TODO: update the model file path and the config file path; the complete dataset files are needed for validation, and the best model will be used 86 | main() 87 | -------------------------------------------------------------------------------- /utils/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /utils/ssd/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import torch 6 | import torch.utils.data 7 | 8 | from ssd.config import cfg 9 | from ssd.engine.inference import do_evaluation 10 | from ssd.modeling.detector import build_detection_model 11 | from ssd.utils import dist_util 12 | from ssd.utils.checkpoint import CheckPointer 13 | from ssd.utils.dist_util import synchronize 14 | from ssd.utils.logger import setup_logger 15 | 16 | 17 | def evaluation(cfg, ckpt, distributed): 18 | logger = logging.getLogger("SSD.inference") 19 | 20 | model = build_detection_model(cfg) 21 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger) 22 | device = torch.device(cfg.MODEL.DEVICE) 23 | model.to(device) 24 | checkpointer.load(ckpt, use_latest=ckpt is None) 25 | do_evaluation(cfg, model, distributed) 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='SSD Evaluation on VOC and COCO dataset.') 30 | parser.add_argument( 31 | "--config-file", 32 | default="", 33 | metavar="FILE", 34 | help="path to config file", 35 | type=str, 36 | ) 37 | parser.add_argument("--local_rank", type=int, default=0) 38 | parser.add_argument( 39 | "--ckpt", 40 | help="The path to the checkpoint for test, default is the latest checkpoint.", 41 | default=None, 42 | type=str, 43 | ) 44 | 45 |
parser.add_argument("--output_dir", default="eval_results", type=str, help="The directory to store evaluation results.") 46 | 47 | parser.add_argument( 48 | "opts", 49 | help="Modify config options using the command-line", 50 | default=None, 51 | nargs=argparse.REMAINDER, 52 | ) 53 | args = parser.parse_args() 54 | 55 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 56 | distributed = num_gpus > 1 57 | 58 | if torch.cuda.is_available(): 59 | # This flag allows you to enable the inbuilt cudnn auto-tuner to 60 | # find the best algorithm to use for your hardware. 61 | torch.backends.cudnn.benchmark = True 62 | if distributed: 63 | torch.cuda.set_device(args.local_rank) 64 | torch.distributed.init_process_group(backend="nccl", init_method="env://") 65 | synchronize() 66 | 67 | cfg.merge_from_file(args.config_file) 68 | cfg.merge_from_list(args.opts) 69 | cfg.freeze() 70 | 71 | logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR) 72 | logger.info("Using {} GPUs".format(num_gpus)) 73 | logger.info(args) 74 | 75 | logger.info("Loaded configuration file {}".format(args.config_file)) 76 | with open(args.config_file, "r") as cf: 77 | config_str = "\n" + cf.read() 78 | logger.info(config_str) 79 | logger.info("Running with config:\n{}".format(cfg)) 80 | evaluation(cfg, ckpt=args.ckpt, distributed=distributed) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /utils/ssd/visdrone_demo.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import time 4 | 5 | import torch 6 | from PIL import Image 7 | from vizer.draw import draw_boxes 8 | 9 | from ssd.config import cfg 10 | from ssd.data.datasets import COCODataset, VOCDataset, VisDroneDataset 11 | import argparse 12 | import numpy as np 13 | 14 | from ssd.data.transforms import build_transforms 15 | from ssd.modeling.detector import build_detection_model 16 | from ssd.utils import mkdir 17 | from ssd.utils.checkpoint import CheckPointer 18 | 19 | 20 | # 原始数据集可以直接进行加载,现在需要转化一下 21 | @torch.no_grad() 22 | def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): 23 | if dataset_type == "voc": 24 | class_names = VOCDataset.class_names 25 | elif dataset_type == 'coco': 26 | class_names = COCODataset.class_names 27 | elif dataset_type == 'visdrone': 28 | class_names = VisDroneDataset.class_names 29 | else: 30 | raise NotImplementedError('Not implemented now.') 31 | device = torch.device(cfg.MODEL.DEVICE) 32 | 33 | model = build_detection_model(cfg) 34 | model = model.to(device) 35 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR) 36 | checkpointer.load(ckpt, use_latest=ckpt is None) 37 | weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file() 38 | print('Loaded weights from {}'.format(weight_file)) 39 | 40 | image_paths = glob.glob(os.path.join(images_dir, '*.jpg')) 41 | mkdir(output_dir) 42 | 43 | cpu_device = torch.device("cpu") 44 | transforms = build_transforms(cfg, is_train=False) 45 | model.eval() 46 | for i, image_path in enumerate(image_paths): 47 | start = time.time() 48 | image_name = os.path.basename(image_path) 49 | 50 | image = np.array(Image.open(image_path).convert("RGB")) 51 | height, width = image.shape[:2] 52 | images = transforms(image)[0].unsqueeze(0) 53 | load_time = time.time() - start 54 | 55 | start = time.time() 56 | result = model(images.to(device))[0] 57 | inference_time = time.time() - 
start 58 | 59 | result = result.resize((width, height)).to(cpu_device).numpy() 60 | boxes, labels, scores = result['boxes'], result['labels'], result['scores'] 61 | 62 | indices = scores > score_threshold 63 | boxes = boxes[indices] 64 | labels = labels[indices] 65 | scores = scores[indices] 66 | meters = ' | '.join( 67 | [ 68 | 'objects {:02d}'.format(len(boxes)), 69 | 'load {:03d}ms'.format(round(load_time * 1000)), 70 | 'inference {:03d}ms'.format(round(inference_time * 1000)), 71 | 'FPS {}'.format(round(1.0 / inference_time)) 72 | ] 73 | ) 74 | print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters)) 75 | 76 | drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8) 77 | Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name)) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description="SSD Demo.") 82 | parser.add_argument( 83 | "--config-file", 84 | default="configs/vgg_ssd300_visdrone0413.yaml", 85 | metavar="FILE", 86 | help="path to config file", 87 | type=str, 88 | ) 89 | parser.add_argument("--ckpt", type=str, default='outputs/vgg_ssd300_visdrone0418/model_final.pth', help="Trained weights.") 90 | parser.add_argument("--score_threshold", type=float, default=0.7) 91 | parser.add_argument("--images_dir", default='imgs/src', type=str, help='Specify an image dir to do prediction.') 92 | parser.add_argument("--output_dir", default='imgs/dst', type=str, help='Specify an image dir to save predicted images.') 93 | parser.add_argument("--dataset_type", default="visdrone", type=str, help='Specify dataset type. Currently supports voc, coco and visdrone.') 94 | 95 | parser.add_argument( 96 | "opts", 97 | help="Modify config options using the command-line", 98 | default=None, 99 | nargs=argparse.REMAINDER, 100 | ) 101 | args = parser.parse_args() 102 | print(args) 103 | 104 | cfg.merge_from_file(args.config_file) 105 | cfg.merge_from_list(args.opts) 106 | cfg.freeze() 107 | 108 | print("Loaded configuration file {}".format(args.config_file)) 109 | with open(args.config_file, "r") as cf: 110 | config_str = "\n" + cf.read() 111 | print(config_str) 112 | print("Running with config:\n{}".format(cfg)) 113 | 114 | run_demo(cfg=cfg, 115 | ckpt=args.ckpt, 116 | score_threshold=args.score_threshold, 117 | images_dir=args.images_dir, 118 | output_dir=args.output_dir, 119 | dataset_type=args.dataset_type) 120 | 121 | 122 | if __name__ == '__main__': 123 | main() 124 | -------------------------------------------------------------------------------- /utils/ssd/visdrone_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import torch 6 | import torch.utils.data 7 | 8 | from ssd.config import cfg 9 | from ssd.engine.inference import do_evaluation 10 | from ssd.modeling.detector import build_detection_model 11 | from ssd.utils import dist_util 12 | from ssd.utils.checkpoint import CheckPointer 13 | from ssd.utils.dist_util import synchronize 14 | from ssd.utils.logger import setup_logger 15 | import time 16 | 17 | 18 | def evaluation(cfg, ckpt, distributed): 19 | # Testing needs to pick out all the files, and device placement has to be handled as well 20 | logger = logging.getLogger("SSD.inference") 21 | 22 | model = build_detection_model(cfg) 23 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger) 24 | device = torch.device(cfg.MODEL.DEVICE) 25 | model.to(device) 26 | checkpointer.load(ckpt, use_latest=ckpt is None) 27 | do_evaluation(cfg, model, distributed) 28 | 29 |
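# Usage sketch: once cfg has been merged and frozen (as done in main() below), evaluation()
# can also be called directly, e.g.
#   evaluation(cfg, ckpt="outputs/vgg_ssd300_visdrone0418/model_final.pth", distributed=False)
# The checkpoint path here is simply the default defined below and stands in for any trained weights.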
30 | def main(): 31 | parser = argparse.ArgumentParser(description='SSD Evaluation on VOC and COCO dataset.') 32 | parser.add_argument( 33 | "--config-file", 34 | default="configs/vgg_ssd300_visdrone0413.yaml", 35 | metavar="FILE", 36 | help="path to config file", 37 | type=str, 38 | ) 39 | parser.add_argument("--local_rank", type=int, default=0) 40 | parser.add_argument( 41 | "--ckpt", 42 | help="The path to the checkpoint for test, default is the latest checkpoint.", 43 | default="outputs/vgg_ssd300_visdrone0418/model_final.pth", 44 | type=str, 45 | ) 46 | 47 | parser.add_argument("--output_dir", default="eval_results", type=str, help="The directory to store evaluation results.") 48 | 49 | parser.add_argument( 50 | "opts", 51 | help="Modify config options using the command-line", 52 | default=None, 53 | nargs=argparse.REMAINDER, 54 | ) 55 | args = parser.parse_args() 56 | 57 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 58 | distributed = num_gpus > 1 59 | 60 | if torch.cuda.is_available(): 61 | # This flag allows you to enable the inbuilt cudnn auto-tuner to 62 | # find the best algorithm to use for your hardware. 63 | torch.backends.cudnn.benchmark = True 64 | if distributed: 65 | torch.cuda.set_device(args.local_rank) 66 | torch.distributed.init_process_group(backend="nccl", init_method="env://") 67 | synchronize() 68 | 69 | cfg.merge_from_file(args.config_file) 70 | cfg.merge_from_list(args.opts) 71 | cfg.freeze() 72 | 73 | logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR) 74 | logger.info("Using {} GPUs".format(num_gpus)) 75 | logger.info(args) 76 | 77 | logger.info("Loaded configuration file {}".format(args.config_file)) 78 | with open(args.config_file, "r") as cf: 79 | config_str = "\n" + cf.read() 80 | logger.info(config_str) 81 | logger.info("Running with config:\n{}".format(cfg)) 82 | s_time = time.time() 83 | evaluation(cfg, ckpt=args.ckpt, distributed=distributed) 84 | e_time = time.time() 85 | all_time = e_time - s_time 86 | print(all_time) 87 | print("FPS:{}".format(1610/all_time))  # 1610: presumably the number of images in the evaluated VisDrone test split 88 | 89 | 90 | if __name__ == '__main__': 91 | # TODO: update the model file path and the config file path; the complete dataset files are needed for validation, and the best model will be used 92 | main() 93 | -------------------------------------------------------------------------------- /utils/voc2yolo.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import pickle 3 | import os 4 | from os import listdir, getcwd 5 | from os.path import join 6 | import sys 7 | 8 | # sets=[('2018', 'train'), ('2018', 'val')] 9 | 10 | # classes = ["a", "b", "c", "d"] 11 | 12 | # soft link your VOC2018 under here 13 | # root_dir = sys.argv[1] 14 | classes = ['pedestrian', 'people', 'bicycle', 'car', 'van', 15 | 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor', 'others'] 16 | 17 | 18 | # Converts a VOC box (xmin, xmax, ymin, ymax) in pixels to the normalized YOLO (x_center, y_center, w, h) format 19 | def convert(size, box): 20 | dw = 1. / (size[0]) 21 | dh = 1.
/ (size[1]) 22 | x = (box[0] + box[1]) / 2.0 - 1 23 | y = (box[2] + box[3]) / 2.0 - 1 24 | w = box[1] - box[0] 25 | h = box[3] - box[2] 26 | x = x * dw 27 | w = w * dw 28 | y = y * dh 29 | h = h * dh 30 | return (x, y, w, h) 31 | 32 | 33 | def single_voc2yolo(voc_folder, voc_name, yolo_path): 34 | voc_path = os.path.join(voc_folder, voc_name) 35 | yolo_name = voc_name.split(".")[0] + ".txt" 36 | yolo_path = os.path.join(yolo_path, yolo_name) 37 | with open(voc_path) as voc_f, open(yolo_path, "w") as yolo_f: 38 | tree = ET.parse(voc_f) 39 | root = tree.getroot() 40 | size = root.find('size') 41 | w = int(size.find('width').text) 42 | h = int(size.find('height').text) 43 | for obj in root.iter('object'): 44 | difficult = obj.find('difficult').text 45 | cls = obj.find('name').text 46 | if cls not in classes or int(difficult) == 1: 47 | continue 48 | cls_id = classes.index(cls) 49 | xmlbox = obj.find('bndbox') 50 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), 51 | float(xmlbox.find('ymax').text)) 52 | bb = convert((w, h), b) 53 | yolo_f.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 54 | 55 | 56 | def transform_all(voc_folder, yolo_path): 57 | voc_names = os.listdir(voc_folder) 58 | for voc_name in voc_names: 59 | single_voc2yolo(voc_folder, voc_name, yolo_path) 60 | print("{} done!".format(voc_name)) 61 | 62 | 63 | if __name__ == '__main__': 64 | voc_folder = "E:/datas/ai/voc/VisDrone_ROOT/VisDrone2019/Annotations/" 65 | # voc_name = "0000001_02999_d_0000005.xml" 66 | yolo_path = "E:/datas/ai/voc/VisDrone_ROOT/VisDrone2019/annotations_yolo/" 67 | # single_voc2yolo(voc_folder, voc_name, yolo_path) 68 | transform_all(voc_folder, yolo_path) 69 | 70 | # def convert_annotation(year, image_id): 71 | # in_file = open(os.path.join(root_dir, 'VOC%s/Annotations/%s.xml'%(year, image_id))) 72 | # out_file = open(os.path.join(root_dir, 'VOC%s/labels/%s.txt'%(year, image_id)), 'w') 73 | # tree=ET.parse(in_file) 74 | # root = tree.getroot() 75 | # size = root.find('size') 76 | # w = int(size.find('width').text) 77 | # h = int(size.find('height').text) 78 | # 79 | # for obj in root.iter('object'): 80 | # difficult = obj.find('difficult').text 81 | # cls = obj.find('name').text 82 | # if cls not in classes or int(difficult)==1: 83 | # continue 84 | # cls_id = classes.index(cls) 85 | # xmlbox = obj.find('bndbox') 86 | # b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) 87 | # bb = convert((w,h), b) 88 | # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 89 | # 90 | # wd = getcwd() 91 | # 92 | # for year, image_set in sets: 93 | # labels_target = os.path.join(root_dir, 'VOC%s/labels/'%(year)) 94 | # print('labels dir to save: {}'.format(labels_target)) 95 | # if not os.path.exists(labels_target): 96 | # os.makedirs(labels_target) 97 | # image_ids = open(os.path.join(root_dir, 'VOC{}/ImageSets/Main/{}.txt'.format(year, image_set))).read().strip().split() 98 | # list_file = open(os.path.join(root_dir, '%s_%s.txt'%(year, image_set)), 'w') 99 | # for image_id in image_ids: 100 | # img_f = os.path.join(root_dir, 'VOC%s/JPEGImages/%s.jpg\n'%(year, image_id)) 101 | # list_file.write(os.path.abspath(img_f)) 102 | # convert_annotation(year, image_id) 103 | # list_file.close() 104 | 105 | # print('done.') 106 | -------------------------------------------------------------------------------- /utils/yolo/move_imgs.py: 
-------------------------------------------------------------------------------- 1 | import shutil 2 | import os 3 | import numpy as np 4 | import time 5 | 6 | 7 | def move_imgs(img_folder, txt_file, dst_folder): 8 | data = np.loadtxt(txt_file, dtype=str) 9 | for img_name in data: 10 | src_img_name = img_name + ".jpg" 11 | src_img_path = os.path.join(img_folder, src_img_name) 12 | shutil.copy(src_img_path, dst_folder) 13 | print("{} copy done!".format(src_img_name)) 14 | 15 | 16 | def timeTest(): 17 | start = time.time() 18 | print("Start: " + str(start)) 19 | for i in range(1, 100000000): 20 | pass 21 | stop = time.time() 22 | print("Stop: " + str(stop)) 23 | print(str(stop - start) + " seconds") 24 | 25 | 26 | if __name__ == '__main__': 27 | # move_imgs("E:/datas/ai/voc/VisDrone_ROOT/VisDrone2019/JPEGImages", "../txts/test.txt", "E:/datas/ai/voc/VisDrone_ROOT/VisDrone2019/Images_split/test") 28 | # move_imgs("E:/datas/ai/voc/VisDrone_ROOT/VisDrone2019/JPEGImages", "../txts/val.txt", "E:/datas/ai/voc/VisDrone_ROOT/VisDrone2019/Images_split/val") 29 | # move_imgs("E:/datas/ai/voc/VisDrone_ROOT/VisDrone2019/JPEGImages", "../txts/train.txt", "E:/datas/ai/voc/VisDrone_ROOT/VisDrone2019/Images_split/train") 30 | timeTest() 31 | -------------------------------------------------------------------------------- /utils/yolo/train_val_go.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def get_server_data(txt_path): 4 | datas = np.loadtxt(txt_path, dtype=str) 5 | new_datas = [] 6 | for data in datas: 7 | new_data = "data/custom/images/" + data + ".jpg" 8 | new_datas.append(new_data) 9 | print(new_data) 10 | # print(data) 11 | np.savetxt("test.txt", np.array(new_datas), fmt="%s", delimiter=" ") 12 | 13 | if __name__ == '__main__': 14 | get_server_data(txt_path="../txts/test.txt") -------------------------------------------------------------------------------- /yolo/config/coco.data: -------------------------------------------------------------------------------- 1 | classes= 80 2 | train=data/coco/trainvalno5k.txt 3 | valid=data/coco/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /yolo/config/custom.data: -------------------------------------------------------------------------------- 1 | classes= 11 2 | train=data/custom/train.txt 3 | valid=data/custom/valid.txt 4 | names=data/custom/classes.names 5 | -------------------------------------------------------------------------------- /yolo/config/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | # 0 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # 1 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | # 2 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | # 3 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | # 4 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=3 58 | stride=1
59 | pad=1 60 | activation=leaky 61 | 62 | # 5 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | # 6 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | # 7 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | # 8 82 | [convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | # 9 91 | [maxpool] 92 | size=2 93 | stride=2 94 | 95 | # 10 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | # 11 105 | [maxpool] 106 | size=2 107 | stride=1 108 | 109 | # 12 110 | [convolutional] 111 | batch_normalize=1 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | ########### 119 | 120 | # 13 121 | [convolutional] 122 | batch_normalize=1 123 | filters=256 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | # 14 130 | [convolutional] 131 | batch_normalize=1 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | # 15 139 | [convolutional] 140 | size=1 141 | stride=1 142 | pad=1 143 | filters=255 144 | activation=linear 145 | 146 | 147 | 148 | # 16 149 | [yolo] 150 | mask = 3,4,5 151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 152 | classes=80 153 | num=6 154 | jitter=.3 155 | ignore_thresh = .7 156 | truth_thresh = 1 157 | random=1 158 | 159 | # 17 160 | [route] 161 | layers = -4 162 | 163 | # 18 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | # 19 173 | [upsample] 174 | stride=2 175 | 176 | # 20 177 | [route] 178 | layers = -1, 8 179 | 180 | # 21 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | # 22 190 | [convolutional] 191 | size=1 192 | stride=1 193 | pad=1 194 | filters=255 195 | activation=linear 196 | 197 | # 23 198 | [yolo] 199 | mask = 1,2,3 200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 201 | classes=80 202 | num=6 203 | jitter=.3 204 | ignore_thresh = .7 205 | truth_thresh = 1 206 | random=1 207 | -------------------------------------------------------------------------------- /yolo/data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- 
/yolo/data/custom/classes.names: -------------------------------------------------------------------------------- 1 | pedestrian 2 | people 3 | bicycle 4 | car 5 | van 6 | truck 7 | tricycle 8 | awning-tricycle 9 | bus 10 | motor 11 | others 12 | -------------------------------------------------------------------------------- /yolo/data/custom/images/0000001_02999_d_0000005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/data/custom/images/0000001_02999_d_0000005.jpg -------------------------------------------------------------------------------- /yolo/data/custom/labels/0000001_02999_d_0000005.txt: -------------------------------------------------------------------------------- 1 | 3 0.4671875 0.5712962962962963 0.028125 0.08518518518518518 2 | 3 0.509375 0.5898148148148148 0.03229166666666666 0.08518518518518518 3 | 0 0.23932291666666666 0.987037037037037 0.0140625 0.024074074074074074 4 | 7 0.24583333333333332 0.525 0.027083333333333334 0.03888888888888889 5 | 9 0.32369791666666664 0.487962962962963 0.028645833333333332 0.024074074074074074 6 | 7 0.26458333333333334 0.42638888888888893 0.020833333333333332 0.03981481481481482 7 | 9 0.2713541666666667 0.4101851851851852 0.01875 0.024074074074074074 8 | 0 0.32421875 0.21203703703703705 0.005729166666666666 0.02777777777777778 9 | 1 0.3338541666666667 0.1712962962962963 0.007291666666666667 0.022222222222222223 10 | 3 0.31015625 0.08148148148148149 0.018229166666666668 0.02777777777777778 11 | 9 0.3932291666666667 0.4810185185185185 0.01875 0.02314814814814815 12 | 4 0.41770833333333335 0.23842592592592593 0.019791666666666666 0.05648148148148149 13 | 3 0.41432291666666665 0.18472222222222223 0.011979166666666666 0.03425925925925926 14 | 1 0.490625 0.23796296296296296 0.00625 0.024074074074074074 15 | 9 0.38567708333333334 0.2087962962962963 0.019270833333333334 0.019444444444444445 16 | 1 0.42604166666666665 0.1726851851851852 0.007291666666666667 0.021296296296296296 17 | 0 0.37421875 0.09351851851851853 0.005729166666666666 0.02592592592592593 18 | 9 0.4536458333333333 0.0587962962962963 0.005208333333333333 0.017592592592592594 19 | 1 0.4479166666666667 0.08611111111111111 0.00625 0.016666666666666666 20 | 1 0.44453125 0.11944444444444445 0.005729166666666666 0.020370370370370372 21 | -------------------------------------------------------------------------------- /yolo/data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 4 | 5 | # Clone COCO API 6 | git clone https://github.com/pdollar/coco 7 | cd coco 8 | 9 | mkdir images 10 | cd images 11 | 12 | # Download Images 13 | wget -c https://pjreddie.com/media/files/train2014.zip 14 | wget -c https://pjreddie.com/media/files/val2014.zip 15 | 16 | # Unzip 17 | unzip -q train2014.zip 18 | unzip -q val2014.zip 19 | 20 | cd .. 
21 | 22 | # Download COCO Metadata 23 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 24 | wget -c https://pjreddie.com/media/files/coco/5k.part 25 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 26 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 27 | tar xzf labels.tgz 28 | unzip -q instances_train-val2014.zip 29 | 30 | # Set Up Image Lists 31 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 32 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt 33 | -------------------------------------------------------------------------------- /yolo/data/samples/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/data/samples/dog.jpg -------------------------------------------------------------------------------- /yolo/data/samples/visdrone/0000006_00159_d_0000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/data/samples/visdrone/0000006_00159_d_0000001.jpg -------------------------------------------------------------------------------- /yolo/output/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/output/dog.png -------------------------------------------------------------------------------- /yolo/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | tensorflow 4 | tensorboard 5 | terminaltables 6 | pillow 7 | tqdm 8 | -------------------------------------------------------------------------------- /yolo/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | from utils.utils import * 5 | from utils.datasets import * 6 | from utils.parse_config import * 7 | 8 | import os 9 | import sys 10 | import time 11 | import datetime 12 | import argparse 13 | import tqdm 14 | 15 | import torch 16 | from torch.utils.data import DataLoader 17 | from torchvision import datasets 18 | from torchvision import transforms 19 | from torch.autograd import Variable 20 | import torch.optim as optim 21 | 22 | 23 | def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size): 24 | model.eval() 25 | 26 | # Get dataloader 27 | dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False) 28 | dataloader = torch.utils.data.DataLoader( 29 | dataset, batch_size=batch_size, shuffle=False, num_workers=1, collate_fn=dataset.collate_fn 30 | ) 31 | 32 | Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 33 | 34 | labels = [] 35 | sample_metrics = [] # List of tuples (TP, confs, pred) 36 | for batch_i, (_, imgs, targets) in enumerate(tqdm.tqdm(dataloader, desc="Detecting objects")): 37 | 38 | # Extract labels 39 | labels += targets[:, 1].tolist() 40 | # Rescale target 41 | targets[:, 2:] = xywh2xyxy(targets[:, 2:]) 42 | targets[:, 2:] *= img_size 43 | 44 | imgs = Variable(imgs.type(Tensor), requires_grad=False) 45 | 46 | with torch.no_grad(): 47 | outputs = model(imgs) 48 | outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres) 49 | 50 | sample_metrics
+= get_batch_statistics(outputs, targets, iou_threshold=iou_thres) 51 | 52 | # Concatenate sample statistics 53 | true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*sample_metrics))] 54 | precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, labels) 55 | 56 | return precision, recall, AP, f1, ap_class 57 | 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch") 62 | parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") 63 | parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") 64 | parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") 65 | parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") 66 | parser.add_argument("--iou_thres", type=float, default=0.5, help="iou threshold required to qualify as detected") 67 | parser.add_argument("--conf_thres", type=float, default=0.001, help="object confidence threshold") 68 | parser.add_argument("--nms_thres", type=float, default=0.5, help="iou threshold for non-maximum suppression") 69 | parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation") 70 | parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") 71 | opt = parser.parse_args() 72 | print(opt) 73 | 74 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 75 | 76 | data_config = parse_data_config(opt.data_config) 77 | valid_path = data_config["valid"] 78 | class_names = load_classes(data_config["names"]) 79 | 80 | # Initiate model 81 | model = Darknet(opt.model_def).to(device) 82 | if opt.weights_path.endswith(".weights"): 83 | # Load darknet weights 84 | model.load_darknet_weights(opt.weights_path) 85 | else: 86 | # Load checkpoint weights 87 | model.load_state_dict(torch.load(opt.weights_path)) 88 | 89 | print("Compute mAP...") 90 | 91 | precision, recall, AP, f1, ap_class = evaluate( 92 | model, 93 | path=valid_path, 94 | iou_thres=opt.iou_thres, 95 | conf_thres=opt.conf_thres, 96 | nms_thres=opt.nms_thres, 97 | img_size=opt.img_size, 98 | batch_size=opt.batch_size, 99 | ) 100 | 101 | print("Average Precisions:") 102 | for i, c in enumerate(ap_class): 103 | print(f"+ Class '{c}' ({class_names[c]}) - AP: {AP[i]}") 104 | 105 | print(f"mAP: {AP.mean()}") 106 | -------------------------------------------------------------------------------- /yolo/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/utils/__init__.py -------------------------------------------------------------------------------- /yolo/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /yolo/utils/__pycache__/augmentations.cpython-36.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/utils/__pycache__/augmentations.cpython-36.pyc -------------------------------------------------------------------------------- /yolo/utils/__pycache__/datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/utils/__pycache__/datasets.cpython-36.pyc -------------------------------------------------------------------------------- /yolo/utils/__pycache__/logger.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/utils/__pycache__/logger.cpython-36.pyc -------------------------------------------------------------------------------- /yolo/utils/__pycache__/parse_config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/utils/__pycache__/parse_config.cpython-36.pyc -------------------------------------------------------------------------------- /yolo/utils/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmFighting/visdrone_detection/40591850480ecaaa168ec2ea88a5534a770327c6/yolo/utils/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /yolo/utils/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | def horisontal_flip(images, targets): 7 | images = torch.flip(images, [-1]) 8 | targets[:, 2] = 1 - targets[:, 2] 9 | return images, targets 10 | -------------------------------------------------------------------------------- /yolo/utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import sys 5 | import numpy as np 6 | from PIL import Image 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from utils.augmentations import horisontal_flip 11 | from torch.utils.data import Dataset 12 | import torchvision.transforms as transforms 13 | 14 | 15 | def pad_to_square(img, pad_value): 16 | c, h, w = img.shape 17 | dim_diff = np.abs(h - w) 18 | # (upper / left) padding and (lower / right) padding 19 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 20 | # Determine padding 21 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) 22 | # Add padding 23 | img = F.pad(img, pad, "constant", value=pad_value) 24 | 25 | return img, pad 26 | 27 | 28 | def resize(image, size): 29 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 30 | return image 31 | 32 | 33 | def random_resize(images, min_size=288, max_size=448): 34 | new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0] 35 | images = F.interpolate(images, size=new_size, mode="nearest") 36 | return images 37 | 38 | 39 | class ImageFolder(Dataset): 40 | def __init__(self, folder_path, img_size=416): 41 | self.files = sorted(glob.glob("%s/*.*" % folder_path)) 42 | self.img_size = img_size 43 | 44 | def __getitem__(self, index): 45 | 
img_path = self.files[index % len(self.files)] 46 | # Extract image as PyTorch tensor 47 | img = transforms.ToTensor()(Image.open(img_path)) 48 | # Pad to square resolution 49 | img, _ = pad_to_square(img, 0) 50 | # Resize 51 | img = resize(img, self.img_size) 52 | 53 | return img_path, img 54 | 55 | def __len__(self): 56 | return len(self.files) 57 | 58 | 59 | class ListDataset(Dataset): 60 | def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): 61 | with open(list_path, "r") as file: 62 | self.img_files = file.readlines() 63 | 64 | self.label_files = [ 65 | path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") 66 | for path in self.img_files 67 | ] 68 | self.img_size = img_size 69 | self.max_objects = 100 70 | self.augment = augment 71 | self.multiscale = multiscale 72 | self.normalized_labels = normalized_labels 73 | self.min_size = self.img_size - 3 * 32 74 | self.max_size = self.img_size + 3 * 32 75 | self.batch_count = 0 76 | 77 | def __getitem__(self, index): 78 | 79 | # --------- 80 | # Image 81 | # --------- 82 | 83 | img_path = self.img_files[index % len(self.img_files)].rstrip() 84 | 85 | # Extract image as PyTorch tensor 86 | img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) 87 | 88 | # Handle images with less than three channels 89 | if len(img.shape) != 3: 90 | img = img.unsqueeze(0) 91 | img = img.expand((3, img.shape[1], img.shape[2])) 92 | 93 | _, h, w = img.shape 94 | h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) 95 | # Pad to square resolution 96 | img, pad = pad_to_square(img, 0) 97 | _, padded_h, padded_w = img.shape 98 | 99 | # --------- 100 | # Label 101 | # --------- 102 | 103 | label_path = self.label_files[index % len(self.img_files)].rstrip() 104 | 105 | targets = None 106 | if os.path.exists(label_path): 107 | boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5)) 108 | # Extract coordinates for unpadded + unscaled image 109 | x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) 110 | y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) 111 | x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) 112 | y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) 113 | # Adjust for added padding 114 | x1 += pad[0] 115 | y1 += pad[2] 116 | x2 += pad[1] 117 | y2 += pad[3] 118 | # Returns (x, y, w, h) 119 | boxes[:, 1] = ((x1 + x2) / 2) / padded_w 120 | boxes[:, 2] = ((y1 + y2) / 2) / padded_h 121 | boxes[:, 3] *= w_factor / padded_w 122 | boxes[:, 4] *= h_factor / padded_h 123 | 124 | targets = torch.zeros((len(boxes), 6)) 125 | targets[:, 1:] = boxes 126 | 127 | # Apply augmentations 128 | if self.augment: 129 | if np.random.random() < 0.5: 130 | img, targets = horisontal_flip(img, targets) 131 | 132 | return img_path, img, targets 133 | 134 | def collate_fn(self, batch): 135 | paths, imgs, targets = list(zip(*batch)) 136 | # Remove empty placeholder targets 137 | targets = [boxes for boxes in targets if boxes is not None] 138 | # Add sample index to targets 139 | for i, boxes in enumerate(targets): 140 | boxes[:, 0] = i 141 | targets = torch.cat(targets, 0) 142 | # Selects new image size every tenth batch 143 | if self.multiscale and self.batch_count % 10 == 0: 144 | self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) 145 | # Resize images to input shape 146 | imgs = torch.stack([resize(img, self.img_size) for img in imgs]) 147 | self.batch_count += 1 148 | return paths, imgs, targets 149 | 150 | def __len__(self): 151 | return len(self.img_files) 152 |
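# Usage sketch (assumptions: a list file such as data/custom/train.txt with one image path
# per line, as referenced by config/custom.data, and labels in a parallel labels/ directory):
#
#   dataset = ListDataset("data/custom/train.txt", img_size=416, augment=True, multiscale=True)
#   loader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True,
#                                        collate_fn=dataset.collate_fn)
#   for paths, imgs, targets in loader:
#       # imgs: (B, 3, S, S), where S is re-sampled every 10th batch when multiscale=True;
#       # targets: (N, 6) rows of (sample_idx, class_id, x_center, y_center, w, h), normalized
#       break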
-------------------------------------------------------------------------------- /yolo/utils/logger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf  # note: relies on the TensorFlow 1.x summary API (tf.summary.FileWriter) 2 | 3 | 4 | class Logger(object): 5 | def __init__(self, log_dir): 6 | """Create a summary writer logging to log_dir.""" 7 | self.writer = tf.summary.FileWriter(log_dir) 8 | 9 | def scalar_summary(self, tag, value, step): 10 | """Log a scalar variable.""" 11 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 12 | self.writer.add_summary(summary, step) 13 | 14 | def list_of_scalars_summary(self, tag_value_pairs, step): 15 | """Log scalar variables.""" 16 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs]) 17 | self.writer.add_summary(summary, step) 18 | -------------------------------------------------------------------------------- /yolo/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def parse_model_config(path):
 4 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 5 | file = open(path, 'r') 6 | lines = file.read().split('\n') 7 | lines = [x for x in lines if x and not x.startswith('#')] 8 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 9 | module_defs = [] 10 | for line in lines: 11 | if line.startswith('['): # This marks the start of a new block 12 | module_defs.append({}) 13 | module_defs[-1]['type'] = line[1:-1].rstrip() 14 | if module_defs[-1]['type'] == 'convolutional': 15 | module_defs[-1]['batch_normalize'] = 0 16 | else: 17 | key, value = line.split("=") 18 | value = value.strip() 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | def parse_data_config(path): 24 | """Parses the data configuration file""" 25 | options = dict() 26 | options['gpus'] = '0,1,2,3' 27 | options['num_workers'] = '10' 28 | with open(path, 'r') as fp: 29 | lines = fp.readlines() 30 | for line in lines: 31 | line = line.strip() 32 | if line == '' or line.startswith('#'): 33 | continue 34 | key, value = line.split('=') 35 | options[key.strip()] = value.strip() 36 | return options 37 | -------------------------------------------------------------------------------- /yolo/weights/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Download weights for vanilla YOLOv3 3 | wget -c https://pjreddie.com/media/files/yolov3.weights 4 | # Download weights for tiny YOLOv3 5 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 6 | # Download weights for backbone network 7 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 8 | --------------------------------------------------------------------------------
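# Usage sketch (assumptions: commands are run from the yolo/ directory so that config/,
# data/ and weights/ resolve, with the weight files fetched by the script above):
#   bash weights/download_weights.sh
#   python test.py --model_def config/yolov3.cfg --data_config config/coco.data \
#       --weights_path weights/yolov3.weights --class_path data/coco.names --img_size 416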