├── .github └── FUNDING.yml ├── .gitignore ├── LICENSE ├── README.md ├── Yolov7 Segmentation.ipynb ├── data ├── coco.yaml ├── hyp.scratch.custom.yaml ├── hyp.scratch.mask.yaml ├── hyp.scratch.p5.yaml ├── hyp.scratch.p6.yaml ├── hyp.scratch.tiny.yaml ├── hyps │ ├── hyp.scratch-high.yaml │ ├── hyp.scratch-low.yaml │ └── hyp.scratch-med.yaml └── scripts │ ├── get_coco.sh │ └── get_imagenet.sh ├── export.py ├── football1.mp4 ├── models ├── __init__.py ├── common.py ├── experimental.py ├── segment │ └── yolov7-seg.yaml ├── tf.py └── yolo.py ├── requirements.txt ├── scripts └── get_coco.sh ├── segment ├── predict.py ├── sort_count.py ├── train.py └── val.py ├── train.py ├── utils ├── __init__.py ├── activations.py ├── add_nms.py ├── augmentations.py ├── autoanchor.py ├── autobatch.py ├── benchmarks.py ├── callbacks.py ├── dataloaders.py ├── datasets.py ├── downloads.py ├── general.py ├── google_utils.py ├── loggers │ ├── __init__.py │ ├── clearml │ │ ├── README.md │ │ ├── __init__.py │ │ ├── clearml_utils.py │ │ └── hpo.py │ └── wandb │ │ ├── README.md │ │ ├── __init__.py │ │ ├── log_dataset.py │ │ ├── sweep.py │ │ ├── sweep.yaml │ │ └── wandb_utils.py ├── loss.py ├── metrics.py ├── plots.py ├── segment │ ├── __init__.py │ ├── augmentations.py │ ├── dataloaders.py │ ├── general.py │ ├── loss.py │ ├── metrics.py │ └── plots.py ├── torch_utils.py └── wandb_logging │ ├── __init__.py │ ├── log_dataset.py │ └── wandb_utils.py └── val.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: muhammadrizwanm 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | __pycache__/ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv7 Instance Segmentation 2 | 3 | 🚀 [Read the blog](https://muhammadrizwanmunawar.medium.com/train-yolov7-segmentation-on-custom-data-b91237bd2a29) 4 | 5 | ## Steps to run Code 6 | 7 | - Clone the repository 8 | ```bash 9 | git clone https://github.com/RizwanMunawar/yolov7-segmentation.git 10 | ``` 11 | 12 | - Goto the cloned folder. 
```bash
cd yolov7-segmentation
```

- Create a virtual environment (recommended, so the project's dependencies don't disturb your system Python packages).
```bash
### For Linux users
python3 -m venv yolov7seg
source yolov7seg/bin/activate

### For Windows users
python3 -m venv yolov7seg
yolov7seg\Scripts\activate
```

- Upgrade pip with the command below.
```bash
pip install --upgrade pip
```

- Install the requirements with the command below.
```bash
pip install -r requirements.txt
```

- Download the weights from [link](https://github.com/RizwanMunawar/yolov7-segmentation/releases/download/yolov7-segmentation/yolov7-seg.pt) and store them in the "yolov7-segmentation" directory.

- Run the code with one of the commands below.
```bash
# segmentation with detection
python3 segment/predict.py --weights yolov7-seg.pt --source "videopath.mp4"

# segmentation with detection + tracking
python3 segment/predict.py --weights yolov7-seg.pt --source "videopath.mp4" --trk

# also save the segmentation label files (see the parsing sketch after the results table)
python3 segment/predict.py --weights yolov7-seg.pt --source "videopath.mp4" --save-txt
```

- The output file will be created in the working directory as `yolov7-segmentation/runs/predict-seg/exp/original-video-name.mp4`

### RESULTS
Car Semantic Segmentation | Car Semantic Segmentation | Person Segmentation + Tracking
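The `--save-txt` command above writes one label file per image/frame to `runs/predict-seg/exp/labels/`. Below is a minimal sketch for reading those files back into polygons — an illustrative helper, not part of this repo. Note that `segment/predict.py` scales the segments to the original image size before writing, so the coordinates are pixel values (upstream YOLOv5 writes normalized values, so check your files if you mix tools).

```python
from pathlib import Path

import numpy as np


def read_seg_labels(txt_file):
    """Parse a YOLOv7-seg --save-txt file: each line is `cls x1 y1 x2 y2 ... [conf]`."""
    objects = []
    for line in Path(txt_file).read_text().splitlines():
        parts = line.split()
        if not parts:
            continue
        cls = int(float(parts[0]))
        vals = np.array(parts[1:], dtype=float)
        has_conf = len(vals) % 2 == 1            # --save-conf appends one confidence value
        conf = float(vals[-1]) if has_conf else None
        polygon = (vals[:-1] if has_conf else vals).reshape(-1, 2)  # (n_points, 2) as x, y
        objects.append({"class": cls, "polygon": polygon, "conf": conf})
    return objects


# e.g. labels written for one frame of videopath.mp4 (frame index comes from the dataloader)
# objects = read_seg_labels("runs/predict-seg/exp/labels/videopath_1.txt")
```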
### Custom Data Labelling

- I used [roboflow](https://roboflow.com/) for data labelling. Labels for segmentation are polygons, while labels for object detection are bounding boxes.

- Go to the [link](https://app.roboflow.com/my-personal-workspace/createSample) and create a new workspace. Make sure you are logged in to your roboflow account.

![1](https://user-images.githubusercontent.com/62513924/190390384-db8f71fa-e963-4ee6-aaca-c49e993c64ae.png)

- Once you click on create workspace, you will see the popup shown below for uploading the dataset.

![2](https://user-images.githubusercontent.com/62513924/190390882-fe08559d-ef47-450e-8613-2de899fffa4c.png)

- Click on upload dataset and roboflow will ask for the workspace name, as shown below. Fill in that form and then click on Create Private Project.
- Note: Make sure to select the Instance Segmentation option in the image below.

![dataset](https://user-images.githubusercontent.com/62513924/190853038-612791d0-9b33-4222-b28a-63ac4c13ed83.png)

- You can upload your dataset now.

![Screenshot 2022-09-17 155330](https://user-images.githubusercontent.com/62513924/190853135-887b389c-2356-4435-a946-867bb05ac4f2.png)

- Once the files have uploaded, click on Finish Uploading.

- Roboflow will ask you to assign the images to someone; click on Assign Images.

- After that, you will see the tab shown below.

![6](https://user-images.githubusercontent.com/62513924/190392948-90010cd0-ef88-437a-b94f-44ee93d8bc31.png)

- Click on any image in the Unannotated tab to start labelling.

- Note: Press p and then draw polygon points for segmentation.

![10](https://user-images.githubusercontent.com/62513924/190394353-d7dd7b7f-7a07-4738-99b6-1d5ae66b5bca.png)

- Once labelling is complete, export the data and follow the steps below to start training.

### Custom Training

- Move your custom labelled segmentation data into the "yolov7-segmentation\data" folder, following the structure shown below.

![ss](https://user-images.githubusercontent.com/62513924/190388927-62a3ee84-bad8-4f59-806f-1185acdc8acb.png)

- Go to the data folder, create a file named custom.yaml and paste the code below into it.

```yaml
train: "path to train folder"
val: "path to validation folder"
# number of classes
nc: 1
# class names
names: ['car']
```

- Download the weights from the link above and move them to the yolov7-segmentation folder.
- In the terminal, run the command below to start training.
```bash
python3 segment/train.py --data data/custom.yaml \
  --batch 4 \
  --weights "yolov7-seg.pt" \
  --cfg yolov7-seg.yaml \
  --epochs 10 \
  --name yolov7-seg \
  --img 640 \
  --hyp hyp.scratch-high.yaml
```

### Custom Model Detection Command
```bash
python3 segment/predict.py --weights "runs/yolov7-seg/exp/weights/best.pt" --source "videopath.mp4"
```

### RESULTS
Car Semantic Segmentation | Car Semantic Segmentation | Person Segmentation + Tracking
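The `--source` argument is not limited to a single video file; as the help string in `segment/predict.py` states, it accepts a file, a directory, a URL, a glob pattern, or `0` for a webcam. For example:

```bash
# run the custom model on a folder of images
python3 segment/predict.py --weights "runs/yolov7-seg/exp/weights/best.pt" --source "path/to/images/"

# run the custom model on the default webcam
python3 segment/predict.py --weights "runs/yolov7-seg/exp/weights/best.pt" --source 0
```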
170 | 171 | 172 | ### References 173 | - https://github.com/WongKinYiu/yolov7/tree/u7/seg 174 | - https://github.com/ultralytics/yolov5 175 | 176 | **Some of my articles/research papers | computer vision awesome resources for learning | How do I appear to the world? 🚀** 177 | 178 | [Ultralytics YOLO11: Object Detection and Instance Segmentation🤯](https://muhammadrizwanmunawar.medium.com/ultralytics-yolo11-object-detection-and-instance-segmentation-88ef0239a811) ![Published Date](https://img.shields.io/badge/published_Date-2024--10--27-brightgreen) 179 | 180 | [Parking Management using Ultralytics YOLO11](https://muhammadrizwanmunawar.medium.com/parking-management-using-ultralytics-yolo11-fba4c6bc62bc) ![Published Date](https://img.shields.io/badge/published_Date-2024--11--10-brightgreen) 181 | 182 | [My 🖐️Computer Vision Hobby Projects that Yielded Earnings](https://muhammadrizwanmunawar.medium.com/my-️computer-vision-hobby-projects-that-yielded-earnings-7923c9b9eead) ![Published Date](https://img.shields.io/badge/published_Date-2023--09--10-brightgreen) 183 | 184 | [Best Resources to Learn Computer Vision](https://muhammadrizwanmunawar.medium.com/best-resources-to-learn-computer-vision-311352ed0833) ![Published Date](https://img.shields.io/badge/published_Date-2023--06--30-brightgreen) 185 | 186 | [Roadmap for Computer Vision Engineer](https://medium.com/augmented-startups/roadmap-for-computer-vision-engineer-45167b94518c) ![Published Date](https://img.shields.io/badge/published_Date-2022--08--07-brightgreen) 187 | 188 | [How did I spend 2022 in the Computer Vision Field](https://www.linkedin.com/pulse/how-did-i-spend-2022-computer-vision-field-muhammad-rizwan-munawar) ![Published Date](https://img.shields.io/badge/published_Date-2022--12--20-brightgreen) 189 | 190 | [Domain Feature Mapping with YOLOv7 for Automated Edge-Based Pallet Racking Inspections](https://www.mdpi.com/1424-8220/22/18/6927) ![Published Date](https://img.shields.io/badge/published_Date-2022--09--13-brightgreen) 191 | 192 | [Exudate Regeneration for Automated Exudate Detection in Retinal Fundus Images](https://ieeexplore.ieee.org/document/9885192) ![Published Date](https://img.shields.io/badge/published_Date-2022--09--12-brightgreen) 193 | 194 | [Feature Mapping for Rice Leaf Defect Detection Based on a Custom Convolutional Architecture](https://www.mdpi.com/2304-8158/11/23/3914) ![Published Date](https://img.shields.io/badge/published_Date-2022--12--04-brightgreen) 195 | 196 | [Yolov5, Yolo-x, Yolo-r, Yolov7 Performance Comparison: A Survey](https://aircconline.com/csit/papers/vol12/csit121602.pdf) ![Published Date](https://img.shields.io/badge/published_Date-2022--09--24-brightgreen) 197 | 198 | [Explainable AI in Drug Sensitivity Prediction on Cancer Cell Lines](https://ieeexplore.ieee.org/document/9922931) ![Published Date](https://img.shields.io/badge/published_Date-2022--09--23-brightgreen) 199 | 200 | [Train YOLOv8 on Custom Data](https://medium.com/augmented-startups/train-yolov8-on-custom-data-6d28cd348262) ![Published Date](https://img.shields.io/badge/published_Date-2022--09--23-brightgreen) 201 | 202 | 203 | **More Information** 204 | 205 | For more details, you can reach out to me on [Medium](https://muhammadrizwanmunawar.medium.com/) or can connect with me on [LinkedIn](https://www.linkedin.com/in/muhammadrizwanmunawar/) 206 | -------------------------------------------------------------------------------- /data/coco.yaml: -------------------------------------------------------------------------------- 1 
| # COCO 2017 dataset http://cocodataset.org 2 | 3 | # download command/URL (optional) 4 | download: bash ./scripts/get_coco.sh 5 | 6 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 7 | train: ./coco/train2017.txt # 118287 images 8 | val: ./coco/val2017.txt # 5000 images 9 | test: ./coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 10 | 11 | # number of classes 12 | nc: 80 13 | 14 | # class names 15 | names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 16 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 17 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 18 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 19 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 20 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 21 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 22 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 23 | 'hair drier', 'toothbrush' ] 24 | -------------------------------------------------------------------------------- /data/hyp.scratch.custom.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.3 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.2 # image translation (+/- fraction) 22 | scale: 0.5 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 27 | mosaic: 1.0 # image mosaic (probability) 28 | mixup: 0.0 # image mixup (probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.0 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training -------------------------------------------------------------------------------- /data/hyp.scratch.mask.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.1 # final OneCycleLR 
learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.3 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | mask: 0.05 # mask loss gain 14 | mask_pw: 1.0 # obj BCELoss positive_weight 15 | pointrend: 0.05 # pointrend loss gain 16 | iou_t: 0.20 # IoU training threshold 17 | anchor_t: 4.0 # anchor-multiple threshold 18 | # anchors: 3 # anchors per output layer (0 to ignore) 19 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 20 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 21 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 22 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 23 | degrees: 0.0 # image rotation (+/- deg) 24 | translate: 0.1 # image translation (+/- fraction) 25 | scale: 0.5 # image scale (+/- gain) 26 | shear: 0.0 # image shear (+/- deg) 27 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 28 | flipud: 0.0 # image flip up-down (probability) 29 | fliplr: 0.5 # image flip left-right (probability) 30 | mosaic: 1.0 # image mosaic (probability) 31 | mixup: 0. # image mixup (probability) 32 | copy_paste: 0. #15 # image copy paste (probability) 33 | paste_in: 0. # image copy paste (probability) 34 | attn_resolution: 14 35 | num_base: 5 36 | mask_resolution: 56 37 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training 38 | -------------------------------------------------------------------------------- /data/hyp.scratch.p5.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.3 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.2 # image translation (+/- fraction) 22 | scale: 0.9 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 27 | mosaic: 1.0 # image mosaic (probability) 28 | mixup: 0.15 # image mixup (probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.15 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training 
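These `hyp.*.yaml` files are plain YAML dictionaries, so they can be inspected or tweaked programmatically before being passed to training with `--hyp`. A minimal sketch using PyYAML (already listed in `requirements.txt`); the output name `hyp.custom.yaml` is just an example:

```python
import yaml

# load an existing hyperparameter file
with open("data/hyp.scratch.custom.yaml") as f:
    hyp = yaml.safe_load(f)  # dict: hyperparameter name -> value

# tweak a value, e.g. disable mosaic augmentation for a quick experiment
hyp["mosaic"] = 0.0

# write a new file (note: safe_dump drops the explanatory comments)
with open("data/hyp.custom.yaml", "w") as f:
    yaml.safe_dump(hyp, f, sort_keys=False)

# then train with: python3 segment/train.py ... --hyp data/hyp.custom.yaml
```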
-------------------------------------------------------------------------------- /data/hyp.scratch.p6.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.3 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.2 # image translation (+/- fraction) 22 | scale: 0.9 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 27 | mosaic: 1.0 # image mosaic (probability) 28 | mixup: 0.15 # image mixup (probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.15 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training -------------------------------------------------------------------------------- /data/hyp.scratch.tiny.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.5 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 1.0 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.1 # image translation (+/- fraction) 22 | scale: 0.5 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 27 | mosaic: 1.0 # image mosaic (probability) 28 | mixup: 0.05 # image mixup (probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.05 # image copy paste (probability), use 0 for faster 
training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training 32 | -------------------------------------------------------------------------------- /data/hyps/hyp.scratch-high.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for high-augmentation COCO training from scratch 3 | # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300 4 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.3 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 0.7 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.9 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.1 # image mixup (probability) 34 | copy_paste: 0.1 # segment copy-paste (probability) 35 | -------------------------------------------------------------------------------- /data/hyps/hyp.scratch-low.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for low-augmentation COCO training from scratch 3 | # python train.py --batch 64 --cfg yolov5n6.yaml --weights '' --data coco.yaml --img 640 --epochs 300 --linear 4 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 1.0 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 
0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.5 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.0 # image mixup (probability) 34 | copy_paste: 0.0 # segment copy-paste (probability) 35 | -------------------------------------------------------------------------------- /data/hyps/hyp.scratch-med.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for medium-augmentation COCO training from scratch 3 | # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300 4 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.3 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 0.7 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.9 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.1 # image mixup (probability) 34 | copy_paste: 0.0 # segment copy-paste (probability) 35 | -------------------------------------------------------------------------------- /data/scripts/get_coco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 3 | # Download COCO 2017 dataset http://cocodataset.org 4 | # Example usage: bash data/scripts/get_coco.sh 5 | # parent 6 | # ├── yolov5 7 | # └── datasets 8 | # └── coco ← downloads here 9 | 10 | # Arguments (optional) Usage: bash data/scripts/get_coco.sh --train --val --test --segments 11 | if [ "$#" -gt 0 ]; then 12 | for opt in "$@"; do 13 | case "${opt}" in 14 | --train) train=true ;; 15 | --val) val=true ;; 16 | --test) test=true ;; 17 | --segments) segments=true ;; 18 | esac 19 | done 20 | else 21 | train=False 22 | val=False 23 | test=false 24 | segments=true 25 | fi 26 | 27 | # Download/unzip labels 28 | d='../datasets' # unzip directory 29 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ 30 | 
if [ "$segments" == "true" ]; then 31 | f='coco2017labels-segments.zip' # 168 MB 32 | else 33 | f='coco2017labels.zip' # 168 MB 34 | fi 35 | echo 'Downloading' $url$f ' ...' 36 | curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f & 37 | 38 | # Download/unzip images 39 | d='../datasets/coco/images' # unzip directory 40 | url=http://images.cocodataset.org/zips/ 41 | if [ "$train" == "true" ]; then 42 | f='train2017.zip' # 19G, 118k images 43 | echo 'Downloading' $url$f '...' 44 | curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f & 45 | fi 46 | if [ "$val" == "true" ]; then 47 | f='val2017.zip' # 1G, 5k images 48 | echo 'Downloading' $url$f '...' 49 | curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f & 50 | fi 51 | wait # finish background tasks 52 | -------------------------------------------------------------------------------- /data/scripts/get_imagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 3 | # Download ILSVRC2012 ImageNet dataset https://image-net.org 4 | # Example usage: bash data/scripts/get_imagenet.sh 5 | # parent 6 | # ├── yolov5 7 | # └── datasets 8 | # └── imagenet ← downloads here 9 | 10 | # Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val 11 | if [ "$#" -gt 0 ]; then 12 | for opt in "$@"; do 13 | case "${opt}" in 14 | --train) train=true ;; 15 | --val) val=true ;; 16 | esac 17 | done 18 | else 19 | train=true 20 | val=true 21 | fi 22 | 23 | # Make dir 24 | d='../datasets/imagenet' # unzip directory 25 | mkdir -p $d && cd $d 26 | 27 | # Download/unzip train 28 | if [ "$train" == "true" ]; then 29 | wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar # download 138G, 1281167 images 30 | mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train 31 | tar -xf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar 32 | find . -name "*.tar" | while read NAME; do 33 | mkdir -p "${NAME%.tar}" 34 | tar -xf "${NAME}" -C "${NAME%.tar}" 35 | rm -f "${NAME}" 36 | done 37 | cd .. 
38 | fi 39 | 40 | # Download/unzip val 41 | if [ "$val" == "true" ]; then 42 | wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar # download 6.3G, 50000 images 43 | mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xf ILSVRC2012_img_val.tar 44 | wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash # move into subdirs 45 | fi 46 | 47 | # Delete corrupted image (optional: PNG under JPEG name that may cause dataloaders to fail) 48 | # rm train/n04266014/n04266014_10835.JPEG 49 | 50 | # TFRecords (optional) 51 | # wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt 52 | -------------------------------------------------------------------------------- /football1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RizwanMunawar/yolov7-segmentation/ae97fd0aac67f774ef89d91c2a6302fc7551e90f/football1.mp4 -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Experimental modules 4 | """ 5 | import math 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from utils.downloads import attempt_download 12 | 13 | 14 | class Sum(nn.Module): 15 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 16 | def __init__(self, n, weight=False): # n: number of inputs 17 | super().__init__() 18 | self.weight = weight # apply weights boolean 19 | self.iter = range(n - 1) # iter object 20 | if weight: 21 | self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights 22 | 23 | def forward(self, x): 24 | y = x[0] # no weight 25 | if self.weight: 26 | w = torch.sigmoid(self.w) * 2 27 | for i in self.iter: 28 | y = y + x[i + 1] * w[i] 29 | else: 30 | for i in self.iter: 31 | y = y + x[i + 1] 32 | return y 33 | 34 | 35 | class MixConv2d(nn.Module): 36 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 37 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy 38 | super().__init__() 39 | n = len(k) # number of convolutions 40 | if equal_ch: # equal c_ per group 41 | i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices 42 | c_ = [(i == g).sum() for g in range(n)] # intermediate channels 43 | else: # equal weight.numel() per group 44 | b = [c2] + [0] * n 45 | a = np.eye(n + 1, n, k=-1) 46 | a -= np.roll(a, 1, axis=1) 47 | a *= np.array(k) ** 2 48 | a[0] = 1 49 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 50 | 51 | self.m = nn.ModuleList([ 52 | nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]) 53 | self.bn = nn.BatchNorm2d(c2) 54 | self.act = nn.SiLU() 55 | 56 | def forward(self, x): 57 | return self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 58 | 59 | 60 | class Ensemble(nn.ModuleList): 61 | # Ensemble of models 62 | def __init__(self): 63 | super().__init__() 64 | 65 | def forward(self, x, augment=False, profile=False, visualize=False): 66 | y = [module(x, augment, profile, visualize)[0] for module in 
self] 67 | # y = torch.stack(y).max(0)[0] # max ensemble 68 | # y = torch.stack(y).mean(0) # mean ensemble 69 | y = torch.cat(y, 1) # nms ensemble 70 | return y, None # inference, train output 71 | 72 | 73 | def attempt_load(weights, device=None, inplace=True, fuse=True): 74 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 75 | from models.yolo import Detect, Model 76 | 77 | model = Ensemble() 78 | for w in weights if isinstance(weights, list) else [weights]: 79 | ckpt = torch.load(attempt_download(w), map_location='cpu') # load 80 | ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model 81 | 82 | # Model compatibility updates 83 | if not hasattr(ckpt, 'stride'): 84 | ckpt.stride = torch.tensor([32.]) 85 | if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)): 86 | ckpt.names = dict(enumerate(ckpt.names)) # convert to dict 87 | 88 | model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode 89 | 90 | # Module compatibility updates 91 | for m in model.modules(): 92 | t = type(m) 93 | if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model): 94 | m.inplace = inplace # torch 1.7.0 compatibility 95 | if t is Detect and not isinstance(m.anchor_grid, list): 96 | delattr(m, 'anchor_grid') 97 | setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) 98 | elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): 99 | m.recompute_scale_factor = None # torch 1.11.0 compatibility 100 | 101 | # Return model 102 | if len(model) == 1: 103 | return model[-1] 104 | 105 | # Return detection ensemble 106 | print(f'Ensemble created with {weights}\n') 107 | for k in 'names', 'nc', 'yaml': 108 | setattr(model, k, getattr(model[0], k)) 109 | model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 110 | assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}' 111 | return model 112 | -------------------------------------------------------------------------------- /models/segment/yolov7-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv7 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # YOLOv7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | 
[-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, Concat, [1]], 125 | 126 | [-1, 1, Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, Conv, [256, 3, 1]], 136 | [88, 1, Conv, [512, 3, 1]], 137 | [101, 1, Conv, [1024, 3, 1]], 138 | 139 | [[102, 103, 104], 1, ISegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 140 | ] 141 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Base ---------------------------------------- 2 | matplotlib>=3.2.2 3 | numpy>=1.18.5 4 | opencv-python>=4.1.1 5 | Pillow>=7.1.2 6 | PyYAML>=5.3.1 7 | requests>=2.23.0 8 | scipy>=1.4.1 9 | torch>=1.7.0 10 | torchvision>=0.8.1 11 | tqdm>=4.64.0 12 | protobuf<=3.21.6 13 | 14 | # Logging ------------------------------------- 15 | tensorboard>=2.4.1 16 | # wandb 17 | # clearml 18 | 
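# (optional) uncomment wandb / clearml above to install the experiment-logging
# backends that the utils/loggers package integrates with (W&B, ClearML)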
19 | # Plotting ------------------------------------ 20 | pandas>=1.1.4 21 | seaborn>=0.11.0 22 | 23 | #Tracking ..................................... 24 | filterpy 25 | scikit-image 26 | 27 | # Export -------------------------------------- 28 | # coremltools>=5.2 # CoreML export 29 | # onnx>=1.9.0 # ONNX export 30 | # onnx-simplifier>=0.4.1 # ONNX simplifier 31 | # nvidia-pyindex # TensorRT export 32 | # nvidia-tensorrt # TensorRT export 33 | # scikit-learn==0.19.2 # CoreML quantization 34 | # tensorflow>=2.4.1 # TFLite export (or tensorflow-cpu, tensorflow-aarch64) 35 | # tensorflowjs>=3.9.0 # TF.js export 36 | # openvino-dev # OpenVINO export 37 | 38 | # Extras -------------------------------------- 39 | ipython # interactive notebook 40 | psutil # system utilization 41 | thop>=0.1.1 # FLOPs computation 42 | # albumentations>=1.0.3 43 | # pycocotools>=2.0 # COCO mAP 44 | # roboflow 45 | -------------------------------------------------------------------------------- /scripts/get_coco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # COCO 2017 dataset http://cocodataset.org 3 | # Download command: bash ./scripts/get_coco.sh 4 | 5 | # Download/unzip labels 6 | d='./' # unzip directory 7 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ 8 | f='coco2017labels-segments.zip' # or 'coco2017labels.zip', 68 MB 9 | echo 'Downloading' $url$f ' ...' 10 | curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background 11 | 12 | # Download/unzip images 13 | d='./coco/images' # unzip directory 14 | url=http://images.cocodataset.org/zips/ 15 | f1='train2017.zip' # 19G, 118k images 16 | f2='val2017.zip' # 1G, 5k images 17 | f3='test2017.zip' # 7G, 41k images (optional) 18 | for f in $f1 $f2 $f3; do 19 | echo 'Downloading' $url$f '...' 20 | curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background 21 | done 22 | wait # finish background tasks 23 | -------------------------------------------------------------------------------- /segment/predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import platform 4 | import sys 5 | from pathlib import Path 6 | 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | 10 | #..... Tracker modules...... 11 | import skimage 12 | from sort_count import * 13 | import numpy as np 14 | #........................... 
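# NOTE: sort_count (segment/sort_count.py) provides the SORT tracker (Kalman filter +
# IoU association) used when --trk is passed; it exposes the Sort class together with
# Sort.update() and Sort.getTrackers(), which are called further below in run().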
15 | 16 | 17 | FILE = Path(__file__).resolve() 18 | ROOT = FILE.parents[1] # YOLOv5 root directory 19 | if str(ROOT) not in sys.path: 20 | sys.path.append(str(ROOT)) # add ROOT to PATH 21 | ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative 22 | 23 | from models.common import DetectMultiBackend 24 | from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams 25 | from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, 26 | increment_path, non_max_suppression,scale_segments, print_args, scale_coords, strip_optimizer, xyxy2xywh) 27 | from utils.plots import Annotator, colors, save_one_box 28 | from utils.segment.general import process_mask, scale_masks, masks2segments 29 | from utils.segment.plots import plot_masks 30 | from utils.torch_utils import select_device, smart_inference_mode 31 | 32 | 33 | @smart_inference_mode() 34 | def run( 35 | weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) 36 | source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam 37 | data=ROOT / 'data/coco128.yaml', # dataset.yaml path 38 | imgsz=(640, 640), # inference size (height, width) 39 | conf_thres=0.25, # confidence threshold 40 | iou_thres=0.45, # NMS IOU threshold 41 | max_det=1000, # maximum detections per image 42 | device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu 43 | view_img=False, # show results 44 | save_txt=False, # save results to *.txt 45 | save_conf=False, # save confidences in --save-txt labels 46 | save_crop=False, # save cropped prediction boxes 47 | nosave=False, # do not save images/videos 48 | classes=None, # filter by class: --class 0, or --class 0 2 3 49 | agnostic_nms=False, # class-agnostic NMS 50 | augment=False, # augmented inference 51 | visualize=False, # visualize features 52 | update=False, # update all models 53 | project=ROOT / 'runs/predict-seg', # save results to project/name 54 | name='exp', # save results to project/name 55 | exist_ok=False, # existing project/name ok, do not increment 56 | line_thickness=3, # bounding box thickness (pixels) 57 | hide_labels=False, # hide labels 58 | hide_conf=False, # hide confidences 59 | half=False, # use FP16 half-precision inference 60 | dnn=False, # use OpenCV DNN for ONNX inference 61 | trk = False, 62 | ): 63 | 64 | #.... Initialize SORT .... 65 | 66 | sort_max_age = 5 67 | sort_min_hits = 2 68 | sort_iou_thresh = 0.2 69 | sort_tracker = Sort(max_age=sort_max_age, 70 | min_hits=sort_min_hits, 71 | iou_threshold=sort_iou_thresh) 72 | #......................... 
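    # SORT parameters above: max_age = frames a track is kept without a matching detection,
    # min_hits = consecutive matches required before a track is reported,
    # iou_threshold = minimum IoU for associating a detection with an existing track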
73 | 74 | source = str(source) 75 | save_img = not nosave and not source.endswith('.txt') # save inference images 76 | is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) 77 | is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) 78 | webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) 79 | if is_url and is_file: 80 | source = check_file(source) # download 81 | 82 | # Directories 83 | save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run 84 | (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir 85 | 86 | # Load model 87 | device = select_device(device) 88 | model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) 89 | stride, names, pt = model.stride, model.names, model.pt 90 | imgsz = check_img_size(imgsz, s=stride) # check image size 91 | 92 | # Dataloader 93 | if webcam: 94 | view_img = check_imshow() 95 | cudnn.benchmark = True # set True to speed up constant image size inference 96 | dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) 97 | bs = len(dataset) # batch_size 98 | else: 99 | dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) 100 | bs = 1 # batch_size 101 | vid_path, vid_writer = [None] * bs, [None] * bs 102 | 103 | # Run inference 104 | model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup 105 | seen, windows, dt = 0, [], (Profile(), Profile(), Profile()) 106 | for path, im, im0s, vid_cap, s in dataset: 107 | with dt[0]: 108 | im = torch.from_numpy(im).to(device) 109 | im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 110 | im /= 255 # 0 - 255 to 0.0 - 1.0 111 | if len(im.shape) == 3: 112 | im = im[None] # expand for batch dim 113 | 114 | # Inference 115 | with dt[1]: 116 | visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False 117 | pred, out = model(im, augment=augment, visualize=visualize) 118 | proto = out[1] 119 | 120 | # NMS 121 | with dt[2]: 122 | pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32) 123 | 124 | # Second-stage classifier (optional) 125 | # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) 126 | 127 | # Process predictions 128 | for i, det in enumerate(pred): # per image 129 | seen += 1 130 | if webcam: # batch_size >= 1 131 | p, im0, frame = path[i], im0s[i].copy(), dataset.count 132 | s += f'{i}: ' 133 | else: 134 | p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) 135 | 136 | p = Path(p) # to Path 137 | save_path = str(save_dir / p.name) # im.jpg 138 | txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt 139 | s += '%gx%g ' % im.shape[2:] # print string 140 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh 141 | imc = im0.copy() if save_crop else im0 # for save_crop 142 | annotator = Annotator(im0, line_width=line_thickness, example=str(names)) 143 | if len(det): 144 | masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC 145 | 146 | # Rescale boxes from img_size to im0 size 147 | det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() 148 | 149 | # Segments 150 | if save_txt: 151 | segments = reversed(masks2segments(masks)) 152 | segments = [scale_segments(im.shape[2:], x, im0.shape).round() for x in segments] 153 | 154 | # Print results 155 | for c in det[:, 5].unique(): 156 | n = (det[:, 
5] == c).sum() # detections per class 157 | s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string 158 | 159 | # Mask plotting ---------------------------------------------------------------------------------------- 160 | mcolors = [colors(int(cls), True) for cls in det[:, 5]] 161 | im_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) 162 | annotator.im = scale_masks(im.shape[2:], im_masks, im0.shape) # scale to original h, w 163 | # Mask plotting ---------------------------------------------------------------------------------------- 164 | 165 | if trk: 166 | #Tracking ---------------------------------------------------- 167 | dets_to_sort = np.empty((0,6)) 168 | for x1,y1,x2,y2,conf,detclass in det[:, :6].cpu().detach().numpy(): 169 | dets_to_sort = np.vstack((dets_to_sort, 170 | np.array([x1, y1, x2, y2, 171 | conf, detclass]))) 172 | 173 | tracked_dets = sort_tracker.update(dets_to_sort) 174 | tracks =sort_tracker.getTrackers() 175 | 176 | for track in tracks: 177 | annotator.draw_trk(line_thickness,track) 178 | 179 | if len(tracked_dets)>0: 180 | bbox_xyxy = tracked_dets[:,:4] 181 | identities = tracked_dets[:, 8] 182 | categories = tracked_dets[:, 4] 183 | annotator.draw_id(bbox_xyxy, identities, categories, names) 184 | 185 | # Write results 186 | for j, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])): 187 | if save_txt: # Write to file 188 | segj = segments[j].reshape(-1) # (n,2) to (n*2) 189 | line = (cls, *segj, conf) if save_conf else (cls, *segj) # label format 190 | with open(f'{txt_path}.txt', 'a') as f: 191 | f.write(('%g ' * len(line)).rstrip() % line + '\n') 192 | 193 | if save_img or save_crop or view_img: # Add bbox to image 194 | c = int(cls) # integer class 195 | label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') 196 | annotator.box_label(xyxy, label, color=colors(c, True)) 197 | if save_crop: 198 | save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) 199 | 200 | # Stream results 201 | im0 = annotator.result() 202 | if view_img: 203 | if platform.system() == 'Linux' and p not in windows: 204 | windows.append(p) 205 | cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) 206 | cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) 207 | cv2.imshow(str(p), im0) 208 | cv2.waitKey(1) # 1 millisecond 209 | 210 | # Save results (image with detections) 211 | if save_img: 212 | if dataset.mode == 'image': 213 | cv2.imwrite(save_path, im0) 214 | else: # 'video' or 'stream' 215 | if vid_path[i] != save_path: # new video 216 | vid_path[i] = save_path 217 | if isinstance(vid_writer[i], cv2.VideoWriter): 218 | vid_writer[i].release() # release previous video writer 219 | if vid_cap: # video 220 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 221 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 222 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 223 | else: # stream 224 | fps, w, h = 30, im0.shape[1], im0.shape[0] 225 | save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos 226 | vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) 227 | vid_writer[i].write(im0) 228 | 229 | # Print time (inference-only) 230 | LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") 231 | 232 | # Print results 233 | t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image 234 | LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at 
shape {(1, 3, *imgsz)}' % t) 235 | if save_txt or save_img: 236 | s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' 237 | LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") 238 | if update: 239 | strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning) 240 | 241 | 242 | def parse_opt(): 243 | parser = argparse.ArgumentParser() 244 | parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') 245 | parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') 246 | parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') 247 | parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') 248 | parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') 249 | parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') 250 | parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') 251 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 252 | parser.add_argument('--view-img', action='store_true', help='show results') 253 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 254 | parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') 255 | parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') 256 | parser.add_argument('--nosave', action='store_true', help='do not save images/videos') 257 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') 258 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 259 | parser.add_argument('--augment', action='store_true', help='augmented inference') 260 | parser.add_argument('--visualize', action='store_true', help='visualize features') 261 | parser.add_argument('--update', action='store_true', help='update all models') 262 | parser.add_argument('--project', default=ROOT / 'runs/predict-seg', help='save results to project/name') 263 | parser.add_argument('--name', default='exp', help='save results to project/name') 264 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 265 | parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') 266 | parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') 267 | parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') 268 | parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') 269 | parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') 270 | parser.add_argument('--trk', action='store_true', help='Apply Sort Tracking') 271 | opt = parser.parse_args() 272 | opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand 273 | print_args(vars(opt)) 274 | return opt 275 | 276 | 277 | def main(opt): 278 | check_requirements(exclude=('tensorboard', 'thop')) 279 | run(**vars(opt)) 280 | 281 | 282 | if __name__ == "__main__": 283 | opt = parse_opt() 284 | main(opt) 285 | -------------------------------------------------------------------------------- 
/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | utils/initialization 4 | """ 5 | 6 | import contextlib 7 | import threading 8 | 9 | 10 | class TryExcept(contextlib.ContextDecorator): 11 | # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager 12 | def __init__(self, msg='default message here'): 13 | self.msg = msg 14 | 15 | def __enter__(self): 16 | pass 17 | 18 | def __exit__(self, exc_type, value, traceback): 19 | if value: 20 | print(f'{self.msg}: {value}') 21 | return True 22 | 23 | 24 | def threaded(func): 25 | # Multi-threads a target function and returns thread. Usage: @threaded decorator 26 | def wrapper(*args, **kwargs): 27 | thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) 28 | thread.start() 29 | return thread 30 | 31 | return wrapper 32 | 33 | 34 | def notebook_init(verbose=True): 35 | # Check system software and hardware 36 | print('Checking setup...') 37 | 38 | import os 39 | import shutil 40 | 41 | from utils.general import check_requirements, emojis, is_colab 42 | from utils.torch_utils import select_device # imports 43 | 44 | check_requirements(('psutil', 'IPython')) 45 | import psutil 46 | from IPython import display # to display images and clear console output 47 | 48 | if is_colab(): 49 | shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory 50 | 51 | # System info 52 | if verbose: 53 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 54 | ram = psutil.virtual_memory().total 55 | total, used, free = shutil.disk_usage("/") 56 | display.clear_output() 57 | s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)' 58 | else: 59 | s = '' 60 | 61 | select_device(newline=False) 62 | print(emojis(f'Setup complete ✅ {s}')) 63 | return display 64 | -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Activation functions 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class SiLU(nn.Module): 12 | # SiLU activation https://arxiv.org/pdf/1606.08415.pdf 13 | @staticmethod 14 | def forward(x): 15 | return x * torch.sigmoid(x) 16 | 17 | 18 | class Hardswish(nn.Module): 19 | # Hard-SiLU activation 20 | @staticmethod 21 | def forward(x): 22 | # return x * F.hardsigmoid(x) # for TorchScript and CoreML 23 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for TorchScript, CoreML and ONNX 24 | 25 | 26 | class Mish(nn.Module): 27 | # Mish activation https://github.com/digantamisra98/Mish 28 | @staticmethod 29 | def forward(x): 30 | return x * F.softplus(x).tanh() 31 | 32 | 33 | class MemoryEfficientMish(nn.Module): 34 | # Mish activation memory-efficient 35 | class F(torch.autograd.Function): 36 | 37 | @staticmethod 38 | def forward(ctx, x): 39 | ctx.save_for_backward(x) 40 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 41 | 42 | @staticmethod 43 | def backward(ctx, grad_output): 44 | x = ctx.saved_tensors[0] 45 | sx = torch.sigmoid(x) 46 | fx = F.softplus(x).tanh() 47 | return grad_output * (fx + x * sx * (1 - fx * fx)) 48 | 49 | def forward(self, x): 50 | return self.F.apply(x) 51 | 52 | 53 | class FReLU(nn.Module): 54 | # FReLU activation 
https://arxiv.org/abs/2007.11824 55 | def __init__(self, c1, k=3): # ch_in, kernel 56 | super().__init__() 57 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 58 | self.bn = nn.BatchNorm2d(c1) 59 | 60 | def forward(self, x): 61 | return torch.max(x, self.bn(self.conv(x))) 62 | 63 | 64 | class AconC(nn.Module): 65 | r""" ACON activation (activate or not) 66 | AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter 67 | according to "Activate or Not: Learning Customized Activation" . 68 | """ 69 | 70 | def __init__(self, c1): 71 | super().__init__() 72 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 73 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 74 | self.beta = nn.Parameter(torch.ones(1, c1, 1, 1)) 75 | 76 | def forward(self, x): 77 | dpx = (self.p1 - self.p2) * x 78 | return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x 79 | 80 | 81 | class MetaAconC(nn.Module): 82 | r""" ACON activation (activate or not) 83 | MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network 84 | according to "Activate or Not: Learning Customized Activation" . 85 | """ 86 | 87 | def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r 88 | super().__init__() 89 | c2 = max(r, c1 // r) 90 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 91 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 92 | self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True) 93 | self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True) 94 | # self.bn1 = nn.BatchNorm2d(c2) 95 | # self.bn2 = nn.BatchNorm2d(c1) 96 | 97 | def forward(self, x): 98 | y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True) 99 | # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891 100 | # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable 101 | beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed 102 | dpx = (self.p1 - self.p2) * x 103 | return dpx * torch.sigmoid(beta * dpx) + self.p2 * x 104 | -------------------------------------------------------------------------------- /utils/add_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnx 3 | from onnx import shape_inference 4 | try: 5 | import onnx_graphsurgeon as gs 6 | except Exception as e: 7 | print('Import onnx_graphsurgeon failure: %s' % e) 8 | 9 | import logging 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 13 | class RegisterNMS(object): 14 | def __init__( 15 | self, 16 | onnx_model_path: str, 17 | precision: str = "fp32", 18 | ): 19 | 20 | self.graph = gs.import_onnx(onnx.load(onnx_model_path)) 21 | assert self.graph 22 | LOGGER.info("ONNX graph created successfully") 23 | # Fold constants via ONNX-GS that PyTorch2ONNX may have missed 24 | self.graph.fold_constants() 25 | self.precision = precision 26 | self.batch_size = 1 27 | def infer(self): 28 | """ 29 | Sanitize the graph by cleaning any unconnected nodes, do a topological resort, 30 | and fold constant inputs values. When possible, run shape inference on the 31 | ONNX graph to determine tensor shapes. 
32 | """ 33 | for _ in range(3): 34 | count_before = len(self.graph.nodes) 35 | 36 | self.graph.cleanup().toposort() 37 | try: 38 | for node in self.graph.nodes: 39 | for o in node.outputs: 40 | o.shape = None 41 | model = gs.export_onnx(self.graph) 42 | model = shape_inference.infer_shapes(model) 43 | self.graph = gs.import_onnx(model) 44 | except Exception as e: 45 | LOGGER.info(f"Shape inference could not be performed at this time:\n{e}") 46 | try: 47 | self.graph.fold_constants(fold_shapes=True) 48 | except TypeError as e: 49 | LOGGER.error( 50 | "This version of ONNX GraphSurgeon does not support folding shapes, " 51 | f"please upgrade your onnx_graphsurgeon module. Error:\n{e}" 52 | ) 53 | raise 54 | 55 | count_after = len(self.graph.nodes) 56 | if count_before == count_after: 57 | # No new folding occurred in this iteration, so we can stop for now. 58 | break 59 | 60 | def save(self, output_path): 61 | """ 62 | Save the ONNX model to the given location. 63 | Args: 64 | output_path: Path pointing to the location where to write 65 | out the updated ONNX model. 66 | """ 67 | self.graph.cleanup().toposort() 68 | model = gs.export_onnx(self.graph) 69 | onnx.save(model, output_path) 70 | LOGGER.info(f"Saved ONNX model to {output_path}") 71 | 72 | def register_nms( 73 | self, 74 | *, 75 | score_thresh: float = 0.25, 76 | nms_thresh: float = 0.45, 77 | detections_per_img: int = 100, 78 | ): 79 | """ 80 | Register the ``EfficientNMS_TRT`` plugin node. 81 | NMS expects these shapes for its input tensors: 82 | - box_net: [batch_size, number_boxes, 4] 83 | - class_net: [batch_size, number_boxes, number_labels] 84 | Args: 85 | score_thresh (float): The scalar threshold for score (low scoring boxes are removed). 86 | nms_thresh (float): The scalar threshold for IOU (new boxes that have high IOU 87 | overlap with previously selected boxes are removed). 88 | detections_per_img (int): Number of best detections to keep after NMS. 89 | """ 90 | 91 | self.infer() 92 | # Find the concat node at the end of the network 93 | op_inputs = self.graph.outputs 94 | op = "EfficientNMS_TRT" 95 | attrs = { 96 | "plugin_version": "1", 97 | "background_class": -1, # no background class 98 | "max_output_boxes": detections_per_img, 99 | "score_threshold": score_thresh, 100 | "iou_threshold": nms_thresh, 101 | "score_activation": False, 102 | "box_coding": 0, 103 | } 104 | 105 | if self.precision == "fp32": 106 | dtype_output = np.float32 107 | elif self.precision == "fp16": 108 | dtype_output = np.float16 109 | else: 110 | raise NotImplementedError(f"Currently not supports precision: {self.precision}") 111 | 112 | # NMS Outputs 113 | output_num_detections = gs.Variable( 114 | name="num_detections", 115 | dtype=np.int32, 116 | shape=[self.batch_size, 1], 117 | ) # A scalar indicating the number of valid detections per batch image. 118 | output_boxes = gs.Variable( 119 | name="detection_boxes", 120 | dtype=dtype_output, 121 | shape=[self.batch_size, detections_per_img, 4], 122 | ) 123 | output_scores = gs.Variable( 124 | name="detection_scores", 125 | dtype=dtype_output, 126 | shape=[self.batch_size, detections_per_img], 127 | ) 128 | output_labels = gs.Variable( 129 | name="detection_classes", 130 | dtype=np.int32, 131 | shape=[self.batch_size, detections_per_img], 132 | ) 133 | 134 | op_outputs = [output_num_detections, output_boxes, output_scores, output_labels] 135 | 136 | # Create the NMS Plugin node with the selected inputs. The outputs of the node will also 137 | # become the final outputs of the graph. 
138 | self.graph.layer(op=op, name="batched_nms", inputs=op_inputs, outputs=op_outputs, attrs=attrs) 139 | LOGGER.info(f"Created NMS plugin '{op}' with attributes: {attrs}") 140 | 141 | self.graph.outputs = op_outputs 142 | 143 | self.infer() 144 | 145 | def save(self, output_path): 146 | """ 147 | Save the ONNX model to the given location. 148 | Args: 149 | output_path: Path pointing to the location where to write 150 | out the updated ONNX model. 151 | """ 152 | self.graph.cleanup().toposort() 153 | model = gs.export_onnx(self.graph) 154 | onnx.save(model, output_path) 155 | LOGGER.info(f"Saved ONNX model to {output_path}") 156 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | AutoAnchor utils 4 | """ 5 | 6 | import random 7 | 8 | import numpy as np 9 | import torch 10 | import yaml 11 | from tqdm import tqdm 12 | 13 | from utils.general import LOGGER, colorstr 14 | 15 | PREFIX = colorstr('AutoAnchor: ') 16 | 17 | 18 | def check_anchor_order(m): 19 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 20 | a = m.anchors.prod(-1).mean(-1).view(-1) # mean anchor area per output layer 21 | da = a[-1] - a[0] # delta a 22 | ds = m.stride[-1] - m.stride[0] # delta s 23 | if da and (da.sign() != ds.sign()): # same order 24 | LOGGER.info(f'{PREFIX}Reversing anchor order') 25 | m.anchors[:] = m.anchors.flip(0) 26 | 27 | 28 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 29 | # Check anchor fit to data, recompute if necessary 30 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 31 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 32 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 33 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 34 | 35 | def metric(k): # compute metric 36 | r = wh[:, None] / k[None] 37 | x = torch.min(r, 1 / r).min(2)[0] # ratio metric 38 | best = x.max(1)[0] # best_x 39 | aat = (x > 1 / thr).float().sum(1).mean() # anchors above threshold 40 | bpr = (best > 1 / thr).float().mean() # best possible recall 41 | return bpr, aat 42 | 43 | stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides 44 | anchors = m.anchors.clone() * stride # current anchors 45 | bpr, aat = metric(anchors.cpu().view(-1, 2)) 46 | s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). 
' 47 | if bpr > 0.98: # threshold to recompute 48 | LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅') 49 | else: 50 | LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...') 51 | na = m.anchors.numel() // 2 # number of anchors 52 | try: 53 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 54 | except Exception as e: 55 | LOGGER.info(f'{PREFIX}ERROR: {e}') 56 | new_bpr = metric(anchors)[0] 57 | if new_bpr > bpr: # replace anchors 58 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 59 | m.anchors[:] = anchors.clone().view_as(m.anchors) 60 | check_anchor_order(m) # must be in pixel-space (not grid-space) 61 | m.anchors /= stride 62 | s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)' 63 | else: 64 | s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)' 65 | LOGGER.info(s) 66 | 67 | 68 | def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 69 | """ Creates kmeans-evolved anchors from training dataset 70 | 71 | Arguments: 72 | dataset: path to data.yaml, or a loaded dataset 73 | n: number of anchors 74 | img_size: image size used for training 75 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 76 | gen: generations to evolve anchors using genetic algorithm 77 | verbose: print all results 78 | 79 | Return: 80 | k: kmeans evolved anchors 81 | 82 | Usage: 83 | from utils.autoanchor import *; _ = kmean_anchors() 84 | """ 85 | from scipy.cluster.vq import kmeans 86 | 87 | npr = np.random 88 | thr = 1 / thr 89 | 90 | def metric(k, wh): # compute metrics 91 | r = wh[:, None] / k[None] 92 | x = torch.min(r, 1 / r).min(2)[0] # ratio metric 93 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 94 | return x, x.max(1)[0] # x, best_x 95 | 96 | def anchor_fitness(k): # mutation fitness 97 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 98 | return (best * (best > thr).float()).mean() # fitness 99 | 100 | def print_results(k, verbose=True): 101 | k = k[np.argsort(k.prod(1))] # sort small to large 102 | x, best = metric(k, wh0) 103 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 104 | s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \ 105 | f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \ 106 | f'past_thr={x[x > thr].mean():.3f}-mean: ' 107 | for x in k: 108 | s += '%i,%i, ' % (round(x[0]), round(x[1])) 109 | if verbose: 110 | LOGGER.info(s[:-2]) 111 | return k 112 | 113 | if isinstance(dataset, str): # *.yaml file 114 | with open(dataset, errors='ignore') as f: 115 | data_dict = yaml.safe_load(f) # model dict 116 | from utils.dataloaders import LoadImagesAndLabels 117 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 118 | 119 | # Get label wh 120 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 121 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 122 | 123 | # Filter 124 | i = (wh0 < 3.0).any(1).sum() 125 | if i: 126 | LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found: {i} of {len(wh0)} labels are < 3 pixels in size') 127 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 128 | # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 129 
| 130 | # Kmeans init 131 | try: 132 | LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...') 133 | assert n <= len(wh) # apply overdetermined constraint 134 | s = wh.std(0) # sigmas for whitening 135 | k = kmeans(wh / s, n, iter=30)[0] * s # points 136 | assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar 137 | except Exception: 138 | LOGGER.warning(f'{PREFIX}WARNING: switching strategies from kmeans to random init') 139 | k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init 140 | wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0)) 141 | k = print_results(k, verbose=False) 142 | 143 | # Plot 144 | # k, d = [None] * 20, [None] * 20 145 | # for i in tqdm(range(1, 21)): 146 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 147 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 148 | # ax = ax.ravel() 149 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 150 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 151 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 152 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 153 | # fig.savefig('wh.png', dpi=200) 154 | 155 | # Evolve 156 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 157 | pbar = tqdm(range(gen), bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar 158 | for _ in pbar: 159 | v = np.ones(sh) 160 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 161 | v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 162 | kg = (k.copy() * v).clip(min=2.0) 163 | fg = anchor_fitness(kg) 164 | if fg > f: 165 | f, k = fg, kg.copy() 166 | pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 167 | if verbose: 168 | print_results(k, verbose) 169 | 170 | return print_results(k) 171 | -------------------------------------------------------------------------------- /utils/autobatch.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Auto-batch utils 4 | """ 5 | 6 | from copy import deepcopy 7 | 8 | import numpy as np 9 | import torch 10 | 11 | from utils.general import LOGGER, colorstr 12 | from utils.torch_utils import profile 13 | 14 | 15 | def check_train_batch_size(model, imgsz=640, amp=True): 16 | # Check YOLOv5 training batch size 17 | with torch.cuda.amp.autocast(amp): 18 | return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size 19 | 20 | 21 | def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): 22 | # Automatically estimate best batch size to use `fraction` of available CUDA memory 23 | # Usage: 24 | # import torch 25 | # from utils.autobatch import autobatch 26 | # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) 27 | # print(autobatch(model)) 28 | 29 | # Check device 30 | prefix = colorstr('AutoBatch: ') 31 | LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') 32 | device = next(model.parameters()).device # get model device 33 | if device.type == 'cpu': 34 | LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') 35 | return batch_size 36 | 37 | # Inspect CUDA memory 38 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 39 | d = str(device).upper() # 'CUDA:0' 40 | properties = torch.cuda.get_device_properties(device) # device properties 41 | t = properties.total_memory / gb # GiB 
total 42 | r = torch.cuda.memory_reserved(device) / gb # GiB reserved 43 | a = torch.cuda.memory_allocated(device) / gb # GiB allocated 44 | f = t - (r + a) # GiB free 45 | LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free') 46 | 47 | # Profile batch sizes 48 | batch_sizes = [1, 2, 4, 8, 16] 49 | try: 50 | img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] 51 | results = profile(img, model, n=3, device=device) 52 | except Exception as e: 53 | LOGGER.warning(f'{prefix}{e}') 54 | 55 | # Fit a solution 56 | y = [x[2] for x in results if x] # memory [2] 57 | p = np.polyfit(batch_sizes[:len(y)], y, deg=1) # first degree polynomial fit 58 | b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) 59 | if None in results: # some sizes failed 60 | i = results.index(None) # first fail index 61 | if b >= batch_sizes[i]: # y intercept above failure point 62 | b = batch_sizes[max(i - 1, 0)] # select prior safe point 63 | if b < 1: # zero or negative batch size 64 | b = 16 65 | LOGGER.warning(f'{prefix}WARNING: ⚠️ CUDA anomaly detected, recommend restart environment and retry command.') 66 | 67 | fraction = np.polyval(p, b) / t # actual fraction predicted 68 | LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅') 69 | return b 70 | -------------------------------------------------------------------------------- /utils/benchmarks.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Run YOLOv5 benchmarks on all supported export formats 4 | 5 | Format | `export.py --include` | Model 6 | --- | --- | --- 7 | PyTorch | - | yolov5s.pt 8 | TorchScript | `torchscript` | yolov5s.torchscript 9 | ONNX | `onnx` | yolov5s.onnx 10 | OpenVINO | `openvino` | yolov5s_openvino_model/ 11 | TensorRT | `engine` | yolov5s.engine 12 | CoreML | `coreml` | yolov5s.mlmodel 13 | TensorFlow SavedModel | `saved_model` | yolov5s_saved_model/ 14 | TensorFlow GraphDef | `pb` | yolov5s.pb 15 | TensorFlow Lite | `tflite` | yolov5s.tflite 16 | TensorFlow Edge TPU | `edgetpu` | yolov5s_edgetpu.tflite 17 | TensorFlow.js | `tfjs` | yolov5s_web_model/ 18 | 19 | Requirements: 20 | $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU 21 | $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU 22 | $ pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com # TensorRT 23 | 24 | Usage: 25 | $ python utils/benchmarks.py --weights yolov5s.pt --img 640 26 | """ 27 | 28 | import argparse 29 | import platform 30 | import sys 31 | import time 32 | from pathlib import Path 33 | 34 | import pandas as pd 35 | 36 | FILE = Path(__file__).resolve() 37 | ROOT = FILE.parents[1] # YOLOv5 root directory 38 | if str(ROOT) not in sys.path: 39 | sys.path.append(str(ROOT)) # add ROOT to PATH 40 | # ROOT = ROOT.relative_to(Path.cwd()) # relative 41 | 42 | import export 43 | import val 44 | from utils import notebook_init 45 | from utils.general import LOGGER, check_yaml, file_size, print_args 46 | from utils.torch_utils import select_device 47 | 48 | 49 | def run( 50 | weights=ROOT / 'yolov5s.pt', # weights path 51 | imgsz=640, # inference size (pixels) 52 | batch_size=1, # batch size 53 | data=ROOT / 'data/coco128.yaml', # dataset.yaml path 54 | device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu 55 | half=False, # use FP16 half-precision inference 56 | test=False, # test exports only 57 | pt_only=False, # test PyTorch only 58 | hard_fail=False, # throw error on benchmark failure 59 | ): 60 | y, t = [], time.time() 61 | device = select_device(device) 62 | for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, CPU, GPU) 63 | try: 64 | assert i not in (9, 10), 'inference not supported' # Edge TPU and TF.js are unsupported 65 | assert i != 5 or platform.system() == 'Darwin', 'inference only supported on macOS>=10.13' # CoreML 66 | if 'cpu' in device.type: 67 | assert cpu, 'inference not supported on CPU' 68 | if 'cuda' in device.type: 69 | assert gpu, 'inference not supported on GPU' 70 | 71 | # Export 72 | if f == '-': 73 | w = weights # PyTorch format 74 | else: 75 | w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # all others 76 | assert suffix in str(w), 'export failed' 77 | 78 | # Validate 79 | result = val.run(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half) 80 | metrics = result[0] # metrics (mp, mr, map50, map, *losses(box, obj, cls)) 81 | speeds = result[2] # times (preprocess, inference, postprocess) 82 | y.append([name, round(file_size(w), 1), round(metrics[3], 4), round(speeds[1], 2)]) # MB, mAP, t_inference 83 | except Exception as e: 84 | if hard_fail: 85 | assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}' 86 | LOGGER.warning(f'WARNING: Benchmark failure for {name}: {e}') 87 | y.append([name, None, None, None]) # mAP, t_inference 88 | if pt_only and i == 0: 89 | break # break after PyTorch 90 | 91 | # Print results 92 | LOGGER.info('\n') 93 | parse_opt() 94 | notebook_init() # print system info 95 | c = ['Format', 'Size (MB)', 'mAP@0.5:0.95', 'Inference time (ms)'] if map else ['Format', 'Export', '', ''] 96 | py = pd.DataFrame(y, columns=c) 97 | LOGGER.info(f'\nBenchmarks complete ({time.time() - t:.2f}s)') 98 | LOGGER.info(str(py if map else py.iloc[:, :2])) 99 | return py 100 | 101 | 102 | def test( 103 | weights=ROOT / 'yolov5s.pt', # weights path 104 | imgsz=640, # inference size (pixels) 105 | batch_size=1, # batch size 106 | data=ROOT / 'data/coco128.yaml', # dataset.yaml path 107 | device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu 108 | half=False, # use FP16 half-precision inference 109 | test=False, # test exports only 110 | pt_only=False, # test PyTorch only 111 | hard_fail=False, # throw error on benchmark failure 112 | ): 113 | y, t = [], time.time() 114 | device = select_device(device) 115 | for i, (name, f, suffix, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, gpu-capable) 116 | try: 117 | w = weights if f == '-' else \ 118 | export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # weights 119 | assert suffix in str(w), 'export failed' 120 | y.append([name, True]) 121 | except Exception: 122 | y.append([name, False]) # mAP, t_inference 123 | 124 | # Print results 125 | LOGGER.info('\n') 126 | parse_opt() 127 | notebook_init() # print system info 128 | py = pd.DataFrame(y, columns=['Format', 'Export']) 129 | LOGGER.info(f'\nExports complete ({time.time() - t:.2f}s)') 130 | LOGGER.info(str(py)) 131 | return py 132 | 133 | 134 | def parse_opt(): 135 | parser = argparse.ArgumentParser() 136 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path') 137 | parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') 138 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 139 | parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') 140 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 141 | parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') 142 | parser.add_argument('--test', action='store_true', help='test exports only') 143 | parser.add_argument('--pt-only', action='store_true', help='test PyTorch only') 144 | parser.add_argument('--hard-fail', action='store_true', help='throw error on benchmark failure') 145 | opt = parser.parse_args() 146 | opt.data = check_yaml(opt.data) # check YAML 147 | print_args(vars(opt)) 148 | return opt 149 | 150 | 151 | def main(opt): 152 | test(**vars(opt)) if opt.test else run(**vars(opt)) 153 | 154 | 155 | if __name__ == "__main__": 156 | opt = parse_opt() 157 | main(opt) 158 | -------------------------------------------------------------------------------- /utils/callbacks.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Callback utils 4 | """ 5 | 6 | import threading 7 | 8 | 9 | class Callbacks: 10 | """" 11 | Handles all registered callbacks for YOLOv5 Hooks 12 | """ 13 | 14 | def __init__(self): 15 | # Define the available callbacks 16 | self._callbacks = { 17 | 'on_pretrain_routine_start': [], 18 | 'on_pretrain_routine_end': [], 19 | 'on_train_start': [], 20 | 'on_train_epoch_start': [], 21 | 'on_train_batch_start': [], 22 | 'optimizer_step': [], 23 | 'on_before_zero_grad': [], 24 | 'on_train_batch_end': [], 25 | 'on_train_epoch_end': [], 26 | 'on_val_start': [], 27 | 'on_val_batch_start': [], 28 | 'on_val_image_end': [], 29 | 'on_val_batch_end': [], 30 | 'on_val_end': [], 31 | 'on_fit_epoch_end': [], # fit = train + val 32 | 'on_model_save': [], 33 | 'on_train_end': [], 34 | 'on_params_update': [], 35 | 'teardown': [],} 36 | self.stop_training = False # set True to interrupt training 37 | 38 | def register_action(self, hook, name='', callback=None): 39 | """ 40 | Register a new action to a callback hook 41 | 42 | Args: 43 | hook: The callback hook name to register the 
action to 44 | name: The name of the action for later reference 45 | callback: The callback to fire 46 | """ 47 | assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" 48 | assert callable(callback), f"callback '{callback}' is not callable" 49 | self._callbacks[hook].append({'name': name, 'callback': callback}) 50 | 51 | def get_registered_actions(self, hook=None): 52 | """" 53 | Returns all the registered actions by callback hook 54 | 55 | Args: 56 | hook: The name of the hook to check, defaults to all 57 | """ 58 | return self._callbacks[hook] if hook else self._callbacks 59 | 60 | def run(self, hook, *args, thread=False, **kwargs): 61 | """ 62 | Loop through the registered actions and fire all callbacks on main thread 63 | 64 | Args: 65 | hook: The name of the hook to check, defaults to all 66 | args: Arguments to receive from YOLOv5 67 | thread: (boolean) Run callbacks in daemon thread 68 | kwargs: Keyword Arguments to receive from YOLOv5 69 | """ 70 | 71 | assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" 72 | for logger in self._callbacks[hook]: 73 | if thread: 74 | threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start() 75 | else: 76 | logger['callback'](*args, **kwargs) 77 | -------------------------------------------------------------------------------- /utils/downloads.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Download utils 4 | """ 5 | 6 | import logging 7 | import os 8 | import platform 9 | import subprocess 10 | import time 11 | import urllib 12 | from pathlib import Path 13 | from zipfile import ZipFile 14 | 15 | import requests 16 | import torch 17 | 18 | 19 | def is_url(url, check_online=True): 20 | # Check if online file exists 21 | try: 22 | url = str(url) 23 | result = urllib.parse.urlparse(url) 24 | assert all([result.scheme, result.netloc, result.path]) # check if is url 25 | return (urllib.request.urlopen(url).getcode() == 200) if check_online else True # check if exists online 26 | except (AssertionError, urllib.request.HTTPError): 27 | return False 28 | 29 | 30 | def gsutil_getsize(url=''): 31 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 32 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 33 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 34 | 35 | 36 | def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): 37 | # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes 38 | from utils.general import LOGGER 39 | 40 | file = Path(file) 41 | assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" 42 | try: # url1 43 | LOGGER.info(f'Downloading {url} to {file}...') 44 | torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO) 45 | assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check 46 | except Exception as e: # url2 47 | file.unlink(missing_ok=True) # remove partial downloads 48 | LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') 49 | os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail 50 | finally: 51 | if not file.exists() or file.stat().st_size < min_bytes: # check 52 | file.unlink(missing_ok=True) # remove partial downloads 53 | LOGGER.info(f"ERROR: 
{assert_msg}\n{error_msg}") 54 | LOGGER.info('') 55 | 56 | 57 | def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'): 58 | # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc. 59 | from utils.general import LOGGER 60 | 61 | def github_assets(repository, version='latest'): 62 | # Return GitHub repo tag (i.e. 'v6.2') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...]) 63 | if version != 'latest': 64 | version = f'tags/{version}' # i.e. tags/v6.2 65 | response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api 66 | return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets 67 | 68 | file = Path(str(file).strip().replace("'", '')) 69 | if not file.exists(): 70 | # URL specified 71 | name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. 72 | if str(file).startswith(('http:/', 'https:/')): # download 73 | url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ 74 | file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... 75 | if Path(file).is_file(): 76 | LOGGER.info(f'Found {url} locally at {file}') # file already exists 77 | else: 78 | safe_download(file=file, url=url, min_bytes=1E5) 79 | return file 80 | 81 | # GitHub assets 82 | assets = [ 83 | 'yolov5n.pt', 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov5n6.pt', 'yolov5s6.pt', 84 | 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt'] 85 | try: 86 | tag, assets = github_assets(repo, release) 87 | except Exception: 88 | try: 89 | tag, assets = github_assets(repo) # latest release 90 | except Exception: 91 | try: 92 | tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1] 93 | except Exception: 94 | tag = release 95 | 96 | file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required) 97 | if name in assets: 98 | url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl' # backup gdrive mirror 99 | safe_download( 100 | file, 101 | url=f'https://github.com/{repo}/releases/download/{tag}/{name}', 102 | url2=f'https://storage.googleapis.com/{repo}/{tag}/{name}', # backup url (optional) 103 | min_bytes=1E5, 104 | error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}') 105 | 106 | return str(file) 107 | 108 | 109 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 110 | # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download() 111 | t = time.time() 112 | file = Path(file) 113 | cookie = Path('cookie') # gdrive cookie 114 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... 
', end='') 115 | file.unlink(missing_ok=True) # remove existing file 116 | cookie.unlink(missing_ok=True) # remove existing cookie 117 | 118 | # Attempt file download 119 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 120 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 121 | if os.path.exists('cookie'): # large file 122 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 123 | else: # small file 124 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 125 | r = os.system(s) # execute, capture return 126 | cookie.unlink(missing_ok=True) # remove existing cookie 127 | 128 | # Error check 129 | if r != 0: 130 | file.unlink(missing_ok=True) # remove partial 131 | print('Download error ') # raise Exception('Download error') 132 | return r 133 | 134 | # Unzip if archive 135 | if file.suffix == '.zip': 136 | print('unzipping... ', end='') 137 | ZipFile(file).extractall(path=file.parent) # unzip 138 | file.unlink() # remove zip 139 | 140 | print(f'Done ({time.time() - t:.1f}s)') 141 | return r 142 | 143 | 144 | def get_token(cookie="./cookie"): 145 | with open(cookie) as f: 146 | for line in f: 147 | if "download" in line: 148 | return line.split()[-1] 149 | return "" 150 | 151 | 152 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries ---------------------------------------------- 153 | # 154 | # 155 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 156 | # # Uploads a file to a bucket 157 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 158 | # 159 | # storage_client = storage.Client() 160 | # bucket = storage_client.get_bucket(bucket_name) 161 | # blob = bucket.blob(destination_blob_name) 162 | # 163 | # blob.upload_from_filename(source_file_name) 164 | # 165 | # print('File {} uploaded to {}.'.format( 166 | # source_file_name, 167 | # destination_blob_name)) 168 | # 169 | # 170 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 171 | # # Uploads a blob from a bucket 172 | # storage_client = storage.Client() 173 | # bucket = storage_client.get_bucket(bucket_name) 174 | # blob = bucket.blob(source_blob_name) 175 | # 176 | # blob.download_to_filename(destination_file_name) 177 | # 178 | # print('Blob {} downloaded to {}.'.format( 179 | # source_blob_name, 180 | # destination_file_name)) 181 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=''): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 16 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(file, repo='WongKinYiu/yolov7'): 20 | # Attempt file download if does not exist 21 | file = Path(str(file).strip().replace("'", '').lower()) 22 | 23 | if not file.exists(): 24 | try: 25 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 26 | assets = [x['name'] for x in response['assets']] # release assets 
27 | tag = response['tag_name'] # i.e. 'v1.0' 28 | except: # fallback plan 29 | assets = ['yolov7.pt'] 30 | tag = subprocess.check_output('git tag', shell=True).decode().split()[-1] 31 | 32 | name = file.name 33 | if name in assets: 34 | msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/' 35 | redundant = False # second download option 36 | try: # GitHub 37 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 38 | print(f'Downloading {url} to {file}...') 39 | torch.hub.download_url_to_file(url, file) 40 | assert file.exists() and file.stat().st_size > 1E6 # check 41 | except Exception as e: # GCP 42 | print(f'Download error: {e}') 43 | assert redundant, 'No secondary mirror' 44 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 45 | print(f'Downloading {url} to {file}...') 46 | os.system(f'curl -L {url} -o {file}') # torch.hub.download_url_to_file(url, weights) 47 | finally: 48 | if not file.exists() or file.stat().st_size < 1E6: # check 49 | file.unlink(missing_ok=True) # remove partial downloads 50 | print(f'ERROR: Download failure: {msg}') 51 | print('') 52 | return 53 | 54 | 55 | def gdrive_download(id='', file='tmp.zip'): 56 | # Downloads a file from Google Drive. from yolov7.utils.google_utils import *; gdrive_download() 57 | t = time.time() 58 | file = Path(file) 59 | cookie = Path('cookie') # gdrive cookie 60 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') 61 | file.unlink(missing_ok=True) # remove existing file 62 | cookie.unlink(missing_ok=True) # remove existing cookie 63 | 64 | # Attempt file download 65 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 66 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 67 | if os.path.exists('cookie'): # large file 68 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 69 | else: # small file 70 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 71 | r = os.system(s) # execute, capture return 72 | cookie.unlink(missing_ok=True) # remove existing cookie 73 | 74 | # Error check 75 | if r != 0: 76 | file.unlink(missing_ok=True) # remove partial 77 | print('Download error ') # raise Exception('Download error') 78 | return r 79 | 80 | # Unzip if archive 81 | if file.suffix == '.zip': 82 | print('unzipping... 
', end='') 83 | os.system(f'unzip -q {file}') # unzip 84 | file.unlink() # remove zip to free space 85 | 86 | print(f'Done ({time.time() - t:.1f}s)') 87 | return r 88 | 89 | 90 | def get_token(cookie="./cookie"): 91 | with open(cookie) as f: 92 | for line in f: 93 | if "download" in line: 94 | return line.split()[-1] 95 | return "" 96 | 97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 98 | # # Uploads a file to a bucket 99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 100 | # 101 | # storage_client = storage.Client() 102 | # bucket = storage_client.get_bucket(bucket_name) 103 | # blob = bucket.blob(destination_blob_name) 104 | # 105 | # blob.upload_from_filename(source_file_name) 106 | # 107 | # print('File {} uploaded to {}.'.format( 108 | # source_file_name, 109 | # destination_blob_name)) 110 | # 111 | # 112 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 113 | # # Uploads a blob from a bucket 114 | # storage_client = storage.Client() 115 | # bucket = storage_client.get_bucket(bucket_name) 116 | # blob = bucket.blob(source_blob_name) 117 | # 118 | # blob.download_to_filename(destination_file_name) 119 | # 120 | # print('Blob {} downloaded to {}.'.format( 121 | # source_blob_name, 122 | # destination_file_name)) 123 | -------------------------------------------------------------------------------- /utils/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Logging utils 4 | """ 5 | 6 | import os 7 | import warnings 8 | from pathlib import Path 9 | 10 | import pkg_resources as pkg 11 | import torch 12 | from torch.utils.tensorboard import SummaryWriter 13 | 14 | from utils.general import colorstr, cv2 15 | from utils.loggers.clearml.clearml_utils import ClearmlLogger 16 | from utils.loggers.wandb.wandb_utils import WandbLogger 17 | from utils.plots import plot_images, plot_labels, plot_results 18 | from utils.torch_utils import de_parallel 19 | 20 | LOGGERS = ('csv', 'tb', 'wandb', 'clearml') # *.csv, TensorBoard, Weights & Biases, ClearML 21 | RANK = int(os.getenv('RANK', -1)) 22 | 23 | try: 24 | import wandb 25 | 26 | assert hasattr(wandb, '__version__') # verify package import not local dir 27 | if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in {0, -1}: 28 | try: 29 | wandb_login_success = wandb.login(timeout=30) 30 | except wandb.errors.UsageError: # known non-TTY terminal issue 31 | wandb_login_success = False 32 | if not wandb_login_success: 33 | wandb = None 34 | except (ImportError, AssertionError): 35 | wandb = None 36 | 37 | try: 38 | import clearml 39 | 40 | assert hasattr(clearml, '__version__') # verify package import not local dir 41 | except (ImportError, AssertionError): 42 | clearml = None 43 | 44 | 45 | class Loggers(): 46 | # YOLOv5 Loggers class 47 | def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): 48 | self.save_dir = save_dir 49 | self.weights = weights 50 | self.opt = opt 51 | self.hyp = hyp 52 | self.plots = not opt.noplots # plot results 53 | self.logger = logger # for printing results to console 54 | self.include = include 55 | self.keys = [ 56 | 'train/box_loss', 57 | 'train/obj_loss', 58 | 'train/cls_loss', # train loss 59 | 'metrics/precision', 60 | 'metrics/recall', 61 | 'metrics/mAP_0.5', 62 | 'metrics/mAP_0.5:0.95', # metrics 63 | 'val/box_loss', 64 | 'val/obj_loss', 
65 | 'val/cls_loss', # val loss 66 | 'x/lr0', 67 | 'x/lr1', 68 | 'x/lr2'] # params 69 | self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95'] 70 | for k in LOGGERS: 71 | setattr(self, k, None) # init empty logger dictionary 72 | self.csv = True # always log to csv 73 | 74 | # Messages 75 | if not wandb: 76 | prefix = colorstr('Weights & Biases: ') 77 | s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs in Weights & Biases" 78 | self.logger.info(s) 79 | if not clearml: 80 | prefix = colorstr('ClearML: ') 81 | s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML" 82 | self.logger.info(s) 83 | 84 | # TensorBoard 85 | s = self.save_dir 86 | if 'tb' in self.include and not self.opt.evolve: 87 | prefix = colorstr('TensorBoard: ') 88 | self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/") 89 | self.tb = SummaryWriter(str(s)) 90 | 91 | # W&B 92 | if wandb and 'wandb' in self.include: 93 | wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://') 94 | run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None 95 | self.opt.hyp = self.hyp # add hyperparameters 96 | self.wandb = WandbLogger(self.opt, run_id) 97 | # temp warn. because nested artifacts not supported after 0.12.10 98 | if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.11'): 99 | s = "YOLOv5 temporarily requires wandb version 0.12.10 or below. Some features may not work as expected." 100 | self.logger.warning(s) 101 | else: 102 | self.wandb = None 103 | 104 | # ClearML 105 | if clearml and 'clearml' in self.include: 106 | self.clearml = ClearmlLogger(self.opt, self.hyp) 107 | else: 108 | self.clearml = None 109 | 110 | def on_train_start(self): 111 | # Callback runs on train start 112 | pass 113 | 114 | def on_pretrain_routine_end(self, labels, names): 115 | # Callback runs on pre-train routine end 116 | if self.plots: 117 | plot_labels(labels, names, self.save_dir) 118 | paths = self.save_dir.glob('*labels*.jpg') # training labels 119 | if self.wandb: 120 | self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]}) 121 | # if self.clearml: 122 | # pass # ClearML saves these images automatically using hooks 123 | 124 | def on_train_batch_end(self, model, ni, imgs, targets, paths): 125 | # Callback runs on train batch end 126 | # ni: number integrated batches (since train start) 127 | if self.plots: 128 | if ni < 3: 129 | f = self.save_dir / f'train_batch{ni}.jpg' # filename 130 | plot_images(imgs, targets, paths, f) 131 | if ni == 0 and self.tb and not self.opt.sync_bn: 132 | log_tensorboard_graph(self.tb, model, imgsz=(self.opt.imgsz, self.opt.imgsz)) 133 | if ni == 10 and (self.wandb or self.clearml): 134 | files = sorted(self.save_dir.glob('train*.jpg')) 135 | if self.wandb: 136 | self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) 137 | if self.clearml: 138 | self.clearml.log_debug_samples(files, title='Mosaics') 139 | 140 | def on_train_epoch_end(self, epoch): 141 | # Callback runs on train epoch end 142 | if self.wandb: 143 | self.wandb.current_epoch = epoch + 1 144 | 145 | def on_val_image_end(self, pred, predn, path, names, im): 146 | # Callback runs on val image end 147 | if self.wandb: 148 | self.wandb.val_one_image(pred, predn, path, names, im) 149 | 
if self.clearml: 150 | self.clearml.log_image_with_boxes(path, pred, names, im) 151 | 152 | def on_val_end(self): 153 | # Callback runs on val end 154 | if self.wandb or self.clearml: 155 | files = sorted(self.save_dir.glob('val*.jpg')) 156 | if self.wandb: 157 | self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]}) 158 | if self.clearml: 159 | self.clearml.log_debug_samples(files, title='Validation') 160 | 161 | def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): 162 | # Callback runs at the end of each fit (train+val) epoch 163 | x = dict(zip(self.keys, vals)) 164 | if self.csv: 165 | file = self.save_dir / 'results.csv' 166 | n = len(x) + 1 # number of cols 167 | s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header 168 | with open(file, 'a') as f: 169 | f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') 170 | 171 | if self.tb: 172 | for k, v in x.items(): 173 | self.tb.add_scalar(k, v, epoch) 174 | elif self.clearml: # log to ClearML if TensorBoard not used 175 | for k, v in x.items(): 176 | title, series = k.split('/') 177 | self.clearml.task.get_logger().report_scalar(title, series, v, epoch) 178 | 179 | if self.wandb: 180 | if best_fitness == fi: 181 | best_results = [epoch] + vals[3:7] 182 | for i, name in enumerate(self.best_keys): 183 | self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary 184 | self.wandb.log(x) 185 | self.wandb.end_epoch(best_result=best_fitness == fi) 186 | 187 | if self.clearml: 188 | self.clearml.current_epoch_logged_images = set() # reset epoch image limit 189 | self.clearml.current_epoch += 1 190 | 191 | def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): 192 | # Callback runs on model save event 193 | if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1: 194 | if self.wandb: 195 | self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) 196 | if self.clearml: 197 | self.clearml.task.update_output_model(model_path=str(last), 198 | model_name='Latest Model', 199 | auto_delete_file=False) 200 | 201 | def on_train_end(self, last, best, epoch, results): 202 | # Callback runs on training end, i.e. saving best model 203 | if self.plots: 204 | plot_results(file=self.save_dir / 'results.csv') # save results.png 205 | files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] 206 | files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter 207 | self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}") 208 | 209 | if self.tb and not self.clearml: # These images are already captured by ClearML by now, we don't want doubles 210 | for f in files: 211 | self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') 212 | 213 | if self.wandb: 214 | self.wandb.log(dict(zip(self.keys[3:10], results))) 215 | self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) 216 | # Calling wandb.log. 
TODO: Refactor this into WandbLogger.log_model 217 | if not self.opt.evolve: 218 | wandb.log_artifact(str(best if best.exists() else last), 219 | type='model', 220 | name=f'run_{self.wandb.wandb_run.id}_model', 221 | aliases=['latest', 'best', 'stripped']) 222 | self.wandb.finish_run() 223 | 224 | if self.clearml and not self.opt.evolve: 225 | self.clearml.task.update_output_model(model_path=str(best if best.exists() else last), name='Best Model') 226 | 227 | def on_params_update(self, params: dict): 228 | # Update hyperparams or configs of the experiment 229 | if self.wandb: 230 | self.wandb.wandb_run.config.update(params, allow_val_change=True) 231 | 232 | 233 | class GenericLogger: 234 | """ 235 | YOLOv5 General purpose logger for non-task specific logging 236 | Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...) 237 | Arguments 238 | opt: Run arguments 239 | console_logger: Console logger 240 | include: loggers to include 241 | """ 242 | 243 | def __init__(self, opt, console_logger, include=('tb', 'wandb')): 244 | # init default loggers 245 | self.save_dir = Path(opt.save_dir) 246 | self.include = include 247 | self.console_logger = console_logger 248 | self.csv = self.save_dir / 'results.csv' # CSV logger 249 | if 'tb' in self.include: 250 | prefix = colorstr('TensorBoard: ') 251 | self.console_logger.info( 252 | f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/") 253 | self.tb = SummaryWriter(str(self.save_dir)) 254 | 255 | if wandb and 'wandb' in self.include: 256 | self.wandb = wandb.init(project=web_project_name(str(opt.project)), 257 | name=None if opt.name == "exp" else opt.name, 258 | config=opt) 259 | else: 260 | self.wandb = None 261 | 262 | def log_metrics(self, metrics, epoch): 263 | # Log metrics dictionary to all loggers 264 | if self.csv: 265 | keys, vals = list(metrics.keys()), list(metrics.values()) 266 | n = len(metrics) + 1 # number of cols 267 | s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header 268 | with open(self.csv, 'a') as f: 269 | f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') 270 | 271 | if self.tb: 272 | for k, v in metrics.items(): 273 | self.tb.add_scalar(k, v, epoch) 274 | 275 | if self.wandb: 276 | self.wandb.log(metrics, step=epoch) 277 | 278 | def log_images(self, files, name='Images', epoch=0): 279 | # Log images to all loggers 280 | files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])] # to Path 281 | files = [f for f in files if f.exists()] # filter by exists 282 | 283 | if self.tb: 284 | for f in files: 285 | self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') 286 | 287 | if self.wandb: 288 | self.wandb.log({name: [wandb.Image(str(f), caption=f.name) for f in files]}, step=epoch) 289 | 290 | def log_graph(self, model, imgsz=(640, 640)): 291 | # Log model graph to all loggers 292 | if self.tb: 293 | log_tensorboard_graph(self.tb, model, imgsz) 294 | 295 | def log_model(self, model_path, epoch=0, metadata={}): 296 | # Log model to all loggers 297 | if self.wandb: 298 | art = wandb.Artifact(name=f"run_{wandb.run.id}_model", type="model", metadata=metadata) 299 | art.add_file(str(model_path)) 300 | wandb.log_artifact(art) 301 | 302 | def update_params(self, params): 303 | # Update the paramters logged 304 | if self.wandb: 305 | wandb.run.config.update(params, allow_val_change=True) 306 | 307 | 308 | def log_tensorboard_graph(tb, model, 
imgsz=(640, 640)): 309 | # Log model graph to TensorBoard 310 | try: 311 | p = next(model.parameters()) # for device, type 312 | imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz # expand 313 | im = torch.zeros((1, 3, *imgsz)).to(p.device).type_as(p) # input image (WARNING: must be zeros, not empty) 314 | with warnings.catch_warnings(): 315 | warnings.simplefilter('ignore') # suppress jit trace warning 316 | tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), []) 317 | except Exception as e: 318 | print(f'WARNING: TensorBoard graph visualization failure {e}') 319 | 320 | 321 | def web_project_name(project): 322 | # Convert local project name to web project name 323 | if not project.startswith('runs/train'): 324 | return project 325 | suffix = '-Classify' if project.endswith('-cls') else '-Segment' if project.endswith('-seg') else '' 326 | return f'YOLOv5{suffix}' 327 | -------------------------------------------------------------------------------- /utils/loggers/clearml/README.md: -------------------------------------------------------------------------------- 1 | # ClearML Integration 2 | 3 | Clear|MLClear|ML 4 | 5 | ## About ClearML 6 | 7 | [ClearML](https://cutt.ly/yolov5-tutorial-clearml) is an [open-source](https://github.com/allegroai/clearml) toolbox designed to save you time ⏱️. 8 | 9 | 🔨 Track every YOLOv5 training run in the experiment manager 10 | 11 | 🔧 Version and easily access your custom training data with the integrated ClearML Data Versioning Tool 12 | 13 | 🔦 Remotely train and monitor your YOLOv5 training runs using ClearML Agent 14 | 15 | 🔬 Get the very best mAP using ClearML Hyperparameter Optimization 16 | 17 | 🔭 Turn your newly trained YOLOv5 model into an API with just a few commands using ClearML Serving 18 | 19 |
20 | And so much more. It's up to you how many of these tools you want to use: you can stick to the experiment manager, or chain them all together into an impressive pipeline! 21 |
22 |
23 | 24 | ![ClearML scalars dashboard](https://github.com/thepycoder/clearml_screenshots/raw/main/experiment_manager_with_compare.gif) 25 | 26 | 27 |
28 |
29 | 30 | ## 🦾 Setting Things Up 31 | 32 | To keep track of your experiments and/or data, ClearML needs to communicate with a server. You have two options to get one: 33 | 34 | Either sign up for free to the [ClearML Hosted Service](https://cutt.ly/yolov5-tutorial-clearml) or set up your own server, see [here](https://clear.ml/docs/latest/docs/deploying_clearml/clearml_server). The server is open-source too, so even if you're dealing with sensitive data, you should be good to go! 35 | 36 | 1. Install the `clearml` python package: 37 | 38 | ```bash 39 | pip install clearml 40 | ``` 41 | 42 | 1. Connect the ClearML SDK to the server by [creating credentials](https://app.clear.ml/settings/workspace-configuration) (go to Settings -> Workspace -> Create new credentials in the top right), then execute the command below and follow the instructions: 43 | 44 | ```bash 45 | clearml-init 46 | ``` 47 | 48 | That's it! You're done 😎 49 | 50 |
51 | 52 | ## 🚀 Training YOLOv5 With ClearML 53 | 54 | To enable ClearML experiment tracking, simply install the ClearML pip package. 55 | 56 | ```bash 57 | pip install clearml 58 | ``` 59 | 60 | This will enable integration with the YOLOv5 training script. Every training run from now on, will be captured and stored by the ClearML experiment manager. If you want to change the `project_name` or `task_name`, head over to our custom logger, where you can change it: `utils/loggers/clearml/clearml_utils.py` 61 | 62 | ```bash 63 | python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache 64 | ``` 65 | 66 | This will capture: 67 | - Source code + uncommitted changes 68 | - Installed packages 69 | - (Hyper)parameters 70 | - Model files (use `--save-period n` to save a checkpoint every n epochs) 71 | - Console output 72 | - Scalars (mAP_0.5, mAP_0.5:0.95, precision, recall, losses, learning rates, ...) 73 | - General info such as machine details, runtime, creation date etc. 74 | - All produced plots such as label correlogram and confusion matrix 75 | - Images with bounding boxes per epoch 76 | - Mosaic per epoch 77 | - Validation images per epoch 78 | - ... 79 | 80 | That's a lot right? 🤯 81 | Now, we can visualize all of this information in the ClearML UI to get an overview of our training progress. Add custom columns to the table view (such as e.g. mAP_0.5) so you can easily sort on the best performing model. Or select multiple experiments and directly compare them! 82 | 83 | There even more we can do with all of this information, like hyperparameter optimization and remote execution, so keep reading if you want to see how that works! 84 | 85 |
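As mentioned above, the project and task names are set in `utils/loggers/clearml/clearml_utils.py`; the relevant `Task.init` call in the `ClearmlLogger` class (shown later in this repository) looks roughly like this:

```python
# Sketch of the Task.init call in utils/loggers/clearml/clearml_utils.py.
# Edit project_name / task_name to change how runs are grouped and named in the ClearML UI.
from clearml import Task

task = Task.init(
    project_name='YOLOv5',  # <-- your project name
    task_name='training',   # <-- your run name
    tags=['YOLOv5'],
    output_uri=True,
    # PyTorch auto-logging is disabled because the training code saves checkpoints manually
    auto_connect_frameworks={'pytorch': False})
```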
86 | 87 | ## 🔗 Dataset Version Management 88 | 89 | Versioning your data separately from your code is generally a good idea and makes it easy to aqcuire the latest version too. This repository supports supplying a dataset version ID and it will make sure to get the data if it's not there yet. Next to that, this workflow also saves the used dataset ID as part of the task parameters, so you will always know for sure which data was used in which experiment! 90 | 91 | ![ClearML Dataset Interface](https://github.com/thepycoder/clearml_screenshots/raw/main/clearml_data.gif) 92 | 93 | ### Prepare Your Dataset 94 | 95 | The YOLOv5 repository supports a number of different datasets by using yaml files containing their information. By default datasets are downloaded to the `../datasets` folder in relation to the repository root folder. So if you downloaded the `coco128` dataset using the link in the yaml or with the scripts provided by yolov5, you get this folder structure: 96 | 97 | ``` 98 | .. 99 | |_ yolov5 100 | |_ datasets 101 | |_ coco128 102 | |_ images 103 | |_ labels 104 | |_ LICENSE 105 | |_ README.txt 106 | ``` 107 | But this can be any dataset you wish. Feel free to use your own, as long as you keep to this folder structure. 108 | 109 | Next, ⚠️**copy the corresponding yaml file to the root of the dataset folder**⚠️. This yaml files contains the information ClearML will need to properly use the dataset. You can make this yourself too, of course, just follow the structure of the example yamls. 110 | 111 | Basically we need the following keys: `path`, `train`, `test`, `val`, `nc`, `names`. 112 | 113 | ``` 114 | .. 115 | |_ yolov5 116 | |_ datasets 117 | |_ coco128 118 | |_ images 119 | |_ labels 120 | |_ coco128.yaml # <---- HERE! 121 | |_ LICENSE 122 | |_ README.txt 123 | ``` 124 | 125 | ### Upload Your Dataset 126 | 127 | To get this dataset into ClearML as a versionned dataset, go to the dataset root folder and run the following command: 128 | ```bash 129 | cd coco128 130 | clearml-data sync --project YOLOv5 --name coco128 --folder . 131 | ``` 132 | 133 | The command `clearml-data sync` is actually a shorthand command. You could also run these commands one after the other: 134 | ```bash 135 | # Optionally add --parent if you want to base 136 | # this version on another dataset version, so no duplicate files are uploaded! 137 | clearml-data create --name coco128 --project YOLOv5 138 | clearml-data add --files . 139 | clearml-data close 140 | ``` 141 | 142 | ### Run Training Using A ClearML Dataset 143 | 144 | Now that you have a ClearML dataset, you can very simply use it to train custom YOLOv5 🚀 models! 145 | 146 | ```bash 147 | python train.py --img 640 --batch 16 --epochs 3 --data clearml:// --weights yolov5s.pt --cache 148 | ``` 149 | 150 |
151 | 152 | ## 👀 Hyperparameter Optimization 153 | 154 | Now that we have our experiments and data versioned, it's time to take a look at what we can build on top! 155 | 156 | Using the code information, installed packages and environment details, the experiment itself is now **completely reproducible**. In fact, ClearML allows you to clone an experiment and even change its parameters. We can then just rerun it with these new parameters automatically, this is basically what HPO does! 157 | 158 | To **run hyperparameter optimization locally**, we've included a pre-made script for you. Just make sure a training task has been run at least once, so it is in the ClearML experiment manager, we will essentially clone it and change its hyperparameters. 159 | 160 | You'll need to fill in the ID of this `template task` in the script found at `utils/loggers/clearml/hpo.py` and then just run it :) You can change `task.execute_locally()` to `task.execute()` to put it in a ClearML queue and have a remote agent work on it instead. 161 | 162 | ```bash 163 | # To use optuna, install it first, otherwise you can change the optimizer to just be RandomSearch 164 | pip install optuna 165 | python utils/loggers/clearml/hpo.py 166 | ``` 167 | 168 | ![HPO](https://github.com/thepycoder/clearml_screenshots/raw/main/hpo.png) 169 | 170 | ## 🤯 Remote Execution (advanced) 171 | 172 | Running HPO locally is really handy, but what if we want to run our experiments on a remote machine instead? Maybe you have access to a very powerful GPU machine on-site or you have some budget to use cloud GPUs. 173 | This is where the ClearML Agent comes into play. Check out what the agent can do here: 174 | 175 | - [YouTube video](https://youtu.be/MX3BrXnaULs) 176 | - [Documentation](https://clear.ml/docs/latest/docs/clearml_agent) 177 | 178 | In short: every experiment tracked by the experiment manager contains enough information to reproduce it on a different machine (installed packages, uncommitted changes etc.). So a ClearML agent does just that: it listens to a queue for incoming tasks and when it finds one, it recreates the environment and runs it while still reporting scalars, plots etc. to the experiment manager. 179 | 180 | You can turn any machine (a cloud VM, a local GPU machine, your own laptop ... ) into a ClearML agent by simply running: 181 | ```bash 182 | clearml-agent daemon --queue [--docker] 183 | ``` 184 | 185 | ### Cloning, Editing And Enqueuing 186 | 187 | With our agent running, we can give it some work. Remember from the HPO section that we can clone a task and edit the hyperparameters? We can do that from the interface too! 188 | 189 | 🪄 Clone the experiment by right clicking it 190 | 191 | 🎯 Edit the hyperparameters to what you wish them to be 192 | 193 | ⏳ Enqueue the task to any of the queues by right clicking it 194 | 195 | ![Enqueue a task from the UI](https://github.com/thepycoder/clearml_screenshots/raw/main/enqueue.gif) 196 | 197 | ### Executing A Task Remotely 198 | 199 | Now you can clone a task like we explained above, or simply mark your current script by adding `task.execute_remotely()` and on execution it will be put into a queue, for the agent to start working on! 200 | 201 | To run the YOLOv5 training script remotely, all you have to do is add this line to the training.py script after the clearml logger has been instatiated: 202 | ```python 203 | # ... 
204 | # Loggers 205 | data_dict = None 206 | if RANK in {-1, 0}: 207 | loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance 208 | if loggers.clearml: 209 | loggers.clearml.task.execute_remotely(queue='my_queue') # <------ ADD THIS LINE 210 | # Data_dict is either None is user did not choose for ClearML dataset or is filled in by ClearML 211 | data_dict = loggers.clearml.data_dict 212 | # ... 213 | ``` 214 | When running the training script after this change, python will run the script up until that line, after which it will package the code and send it to the queue instead! 215 | 216 | ### Autoscaling workers 217 | 218 | ClearML comes with autoscalers too! This tool will automatically spin up new remote machines in the cloud of your choice (AWS, GCP, Azure) and turn them into ClearML agents for you whenever there are experiments detected in the queue. Once the tasks are processed, the autoscaler will automatically shut down the remote machines and you stop paying! 219 | 220 | Check out the autoscalers getting started video below. 221 | 222 | [![Watch the video](https://img.youtube.com/vi/j4XVMAaUt3E/0.jpg)](https://youtu.be/j4XVMAaUt3E) 223 | -------------------------------------------------------------------------------- /utils/loggers/clearml/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/loggers/clearml/clearml_utils.py: -------------------------------------------------------------------------------- 1 | """Main Logger class for ClearML experiment tracking.""" 2 | import glob 3 | import re 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import yaml 8 | 9 | from utils.plots import Annotator, colors 10 | 11 | try: 12 | import clearml 13 | from clearml import Dataset, Task 14 | assert hasattr(clearml, '__version__') # verify package import not local dir 15 | except (ImportError, AssertionError): 16 | clearml = None 17 | 18 | 19 | def construct_dataset(clearml_info_string): 20 | """Load in a clearml dataset and fill the internal data_dict with its contents. 
21 | """ 22 | dataset_id = clearml_info_string.replace('clearml://', '') 23 | dataset = Dataset.get(dataset_id=dataset_id) 24 | dataset_root_path = Path(dataset.get_local_copy()) 25 | 26 | # We'll search for the yaml file definition in the dataset 27 | yaml_filenames = list(glob.glob(str(dataset_root_path / "*.yaml")) + glob.glob(str(dataset_root_path / "*.yml"))) 28 | if len(yaml_filenames) > 1: 29 | raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains ' 30 | 'the dataset definition this way.') 31 | elif len(yaml_filenames) == 0: 32 | raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file ' 33 | 'inside the dataset root path.') 34 | with open(yaml_filenames[0]) as f: 35 | dataset_definition = yaml.safe_load(f) 36 | 37 | assert set(dataset_definition.keys()).issuperset( 38 | {'train', 'test', 'val', 'nc', 'names'} 39 | ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')" 40 | 41 | data_dict = dict() 42 | data_dict['train'] = str( 43 | (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None 44 | data_dict['test'] = str( 45 | (dataset_root_path / dataset_definition['test']).resolve()) if dataset_definition['test'] else None 46 | data_dict['val'] = str( 47 | (dataset_root_path / dataset_definition['val']).resolve()) if dataset_definition['val'] else None 48 | data_dict['nc'] = dataset_definition['nc'] 49 | data_dict['names'] = dataset_definition['names'] 50 | 51 | return data_dict 52 | 53 | 54 | class ClearmlLogger: 55 | """Log training runs, datasets, models, and predictions to ClearML. 56 | 57 | This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default, 58 | this information includes hyperparameters, system configuration and metrics, model metrics, code information and 59 | basic data metrics and analyses. 60 | 61 | By providing additional command line arguments to train.py, datasets, 62 | models and predictions can also be logged. 63 | """ 64 | 65 | def __init__(self, opt, hyp): 66 | """ 67 | - Initialize ClearML Task, this object will capture the experiment 68 | - Upload dataset version to ClearML Data if opt.upload_dataset is True 69 | 70 | arguments: 71 | opt (namespace) -- Commandline arguments for this run 72 | hyp (dict) -- Hyperparameters for this run 73 | 74 | """ 75 | self.current_epoch = 0 76 | # Keep tracked of amount of logged images to enforce a limit 77 | self.current_epoch_logged_images = set() 78 | # Maximum number of images to log to clearML per epoch 79 | self.max_imgs_to_log_per_epoch = 16 80 | # Get the interval of epochs when bounding box images should be logged 81 | self.bbox_interval = opt.bbox_interval 82 | self.clearml = clearml 83 | self.task = None 84 | self.data_dict = None 85 | if self.clearml: 86 | self.task = Task.init( 87 | project_name='YOLOv5', 88 | task_name='training', 89 | tags=['YOLOv5'], 90 | output_uri=True, 91 | auto_connect_frameworks={'pytorch': False} 92 | # We disconnect pytorch auto-detection, because we added manual model save points in the code 93 | ) 94 | # ClearML's hooks will already grab all general parameters 95 | # Only the hyperparameters coming from the yaml config file 96 | # will have to be added manually! 
97 | self.task.connect(hyp, name='Hyperparameters') 98 | 99 | # Get ClearML Dataset Version if requested 100 | if opt.data.startswith('clearml://'): 101 | # data_dict should have the following keys: 102 | # names, nc (number of classes), test, train, val (all three relative paths to ../datasets) 103 | self.data_dict = construct_dataset(opt.data) 104 | # Set data to data_dict because wandb will crash without this information and opt is the best way 105 | # to give it to them 106 | opt.data = self.data_dict 107 | 108 | def log_debug_samples(self, files, title='Debug Samples'): 109 | """ 110 | Log files (images) as debug samples in the ClearML task. 111 | 112 | arguments: 113 | files (List(PosixPath)) a list of file paths in PosixPath format 114 | title (str) A title that groups together images with the same values 115 | """ 116 | for f in files: 117 | if f.exists(): 118 | it = re.search(r'_batch(\d+)', f.name) 119 | iteration = int(it.groups()[0]) if it else 0 120 | self.task.get_logger().report_image(title=title, 121 | series=f.name.replace(it.group(), ''), 122 | local_path=str(f), 123 | iteration=iteration) 124 | 125 | def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25): 126 | """ 127 | Draw the bounding boxes on a single image and report the result as a ClearML debug sample. 128 | 129 | arguments: 130 | image_path (PosixPath) the path the original image file 131 | boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class] 132 | class_names (dict): dict containing mapping of class int to class name 133 | image (Tensor): A torch tensor containing the actual image data 134 | """ 135 | if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0: 136 | # Log every bbox_interval times and deduplicate for any intermittend extra eval runs 137 | if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images: 138 | im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2)) 139 | annotator = Annotator(im=im, pil=True) 140 | for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])): 141 | color = colors(i) 142 | 143 | class_name = class_names[int(class_nr)] 144 | confidence_percentage = round(float(conf) * 100, 2) 145 | label = f"{class_name}: {confidence_percentage}%" 146 | 147 | if conf > conf_threshold: 148 | annotator.rectangle(box.cpu().numpy(), outline=color) 149 | annotator.box_label(box.cpu().numpy(), label=label, color=color) 150 | 151 | annotated_image = annotator.result() 152 | self.task.get_logger().report_image(title='Bounding Boxes', 153 | series=image_path.name, 154 | iteration=self.current_epoch, 155 | image=annotated_image) 156 | self.current_epoch_logged_images.add(image_path) 157 | -------------------------------------------------------------------------------- /utils/loggers/clearml/hpo.py: -------------------------------------------------------------------------------- 1 | from clearml import Task 2 | # Connecting ClearML with the current process, 3 | # from here on everything is logged automatically 4 | from clearml.automation import HyperParameterOptimizer, UniformParameterRange 5 | from clearml.automation.optuna import OptimizerOptuna 6 | 7 | task = Task.init(project_name='Hyper-Parameter Optimization', 8 | task_name='YOLOv5', 9 | task_type=Task.TaskTypes.optimizer, 10 | reuse_last_task_id=False) 11 | 12 | # Example use case: 13 | optimizer = 
HyperParameterOptimizer( 14 | # This is the experiment we want to optimize 15 | base_task_id='', 16 | # here we define the hyper-parameters to optimize 17 | # Notice: The parameter name should exactly match what you see in the UI: / 18 | # For Example, here we see in the base experiment a section Named: "General" 19 | # under it a parameter named "batch_size", this becomes "General/batch_size" 20 | # If you have `argparse` for example, then arguments will appear under the "Args" section, 21 | # and you should instead pass "Args/batch_size" 22 | hyper_parameters=[ 23 | UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1), 24 | UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0), 25 | UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98), 26 | UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001), 27 | UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0), 28 | UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95), 29 | UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2), 30 | UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2), 31 | UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0), 32 | UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0), 33 | UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0), 34 | UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0), 35 | UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7), 36 | UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0), 37 | UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0), 38 | UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1), 39 | UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9), 40 | UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9), 41 | UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0), 42 | UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9), 43 | UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9), 44 | UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0), 45 | UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001), 46 | UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0), 47 | UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0), 48 | UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0), 49 | UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0), 50 | UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)], 51 | # this is the objective metric we want to maximize/minimize 52 | objective_metric_title='metrics', 53 | objective_metric_series='mAP_0.5', 54 | # now we decide if we want to maximize it or minimize it (accuracy we maximize) 55 | objective_metric_sign='max', 56 | # let us limit the number of concurrent experiments, 57 | # this in turn will make sure we do dont bombard the scheduler with experiments. 
58 | # if we have an auto-scaler connected, this, by proxy, will limit the number of machine 59 | max_number_of_concurrent_tasks=1, 60 | # this is the optimizer class (actually doing the optimization) 61 | # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band) 62 | optimizer_class=OptimizerOptuna, 63 | # If specified only the top K performing Tasks will be kept, the others will be automatically archived 64 | save_top_k_tasks_only=5, # 5, 65 | compute_time_limit=None, 66 | total_max_jobs=20, 67 | min_iteration_per_job=None, 68 | max_iteration_per_job=None, 69 | ) 70 | 71 | # report every 10 seconds, this is way too often, but we are testing here 72 | optimizer.set_report_period(10) 73 | # You can also use the line below instead to run all the optimizer tasks locally, without using queues or agent 74 | # an_optimizer.start_locally(job_complete_callback=job_complete_callback) 75 | # set the time limit for the optimization process (2 hours) 76 | optimizer.set_time_limit(in_minutes=120.0) 77 | # Start the optimization process in the local environment 78 | optimizer.start_locally() 79 | # wait until process is done (notice we are controlling the optimization process in the background) 80 | optimizer.wait() 81 | # make sure background optimization stopped 82 | optimizer.stop() 83 | 84 | print('We are done, good bye') 85 | -------------------------------------------------------------------------------- /utils/loggers/wandb/README.md: -------------------------------------------------------------------------------- 1 | 📚 This guide explains how to use **Weights & Biases** (W&B) with YOLOv5 🚀. UPDATED 29 September 2021. 2 | 3 | - [About Weights & Biases](#about-weights-&-biases) 4 | - [First-Time Setup](#first-time-setup) 5 | - [Viewing runs](#viewing-runs) 6 | - [Disabling wandb](#disabling-wandb) 7 | - [Advanced Usage: Dataset Versioning and Evaluation](#advanced-usage) 8 | - [Reports: Share your work with the world!](#reports) 9 | 10 | ## About Weights & Biases 11 | 12 | Think of [W&B](https://wandb.ai/site?utm_campaign=repo_yolo_wandbtutorial) like GitHub for machine learning models. With a few lines of code, save everything you need to debug, compare and reproduce your models — architecture, hyperparameters, git commits, model weights, GPU usage, and even datasets and predictions. 13 | 14 | Used by top researchers including teams at OpenAI, Lyft, Github, and MILA, W&B is part of the new standard of best practices for machine learning. How W&B can help you optimize your machine learning workflows: 15 | 16 | - [Debug](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Free-2) model performance in real time 17 | - [GPU usage](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#System-4) visualized automatically 18 | - [Custom charts](https://wandb.ai/wandb/customizable-charts/reports/Powerful-Custom-Charts-To-Debug-Model-Peformance--VmlldzoyNzY4ODI) for powerful, extensible visualization 19 | - [Share insights](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Share-8) interactively with collaborators 20 | - [Optimize hyperparameters](https://docs.wandb.com/sweeps) efficiently 21 | - [Track](https://docs.wandb.com/artifacts) datasets, pipelines, and production models 22 | 23 | ## First-Time Setup 24 | 25 |
26 | Toggle Details 27 | When you first train, W&B will prompt you to create a new account and will generate an **API key** for you. If you are an existing user you can retrieve your key from https://wandb.ai/authorize. This key is used to tell W&B where to log your data. You only need to supply your key once, and then it is remembered on the same device. 28 | 29 | W&B will create a cloud **project** (default is 'YOLOv5') for your training runs, and each new training run will be provided a unique run **name** within that project as project/name. You can also manually set your project and run name as: 30 | 31 | ```shell 32 | $ python train.py --project ... --name ... 33 | ``` 34 | 35 | YOLOv5 notebook example: Open In Colab Open In Kaggle 36 | Screen Shot 2021-09-29 at 10 23 13 PM 37 | 38 |
39 | 40 | ## Viewing Runs 41 | 42 |
43 | Toggle Details 44 | Run information streams from your environment to the W&B cloud console as you train. This allows you to monitor and even cancel runs in real time. All important information is logged: 45 | 46 | - Training & Validation losses 47 | - Metrics: Precision, Recall, mAP@0.5, mAP@0.5:0.95 48 | - Learning Rate over time 49 | - A bounding box debugging panel, showing the training progress over time 50 | - GPU: Type, **GPU Utilization**, power, temperature, **CUDA memory usage** 51 | - System: Disk I/O, CPU utilization, RAM memory usage 52 | - Your trained model as a W&B Artifact 53 | - Environment: OS and Python types, Git repository and state, **training command** 54 |

Weights & Biases dashboard

56 |
57 | 58 | ## Disabling wandb 59 | 60 | - Training after running `wandb disabled` inside that directory creates no wandb run. 61 | ![Screenshot (84)](https://user-images.githubusercontent.com/15766192/143441777-c780bdd7-7cb4-4404-9559-b4316030a985.png) 62 | 63 | - To enable wandb again, run `wandb online`. 64 | ![Screenshot (85)](https://user-images.githubusercontent.com/15766192/143441866-7191b2cb-22f0-4e0f-ae64-2dc47dc13078.png) 65 | 66 | ## Advanced Usage 67 | 68 | You can leverage W&B artifacts and Tables integration to easily visualize and manage your datasets, models, and training evaluations. Here are some quick examples to get you started. 69 | 70 |
71 |

1: Train and Log Evaluation simultaneously

72 | This is an extension of the previous section: it will also run training after uploading the dataset, and log an evaluation Table. 73 | The evaluation table compares your predictions and ground truths across the validation set for each epoch. It uses references to the already uploaded datasets, 74 | so no images will be uploaded from your system more than once. 75 |
76 | Usage 77 | Code $ python train.py --upload_data val 78 | 79 | ![Screenshot from 2021-11-21 17-40-06](https://user-images.githubusercontent.com/15766192/142761183-c1696d8c-3f38-45ab-991a-bb0dfd98ae7d.png) 80 | 81 |
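If you want to build a similar table for your own runs outside of train.py, a minimal sketch with the plain wandb API looks like this (the project name, paths, and values below are placeholders, and this is not the repo's exact implementation):

```python
import wandb

# Placeholder inputs purely for illustration
val_images = ['runs/val/exp/val_batch0_pred.jpg']

run = wandb.init(project='YOLOv5', name='val-table-demo')
table = wandb.Table(columns=['epoch', 'image', 'notes'])
for path in val_images:
    # One row per validation image, with the rendered prediction attached
    table.add_data(0, wandb.Image(path), 'predictions overlaid on the validation image')
run.log({'Validation results': table})
run.finish()
```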
82 | 83 |

2. Visualize and Version Datasets

84 | Log, visualize, dynamically query, and understand your data with W&B Tables. You can use the following command to log your dataset as a W&B Table. This will generate a {dataset}_wandb.yaml file which can be used to train from dataset artifact. 85 |
86 | Usage 87 | Code $ python utils/loggers/wandb/log_dataset.py --project ... --name ... --data .. 88 | 89 | ![Screenshot (64)](https://user-images.githubusercontent.com/15766192/128486078-d8433890-98a3-4d12-8986-b6c0e3fc64b9.png) 90 | 91 |
92 | 93 |

3: Train using dataset artifact

94 | When you upload a dataset as described in the first section, you get a new config file with an added `_wandb` to its name. This file contains the information that 95 | can be used to train a model directly from the dataset artifact. This also logs the evaluation Table described above. 96 |
97 | Usage 98 | Code $ python train.py --data {data}_wandb.yaml 99 | 100 | ![Screenshot (72)](https://user-images.githubusercontent.com/15766192/128979739-4cf63aeb-a76f-483f-8861-1c0100b938a5.png) 101 | 102 |
103 | 104 |

4: Save model checkpoints as artifacts

105 | To enable saving and versioning checkpoints of your experiment, pass `--save_period n` with the base command, where `n` represents the checkpoint interval (see the sketch after the usage example below). 106 | You can also log both the dataset and model checkpoints simultaneously. If not passed, only the final model will be logged. 107 | 108 |
109 | Usage 110 | Code $ python train.py --save_period 1 111 | 112 | ![Screenshot (68)](https://user-images.githubusercontent.com/15766192/128726138-ec6c1f60-639d-437d-b4ee-3acd9de47ef3.png) 113 | 114 |
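This uses the same pattern as `GenericLogger.log_model` in `utils/loggers/__init__.py` (shown earlier in this repository): each saved checkpoint is wrapped in a W&B Artifact so every version is kept and retrievable. A standalone sketch, with a hypothetical checkpoint path:

```python
import wandb

run = wandb.init(project='YOLOv5', name='checkpoint-demo')
# One artifact per run; each logged file becomes a new artifact version
art = wandb.Artifact(name=f'run_{run.id}_model', type='model', metadata={'epoch': 0})
art.add_file('runs/train-seg/exp/weights/last.pt')  # hypothetical checkpoint path
run.log_artifact(art)
run.finish()
```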
115 | 116 |
117 | 118 |

5: Resume runs from checkpoint artifacts.

119 | Any run can be resumed using artifacts if the --resume argument starts with the wandb-artifact:// prefix followed by the run path, i.e. wandb-artifact://username/project/runid. This doesn't require the model checkpoint to be present on the local system. 120 | 121 |
122 | Usage 123 | Code $ python train.py --resume wandb-artifact://{run_path} 124 | 125 | ![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png) 126 | 127 |
128 | 129 |

6: Resume runs from dataset artifact & checkpoint artifacts.

130 | Local dataset or model checkpoints are not required. This can be used to resume runs directly on a different device. 131 | The syntax is the same as in the previous section, but you'll need to log both the dataset and model checkpoints as artifacts, i.e. both upload the dataset (set --upload_dataset or 132 | train from a _wandb.yaml file) and set --save_period. 133 | 134 |
135 | Usage 136 | Code $ python train.py --resume wandb-artifact://{run_path} 137 | 138 | ![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png) 139 | 140 |
141 | 142 | 143 | 144 |

Reports

145 | W&B Reports can be created from your saved runs for sharing online. Once a report is created you will receive a link you can use to publically share your results. Here is an example report created from the COCO128 tutorial trainings of all four YOLOv5 models ([link](https://wandb.ai/glenn-jocher/yolov5_tutorial/reports/YOLOv5-COCO128-Tutorial-Results--VmlldzozMDI5OTY)). 146 | 147 | Weights & Biases Reports 148 | 149 | ## Environments 150 | 151 | YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): 152 | 153 | - **Google Colab and Kaggle** notebooks with free GPU: Open In Colab Open In Kaggle 154 | - **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) 155 | - **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart) 156 | - **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) Docker Pulls 157 | 158 | ## Status 159 | 160 | ![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg) 161 | 162 | If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), validation ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit. 
163 | -------------------------------------------------------------------------------- /utils/loggers/wandb/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/loggers/wandb/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from wandb_utils import WandbLogger 4 | 5 | from utils.general import LOGGER 6 | 7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 8 | 9 | 10 | def create_dataset_artifact(opt): 11 | logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused 12 | if not logger.wandb: 13 | LOGGER.info("install wandb using `pip install wandb` to log the dataset") 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 19 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 20 | parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 21 | parser.add_argument('--entity', default=None, help='W&B entity') 22 | parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') 23 | 24 | opt = parser.parse_args() 25 | opt.resume = False # Explicitly disallow resume check for dataset upload job 26 | 27 | create_dataset_artifact(opt) 28 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import wandb 5 | 6 | FILE = Path(__file__).resolve() 7 | ROOT = FILE.parents[3] # YOLOv5 root directory 8 | if str(ROOT) not in sys.path: 9 | sys.path.append(str(ROOT)) # add ROOT to PATH 10 | 11 | from train import parse_opt, train 12 | from utils.callbacks import Callbacks 13 | from utils.general import increment_path 14 | from utils.torch_utils import select_device 15 | 16 | 17 | def sweep(): 18 | wandb.init() 19 | # Get hyp dict from sweep agent. Copy because train() modifies parameters which confused wandb. 20 | hyp_dict = vars(wandb.config).get("_items").copy() 21 | 22 | # Workaround: get necessary opt args 23 | opt = parse_opt(known=True) 24 | opt.batch_size = hyp_dict.get("batch_size") 25 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 26 | opt.epochs = hyp_dict.get("epochs") 27 | opt.nosave = True 28 | opt.data = hyp_dict.get("data") 29 | opt.weights = str(opt.weights) 30 | opt.cfg = str(opt.cfg) 31 | opt.data = str(opt.data) 32 | opt.hyp = str(opt.hyp) 33 | opt.project = str(opt.project) 34 | device = select_device(opt.device, batch_size=opt.batch_size) 35 | 36 | # train 37 | train(hyp_dict, opt, device, callbacks=Callbacks()) 38 | 39 | 40 | if __name__ == "__main__": 41 | sweep() 42 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for training 2 | # To set range- 3 | # Provide min and max values as: 4 | # parameter: 5 | # 6 | # min: scalar 7 | # max: scalar 8 | # OR 9 | # 10 | # Set a specific list of search space- 11 | # parameter: 12 | # values: [scalar1, scalar2, scalar3...] 
13 | # 14 | # You can use grid, bayesian and hyperopt search strategy 15 | # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration 16 | 17 | program: utils/loggers/wandb/sweep.py 18 | method: random 19 | metric: 20 | name: metrics/mAP_0.5 21 | goal: maximize 22 | 23 | parameters: 24 | # hyperparameters: set either min, max range or values list 25 | data: 26 | value: "data/coco128.yaml" 27 | batch_size: 28 | values: [64] 29 | epochs: 30 | values: [10] 31 | 32 | lr0: 33 | distribution: uniform 34 | min: 1e-5 35 | max: 1e-1 36 | lrf: 37 | distribution: uniform 38 | min: 0.01 39 | max: 1.0 40 | momentum: 41 | distribution: uniform 42 | min: 0.6 43 | max: 0.98 44 | weight_decay: 45 | distribution: uniform 46 | min: 0.0 47 | max: 0.001 48 | warmup_epochs: 49 | distribution: uniform 50 | min: 0.0 51 | max: 5.0 52 | warmup_momentum: 53 | distribution: uniform 54 | min: 0.0 55 | max: 0.95 56 | warmup_bias_lr: 57 | distribution: uniform 58 | min: 0.0 59 | max: 0.2 60 | box: 61 | distribution: uniform 62 | min: 0.02 63 | max: 0.2 64 | cls: 65 | distribution: uniform 66 | min: 0.2 67 | max: 4.0 68 | cls_pw: 69 | distribution: uniform 70 | min: 0.5 71 | max: 2.0 72 | obj: 73 | distribution: uniform 74 | min: 0.2 75 | max: 4.0 76 | obj_pw: 77 | distribution: uniform 78 | min: 0.5 79 | max: 2.0 80 | iou_t: 81 | distribution: uniform 82 | min: 0.1 83 | max: 0.7 84 | anchor_t: 85 | distribution: uniform 86 | min: 2.0 87 | max: 8.0 88 | fl_gamma: 89 | distribution: uniform 90 | min: 0.0 91 | max: 4.0 92 | hsv_h: 93 | distribution: uniform 94 | min: 0.0 95 | max: 0.1 96 | hsv_s: 97 | distribution: uniform 98 | min: 0.0 99 | max: 0.9 100 | hsv_v: 101 | distribution: uniform 102 | min: 0.0 103 | max: 0.9 104 | degrees: 105 | distribution: uniform 106 | min: 0.0 107 | max: 45.0 108 | translate: 109 | distribution: uniform 110 | min: 0.0 111 | max: 0.9 112 | scale: 113 | distribution: uniform 114 | min: 0.0 115 | max: 0.9 116 | shear: 117 | distribution: uniform 118 | min: 0.0 119 | max: 10.0 120 | perspective: 121 | distribution: uniform 122 | min: 0.0 123 | max: 0.001 124 | flipud: 125 | distribution: uniform 126 | min: 0.0 127 | max: 1.0 128 | fliplr: 129 | distribution: uniform 130 | min: 0.0 131 | max: 1.0 132 | mosaic: 133 | distribution: uniform 134 | min: 0.0 135 | max: 1.0 136 | mixup: 137 | distribution: uniform 138 | min: 0.0 139 | max: 1.0 140 | copy_paste: 141 | distribution: uniform 142 | min: 0.0 143 | max: 1.0 144 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Loss functions 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from utils.metrics import bbox_iou 10 | from utils.torch_utils import de_parallel 11 | 12 | 13 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 14 | # return positive, negative label smoothing BCE targets 15 | return 1.0 - 0.5 * eps, 0.5 * eps 16 | 17 | 18 | class BCEBlurWithLogitsLoss(nn.Module): 19 | # BCEwithLogitLoss() with reduced missing label effects. 
20 | def __init__(self, alpha=0.05): 21 | super().__init__() 22 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 23 | self.alpha = alpha 24 | 25 | def forward(self, pred, true): 26 | loss = self.loss_fcn(pred, true) 27 | pred = torch.sigmoid(pred) # prob from logits 28 | dx = pred - true # reduce only missing label effects 29 | # dx = (pred - true).abs() # reduce missing label and false label effects 30 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 31 | loss *= alpha_factor 32 | return loss.mean() 33 | 34 | 35 | class FocalLoss(nn.Module): 36 | # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 37 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 38 | super().__init__() 39 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 40 | self.gamma = gamma 41 | self.alpha = alpha 42 | self.reduction = loss_fcn.reduction 43 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 44 | 45 | def forward(self, pred, true): 46 | loss = self.loss_fcn(pred, true) 47 | # p_t = torch.exp(-loss) 48 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 49 | 50 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 51 | pred_prob = torch.sigmoid(pred) # prob from logits 52 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 53 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 54 | modulating_factor = (1.0 - p_t) ** self.gamma 55 | loss *= alpha_factor * modulating_factor 56 | 57 | if self.reduction == 'mean': 58 | return loss.mean() 59 | elif self.reduction == 'sum': 60 | return loss.sum() 61 | else: # 'none' 62 | return loss 63 | 64 | 65 | class QFocalLoss(nn.Module): 66 | # Wraps Quality focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 67 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 68 | super().__init__() 69 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 70 | self.gamma = gamma 71 | self.alpha = alpha 72 | self.reduction = loss_fcn.reduction 73 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 74 | 75 | def forward(self, pred, true): 76 | loss = self.loss_fcn(pred, true) 77 | 78 | pred_prob = torch.sigmoid(pred) # prob from logits 79 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 80 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma 81 | loss *= alpha_factor * modulating_factor 82 | 83 | if self.reduction == 'mean': 84 | return loss.mean() 85 | elif self.reduction == 'sum': 86 | return loss.sum() 87 | else: # 'none' 88 | return loss 89 | 90 | 91 | class ComputeLoss: 92 | sort_obj_iou = False 93 | 94 | # Compute losses 95 | def __init__(self, model, autobalance=False): 96 | device = next(model.parameters()).device # get model device 97 | h = model.hyp # hyperparameters 98 | 99 | # Define criteria 100 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) 101 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 102 | 103 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 104 | self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets 105 | 106 | # Focal loss 107 | g = h['fl_gamma'] # focal loss gamma 108 | if g > 0: 109 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 110 | 111 | m = de_parallel(model).model[-1] # Detect() module 112 | self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 113 | self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index 114 | self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance 115 | self.na = m.na # number of anchors 116 | self.nc = m.nc # number of classes 117 | self.nl = m.nl # number of layers 118 | self.anchors = m.anchors 119 | self.device = device 120 | 121 | def __call__(self, p, targets): # predictions, targets 122 | lcls = torch.zeros(1, device=self.device) # class loss 123 | lbox = torch.zeros(1, device=self.device) # box loss 124 | lobj = torch.zeros(1, device=self.device) # object loss 125 | tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets 126 | 127 | # Losses 128 | for i, pi in enumerate(p): # layer index, layer predictions 129 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 130 | tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj 131 | 132 | n = b.shape[0] # number of targets 133 | if n: 134 | # pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1) # faster, requires torch 1.8.0 135 | pxy, pwh, _, pcls = pi[b, a, gj, gi].split((2, 2, 1, self.nc), 1) # target-subset of predictions 136 | 137 | # Regression 138 | pxy = pxy.sigmoid() * 2 - 0.5 139 | pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] 140 | pbox = torch.cat((pxy, pwh), 1) # predicted box 141 | iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) 142 | lbox += (1.0 - iou).mean() # iou loss 143 | 144 | # Objectness 145 | iou = iou.detach().clamp(0).type(tobj.dtype) 146 | if self.sort_obj_iou: 147 | j = iou.argsort() 148 | b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j] 149 | if self.gr < 1: 150 | iou = (1.0 - self.gr) + self.gr * iou 151 | tobj[b, a, gj, gi] 
= iou # iou ratio 152 | 153 | # Classification 154 | if self.nc > 1: # cls loss (only if multiple classes) 155 | t = torch.full_like(pcls, self.cn, device=self.device) # targets 156 | t[range(n), tcls[i]] = self.cp 157 | lcls += self.BCEcls(pcls, t) # BCE 158 | 159 | # Append targets to text file 160 | # with open('targets.txt', 'a') as file: 161 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 162 | 163 | obji = self.BCEobj(pi[..., 4], tobj) 164 | lobj += obji * self.balance[i] # obj loss 165 | if self.autobalance: 166 | self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() 167 | 168 | if self.autobalance: 169 | self.balance = [x / self.balance[self.ssi] for x in self.balance] 170 | lbox *= self.hyp['box'] 171 | lobj *= self.hyp['obj'] 172 | lcls *= self.hyp['cls'] 173 | bs = tobj.shape[0] # batch size 174 | 175 | return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach() 176 | 177 | def build_targets(self, p, targets): 178 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 179 | na, nt = self.na, targets.shape[0] # number of anchors, targets 180 | tcls, tbox, indices, anch = [], [], [], [] 181 | gain = torch.ones(7, device=self.device) # normalized to gridspace gain 182 | ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 183 | targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None]), 2) # append anchor indices 184 | 185 | g = 0.5 # bias 186 | off = torch.tensor( 187 | [ 188 | [0, 0], 189 | [1, 0], 190 | [0, 1], 191 | [-1, 0], 192 | [0, -1], # j,k,l,m 193 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 194 | ], 195 | device=self.device).float() * g # offsets 196 | 197 | for i in range(self.nl): 198 | anchors, shape = self.anchors[i], p[i].shape 199 | gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain 200 | 201 | # Match targets to anchors 202 | t = targets * gain # shape(3,n,7) 203 | if nt: 204 | # Matches 205 | r = t[..., 4:6] / anchors[:, None] # wh ratio 206 | j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare 207 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 208 | t = t[j] # filter 209 | 210 | # Offsets 211 | gxy = t[:, 2:4] # grid xy 212 | gxi = gain[[2, 3]] - gxy # inverse 213 | j, k = ((gxy % 1 < g) & (gxy > 1)).T 214 | l, m = ((gxi % 1 < g) & (gxi > 1)).T 215 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 216 | t = t.repeat((5, 1, 1))[j] 217 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 218 | else: 219 | t = targets[0] 220 | offsets = 0 221 | 222 | # Define 223 | bc, gxy, gwh, a = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors 224 | a, (b, c) = a.long().view(-1), bc.long().T # anchors, image, class 225 | gij = (gxy - offsets).long() 226 | gi, gj = gij.T # grid indices 227 | 228 | # Append 229 | indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid 230 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 231 | anch.append(anchors[a]) # anchors 232 | tcls.append(c) # class 233 | 234 | return tcls, tbox, indices, anch 235 | -------------------------------------------------------------------------------- /utils/segment/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/segment/augmentations.py: 
-------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Image augmentation functions 4 | """ 5 | 6 | import math 7 | import random 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | from ..augmentations import box_candidates 13 | from ..general import resample_segments, segment2box 14 | 15 | 16 | def mixup(im, labels, segments, im2, labels2, segments2): 17 | # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf 18 | r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 19 | im = (im * r + im2 * (1 - r)).astype(np.uint8) 20 | labels = np.concatenate((labels, labels2), 0) 21 | segments = np.concatenate((segments, segments2), 0) 22 | return im, labels, segments 23 | 24 | 25 | def random_perspective(im, 26 | targets=(), 27 | segments=(), 28 | degrees=10, 29 | translate=.1, 30 | scale=.1, 31 | shear=10, 32 | perspective=0.0, 33 | border=(0, 0)): 34 | # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 35 | # targets = [cls, xyxy] 36 | 37 | height = im.shape[0] + border[0] * 2 # shape(h,w,c) 38 | width = im.shape[1] + border[1] * 2 39 | 40 | # Center 41 | C = np.eye(3) 42 | C[0, 2] = -im.shape[1] / 2 # x translation (pixels) 43 | C[1, 2] = -im.shape[0] / 2 # y translation (pixels) 44 | 45 | # Perspective 46 | P = np.eye(3) 47 | P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) 48 | P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) 49 | 50 | # Rotation and Scale 51 | R = np.eye(3) 52 | a = random.uniform(-degrees, degrees) 53 | # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations 54 | s = random.uniform(1 - scale, 1 + scale) 55 | # s = 2 ** random.uniform(-scale, scale) 56 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) 57 | 58 | # Shear 59 | S = np.eye(3) 60 | S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) 61 | S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) 62 | 63 | # Translation 64 | T = np.eye(3) 65 | T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) 66 | T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) 67 | 68 | # Combined rotation matrix 69 | M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT 70 | if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed 71 | if perspective: 72 | im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) 73 | else: # affine 74 | im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) 75 | 76 | # Visualize 77 | # import matplotlib.pyplot as plt 78 | # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() 79 | # ax[0].imshow(im[:, :, ::-1]) # base 80 | # ax[1].imshow(im2[:, :, ::-1]) # warped 81 | 82 | # Transform label coordinates 83 | n = len(targets) 84 | new_segments = [] 85 | if n: 86 | new = np.zeros((n, 4)) 87 | segments = resample_segments(segments) # upsample 88 | for i, segment in enumerate(segments): 89 | xy = np.ones((len(segment), 3)) 90 | xy[:, :2] = segment 91 | xy = xy @ M.T # transform 92 | xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine 93 | 94 | # clip 95 | new[i] = segment2box(xy, width, height) 96 | new_segments.append(xy) 97 | 98 | # filter candidates 99 | i = 
box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) 100 | targets = targets[i] 101 | targets[:, 1:5] = new[i] 102 | new_segments = np.array(new_segments)[i] 103 | 104 | return im, targets, new_segments 105 | -------------------------------------------------------------------------------- /utils/segment/dataloaders.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Dataloaders 4 | """ 5 | 6 | import os 7 | import random 8 | 9 | import cv2 10 | import numpy as np 11 | import torch 12 | from torch.utils.data import DataLoader, distributed 13 | 14 | from ..augmentations import augment_hsv, copy_paste, letterbox 15 | from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker 16 | from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn 17 | from ..torch_utils import torch_distributed_zero_first 18 | from .augmentations import mixup, random_perspective 19 | 20 | 21 | def create_dataloader(path, 22 | imgsz, 23 | batch_size, 24 | stride, 25 | single_cls=False, 26 | hyp=None, 27 | augment=False, 28 | cache=False, 29 | pad=0.0, 30 | rect=False, 31 | rank=-1, 32 | workers=8, 33 | image_weights=False, 34 | quad=False, 35 | prefix='', 36 | shuffle=False, 37 | mask_downsample_ratio=1, 38 | overlap_mask=False): 39 | if rect and shuffle: 40 | LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') 41 | shuffle = False 42 | with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP 43 | dataset = LoadImagesAndLabelsAndMasks( 44 | path, 45 | imgsz, 46 | batch_size, 47 | augment=augment, # augmentation 48 | hyp=hyp, # hyperparameters 49 | rect=rect, # rectangular batches 50 | cache_images=cache, 51 | single_cls=single_cls, 52 | stride=int(stride), 53 | pad=pad, 54 | image_weights=image_weights, 55 | prefix=prefix, 56 | downsample_ratio=mask_downsample_ratio, 57 | overlap=overlap_mask) 58 | 59 | batch_size = min(batch_size, len(dataset)) 60 | nd = torch.cuda.device_count() # number of CUDA devices 61 | nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers 62 | sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) 63 | loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates 64 | # generator = torch.Generator() 65 | # generator.manual_seed(0) 66 | return loader( 67 | dataset, 68 | batch_size=batch_size, 69 | shuffle=shuffle and sampler is None, 70 | num_workers=nw, 71 | sampler=sampler, 72 | pin_memory=True, 73 | collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, 74 | worker_init_fn=seed_worker, 75 | # generator=generator, 76 | ), dataset 77 | 78 | 79 | class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing 80 | 81 | def __init__( 82 | self, 83 | path, 84 | img_size=640, 85 | batch_size=16, 86 | augment=False, 87 | hyp=None, 88 | rect=False, 89 | image_weights=False, 90 | cache_images=False, 91 | single_cls=False, 92 | stride=32, 93 | pad=0, 94 | prefix="", 95 | downsample_ratio=1, 96 | overlap=False, 97 | ): 98 | super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, 99 | stride, pad, prefix) 100 | self.downsample_ratio = downsample_ratio 101 | self.overlap = overlap 102 | 103 | def __getitem__(self, index): 104 | index = self.indices[index] # linear, 
shuffled, or image_weights 105 | 106 | hyp = self.hyp 107 | mosaic = self.mosaic and random.random() < hyp['mosaic'] 108 | masks = [] 109 | if mosaic: 110 | # Load mosaic 111 | img, labels, segments = self.load_mosaic(index) 112 | shapes = None 113 | 114 | # MixUp augmentation 115 | if random.random() < hyp["mixup"]: 116 | img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1))) 117 | 118 | else: 119 | # Load image 120 | img, (h0, w0), (h, w) = self.load_image(index) 121 | 122 | # Letterbox 123 | shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape 124 | img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) 125 | shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling 126 | 127 | labels = self.labels[index].copy() 128 | # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy 129 | segments = self.segments[index].copy() 130 | if len(segments): 131 | for i_s in range(len(segments)): 132 | segments[i_s] = xyn2xy( 133 | segments[i_s], 134 | ratio[0] * w, 135 | ratio[1] * h, 136 | padw=pad[0], 137 | padh=pad[1], 138 | ) 139 | if labels.size: # normalized xywh to pixel xyxy format 140 | labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) 141 | 142 | if self.augment: 143 | img, labels, segments = random_perspective( 144 | img, 145 | labels, 146 | segments=segments, 147 | degrees=hyp["degrees"], 148 | translate=hyp["translate"], 149 | scale=hyp["scale"], 150 | shear=hyp["shear"], 151 | perspective=hyp["perspective"], 152 | return_seg=True, 153 | ) 154 | 155 | nl = len(labels) # number of labels 156 | if nl: 157 | labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) 158 | if self.overlap: 159 | masks, sorted_idx = polygons2masks_overlap(img.shape[:2], 160 | segments, 161 | downsample_ratio=self.downsample_ratio) 162 | masks = masks[None] # (640, 640) -> (1, 640, 640) 163 | labels = labels[sorted_idx] 164 | else: 165 | masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) 166 | 167 | masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] // 168 | self.downsample_ratio, img.shape[1] // 169 | self.downsample_ratio)) 170 | # TODO: albumentations support 171 | if self.augment: 172 | # Albumentations 173 | # there are some augmentation that won't change boxes and masks, 174 | # so just be it for now. 
175 | img, labels = self.albumentations(img, labels) 176 | nl = len(labels) # update after albumentations 177 | 178 | # HSV color-space 179 | augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) 180 | 181 | # Flip up-down 182 | if random.random() < hyp["flipud"]: 183 | img = np.flipud(img) 184 | if nl: 185 | labels[:, 2] = 1 - labels[:, 2] 186 | masks = torch.flip(masks, dims=[1]) 187 | 188 | # Flip left-right 189 | if random.random() < hyp["fliplr"]: 190 | img = np.fliplr(img) 191 | if nl: 192 | labels[:, 1] = 1 - labels[:, 1] 193 | masks = torch.flip(masks, dims=[2]) 194 | 195 | # Cutouts # labels = cutout(img, labels, p=0.5) 196 | 197 | labels_out = torch.zeros((nl, 6)) 198 | if nl: 199 | labels_out[:, 1:] = torch.from_numpy(labels) 200 | 201 | # Convert 202 | img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB 203 | img = np.ascontiguousarray(img) 204 | 205 | return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) 206 | 207 | def load_mosaic(self, index): 208 | # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic 209 | labels4, segments4 = [], [] 210 | s = self.img_size 211 | yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y 212 | 213 | # 3 additional image indices 214 | indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices 215 | for i, index in enumerate(indices): 216 | # Load image 217 | img, _, (h, w) = self.load_image(index) 218 | 219 | # place img in img4 220 | if i == 0: # top left 221 | img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles 222 | x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) 223 | x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) 224 | elif i == 1: # top right 225 | x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc 226 | x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h 227 | elif i == 2: # bottom left 228 | x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) 229 | x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) 230 | elif i == 3: # bottom right 231 | x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) 232 | x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) 233 | 234 | img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] 235 | padw = x1a - x1b 236 | padh = y1a - y1b 237 | 238 | labels, segments = self.labels[index].copy(), self.segments[index].copy() 239 | 240 | if labels.size: 241 | labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format 242 | segments = [xyn2xy(x, w, h, padw, padh) for x in segments] 243 | labels4.append(labels) 244 | segments4.extend(segments) 245 | 246 | # Concat/clip labels 247 | labels4 = np.concatenate(labels4, 0) 248 | for x in (labels4[:, 1:], *segments4): 249 | np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() 250 | # img4, labels4 = replicate(img4, labels4) # replicate 251 | 252 | # Augment 253 | img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) 254 | img4, labels4, segments4 = random_perspective(img4, 255 | labels4, 256 | segments4, 257 | degrees=self.hyp["degrees"], 258 | translate=self.hyp["translate"], 259 | scale=self.hyp["scale"], 260 | shear=self.hyp["shear"], 261 | perspective=self.hyp["perspective"], 262 | border=self.mosaic_border) 
# border to remove 263 | return img4, labels4, segments4 264 | 265 | @staticmethod 266 | def collate_fn(batch): 267 | img, label, path, shapes, masks = zip(*batch) # transposed 268 | batched_masks = torch.cat(masks, 0) 269 | for i, l in enumerate(label): 270 | l[:, 0] = i # add target image index for build_targets() 271 | return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks 272 | 273 | 274 | def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): 275 | """ 276 | Args: 277 | img_size (tuple): The image size. 278 | polygons (np.ndarray): [N, M], N is the number of polygons, 279 | M is the number of points(Be divided by 2). 280 | """ 281 | mask = np.zeros(img_size, dtype=np.uint8) 282 | polygons = np.asarray(polygons) 283 | polygons = polygons.astype(np.int32) 284 | shape = polygons.shape 285 | polygons = polygons.reshape(shape[0], -1, 2) 286 | cv2.fillPoly(mask, polygons, color=color) 287 | nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) 288 | # NOTE: fillPoly firstly then resize is trying the keep the same way 289 | # of loss calculation when mask-ratio=1. 290 | mask = cv2.resize(mask, (nw, nh)) 291 | return mask 292 | 293 | 294 | def polygons2masks(img_size, polygons, color, downsample_ratio=1): 295 | """ 296 | Args: 297 | img_size (tuple): The image size. 298 | polygons (list[np.ndarray]): each polygon is [N, M], 299 | N is the number of polygons, 300 | M is the number of points(Be divided by 2). 301 | """ 302 | masks = [] 303 | for si in range(len(polygons)): 304 | mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio) 305 | masks.append(mask) 306 | return np.array(masks) 307 | 308 | 309 | def polygons2masks_overlap(img_size, segments, downsample_ratio=1): 310 | """Return a (640, 640) overlap mask.""" 311 | masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), dtype=np.uint8) 312 | areas = [] 313 | ms = [] 314 | for si in range(len(segments)): 315 | mask = polygon2mask( 316 | img_size, 317 | [segments[si].reshape(-1)], 318 | downsample_ratio=downsample_ratio, 319 | color=1, 320 | ) 321 | ms.append(mask) 322 | areas.append(mask.sum()) 323 | areas = np.asarray(areas) 324 | index = np.argsort(-areas) 325 | ms = np.array(ms)[index] 326 | for i in range(len(segments)): 327 | mask = ms[i] * (i + 1) 328 | masks = masks + mask 329 | masks = np.clip(masks, a_min=0, a_max=i + 1) 330 | return masks, index 331 | -------------------------------------------------------------------------------- /utils/segment/general.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | def crop(masks, boxes): 7 | """ 8 | "Crop" predicted masks by zeroing out everything not in the predicted bbox. 9 | Vectorized by Chong (thanks Chong). 10 | 11 | Args: 12 | - masks should be a size [h, w, n] tensor of masks 13 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form 14 | """ 15 | 16 | n, h, w = masks.shape 17 | x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) 18 | r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 19 | c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 20 | 21 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 22 | 23 | 24 | def process_mask_upsample(protos, masks_in, bboxes, shape): 25 | """ 26 | Crop after upsample. 
27 | proto_out: [mask_dim, mask_h, mask_w] 28 | out_masks: [n, mask_dim], n is number of masks after nms 29 | bboxes: [n, 4], n is number of masks after nms 30 | shape:input_image_size, (h, w) 31 | 32 | return: h, w, n 33 | """ 34 | 35 | c, mh, mw = protos.shape # CHW 36 | masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) 37 | masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW 38 | masks = crop(masks, bboxes) # CHW 39 | return masks.gt_(0.5) 40 | 41 | 42 | def process_mask(protos, masks_in, bboxes, shape, upsample=False): 43 | """ 44 | Crop before upsample. 45 | proto_out: [mask_dim, mask_h, mask_w] 46 | out_masks: [n, mask_dim], n is number of masks after nms 47 | bboxes: [n, 4], n is number of masks after nms 48 | shape:input_image_size, (h, w) 49 | 50 | return: h, w, n 51 | """ 52 | 53 | c, mh, mw = protos.shape # CHW 54 | ih, iw = shape 55 | masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW 56 | 57 | downsampled_bboxes = bboxes.clone() 58 | downsampled_bboxes[:, 0] *= mw / iw 59 | downsampled_bboxes[:, 2] *= mw / iw 60 | downsampled_bboxes[:, 3] *= mh / ih 61 | downsampled_bboxes[:, 1] *= mh / ih 62 | 63 | masks = crop(masks, downsampled_bboxes) # CHW 64 | if upsample: 65 | masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW 66 | return masks.gt_(0.5) 67 | 68 | 69 | def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): 70 | """ 71 | img1_shape: model input shape, [h, w] 72 | img0_shape: origin pic shape, [h, w, 3] 73 | masks: [h, w, num] 74 | resize for the most time 75 | """ 76 | # Rescale coords (xyxy) from img1_shape to img0_shape 77 | if ratio_pad is None: # calculate from img0_shape 78 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 79 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 80 | else: 81 | gain = ratio_pad[0][0] 82 | pad = ratio_pad[1] 83 | tl_pad = int(pad[1]), int(pad[0]) # y, x 84 | br_pad = int(img1_shape[0] - pad[1]), int(img1_shape[1] - pad[0]) 85 | 86 | if len(masks.shape) < 2: 87 | raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') 88 | # masks_h, masks_w, n 89 | masks = masks[tl_pad[0]:br_pad[0], tl_pad[1]:br_pad[1]] 90 | # 1, n, masks_h, masks_w 91 | # masks = masks.permute(2, 0, 1).contiguous()[None, :] 92 | # # shape = [1, n, masks_h, masks_w] after F.interpolate, so take first element 93 | # masks = F.interpolate(masks, img0_shape[:2], mode='bilinear', align_corners=False)[0] 94 | # masks = masks.permute(1, 2, 0).contiguous() 95 | # masks_h, masks_w, n 96 | masks = cv2.resize(masks, (img0_shape[1], img0_shape[0])) 97 | 98 | # keepdim 99 | if len(masks.shape) == 2: 100 | masks = masks[:, :, None] 101 | 102 | return masks 103 | 104 | 105 | def mask_iou(mask1, mask2, eps=1e-7): 106 | """ 107 | mask1: [N, n] m1 means number of predicted objects 108 | mask2: [M, n] m2 means number of gt objects 109 | Note: n means image_w x image_h 110 | 111 | return: masks iou, [N, M] 112 | """ 113 | intersection = torch.matmul(mask1, mask2.t()).clamp(0) 114 | union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection 115 | return intersection / (union + eps) 116 | 117 | 118 | def masks_iou(mask1, mask2, eps=1e-7): 119 | """ 120 | mask1: [N, n] m1 means number of predicted objects 121 | mask2: [N, n] m2 means number of gt objects 122 | Note: n means image_w x 
image_h 123 | 124 | return: masks iou, (N, ) 125 | """ 126 | intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) 127 | union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection 128 | return intersection / (union + eps) 129 | 130 | 131 | def masks2segments(masks, strategy='largest'): 132 | # Convert masks(n,160,160) into segments(n,xy) 133 | segments = [] 134 | for x in masks.int().cpu().numpy().astype('uint8'): 135 | c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] 136 | if c: 137 | if strategy == 'concat': # concatenate all segments 138 | c = np.concatenate([x.reshape(-1, 2) for x in c]) 139 | elif strategy == 'largest': # select largest segment 140 | c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) 141 | else: 142 | c = np.zeros((0, 2)) # no segments found 143 | segments.append(c.astype('float32')) 144 | return segments -------------------------------------------------------------------------------- /utils/segment/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..general import xywh2xyxy 6 | from ..loss import FocalLoss, smooth_BCE 7 | from ..metrics import bbox_iou 8 | from ..torch_utils import de_parallel 9 | from .general import crop 10 | 11 | 12 | class ComputeLoss: 13 | # Compute losses 14 | def __init__(self, model, autobalance=False, overlap=False): 15 | self.sort_obj_iou = False 16 | self.overlap = overlap 17 | device = next(model.parameters()).device # get model device 18 | h = model.hyp # hyperparameters 19 | self.device = device 20 | 21 | # Define criteria 22 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) 23 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 24 | 25 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 26 | self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets 27 | 28 | # Focal loss 29 | g = h['fl_gamma'] # focal loss gamma 30 | if g > 0: 31 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 32 | 33 | m = de_parallel(model).model[-1] # Detect() module 34 | self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 35 | self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index 36 | self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance 37 | self.na = m.na # number of anchors 38 | self.nc = m.nc # number of classes 39 | self.nl = m.nl # number of layers 40 | self.nm = m.nm # number of masks 41 | self.anchors = m.anchors 42 | self.device = device 43 | 44 | def __call__(self, preds, targets, masks): # predictions, targets, model 45 | p, proto = preds 46 | bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width 47 | lcls = torch.zeros(1, device=self.device) 48 | lbox = torch.zeros(1, device=self.device) 49 | lobj = torch.zeros(1, device=self.device) 50 | lseg = torch.zeros(1, device=self.device) 51 | tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets 52 | 53 | # Losses 54 | for i, pi in enumerate(p): # layer index, layer predictions 55 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 56 | tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj 57 | 58 | n = b.shape[0] # number of targets 59 | if n: 60 | pxy, pwh, _, 
pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions 61 | 62 | # Box regression 63 | pxy = pxy.sigmoid() * 2 - 0.5 64 | pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] 65 | pbox = torch.cat((pxy, pwh), 1) # predicted box 66 | iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) 67 | lbox += (1.0 - iou).mean() # iou loss 68 | 69 | # Objectness 70 | iou = iou.detach().clamp(0).type(tobj.dtype) 71 | if self.sort_obj_iou: 72 | j = iou.argsort() 73 | b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j] 74 | if self.gr < 1: 75 | iou = (1.0 - self.gr) + self.gr * iou 76 | tobj[b, a, gj, gi] = iou # iou ratio 77 | 78 | # Classification 79 | if self.nc > 1: # cls loss (only if multiple classes) 80 | t = torch.full_like(pcls, self.cn, device=self.device) # targets 81 | t[range(n), tcls[i]] = self.cp 82 | lcls += self.BCEcls(pcls, t) # BCE 83 | 84 | # Mask regression 85 | if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample 86 | masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] 87 | marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized 88 | mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) 89 | for bi in b.unique(): 90 | j = b == bi # matching index 91 | if self.overlap: 92 | mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0) 93 | else: 94 | mask_gti = masks[tidxs[i]][j] 95 | lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) 96 | 97 | obji = self.BCEobj(pi[..., 4], tobj) 98 | lobj += obji * self.balance[i] # obj loss 99 | if self.autobalance: 100 | self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() 101 | 102 | if self.autobalance: 103 | self.balance = [x / self.balance[self.ssi] for x in self.balance] 104 | lbox *= self.hyp["box"] 105 | lobj *= self.hyp["obj"] 106 | lcls *= self.hyp["cls"] 107 | lseg *= self.hyp["box"] / bs 108 | 109 | loss = lbox + lobj + lcls + lseg 110 | return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() 111 | 112 | def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): 113 | # Mask loss for one image 114 | pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) 115 | loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") 116 | return (crop(loss, xyxy).mean(dim=(1, 2)) / area).mean() 117 | 118 | def build_targets(self, p, targets): 119 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 120 | na, nt = self.na, targets.shape[0] # number of anchors, targets 121 | tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], [] 122 | gain = torch.ones(8, device=self.device) # normalized to gridspace gain 123 | ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 124 | if self.overlap: 125 | batch = p[0].shape[0] 126 | ti = [] 127 | for i in range(batch): 128 | num = (targets[:, 0] == i).sum() # find number of targets of each image 129 | ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) 130 | ti = torch.cat(ti, 1) # (na, nt) 131 | else: 132 | ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1) 133 | targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices 134 | 135 | g = 0.5 # bias 136 | off = torch.tensor( 137 | [ 138 | [0, 0], 139 | [1, 0], 140 | [0, 
1], 141 | [-1, 0], 142 | [0, -1], # j,k,l,m 143 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 144 | ], 145 | device=self.device).float() * g # offsets 146 | 147 | for i in range(self.nl): 148 | anchors, shape = self.anchors[i], p[i].shape 149 | gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain 150 | 151 | # Match targets to anchors 152 | t = targets * gain # shape(3,n,7) 153 | if nt: 154 | # Matches 155 | r = t[..., 4:6] / anchors[:, None] # wh ratio 156 | j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare 157 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 158 | t = t[j] # filter 159 | 160 | # Offsets 161 | gxy = t[:, 2:4] # grid xy 162 | gxi = gain[[2, 3]] - gxy # inverse 163 | j, k = ((gxy % 1 < g) & (gxy > 1)).T 164 | l, m = ((gxi % 1 < g) & (gxi > 1)).T 165 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 166 | t = t.repeat((5, 1, 1))[j] 167 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 168 | else: 169 | t = targets[0] 170 | offsets = 0 171 | 172 | # Define 173 | bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors 174 | (a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class 175 | gij = (gxy - offsets).long() 176 | gi, gj = gij.T # grid indices 177 | 178 | # Append 179 | indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid 180 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 181 | anch.append(anchors[a]) # anchors 182 | tcls.append(c) # class 183 | tidxs.append(tidx) 184 | xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized 185 | 186 | return tcls, tbox, indices, anch, tidxs, xywhn 187 | -------------------------------------------------------------------------------- /utils/segment/metrics.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Model validation metrics 4 | """ 5 | 6 | import numpy as np 7 | 8 | from ..metrics import ap_per_class 9 | 10 | 11 | def fitness(x): 12 | # Model fitness as a weighted combination of metrics 13 | w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] 14 | return (x[:, :8] * w).sum(1) 15 | 16 | 17 | def ap_per_class_box_and_mask( 18 | tp_m, 19 | tp_b, 20 | conf, 21 | pred_cls, 22 | target_cls, 23 | plot=False, 24 | save_dir=".", 25 | names=(), 26 | ): 27 | """ 28 | Args: 29 | tp_b: tp of boxes. 30 | tp_m: tp of masks. 31 | other arguments see `func: ap_per_class`. 32 | """ 33 | results_boxes = ap_per_class(tp_b, 34 | conf, 35 | pred_cls, 36 | target_cls, 37 | plot=plot, 38 | save_dir=save_dir, 39 | names=names, 40 | prefix="Box")[2:] 41 | results_masks = ap_per_class(tp_m, 42 | conf, 43 | pred_cls, 44 | target_cls, 45 | plot=plot, 46 | save_dir=save_dir, 47 | names=names, 48 | prefix="Mask")[2:] 49 | 50 | results = { 51 | "boxes": { 52 | "p": results_boxes[0], 53 | "r": results_boxes[1], 54 | "ap": results_boxes[3], 55 | "f1": results_boxes[2], 56 | "ap_class": results_boxes[4]}, 57 | "masks": { 58 | "p": results_masks[0], 59 | "r": results_masks[1], 60 | "ap": results_masks[3], 61 | "f1": results_masks[2], 62 | "ap_class": results_masks[4]}} 63 | return results 64 | 65 | 66 | class Metric: 67 | 68 | def __init__(self) -> None: 69 | self.p = [] # (nc, ) 70 | self.r = [] # (nc, ) 71 | self.f1 = [] # (nc, ) 72 | self.all_ap = [] # (nc, 10) 73 | self.ap_class_index = [] # (nc, ) 74 | 75 | @property 76 | def ap50(self): 77 | """AP@0.5 of all classes. 
78 | Return: 79 | (nc, ) or []. 80 | """ 81 | return self.all_ap[:, 0] if len(self.all_ap) else [] 82 | 83 | @property 84 | def ap(self): 85 | """AP@0.5:0.95 86 | Return: 87 | (nc, ) or []. 88 | """ 89 | return self.all_ap.mean(1) if len(self.all_ap) else [] 90 | 91 | @property 92 | def mp(self): 93 | """mean precision of all classes. 94 | Return: 95 | float. 96 | """ 97 | return self.p.mean() if len(self.p) else 0.0 98 | 99 | @property 100 | def mr(self): 101 | """mean recall of all classes. 102 | Return: 103 | float. 104 | """ 105 | return self.r.mean() if len(self.r) else 0.0 106 | 107 | @property 108 | def map50(self): 109 | """Mean AP@0.5 of all classes. 110 | Return: 111 | float. 112 | """ 113 | return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 114 | 115 | @property 116 | def map(self): 117 | """Mean AP@0.5:0.95 of all classes. 118 | Return: 119 | float. 120 | """ 121 | return self.all_ap.mean() if len(self.all_ap) else 0.0 122 | 123 | def mean_results(self): 124 | """Mean of results, return mp, mr, map50, map""" 125 | return (self.mp, self.mr, self.map50, self.map) 126 | 127 | def class_result(self, i): 128 | """class-aware result, return p[i], r[i], ap50[i], ap[i]""" 129 | return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) 130 | 131 | def get_maps(self, nc): 132 | maps = np.zeros(nc) + self.map 133 | for i, c in enumerate(self.ap_class_index): 134 | maps[c] = self.ap[i] 135 | return maps 136 | 137 | def update(self, results): 138 | """ 139 | Args: 140 | results: tuple(p, r, ap, f1, ap_class) 141 | """ 142 | p, r, all_ap, f1, ap_class_index = results 143 | self.p = p 144 | self.r = r 145 | self.all_ap = all_ap 146 | self.f1 = f1 147 | self.ap_class_index = ap_class_index 148 | 149 | 150 | class Metrics: 151 | """Metric for boxes and masks.""" 152 | 153 | def __init__(self) -> None: 154 | self.metric_box = Metric() 155 | self.metric_mask = Metric() 156 | 157 | def update(self, results): 158 | """ 159 | Args: 160 | results: Dict{'boxes': Dict{}, 'masks': Dict{}} 161 | """ 162 | self.metric_box.update(list(results["boxes"].values())) 163 | self.metric_mask.update(list(results["masks"].values())) 164 | 165 | def mean_results(self): 166 | return self.metric_box.mean_results() + self.metric_mask.mean_results() 167 | 168 | def class_result(self, i): 169 | return self.metric_box.class_result(i) + self.metric_mask.class_result(i) 170 | 171 | def get_maps(self, nc): 172 | return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) 173 | 174 | @property 175 | def ap_class_index(self): 176 | # boxes and masks have the same ap_class_index 177 | return self.metric_box.ap_class_index 178 | 179 | 180 | KEYS = [ 181 | "train/box_loss", 182 | "train/seg_loss", # train loss 183 | "train/obj_loss", 184 | "train/cls_loss", 185 | "metrics/precision(B)", 186 | "metrics/recall(B)", 187 | "metrics/mAP_0.5(B)", 188 | "metrics/mAP_0.5:0.95(B)", # metrics 189 | "metrics/precision(M)", 190 | "metrics/recall(M)", 191 | "metrics/mAP_0.5(M)", 192 | "metrics/mAP_0.5:0.95(M)", # metrics 193 | "val/box_loss", 194 | "val/seg_loss", # val loss 195 | "val/obj_loss", 196 | "val/cls_loss", 197 | "x/lr0", 198 | "x/lr1", 199 | "x/lr2",] 200 | 201 | BEST_KEYS = [ 202 | "best/epoch", 203 | "best/precision(B)", 204 | "best/recall(B)", 205 | "best/mAP_0.5(B)", 206 | "best/mAP_0.5:0.95(B)", 207 | "best/precision(M)", 208 | "best/recall(M)", 209 | "best/mAP_0.5(M)", 210 | "best/mAP_0.5:0.95(M)",] 211 | -------------------------------------------------------------------------------- /utils/segment/plots.py: 
-------------------------------------------------------------------------------- 1 | import contextlib 2 | import math 3 | from pathlib import Path 4 | 5 | import cv2 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | import torch 10 | 11 | from .. import threaded 12 | from ..general import xywh2xyxy 13 | from ..plots import Annotator, colors 14 | 15 | 16 | def plot_masks(img, masks, colors, alpha=0.5): 17 | """ 18 | Args: 19 | img (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] 20 | masks (tensor): predicted masks on cuda, shape: [n, h, w] 21 | colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] 22 | Return: 23 | ndarray: img after draw masks, shape: [h, w, 3] 24 | 25 | transform colors and send img_gpu to cpu for the most time. 26 | """ 27 | img_gpu = img.clone() 28 | num_masks = len(masks) 29 | if num_masks == 0: 30 | return img.permute(1, 2, 0).contiguous().cpu().numpy() * 255 31 | 32 | # [n, 1, 1, 3] 33 | # faster this way to transform colors 34 | colors = torch.tensor(colors, device=img.device).float() / 255.0 35 | colors = colors[:, None, None, :] 36 | # [n, h, w, 1] 37 | masks = masks[:, :, :, None] 38 | masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha 39 | inv_alph_masks = masks * (-alpha) + 1 40 | masks_color_summand = masks_color[0] 41 | if num_masks > 1: 42 | inv_alph_cumul = inv_alph_masks[:(num_masks - 1)].cumprod(dim=0) 43 | masks_color_cumul = masks_color[1:] * inv_alph_cumul 44 | masks_color_summand += masks_color_cumul.sum(dim=0) 45 | 46 | # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] 47 | img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv 48 | img_gpu = img_gpu.permute(1, 2, 0).contiguous() 49 | # [h, w, 3] 50 | img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand 51 | return (img_gpu * 255).byte().cpu().numpy() 52 | 53 | 54 | @threaded 55 | def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): 56 | # Plot image grid with labels 57 | if isinstance(images, torch.Tensor): 58 | images = images.cpu().float().numpy() 59 | if isinstance(targets, torch.Tensor): 60 | targets = targets.cpu().numpy() 61 | if isinstance(masks, torch.Tensor): 62 | masks = masks.cpu().numpy().astype(int) 63 | 64 | max_size = 1920 # max image size 65 | max_subplots = 16 # max image subplots, i.e. 
4x4 66 | bs, _, h, w = images.shape # batch size, _, height, width 67 | bs = min(bs, max_subplots) # limit plot images 68 | ns = np.ceil(bs ** 0.5) # number of subplots (square) 69 | if np.max(images[0]) <= 1: 70 | images *= 255 # de-normalise (optional) 71 | 72 | # Build Image 73 | mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init 74 | for i, im in enumerate(images): 75 | if i == max_subplots: # if last batch has fewer images than we expect 76 | break 77 | x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin 78 | im = im.transpose(1, 2, 0) 79 | mosaic[y:y + h, x:x + w, :] = im 80 | 81 | # Resize (optional) 82 | scale = max_size / ns / max(h, w) 83 | if scale < 1: 84 | h = math.ceil(scale * h) 85 | w = math.ceil(scale * w) 86 | mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) 87 | 88 | # Annotate 89 | fs = int((h + w) * ns * 0.01) # font size 90 | annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) 91 | for i in range(i + 1): 92 | x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin 93 | annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders 94 | if paths: 95 | annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames 96 | if len(targets) > 0: 97 | idx = targets[:, 0] == i 98 | ti = targets[idx] # image targets 99 | 100 | boxes = xywh2xyxy(ti[:, 2:6]).T 101 | classes = ti[:, 1].astype('int') 102 | labels = ti.shape[1] == 6 # labels if no conf column 103 | conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) 104 | 105 | if boxes.shape[1]: 106 | if boxes.max() <= 1.01: # if normalized with tolerance 0.01 107 | boxes[[0, 2]] *= w # scale to pixels 108 | boxes[[1, 3]] *= h 109 | elif scale < 1: # absolute coords need scale if image scales 110 | boxes *= scale 111 | boxes[[0, 2]] += x 112 | boxes[[1, 3]] += y 113 | for j, box in enumerate(boxes.T.tolist()): 114 | cls = classes[j] 115 | color = colors(cls) 116 | cls = names[cls] if names else cls 117 | if labels or conf[j] > 0.25: # 0.25 conf thresh 118 | label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' 119 | annotator.box_label(box, label, color=color) 120 | 121 | # Plot masks 122 | if len(masks): 123 | if masks.max() > 1.0: # mean that masks are overlap 124 | image_masks = masks[[i]] # (1, 640, 640) 125 | nl = len(ti) 126 | index = np.arange(nl).reshape(nl, 1, 1) + 1 127 | image_masks = np.repeat(image_masks, nl, axis=0) 128 | image_masks = np.where(image_masks == index, 1.0, 0.0) 129 | else: 130 | image_masks = masks[idx] 131 | 132 | im = np.asarray(annotator.im).copy() 133 | for j, box in enumerate(boxes.T.tolist()): 134 | if labels or conf[j] > 0.25: # 0.25 conf thresh 135 | color = colors(classes[j]) 136 | mh, mw = image_masks[j].shape 137 | if mh != h or mw != w: 138 | mask = image_masks[j].astype(np.uint8) 139 | mask = cv2.resize(mask, (w, h)) 140 | mask = mask.astype(np.bool) 141 | else: 142 | mask = image_masks[j].astype(np.bool) 143 | with contextlib.suppress(Exception): 144 | im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 145 | annotator.fromarray(im) 146 | annotator.im.save(fname) # save 147 | 148 | 149 | def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): 150 | # Plot training results.csv. 
Usage: from utils.plots import *; plot_results('path/to/results.csv') 151 | save_dir = Path(file).parent if file else Path(dir) 152 | fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) 153 | ax = ax.ravel() 154 | files = list(save_dir.glob("results*.csv")) 155 | assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 156 | for f in files: 157 | try: 158 | data = pd.read_csv(f) 159 | index = np.argmax( 160 | 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + 161 | 0.1 * data.values[:, 11],) 162 | s = [x.strip() for x in data.columns] 163 | x = data.values[:, 0] 164 | for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): 165 | y = data.values[:, j] 166 | # y[y == 0] = np.nan # don't show zero values 167 | ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) 168 | if best: 169 | # best 170 | ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) 171 | ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") 172 | else: 173 | # last 174 | ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) 175 | ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") 176 | # if j in [8, 9, 10]: # share train and val loss y axes 177 | # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) 178 | except Exception as e: 179 | print(f"Warning: Plotting error for {f}: {e}") 180 | ax[1].legend() 181 | fig.savefig(save_dir / "results.png", dpi=200) 182 | plt.close() 183 | -------------------------------------------------------------------------------- /utils/wandb_logging/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/wandb_logging/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import yaml 4 | 5 | from wandb_utils import WandbLogger 6 | 7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 8 | 9 | 10 | def create_dataset_artifact(opt): 11 | with open(opt.data) as f: 12 | data = yaml.load(f, Loader=yaml.SafeLoader) # data dict 13 | logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation') 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path') 19 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 20 | parser.add_argument('--project', type=str, default='YOLOR', help='name of W&B Project') 21 | opt = parser.parse_args() 22 | opt.resume = False # Explicitly disallow resume check for dataset upload job 23 | 24 | create_dataset_artifact(opt) 25 | --------------------------------------------------------------------------------
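The mask-encoding helpers defined in `utils/segment/dataloaders.py` above are easiest to see in a small, self-contained sketch. The example below is illustrative only: the two triangle polygons are made up, and it assumes the repository root is on `PYTHONPATH` with the requirements installed so that `utils.segment.dataloaders` imports cleanly.

```python
import numpy as np

from utils.segment.dataloaders import polygons2masks, polygons2masks_overlap

img_size = (640, 640)  # (h, w) of the letterboxed training image
segments = [
    np.array([[100, 100], [300, 100], [200, 300]], dtype=np.float32),  # instance 0
    np.array([[150, 150], [400, 150], [300, 400]], dtype=np.float32),  # instance 1, partially overlapping
]

# One binary mask per instance at 1/4 resolution (downsample_ratio=4)
masks = polygons2masks(img_size, segments, color=1, downsample_ratio=4)
print(masks.shape)  # (2, 160, 160), values in {0, 1}

# Single "overlap" mask: each pixel stores the 1-based index of the instance that
# owns it. Instances are rasterized largest-first, so smaller instances keep their
# index wherever polygons overlap.
overlap, sorted_idx = polygons2masks_overlap(img_size, segments, downsample_ratio=4)
print(overlap.shape, int(overlap.max()))  # (160, 160) 2
print(sorted_idx)  # area-descending order of the input segments
```

`polygons2masks_overlap` is the branch `LoadImagesAndLabelsAndMasks.__getitem__` takes when `overlap_mask=True`; the returned sort order is also used there to permute the label rows so they stay aligned with the mask indices.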
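Inside `ComputeLoss.__call__`, that overlap encoding is decoded back into per-instance binary masks with a single broadcast comparison. A tiny standalone sketch of just that step, with made-up values, could look like this:

```python
import torch

# 0 = background, k = pixel owned by instance k (1-based), as produced by
# polygons2masks_overlap and batched by the dataloader
overlap = torch.tensor([[0., 1., 1.],
                        [0., 2., 2.],
                        [0., 0., 2.]])
tidxs = torch.tensor([1., 2.])  # 1-based target indices for this image

# Same pattern as torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
per_instance = torch.where(overlap[None] == tidxs.view(-1, 1, 1), 1.0, 0.0)
print(per_instance.shape)  # torch.Size([2, 3, 3]) -> one binary mask per target
```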
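At inference time, `process_mask` in `utils/segment/general.py` combines the prototype masks with per-detection coefficients. The sketch below feeds it random tensors instead of real network outputs, so only the shapes are meaningful; in the repository these tensors come from the segmentation head after NMS (see `segment/predict.py`).

```python
import torch

from utils.segment.general import mask_iou, process_mask

nm, mh, mw = 32, 160, 160            # prototypes: [mask_dim, mask_h, mask_w]
ih, iw = 640, 640                    # network input size (h, w)
protos = torch.randn(nm, mh, mw)     # stand-in for the proto output

n = 3                                # pretend NMS kept 3 detections
coeffs = torch.randn(n, nm)          # per-detection mask coefficients
boxes = torch.tensor([[ 50.,  60., 300., 400.],
                      [120., 100., 500., 380.],
                      [ 10., 200., 200., 630.]])  # xyxy in input-image pixels

# Crop at prototype resolution, then upsample to the input size
masks = process_mask(protos, coeffs, boxes, (ih, iw), upsample=True)
print(masks.shape)                   # torch.Size([3, 640, 640]), 0/1 values

# Pairwise IoU between flattened masks; the diagonal is ~1 since each mask
# is compared with itself
flat = masks.reshape(n, -1).float()
print(mask_iou(flat, flat).shape)    # torch.Size([3, 3])
```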
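Going the other way, `masks2segments` turns binary masks back into polygon contours, which is handy when exporting predictions as polygon labels. The rectangular mask below is synthetic; the helper itself assumes an OpenCV 4.x `findContours` return signature, as the dumped module does.

```python
import torch

from utils.segment.general import masks2segments

masks = torch.zeros(1, 160, 160)
masks[0, 40:120, 30:100] = 1                      # one synthetic rectangular instance
segs = masks2segments(masks, strategy='largest')  # list of (num_points, 2) float32 arrays
print(segs[0].shape)                              # (4, 2) for an axis-aligned rectangle
```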