├── .gitignore ├── LICENSE ├── README.md ├── config ├── config.yaml └── templates │ ├── yolov8-data-template.yaml │ └── yolov8-model-template.yaml ├── ensemble.py ├── img ├── ensemble-architecture.jpg ├── predictions1.jpg └── predictions8.jpg ├── models ├── __init__.py ├── detr │ ├── __init__.py │ ├── coco_detection.py │ ├── datasets_helper │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ └── transforms.py │ ├── detr.py │ ├── prediction.py │ ├── train.py │ └── util │ │ ├── __init__.py │ │ ├── box_ops.py │ │ ├── misc.py │ │ └── plot_results.py ├── ensemble │ └── detector.py ├── fastercnn │ ├── __init__.py │ ├── plot_results.py │ └── train.py └── yolo │ └── yolov8n.pt ├── requirements.txt ├── scripts ├── py │ ├── __pycache__ │ │ └── prepare_config.cpython-310.pyc │ ├── data_converter │ │ ├── coco_to_yolo.py │ │ └── yolo_to_coco.py │ ├── dataset_preparation │ │ ├── run_augmentation.py │ │ ├── split1.py │ │ └── split2.py │ ├── prepare_config.py │ └── preprocessing │ │ ├── clean_data.py │ │ └── resize_image.py └── sh │ ├── data_converter.sh │ ├── datasets_preparation.sh │ └── preprocessing.sh ├── test.py ├── train.py └── view_data.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | 3 | /config/actual_config/ 4 | /data/ 5 | /outputs/ 6 | /venv/ 7 | /env 8 | .idea 9 | ./yolov8n.pt 10 | preds.npy 11 | 12 | *.pt 13 | *.pth 14 | *.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 MarcoParola 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # **Detection framework** 2 | 3 | 4 | [![license](https://img.shields.io/static/v1?label=OS&message=Windows&color=green&style=plastic)]() 5 | [![Python](https://img.shields.io/static/v1?label=Python&message=3.10&color=blue&style=plastic)]() 6 | [![size](https://img.shields.io/github/languages/code-size/MarcoParola/detection_framework?style=plastic)]() 7 | [![license](https://img.shields.io/github/license/MarcoParola/detection_framework?style=plastic)]() 8 | 9 | 10 | The project concerns the development of an object detection ensemble architecture presented at [IEEE SSCI-2023](https://attend.ieee.org/ssci-2023/). 
Full text is available [here](https://ieeexplore.ieee.org/document/10371865). 11 | 12 | A Python wrapping framework for performing object detection tasks using state-of-the-art deep learning architectures: YOLOv8, Faster R-CNN and DEtection TRansformer (DETR). 13 | 14 | ![Ensemble architecture](img/ensemble-architecture.jpg) 15 | 16 | 17 | The architecture was tested on an oral cancer dataset; below are some examples of predictions. 18 | 19 | ![Example predictions](img/predictions1.jpg) 20 | ![Example predictions](img/predictions8.jpg) 21 | 22 | ## **Installation** 23 | 24 | To install the framework, simply clone the repository and install the necessary dependencies: 25 | ```sh 26 | git clone https://github.com/MarcoParola/detection_framework.git 27 | cd detection_framework 28 | mkdir models data data/orig data/yolo data/coco 29 | 30 | # TODO: when you download the json file, rename it to "coco_dataset.json" 31 | ``` 32 | 33 | Create and activate a virtual environment, then install the dependencies: 34 | ```sh 35 | python -m venv env 36 | . env/bin/activate 37 | python -m pip install -r requirements.txt 38 | python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' 39 | ``` 40 | 41 | Download the zip file containing the images (oral1.zip) and the COCO dataset (oral1.json), and put the COCO dataset in `./data/`. 42 | 43 | 44 | 45 | ## **Usage** 46 | 47 | ### **Data preparation** 48 | Since this framework relies on different models, different data formats are needed. 49 | During the project installation, 3 subfolders are created in `data`: `orig`, `yolo` and `coco`. 50 | The basic idea is to put your dataset images in the `orig` folder, then generate your yolo/coco dataset by using the preprocessing/converter scripts. Please note: if your data doesn't require any preprocessing, you can skip this step and directly put your data in the `yolo` or `coco` folder. 51 | 52 | ```sh 53 | sh scripts/sh/preprocessing.sh 54 | python -m scripts.py.preprocessing.resize_image preproc.img_size.width=640 preproc.img_size.height=640 55 | ``` 56 | 57 | 58 | 59 | ### **Fine-tune a model** 60 | The basic command to fine-tune a model is the following: 61 | 62 | > python train.py model=*model_name* dataset=*dataset_type* 63 | 64 | Where ``model`` can assume the following values: 65 | * yolo 66 | * fasterRCNN 67 | * detr 68 | 69 | while ``dataset`` can assume "coco" or "yolo". 70 | 71 | 72 | The default folder for the images is ``./data/images/``; if you want to put your files in a different folder, override the ``datasets.img_path`` argument: 73 | 74 | > python train.py model=fasterRCNN dataset=coco.json datasets.img_path=**new_img_path** 75 | 76 | To specify the name with which to save the model after fine-tuning, use the ``model_name`` argument: 77 | 78 | > python train.py model=fasterRCNN dataset=coco.json model_name=**name** 79 | 80 | If you find this repo useful, please cite it as: 81 | ``` 82 | @INPROCEEDINGS{10371865, 83 | author={Parola, Marco and Mantia, Gaetano La and Galatolo, Federico and Cimino, Mario G.C.A.
and Campisi, Giuseppina and Di Fede, Olga}, 84 | booktitle={2023 IEEE Symposium Series on Computational Intelligence (SSCI)}, 85 | title={Image-Based Screening of Oral Cancer via Deep Ensemble Architecture}, 86 | year={2023}, 87 | volume={}, 88 | number={}, 89 | pages={1572-1578}, 90 | doi={10.1109/SSCI52147.2023.10371865} 91 | } 92 | ``` 93 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | project_path: ${hydra:runtime.cwd} 2 | model: yolo 3 | dataset: coco 4 | model_name: model 5 | 6 | config: 7 | config_path: config 8 | templates_path: config/templates 9 | actual_config_path: config/actual_config 10 | 11 | preproc: 12 | preprocessed_annotation: dataset.json 13 | orig: 14 | img_path: data/orig/images 15 | augmentation: 16 | img_path: data/coco/aug_images 17 | img_size: 18 | width: 640 19 | height: 640 20 | split_percentage: 0.6 21 | 22 | datasets: 23 | n_classes: 3 24 | color_channels: 3 25 | labels: 26 | - 0 27 | - 1 28 | - 2 29 | class_name: 30 | - 'neoplastic' 31 | - 'aphthous' 32 | - 'traumatic' 33 | class_file: classes.txt 34 | path: ${hydra:runtime.cwd}/data/ 35 | img_path: images/ 36 | original_data: orig/ 37 | filenames: 38 | dataset: coco_dataset.json 39 | dataset_type: train 40 | datasets_path: 41 | coco: 42 | folder: coco 43 | train: coco/train.json 44 | val: coco/val.json 45 | test: coco/test.json 46 | yolo: 47 | folder: yolo 48 | train: yolo/train 49 | val: yolo/val 50 | test: yolo/test 51 | 52 | yolo: 53 | yolo_templates: 54 | model_template: yolov8-model-template.yaml 55 | data_template: yolov8-data-template.yaml 56 | yolo_config: 57 | model_config: yolov8-model.yaml 58 | data_config_train: yolov8-data.yaml 59 | data_config_test: yolov8-data-test.yaml 60 | yolo_model: yolov8n.pt 61 | parameters: 62 | device: 0 63 | output_dir: outputs/yolo/model_outputs 64 | yolo_model_path: train/weights/best.pt 65 | yolo_detect_output_path: outputs/yolo/model_results_on_test 66 | 67 | fastercnn: 68 | parameters: 69 | train_dataset_name: oralcancer_train 70 | val_dataset_name: oralcancer_val 71 | test_dataset_name: oralcancer_test 72 | config_file_path: COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml 73 | checkpoint_url: COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml 74 | output_dir: outputs/fastercnn/model_outputs 75 | device: cuda 76 | fastercnn_model_path: best_model.pth 77 | fastercnn_detect_output_path: outputs/fastercnn/model_results_on_test 78 | 79 | detr: 80 | detr_path: detr 81 | parameters: 82 | dataset_file: custom 83 | coco_path: data/coco 84 | output_dir: outputs/detr 85 | logs_dir: training_logs 86 | device: cuda 87 | feature_extractor: facebook/detr-resnet-50 88 | lr_backbone: 1e-05 89 | gradient_clip_val: 0.1 90 | detr_model_path: model.pth 91 | detr_detect_output_path: outputs/detr/model_results_on_test 92 | 93 | 94 | training: 95 | wandb: false 96 | lr: 5e-5 97 | epochs: 200 98 | batch: 2 99 | val_batch: 1 100 | early_stopping: 101 | monitor: val_loss 102 | patience: 5 103 | weight_decay: 0.0001 104 | optimizer: Adam 105 | workers: 8 106 | img_size: 640 107 | 108 | test: 109 | confidence_threshold: 0.2 110 | iou_threshold: 0.5 111 | 112 | models: 113 | path: ${hydra:runtime.cwd}/models 114 | 115 | 116 | wandb: 117 | entity: mlpi 118 | project: oral 119 | tag: "" 120 | -------------------------------------------------------------------------------- /config/templates/yolov8-data-template.yaml: 
-------------------------------------------------------------------------------- 1 | train: {train_path} # train images 2 | val: {val_path} # val images 3 | 4 | names: {class_list_names} -------------------------------------------------------------------------------- /config/templates/yolov8-model-template.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: {nc} # number of classes 6 | scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) -------------------------------------------------------------------------------- /ensemble.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import hydra 4 | import os 5 | import cv2 6 | import torch 7 | 8 | from ultralytics import YOLO 9 | 10 | from detectron2.engine import DefaultPredictor 11 | from detectron2.data import DatasetCatalog, MetadataCatalog 12 | from detectron2.utils.visualizer import Visualizer 13 | from transformers import DetrFeatureExtractor 14 | 15 | from models.detr.detr import Detr 16 | from models.ensemble.detector import EnsembledDetector 17 | from scripts.py.prepare_config import prepare_config 18 | from models.detr.prediction import visualize_predictions, get_predictions 19 | 20 | from PIL import Image, ImageDraw 21 | 22 | import numpy as np 23 | from matplotlib import pyplot as plt 24 | from torchvision.io import read_image 25 | from torchmetrics.detection.mean_ap import MeanAveragePrecision 26 | from pprint import pprint 27 | 28 | import tensorflow as tf 29 | from tensorflow import keras 30 | import keras_cv 31 | 32 | 33 
| font = cv2.FONT_HERSHEY_SIMPLEX 34 | border_size = 4 35 | 36 | def plot_rect_and_text(img, bbox, text): 37 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color=(0, 255, 0), thickness=border_size) 38 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[0])+237, int(bbox[1])-50), color=(0, 255, 0), thickness=-1) 39 | cv2.putText(img, text, (int(bbox[0]+4), int(bbox[1])-10), fontScale=1.46, fontFace=font, color=(0, 0, 0), thickness=border_size) 40 | 41 | 42 | 43 | def compute_iou(bbox1, bbox2): 44 | # Extract coordinates from the bounding boxes 45 | x1, y1, w1, h1 = bbox1 46 | x2, y2, w2, h2 = bbox2 47 | 48 | # Calculate the coordinates of the intersection rectangle 49 | x_left = max(x1, x2) 50 | y_top = max(y1, y2) 51 | x_right = min(x1 + w1, x2 + w2) 52 | y_bottom = min(y1 + h1, y2 + h2) 53 | 54 | # Calculate the intersection area 55 | intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top) 56 | 57 | # Calculate the union area 58 | bbox1_area = w1 * h1 59 | bbox2_area = w2 * h2 60 | union_area = bbox1_area + bbox2_area - intersection_area 61 | 62 | # Calculate IoU 63 | iou = intersection_area / union_area if union_area > 0 else 0 64 | return iou 65 | 66 | 67 | 68 | def compute_metric_map(actual, predicted): 69 | num_images = len(actual) 70 | average_precisions = [] 71 | 72 | map50_95 = [] 73 | 74 | for map_step in np.arange(.5,1.,.05): 75 | for i in range(num_images): 76 | image_actual = actual[i] 77 | image_predicted = predicted[i] 78 | 79 | sorted_predicted = image_predicted 80 | sorted_actual = image_actual 81 | 82 | num_predictions = len(sorted_predicted) 83 | true_positives = np.zeros(num_predictions) 84 | false_positives = np.zeros(num_predictions) 85 | precision = [] 86 | recall = [] 87 | 88 | num_actual = len(sorted_actual) 89 | is_true_positive = np.zeros(num_actual, dtype=bool) 90 | 91 | for j, pred in enumerate(sorted_predicted): 92 | best_iou = 0.0 93 | best_match = -1 94 | 95 | for k, actual_bbox in enumerate(sorted_actual): 96 | iou = compute_iou(pred[:-2], actual_bbox[:-1]) 97 | if iou > best_iou: 98 | best_iou = iou 99 | best_match = k 100 | 101 | if best_iou >= map_step and not is_true_positive[best_match] and pred[-2]==actual_bbox[-1]: 102 | true_positives[j] = 1 103 | is_true_positive[best_match] = True 104 | else: 105 | false_positives[j] = 1 106 | 107 | precision.append(np.sum(true_positives) / (np.sum(true_positives) + np.sum(false_positives))) 108 | recall.append(np.sum(true_positives) / num_actual) 109 | 110 | average_precision = 0.0 111 | previous_recall = 0.0 112 | for prec, rec in zip(precision, recall): 113 | if np.isnan(prec): 114 | prec = 0 115 | if np.isnan(rec): 116 | rec = 0 117 | average_precision += (rec - previous_recall) * prec 118 | previous_recall = rec 119 | 120 | mean_average_precision = np.mean(average_precision) 121 | average_precisions.append(mean_average_precision) 122 | 123 | mean_average_precision_dataset = np.mean(average_precisions) 124 | map50_95.append(mean_average_precision_dataset) 125 | 126 | return np.mean(map50_95) 127 | #return mean_average_precision_dataset 128 | 129 | 130 | 131 | @hydra.main(config_path="./config/", config_name="config", version_base=None) 132 | def detect(cfg): 133 | if cfg.model == 'yolo': 134 | model_path = os.path.join(cfg.project_path, cfg.yolo.parameters.output_dir, cfg.yolo.yolo_model_path) 135 | model = YOLO(model_path) # load a custom model 136 | 137 | # define paths to input and output folders 138 | input_folder = os.path.join(cfg.datasets.path, 
cfg.datasets.datasets_path.yolo.test, cfg.datasets.img_path) 139 | output_folder = os.path.join(cfg.project_path, cfg.yolo.yolo_detect_output_path) 140 | 141 | if not os.path.exists(output_folder): 142 | os.makedirs(output_folder) 143 | 144 | # loop over each image in the input folder 145 | for image_name in os.listdir(input_folder): 146 | # read image 147 | image_path = os.path.join(input_folder, image_name) 148 | image = cv2.imread(image_path) 149 | # detect objects and get bounding boxes 150 | res = model(image) 151 | res_plotted = res[0].plot() 152 | # save image with bounding boxes 153 | output_path = os.path.join(output_folder, image_name) 154 | cv2.imwrite(output_path, res_plotted) 155 | 156 | 157 | if cfg.model == 'fasterRCNN': 158 | output_folder = os.path.join(cfg.project_path, cfg.fastercnn.fastercnn_detect_output_path) 159 | if not os.path.exists(output_folder): 160 | os.makedirs(output_folder) 161 | 162 | cfg.fastercnn.parameters.checkpoint_url = os.path.join(cfg.project_path, cfg.fastercnn.parameters.output_dir, 163 | cfg.fastercnn.fastercnn_model_path) 164 | config = prepare_config(cfg, 'test') 165 | predictor = DefaultPredictor(config) 166 | test_dataset_dicts = DatasetCatalog.get(cfg.fastercnn.parameters.test_dataset_name) 167 | # Loop over each image in the test dataset 168 | for d in test_dataset_dicts: 169 | # Load the image 170 | img = cv2.imread(d["file_name"]) 171 | # Use the predictor to generate predictions for the image 172 | outputs = predictor(img) 173 | # Get the predicted instances with the highest confidence scores 174 | instances = outputs["instances"] 175 | scores = instances.scores.tolist() 176 | indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:1] 177 | instances = instances[indices] 178 | # Visualize the predictions on the image 179 | v = Visualizer(img[:, :, ::-1], MetadataCatalog.get(cfg.fastercnn.parameters.test_dataset_name)) 180 | v = v.draw_instance_predictions(instances.to("cpu")) 181 | # Save the image with the bounding boxes 182 | output_path = os.path.join(output_folder, os.path.basename(d["file_name"])) 183 | cv2.imwrite(output_path, v.get_image()[:, :, ::-1]) 184 | 185 | 186 | if cfg.model == "detr": 187 | # define paths to input and output folders 188 | input_folder = os.path.join(cfg.project_path, cfg.preproc.augmentation.img_path) 189 | output_folder = os.path.join(cfg.project_path, cfg.detr.detr_detect_output_path) 190 | test_annotation_file = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.coco.test) 191 | 192 | if not os.path.exists(output_folder): 193 | os.makedirs(output_folder) 194 | 195 | with open(test_annotation_file, 'r') as f: 196 | test_data = json.load(f) 197 | 198 | # Define the model and the feature extractor 199 | model_path = os.path.join(os.path.join(cfg.project_path, cfg.detr.parameters.output_dir), 200 | cfg.detr.detr_model_path) 201 | feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50") 202 | model = Detr(num_labels=cfg.datasets.n_classes) 203 | model = model.load_from_checkpoint(model_path) 204 | model.eval() 205 | 206 | # Apply detection to each test image 207 | for image_info in test_data["images"]: 208 | image_name = image_info["file_name"] 209 | image_path = os.path.join(input_folder, image_name) 210 | img = Image.open(image_path) 211 | encoding = feature_extractor(img, return_tensors="pt") 212 | encoding.keys() 213 | outputs = model(**encoding) 214 | visualize_predictions(img, outputs, output_folder, image_name, cfg.datasets.class_name) 215 | 
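# ---------------------------------------------------------------------------
# Illustrative note (a hedged sketch added for clarity, not part of the
# original pipeline): compute_iou() above unpacks each box as (x, y, w, h).
# A minimal worked example under that assumption:
#
#     compute_iou((0, 0, 10, 10), (5, 0, 10, 10))
#     # intersection = 5 * 10 = 50, union = 100 + 100 - 50 = 150 -> IoU ~= 0.33
#
# The ground-truth targets assembled in the ensemble branch below are stored
# as [x_min, y_min, x_max, y_max, label] (corner format), so both arguments
# fed to compute_iou()/compute_metric_map() should share a single box
# convention; a hypothetical converter could look like:
#
#     def xyxy_to_xywh(box):
#         x_min, y_min, x_max, y_max = box[:4]
#         return [x_min, y_min, x_max - x_min, y_max - y_min]
# ---------------------------------------------------------------------------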
216 | 217 | preds, targets = [],[] 218 | 219 | if cfg.model == "ensemble": 220 | 221 | # YOLO 222 | model_path = os.path.join(cfg.project_path, cfg.yolo.parameters.output_dir, cfg.yolo.yolo_model_path) 223 | model_yolo = YOLO(model_path) # load a custom model 224 | input_folder_yolo = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.yolo.test, cfg.datasets.img_path) 225 | test_yolo = os.listdir(input_folder_yolo) 226 | 227 | # FASTER-RCNN 228 | cfg.fastercnn.parameters.checkpoint_url = os.path.join(cfg.project_path, cfg.fastercnn.parameters.output_dir, 229 | cfg.fastercnn.fastercnn_model_path) 230 | cfg.model='fasterRCNN' 231 | config = prepare_config(cfg, 'test') 232 | model_fasterRCNN = DefaultPredictor(config) 233 | test_dataset_dicts = DatasetCatalog.get(cfg.fastercnn.parameters.test_dataset_name) 234 | 235 | # DETR 236 | input_folder_detr = os.path.join(cfg.project_path, cfg.preproc.augmentation.img_path) 237 | test_annotation_file = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.coco.test) 238 | with open(test_annotation_file, 'r') as f: 239 | test_data = json.load(f) 240 | # Define the model and the feature extractor 241 | model_path = os.path.join(os.path.join(cfg.project_path, cfg.detr.parameters.output_dir), 242 | cfg.detr.detr_model_path) 243 | feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50") 244 | model_detr = Detr(num_labels=cfg.datasets.n_classes) 245 | model_detr = model_detr.load_from_checkpoint(model_path) 246 | model_detr.eval() 247 | 248 | 249 | 250 | # prepare actual values to compute metric 251 | for i in range(len(test_data['images'])): 252 | target = [] 253 | d = test_dataset_dicts[i] 254 | for annotation in d['annotations']: 255 | bb = annotation['bbox'] 256 | target.append(np.array([bb[0], bb[1], bb[0]+bb[2], bb[1]+bb[3],annotation['category_id']])) 257 | targets.append(np.array(target)) 258 | targets = np.array(targets) 259 | #targets = targets.astype(np.float32) 260 | 261 | 262 | for i in range(len(test_yolo)): 263 | # YOLO 264 | image_name = test_yolo[i] 265 | image_path = os.path.join(input_folder_yolo, image_name) 266 | image_yolo = cv2.imread(image_path) 267 | image_multiple_bboxes = cv2.imread(image_path) 268 | # detect objects and get bounding boxes 269 | res = model_yolo(image_yolo) 270 | bbox_yolo, label_yolo = res[0].boxes.boxes, res[0].boxes.cls 271 | for bbox, label in zip( bbox_yolo, label_yolo ): 272 | plot_rect_and_text(image_yolo, bbox, cfg.datasets.class_name[int(label.item())]) 273 | cv2.rectangle(image_multiple_bboxes, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color=(0, 255, 0), thickness=border_size) 274 | 275 | 276 | # FASTER-RCNN 277 | d = test_dataset_dicts[i] 278 | img_fasterRCNN = cv2.imread(d["file_name"]) 279 | # Use the predictor to generate predictions for the image 280 | outputs = model_fasterRCNN(img_fasterRCNN) 281 | # Get the predicted instances with the highest confidence scores 282 | instances = outputs["instances"] 283 | scores = instances.scores.tolist() 284 | indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:1] 285 | instances = instances[indices] 286 | bbox_fasterRCNN, label_fasterRCNN = instances.pred_boxes, instances.pred_classes 287 | for bbox, label in zip( bbox_fasterRCNN, label_fasterRCNN ): 288 | plot_rect_and_text(img_fasterRCNN, bbox, cfg.datasets.class_name[int(label.item())]) 289 | cv2.rectangle(image_multiple_bboxes, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color=(255, 0, 0), 
thickness=border_size) 290 | 291 | 292 | # DETR 293 | image_name = d["file_name"] 294 | image_path = os.path.join(input_folder_detr, image_name) 295 | img_detr = Image.open(image_path) 296 | encoding = feature_extractor(img_detr, return_tensors="pt") 297 | encoding.keys() 298 | outputs = model_detr(**encoding) 299 | probas = outputs.logits.softmax(-1)[0, :, :-1] 300 | threshold=0.1 301 | keep = probas.max(-1).values > threshold 302 | label_detr, bbox_detr = get_predictions(img_detr, outputs, '', image_name, cfg.datasets.class_name) 303 | label_detr = torch.argmax(label_detr, dim=1) 304 | print('LABEL DETR:', label_detr) 305 | img_detr = np.asarray(img_detr) 306 | for bbox, label in zip( bbox_detr, label_detr): 307 | plot_rect_and_text(img_detr, bbox, cfg.datasets.class_name[int(label.item())]) 308 | cv2.rectangle(image_multiple_bboxes, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color=(0, 0, 255), thickness=border_size) 309 | 310 | 311 | predictions = [ 312 | { 313 | 'model' : 'yolo', 314 | 'labels': label_yolo, 315 | 'bboxes': bbox_yolo }, 316 | { 317 | 'model' : 'fasterRCNN', 318 | 'labels': label_fasterRCNN, 319 | 'bboxes': bbox_fasterRCNN }, 320 | { 321 | 'model' : 'detr', 322 | 'labels': label_detr, 323 | 'bboxes': bbox_detr }, 324 | ] 325 | 326 | 327 | 328 | # ENSEMBLE 329 | img_ens = Image.open(image_path) 330 | img_ens = np.asarray(img_ens) 331 | ens_detector = EnsembledDetector() 332 | label_ens, bbox_ens = ens_detector.predict(predictions, .45) 333 | for bbox, label in zip( bbox_ens, label_ens ): 334 | print(bbox, label) 335 | plot_rect_and_text(img_ens, bbox, cfg.datasets.class_name[label]) 336 | cv2.rectangle(image_multiple_bboxes, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color=(0, 0, 0), thickness=3) 337 | 338 | 339 | # GROUND THRUTH 340 | 341 | img = Image.open(image_path) 342 | 343 | for annotation in d['annotations']: 344 | img = np.asarray(img) 345 | bbox = annotation['bbox'] 346 | bbox = [int(bbox[0]), int(bbox[1]), int(bbox[0]) + int(bbox[2]), int(bbox[1]) + int(bbox[3]) ] 347 | label = annotation['category_id'] 348 | segmentation = annotation['segmentation'][0] 349 | plot_rect_and_text(img, bbox, cfg.datasets.class_name[label]) 350 | cv2.rectangle(image_multiple_bboxes, (bbox[0], bbox[1]), ( bbox[2], bbox[3] ), color=(255, 255, 255), thickness=3) 351 | 352 | pred = [] 353 | for bbox, label in zip(bbox_ens,label_ens): 354 | pred.append(np.array(bbox+[label, 1.])) 355 | preds.append(np.array(pred)) 356 | 357 | plt.rcParams.update({'font.size': 13}) 358 | # PLOT VARI 359 | img = np.asarray(img) 360 | plt.figure(figsize=(18,3.3)) 361 | plt.subplots_adjust(left=0.01, bottom=0.001, right=0.99, top=.999, wspace=0.1, hspace=0.01) 362 | plt.subplot(151) 363 | plt.imshow(img) 364 | plt.title('Ground truth') 365 | plt.xticks([], []) 366 | plt.yticks([], []) 367 | 368 | plt.subplot(152) 369 | plt.imshow(image_yolo[...,::-1]) 370 | plt.title('YOLOv8') 371 | plt.xticks([], []) 372 | plt.yticks([], []) 373 | 374 | plt.subplot(153) 375 | plt.imshow( img_fasterRCNN[...,::-1]) 376 | plt.title('FasterRCNN') 377 | plt.xticks([], []) 378 | plt.yticks([], []) 379 | 380 | plt.subplot(154) 381 | plt.imshow(img_detr) 382 | plt.title('DETR') 383 | plt.xticks([], []) 384 | plt.yticks([], []) 385 | 386 | plt.subplot(155) 387 | plt.imshow(img_ens) 388 | plt.title('Ensemble') 389 | plt.xticks([], []) 390 | plt.yticks([], []) 391 | 392 | plt.show() 393 | 394 | preds = np.array(preds) 395 | np.save('preds.npy', preds) 396 | preds = np.load('preds.npy', 
allow_pickle=True) 397 | 398 | mAP = compute_metric_map(targets, preds) 399 | print("mAP:", mAP) 400 | 401 | 402 | 403 | if __name__ == '__main__': 404 | detect() 405 | #main() -------------------------------------------------------------------------------- /img/ensemble-architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/img/ensemble-architecture.jpg -------------------------------------------------------------------------------- /img/predictions1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/img/predictions1.jpg -------------------------------------------------------------------------------- /img/predictions8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/img/predictions8.jpg -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/models/__init__.py -------------------------------------------------------------------------------- /models/detr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/models/detr/__init__.py -------------------------------------------------------------------------------- /models/detr/coco_detection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @author: A.Akdogan 5 | """ 6 | 7 | import torchvision 8 | 9 | 10 | class CocoDetection(torchvision.datasets.CocoDetection): 11 | def __init__(self, img_folder, train_json_path, test_json_path, feature_extractor, train=True): 12 | # ann_file = os.path.join(img_folder, "custom_train.json" if train else "custom_val.json") 13 | if train: 14 | ann_file = train_json_path 15 | else: 16 | ann_file = test_json_path 17 | super(CocoDetection, self).__init__(img_folder, ann_file) 18 | self.feature_extractor = feature_extractor 19 | 20 | def __getitem__(self, idx): 21 | # read in PIL image and target in COCO format 22 | img, target = super(CocoDetection, self).__getitem__(idx) 23 | 24 | # preprocess image and target (converting target to DETR format, resizing + normalization of both image and target) 25 | image_id = self.ids[idx] 26 | target = {'image_id': image_id, 'annotations': target} 27 | encoding = self.feature_extractor(images=img, annotations=target, return_tensors="pt") 28 | pixel_values = encoding["pixel_values"].squeeze() # remove batch dimension 29 | target = encoding["labels"][0] # remove batch dimension 30 | 31 | return pixel_values, target 32 | -------------------------------------------------------------------------------- /models/detr/datasets_helper/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import torch.utils.data 3 | import torchvision 4 | 5 | 6 | def get_coco_api_from_dataset(dataset): 7 | for _ in range(10): 8 | # if isinstance(dataset, torchvision.datasets.CocoDetection): 9 | # break 10 | if isinstance(dataset, torch.utils.data.Subset): 11 | dataset = dataset.dataset 12 | if isinstance(dataset, torchvision.datasets.CocoDetection): 13 | return dataset.coco 14 | -------------------------------------------------------------------------------- /models/detr/datasets_helper/coco_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | COCO evaluator that works in distributed mode. 4 | 5 | Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py 6 | The difference is that there is less copy-pasting from pycocotools 7 | in the end of the file, as python3 can suppress prints with contextlib 8 | """ 9 | import os 10 | import contextlib 11 | import copy 12 | import numpy as np 13 | import torch 14 | 15 | from pycocotools.cocoeval import COCOeval 16 | from pycocotools.coco import COCO 17 | import pycocotools.mask as mask_util 18 | 19 | from util.misc import all_gather 20 | 21 | 22 | class CocoEvaluator(object): 23 | def __init__(self, coco_gt, iou_types): 24 | assert isinstance(iou_types, (list, tuple)) 25 | coco_gt = copy.deepcopy(coco_gt) 26 | self.coco_gt = coco_gt 27 | 28 | self.iou_types = iou_types 29 | self.coco_eval = {} 30 | for iou_type in iou_types: 31 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 32 | 33 | self.img_ids = [] 34 | self.eval_imgs = {k: [] for k in iou_types} 35 | 36 | def update(self, predictions): 37 | img_ids = list(np.unique(list(predictions.keys()))) 38 | self.img_ids.extend(img_ids) 39 | 40 | for iou_type in self.iou_types: 41 | results = self.prepare(predictions, iou_type) 42 | 43 | # suppress pycocotools prints 44 | with open(os.devnull, 'w') as devnull: 45 | with contextlib.redirect_stdout(devnull): 46 | coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() 47 | coco_eval = self.coco_eval[iou_type] 48 | 49 | coco_eval.cocoDt = coco_dt 50 | coco_eval.params.imgIds = list(img_ids) 51 | img_ids, eval_imgs = evaluate(coco_eval) 52 | 53 | self.eval_imgs[iou_type].append(eval_imgs) 54 | 55 | def synchronize_between_processes(self): 56 | for iou_type in self.iou_types: 57 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 58 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 59 | 60 | def accumulate(self): 61 | for coco_eval in self.coco_eval.values(): 62 | coco_eval.accumulate() 63 | 64 | def summarize(self): 65 | for iou_type, coco_eval in self.coco_eval.items(): 66 | print("IoU metric: {}".format(iou_type)) 67 | coco_eval.summarize() 68 | 69 | def prepare(self, predictions, iou_type): 70 | if iou_type == "bbox": 71 | return self.prepare_for_coco_detection(predictions) 72 | elif iou_type == "segm": 73 | return self.prepare_for_coco_segmentation(predictions) 74 | elif iou_type == "keypoints": 75 | return self.prepare_for_coco_keypoint(predictions) 76 | else: 77 | raise ValueError("Unknown iou type {}".format(iou_type)) 78 | 79 | def prepare_for_coco_detection(self, predictions): 80 | coco_results = [] 81 | for original_id, prediction in predictions.items(): 82 | if len(prediction) == 0: 83 | continue 84 | 85 | boxes = prediction["boxes"] 86 | boxes = 
convert_to_xywh(boxes).tolist() 87 | scores = prediction["scores"].tolist() 88 | labels = prediction["labels"].tolist() 89 | 90 | coco_results.extend( 91 | [ 92 | { 93 | "image_id": original_id, 94 | "category_id": labels[k], 95 | "bbox": box, 96 | "score": scores[k], 97 | } 98 | for k, box in enumerate(boxes) 99 | ] 100 | ) 101 | return coco_results 102 | 103 | def prepare_for_coco_segmentation(self, predictions): 104 | coco_results = [] 105 | for original_id, prediction in predictions.items(): 106 | if len(prediction) == 0: 107 | continue 108 | 109 | scores = prediction["scores"] 110 | labels = prediction["labels"] 111 | masks = prediction["masks"] 112 | 113 | masks = masks > 0.5 114 | 115 | scores = prediction["scores"].tolist() 116 | labels = prediction["labels"].tolist() 117 | 118 | rles = [ 119 | mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] 120 | for mask in masks 121 | ] 122 | for rle in rles: 123 | rle["counts"] = rle["counts"].decode("utf-8") 124 | 125 | coco_results.extend( 126 | [ 127 | { 128 | "image_id": original_id, 129 | "category_id": labels[k], 130 | "segmentation": rle, 131 | "score": scores[k], 132 | } 133 | for k, rle in enumerate(rles) 134 | ] 135 | ) 136 | return coco_results 137 | 138 | def prepare_for_coco_keypoint(self, predictions): 139 | coco_results = [] 140 | for original_id, prediction in predictions.items(): 141 | if len(prediction) == 0: 142 | continue 143 | 144 | boxes = prediction["boxes"] 145 | boxes = convert_to_xywh(boxes).tolist() 146 | scores = prediction["scores"].tolist() 147 | labels = prediction["labels"].tolist() 148 | keypoints = prediction["keypoints"] 149 | keypoints = keypoints.flatten(start_dim=1).tolist() 150 | 151 | coco_results.extend( 152 | [ 153 | { 154 | "image_id": original_id, 155 | "category_id": labels[k], 156 | 'keypoints': keypoint, 157 | "score": scores[k], 158 | } 159 | for k, keypoint in enumerate(keypoints) 160 | ] 161 | ) 162 | return coco_results 163 | 164 | 165 | def convert_to_xywh(boxes): 166 | xmin, ymin, xmax, ymax = boxes.unbind(1) 167 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 168 | 169 | 170 | def merge(img_ids, eval_imgs): 171 | all_img_ids = all_gather(img_ids) 172 | all_eval_imgs = all_gather(eval_imgs) 173 | 174 | merged_img_ids = [] 175 | for p in all_img_ids: 176 | merged_img_ids.extend(p) 177 | 178 | merged_eval_imgs = [] 179 | for p in all_eval_imgs: 180 | merged_eval_imgs.append(p) 181 | 182 | merged_img_ids = np.array(merged_img_ids) 183 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 184 | 185 | # keep only unique (and in sorted order) images 186 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 187 | merged_eval_imgs = merged_eval_imgs[..., idx] 188 | 189 | return merged_img_ids, merged_eval_imgs 190 | 191 | 192 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 193 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 194 | img_ids = list(img_ids) 195 | eval_imgs = list(eval_imgs.flatten()) 196 | 197 | coco_eval.evalImgs = eval_imgs 198 | coco_eval.params.imgIds = img_ids 199 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 200 | 201 | 202 | ################################################################# 203 | # From pycocotools, just removed the prints and fixed 204 | # a Python3 bug about unicode not defined 205 | ################################################################# 206 | 207 | 208 | def evaluate(self): 209 | ''' 210 | Run per image evaluation on given images and store results (a 
list of dict) in self.evalImgs 211 | :return: None 212 | ''' 213 | # tic = time.time() 214 | # print('Running per image evaluation...') 215 | p = self.params 216 | # add backward compatibility if useSegm is specified in params 217 | if p.useSegm is not None: 218 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 219 | print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) 220 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 221 | p.imgIds = list(np.unique(p.imgIds)) 222 | if p.useCats: 223 | p.catIds = list(np.unique(p.catIds)) 224 | p.maxDets = sorted(p.maxDets) 225 | self.params = p 226 | 227 | self._prepare() 228 | # loop through images, area range, max detection number 229 | catIds = p.catIds if p.useCats else [-1] 230 | 231 | if p.iouType == 'segm' or p.iouType == 'bbox': 232 | computeIoU = self.computeIoU 233 | elif p.iouType == 'keypoints': 234 | computeIoU = self.computeOks 235 | self.ious = { 236 | (imgId, catId): computeIoU(imgId, catId) 237 | for imgId in p.imgIds 238 | for catId in catIds} 239 | 240 | evaluateImg = self.evaluateImg 241 | maxDet = p.maxDets[-1] 242 | evalImgs = [ 243 | evaluateImg(imgId, catId, areaRng, maxDet) 244 | for catId in catIds 245 | for areaRng in p.areaRng 246 | for imgId in p.imgIds 247 | ] 248 | # this is NOT in the pycocotools code, but could be done outside 249 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 250 | self._paramsEval = copy.deepcopy(self.params) 251 | # toc = time.time() 252 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 253 | return p.imgIds, evalImgs 254 | 255 | ################################################################# 256 | # end of straight copy from pycocotools, just removing the prints 257 | ################################################################# 258 | -------------------------------------------------------------------------------- /models/detr/datasets_helper/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Transforms and data augmentation for both image + bbox. 4 | """ 5 | import random 6 | 7 | import PIL 8 | import torch 9 | import torchvision.transforms as T 10 | import torchvision.transforms.functional as F 11 | 12 | from ..util.box_ops import box_xyxy_to_cxcywh 13 | from ..util.misc import interpolate 14 | 15 | 16 | def crop(image, target, region): 17 | cropped_image = F.crop(image, *region) 18 | 19 | target = target.copy() 20 | i, j, h, w = region 21 | 22 | # should we do something wrt the original size? 23 | target["size"] = torch.tensor([h, w]) 24 | 25 | fields = ["labels", "area", "iscrowd"] 26 | 27 | if "boxes" in target: 28 | boxes = target["boxes"] 29 | max_size = torch.as_tensor([w, h], dtype=torch.float32) 30 | cropped_boxes = boxes - torch.as_tensor([j, i, j, i]) 31 | cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) 32 | cropped_boxes = cropped_boxes.clamp(min=0) 33 | area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1) 34 | target["boxes"] = cropped_boxes.reshape(-1, 4) 35 | target["area"] = area 36 | fields.append("boxes") 37 | 38 | if "masks" in target: 39 | # FIXME should we update the area here if there are no boxes? 
40 | target['masks'] = target['masks'][:, i:i + h, j:j + w] 41 | fields.append("masks") 42 | 43 | # remove elements for which the boxes or masks that have zero area 44 | if "boxes" in target or "masks" in target: 45 | # favor boxes selection when defining which elements to keep 46 | # this is compatible with previous implementation 47 | if "boxes" in target: 48 | cropped_boxes = target['boxes'].reshape(-1, 2, 2) 49 | keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1) 50 | else: 51 | keep = target['masks'].flatten(1).any(1) 52 | 53 | for field in fields: 54 | target[field] = target[field][keep] 55 | 56 | return cropped_image, target 57 | 58 | 59 | def hflip(image, target): 60 | flipped_image = F.hflip(image) 61 | 62 | w, h = image.size 63 | 64 | target = target.copy() 65 | if "boxes" in target: 66 | boxes = target["boxes"] 67 | boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1]) + torch.as_tensor([w, 0, w, 0]) 68 | target["boxes"] = boxes 69 | 70 | if "masks" in target: 71 | target['masks'] = target['masks'].flip(-1) 72 | 73 | return flipped_image, target 74 | 75 | 76 | def resize(image, target, size, max_size=None): 77 | # size can be min_size (scalar) or (w, h) tuple 78 | 79 | def get_size_with_aspect_ratio(image_size, size, max_size=None): 80 | w, h = image_size 81 | if max_size is not None: 82 | min_original_size = float(min((w, h))) 83 | max_original_size = float(max((w, h))) 84 | if max_original_size / min_original_size * size > max_size: 85 | size = int(round(max_size * min_original_size / max_original_size)) 86 | 87 | if (w <= h and w == size) or (h <= w and h == size): 88 | return (h, w) 89 | 90 | if w < h: 91 | ow = size 92 | oh = int(size * h / w) 93 | else: 94 | oh = size 95 | ow = int(size * w / h) 96 | 97 | return (oh, ow) 98 | 99 | def get_size(image_size, size, max_size=None): 100 | if isinstance(size, (list, tuple)): 101 | return size[::-1] 102 | else: 103 | return get_size_with_aspect_ratio(image_size, size, max_size) 104 | 105 | size = get_size(image.size, size, max_size) 106 | rescaled_image = F.resize(image, size) 107 | 108 | if target is None: 109 | return rescaled_image, None 110 | 111 | ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) 112 | ratio_width, ratio_height = ratios 113 | 114 | target = target.copy() 115 | if "boxes" in target: 116 | boxes = target["boxes"] 117 | scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) 118 | target["boxes"] = scaled_boxes 119 | 120 | if "area" in target: 121 | area = target["area"] 122 | scaled_area = area * (ratio_width * ratio_height) 123 | target["area"] = scaled_area 124 | 125 | h, w = size 126 | target["size"] = torch.tensor([h, w]) 127 | 128 | if "masks" in target: 129 | target['masks'] = interpolate( 130 | target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5 131 | 132 | return rescaled_image, target 133 | 134 | 135 | def pad(image, target, padding): 136 | # assumes that we only pad on the bottom right corners 137 | padded_image = F.pad(image, (0, 0, padding[0], padding[1])) 138 | if target is None: 139 | return padded_image, None 140 | target = target.copy() 141 | # should we do something wrt the original size? 
142 | target["size"] = torch.tensor(padded_image.size[::-1]) 143 | if "masks" in target: 144 | target['masks'] = torch.nn.functional.pad(target['masks'], (0, padding[0], 0, padding[1])) 145 | return padded_image, target 146 | 147 | 148 | class RandomCrop(object): 149 | def __init__(self, size): 150 | self.size = size 151 | 152 | def __call__(self, img, target): 153 | region = T.RandomCrop.get_params(img, self.size) 154 | return crop(img, target, region) 155 | 156 | 157 | class RandomSizeCrop(object): 158 | def __init__(self, min_size: int, max_size: int): 159 | self.min_size = min_size 160 | self.max_size = max_size 161 | 162 | def __call__(self, img: PIL.Image.Image, target: dict): 163 | w = random.randint(self.min_size, min(img.width, self.max_size)) 164 | h = random.randint(self.min_size, min(img.height, self.max_size)) 165 | region = T.RandomCrop.get_params(img, [h, w]) 166 | return crop(img, target, region) 167 | 168 | 169 | class CenterCrop(object): 170 | def __init__(self, size): 171 | self.size = size 172 | 173 | def __call__(self, img, target): 174 | image_width, image_height = img.size 175 | crop_height, crop_width = self.size 176 | crop_top = int(round((image_height - crop_height) / 2.)) 177 | crop_left = int(round((image_width - crop_width) / 2.)) 178 | return crop(img, target, (crop_top, crop_left, crop_height, crop_width)) 179 | 180 | 181 | class RandomHorizontalFlip(object): 182 | def __init__(self, p=0.5): 183 | self.p = p 184 | 185 | def __call__(self, img, target): 186 | if random.random() < self.p: 187 | return hflip(img, target) 188 | return img, target 189 | 190 | 191 | class RandomResize(object): 192 | def __init__(self, sizes, max_size=None): 193 | assert isinstance(sizes, (list, tuple)) 194 | self.sizes = sizes 195 | self.max_size = max_size 196 | 197 | def __call__(self, img, target=None): 198 | size = random.choice(self.sizes) 199 | return resize(img, target, size, self.max_size) 200 | 201 | 202 | class RandomPad(object): 203 | def __init__(self, max_pad): 204 | self.max_pad = max_pad 205 | 206 | def __call__(self, img, target): 207 | pad_x = random.randint(0, self.max_pad) 208 | pad_y = random.randint(0, self.max_pad) 209 | return pad(img, target, (pad_x, pad_y)) 210 | 211 | 212 | class RandomSelect(object): 213 | """ 214 | Randomly selects between transforms1 and transforms2, 215 | with probability p for transforms1 and (1 - p) for transforms2 216 | """ 217 | def __init__(self, transforms1, transforms2, p=0.5): 218 | self.transforms1 = transforms1 219 | self.transforms2 = transforms2 220 | self.p = p 221 | 222 | def __call__(self, img, target): 223 | if random.random() < self.p: 224 | return self.transforms1(img, target) 225 | return self.transforms2(img, target) 226 | 227 | 228 | class ToTensor(object): 229 | def __call__(self, img, target): 230 | return F.to_tensor(img), target 231 | 232 | 233 | class RandomErasing(object): 234 | 235 | def __init__(self, *args, **kwargs): 236 | self.eraser = T.RandomErasing(*args, **kwargs) 237 | 238 | def __call__(self, img, target): 239 | return self.eraser(img), target 240 | 241 | 242 | class Normalize(object): 243 | def __init__(self, mean, std): 244 | self.mean = mean 245 | self.std = std 246 | 247 | def __call__(self, image, target=None): 248 | image = F.normalize(image, mean=self.mean, std=self.std) 249 | if target is None: 250 | return image, None 251 | target = target.copy() 252 | h, w = image.shape[-2:] 253 | if "boxes" in target: 254 | boxes = target["boxes"] 255 | boxes = box_xyxy_to_cxcywh(boxes) 256 | boxes = 
boxes / torch.tensor([w, h, w, h], dtype=torch.float32) 257 | target["boxes"] = boxes 258 | return image, target 259 | 260 | 261 | class Compose(object): 262 | def __init__(self, transforms): 263 | self.transforms = transforms 264 | 265 | def __call__(self, image, target): 266 | for t in self.transforms: 267 | image, target = t(image, target) 268 | return image, target 269 | 270 | def __repr__(self): 271 | format_string = self.__class__.__name__ + "(" 272 | for t in self.transforms: 273 | format_string += "\n" 274 | format_string += " {0}".format(t) 275 | format_string += "\n)" 276 | return format_string 277 | -------------------------------------------------------------------------------- /models/detr/detr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @author: A.Akdogan 5 | """ 6 | 7 | import pytorch_lightning as pl 8 | import torch 9 | from transformers import DetrForObjectDetection 10 | 11 | 12 | class Detr(pl.LightningModule): 13 | 14 | def __init__(self, lr=0.0001, lr_backbone=1e-05, weight_decay=0.0001, num_labels=3, train_dataloader=None, 15 | val_dataloader=None): 16 | super().__init__() 17 | # replace COCO classification head with custom head 18 | self.model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", 19 | num_labels=num_labels, 20 | ignore_mismatched_sizes=True) 21 | 22 | self.lr = lr 23 | self.lr_backbone = lr_backbone 24 | self.weight_decay = weight_decay 25 | self.t_dataloader = train_dataloader 26 | self.v_dataloader = val_dataloader 27 | 28 | def forward(self, pixel_values, pixel_mask): 29 | 30 | outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask) 31 | 32 | return outputs 33 | 34 | def common_step(self, batch): 35 | 36 | pixel_values = batch["pixel_values"] 37 | pixel_mask = batch["pixel_mask"] 38 | labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]] 39 | 40 | outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels) 41 | 42 | loss = outputs.loss 43 | loss_dict = outputs.loss_dict 44 | 45 | return loss, loss_dict 46 | 47 | def training_step(self, batch, batch_idx): 48 | 49 | loss, loss_dict = self.common_step(batch) 50 | # logs metrics for each training_step, 51 | # and the average across the epoch 52 | self.log("training_loss", loss) 53 | for k, v in loss_dict.items(): 54 | self.log("train_" + k, v.item()) 55 | 56 | return loss 57 | 58 | def validation_step(self, batch, batch_idx): 59 | 60 | loss, loss_dict = self.common_step(batch) 61 | self.log("validation_loss", loss) 62 | for k, v in loss_dict.items(): 63 | self.log("validation_" + k, v.item()) 64 | 65 | return loss 66 | 67 | def configure_optimizers(self): 68 | 69 | param_dicts = [ 70 | {"params": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad]}, 71 | { 72 | "params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad], 73 | "lr": self.lr_backbone, 74 | }, 75 | ] 76 | optimizer = torch.optim.AdamW(param_dicts, lr=self.lr, 77 | weight_decay=self.weight_decay) 78 | 79 | return optimizer 80 | 81 | def train_dataloader(self): 82 | 83 | return self.t_dataloader 84 | 85 | def val_dataloader(self): 86 | 87 | return self.v_dataloader 88 | -------------------------------------------------------------------------------- /models/detr/prediction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- 
coding: utf-8 -*- 3 | """ 4 | @author: A.Akdogan 5 | """ 6 | 7 | import os 8 | 9 | import matplotlib.pyplot as plt 10 | import torch 11 | 12 | COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125], 13 | [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]] 14 | 15 | 16 | def box_cxcywh_to_xyxy(x): 17 | x_c, y_c, w, h = x.unbind(1) 18 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 19 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 20 | return torch.stack(b, dim=1) 21 | 22 | 23 | def rescale_bboxes(out_bbox, size): 24 | img_w, img_h = size 25 | b = box_cxcywh_to_xyxy(out_bbox) 26 | b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32) 27 | return b 28 | 29 | def get_max_prob(prob): 30 | probability = 0 31 | for p in prob: 32 | if p[p.argmax()] > probability: 33 | probability = p[p.argmax()] 34 | return probability 35 | 36 | def plot_results(pil_img, prob, boxes, output_folder, image_name, classes): 37 | plt.figure(figsize=(16, 10)) 38 | plt.imshow(pil_img) 39 | ax = plt.gca() 40 | colors = COLORS * 100 41 | 42 | max_prob = get_max_prob(prob) 43 | 44 | 45 | for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors): 46 | if (p[p.argmax()] == max_prob): 47 | ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, 48 | fill=False, color=c, linewidth=3)) 49 | cl = p.argmax() 50 | text = f'{classes[cl.item()]}: {p[cl]:0.2f}' 51 | ax.text(xmin, ymin, text, fontsize=15, 52 | bbox=dict(facecolor='yellow', alpha=0.5)) 53 | plt.axis('off') 54 | 55 | plt.savefig(os.path.join(output_folder, image_name)) 56 | 57 | print("Saved the image: " + image_name) 58 | 59 | # Close the figure to avoid the "More than 20 figures have been opened" warning 60 | plt.close() 61 | 62 | 63 | def visualize_predictions(image, outputs, output_folder, image_name, classes, threshold=0.1): 64 | # keep only predictions with confidence >= threshold 65 | probas = outputs.logits.softmax(-1)[0, :, :-1] 66 | keep = probas.max(-1).values > threshold 67 | 68 | # convert predicted boxes from [0; 1] to image scales 69 | bboxes_scaled = rescale_bboxes(outputs.pred_boxes[0, keep].cpu(), image.size) 70 | 71 | # plot results 72 | plot_results(image, probas[keep], bboxes_scaled, output_folder, image_name, classes) 73 | 74 | 75 | # AGGIUNTO MANUALMENTE 76 | def get_predictions(image, outputs, output_folder, image_name, classes, threshold=0.1): 77 | # keep only predictions with confidence >= threshold 78 | probas = outputs.logits.softmax(-1)[0, :, :-1] 79 | keep = probas.max(-1).values > threshold 80 | 81 | # convert predicted boxes from [0; 1] to image scales 82 | bboxes_scaled = rescale_bboxes(outputs.pred_boxes[0, keep].cpu(), image.size) 83 | 84 | # pleas note, bboxes are encoded as x_min, y_min, x_max, y_max 85 | return probas[keep], bboxes_scaled 86 | -------------------------------------------------------------------------------- /models/detr/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @author: A.Akdogan 5 | """ 6 | import os 7 | 8 | import torch 9 | from pytorch_lightning import Trainer 10 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping 11 | from pytorch_lightning.loggers import TensorBoardLogger 12 | from torch.utils.data import DataLoader 13 | from tqdm import tqdm 14 | from transformers import DetrFeatureExtractor 15 | 16 | from .coco_detection import CocoDetection 17 | from .datasets_helper import get_coco_api_from_dataset 18 | from 
.datasets_helper.coco_eval import CocoEvaluator 19 | from .detr import Detr 20 | 21 | import numpy as np 22 | 23 | 24 | class DetrTrainer: 25 | 26 | def __init__(self, **kwargs): 27 | self.__dict__.update(kwargs) 28 | 29 | self.early_stop = EarlyStopping( 30 | monitor='val_loss', 31 | patience=self.patience, 32 | strict=False, 33 | verbose=False, 34 | mode='min' 35 | ) 36 | 37 | self.feature_extractor = DetrFeatureExtractor.from_pretrained(self.feature_extractor) 38 | 39 | @staticmethod 40 | def get_final_path(sub_count, join_list): 41 | 42 | path = os.path.dirname(os.path.realpath(__file__)) 43 | for i in range(sub_count): path = os.path.dirname(os.path.normpath(path)) 44 | for i in range(len(join_list)): path = os.path.join(path, join_list[i]) 45 | 46 | return path 47 | 48 | @staticmethod 49 | def collate_fn(batch): 50 | 51 | feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50") 52 | pixel_values = [item[0] for item in batch] 53 | encoding = feature_extractor.pad_and_create_pixel_mask(pixel_values, return_tensors="pt") 54 | labels = [item[1] for item in batch] 55 | batch = {'pixel_values': encoding['pixel_values'], 'pixel_mask': encoding['pixel_mask'], 'labels': labels} 56 | 57 | return batch 58 | 59 | def create_dataset(self): 60 | train_dataset = CocoDetection(self.image_path, self.train_json_annot_path, self.val_json_annot_path, 61 | feature_extractor=self.feature_extractor) 62 | val_dataset = CocoDetection(self.image_path, self.train_json_annot_path, self.val_json_annot_path, 63 | feature_extractor=self.feature_extractor, train=False) 64 | test_dataset = CocoDetection(self.image_path, self.train_json_annot_path, self.test_json_annot_path, 65 | feature_extractor=self.feature_extractor, train=False) 66 | 67 | return train_dataset, val_dataset, test_dataset 68 | 69 | def evaluation(self, val_dataset, val_dataloader, model): 70 | 71 | base_ds = get_coco_api_from_dataset(val_dataset) 72 | iou_types = ['bbox'] 73 | coco_evaluator = CocoEvaluator(base_ds, iou_types) # initialize evaluator with ground truths 74 | 75 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 76 | 77 | model.to(device) 78 | model.eval() 79 | 80 | print("Running evaluation...") 81 | 82 | for idx, batch in enumerate(tqdm(val_dataloader)): 83 | # get the inputs 84 | pixel_values = batch["pixel_values"].to(device) 85 | pixel_mask = batch["pixel_mask"].to(device) 86 | labels = [{k: v.to(device) for k, v in t.items()} for t in 87 | batch["labels"]] # these are in DETR format, resized + normalized 88 | 89 | # forward pass 90 | outputs = model.model(pixel_values=pixel_values, pixel_mask=pixel_mask) 91 | 92 | orig_target_sizes = torch.stack([target["orig_size"] for target in labels], dim=0) 93 | results = self.feature_extractor.post_process(outputs, 94 | orig_target_sizes) # convert outputs of model to COCO api 95 | res = {target['image_id'].item(): output for target, output in zip(labels, results)} 96 | coco_evaluator.update(res) 97 | 98 | coco_evaluator.synchronize_between_processes() 99 | coco_evaluator.accumulate() 100 | 101 | # Initialize an array to store the AP for each class 102 | ap_50_per_class = np.zeros(3) 103 | ap_50_95_per_class = np.zeros(3) 104 | 105 | # Calculate class-wise AP using coco_evaluator 106 | for iou_type in iou_types: 107 | coco_eval = coco_evaluator.coco_eval[iou_type] 108 | for class_idx in range(3): 109 | ap_50_per_class[class_idx] = coco_eval.eval['precision'][0, :, class_idx, 0, -1].mean() 110 | ap_50_95_per_class[class_idx] = 
coco_eval.eval['precision'][:, :, class_idx, 0, -1].mean() 111 | 112 | # Print the mAP for each class 113 | for class_idx, ap in enumerate(ap_50_per_class): 114 | print(f"mAP_50 for class {class_idx}: {ap:.4f}") 115 | 116 | for class_idx, ap in enumerate(ap_50_95_per_class): 117 | print(f"mAP_50:95 for class {class_idx}: {ap:.4f}") 118 | 119 | coco_evaluator.summarize() 120 | 121 | def data_loader(self, dataset, batch_size, shuffle=False): 122 | dataloader = DataLoader(dataset, collate_fn=DetrTrainer.collate_fn, batch_size=batch_size, shuffle=shuffle) 123 | return dataloader 124 | 125 | def build_model(self, train_dataloader, val_dataloader): 126 | model = Detr(lr=self.lr, lr_backbone=self.lr_backbone, weight_decay=self.weight_decay, 127 | num_labels=self.num_classes, train_dataloader=train_dataloader, val_dataloader=val_dataloader) 128 | return model 129 | 130 | def train(self, train_dataset, val_dataset): 131 | train_dataloader = self.data_loader(train_dataset, self.train_batch_size, shuffle=True) 132 | val_dataloader = self.data_loader(val_dataset, self.test_batch_size) 133 | 134 | model = Detr(lr=self.lr, lr_backbone=self.lr_backbone, weight_decay=self.weight_decay, 135 | num_labels=self.num_classes, train_dataloader=train_dataloader, val_dataloader=val_dataloader) 136 | 137 | # Set custom logger with desired output directory 138 | logs_path = self.output_path 139 | logger = TensorBoardLogger(save_dir=logs_path, name=self.logs_dir) 140 | 141 | #PATH = 'C:/Users/fuma2/Development/Github/detection_framework/outputs/detr/model.pth' 142 | #model = model.load_from_checkpoint(PATH,lr=self.lr, lr_backbone=self.lr_backbone, weight_decay=self.weight_decay, 143 | # num_labels=self.num_classes, train_dataloader=train_dataloader, val_dataloader=val_dataloader) 144 | 145 | trainer = Trainer(max_epochs=self.max_epochs, gradient_clip_val=self.gradient_clip_val, logger=logger, 146 | callbacks=[self.early_stop]) 147 | trainer.fit(model) 148 | 149 | # ----- 150 | self.evaluation(val_dataset, val_dataloader, model) 151 | 152 | return model, trainer 153 | 154 | def main(self): 155 | train_dataset, val_dataset, test_dataset = self.create_dataset() 156 | _, trainer = self.train(train_dataset, val_dataset) 157 | 158 | logs_dir = os.path.join(self.output_path, self.logs_dir) 159 | # find the last run's version number by looking at the subdirectories of logs_dir 160 | version_nums = [int(dir_name.split("_")[-1]) for dir_name in os.listdir(logs_dir) if 161 | dir_name.startswith("version_")] 162 | last_version_num = max(version_nums) if version_nums else 0 163 | 164 | version_dir = os.path.join(logs_dir, f"version_{last_version_num}") 165 | # specify the path where the model.pth file will be saved 166 | model_path = os.path.join(version_dir, self.model_path) 167 | 168 | trainer.save_checkpoint(model_path) 169 | 170 | return 171 | -------------------------------------------------------------------------------- /models/detr/util/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /models/detr/util/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 
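Boxes appear in two conventions in this module: the (cx, cy, w, h) centre format predicted by DETR
and the (x0, y0, x1, y1) corner format expected by the IoU helpers below;
box_cxcywh_to_xyxy and box_xyxy_to_cxcywh convert between them.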
4 | """ 5 | import torch 6 | from torchvision.ops.boxes import box_area 7 | 8 | 9 | def box_cxcywh_to_xyxy(x): 10 | x_c, y_c, w, h = x.unbind(-1) 11 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 12 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 13 | return torch.stack(b, dim=-1) 14 | 15 | 16 | def box_xyxy_to_cxcywh(x): 17 | x0, y0, x1, y1 = x.unbind(-1) 18 | b = [(x0 + x1) / 2, (y0 + y1) / 2, 19 | (x1 - x0), (y1 - y0)] 20 | return torch.stack(b, dim=-1) 21 | 22 | 23 | # modified from torchvision to also return the union 24 | def box_iou(boxes1, boxes2): 25 | area1 = box_area(boxes1) 26 | area2 = box_area(boxes2) 27 | 28 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 29 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 30 | 31 | wh = (rb - lt).clamp(min=0) # [N,M,2] 32 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 33 | 34 | union = area1[:, None] + area2 - inter 35 | 36 | iou = inter / union 37 | return iou, union 38 | 39 | 40 | def generalized_box_iou(boxes1, boxes2): 41 | """ 42 | Generalized IoU from https://giou.stanford.edu/ 43 | 44 | The boxes should be in [x0, y0, x1, y1] format 45 | 46 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 47 | and M = len(boxes2) 48 | """ 49 | # degenerate boxes gives inf / nan results 50 | # so do an early check 51 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 52 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 53 | iou, union = box_iou(boxes1, boxes2) 54 | 55 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 56 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 57 | 58 | wh = (rb - lt).clamp(min=0) # [N,M,2] 59 | area = wh[:, :, 0] * wh[:, :, 1] 60 | 61 | return iou - (area - union) / area 62 | 63 | 64 | def masks_to_boxes(masks): 65 | """Compute the bounding boxes around the provided masks 66 | 67 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 68 | 69 | Returns a [N, 4] tensors, with the boxes in xyxy format 70 | """ 71 | if masks.numel() == 0: 72 | return torch.zeros((0, 4), device=masks.device) 73 | 74 | h, w = masks.shape[-2:] 75 | 76 | y = torch.arange(0, h, dtype=torch.float) 77 | x = torch.arange(0, w, dtype=torch.float) 78 | y, x = torch.meshgrid(y, x) 79 | 80 | x_mask = (masks * x.unsqueeze(0)) 81 | x_max = x_mask.flatten(1).max(-1)[0] 82 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 83 | 84 | y_mask = (masks * y.unsqueeze(0)) 85 | y_max = y_mask.flatten(1).max(-1)[0] 86 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 87 | 88 | return torch.stack([x_min, y_min, x_max, y_max], 1) 89 | -------------------------------------------------------------------------------- /models/detr/util/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Misc functions, including distributed helpers. 4 | 5 | Mostly copy-paste from torchvision references. 
6 | """ 7 | import os 8 | import subprocess 9 | import time 10 | from collections import defaultdict, deque 11 | import datetime 12 | import pickle 13 | from packaging import version 14 | from typing import Optional, List 15 | 16 | import torch 17 | import torch.distributed as dist 18 | from torch import Tensor 19 | 20 | # needed due to empty tensor bug in pytorch and torchvision 0.5 21 | import torchvision 22 | if version.parse(torchvision.__version__) < version.parse('0.7'): 23 | from torchvision.ops import _new_empty_tensor 24 | from torchvision.ops.misc import _output_size 25 | 26 | 27 | class SmoothedValue(object): 28 | """Track a series of values and provide access to smoothed values over a 29 | window or the global series average. 30 | """ 31 | 32 | def __init__(self, window_size=20, fmt=None): 33 | if fmt is None: 34 | fmt = "{median:.4f} ({global_avg:.4f})" 35 | self.deque = deque(maxlen=window_size) 36 | self.total = 0.0 37 | self.count = 0 38 | self.fmt = fmt 39 | 40 | def update(self, value, n=1): 41 | self.deque.append(value) 42 | self.count += n 43 | self.total += value * n 44 | 45 | def synchronize_between_processes(self): 46 | """ 47 | Warning: does not synchronize the deque! 48 | """ 49 | if not is_dist_avail_and_initialized(): 50 | return 51 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') 52 | dist.barrier() 53 | dist.all_reduce(t) 54 | t = t.tolist() 55 | self.count = int(t[0]) 56 | self.total = t[1] 57 | 58 | @property 59 | def median(self): 60 | d = torch.tensor(list(self.deque)) 61 | return d.median().item() 62 | 63 | @property 64 | def avg(self): 65 | d = torch.tensor(list(self.deque), dtype=torch.float32) 66 | return d.mean().item() 67 | 68 | @property 69 | def global_avg(self): 70 | return self.total / self.count 71 | 72 | @property 73 | def max(self): 74 | return max(self.deque) 75 | 76 | @property 77 | def value(self): 78 | return self.deque[-1] 79 | 80 | def __str__(self): 81 | return self.fmt.format( 82 | median=self.median, 83 | avg=self.avg, 84 | global_avg=self.global_avg, 85 | max=self.max, 86 | value=self.value) 87 | 88 | 89 | def all_gather(data): 90 | """ 91 | Run all_gather on arbitrary picklable data (not necessarily tensors) 92 | Args: 93 | data: any picklable object 94 | Returns: 95 | list[data]: list of data gathered from each rank 96 | """ 97 | world_size = get_world_size() 98 | if world_size == 1: 99 | return [data] 100 | 101 | # serialized to a Tensor 102 | buffer = pickle.dumps(data) 103 | storage = torch.ByteStorage.from_buffer(buffer) 104 | tensor = torch.ByteTensor(storage).to("cuda") 105 | 106 | # obtain Tensor size of each rank 107 | local_size = torch.tensor([tensor.numel()], device="cuda") 108 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 109 | dist.all_gather(size_list, local_size) 110 | size_list = [int(size.item()) for size in size_list] 111 | max_size = max(size_list) 112 | 113 | # receiving Tensor from all ranks 114 | # we pad the tensor because torch all_gather does not support 115 | # gathering tensors of different shapes 116 | tensor_list = [] 117 | for _ in size_list: 118 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 119 | if local_size != max_size: 120 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 121 | tensor = torch.cat((tensor, padding), dim=0) 122 | dist.all_gather(tensor_list, tensor) 123 | 124 | data_list = [] 125 | for size, tensor in zip(size_list, tensor_list): 126 | buffer = 
tensor.cpu().numpy().tobytes()[:size] 127 | data_list.append(pickle.loads(buffer)) 128 | 129 | return data_list 130 | 131 | 132 | def reduce_dict(input_dict, average=True): 133 | """ 134 | Args: 135 | input_dict (dict): all the values will be reduced 136 | average (bool): whether to do average or sum 137 | Reduce the values in the dictionary from all processes so that all processes 138 | have the averaged results. Returns a dict with the same fields as 139 | input_dict, after reduction. 140 | """ 141 | world_size = get_world_size() 142 | if world_size < 2: 143 | return input_dict 144 | with torch.no_grad(): 145 | names = [] 146 | values = [] 147 | # sort the keys so that they are consistent across processes 148 | for k in sorted(input_dict.keys()): 149 | names.append(k) 150 | values.append(input_dict[k]) 151 | values = torch.stack(values, dim=0) 152 | dist.all_reduce(values) 153 | if average: 154 | values /= world_size 155 | reduced_dict = {k: v for k, v in zip(names, values)} 156 | return reduced_dict 157 | 158 | 159 | class MetricLogger(object): 160 | def __init__(self, delimiter="\t"): 161 | self.meters = defaultdict(SmoothedValue) 162 | self.delimiter = delimiter 163 | 164 | def update(self, **kwargs): 165 | for k, v in kwargs.items(): 166 | if isinstance(v, torch.Tensor): 167 | v = v.item() 168 | assert isinstance(v, (float, int)) 169 | self.meters[k].update(v) 170 | 171 | def __getattr__(self, attr): 172 | if attr in self.meters: 173 | return self.meters[attr] 174 | if attr in self.__dict__: 175 | return self.__dict__[attr] 176 | raise AttributeError("'{}' object has no attribute '{}'".format( 177 | type(self).__name__, attr)) 178 | 179 | def __str__(self): 180 | loss_str = [] 181 | for name, meter in self.meters.items(): 182 | loss_str.append( 183 | "{}: {}".format(name, str(meter)) 184 | ) 185 | return self.delimiter.join(loss_str) 186 | 187 | def synchronize_between_processes(self): 188 | for meter in self.meters.values(): 189 | meter.synchronize_between_processes() 190 | 191 | def add_meter(self, name, meter): 192 | self.meters[name] = meter 193 | 194 | def log_every(self, iterable, print_freq, header=None): 195 | i = 0 196 | if not header: 197 | header = '' 198 | start_time = time.time() 199 | end = time.time() 200 | iter_time = SmoothedValue(fmt='{avg:.4f}') 201 | data_time = SmoothedValue(fmt='{avg:.4f}') 202 | space_fmt = ':' + str(len(str(len(iterable)))) + 'd' 203 | if torch.cuda.is_available(): 204 | log_msg = self.delimiter.join([ 205 | header, 206 | '[{0' + space_fmt + '}/{1}]', 207 | 'eta: {eta}', 208 | '{meters}', 209 | 'time: {time}', 210 | 'data: {data}', 211 | 'max mem: {memory:.0f}' 212 | ]) 213 | else: 214 | log_msg = self.delimiter.join([ 215 | header, 216 | '[{0' + space_fmt + '}/{1}]', 217 | 'eta: {eta}', 218 | '{meters}', 219 | 'time: {time}', 220 | 'data: {data}' 221 | ]) 222 | MB = 1024.0 * 1024.0 223 | for obj in iterable: 224 | data_time.update(time.time() - end) 225 | yield obj 226 | iter_time.update(time.time() - end) 227 | if i % print_freq == 0 or i == len(iterable) - 1: 228 | eta_seconds = iter_time.global_avg * (len(iterable) - i) 229 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 230 | if torch.cuda.is_available(): 231 | print(log_msg.format( 232 | i, len(iterable), eta=eta_string, 233 | meters=str(self), 234 | time=str(iter_time), data=str(data_time), 235 | memory=torch.cuda.max_memory_allocated() / MB)) 236 | else: 237 | print(log_msg.format( 238 | i, len(iterable), eta=eta_string, 239 | meters=str(self), 240 | 
time=str(iter_time), data=str(data_time))) 241 | i += 1 242 | end = time.time() 243 | total_time = time.time() - start_time 244 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 245 | print('{} Total time: {} ({:.4f} s / it)'.format( 246 | header, total_time_str, total_time / len(iterable))) 247 | 248 | 249 | def get_sha(): 250 | cwd = os.path.dirname(os.path.abspath(__file__)) 251 | 252 | def _run(command): 253 | return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() 254 | sha = 'N/A' 255 | diff = "clean" 256 | branch = 'N/A' 257 | try: 258 | sha = _run(['git', 'rev-parse', 'HEAD']) 259 | subprocess.check_output(['git', 'diff'], cwd=cwd) 260 | diff = _run(['git', 'diff-index', 'HEAD']) 261 | diff = "has uncommited changes" if diff else "clean" 262 | branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) 263 | except Exception: 264 | pass 265 | message = f"sha: {sha}, status: {diff}, branch: {branch}" 266 | return message 267 | 268 | 269 | def collate_fn(batch): 270 | batch = list(zip(*batch)) 271 | batch[0] = nested_tensor_from_tensor_list(batch[0]) 272 | return tuple(batch) 273 | 274 | 275 | def _max_by_axis(the_list): 276 | # type: (List[List[int]]) -> List[int] 277 | maxes = the_list[0] 278 | for sublist in the_list[1:]: 279 | for index, item in enumerate(sublist): 280 | maxes[index] = max(maxes[index], item) 281 | return maxes 282 | 283 | 284 | class NestedTensor(object): 285 | def __init__(self, tensors, mask: Optional[Tensor]): 286 | self.tensors = tensors 287 | self.mask = mask 288 | 289 | def to(self, device): 290 | # type: (Device) -> NestedTensor # noqa 291 | cast_tensor = self.tensors.to(device) 292 | mask = self.mask 293 | if mask is not None: 294 | assert mask is not None 295 | cast_mask = mask.to(device) 296 | else: 297 | cast_mask = None 298 | return NestedTensor(cast_tensor, cast_mask) 299 | 300 | def decompose(self): 301 | return self.tensors, self.mask 302 | 303 | def __repr__(self): 304 | return str(self.tensors) 305 | 306 | 307 | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): 308 | # TODO make this more general 309 | if tensor_list[0].ndim == 3: 310 | if torchvision._is_tracing(): 311 | # nested_tensor_from_tensor_list() does not export well to ONNX 312 | # call _onnx_nested_tensor_from_tensor_list() instead 313 | return _onnx_nested_tensor_from_tensor_list(tensor_list) 314 | 315 | # TODO make it support different-sized images 316 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 317 | # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) 318 | batch_shape = [len(tensor_list)] + max_size 319 | b, c, h, w = batch_shape 320 | dtype = tensor_list[0].dtype 321 | device = tensor_list[0].device 322 | tensor = torch.zeros(batch_shape, dtype=dtype, device=device) 323 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 324 | for img, pad_img, m in zip(tensor_list, tensor, mask): 325 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 326 | m[: img.shape[1], :img.shape[2]] = False 327 | else: 328 | raise ValueError('not supported') 329 | return NestedTensor(tensor, mask) 330 | 331 | 332 | # _onnx_nested_tensor_from_tensor_list() is an implementation of 333 | # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
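# Both build the same result: e.g. images of shape (3, 480, 640) and (3, 600, 400)
# are padded up to the per-batch maximum (3, 600, 640) and stacked into a
# (2, 3, 600, 640) tensor, with a (2, 600, 640) boolean mask in which True marks
# padded pixels. The ONNX-friendly variant below builds the padding with
# torch.nn.functional.pad instead of the in-place slice copies used above, so it can be traced.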
334 | @torch.jit.unused 335 | def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: 336 | max_size = [] 337 | for i in range(tensor_list[0].dim()): 338 | max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64) 339 | max_size.append(max_size_i) 340 | max_size = tuple(max_size) 341 | 342 | # work around for 343 | # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 344 | # m[: img.shape[1], :img.shape[2]] = False 345 | # which is not yet supported in onnx 346 | padded_imgs = [] 347 | padded_masks = [] 348 | for img in tensor_list: 349 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 350 | padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) 351 | padded_imgs.append(padded_img) 352 | 353 | m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) 354 | padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) 355 | padded_masks.append(padded_mask.to(torch.bool)) 356 | 357 | tensor = torch.stack(padded_imgs) 358 | mask = torch.stack(padded_masks) 359 | 360 | return NestedTensor(tensor, mask=mask) 361 | 362 | 363 | def setup_for_distributed(is_master): 364 | """ 365 | This function disables printing when not in master process 366 | """ 367 | import builtins as __builtin__ 368 | builtin_print = __builtin__.print 369 | 370 | def print(*args, **kwargs): 371 | force = kwargs.pop('force', False) 372 | if is_master or force: 373 | builtin_print(*args, **kwargs) 374 | 375 | __builtin__.print = print 376 | 377 | 378 | def is_dist_avail_and_initialized(): 379 | if not dist.is_available(): 380 | return False 381 | if not dist.is_initialized(): 382 | return False 383 | return True 384 | 385 | 386 | def get_world_size(): 387 | if not is_dist_avail_and_initialized(): 388 | return 1 389 | return dist.get_world_size() 390 | 391 | 392 | def get_rank(): 393 | if not is_dist_avail_and_initialized(): 394 | return 0 395 | return dist.get_rank() 396 | 397 | 398 | def is_main_process(): 399 | return get_rank() == 0 400 | 401 | 402 | def save_on_master(*args, **kwargs): 403 | if is_main_process(): 404 | torch.save(*args, **kwargs) 405 | 406 | 407 | def init_distributed_mode(args): 408 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 409 | args.rank = int(os.environ["RANK"]) 410 | args.world_size = int(os.environ['WORLD_SIZE']) 411 | args.gpu = int(os.environ['LOCAL_RANK']) 412 | elif 'SLURM_PROCID' in os.environ: 413 | args.rank = int(os.environ['SLURM_PROCID']) 414 | args.gpu = args.rank % torch.cuda.device_count() 415 | else: 416 | print('Not using distributed mode') 417 | args.distributed = False 418 | return 419 | 420 | args.distributed = True 421 | 422 | torch.cuda.set_device(args.gpu) 423 | args.dist_backend = 'nccl' 424 | print('| distributed init (rank {}): {}'.format( 425 | args.rank, args.dist_url), flush=True) 426 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 427 | world_size=args.world_size, rank=args.rank) 428 | torch.distributed.barrier() 429 | setup_for_distributed(args.rank == 0) 430 | 431 | 432 | @torch.no_grad() 433 | def accuracy(output, target, topk=(1,)): 434 | """Computes the precision@k for the specified values of k""" 435 | if target.numel() == 0: 436 | return [torch.zeros([], device=output.device)] 437 | maxk = max(topk) 438 | batch_size = target.size(0) 439 | 440 | _, pred = output.topk(maxk, 1, True, True) 441 | pred = pred.t() 442 | correct = 
pred.eq(target.view(1, -1).expand_as(pred)) 443 | 444 | res = [] 445 | for k in topk: 446 | correct_k = correct[:k].view(-1).float().sum(0) 447 | res.append(correct_k.mul_(100.0 / batch_size)) 448 | return res 449 | 450 | 451 | def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): 452 | # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor 453 | """ 454 | Equivalent to nn.functional.interpolate, but with support for empty batch sizes. 455 | This will eventually be supported natively by PyTorch, and this 456 | class can go away. 457 | """ 458 | if version.parse(torchvision.__version__) < version.parse('0.7'): 459 | if input.numel() > 0: 460 | return torch.nn.functional.interpolate( 461 | input, size, scale_factor, mode, align_corners 462 | ) 463 | 464 | output_shape = _output_size(2, input, size, scale_factor) 465 | output_shape = list(input.shape[:-2]) + list(output_shape) 466 | return _new_empty_tensor(input, output_shape) 467 | else: 468 | return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) 469 | -------------------------------------------------------------------------------- /models/detr/util/plot_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorboard.backend.event_processing.event_accumulator import EventAccumulator 3 | 4 | import matplotlib as mpl 5 | import matplotlib.pyplot as plt 6 | 7 | def plot_tensorflow_log(path): 8 | 9 | # Loading too much data is slow... 10 | tf_size_guidance = { 11 | 'compressedHistograms': 10, 12 | 'images': 0, 13 | 'scalars': 100, 14 | 'histograms': 1 15 | } 16 | 17 | event_acc = EventAccumulator(path, tf_size_guidance) 18 | event_acc.Reload() 19 | 20 | # Show all tags in the log file 21 | print(event_acc.Tags()) 22 | 23 | training_accuracies = event_acc.Scalars('training_loss') 24 | validation_accuracies = event_acc.Scalars('validation_loss') 25 | 26 | steps = 14949 27 | x = np.arange(steps) 28 | y = np.zeros([steps, 2]) 29 | 30 | for i in range(steps): 31 | y[i, 0] = training_accuracies[i][2] # value 32 | y[i, 1] = validation_accuracies[i][2] 33 | 34 | plt.plot(x, y[:,0], label='training accuracy') 35 | plt.plot(x, y[:,1], label='validation accuracy') 36 | 37 | plt.xlabel("Steps") 38 | plt.ylabel("Accuracy") 39 | plt.title("Training Progress") 40 | plt.legend(loc='upper right', frameon=True) 41 | plt.show() 42 | 43 | log_file="C:/Users/fuma2/Development/Github/detection_framework/outputs/detr/training_logs/version_0/events.out.tfevents.1681892850.MSI.19112.0" 44 | plot_tensorflow_log(log_file) -------------------------------------------------------------------------------- /models/ensemble/detector.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import os 4 | import cv2 5 | import torch 6 | from ultralytics import YOLO 7 | 8 | from transformers import DetrFeatureExtractor 9 | 10 | from models.detr.detr import Detr 11 | from scripts.py.prepare_config import prepare_config 12 | from models.detr.prediction import visualize_predictions, get_predictions 13 | 14 | import numpy as np 15 | from torchvision.io import read_image 16 | 17 | 18 | 19 | coeff = { 20 | 'yolo': [.457, .314, .49], 21 | 'detr': [.631, 303, .459], 22 | 'fasterRCNN': [.722, .279, .392] 23 | } 24 | 25 | 26 | class EnsembledDetector: 27 | 28 | def predict(self, predictions): 29 | for firsrt_prediction in predictions: 30 | for second_prediction in 
predictions: 31 | if firsrt_prediction['model'] != second_prediction['model']: 32 | break 33 | 34 | 35 | def predict(self, predictions, threshold=.5): 36 | 37 | label_ens, bbox_ens = [],[] 38 | 39 | for i in range(len(predictions)): 40 | pred_first_model = predictions[i] 41 | first_model = predictions[i]['model'] 42 | 43 | for j in range(len(predictions)): 44 | pred_second_model = predictions[j] 45 | second_model = predictions[j]['model'] 46 | 47 | if i != j: 48 | labels1, bboxes1 = pred_first_model['labels'], pred_first_model['bboxes'] 49 | labels2, bboxes2 = pred_second_model['labels'], pred_second_model['bboxes'] 50 | for label1,bbox1 in zip(labels1,bboxes1): 51 | for label2,bbox2 in zip(labels2,bboxes2): 52 | predicted_labels = { 53 | first_model:label1, 54 | second_model:label2 55 | } 56 | iou = self.compute_iou(bbox1, bbox2) 57 | check = False 58 | if iou > threshold: 59 | check = True 60 | for k in range(len(predictions)): 61 | pred_third_model = predictions[k] 62 | third_model = predictions[k]['model'] 63 | 64 | if i != k and j != k: 65 | bboxes3 = pred_third_model['bboxes'] 66 | labels3, bboxes3 = pred_third_model['labels'], pred_third_model['bboxes'] 67 | for label3,bbox3 in zip(labels3,bboxes3): 68 | iou = self.compute_iou(bbox1, bbox3) 69 | 70 | if iou > threshold: 71 | x = max(bbox1[0],bbox2[0],bbox3[0]) 72 | y = max(bbox1[1],bbox2[1],bbox3[1]) 73 | w = min(bbox1[2],bbox2[2],bbox3[2]) 74 | h = min(bbox1[3],bbox2[3],bbox3[3]) 75 | #l = self.average_weighted_voting([label1, label2, label3]) 76 | predicted_labels[third_model] = label3 77 | lbl = self.average_weighted_voting(predicted_labels) 78 | label_ens.append(lbl) 79 | bbox_ens.append([x,y,w,h]) 80 | check = False 81 | #break 82 | if check: 83 | x = max(bbox1[0],bbox2[0]) 84 | y = max(bbox1[1],bbox2[1]) 85 | w = min(bbox1[2],bbox2[2]) 86 | h = min(bbox1[3],bbox2[3]) 87 | #l = self.average_weighted_voting([label1, label2]) 88 | lbl = self.average_weighted_voting(predicted_labels) 89 | label_ens.append(lbl) 90 | bbox_ens.append([x,y,w,h]) 91 | 92 | bbox_ens,label_ens = self.merge_bboxes(bbox_ens, label_ens) 93 | return label_ens, bbox_ens 94 | 95 | def average_weighted_voting(self,predicted_labels): 96 | """ 97 | Computes the predicted label based on the average voting 98 | 99 | Arguments: 100 | predicted_labels -- Dict {'yolo': lbl1, 'fasterRCNN': lbl2, 'detr': lbl3}. 101 | 102 | Returns: 103 | predicted_label -- the label predicted by the ensemble model 104 | """ 105 | predicted_label = -1 106 | preds = [0,0,0] 107 | for key in predicted_labels: 108 | preds[int(predicted_labels[key])] = coeff[key][int(predicted_labels[key])] 109 | predicted_label = torch.argmax(torch.Tensor(preds)) 110 | return predicted_label 111 | 112 | 113 | def compute_iou(self,bbox1, bbox2): 114 | """ 115 | Computes the Intersection over Union (IoU) metric between two bounding boxes. 116 | 117 | Arguments: 118 | bbox1 -- Tuple (x, y, w, h) representing the first bounding box. 119 | bbox2 -- Tuple (x, y, w, h) representing the second bounding box. 120 | 121 | Returns: 122 | iou -- The Intersection over Union (IoU) metric. 
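        Example (illustrative): bbox1 = [0, 0, 2, 2] and bbox2 = [1, 1, 2, 2] give an
        intersection of 1 and a union of 4 + 4 - 1 = 7, so iou = 1/7 (about 0.143).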
123 | """ 124 | x1, y1, w1, h1 = bbox1[0],bbox1[1],bbox1[2],bbox1[3] 125 | x2, y2, w2, h2 = bbox2[0],bbox2[1],bbox2[2],bbox2[3] 126 | # Calculate the coordinates of the intersection rectangle 127 | x_intersection = max(x1, x2) 128 | y_intersection = max(y1, y2) 129 | w_intersection = min(x1 + w1, x2 + w2) - x_intersection 130 | h_intersection = min(y1 + h1, y2 + h2) - y_intersection 131 | 132 | # If the intersection is non-existent (negative width or height), return IoU = 0 133 | if w_intersection <= 0 or h_intersection <= 0: 134 | return 0.0 135 | 136 | # Calculate the areas of the bounding boxes 137 | area_bbox1 = w1 * h1 138 | area_bbox2 = w2 * h2 139 | # Calculate the area of the intersection and union 140 | area_intersection = w_intersection * h_intersection 141 | area_union = area_bbox1 + area_bbox2 - area_intersection 142 | 143 | iou = area_intersection / area_union 144 | return iou 145 | 146 | 147 | def merge_bboxes(self, bboxes_list, votes_list): 148 | ''' function to generate the final (predicted) bboxes from all those detected''' 149 | 150 | print(len(bboxes_list), len(votes_list)) 151 | merged_bboxes, merged_votes = [],[] 152 | for bbox, vote in zip(bboxes_list,votes_list): 153 | if len(merged_bboxes) == 0: 154 | merged_bboxes.append([bbox, 1]) 155 | merged_votes.append([0,0,0]) 156 | merged_votes[0][vote] += 1 157 | 158 | iou_check = True 159 | for i in range(len(merged_bboxes)): 160 | b1 = [torch.tensor(item, dtype=torch.float) for item in merged_bboxes[i][0]] 161 | b2 = [torch.tensor(item.clone(), dtype=torch.float) for item in bbox] 162 | iou = self.compute_iou(b1, b2) 163 | if iou > 0.5: 164 | iou_check = False 165 | box1 = merged_bboxes[i][0] 166 | box1 = torch.tensor([box1], dtype=torch.float) 167 | box2 = bbox 168 | box2 = torch.tensor([box2], dtype=torch.float) 169 | merged_bboxes[i][0] = ((box1*torch.tensor([merged_bboxes[i][1]], dtype=torch.float) + box2) / (merged_bboxes[i][1]+1) ).tolist()[0] 170 | merged_bboxes[i][1] += 1 171 | merged_votes[i][vote] += 1 172 | if iou_check: 173 | merged_bboxes.append([bbox, 1]) 174 | merged_votes.append([0,0,0]) 175 | merged_votes[-1][vote] += 1 176 | 177 | merged_bboxes = [bbox[0] for bbox in merged_bboxes] 178 | merged_votes = [vote.index(max(vote)) for vote in merged_votes] 179 | 180 | return merged_bboxes, merged_votes 181 | -------------------------------------------------------------------------------- /models/fastercnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/models/fastercnn/__init__.py -------------------------------------------------------------------------------- /models/fastercnn/plot_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import matplotlib.pyplot as plt 3 | 4 | # Read the JSON file 5 | with open('C:/Users/fuma2/Development/Github/detection_framework/outputs/fastercnn/model_outputs/metrics.json', 'r') as f: 6 | data = json.load(f) 7 | 8 | # Extract the loss values 9 | train_losses = [] 10 | val_losses = [] 11 | for d in data: 12 | if 'total_loss' in d: 13 | train_losses.append(d['total_loss']) 14 | if 'val_total_loss' in d: 15 | val_losses.append(d['val_total_loss']) 16 | 17 | # Create a plot of the training and validation loss over time 18 | plt.plot(val_losses, label='Validation loss') 19 | #plt.plot(train_losses, label='Training loss', color="orangered") 20 | plt.xlabel('Iterations') 21 
| plt.ylabel('Loss') 22 | plt.legend() 23 | plt.show() -------------------------------------------------------------------------------- /models/fastercnn/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from detectron2.data import build_detection_test_loader, DatasetMapper 3 | from detectron2.engine import DefaultTrainer 4 | from detectron2.evaluation import COCOEvaluator, inference_on_dataset 5 | from detectron2.utils import comm 6 | 7 | 8 | class FastercnnTrainer(DefaultTrainer): 9 | 10 | def __init__(self, cfg, early_stop_patience=5): 11 | super().__init__(cfg) 12 | self.early_stop_patience = early_stop_patience 13 | self.min_loss = float('inf') 14 | self.patience_counter = 0 15 | 16 | def after_step(self): 17 | super().after_step() 18 | 19 | # Early stopping condition 20 | if (self.storage.iter + 1) % self.cfg.TEST.EVAL_PERIOD == 0: 21 | val_loader = iter(build_detection_test_loader(self.cfg, self.cfg.DATASETS.TEST, 22 | mapper=DatasetMapper(self.cfg, is_train=True))) 23 | val_loss = self.compute_validation_loss(val_loader) 24 | 25 | print(f"\033[32mValidation Loss: {val_loss}\033[0m") 26 | 27 | if val_loss < self.min_loss: 28 | self.min_loss = val_loss 29 | self.patience_counter = 0 30 | self.checkpointer.save("best_model") 31 | else: 32 | self.patience_counter += 1 33 | 34 | if self.patience_counter >= self.early_stop_patience: 35 | # Evaluate the model on the test dataset and print the results 36 | evaluator = COCOEvaluator(self.cfg.DATASETS.TEST[0], self.cfg, False, output_dir=self.cfg.OUTPUT_DIR) 37 | val_loader = build_detection_test_loader(self.cfg, self.cfg.DATASETS.TEST[0]) 38 | inference_on_dataset(self.model, val_loader, evaluator) 39 | raise RuntimeError('Early stopping triggered') 40 | 41 | def compute_validation_loss(self, val_loader): 42 | total_loss = 0.0 43 | num_batches = len(val_loader) # Calculate the number of batches in the validation loader 44 | # Iterate through the batches in the validation loader 45 | for i, data in enumerate(val_loader): 46 | with torch.no_grad(): 47 | loss_dict = self.model(data) # Pass the data through the model and compute the loss dictionary 48 | losses = sum( 49 | loss_dict.values()) # Sum the losses in the loss dictionary to get the total loss for the current batch 50 | assert torch.isfinite( 51 | losses).all(), loss_dict # Check if the computed loss values are finite and raise an exception with the loss dictionary if not 52 | total_loss += losses.item() # Add the total loss of the current batch to the total loss across all batches 53 | 54 | # If the current process is the main process, log individual losses for the last batch 55 | if comm.is_main_process(): 56 | if i == num_batches - 1: 57 | # Create a dictionary with the reduced individual losses and prefix keys with "val_" 58 | loss_dict_reduced = {"val_" + k: v.item() for k, v in comm.reduce_dict(loss_dict).items()} 59 | # Log the individual losses using the storage object 60 | self.storage.put_scalars(**loss_dict_reduced) 61 | 62 | # If the current process is the main process, log the total loss across all batches 63 | if comm.is_main_process(): 64 | self.storage.put_scalar("val_total_loss", total_loss) 65 | 66 | return total_loss 67 | -------------------------------------------------------------------------------- /models/yolo/yolov8n.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/models/yolo/yolov8n.pt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | aiofiles==23.1.0 3 | aiohttp==3.8.4 4 | aiosignal==1.3.1 5 | antlr4-python3-runtime==4.9.3 6 | anyio==3.6.2 7 | argcomplete==3.0.5 8 | array-record==0.2.0 9 | astunparse==1.6.3 10 | async-timeout==4.0.2 11 | attrs==23.1.0 12 | black==23.3.0 13 | boto3==1.26.102 14 | botocore==1.29.102 15 | cachetools==5.3.0 16 | certifi==2022.12.7 17 | charset-normalizer==3.1.0 18 | click==8.1.3 19 | cloudpickle==2.2.1 20 | colorama==0.4.6 21 | contourpy==1.0.7 22 | cycler==0.11.0 23 | dacite==1.7.0 24 | Deprecated==1.2.13 25 | detectron2 @ git+https://github.com/facebookresearch/detectron2.git@af614bf8282ff14e159c45f37399e87b91a1ec41 26 | detr==0.1.4 27 | dill==0.3.6 28 | dm-tree==0.1.8 29 | dnspython==2.3.0 30 | docopt==0.6.2 31 | etils==1.3.0 32 | eventlet==0.33.3 33 | fiftyone==0.20.0 34 | fiftyone-brain==0.11.0 35 | fiftyone-db==0.4.0 36 | filelock==3.10.7 37 | flatbuffers==23.3.3 38 | fonttools==4.39.2 39 | frozenlist==1.3.3 40 | fsspec==2023.4.0 41 | ftfy==6.1.1 42 | future==0.18.3 43 | fvcore==0.1.5.post20221221 44 | gast==0.4.0 45 | glob2==0.7 46 | google-auth==2.17.0 47 | google-auth-oauthlib==0.4.6 48 | google-pasta==0.2.0 49 | googleapis-common-protos==1.59.0 50 | graphql-core==3.2.3 51 | greenlet==2.0.2 52 | grpcio==1.53.0 53 | h11==0.14.0 54 | h2==4.1.0 55 | h5py==3.8.0 56 | hpack==4.0.0 57 | httpcore==0.16.3 58 | httpx==0.23.3 59 | huggingface-hub==0.13.4 60 | hydra-core==1.3.2 61 | hypercorn==0.14.3 62 | hyperframe==6.0.1 63 | idna==3.4 64 | imageio==2.27.0 65 | imgaug==0.4.0 66 | importlib-resources==5.12.0 67 | iopath==0.1.9 68 | jax==0.4.8 69 | Jinja2==3.1.2 70 | jmespath==1.0.1 71 | joblib==1.2.0 72 | kaleido==0.2.1 73 | keras==2.12.0 74 | keras-cv==0.5.0 75 | kiwisolver==1.4.4 76 | lazy_loader==0.2 77 | libclang==16.0.0 78 | lightning-utilities==0.8.0 79 | mapcalc==0.2.2 80 | Markdown==3.4.3 81 | MarkupSafe==2.1.2 82 | matplotlib==3.7.1 83 | ml-dtypes==0.1.0 84 | mongoengine==0.24.2 85 | motor==3.1.1 86 | mpmath==1.3.0 87 | multidict==6.0.4 88 | mypy-extensions==1.0.0 89 | ndjson==0.3.1 90 | networkx==3.0 91 | numpy==1.23.5 92 | oauthlib==3.2.2 93 | omegaconf==2.3.0 94 | opencv-python==4.7.0.72 95 | opencv-python-headless==4.7.0.72 96 | opt-einsum==3.3.0 97 | packaging==23.0 98 | pandas==1.5.3 99 | pathspec==0.11.1 100 | patool==1.12 101 | Pillow==9.4.0 102 | pipreqs==0.4.11 103 | platformdirs==3.2.0 104 | plotly==5.14.0 105 | portalocker==2.7.0 106 | pprintpp==0.4.0 107 | priority==2.0.0 108 | promise==2.3 109 | protobuf==4.22.1 110 | psutil==5.9.4 111 | pyasn1==0.4.8 112 | pyasn1-modules==0.2.8 113 | pycocotools==2.0.6 114 | pymongo==4.3.3 115 | pyparsing==3.0.9 116 | python-dateutil==2.8.2 117 | pytorch-lightning==2.0.1.post0 118 | pytz==2023.3 119 | pytz-deprecation-shim==0.1.0.post0 120 | PyWavelets==1.4.1 121 | pywin32==306 122 | PyYAML==6.0 123 | regex==2023.3.23 124 | requests==2.28.2 125 | requests-oauthlib==1.3.1 126 | retrying==1.3.4 127 | rfc3986==1.5.0 128 | rsa==4.9 129 | s3transfer==0.6.0 130 | scikit-image==0.20.0 131 | scikit-learn==1.2.2 132 | scipy==1.10.1 133 | seaborn==0.12.2 134 | sentry-sdk==1.18.0 135 | shapely==2.0.1 136 | six==1.16.0 137 | sniffio==1.3.0 138 | sortedcontainers==2.4.0 139 | sse-starlette==0.10.3 140 | 
sseclient-py==1.7.2 141 | starlette==0.20.4 142 | strawberry-graphql==0.138.1 143 | sympy==1.11.1 144 | tabulate==0.9.0 145 | tenacity==8.2.2 146 | tensorboard==2.12.0 147 | tensorboard-data-server==0.7.0 148 | tensorboard-plugin-wit==1.8.1 149 | tensorflow==2.12.0 150 | tensorflow-datasets==4.9.2 151 | tensorflow-estimator==2.12.0 152 | tensorflow-intel==2.12.0 153 | tensorflow-io-gcs-filesystem==0.31.0 154 | tensorflow-metadata==1.13.1 155 | termcolor==2.2.0 156 | thop==0.1.1.post2209072238 157 | threadpoolctl==3.1.0 158 | tifffile==2023.3.21 159 | timm==0.6.13 160 | tokenizers==0.13.3 161 | toml==0.10.2 162 | tomli==2.0.1 163 | torch==2.0.0+cu117 164 | torchaudio==2.0.1+cu117 165 | torchmetrics==0.11.4 166 | torchvision==0.15.1+cu117 167 | tqdm==4.65.0 168 | transformers==4.28.1 169 | typing_extensions==4.5.0 170 | tzdata==2023.3 171 | tzlocal==4.3 172 | ultralytics==8.0.104 173 | universal-analytics-python3==1.1.1 174 | urllib3==1.26.15 175 | voxel51-eta==0.8.4 176 | wcwidth==0.2.6 177 | Werkzeug==2.2.3 178 | wrapt==1.14.1 179 | wsproto==1.2.0 180 | xmltodict==0.13.0 181 | yacs==0.1.8 182 | yarg==0.1.9 183 | yarl==1.8.2 184 | zipp==3.15.0 185 | -------------------------------------------------------------------------------- /scripts/py/__pycache__/prepare_config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/scripts/py/__pycache__/prepare_config.cpython-310.pyc -------------------------------------------------------------------------------- /scripts/py/data_converter/coco_to_yolo.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import hydra 5 | 6 | 7 | def parse_coco_json(coco_json_file): 8 | with open(coco_json_file, "r") as f: 9 | data = json.load(f) 10 | return data 11 | 12 | 13 | def create_class_dict(data): 14 | class_dict = {} 15 | for category in data["categories"]: 16 | class_id = category["id"] 17 | class_name = category["name"] 18 | class_dict[class_id] = class_name 19 | return class_dict 20 | 21 | 22 | def convert_bbox_format(bbox, width, height): 23 | x, y, w, h = bbox 24 | x_center = x + (w / 2) 25 | y_center = y + (h / 2) 26 | return [x_center / width, y_center / height, w / width, h / height] 27 | 28 | 29 | def save_class_names(class_dict, class_file): 30 | with open(class_file, "w") as f: 31 | for class_id in sorted(class_dict): 32 | f.write(f"{class_dict[class_id]}\n") 33 | 34 | 35 | @hydra.main(config_path="../../../config/", config_name="config", version_base=None) 36 | def coco_to_yolo(cfg): 37 | coco_json_file = os.path.join(cfg.datasets.path, 'coco', cfg.datasets.dataset_type) + ".json" 38 | label_folder = os.path.join(cfg.datasets.path, 'yolo', cfg.datasets.dataset_type, 'labels') 39 | images_folder = os.path.join(cfg.datasets.path, 'yolo', cfg.datasets.dataset_type, 'images') 40 | class_file = os.path.join(cfg.datasets.path, 'yolo', cfg.datasets.class_file) 41 | 42 | if not os.path.exists(label_folder): 43 | os.makedirs(label_folder) 44 | 45 | if not os.path.exists(images_folder): 46 | os.makedirs(images_folder) 47 | 48 | data = parse_coco_json(coco_json_file) 49 | class_dict = create_class_dict(data) 50 | 51 | for annotation in data["annotations"]: 52 | image_id = annotation["image_id"] 53 | class_id = annotation["category_id"] 54 | bbox = annotation["bbox"] 55 | 56 | image_info = [x for x in data["images"] if x["id"] 
== image_id][0] 57 | width, height = image_info["width"], image_info["height"] 58 | image_name = image_info["file_name"].rsplit(".", 1)[0] 59 | 60 | yolo_bbox = convert_bbox_format(bbox, width, height) 61 | 62 | label_file = os.path.join(label_folder, f"{image_name}.txt") 63 | 64 | with open(label_file, "a") as f: 65 | f.write(f"{class_id} {' '.join(map(str, yolo_bbox))}\n") 66 | 67 | save_class_names(class_dict, class_file) 68 | 69 | # Copy test images to coco/aug_images folder 70 | for image in data['images']: 71 | image_path = os.path.join(cfg.datasets.path, 'coco', 'aug_images', image['file_name']) 72 | if os.path.exists(image_path): 73 | shutil.copy(image_path, images_folder) 74 | 75 | print(f"{cfg.datasets.dataset_type} -> Number of images moved: {len(data['images'])}") 76 | 77 | 78 | if __name__ == '__main__': 79 | coco_to_yolo() 80 | -------------------------------------------------------------------------------- /scripts/py/data_converter/yolo_to_coco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarcoParola/detection_framework/ba8be1f36d700d0fb6a8140dab9a7b29243d9eef/scripts/py/data_converter/yolo_to_coco.py -------------------------------------------------------------------------------- /scripts/py/dataset_preparation/run_augmentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import json 3 | import hydra 4 | import os 5 | import imgaug.augmenters as iaa 6 | from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage 7 | 8 | 9 | def augment_image_and_annotation(image, annotation): 10 | # Define the augmentation pipeline 11 | seq = iaa.Sequential([ 12 | iaa.Multiply((0.95, 1.05)), # Adjust brightness (95-105% of original) 13 | iaa.LinearContrast((0.95, 1.05)), # Adjust contrast (95-105% of original) 14 | iaa.AddToHueAndSaturation((-10, 10)), # Adjust hue and saturation (-10 to 10) 15 | iaa.Fliplr(0.5), # Horizontally flip 50% of images 16 | iaa.Affine( 17 | translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)}, 18 | rotate=(-10, 10), 19 | scale={"x": (0.9, 1.1), "y": (0.9, 1.1)}, 20 | mode="edge" 21 | ) 22 | ]) 23 | 24 | # Convert COCO format bounding boxes to imgaug format 25 | bbs = BoundingBoxesOnImage([ 26 | BoundingBox(x1=bb["bbox"][0], y1=bb["bbox"][1], x2=bb["bbox"][0] + bb["bbox"][2], 27 | y2=bb["bbox"][1] + bb["bbox"][3]) 28 | for bb in annotation["annotations"] 29 | ], shape=image.shape) 30 | 31 | # Apply augmentation 32 | image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs) 33 | 34 | return image_aug, bbs_aug 35 | 36 | 37 | def perform_augmentation(coco_data, images_input_path, images_output_path, initial_image_id, initial_annotation_id): 38 | new_images = [] 39 | new_annotations = [] 40 | initial_image_id = initial_image_id 41 | initial_annotation_id = initial_annotation_id 42 | 43 | for img_info in coco_data["images"]: 44 | img_path = os.path.join(images_input_path, img_info["file_name"]) 45 | image = cv2.imread(img_path) 46 | 47 | img_annotations = { 48 | "annotations": [ann for ann in coco_data["annotations"] if ann["image_id"] == img_info["id"]], 49 | "image_id": img_info["id"] 50 | } 51 | 52 | for i in range(5): 53 | image_aug, bbs_aug = augment_image_and_annotation(image, img_annotations) 54 | 55 | new_images.append({ 56 | "id": len(new_images) + initial_image_id + 1, 57 | "width": image_aug.shape[1], 58 | "height": image_aug.shape[0], 59 | "file_name": f"aug_{i}_{img_info['file_name']}" 60 | }) 61 | 62 | # Convert 
imgaug bounding boxes back to COCO format 63 | annotations_aug = [] 64 | for bb_idx, bb in enumerate(bbs_aug.bounding_boxes): 65 | x1_clipped = max(0, bb.x1) 66 | y1_clipped = max(0, bb.y1) 67 | x2_clipped = min(image_aug.shape[1], bb.x2) 68 | y2_clipped = min(image_aug.shape[0], bb.y2) 69 | width_clipped = x2_clipped - x1_clipped 70 | height_clipped = y2_clipped - y1_clipped 71 | 72 | if width_clipped > 0 and height_clipped > 0: 73 | annotations_aug.append({ 74 | "id": len(new_annotations) + initial_annotation_id + 1 + bb_idx, 75 | "image_id": len(new_images) + initial_image_id, 76 | "category_id": img_annotations["annotations"][bb_idx]["category_id"], 77 | "area": int(width_clipped * height_clipped), 78 | "bbox": [round(float(x1_clipped), 1), round(float(y1_clipped), 1), 79 | round(float(width_clipped), 1), round(float(height_clipped), 1)], 80 | "iscrowd": img_annotations["annotations"][bb_idx]["iscrowd"], 81 | "isbbox": img_annotations["annotations"][bb_idx]["isbbox"], 82 | "color": img_annotations["annotations"][bb_idx]["color"] 83 | }) 84 | 85 | new_annotations.extend(annotations_aug) 86 | 87 | # Save augmented image 88 | cv2.imwrite(os.path.join(images_output_path, f"aug_{i}_{img_info['file_name']}"), image_aug) 89 | 90 | return new_images, new_annotations 91 | 92 | 93 | def save_augmented_annotations(coco_data, new_images, new_annotations, annotations_file_output): 94 | coco_data_augmented = coco_data.copy() 95 | coco_data_augmented["images"].extend(new_images) 96 | coco_data_augmented["annotations"].extend(new_annotations) 97 | 98 | with open(annotations_file_output, "w") as f: 99 | json.dump(coco_data_augmented, f) 100 | 101 | 102 | def get_initial_id(coco_data, test_annotation_file): 103 | with open(test_annotation_file, "r") as f: 104 | test_coco_data = json.load(f) 105 | 106 | # Sort the 'images' field by their 'id' 107 | coco_data['images'] = sorted(coco_data['images'], key=lambda x: x['id']) 108 | test_coco_data['images'] = sorted(test_coco_data['images'], key=lambda x: x['id']) 109 | 110 | # Sort the 'annotations' field by their 'id' 111 | coco_data['annotations'] = sorted(coco_data['annotations'], key=lambda x: x['id']) 112 | test_coco_data['annotations'] = sorted(test_coco_data['annotations'], key=lambda x: x['id']) 113 | 114 | initial_image_id = max(coco_data["images"][-1]["id"], test_coco_data["images"][-1]["id"]) 115 | initial_annotation_id = max(coco_data["annotations"][-1]["id"], test_coco_data["annotations"][-1]["id"]) 116 | 117 | return initial_image_id, initial_annotation_id 118 | 119 | 120 | @hydra.main(config_path="../../../config/", config_name="config", version_base=None) 121 | def augmentation(cfg): 122 | annotations_file = os.path.join(cfg.datasets.path, 'coco', 'train.json') 123 | test_annotation_file = os.path.join(cfg.datasets.path, 'coco', 'test.json') 124 | images_input_path = os.path.join(cfg.datasets.path, cfg.datasets.img_path) 125 | images_output_path = os.path.join(cfg.project_path, cfg.preproc.augmentation.img_path) 126 | 127 | with open(annotations_file, "r") as f: 128 | coco_data = json.load(f) 129 | 130 | initial_image_id, initial_annotation_id = get_initial_id(coco_data, test_annotation_file) 131 | 132 | print(initial_image_id, initial_annotation_id) 133 | 134 | new_images, new_annotations = perform_augmentation(coco_data, images_input_path, images_output_path, 135 | initial_image_id, initial_annotation_id) 136 | 137 | save_augmented_annotations(coco_data, new_images, new_annotations, annotations_file) 138 | 139 | 140 | if __name__ == 
'__main__': 141 | augmentation() 142 | -------------------------------------------------------------------------------- /scripts/py/dataset_preparation/split1.py: -------------------------------------------------------------------------------- 1 | import json 2 | import hydra 3 | import os 4 | import random 5 | import shutil 6 | 7 | 8 | @hydra.main(config_path="../../../config/", config_name="config", version_base=None) 9 | def split(cfg): 10 | annotations_file = os.path.join(cfg.datasets.path, cfg.preproc.preprocessed_annotation) 11 | train_annotation_file = os.path.join(cfg.datasets.path, 'coco', 'train.json') 12 | test_annotation_file = os.path.join(cfg.datasets.path, 'coco', 'test.json') 13 | aug_images_path = os.path.join(cfg.project_path, cfg.preproc.augmentation.img_path) 14 | 15 | train_percentage = cfg.preproc.split_percentage 16 | 17 | with open(annotations_file, 'r') as f: 18 | coco_data = json.load(f) 19 | 20 | # Shuffle the list of images in the JSON file 21 | random.shuffle(coco_data['images']) 22 | 23 | num_train = int(train_percentage * len(coco_data['images'])) 24 | 25 | train_images = coco_data['images'][:num_train] 26 | test_images = coco_data['images'][num_train:] 27 | 28 | train_annotations = [] 29 | test_annotations = [] 30 | 31 | # Copy the corresponding annotations to each set 32 | for ann in coco_data['annotations']: 33 | if ann['image_id'] in [x['id'] for x in train_images]: 34 | train_annotations.append(ann) 35 | elif ann['image_id'] in [x['id'] for x in test_images]: 36 | test_annotations.append(ann) 37 | 38 | # Create new COCO JSON files for each set 39 | train_coco_data = { 40 | 'images': train_images, 41 | 'annotations': train_annotations, 42 | 'categories': coco_data['categories'] 43 | } 44 | 45 | test_coco_data = { 46 | 'images': test_images, 47 | 'annotations': test_annotations, 48 | 'categories': coco_data['categories'] 49 | } 50 | 51 | # Write each set to its own COCO JSON file 52 | with open(train_annotation_file, 'w') as f: 53 | json.dump(train_coco_data, f) 54 | 55 | with open(test_annotation_file, 'w') as f: 56 | json.dump(test_coco_data, f) 57 | 58 | if not os.path.exists(aug_images_path): 59 | os.makedirs(aug_images_path) 60 | 61 | # Copy test images to coco/aug_images folder 62 | for image in test_images: 63 | image_path = os.path.join(cfg.datasets.path, cfg.datasets.img_path, image['file_name']) 64 | if os.path.exists(image_path): 65 | shutil.copy(image_path, aug_images_path) 66 | 67 | 68 | if __name__ == '__main__': 69 | split() 70 | -------------------------------------------------------------------------------- /scripts/py/dataset_preparation/split2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import hydra 3 | import os 4 | import random 5 | import shutil 6 | 7 | 8 | @hydra.main(config_path="../../../config/", config_name="config", version_base=None) 9 | def split(cfg): 10 | train_annotation_file = os.path.join(cfg.datasets.path, 'coco', 'train.json') 11 | val_annotation_file = os.path.join(cfg.datasets.path, 'coco', 'val.json') 12 | aug_images_path = os.path.join(cfg.project_path, cfg.preproc.augmentation.img_path) 13 | 14 | train_percentage = cfg.preproc.split_percentage 15 | 16 | with open(train_annotation_file, 'r') as f: 17 | coco_data = json.load(f) 18 | 19 | # Shuffle the list of images in the JSON file 20 | random.shuffle(coco_data['images']) 21 | 22 | num_train = int(train_percentage * len(coco_data['images'])) 23 | 24 | train_images = coco_data['images'][:num_train] 25 | 
val_images = coco_data['images'][num_train:] 26 | 27 | train_annotations = [] 28 | val_annotations = [] 29 | 30 | # Copy the corresponding annotations to each set 31 | for ann in coco_data['annotations']: 32 | if ann['image_id'] in [x['id'] for x in train_images]: 33 | train_annotations.append(ann) 34 | elif ann['image_id'] in [x['id'] for x in val_images]: 35 | val_annotations.append(ann) 36 | 37 | # Create new COCO JSON files for each set 38 | train_coco_data = { 39 | 'images': train_images, 40 | 'annotations': train_annotations, 41 | 'categories': coco_data['categories'] 42 | } 43 | 44 | val_coco_data = { 45 | 'images': val_images, 46 | 'annotations': val_annotations, 47 | 'categories': coco_data['categories'] 48 | } 49 | 50 | # Write each set to its own COCO JSON file 51 | with open(train_annotation_file, 'w') as f: 52 | json.dump(train_coco_data, f) 53 | 54 | with open(val_annotation_file, 'w') as f: 55 | json.dump(val_coco_data, f) 56 | 57 | # Copy test images to coco/aug_images folder 58 | for image in train_images: 59 | image_path = os.path.join(cfg.datasets.path, cfg.datasets.img_path, image['file_name']) 60 | if os.path.exists(image_path): 61 | shutil.copy(image_path, aug_images_path) 62 | 63 | for image in val_images: 64 | image_path = os.path.join(cfg.datasets.path, cfg.datasets.img_path, image['file_name']) 65 | if os.path.exists(image_path): 66 | shutil.copy(image_path, aug_images_path) 67 | 68 | 69 | if __name__ == '__main__': 70 | split() 71 | -------------------------------------------------------------------------------- /scripts/py/prepare_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | 4 | import hydra 5 | import os 6 | 7 | from detectron2.config import get_cfg 8 | from detectron2.model_zoo import model_zoo 9 | from detectron2.data.datasets import register_coco_instances 10 | 11 | 12 | def get_yolo_configuration(cfg, mode): 13 | """ Obtain the yolo configuration to be used for train or test """ 14 | if (mode == "train"): 15 | data_path = os.path.join(cfg.project_path, cfg.config.actual_config_path, 16 | cfg.yolo.yolo_config.data_config_train) 17 | else: 18 | data_path = os.path.join(cfg.project_path, cfg.config.actual_config_path, 19 | cfg.yolo.yolo_config.data_config_test) 20 | 21 | config = { 22 | "project": os.path.join(cfg.project_path, cfg.yolo.parameters.output_dir), 23 | "data": data_path, 24 | "lr0": cfg.training.lr, 25 | "epochs": cfg.training.epochs, 26 | "batch": cfg.training.batch, 27 | "patience": cfg.training.early_stopping.patience, 28 | "optimizer": cfg.training.optimizer, 29 | "device": cfg.yolo.parameters.device, 30 | "workers": cfg.training.workers, 31 | "imgsz": cfg.training.img_size 32 | } 33 | 34 | return config 35 | 36 | 37 | def get_detr_configuration(cfg): 38 | """ Obtain the detr configuration to be used for train or test """ 39 | output_path = os.path.join(cfg.project_path, cfg.detr.parameters.output_dir) 40 | 41 | config = { 42 | "image_path": os.path.join(cfg.project_path, cfg.preproc.augmentation.img_path), 43 | "train_json_annot_path": os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.coco.train), 44 | "val_json_annot_path": os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.coco.val), 45 | "test_json_annot_path": os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.coco.test), 46 | "output_path": output_path, 47 | "model_path": cfg.detr.detr_model_path, 48 | 49 | "feature_extractor": cfg.detr.parameters.feature_extractor, 50 | 
"train_batch_size": cfg.training.batch, 51 | "test_batch_size": cfg.training.val_batch, 52 | "lr": cfg.training.lr, 53 | "lr_backbone": cfg.detr.parameters.lr_backbone, 54 | "weight_decay": cfg.training.weight_decay, 55 | "max_epochs": cfg.training.epochs, 56 | "gradient_clip_val": cfg.detr.parameters.gradient_clip_val, 57 | "patience": cfg.training.early_stopping.patience, 58 | 59 | "num_classes": cfg.datasets.n_classes, 60 | 61 | "logs_dir": cfg.detr.parameters.logs_dir 62 | } 63 | 64 | return config 65 | 66 | 67 | def get_num_images(json_path): 68 | with open(json_path, "r") as f: 69 | dataset = json.load(f) 70 | image_ids = [image['id'] for image in dataset['images']] 71 | return len(image_ids) 72 | 73 | 74 | def get_fastercnn_configuration(cfg, mode): 75 | """ Obtain the fastercnn configuration to be used for train or test """ 76 | 77 | images_path = os.path.join(cfg.project_path, cfg.preproc.augmentation.img_path) 78 | output_dir = os.path.join(cfg.project_path, cfg.fastercnn.parameters.output_dir) 79 | 80 | train_json_annot_path = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.coco.train) 81 | val_json_annot_path = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.coco.val) 82 | test_json_annot_path = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.coco.test) 83 | 84 | # Register the dataset for the model usages 85 | try: 86 | register_coco_instances(cfg.fastercnn.parameters.train_dataset_name, {}, train_json_annot_path, images_path) 87 | register_coco_instances(cfg.fastercnn.parameters.val_dataset_name, {}, val_json_annot_path, images_path) 88 | register_coco_instances(cfg.fastercnn.parameters.test_dataset_name, {}, test_json_annot_path, images_path) 89 | except AssertionError: 90 | pass 91 | 92 | # Get number of training images 93 | num_train_images = get_num_images(train_json_annot_path) 94 | 95 | # Create configuration 96 | config = get_cfg() 97 | 98 | config.merge_from_file(model_zoo.get_config_file(cfg.fastercnn.parameters.config_file_path)) 99 | if mode == "train": 100 | config.MODEL.WEIGHTS = model_zoo.get_checkpoint_url( 101 | cfg.fastercnn.parameters.checkpoint_url) # Let training initialize from model zoo 102 | config.DATASETS.TEST = (cfg.fastercnn.parameters.val_dataset_name,) # Use the val dataset 103 | else: 104 | config.MODEL.WEIGHTS = os.path.join(output_dir, 105 | cfg.fastercnn.fastercnn_model_path) # Use the trained model for the test 106 | config.DATASETS.TEST = (cfg.fastercnn.parameters.test_dataset_name,) # Use the test dataset 107 | 108 | config.DATASETS.TRAIN = (cfg.fastercnn.parameters.train_dataset_name,) 109 | 110 | config.DATALOADER.NUM_WORKERS = cfg.training.workers 111 | 112 | config.SOLVER.IMS_PER_BATCH = cfg.training.batch # batch size 113 | config.SOLVER.BASE_LR = cfg.training.lr # LR 114 | config.SOLVER.MAX_ITER = math.ceil( 115 | num_train_images / cfg.training.batch * cfg.training.epochs) # Compute max_iter to get the right amount of epochs 116 | 117 | config.MODEL.ROI_HEADS.NUM_CLASSES = cfg.datasets.n_classes # Set number of classes 118 | config.MODEL.ROI_HEADS.SCORE_THRESH_TEST = cfg.test.confidence_threshold # Set confidence score threshold for this model 119 | config.MODEL.ROI_HEADS.NMS_THRESH_TEST = cfg.test.iou_threshold # Set iou score threshold for this model 120 | config.MODEL.DEVICE = cfg.fastercnn.parameters.device # CUDA 121 | 122 | config.TEST.EVAL_PERIOD = math.ceil( 123 | num_train_images / cfg.training.batch) # Eval the quality of the models at each epoch 124 | 125 | config.OUTPUT_DIR = output_dir 
126 | if not os.path.exists(output_dir): 127 | os.makedirs(output_dir) 128 | 129 | return config 130 | 131 | 132 | def create_config_file(template_path, config_path, **kwargs): 133 | """function to create a configuration file given a template""" 134 | with open(template_path, "r") as template_file: 135 | try: 136 | config = template_file.read() 137 | config = config.format(**kwargs) 138 | with open(config_path, 'w') as config_file: 139 | config_file.write(config) 140 | except Exception as e: 141 | print(e) 142 | 143 | 144 | def prepare_config(cfg, mode): 145 | """function that returns the configuration of each model to be used for training or test""" 146 | 147 | if cfg.model == 'yolo': 148 | model_template_path = os.path.join(cfg.project_path, cfg.config.templates_path, 149 | cfg.yolo.yolo_templates.model_template) 150 | data_template_path = os.path.join(cfg.project_path, cfg.config.templates_path, 151 | cfg.yolo.yolo_templates.data_template) 152 | 153 | actual_config_path = os.path.join(cfg.project_path, cfg.config.actual_config_path) 154 | if not os.path.exists(actual_config_path): 155 | os.makedirs(actual_config_path) 156 | 157 | model_config_path = os.path.join(actual_config_path, cfg.yolo.yolo_config.model_config) 158 | data_config_path = os.path.join(actual_config_path, cfg.yolo.yolo_config.data_config_train) 159 | data_config_path_test = os.path.join(actual_config_path, cfg.yolo.yolo_config.data_config_test) 160 | 161 | train_path = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.yolo.train) 162 | val_path = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.yolo.val) 163 | test_path = os.path.join(cfg.datasets.path, cfg.datasets.datasets_path.yolo.test) 164 | 165 | # Create actual_config yaml file from the templates 166 | create_config_file(model_template_path, model_config_path, nc=cfg.datasets.n_classes) 167 | create_config_file(data_template_path, data_config_path, 168 | class_list_names=cfg.datasets.class_name, 169 | train_path=train_path, 170 | val_path=val_path 171 | ) 172 | 173 | create_config_file(data_template_path, data_config_path_test, 174 | class_list_names=cfg.datasets.class_name, 175 | train_path=train_path, 176 | val_path=test_path 177 | ) 178 | 179 | config = get_yolo_configuration(cfg, mode) 180 | 181 | return config 182 | 183 | if cfg.model == 'fasterRCNN': 184 | config = get_fastercnn_configuration(cfg, mode) 185 | 186 | return config 187 | 188 | if cfg.model == 'detr': 189 | config = get_detr_configuration(cfg) 190 | 191 | return config 192 | 193 | 194 | @hydra.main(config_path="../../config/", config_name="config", version_base=None) 195 | def main(cfg): 196 | prepare_config(cfg, mode="train") 197 | 198 | 199 | if __name__ == '__main__': 200 | main() 201 | -------------------------------------------------------------------------------- /scripts/py/preprocessing/clean_data.py: -------------------------------------------------------------------------------- 1 | import fnmatch 2 | import json 3 | import hydra 4 | import os 5 | import shutil 6 | 7 | 8 | def move_jpg_files_to_images_folder(src_folder, dest_folder): 9 | file_count = 0 10 | 11 | # Define the allowed file patterns 12 | file_patterns = ['*.jpg', '*.jpeg', '*.JPG', '*.png', '*.PNG'] 13 | 14 | # Create the destination folder if it doesn't exist 15 | os.makedirs(dest_folder, exist_ok=True) 16 | 17 | # Iterate through the folder's content 18 | for entry in os.listdir(src_folder): 19 | full_path = os.path.join(src_folder, entry) 20 | # Check if the entry is a file (not a folder) 21 | if 
os.path.isfile(full_path):
22 |             # Check if the file matches any of the allowed patterns
23 |             if any(fnmatch.fnmatch(full_path, pattern) for pattern in file_patterns):
24 |                 # Move the file to the destination folder
25 |                 shutil.move(full_path, os.path.join(dest_folder, entry))
26 |                 file_count += 1
27 | 
28 |     print(f'Moved {file_count} image files to the "images" folder.')
29 | 
30 | 
31 | # Build the set of image file names referenced by the COCO annotation file
32 | def get_coco_image_filenames(coco_annotation_file):
33 |     with open(coco_annotation_file, 'r') as f:
34 |         coco_data = json.load(f)
35 | 
36 |     # Create a set of all the "file_name" values in the COCO annotation file
37 |     coco_image_filenames = {image['file_name'] for image in coco_data['images']}
38 |     return coco_image_filenames
39 | 
40 | 
41 | # Remove images from the folder that are not referenced in the COCO annotation file
42 | def remove_images_not_in_coco(images_folder, coco_image_filenames):
43 |     removed_count = 0
44 | 
45 |     for entry in os.listdir(images_folder):
46 |         full_path = os.path.join(images_folder, entry)
47 | 
48 |         if os.path.isfile(full_path) and entry not in coco_image_filenames:
49 |             os.remove(full_path)
50 |             removed_count += 1
51 |     print(
52 |         f'Removed {removed_count} images from the "images" folder that do not have a corresponding "file_name" in the COCO annotation file.')
53 | 
54 | 
55 | def remove_missing_images_from_coco(coco_annotation_file, images_folder):
56 |     # Load the COCO annotation file
57 |     with open(coco_annotation_file, 'r') as f:
58 |         coco_data = json.load(f)
59 | 
60 |     # Filter out instances that do not have a corresponding image in the images folder
61 |     filtered_images = []
62 |     image_ids_to_keep = set()
63 |     for image in coco_data['images']:
64 |         if 'file_name' in image:
65 |             file_path = os.path.join(images_folder, image['file_name'])
66 |             if os.path.isfile(file_path):
67 |                 filtered_images.append(image)
68 |                 image_ids_to_keep.add(image['id'])
69 | 
70 |     # Filter out annotations that do not have a corresponding image
71 |     filtered_annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] in image_ids_to_keep]
72 | 
73 |     # Update the 'images' and 'annotations' keys in the COCO annotation data
74 |     coco_data['images'] = filtered_images
75 |     coco_data['annotations'] = filtered_annotations
76 |     print('Removed COCO entries whose images are missing from the images folder.')
77 |     return coco_data
78 | 
79 | 
80 | def save_coco_annotation(data, output_file):
81 |     with open(output_file, 'w') as f:
82 |         json.dump(data, f)
83 | 
84 | 
85 | def remove_duplicate_filenames(coco_annotation_file):
86 |     # Load the COCO annotation file
87 |     with open(coco_annotation_file, 'r') as f:
88 |         coco_data = json.load(f)
89 | 
90 |     # Create a dictionary to store filename occurrences
91 |     filename_counts = {}
92 |     for image in coco_data['images']:
93 |         if 'file_name' in image:
94 |             filename = image['file_name']
95 |             filename_counts[filename] = filename_counts.get(filename, 0) + 1
96 | 
97 |     # Filter out duplicate images
98 |     unique_images = []
99 |     unique_image_ids = set()
100 |     duplicate_images = []
101 |     for image in coco_data['images']:
102 |         if 'file_name' in image and filename_counts[image['file_name']] == 1:
103 |             unique_images.append(image)
104 |             unique_image_ids.add(image['id'])
105 |         else:
106 |             duplicate_images.append(image['file_name'])
107 | 
108 |     # Filter out annotations that correspond to duplicate images
109 |     unique_annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] in unique_image_ids]
110 | 
111 |     # Update the 'images' and 'annotations' keys in the COCO annotation data
112 |     coco_data['images'] = unique_images
113 |     coco_data['annotations'] = unique_annotations
114 |     print('Removed COCO entries with duplicate file names.')
115 |     return coco_data, duplicate_images
116 | 
117 | 
118 | def remove_images(images_to_remove, images_folder):
119 |     for image_file in images_to_remove:
120 |         file_path = os.path.join(images_folder, image_file)
121 |         if os.path.isfile(file_path):
122 |             os.remove(file_path)
123 |             print(f'Removed {file_path}')
124 | 
125 | 
126 | def rename_all_images_in_filesystem(coco_file_name, images_folder):
127 |     with open(coco_file_name, 'r') as f:
128 |         coco_data = json.load(f)
129 | 
130 |     # Iterate through the images in the COCO data
131 |     for image in coco_data['images']:
132 |         old_filename = image['file_name']
133 |         new_filename = f'{image["id"]}.jpg'  # Assuming all images are in jpg format
134 | 
135 |         # Construct the full file paths
136 |         old_filepath = os.path.join(images_folder, old_filename)
137 |         new_filepath = os.path.join(images_folder, new_filename)
138 | 
139 |         # Check if the old file exists and rename it
140 |         if os.path.exists(old_filepath):
141 |             os.rename(old_filepath, new_filepath)
142 |             print(f'Renamed "{old_filename}" to "{new_filename}"')
143 |         else:
144 |             print(f'File not found: "{old_filename}"')
145 | 
146 | 
147 | def rename_all_images_in_cocofile(coco_file_name, image_folder):
148 |     # Load the COCO annotation file
149 |     with open(coco_file_name, "r") as json_file:
150 |         coco_data = json.load(json_file)
151 | 
152 |     # Iterate through all images and modify the path and file_name
153 |     for image in coco_data["images"]:
154 |         image_id = str(image["id"])
155 |         image["file_name"] = image_id + ".jpg"
156 |         image["path"] = os.path.join(image_folder, image_id + ".jpg")
157 | 
158 |     return coco_data
159 | 
160 | 
161 | @hydra.main(config_path="../../../config/", config_name="config", version_base=None)
162 | def clean(cfg):
163 |     # Source folder containing the raw extracted images
164 |     src_folder = os.path.join(cfg.project_path, 'data', 'orig', 'tmp')
165 |     # Destination folder where the cleaned images are collected
166 |     dest_folder = os.path.join(cfg.project_path, cfg.preproc.orig.img_path)
167 |     move_jpg_files_to_images_folder(src_folder, dest_folder)
168 | 
169 |     # Original COCO annotation file and the preprocessed copy written by this script
170 |     coco_annotation_file = os.path.join(cfg.datasets.path, cfg.datasets.original_data, cfg.datasets.filenames.dataset)
171 |     coco_annotation_file_tmp = os.path.join(cfg.datasets.path, cfg.datasets.original_data,
172 |                                             'preprocessed_' + cfg.datasets.filenames.dataset)
173 | 
174 |     coco_image_filenames = get_coco_image_filenames(coco_annotation_file)
175 |     remove_images_not_in_coco(dest_folder, coco_image_filenames)
176 | 
177 |     filtered_coco_data = remove_missing_images_from_coco(coco_annotation_file, dest_folder)
178 |     save_coco_annotation(filtered_coco_data, coco_annotation_file_tmp)
179 | 
180 |     filtered_coco_data, duplicate_image_files = remove_duplicate_filenames(coco_annotation_file_tmp)
181 |     remove_images(duplicate_image_files, dest_folder)
182 |     save_coco_annotation(filtered_coco_data, coco_annotation_file_tmp)
183 | 
184 |     rename_all_images_in_filesystem(coco_annotation_file_tmp, dest_folder)
185 | 
186 |     renamed_coco_data = rename_all_images_in_cocofile(coco_annotation_file_tmp, dest_folder)
187 |     save_coco_annotation(renamed_coco_data, coco_annotation_file_tmp)
188 | 
189 |     shutil.rmtree(src_folder)
190 | 
191 | 
192 | if __name__ == '__main__':
193 |     clean()
194 | 
--------------------------------------------------------------------------------
/scripts/py/preprocessing/resize_image.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | import cv2
3 | import json
4 | import hydra
5 | import os
6 | import imgaug.augmenters as iaa
7 | from imgaug.augmentables.bbs import BoundingBox
8 | 
9 | 
10 | def build_dictionaries(data):
11 |     print("Building dictionaries...")
12 |     anns = defaultdict(list)
13 |     anns_idx = dict()
14 |     for i in range(0, len(data['annotations'])):
15 |         anns[data['annotations'][i]['image_id']].append(data['annotations'][i])
16 |         anns_idx[data['annotations'][i]['id']] = i
17 |     print("Dictionaries built.")
18 |     return anns, anns_idx
19 | 
20 | 
21 | def resizeImageAndBoundingBoxes(imgFile, bboxes, targetImgW, targetImgH, outputImgFile):
22 |     print("Reading image {0} ...".format(imgFile))
23 |     img = cv2.imread(imgFile)
24 | 
25 |     seq = iaa.Sequential([
26 |         iaa.CropToSquare(position="center"),
27 |         # crop the image to a square shape with the center of the original image as the center of the cropped image
28 |         iaa.Resize({"height": targetImgH, "width": targetImgW}),
29 |         # resize the cropped image to the target size of (targetImgW, targetImgH)
30 |         iaa.PadToFixedSize(width=targetImgW, height=targetImgH)
31 |         # add padding to the image to make sure it has dimensions of (targetImgW, targetImgH)
32 |     ])
33 |     image_aug, bbs_aug = seq(image=img, bounding_boxes=bboxes)
34 | 
35 |     print("Writing resized image {0} ...".format(outputImgFile))
36 |     cv2.imwrite(outputImgFile, image_aug)
37 |     print("Resized image {0} written successfully.".format(outputImgFile))
38 | 
39 |     return bbs_aug
40 | 
41 | 
42 | @hydra.main(config_path="../../../config/", config_name="config", version_base=None)
43 | def resize(cfg):
44 |     image_dir = os.path.join(cfg.project_path, cfg.preproc.orig.img_path)
45 |     annotations_file = os.path.join(cfg.datasets.path, cfg.datasets.original_data,
46 |                                     'preprocessed_' + cfg.datasets.filenames.dataset)
47 |     target_img_w = cfg.preproc.img_size.width
48 |     target_img_h = cfg.preproc.img_size.height
49 |     output_image_dir = os.path.join(cfg.datasets.path, cfg.datasets.img_path)
50 |     output_annotations_file = os.path.join(cfg.datasets.path, cfg.preproc.preprocessed_annotation)
51 | 
52 |     if not os.path.exists(output_image_dir):
53 |         os.makedirs(output_image_dir)
54 | 
55 |     print("Loading annotations file...")
56 |     data = json.load(open(annotations_file, 'r'))
57 |     print("Annotations file loaded.")
58 | 
59 |     annotations, annotationsIdx = build_dictionaries(data)
60 | 
61 |     for img in data['images']:
62 |         print("Processing image file {0} and its bounding boxes...".format(img['file_name']))
63 | 
64 |         annList = annotations[img['id']]
65 | 
66 |         # Convert COCO format bounding boxes to imgaug format
67 |         bboxesList = []
68 |         for ann in annList:
69 |             bboxData = ann['bbox']
70 |             bboxesList.append(
71 |                 BoundingBox(x1=bboxData[0], y1=bboxData[1], x2=bboxData[0] + bboxData[2], y2=bboxData[1] + bboxData[3]))
72 | 
73 |         imgFullPath = os.path.join(image_dir, img['file_name'])
74 |         outputImgFullPath = os.path.join(output_image_dir, img['file_name'])
75 | 
76 |         outNewBBoxes = resizeImageAndBoundingBoxes(imgFullPath, bboxesList,
77 |                                                    target_img_w, target_img_h, outputImgFullPath)
78 | 
79 |         for i in range(0, len(annList)):
80 |             annId = annList[i]['id']
81 | 
82 |             x1_clipped = max(0, outNewBBoxes[i].x1)
83 |             y1_clipped = max(0, outNewBBoxes[i].y1)
84 |             x2_clipped = min(target_img_w, 
outNewBBoxes[i].x2) 85 | y2_clipped = min(target_img_h, outNewBBoxes[i].y2) 86 | width_clipped = x2_clipped - x1_clipped 87 | height_clipped = y2_clipped - y1_clipped 88 | 89 | data['annotations'][annotationsIdx[annId]]['bbox'][0] = round(float(x1_clipped), 1) 90 | data['annotations'][annotationsIdx[annId]]['bbox'][1] = round(float(y1_clipped), 1) 91 | data['annotations'][annotationsIdx[annId]]['bbox'][2] = round(float(width_clipped), 1) 92 | data['annotations'][annotationsIdx[annId]]['bbox'][3] = round(float(height_clipped), 1) 93 | 94 | img['width'] = target_img_w 95 | img['height'] = target_img_h 96 | 97 | print("Writing modified annotations to file...") 98 | with open(output_annotations_file, 'w') as outfile: 99 | json.dump(data, outfile) 100 | 101 | print("Finished.") 102 | 103 | return 104 | 105 | 106 | if __name__ == '__main__': 107 | resize() 108 | -------------------------------------------------------------------------------- /scripts/sh/data_converter.sh: -------------------------------------------------------------------------------- 1 | python scripts/py/data_converter/coco_to_yolo.py datasets.dataset_type=train 2 | python scripts/py/data_converter/coco_to_yolo.py datasets.dataset_type=val 3 | python scripts/py/data_converter/coco_to_yolo.py datasets.dataset_type=test -------------------------------------------------------------------------------- /scripts/sh/datasets_preparation.sh: -------------------------------------------------------------------------------- 1 | python scripts/py/dataset_preparation/split1.py preproc.split_percentage=0.6 2 | python scripts/py/dataset_preparation/run_augmentation.py 3 | python scripts/py/dataset_preparation/split2.py preproc.split_percentage=0.8 -------------------------------------------------------------------------------- /scripts/sh/preprocessing.sh: -------------------------------------------------------------------------------- 1 | wget https://drive.google.com/file/d/1xW63RZTvxrnLzTzpUx0kkh8d9IWepN4_/view?usp=sharing -P data/orig 2 | wget https://drive.google.com/file/d/1deqYC1PmjpMYDQP4DrELxTr25MFGGnzo/view?usp=share_link -P data/orig 3 | 4 | 5 | unzip ./data/orig/oral1.zip -d ./data/orig/tmp/ 6 | python scripts/py/preprocessing/clean_data.py 7 | python scripts/py/preprocessing/resize_image.py preproc.img_size.width=640 preproc.img_size.height=640 8 | 9 | mkdir data/coco 10 | mkdir data/yolo 11 | 12 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import hydra 3 | 4 | from detectron2.data import build_detection_test_loader 5 | from detectron2.engine import DefaultPredictor 6 | from detectron2.evaluation import COCOEvaluator, inference_on_dataset 7 | from ultralytics import YOLO 8 | 9 | from models.detr.train import DetrTrainer 10 | from scripts.py.prepare_config import prepare_config 11 | 12 | @hydra.main(config_path="./config/", config_name="config", version_base=None) 13 | def test(cfg): 14 | config = prepare_config(cfg, "test") 15 | 16 | if cfg.model == 'yolo': 17 | model_path = os.path.join(cfg.project_path, cfg.yolo.parameters.output_dir, cfg.yolo.yolo_model_path) 18 | print(model_path) 19 | model = YOLO(model_path) # load a custom model 20 | # Validate the model 21 | model.val(**config) # no arguments needed, dataset and settings remembered 22 | 23 | if cfg.model == 'fasterRCNN': 24 | predictor = DefaultPredictor(config) 25 | 26 | evaluator = 
COCOEvaluator(cfg.fastercnn.parameters.test_dataset_name, config, False, 27 | output_dir=cfg.fastercnn.parameters.output_dir) 28 | test_loader = build_detection_test_loader(config, cfg.fastercnn.parameters.test_dataset_name) 29 | inference_on_dataset(predictor.model, test_loader, evaluator) 30 | 31 | if cfg.model == 'detr': 32 | detr = DetrTrainer(**config) 33 | train_dataset, _, test_dataset = detr.create_dataset() 34 | train_dataloader = detr.data_loader(train_dataset, batch_size=config['train_batch_size']) 35 | test_dataloader = detr.data_loader(test_dataset, batch_size=config['test_batch_size']) 36 | model_path = os.path.join(config["output_path"], config["model_path"]) 37 | model = detr.build_model(train_dataloader, test_dataloader) 38 | model = model.load_from_checkpoint(model_path, **config) 39 | detr.evaluation(test_dataset, test_dataloader, model) 40 | 41 | 42 | if __name__ == '__main__': 43 | test() 44 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | import os 3 | 4 | from ultralytics import YOLO 5 | from scripts.py.prepare_config import prepare_config 6 | from models.fastercnn.train import FastercnnTrainer 7 | from models.detr.train import DetrTrainer 8 | 9 | 10 | @hydra.main(config_path="./config/", config_name="config", version_base=None) 11 | def train(cfg): 12 | config = prepare_config(cfg, "train") 13 | 14 | if cfg.model == 'yolo': 15 | model_path = os.path.join(cfg.project_path, cfg.config.actual_config_path, cfg.yolo.yolo_config.model_config) 16 | yolo_model_path = os.path.join(cfg.project_path, cfg.models.path, 'yolo', cfg.yolo.yolo_model) 17 | 18 | model = YOLO(model_path).load(yolo_model_path) # build from YAML and transfer weights 19 | model.train(**config) # Train the model 20 | 21 | if cfg.model == 'fasterRCNN': 22 | trainer = FastercnnTrainer(config, cfg.training.early_stopping.patience) 23 | trainer.resume_or_load(resume=False) 24 | try: 25 | trainer.train() 26 | except RuntimeError: 27 | print(f"\033[32mEarly stopping triggered \033[0m") 28 | 29 | if cfg.model == "detr": 30 | DetrTrainer(**config).main() 31 | 32 | 33 | if __name__ == '__main__': 34 | train() 35 | -------------------------------------------------------------------------------- /view_data.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | import os 3 | import fiftyone as fo 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | def convert_bbox_format(bbox, img_width, img_height): 9 | x, y, w, h = bbox 10 | x1 = int((x - w / 2) * img_width) 11 | y1 = int((y - h / 2) * img_height) 12 | x2 = int((x + w / 2) * img_width) 13 | y2 = int((y + h / 2) * img_height) 14 | return x1, y1, x2, y2 15 | 16 | 17 | @hydra.main(config_path="./config/", config_name="config", version_base=None) 18 | def view(cfg): 19 | if cfg.dataset == 'yolo': 20 | # Load the YOLO labels and images 21 | label_folder = os.path.join(cfg.datasets.path, 'yolo', cfg.datasets.dataset_type, 'labels') 22 | image_folder = os.path.join(cfg.datasets.path, 'yolo', cfg.datasets.dataset_type, 'images') 23 | output_folder = os.path.join(cfg.project_path, 'outputs', 'yolo', 'annotated_images_visualization') 24 | 25 | if not os.path.exists(output_folder): 26 | os.makedirs(output_folder) 27 | 28 | for file_name in os.listdir(label_folder): 29 | # Load the label file 30 | label_file = os.path.join(label_folder, file_name) 31 | with open(label_file, 'r') as f: 
32 | label_str = f.read() 33 | label_list = label_str.strip().split('\n') 34 | labels = [] 35 | for label in label_list: 36 | label_parts = label.strip().split(' ') 37 | label_class = int(label_parts[0]) 38 | label_bbox = list(map(float, label_parts[1:])) 39 | labels.append([label_class] + label_bbox) 40 | 41 | # Load the corresponding image 42 | img_file = os.path.join(image_folder, file_name.replace('.txt', '.jpg')) 43 | img = cv2.imread(img_file) 44 | 45 | # Draw the bounding boxes on the image 46 | for label in labels: 47 | label_class = label[0] 48 | bbox = label[1:] 49 | x1, y1, x2, y2 = convert_bbox_format(bbox, img.shape[1], img.shape[0]) 50 | color = tuple(map(int, np.random.randint(0, 256, 3))) 51 | cv2.rectangle(img, (x1, y1), (x2, y2), color, 2) 52 | cv2.putText(img, str(label_class), (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1) 53 | 54 | # Save the annotated image 55 | output_file = os.path.join(output_folder, file_name.replace('.txt', '.jpg')) 56 | cv2.imwrite(output_file, img) 57 | 58 | 59 | elif cfg.dataset == 'coco': 60 | img_path = os.path.join(cfg.project_path, cfg.preproc.augmentation.img_path) 61 | coco_file = os.path.join(cfg.datasets.path, 'coco', cfg.datasets.dataset_type + '.json') 62 | 63 | dataset = fo.Dataset.from_dir( 64 | dataset_type=fo.types.COCODetectionDataset, 65 | data_path=img_path, 66 | labels_path=coco_file, 67 | ) 68 | 69 | session = fo.launch_app(dataset) 70 | session.wait() 71 | 72 | 73 | if __name__ == '__main__': 74 | view() 75 | --------------------------------------------------------------------------------