├── .gitignore ├── LICENSE ├── README.md └── notebooks ├── pytorch-mask-r-cnn-onnx-export-colab.ipynb ├── pytorch-mask-r-cnn-onnx-export.ipynb ├── pytorch-mask-r-cnn-training-colab.ipynb ├── pytorch-mask-r-cnn-training-cpu.ipynb ├── pytorch-mask-r-cnn-training-windows.ipynb ├── pytorch-mask-r-cnn-training-with-coco-metrics.ipynb ├── pytorch-mask-r-cnn-training-with-coco-style-evaluation.ipynb ├── pytorch-mask-r-cnn-training.ipynb └── windows_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | /notebooks/pretrained_checkpoints/* 2 | /notebooks/pytorch-mask-r-cnn-instance-segmentation/* 3 | /notebooks/Datasets/* 4 | /notebooks/.ipynb_checkpoints/* 5 | /notebooks/*.ttf 6 | /notebooks/*.jpg 7 | /notebooks/*.png 8 | /notebooks/*.onnx 9 | /notebooks/__pycache__/* 10 | 11 | 12 | # Temporary 13 | /notebooks/pytorch-mask-r-cnn-training (copy).ipynb 14 | /notebooks/pytorch-mask-r-cnn-training-windows-arc.ipynb -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Christian J. Mills 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Training Mask R-CNN Models with PyTorch 2 | 3 | This tutorial provides a step-by-step guide on training Mask R-CNN models with PyTorch. 4 | 5 | 6 | 7 | ## Training 8 | 9 | | Jupyter Notebook | Google Colab | 10 | | ------------------------------------------------------------ | ------------------------------------------------------------ | 11 | | [GitHub Repository](https://github.com/cj-mills/pytorch-mask-rcnn-tutorial-code/blob/main/notebooks/pytorch-mask-r-cnn-training.ipynb) | [Open In Colab](https://colab.research.google.com/github/cj-mills/pytorch-mask-rcnn-tutorial-code/blob/main/notebooks/pytorch-mask-r-cnn-training-colab.ipynb) | 12 | 13 | 14 | 15 | ## ONNX Export 16 | 17 | | Jupyter Notebook | Google Colab | 18 | | ------------------------------------------------------------ | ------------------------------------------------------------ | 19 | | [GitHub Repository](https://github.com/cj-mills/pytorch-mask-rcnn-tutorial-code/blob/main/notebooks/pytorch-mask-r-cnn-onnx-export.ipynb) | [Open In Colab](https://colab.research.google.com/github/cj-mills/pytorch-mask-rcnn-tutorial-code/blob/main/notebooks/pytorch-mask-r-cnn-onnx-export-colab.ipynb) | 20 | 21 | 22 | 23 | ## License 24 | 25 | Distributed under the MIT License. See [`LICENSE`](./LICENSE) for more information. 
# --------------------------------------------------------------------------------
# /notebooks/windows_utils.py
# --------------------------------------------------------------------------------
"""Dataset class and helpers that build instance-segmentation targets
(masks, bounding boxes, labels) from polygon annotations."""

from torch.utils.data import Dataset
import torch
import torchvision
# Intentionally placed ahead of the transforms.v2 import below; do not reorder.
torchvision.disable_beta_transforms_warning()
from torchvision.tv_tensors import BoundingBoxes, Mask
import torchvision.transforms.v2 as transforms

from PIL import Image, ImageDraw
import numpy as np


class StudentIDDataset(Dataset):
    """
    PyTorch Dataset for a collection of images and their polygon annotations.

    Each item is an RGB image together with a target dictionary holding the
    segmentation masks, the bounding boxes derived from those masks, and the
    class-index labels.
    """

    def __init__(self, img_keys, annotation_df, img_dict, class_to_idx, transforms=None):
        """
        Constructor for the StudentIDDataset class.

        Parameters:
        img_keys (list): List of unique identifiers for images.
        annotation_df (DataFrame): DataFrame containing the image annotations,
            indexed by image identifier.
        img_dict (dict): Dictionary mapping image identifiers to image file paths.
        class_to_idx (dict): Dictionary mapping class labels to indices.
        transforms (callable, optional): Optional transform called as
            ``transforms(image, target)`` on every sample.
        """
        # Zero-argument super() runs Dataset's initializer; the previous
        # super(Dataset, self) started the MRO lookup *after* Dataset and skipped it.
        super().__init__()

        self._img_keys = img_keys  # List of image keys
        self._annotation_df = annotation_df  # DataFrame containing annotations
        self._img_dict = img_dict  # Dictionary mapping image keys to image paths
        self._class_to_idx = class_to_idx  # Dictionary mapping class names to class indices
        self._transforms = transforms  # Image transforms to be applied

    def __len__(self) -> int:
        """
        Returns the length of the dataset.

        Returns:
        int: The number of items in the dataset.
        """
        return len(self._img_keys)

    def __getitem__(self, index):
        """
        Fetch an item from the dataset at the specified index.

        Parameters:
        index (int): Index of the item to fetch from the dataset.

        Returns:
        tuple: A tuple containing the image and its associated target (annotations).
        """
        # Retrieve the key for the image at the specified index
        img_key = self._img_keys[index]
        # Get the annotations for this image
        annotation = self._annotation_df.loc[img_key]
        # Load the image and its target (segmentation masks, bounding boxes and labels)
        image, target = self._load_image_and_target(annotation)

        # Apply the transformations, if any
        if self._transforms:
            image, target = self._transforms(image, target)

        return image, target

    def _load_image_and_target(self, annotation):
        """
        Load an image and its target (masks, bounding boxes and labels).

        Parameters:
        annotation (pandas.Series): The annotations for an image. Its ``name``
            is used as the key into the image-path dictionary and its
            ``'shapes'`` entry is read as a sequence of dicts with ``'label'``
            and ``'points'`` keys.

        Returns:
        tuple: A tuple containing the image and a dictionary with 'masks',
            'boxes' and 'labels' keys.

        Raises:
        KeyError: If the image key or one of the shape labels is unknown.
        """
        # Retrieve the file path of the image
        filepath = self._img_dict[annotation.name]
        # Open the image file and convert it to RGB. The context manager closes
        # the underlying file handle; convert() returns an independent, loaded copy.
        with Image.open(filepath) as img_file:
            image = img_file.convert('RGB')
        width, height = image.size  # PIL reports (width, height)

        shapes = annotation['shapes']

        # Convert the class labels to indices. Building the tensor directly as
        # int64 avoids a lossy round-trip through float32.
        labels = torch.tensor(
            [self._class_to_idx[shape['label']] for shape in shapes],
            dtype=torch.int64,
        )

        # Convert polygons to mask images, one single-channel tensor per shape
        to_tensor = transforms.PILToTensor()
        mask_tensors = []
        for shape in shapes:
            xy = [tuple(p) for p in shape['points']]
            mask_img = create_polygon_mask(image.size, xy)
            mask_tensors.append(Mask(to_tensor(mask_img), dtype=torch.bool))

        if mask_tensors:
            masks = Mask(torch.concat(mask_tensors))
        else:
            # No annotated shapes: torch.concat([]) would raise, so build an
            # explicit empty (0, H, W) mask stack instead.
            masks = Mask(torch.zeros((0, height, width), dtype=torch.bool))

        # Generate bounding box annotations from segmentation masks
        bboxes = BoundingBoxes(
            data=torchvision.ops.masks_to_boxes(masks),
            format='xyxy',
            canvas_size=(height, width),  # canvas_size is (height, width)
        )

        return image, {'masks': masks, 'boxes': bboxes, 'labels': labels}


def tuple_batch(batch):
    """
    Transpose a batch of samples into per-field tuples.

    Parameters:
    batch (iterable): An iterable of equally sized sequences, e.g. a list of
        ``(image, target)`` pairs.

    Returns:
    tuple: One tuple per field, e.g. ``((image, ...), (target, ...))``.
        An empty batch yields an empty tuple.
    """
    return tuple(zip(*batch))


def create_polygon_mask(image_size, vertices):
    """
    Create a grayscale image with a white polygonal area on a black background.

    Parameters:
    - image_size (tuple): A tuple representing the dimensions (width, height) of the image.
    - vertices (list): A list of tuples, each containing the x, y coordinates of a vertex
                       of the polygon. Vertices should be in clockwise or counter-clockwise order.

    Returns:
    - PIL.Image.Image: A PIL Image object containing the polygonal mask.
    """
    # Create a new black image with the given dimensions
    mask_img = Image.new('L', image_size, 0)

    # Draw the polygon on the image. The area inside the polygon will be white (255).
    ImageDraw.Draw(mask_img, 'L').polygon(vertices, fill=255)

    # Return the image with the drawn polygon
    return mask_img
# --------------------------------------------------------------------------------