├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app.py ├── docker-compose.yml ├── flowcharts ├── 1_Folder Structure.png ├── 2_Training Pipeline.png ├── 3_Data Ingestion.png ├── 4_Data transformation.png ├── 5_Model trainer.png ├── 6_Model Evaluation.png ├── 7_Model pusher.png └── 8_Prediction pipeline.png ├── helmet ├── __init__.py ├── components │ ├── __init__.py │ ├── data_ingestion.py │ ├── data_transformation.py │ ├── model_evaluation.py │ ├── model_pusher.py │ └── model_trainer.py ├── configuration │ ├── __init__.py │ └── s3_operations.py ├── constants │ └── __init__.py ├── entity │ ├── __init__.py │ ├── artifacts_entity.py │ └── config_entity.py ├── exception │ └── __init__.py ├── logger │ └── __init__.py ├── ml │ ├── __init__.py │ ├── detection │ │ ├── README.md │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── engine.py │ │ ├── group_by_aspect_ratio.py │ │ ├── presets.py │ │ ├── train.py │ │ ├── transforms.py │ │ └── utils.py │ ├── feature │ │ └── helmet_detection.py │ └── models │ │ └── model_optimiser.py ├── pipeline │ ├── __init__.py │ ├── prediction_pipeline.py │ └── train_pipeline.py └── utils │ ├── __init__.py │ └── main_utils.py ├── notebooks └── Experiment.ipynb ├── requirements.txt ├── setup.py └── tools ├── cmd.txt └── voc2coco.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | data/* 131 | artifacts/* -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | COPY . /helmet 3 | WORKDIR /helmet 4 | RUN pip install --upgrade pip 5 | RUN apt-get update 6 | RUN apt-get install ffmpeg libsm6 libxext6 -y 7 | RUN pip install -r requirements.txt 8 | RUN conda install -c conda-forge pycocotools 9 | #RUN pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu 10 | RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 11 | RUN pip install -e . 12 | CMD ["python","app.py"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 BAPPY AHMED 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
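The Dockerfile above bakes the project into an image whose entry point is python app.py, and docker-compose.yml (further down in this dump) maps host port 80 to the container's port 8080, the APP_PORT defined in helmet/constants. The sketch below is not part of the repository; it assumes the container is already running locally with that port mapping and that sample.jpg is a hypothetical test image, and it shows how the /train and /predict endpoints served by app.py could be exercised from Python.

```python
# Minimal client sketch (assumptions: the service is reachable at http://localhost:80
# through the docker-compose port mapping, and "sample.jpg" is a placeholder file name).
import requests

BASE_URL = "http://localhost:80"

# GET /train kicks off the full TrainPipeline defined in app.py.
train_response = requests.get(f"{BASE_URL}/train")
print(train_response.text)

# POST /predict expects a multipart field named "image_file",
# matching the parameter declared in app.py.
with open("sample.jpg", "rb") as image:
    predict_response = requests.post(f"{BASE_URL}/predict", files={"image_file": image})
print(predict_response.status_code, predict_response.text)
```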
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Helmet-Detection-PyTorch 2 | 3 | ### Download the data from here: 4 | 5 | https://drive.google.com/file/d/1oYBdYcQKPGPfqj7n4is-10k17vL6Cmlp/view?usp=sharing 6 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, File 2 | from uvicorn import run as app_run 3 | from fastapi.middleware.cors import CORSMiddleware 4 | from fastapi.responses import Response, JSONResponse 5 | from helmet.constants import APP_HOST, APP_PORT 6 | from helmet.pipeline.train_pipeline import TrainPipeline 7 | from helmet.pipeline.prediction_pipeline import PredictionPipeline 8 | 9 | 10 | app = FastAPI() 11 | 12 | origins = ["*"] 13 | 14 | app.add_middleware( 15 | CORSMiddleware, 16 | allow_origins=origins, 17 | allow_credentials=True, 18 | allow_methods=["*"], 19 | allow_headers=["*"], 20 | ) 21 | 22 | 23 | @app.get("/train") 24 | async def training(): 25 | try: 26 | train_pipeline = TrainPipeline() 27 | 28 | train_pipeline.run_pipeline() 29 | 30 | return Response("Training successful !!") 31 | 32 | except Exception as e: 33 | return Response(f"Error Occurred! {e}") 34 | 35 | 36 | @app.post("/predict") 37 | async def prediction(image_file: bytes = File(description="A file read as bytes")): 38 | try: 39 | prediction_pipeline = PredictionPipeline() 40 | final_output = prediction_pipeline.run_pipeline(image_file) 41 | # print(final_output) 42 | # return JSONResponse(content= final_output, status_code=200) 43 | return final_output 44 | except Exception as e: 45 | return JSONResponse(content=f"Error Occurred!
{e}", status_code=500) 46 | 47 | 48 | if __name__ == "__main__": 49 | app_run(app, host=APP_HOST, port=APP_PORT) -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "1" 2 | services: 3 | web: 4 | image: ${IMAGE_NAME} 5 | ports: 6 | - "80:8080" 7 | environment: 8 | - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 9 | - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 10 | - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} -------------------------------------------------------------------------------- /flowcharts/1_Folder Structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/1_Folder Structure.png -------------------------------------------------------------------------------- /flowcharts/2_Training Pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/2_Training Pipeline.png -------------------------------------------------------------------------------- /flowcharts/3_Data Ingestion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/3_Data Ingestion.png -------------------------------------------------------------------------------- /flowcharts/4_Data transformation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/4_Data transformation.png -------------------------------------------------------------------------------- /flowcharts/5_Model trainer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/5_Model trainer.png -------------------------------------------------------------------------------- /flowcharts/6_Model Evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/6_Model Evaluation.png -------------------------------------------------------------------------------- /flowcharts/7_Model pusher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/7_Model pusher.png -------------------------------------------------------------------------------- /flowcharts/8_Prediction pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/8_Prediction pipeline.png -------------------------------------------------------------------------------- /helmet/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/__init__.py -------------------------------------------------------------------------------- /helmet/components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/components/__init__.py -------------------------------------------------------------------------------- /helmet/components/data_ingestion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from zipfile import ZipFile 4 | from helmet.entity.config_entity import DataIngestionConfig 5 | from helmet.entity.artifacts_entity import DataIngestionArtifacts 6 | from helmet.configuration.s3_operations import S3Operation 7 | from helmet.exception import HelmetException 8 | from helmet.logger import logging 9 | from helmet.constants import * 10 | 11 | 12 | class DataIngestion: 13 | def __init__(self, data_ingestion_config: DataIngestionConfig, s3_operations: S3Operation): 14 | self.data_ingestion_config = data_ingestion_config 15 | self.s3_operations = s3_operations 16 | 17 | 18 | def get_data_from_s3(self) -> None: 19 | try: 20 | logging.info("Entered the get_data_from_s3 method of Data ingestion class") 21 | os.makedirs(self.data_ingestion_config.DATA_INGESTION_ARTIFACTS_DIR, exist_ok=True) 22 | 23 | self.s3_operations.read_data_from_s3(self.data_ingestion_config.ZIP_FILE_NAME, 24 | self.data_ingestion_config.BUCKET_NAME, 25 | self.data_ingestion_config.ZIP_FILE_PATH) 26 | logging.info("Exited the get_data_from_s3 method of Data ingestion class") 27 | except Exception as e: 28 | raise HelmetException(e, sys) from e 29 | 30 | 31 | def unzip_and_clean(self): 32 | logging.info("Entered the unzip_and_clean method of Data ingestion class") 33 | try: 34 | with ZipFile(self.data_ingestion_config.ZIP_FILE_PATH, 'r') as zip_ref: 35 | zip_ref.extractall(self.data_ingestion_config.ZIP_FILE_DIR) 36 | logging.info("Exited the unzip_and_clean method of Data ingestion class") 37 | 38 | return self.data_ingestion_config.TRAIN_DATA_ARTIFACT_DIR, self.data_ingestion_config.TEST_DATA_ARTIFACT_DIR, self.data_ingestion_config.VALID_DATA_ARTIFACT_DIR 39 | except Exception as e: 40 | raise HelmetException(e, sys) from e 41 | 42 | 43 | 44 | def initiate_data_ingestion(self) -> DataIngestionArtifacts: 45 | logging.info("Entered the initiate_data_ingestion method of Data ingestion class") 46 | try: 47 | self.get_data_from_s3() 48 | 49 | logging.info("Fetched the data from S3 bucket") 50 | 51 | train_file_path, test_file_path, valid_file_path= self.unzip_and_clean() 52 | 53 | logging.info("Unzipped file and splited into train, test and valid") 54 | 55 | data_ingestion_artifact = DataIngestionArtifacts(train_file_path=train_file_path, 56 | test_file_path=test_file_path, 57 | valid_file_path=valid_file_path) 58 | 59 | logging.info("Exited the initiate_data_ingestion method of Data ingestion class") 60 | 61 | logging.info(f"Data ingestion artifact: {data_ingestion_artifact}") 62 | 63 | return data_ingestion_artifact 64 | 65 | except Exception as e: 66 | raise HelmetException(e, sys) from e -------------------------------------------------------------------------------- /helmet/components/data_transformation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from 
pycocotools.coco import COCO 4 | import albumentations as A 5 | from albumentations.pytorch import ToTensorV2 6 | from helmet.logger import logging 7 | from helmet.exception import HelmetException 8 | from helmet.ml.feature.helmet_detection import HelmetDetection 9 | from helmet.constants import * 10 | from helmet.utils.main_utils import save_object 11 | from helmet.entity.config_entity import DataTransformationConfig 12 | from helmet.entity.artifacts_entity import DataIngestionArtifacts, DataTransformationArtifacts 13 | 14 | 15 | class DataTransformation: 16 | def __init__(self, data_transformation_config: DataTransformationConfig, data_ingestion_artifact: DataIngestionArtifacts): 17 | self.data_transformation_config = data_transformation_config 18 | self.data_ingestion_artifact = data_ingestion_artifact 19 | 20 | 21 | 22 | def number_of_classes(self): 23 | 24 | try: 25 | 26 | coco = COCO(os.path.join(self.data_ingestion_artifact.train_file_path, ANNOTATIONS_COCO_JSON_FILE)) 27 | categories = coco.cats 28 | classes = [i[1]['name'] for i in categories.items()] 29 | n_classes = len(classes) 30 | 31 | return n_classes 32 | except Exception as e: 33 | raise HelmetException(e, sys) from e 34 | 35 | 36 | 37 | def get_transforms(self, train=False): 38 | try: 39 | if train: 40 | transform = A.Compose([ 41 | A.Resize(INPUT_SIZE, INPUT_SIZE), 42 | A.HorizontalFlip(p=HORIZONTAL_FLIP), 43 | A.VerticalFlip(p=VERTICAL_FLIP), 44 | A.RandomBrightnessContrast(p=RANDOM_BRIGHTNESS_CONTRAST), 45 | A.ColorJitter(p=COLOR_JITTER), 46 | ToTensorV2() 47 | ], bbox_params=A.BboxParams(format=BBOX_FORMAT)) 48 | else: 49 | transform = A.Compose([ 50 | A.Resize(INPUT_SIZE, INPUT_SIZE), 51 | ToTensorV2() 52 | ], bbox_params=A.BboxParams(format=BBOX_FORMAT)) 53 | return transform 54 | except Exception as e: 55 | raise HelmetException(e, sys) from e 56 | 57 | 58 | 59 | def initiate_data_transformation(self) -> DataTransformationArtifacts: 60 | 61 | try: 62 | logging.info("Entered the initiate_data_transformation method of Data transformation class") 63 | 64 | n_classes = self.number_of_classes() 65 | print(n_classes) 66 | 67 | logging.info(f"Total number of classes: {n_classes}") 68 | 69 | train_dataset = HelmetDetection(root=self.data_transformation_config.ROOT_DIR, 70 | split=self.data_transformation_config.TRAIN_SPLIT, 71 | transforms=self.get_transforms(True)) 72 | 73 | logging.info(f"Training dataset prepared") 74 | 75 | test_dataset = HelmetDetection(root=self.data_transformation_config.ROOT_DIR, 76 | split=self.data_transformation_config.TEST_SPLIT, 77 | transforms=self.get_transforms(False)) 78 | 79 | logging.info(f"Testing dataset prepared") 80 | 81 | save_object(self.data_transformation_config.TRAIN_TRANSFORM_OBJECT_FILE_PATH, train_dataset) 82 | save_object(self.data_transformation_config.TEST_TRANSFORM_OBJECT_FILE_PATH, test_dataset) 83 | 84 | logging.info("Saved the train transformed object") 85 | 86 | data_transformation_artifact = DataTransformationArtifacts( 87 | transformed_train_object=self.data_transformation_config.TRAIN_TRANSFORM_OBJECT_FILE_PATH, 88 | transformed_test_object=self.data_transformation_config.TEST_TRANSFORM_OBJECT_FILE_PATH, 89 | number_of_classes=n_classes) 90 | 91 | logging.info("Exited the initiate_data_transformation method of Data transformation class") 92 | 93 | return data_transformation_artifact 94 | 95 | except Exception as e: 96 | raise HelmetException(e, sys) from e 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- 
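DataTransformation.get_transforms above assembles the augmentation policy from the constants module (INPUT_SIZE = 416, flip probabilities of 0.3, and so on) into an albumentations Compose whose bbox_params expect COCO-style [x, y, width, height] boxes. The following self-contained sketch is not part of the repository: it uses a random dummy image and a made-up box purely to show how that train-time pipeline behaves, whereas in the project the transform is consumed by the HelmetDetection dataset rather than called directly.

```python
# Illustrative sketch of the train-time pipeline built in get_transforms(train=True).
# The image and box below are placeholders, not project data.
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2

INPUT_SIZE = 416  # mirrors helmet/constants

transform = A.Compose(
    [
        A.Resize(INPUT_SIZE, INPUT_SIZE),
        A.HorizontalFlip(p=0.3),
        A.VerticalFlip(p=0.3),
        A.RandomBrightnessContrast(p=0.1),
        A.ColorJitter(p=0.1),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="coco"),
)

image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # dummy HxWxC image
# COCO boxes are [x_min, y_min, width, height]; the trailing value is the class id,
# which albumentations passes through when no label_fields are declared.
boxes = [[50, 60, 120, 150, 1]]

augmented = transform(image=image, bboxes=boxes)
print(augmented["image"].shape)  # torch.Size([3, 416, 416])
print(augmented["bboxes"])       # box rescaled to the 416x416 frame
```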
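The model_trainer and model_evaluation components that follow both hand a tiny collate_fn, tuple(zip(*batch)), to their DataLoaders. Detection samples are (image, target) pairs whose tensors differ in shape from image to image, so the default collate, which tries to stack them, would fail; zip(*batch) simply regroups the pairs into a tuple of images and a tuple of targets. A stand-in sketch (strings and plain dicts instead of real tensors, not part of the repository):

```python
# Stand-in sketch of the collate_fn used by the trainer and evaluator DataLoaders.
# Real batches hold (image_tensor, target_dict) pairs; strings are used here
# only to make the regrouping visible.
batch = [
    ("image_a", {"boxes": [[0, 0, 10, 10]], "labels": [1]}),
    ("image_b", {"boxes": [[5, 5, 20, 30]], "labels": [2]}),
]

images, targets = tuple(zip(*batch))
print(images)   # ('image_a', 'image_b')
print(targets)  # (target dict for image_a, target dict for image_b)
```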
/helmet/components/model_evaluation.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import torch 5 | import math 6 | import pandas as pd 7 | import numpy as np 8 | from tqdm import tqdm 9 | from torch.utils.data import DataLoader 10 | from helmet.constants import * 11 | from helmet.logger import logging 12 | from helmet.exception import HelmetException 13 | from helmet.utils.main_utils import load_object 14 | from helmet.entity.config_entity import ModelEvaluationConfig 15 | from helmet.configuration.s3_operations import S3Operation 16 | from helmet.entity.artifacts_entity import ModelTrainerArtifacts, DataTransformationArtifacts, ModelEvaluationArtifacts 17 | 18 | 19 | class ModelEvaluation: 20 | 21 | def __init__(self, model_evaluation_config:ModelEvaluationConfig, 22 | data_transformation_artifacts:DataTransformationArtifacts, 23 | model_trainer_artifacts:ModelTrainerArtifacts): 24 | 25 | self.model_evaluation_config = model_evaluation_config 26 | self.data_transformation_artifacts = data_transformation_artifacts 27 | self.model_trainer_artifacts = model_trainer_artifacts 28 | self.s3 = S3Operation() 29 | self.bucket_name = BUCKET_NAME 30 | 31 | 32 | 33 | @staticmethod 34 | def collate_fn(batch): 35 | """ 36 | This is our collating function for the train dataloader, 37 | it allows us to create batches of data that can be easily pass into the model 38 | """ 39 | try: 40 | return tuple(zip(*batch)) 41 | except Exception as e: 42 | raise HelmetException(e, sys) from e 43 | 44 | 45 | def get_model_from_s3(self) -> str: 46 | """ 47 | Method Name : predict 48 | Description : This method predicts the image. 49 | 50 | Output : Predictions 51 | """ 52 | logging.info("Entered the get_model_from_s3 method of PredictionPipeline class") 53 | try: 54 | # Loading the best model from s3 bucket 55 | predict_model_path = self.model_evaluation_config.BEST_MODEL_PATH 56 | best_model_path = self.s3.read_data_from_s3(TRAINED_MODEL_NAME, self.bucket_name, predict_model_path) 57 | logging.info("Exited the get_model_from_s3 method of PredictionPipeline class") 58 | return best_model_path 59 | 60 | except Exception as e: 61 | raise HelmetException(e, sys) from e 62 | 63 | 64 | 65 | def evaluate(self, model, dataloader, device): 66 | try: 67 | model.to(device) 68 | all_losses = [] 69 | all_losses_dict = [] 70 | 71 | for images, targets in tqdm(dataloader): 72 | images = list(image.to(device) for image in images) 73 | targets = [{k: torch.tensor(v).to(device) for k, v in t.items()} for t in targets] 74 | 75 | loss_dict = model(images, targets) # the model computes the loss automatically if we pass in targets 76 | losses = sum(loss for loss in loss_dict.values()) 77 | loss_dict_append = {k: v.item() for k, v in loss_dict.items()} 78 | loss_value = losses.item() 79 | 80 | all_losses.append(loss_value) 81 | all_losses_dict.append(loss_dict_append) 82 | 83 | if not math.isfinite(loss_value): 84 | print(f"Loss is {loss_value}, stopping training") # train if loss becomes infinity 85 | print(loss_dict) 86 | sys.exit(1) 87 | 88 | losses.backward() 89 | 90 | all_losses_dict = pd.DataFrame(all_losses_dict) # for printing 91 | 92 | print("loss: {:.6f},loss_classifier: {:.6f}, loss_box: {:.6f}, loss_rpn_box: {:.6f}, loss_object: {:.6f}".format( 93 | np.mean(all_losses), 94 | all_losses_dict['loss_classifier'].mean(), 95 | all_losses_dict['loss_box_reg'].mean(), 96 | all_losses_dict['loss_rpn_box_reg'].mean(), 97 | 
all_losses_dict['loss_objectness'].mean() 98 | )) 99 | return all_losses_dict, np.mean(all_losses) 100 | 101 | except Exception as e: 102 | raise HelmetException(e, sys) from e 103 | 104 | 105 | 106 | def initiate_model_evaluation(self) -> ModelEvaluationArtifacts: 107 | """ 108 | Method Name : initiate_model_evaluation 109 | Description : This function is used to initiate all steps of the model evaluation 110 | 111 | Output : Returns model evaluation artifact 112 | On Failure : Write an exception log and then raise an exception 113 | """ 114 | 115 | try: 116 | trained_model = torch.load(self.model_trainer_artifacts.trained_model_path) 117 | 118 | test_dataset = load_object(self.data_transformation_artifacts.transformed_test_object) 119 | 120 | test_loader = DataLoader(test_dataset, 121 | batch_size=self.model_evaluation_config.BATCH, 122 | shuffle=self.model_evaluation_config.SHUFFLE, 123 | num_workers=self.model_evaluation_config.NUM_WORKERS, 124 | collate_fn=self.collate_fn 125 | ) 126 | 127 | logging.info("loaded saved model") 128 | 129 | trained_model = trained_model.to(DEVICE) 130 | 131 | all_losses_dict, all_losses = self.evaluate(trained_model, test_loader, device=DEVICE) 132 | os.makedirs(self.model_evaluation_config.EVALUATED_MODEL_DIR, exist_ok=True) 133 | all_losses_dict.to_csv(self.model_evaluation_config.EVALUATED_LOSS_CSV_PATH, index=False) 134 | 135 | s3_model = self.get_model_from_s3() 136 | s3_model = torch.load(s3_model, map_location=torch.device(DEVICE)) 137 | 138 | s3_all_losses_dict, s3_all_losses = self.evaluate(s3_model,test_loader, device=DEVICE) 139 | 140 | if s3_all_losses > all_losses: 141 | # 0.03 > 0.02 142 | is_model_accepted = True 143 | 144 | model_evaluation_artifact = ModelEvaluationArtifacts( 145 | is_model_accepted=is_model_accepted, 146 | all_losses=all_losses) 147 | 148 | else: 149 | is_model_accepted = False 150 | 151 | model_evaluation_artifact = ModelEvaluationArtifacts( 152 | is_model_accepted=is_model_accepted, 153 | all_losses=s3_all_losses) 154 | 155 | logging.info("Exited the initiate_model_evaluation method of Model Evaluation class") 156 | return model_evaluation_artifact 157 | 158 | except Exception as e: 159 | raise HelmetException(e, sys) from e 160 | -------------------------------------------------------------------------------- /helmet/components/model_pusher.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from helmet.exception import HelmetException 3 | from helmet.logger import logging 4 | from helmet.entity.config_entity import ModelPusherConfig 5 | from helmet.entity.artifacts_entity import ModelPusherArtifacts 6 | from helmet.configuration.s3_operations import S3Operation 7 | 8 | 9 | class ModelPusher: 10 | 11 | def __init__(self, model_pusher_config: ModelPusherConfig, s3: S3Operation): 12 | 13 | self.model_pusher_config = model_pusher_config 14 | self.s3 = s3 15 | 16 | 17 | def initiate_model_pusher(self) -> ModelPusherArtifacts: 18 | """ 19 | Method Name : initiate_model_pusher 20 | Description : This method initiates model pusher. 
21 | 22 | Output : Model pusher artifact 23 | """ 24 | logging.info("Entered initiate_model_pusher method of ModelTrainer class") 25 | try: 26 | # Uploading the model to s3 bucket 27 | self.s3.upload_file( 28 | self.model_pusher_config.BEST_MODEL_PATH, 29 | self.model_pusher_config.S3_MODEL_KEY_PATH, 30 | self.model_pusher_config.BUCKET_NAME, 31 | remove=False, 32 | ) 33 | logging.info("Uploaded best model to s3 bucket") 34 | 35 | # Saving the model pusher artifacts 36 | model_pusher_artifact = ModelPusherArtifacts( 37 | bucket_name=self.model_pusher_config.BUCKET_NAME, 38 | s3_model_path=self.model_pusher_config.S3_MODEL_KEY_PATH, 39 | ) 40 | logging.info("Exited the initiate_model_pusher method of ModelTrainer class") 41 | return model_pusher_artifact 42 | 43 | except Exception as e: 44 | raise HelmetException(e, sys) from e -------------------------------------------------------------------------------- /helmet/components/model_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import numpy as np 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import torch 8 | from torchvision import models 9 | from torch.utils.data import DataLoader 10 | from helmet.logger import logging 11 | from helmet.exception import HelmetException 12 | from helmet.utils.main_utils import load_object 13 | from helmet.ml.models.model_optimiser import model_optimiser 14 | from helmet.entity.config_entity import ModelTrainerConfig 15 | from helmet.entity.artifacts_entity import DataTransformationArtifacts, ModelTrainerArtifacts 16 | from helmet.ml.detection.engine import train_one_epoch, evaluate 17 | 18 | 19 | class ModelTrainer: 20 | def __init__(self, data_transformation_artifacts: DataTransformationArtifacts, 21 | model_trainer_config: ModelTrainerConfig): 22 | """ 23 | :param data_transformation_artifacts: Output reference of data transformation artifact stage 24 | :param model_trainer_config: Configuration for model trainer 25 | """ 26 | 27 | self.data_transformation_artifacts = data_transformation_artifacts 28 | self.model_trainer_config = model_trainer_config 29 | 30 | 31 | def train(self, model, optimizer, loader, device, epoch): 32 | try: 33 | model.to(device) 34 | model.train() 35 | all_losses = [] 36 | all_losses_dict = [] 37 | 38 | for images, targets in tqdm(loader): 39 | images = list(image.to(device) for image in images) 40 | targets = [{k: torch.tensor(v).to(device) for k, v in t.items()} for t in targets] 41 | 42 | loss_dict = model(images, targets) # the model computes the loss automatically if we pass in targets 43 | losses = sum(loss for loss in loss_dict.values()) 44 | loss_dict_append = {k: v.item() for k, v in loss_dict.items()} 45 | loss_value = losses.item() 46 | 47 | all_losses.append(loss_value) 48 | all_losses_dict.append(loss_dict_append) 49 | 50 | if not math.isfinite(loss_value): 51 | print(f"Loss is {loss_value}, stopping training") # train if loss becomes infinity 52 | print(loss_dict) 53 | sys.exit(1) 54 | 55 | optimizer.zero_grad() 56 | losses.backward() 57 | optimizer.step() 58 | all_losses_dict = pd.DataFrame(all_losses_dict) # for printing 59 | 60 | print("Epoch {}, lr: {:.6f}, loss: {:.6f}, loss_classifier: {:.6f}, loss_box: {:.6f}, loss_rpn_box: {:.6f}, loss_object: {:.6f}".format( 61 | epoch, optimizer.param_groups[0]['lr'], np.mean(all_losses), 62 | all_losses_dict['loss_classifier'].mean(), 63 | all_losses_dict['loss_box_reg'].mean(), 64 | 
all_losses_dict['loss_rpn_box_reg'].mean(), 65 | all_losses_dict['loss_objectness'].mean() 66 | )) 67 | 68 | except Exception as e: 69 | raise HelmetException(e, sys) from e 70 | 71 | 72 | @staticmethod 73 | def collate_fn(batch): 74 | """ 75 | This is our collating function for the train dataloader, 76 | it allows us to create batches of data that can be easily pass into the model 77 | """ 78 | try: 79 | return tuple(zip(*batch)) 80 | except Exception as e: 81 | raise HelmetException(e, sys) from e 82 | 83 | 84 | 85 | def initiate_model_trainer(self,) -> ModelTrainerArtifacts: 86 | logging.info("Entered initiate_model_trainer method of ModelTrainer class") 87 | 88 | """ 89 | Method Name : initiate_model_trainer 90 | Description : This function initiates a model trainer steps 91 | 92 | Output : Returns model trainer artifact 93 | On Failure : Write an exception log and then raise an exception 94 | """ 95 | 96 | try: 97 | train_dataset = load_object(self.data_transformation_artifacts.transformed_train_object) 98 | 99 | train_loader = DataLoader(train_dataset, 100 | batch_size=self.model_trainer_config.BATCH_SIZE, 101 | shuffle=self.model_trainer_config.SHUFFLE, 102 | num_workers=self.model_trainer_config.NUM_WORKERS, 103 | collate_fn=self.collate_fn 104 | ) 105 | 106 | test_dataset = load_object(self.data_transformation_artifacts.transformed_test_object) 107 | 108 | test_loader = DataLoader(test_dataset, 109 | batch_size=1, 110 | shuffle=self.model_trainer_config.SHUFFLE, 111 | num_workers=self.model_trainer_config.NUM_WORKERS, 112 | collate_fn=self.collate_fn 113 | ) 114 | 115 | logging.info("Loaded training data loader object") 116 | 117 | model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True) 118 | 119 | logging.info("Loaded faster Rcnn model") 120 | 121 | in_features = model.roi_heads.box_predictor.cls_score.in_features # we need to change the head 122 | 123 | model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, self.data_transformation_artifacts.number_of_classes) 124 | 125 | optimiser = model_optimiser(model) 126 | 127 | logging.info("loaded optimiser") 128 | 129 | for epoch in range(self.model_trainer_config.EPOCH): 130 | # self.train(model, optimiser, train_loader, self.model_trainer_config.DEVICE, epoch) 131 | 132 | self.train(model, optimiser, train_loader, self.model_trainer_config.DEVICE, epoch) 133 | 134 | os.makedirs(self.model_trainer_config.TRAINED_MODEL_DIR, exist_ok=True) 135 | torch.save(model, self.model_trainer_config.TRAINED_MODEL_PATH) 136 | 137 | logging.info(f"Saved the trained model") 138 | 139 | model_trainer_artifacts = ModelTrainerArtifacts( 140 | trained_model_path=self.model_trainer_config.TRAINED_MODEL_PATH 141 | ) 142 | logging.info(f"Model trainer artifact: {model_trainer_artifacts}") 143 | 144 | return model_trainer_artifacts 145 | 146 | except Exception as e: 147 | raise HelmetException(e, sys) from e 148 | 149 | 150 | -------------------------------------------------------------------------------- /helmet/configuration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/configuration/__init__.py -------------------------------------------------------------------------------- /helmet/configuration/s3_operations.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import sys 
4 | from io import StringIO 5 | from typing import List, Union 6 | from botocore.exceptions import ClientError 7 | import boto3 8 | from helmet.exception import HelmetException 9 | from helmet.logger import logging 10 | from mypy_boto3_s3.service_resource import Bucket 11 | from helmet.constants import * 12 | 13 | MODEL_SAVE_FORMAT = ".pt" 14 | 15 | 16 | class S3Operation: 17 | s3_client=None 18 | s3_resource = None 19 | def __init__(self): 20 | # self.s3_client = boto3.client("s3") 21 | 22 | # self.s3_resource = boto3.resource("s3") 23 | if S3Operation.s3_resource==None or S3Operation.s3_client==None: 24 | __access_key_id = os.getenv(AWS_ACCESS_KEY_ID_ENV_KEY, ) 25 | __secret_access_key = os.getenv(AWS_SECRET_ACCESS_KEY_ENV_KEY, ) 26 | if __access_key_id is None: 27 | raise Exception(f"Environment variable: {AWS_ACCESS_KEY_ID_ENV_KEY} is not set.") 28 | if __secret_access_key is None: 29 | raise Exception(f"Environment variable: {AWS_SECRET_ACCESS_KEY_ENV_KEY} is not set.") 30 | 31 | S3Operation.s3_resource = boto3.resource('s3', 32 | aws_access_key_id=__access_key_id, 33 | aws_secret_access_key=__secret_access_key, 34 | region_name=REGION_NAME 35 | ) 36 | S3Operation.s3_client = boto3.client('s3', 37 | aws_access_key_id=__access_key_id, 38 | aws_secret_access_key=__secret_access_key, 39 | region_name=REGION_NAME 40 | ) 41 | self.s3_resource = S3Operation.s3_resource 42 | self.s3_client = S3Operation.s3_client 43 | @staticmethod 44 | def read_object( 45 | object_name: str, decode: bool = True, make_readable: bool = False 46 | ) -> Union[StringIO, str]: 47 | """ 48 | Method Name : read_object 49 | Description : This method reads the object_name object with kwargs 50 | 51 | Output : The column name is renamed 52 | On Failure : Write an exception log and then raise an exception 53 | 54 | Version : 1.2 55 | Revisions : moved setup to cloud 56 | """ 57 | logging.info("Entered the read_object method of S3Operations class") 58 | 59 | try: 60 | func = ( 61 | lambda: object_name.get()["Body"].read().decode() 62 | if decode is True 63 | else object_name.get()["Body"].read() 64 | ) 65 | conv_func = lambda: StringIO(func()) if make_readable is True else func() 66 | 67 | logging.info("Exited the read_object method of S3Operations class") 68 | 69 | return conv_func() 70 | 71 | except Exception as e: 72 | raise HelmetException(e, sys) from e 73 | 74 | def get_bucket(self, bucket_name: str) -> Bucket: 75 | """ 76 | Method Name : get_bucket 77 | Description : This method gets the bucket object based on the bucket_name 78 | 79 | Output : Bucket object is returned based on the bucket name 80 | On Failure : Write an exception log and then raise an exception 81 | 82 | Version : 1.2 83 | Revisions : moved setup to cloud 84 | """ 85 | logging.info("Entered the get_bucket method of S3Operations class") 86 | 87 | try: 88 | bucket = self.s3_resource.Bucket(bucket_name) 89 | 90 | logging.info("Exited the get_bucket method of S3Operations class") 91 | 92 | return bucket 93 | 94 | except Exception as e: 95 | raise HelmetException(e, sys) from e 96 | 97 | def get_file_object( 98 | self, filename: str, bucket_name: str 99 | ) -> Union[List[object], object]: 100 | """ 101 | Method Name : get_file_object 102 | Description : This method gets the file object from bucket_name bucket based on filename 103 | 104 | Output : list of objects or object is returned based on filename 105 | On Failure : Write an exception log and then raise an exception 106 | 107 | Version : 1.2 108 | Revisions : moved setup to cloud 109 | """ 110 | 
logging.info("Entered the get_file_object method of S3Operations class") 111 | 112 | try: 113 | bucket = self.get_bucket(bucket_name) 114 | 115 | lst_objs = [object for object in bucket.objects.filter(Prefix=filename)] 116 | 117 | func = lambda x: x[0] if len(x) == 1 else x 118 | 119 | file_objs = func(lst_objs) 120 | 121 | logging.info("Exited the get_file_object method of S3Operations class") 122 | 123 | return file_objs 124 | 125 | except Exception as e: 126 | raise HelmetException(e, sys) from e 127 | 128 | def load_model( 129 | self, model_name: str, bucket_name: str, model_dir: str = None 130 | ) -> object: 131 | """ 132 | Method Name : load_model 133 | Description : This method loads the model_name model from bucket_name bucket with kwargs 134 | 135 | Output : list of objects or object is returned based on filename 136 | On Failure : Write an exception log and then raise an exception 137 | 138 | Version : 1.2 139 | Revisions : moved setup to cloud 140 | """ 141 | logging.info("Entered the load_model method of S3Operations class") 142 | 143 | try: 144 | func = ( 145 | lambda: model_name 146 | if model_dir is None 147 | else model_dir + "/" + model_name 148 | ) 149 | 150 | model_file = func() 151 | 152 | f_obj = self.get_file_object(model_file, bucket_name) 153 | 154 | model_obj = self.read_object(f_obj, decode=False) 155 | 156 | return model_obj 157 | logging.info("Exited the load_model method of S3Operations class") 158 | 159 | except Exception as e: 160 | raise HelmetException(e, sys) from e 161 | 162 | def create_folder(self, folder_name: str, bucket_name: str) -> None: 163 | """ 164 | Method Name : create_folder 165 | Description : This method creates a folder_name folder in bucket_name bucket 166 | 167 | Output : Folder is created in s3 bucket 168 | On Failure : Write an exception log and then raise an exception 169 | 170 | Version : 1.2 171 | Revisions : moved setup to cloud 172 | """ 173 | logging.info("Entered the create_folder method of S3Operations class") 174 | 175 | try: 176 | self.s3_resource.Object(bucket_name, folder_name).load() 177 | 178 | except ClientError as e: 179 | if e.response["Error"]["Code"] == "404": 180 | folder_obj = folder_name + "/" 181 | 182 | self.s3_client.put_object(Bucket=bucket_name, Key=folder_obj) 183 | 184 | else: 185 | pass 186 | 187 | logging.info("Exited the create_folder method of S3Operations class") 188 | 189 | def upload_file( 190 | self, 191 | from_filename: str, 192 | to_filename: str, 193 | bucket_name: str, 194 | remove: bool = True, 195 | ): 196 | """ 197 | Method Name : upload_file 198 | Description : This method uploads the from_filename file to bucket_name bucket with to_filename as bucket filename 199 | 200 | Output : Folder is created in s3 bucket 201 | On Failure : Write an exception log and then raise an exception 202 | 203 | Version : 1.2 204 | Revisions : moved setup to cloud 205 | """ 206 | logging.info("Entered the upload_file method of S3Operations class") 207 | 208 | try: 209 | logging.info( 210 | f"Uploading {from_filename} file to {to_filename} file in {bucket_name} bucket" 211 | ) 212 | 213 | self.s3_resource.meta.client.upload_file( 214 | from_filename, bucket_name, to_filename 215 | ) 216 | 217 | logging.info( 218 | f"Uploaded {from_filename} file to {to_filename} file in {bucket_name} bucket" 219 | ) 220 | 221 | if remove is True: 222 | os.remove(from_filename) 223 | 224 | logging.info(f"Remove is set to {remove}, deleted the file") 225 | 226 | else: 227 | logging.info(f"Remove is set to {remove}, not deleted the 
file") 228 | 229 | logging.info("Exited the upload_file method of S3Operations class") 230 | 231 | except Exception as e: 232 | raise HelmetException(e, sys) from e 233 | 234 | 235 | def read_data_from_s3(self, filename: str, bucket_name: str, output_filename: str): 236 | try: 237 | bucket = self.get_bucket(bucket_name) 238 | 239 | obj = bucket.download_file(Key=filename, Filename=output_filename) 240 | 241 | return output_filename 242 | 243 | except Exception as e: 244 | raise HelmetException(e, sys) from e 245 | -------------------------------------------------------------------------------- /helmet/constants/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from datetime import datetime 4 | 5 | TIMESTAMP: str = datetime.now().strftime("%m_%d_%Y_%H_%M_%S") 6 | 7 | # Data Ingestion Constants 8 | ARTIFACTS_DIR = os.path.join("artifacts", TIMESTAMP) 9 | BUCKET_NAME = 'helmet-object-detection' 10 | ZIP_FILE_NAME = 'data.zip' 11 | ANNOTATIONS_COCO_JSON_FILE = '_annotations.coco.json' 12 | 13 | INPUT_SIZE = 416 14 | HORIZONTAL_FLIP = 0.3 15 | VERTICAL_FLIP = 0.3 16 | RANDOM_BRIGHTNESS_CONTRAST = 0.1 17 | COLOR_JITTER = 0.1 18 | BBOX_FORMAT = 'coco' 19 | 20 | RAW_FILE_NAME = 'helmet' 21 | 22 | # Data ingestion constants 23 | DATA_INGESTION_ARTIFACTS_DIR = 'DataIngestionArtifacts' 24 | DATA_INGESTION_TRAIN_DIR = 'train' 25 | DATA_INGESTION_TEST_DIR = 'test' 26 | DATA_INGESTION_VALID_DIR = 'valid' 27 | 28 | # Data transformation constants 29 | DATA_TRANSFORMATION_ARTIFACTS_DIR = 'DataTransformationArtifacts' 30 | DATA_TRANSFORMATION_TRAIN_DIR = 'Train' 31 | DATA_TRANSFORMATION_TEST_DIR = 'Test' 32 | DATA_TRANSFORMATION_TRAIN_FILE_NAME = "train.pkl" 33 | DATA_TRANSFORMATION_TEST_FILE_NAME = "test.pkl" 34 | DATA_TRANSFORMATION_TRAIN_SPLIT = 'train' 35 | DATA_TRANSFORMATION_TEST_SPLIT = 'test' 36 | 37 | 38 | # Model Training Constants 39 | TRAINED_MODEL_DIR = 'TrainedModel' 40 | TRAINED_MODEL_NAME = 'model.pt' 41 | TRAINED_BATCH_SIZE = 2 42 | TRAINED_SHUFFLE = False 43 | TRAINED_NUM_WORKERS = 1 44 | EPOCH = 1 45 | 46 | 47 | # Model evaluation constants 48 | MODEL_EVALUATION_ARTIFACTS_DIR = 'ModelEvaluationArtifacts' 49 | MODEL_EVALUATION_FILE_NAME = 'loss.csv' 50 | 51 | # Common constants 52 | use_cuda = torch.cuda.is_available() 53 | DEVICE = torch.device("cuda" if use_cuda else "cpu") 54 | 55 | APP_HOST = "0.0.0.0" 56 | APP_PORT = 8080 57 | 58 | # Prediction Constants 59 | PREDICTION_CLASSES = ['With Helmet', 'Without Helmet'] 60 | 61 | 62 | 63 | # AWS CONSTANTS 64 | AWS_ACCESS_KEY_ID_ENV_KEY = "AWS_ACCESS_KEY_ID" 65 | AWS_SECRET_ACCESS_KEY_ENV_KEY = "AWS_SECRET_ACCESS_KEY" 66 | REGION_NAME = "ap-south-1" -------------------------------------------------------------------------------- /helmet/entity/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/entity/__init__.py -------------------------------------------------------------------------------- /helmet/entity/artifacts_entity.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | # Data ingestion artifacts 4 | @dataclass 5 | class DataIngestionArtifacts: 6 | train_file_path: str 7 | test_file_path: str 8 | valid_file_path: str 9 | 10 | 11 | @dataclass 12 | class DataTransformationArtifacts: 13 | transformed_train_object: str 14 | 
transformed_test_object: str 15 | number_of_classes: int 16 | 17 | 18 | @dataclass 19 | class ModelTrainerArtifacts: 20 | trained_model_path: str 21 | 22 | 23 | @dataclass 24 | class ModelEvaluationArtifacts: 25 | is_model_accepted: bool 26 | all_losses: str 27 | 28 | 29 | @dataclass 30 | class ModelPusherArtifacts: 31 | bucket_name: str 32 | s3_model_path: str 33 | -------------------------------------------------------------------------------- /helmet/entity/config_entity.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from from_root import from_root 3 | from helmet.constants import * 4 | from helmet.configuration.s3_operations import S3Operation 5 | import os 6 | 7 | 8 | @dataclass 9 | class DataIngestionConfig: 10 | def __init__(self): 11 | self.S3_OPERATION = S3Operation(), 12 | self.BUCKET_NAME: str = BUCKET_NAME 13 | self.ZIP_FILE_NAME:str = ZIP_FILE_NAME 14 | self.DATA_INGESTION_ARTIFACTS_DIR: str = os.path.join(from_root(), ARTIFACTS_DIR, DATA_INGESTION_ARTIFACTS_DIR) 15 | self.TRAIN_DATA_ARTIFACT_DIR = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, DATA_INGESTION_TRAIN_DIR) 16 | self.TEST_DATA_ARTIFACT_DIR = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, DATA_INGESTION_TEST_DIR) 17 | self.VALID_DATA_ARTIFACT_DIR = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, DATA_INGESTION_VALID_DIR) 18 | self.ZIP_FILE_DIR = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR) 19 | self.ZIP_FILE_PATH = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, self.ZIP_FILE_NAME) 20 | self.UNZIPPED_FILE_PATH = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, RAW_FILE_NAME) 21 | 22 | 23 | 24 | @dataclass 25 | class DataTransformationConfig: 26 | def __init__(self): 27 | self.ROOT_DIR: str = os.path.join(from_root(), ARTIFACTS_DIR, DATA_INGESTION_ARTIFACTS_DIR) 28 | self.DATA_TRANSFORMATION_ARTIFACTS_DIR: str = os.path.join(from_root(),ARTIFACTS_DIR,DATA_TRANSFORMATION_ARTIFACTS_DIR) 29 | self.TRAIN_TRANSFORM_DATA_ARTIFACT_DIR = os.path.join(self.DATA_TRANSFORMATION_ARTIFACTS_DIR,DATA_TRANSFORMATION_TRAIN_DIR) 30 | self.TEST_TRANSFORM_DATA_ARTIFACT_DIR = os.path.join(self.DATA_TRANSFORMATION_ARTIFACTS_DIR,DATA_TRANSFORMATION_TEST_DIR) 31 | self.TRAIN_TRANSFORM_OBJECT_FILE_PATH = os.path.join(self.TRAIN_TRANSFORM_DATA_ARTIFACT_DIR, 32 | DATA_TRANSFORMATION_TRAIN_FILE_NAME) 33 | self.TEST_TRANSFORM_OBJECT_FILE_PATH = os.path.join(self.TEST_TRANSFORM_DATA_ARTIFACT_DIR, 34 | DATA_TRANSFORMATION_TEST_FILE_NAME) 35 | 36 | self.TRAIN_SPLIT = DATA_TRANSFORMATION_TRAIN_SPLIT 37 | self.TEST_SPLIT = DATA_TRANSFORMATION_TEST_SPLIT 38 | 39 | 40 | 41 | @dataclass 42 | class ModelTrainerConfig: 43 | def __init__(self): 44 | self.TRAINED_MODEL_DIR: str = os.path.join(from_root(), ARTIFACTS_DIR, TRAINED_MODEL_DIR) 45 | self.TRAINED_MODEL_PATH = os.path.join(self.TRAINED_MODEL_DIR, TRAINED_MODEL_NAME) 46 | self.BATCH_SIZE: int = TRAINED_BATCH_SIZE 47 | self.SHUFFLE: bool = TRAINED_SHUFFLE 48 | self.NUM_WORKERS = TRAINED_NUM_WORKERS 49 | self.EPOCH: int = EPOCH 50 | self.DEVICE = DEVICE 51 | 52 | 53 | @dataclass 54 | class ModelEvaluationConfig: 55 | def __init__(self): 56 | self.EVALUATED_MODEL_DIR: str = os.path.join(from_root(), ARTIFACTS_DIR, MODEL_EVALUATION_ARTIFACTS_DIR) 57 | self.EVALUATED_LOSS_CSV_PATH = os.path.join(self.EVALUATED_MODEL_DIR, MODEL_EVALUATION_FILE_NAME) 58 | self.BEST_MODEL_PATH = os.path.join(self.EVALUATED_MODEL_DIR, TRAINED_MODEL_NAME ) 59 | self.DEVICE = DEVICE 60 | self.BATCH: int = 1 61 | self.SHUFFLE: bool = 
TRAINED_SHUFFLE 62 | self.NUM_WORKERS = TRAINED_NUM_WORKERS 63 | 64 | 65 | 66 | @dataclass 67 | class ModelPusherConfig: 68 | def __init__(self): 69 | self.TRAINED_MODEL_DIR: str = os.path.join(from_root(),ARTIFACTS_DIR,TRAINED_MODEL_DIR) 70 | self.BEST_MODEL_PATH: str = os.path.join(self.TRAINED_MODEL_DIR,TRAINED_MODEL_NAME) 71 | self.BUCKET_NAME: str = BUCKET_NAME 72 | self.S3_MODEL_KEY_PATH: str = os.path.join(TRAINED_MODEL_NAME) -------------------------------------------------------------------------------- /helmet/exception/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def error_message_detail(error, error_detail:sys): 6 | _, _, exc_tb = error_detail.exc_info() 7 | file_name = exc_tb.tb_frame.f_code.co_filename 8 | error_message = "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format( 9 | file_name, exc_tb.tb_lineno, str(error) 10 | ) 11 | 12 | return error_message 13 | 14 | 15 | class HelmetException(Exception): 16 | def __init__(self, error_message, error_detail): 17 | """ 18 | :param error_message: error message in string format 19 | """ 20 | super().__init__(error_message) 21 | self.error_message = error_message_detail( 22 | error_message, error_detail=error_detail 23 | ) 24 | 25 | def __str__(self): 26 | return self.error_message -------------------------------------------------------------------------------- /helmet/logger/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from from_root import from_root 5 | from datetime import datetime 6 | 7 | LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log" 8 | logs_path = os.path.join(from_root(), "logs", LOG_FILE) 9 | 10 | os.makedirs(logs_path, exist_ok=True) 11 | 12 | LOG_FILE_PATH = os.path.join(logs_path, LOG_FILE) 13 | 14 | logging.basicConfig( 15 | filename=LOG_FILE_PATH, 16 | format="[ %(asctime)s ] %(name)s - %(levelname)s - %(message)s", 17 | level=logging.DEBUG, 18 | ) -------------------------------------------------------------------------------- /helmet/ml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/ml/__init__.py -------------------------------------------------------------------------------- /helmet/ml/detection/README.md: -------------------------------------------------------------------------------- 1 | # Object detection reference training scripts 2 | 3 | This folder contains reference training scripts for object detection. 4 | They serve as a log of how to train specific models, to provide baseline 5 | training and evaluation scripts to quickly bootstrap research. 6 | 7 | To execute the example commands below you must install the following: 8 | 9 | ``` 10 | cython 11 | pycocotools 12 | matplotlib 13 | ``` 14 | 15 | You must modify the following flags: 16 | 17 | `--data-path=/path/to/coco/dataset` 18 | 19 | `--nproc_per_node=` 20 | 21 | Except otherwise noted, all models have been trained on 8x V100 GPUs. 
22 | 23 | ### Faster R-CNN ResNet-50 FPN 24 | ``` 25 | torchrun --nproc_per_node=8 train.py\ 26 | --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\ 27 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 28 | ``` 29 | 30 | ### Faster R-CNN MobileNetV3-Large FPN 31 | ``` 32 | torchrun --nproc_per_node=8 train.py\ 33 | --dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26\ 34 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone MobileNet_V3_Large_Weights.IMAGENET1K_V1 35 | ``` 36 | 37 | ### Faster R-CNN MobileNetV3-Large 320 FPN 38 | ``` 39 | torchrun --nproc_per_node=8 train.py\ 40 | --dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26\ 41 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone MobileNet_V3_Large_Weights.IMAGENET1K_V1 42 | ``` 43 | 44 | ### FCOS ResNet-50 FPN 45 | ``` 46 | torchrun --nproc_per_node=8 train.py\ 47 | --dataset coco --model fcos_resnet50_fpn --epochs 26\ 48 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --amp --weights-backbone ResNet50_Weights.IMAGENET1K_V1 49 | ``` 50 | 51 | ### RetinaNet 52 | ``` 53 | torchrun --nproc_per_node=8 train.py\ 54 | --dataset coco --model retinanet_resnet50_fpn --epochs 26\ 55 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 56 | ``` 57 | 58 | ### SSD300 VGG16 59 | ``` 60 | torchrun --nproc_per_node=8 train.py\ 61 | --dataset coco --model ssd300_vgg16 --epochs 120\ 62 | --lr-steps 80 110 --aspect-ratio-group-factor 3 --lr 0.002 --batch-size 4\ 63 | --weight-decay 0.0005 --data-augmentation ssd --weights-backbone VGG16_Weights.IMAGENET1K_FEATURES 64 | ``` 65 | 66 | ### SSDlite320 MobileNetV3-Large 67 | ``` 68 | torchrun --nproc_per_node=8 train.py\ 69 | --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660\ 70 | --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\ 71 | --weight-decay 0.00004 --data-augmentation ssdlite 72 | ``` 73 | 74 | 75 | ### Mask R-CNN 76 | ``` 77 | torchrun --nproc_per_node=8 train.py\ 78 | --dataset coco --model maskrcnn_resnet50_fpn --epochs 26\ 79 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 80 | ``` 81 | 82 | 83 | ### Keypoint R-CNN 84 | ``` 85 | torchrun --nproc_per_node=8 train.py\ 86 | --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\ 87 | --lr-steps 36 43 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 88 | ``` 89 | -------------------------------------------------------------------------------- /helmet/ml/detection/coco_eval.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import io 3 | from contextlib import redirect_stdout 4 | 5 | import numpy as np 6 | import pycocotools.mask as mask_util 7 | import torch 8 | import utils 9 | from pycocotools.coco import COCO 10 | from pycocotools.cocoeval import COCOeval 11 | 12 | 13 | class CocoEvaluator: 14 | def __init__(self, coco_gt, iou_types): 15 | if not isinstance(iou_types, (list, tuple)): 16 | raise TypeError(f"This constructor expects iou_types of type list or tuple, instead got {type(iou_types)}") 17 | coco_gt = copy.deepcopy(coco_gt) 18 | self.coco_gt = coco_gt 19 | 20 | self.iou_types = iou_types 21 | self.coco_eval = {} 22 | for iou_type in iou_types: 23 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 24 | 25 | self.img_ids = [] 26 | 
self.eval_imgs = {k: [] for k in iou_types} 27 | 28 | def update(self, predictions): 29 | img_ids = list(np.unique(list(predictions.keys()))) 30 | self.img_ids.extend(img_ids) 31 | 32 | for iou_type in self.iou_types: 33 | results = self.prepare(predictions, iou_type) 34 | with redirect_stdout(io.StringIO()): 35 | coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() 36 | coco_eval = self.coco_eval[iou_type] 37 | 38 | coco_eval.cocoDt = coco_dt 39 | coco_eval.params.imgIds = list(img_ids) 40 | img_ids, eval_imgs = evaluate(coco_eval) 41 | 42 | self.eval_imgs[iou_type].append(eval_imgs) 43 | 44 | def synchronize_between_processes(self): 45 | for iou_type in self.iou_types: 46 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 47 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 48 | 49 | def accumulate(self): 50 | for coco_eval in self.coco_eval.values(): 51 | coco_eval.accumulate() 52 | 53 | def summarize(self): 54 | for iou_type, coco_eval in self.coco_eval.items(): 55 | print(f"IoU metric: {iou_type}") 56 | coco_eval.summarize() 57 | 58 | def prepare(self, predictions, iou_type): 59 | if iou_type == "bbox": 60 | return self.prepare_for_coco_detection(predictions) 61 | if iou_type == "segm": 62 | return self.prepare_for_coco_segmentation(predictions) 63 | if iou_type == "keypoints": 64 | return self.prepare_for_coco_keypoint(predictions) 65 | raise ValueError(f"Unknown iou type {iou_type}") 66 | 67 | def prepare_for_coco_detection(self, predictions): 68 | coco_results = [] 69 | for original_id, prediction in predictions.items(): 70 | if len(prediction) == 0: 71 | continue 72 | 73 | boxes = prediction["boxes"] 74 | boxes = convert_to_xywh(boxes).tolist() 75 | scores = prediction["scores"].tolist() 76 | labels = prediction["labels"].tolist() 77 | 78 | coco_results.extend( 79 | [ 80 | { 81 | "image_id": original_id, 82 | "category_id": labels[k], 83 | "bbox": box, 84 | "score": scores[k], 85 | } 86 | for k, box in enumerate(boxes) 87 | ] 88 | ) 89 | return coco_results 90 | 91 | def prepare_for_coco_segmentation(self, predictions): 92 | coco_results = [] 93 | for original_id, prediction in predictions.items(): 94 | if len(prediction) == 0: 95 | continue 96 | 97 | scores = prediction["scores"] 98 | labels = prediction["labels"] 99 | masks = prediction["masks"] 100 | 101 | masks = masks > 0.5 102 | 103 | scores = prediction["scores"].tolist() 104 | labels = prediction["labels"].tolist() 105 | 106 | rles = [ 107 | mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] for mask in masks 108 | ] 109 | for rle in rles: 110 | rle["counts"] = rle["counts"].decode("utf-8") 111 | 112 | coco_results.extend( 113 | [ 114 | { 115 | "image_id": original_id, 116 | "category_id": labels[k], 117 | "segmentation": rle, 118 | "score": scores[k], 119 | } 120 | for k, rle in enumerate(rles) 121 | ] 122 | ) 123 | return coco_results 124 | 125 | def prepare_for_coco_keypoint(self, predictions): 126 | coco_results = [] 127 | for original_id, prediction in predictions.items(): 128 | if len(prediction) == 0: 129 | continue 130 | 131 | boxes = prediction["boxes"] 132 | boxes = convert_to_xywh(boxes).tolist() 133 | scores = prediction["scores"].tolist() 134 | labels = prediction["labels"].tolist() 135 | keypoints = prediction["keypoints"] 136 | keypoints = keypoints.flatten(start_dim=1).tolist() 137 | 138 | coco_results.extend( 139 | [ 140 | { 141 | "image_id": original_id, 142 | "category_id": labels[k], 143 | 
"keypoints": keypoint, 144 | "score": scores[k], 145 | } 146 | for k, keypoint in enumerate(keypoints) 147 | ] 148 | ) 149 | return coco_results 150 | 151 | 152 | def convert_to_xywh(boxes): 153 | xmin, ymin, xmax, ymax = boxes.unbind(1) 154 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 155 | 156 | 157 | def merge(img_ids, eval_imgs): 158 | all_img_ids = utils.all_gather(img_ids) 159 | all_eval_imgs = utils.all_gather(eval_imgs) 160 | 161 | merged_img_ids = [] 162 | for p in all_img_ids: 163 | merged_img_ids.extend(p) 164 | 165 | merged_eval_imgs = [] 166 | for p in all_eval_imgs: 167 | merged_eval_imgs.append(p) 168 | 169 | merged_img_ids = np.array(merged_img_ids) 170 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 171 | 172 | # keep only unique (and in sorted order) images 173 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 174 | merged_eval_imgs = merged_eval_imgs[..., idx] 175 | 176 | return merged_img_ids, merged_eval_imgs 177 | 178 | 179 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 180 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 181 | img_ids = list(img_ids) 182 | eval_imgs = list(eval_imgs.flatten()) 183 | 184 | coco_eval.evalImgs = eval_imgs 185 | coco_eval.params.imgIds = img_ids 186 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 187 | 188 | 189 | def evaluate(imgs): 190 | with redirect_stdout(io.StringIO()): 191 | imgs.evaluate() 192 | return imgs.params.imgIds, np.asarray(imgs.evalImgs).reshape(-1, len(imgs.params.areaRng), len(imgs.params.imgIds)) 193 | -------------------------------------------------------------------------------- /helmet/ml/detection/coco_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import sys 4 | 5 | import torch 6 | import torch.utils.data 7 | import torchvision 8 | import helmet.ml.detection.transforms as T 9 | from pycocotools import mask as coco_mask 10 | from pycocotools.coco import COCO 11 | from helmet.exception import HelmetException 12 | 13 | 14 | class FilterAndRemapCocoCategories: 15 | def __init__(self, categories, remap=True): 16 | self.categories = categories 17 | self.remap = remap 18 | 19 | def __call__(self, image, target): 20 | anno = target["annotations"] 21 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 22 | if not self.remap: 23 | target["annotations"] = anno 24 | return image, target 25 | anno = copy.deepcopy(anno) 26 | for obj in anno: 27 | obj["category_id"] = self.categories.index(obj["category_id"]) 28 | target["annotations"] = anno 29 | return image, target 30 | 31 | 32 | def convert_coco_poly_to_mask(segmentations, height, width): 33 | masks = [] 34 | for polygons in segmentations: 35 | rles = coco_mask.frPyObjects(polygons, height, width) 36 | mask = coco_mask.decode(rles) 37 | if len(mask.shape) < 3: 38 | mask = mask[..., None] 39 | mask = torch.as_tensor(mask, dtype=torch.uint8) 40 | mask = mask.any(dim=2) 41 | masks.append(mask) 42 | if masks: 43 | masks = torch.stack(masks, dim=0) 44 | else: 45 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 46 | return masks 47 | 48 | 49 | class ConvertCocoPolysToMask: 50 | def __call__(self, image, target): 51 | w, h = image.size 52 | 53 | image_id = target["image_id"] 54 | image_id = torch.tensor([image_id]) 55 | 56 | anno = target["annotations"] 57 | 58 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 59 | 60 | boxes = [obj["bbox"] for obj in anno] 61 | # guard against no boxes via 
resizing 62 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 63 | boxes[:, 2:] += boxes[:, :2] 64 | boxes[:, 0::2].clamp_(min=0, max=w) 65 | boxes[:, 1::2].clamp_(min=0, max=h) 66 | 67 | classes = [obj["category_id"] for obj in anno] 68 | classes = torch.tensor(classes, dtype=torch.int64) 69 | 70 | segmentations = [obj["segmentation"] for obj in anno] 71 | masks = convert_coco_poly_to_mask(segmentations, h, w) 72 | 73 | keypoints = None 74 | if anno and "keypoints" in anno[0]: 75 | keypoints = [obj["keypoints"] for obj in anno] 76 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 77 | num_keypoints = keypoints.shape[0] 78 | if num_keypoints: 79 | keypoints = keypoints.view(num_keypoints, -1, 3) 80 | 81 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 82 | boxes = boxes[keep] 83 | classes = classes[keep] 84 | masks = masks[keep] 85 | if keypoints is not None: 86 | keypoints = keypoints[keep] 87 | 88 | target = {} 89 | target["boxes"] = boxes 90 | target["labels"] = classes 91 | target["masks"] = masks 92 | target["image_id"] = image_id 93 | if keypoints is not None: 94 | target["keypoints"] = keypoints 95 | 96 | # for conversion to coco api 97 | area = torch.tensor([obj["area"] for obj in anno]) 98 | iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) 99 | target["area"] = area 100 | target["iscrowd"] = iscrowd 101 | 102 | return image, target 103 | 104 | 105 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 106 | def _has_only_empty_bbox(anno): 107 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 108 | 109 | def _count_visible_keypoints(anno): 110 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 111 | 112 | min_keypoints_per_image = 10 113 | 114 | def _has_valid_annotation(anno): 115 | # if it's empty, there is no annotation 116 | if len(anno) == 0: 117 | return False 118 | # if all boxes have close to zero area, there is no annotation 119 | if _has_only_empty_bbox(anno): 120 | return False 121 | # keypoints task have a slight different critera for considering 122 | # if an annotation is valid 123 | if "keypoints" not in anno[0]: 124 | return True 125 | # for keypoint detection tasks, only consider valid images those 126 | # containing at least min_keypoints_per_image 127 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 128 | return True 129 | return False 130 | 131 | if not isinstance(dataset, torchvision.datasets.CocoDetection): 132 | raise TypeError( 133 | f"This function expects dataset of type torchvision.datasets.CocoDetection, instead got {type(dataset)}" 134 | ) 135 | ids = [] 136 | for ds_idx, img_id in enumerate(dataset.ids): 137 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 138 | anno = dataset.coco.loadAnns(ann_ids) 139 | if cat_list: 140 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 141 | if _has_valid_annotation(anno): 142 | ids.append(ds_idx) 143 | 144 | dataset = torch.utils.data.Subset(dataset, ids) 145 | return dataset 146 | 147 | 148 | def convert_to_coco_api(ds): 149 | try: 150 | coco_ds = COCO() 151 | # annotation IDs need to start at 1, not 0, see torchvision issue #1530 152 | ann_id = 1 153 | dataset = {"images": [], "categories": [], "annotations": []} 154 | categories = set() 155 | for img_idx in range(len(ds)): 156 | # find better way to get target 157 | # targets = ds.get_annotations(img_idx) 158 | img, targets = ds[img_idx] 159 | image_id = targets["image_id"].item() 160 | img_dict = {} 161 
| img_dict["id"] = image_id 162 | img_dict["height"] = img.shape[-2] 163 | img_dict["width"] = img.shape[-1] 164 | dataset["images"].append(img_dict) 165 | bboxes = targets["boxes"].clone() 166 | bboxes[:, 2:] -= bboxes[:, :2] 167 | bboxes = bboxes.tolist() 168 | labels = targets["labels"].tolist() 169 | areas = targets["area"].tolist() 170 | iscrowd = targets["iscrowd"].tolist() 171 | if "masks" in targets: 172 | masks = targets["masks"] 173 | # make masks Fortran contiguous for coco_mask 174 | masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) 175 | if "keypoints" in targets: 176 | keypoints = targets["keypoints"] 177 | keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() 178 | num_objs = len(bboxes) 179 | for i in range(num_objs): 180 | ann = {} 181 | ann["image_id"] = image_id 182 | ann["bbox"] = bboxes[i] 183 | ann["category_id"] = labels[i] 184 | categories.add(labels[i]) 185 | ann["area"] = areas[i] 186 | ann["iscrowd"] = iscrowd[i] 187 | ann["id"] = ann_id 188 | if "masks" in targets: 189 | ann["segmentation"] = coco_mask.encode(masks[i].numpy()) 190 | if "keypoints" in targets: 191 | ann["keypoints"] = keypoints[i] 192 | ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3]) 193 | dataset["annotations"].append(ann) 194 | ann_id += 1 195 | dataset["categories"] = [{"id": i} for i in sorted(categories)] 196 | coco_ds.dataset = dataset 197 | coco_ds.createIndex() 198 | return coco_ds 199 | except Exception as e: 200 | raise HelmetException(e, sys) from e 201 | 202 | 203 | def get_coco_api_from_dataset(dataset): 204 | for _ in range(10): 205 | if isinstance(dataset, torchvision.datasets.CocoDetection): 206 | break 207 | if isinstance(dataset, torch.utils.data.Subset): 208 | dataset = dataset.dataset 209 | if isinstance(dataset, torchvision.datasets.CocoDetection): 210 | return dataset.coco 211 | return convert_to_coco_api(dataset) 212 | 213 | 214 | class CocoDetection(torchvision.datasets.CocoDetection): 215 | def __init__(self, img_folder, ann_file, transforms): 216 | super().__init__(img_folder, ann_file) 217 | self._transforms = transforms 218 | 219 | def __getitem__(self, idx): 220 | img, target = super().__getitem__(idx) 221 | image_id = self.ids[idx] 222 | target = dict(image_id=image_id, annotations=target) 223 | if self._transforms is not None: 224 | img, target = self._transforms(img, target) 225 | return img, target 226 | 227 | 228 | def get_coco(root, image_set, transforms, mode="instances"): 229 | anno_file_template = "{}_{}2017.json" 230 | PATHS = { 231 | "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), 232 | "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), 233 | # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) 234 | } 235 | 236 | t = [ConvertCocoPolysToMask()] 237 | 238 | if transforms is not None: 239 | t.append(transforms) 240 | transforms = T.Compose(t) 241 | 242 | img_folder, ann_file = PATHS[image_set] 243 | img_folder = os.path.join(root, img_folder) 244 | ann_file = os.path.join(root, ann_file) 245 | 246 | dataset = CocoDetection(img_folder, ann_file, transforms=transforms) 247 | 248 | if image_set == "train": 249 | dataset = _coco_remove_images_without_annotations(dataset) 250 | 251 | # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) 252 | 253 | return dataset 254 | 255 | 256 | def get_coco_kp(root, image_set, transforms): 257 | return get_coco(root, image_set, transforms, 
mode="person_keypoints") 258 | -------------------------------------------------------------------------------- /helmet/ml/detection/engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | import time 4 | 5 | import torch 6 | import torchvision.models.detection.mask_rcnn 7 | from helmet.exception import HelmetException 8 | from helmet.ml.detection import utils 9 | from helmet.ml.detection.coco_eval import CocoEvaluator 10 | from helmet.ml.detection.coco_utils import get_coco_api_from_dataset 11 | 12 | 13 | def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None): 14 | try: 15 | model.to(device) 16 | model.train() 17 | metric_logger = utils.MetricLogger(delimiter=" ") 18 | metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}")) 19 | header = f"Epoch: [{epoch}]" 20 | 21 | lr_scheduler = None 22 | if epoch == 0: 23 | warmup_factor = 1.0 / 1000 24 | warmup_iters = min(1000, len(data_loader) - 1) 25 | 26 | lr_scheduler = torch.optim.lr_scheduler.LinearLR( 27 | optimizer, start_factor=warmup_factor, total_iters=warmup_iters 28 | ) 29 | 30 | for images, targets in metric_logger.log_every(data_loader, print_freq, header): 31 | images = list(image.to(device) for image in images) 32 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 33 | with torch.cuda.amp.autocast(enabled=scaler is not None): 34 | loss_dict = model(images, targets) 35 | losses = sum(loss for loss in loss_dict.values()) 36 | 37 | # reduce losses over all GPUs for logging purposes 38 | loss_dict_reduced = utils.reduce_dict(loss_dict) 39 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 40 | 41 | loss_value = losses_reduced.item() 42 | 43 | if not math.isfinite(loss_value): 44 | print(f"Loss is {loss_value}, stopping training") 45 | print(loss_dict_reduced) 46 | sys.exit(1) 47 | 48 | optimizer.zero_grad() 49 | if scaler is not None: 50 | scaler.scale(losses).backward() 51 | scaler.step(optimizer) 52 | scaler.update() 53 | else: 54 | losses.backward() 55 | optimizer.step() 56 | 57 | if lr_scheduler is not None: 58 | lr_scheduler.step() 59 | 60 | metric_logger.update(loss=losses_reduced, **loss_dict_reduced) 61 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 62 | 63 | return metric_logger 64 | except Exception as e: 65 | raise HelmetException(e, sys) from e 66 | 67 | 68 | def _get_iou_types(model): 69 | try: 70 | model_without_ddp = model 71 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 72 | model_without_ddp = model.module 73 | iou_types = ["bbox"] 74 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 75 | iou_types.append("segm") 76 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 77 | iou_types.append("keypoints") 78 | return iou_types 79 | except Exception as e: 80 | raise HelmetException(e, sys) from e 81 | 82 | 83 | @torch.inference_mode() 84 | def evaluate(model, data_loader, device): 85 | try: 86 | n_threads = torch.get_num_threads() 87 | # FIXME remove this and make paste_masks_in_image run on the GPU 88 | torch.set_num_threads(1) 89 | cpu_device = torch.device("cpu") 90 | model.eval() 91 | metric_logger = utils.MetricLogger(delimiter=" ") 92 | header = "Test:" 93 | 94 | coco = get_coco_api_from_dataset(data_loader.dataset) 95 | iou_types = _get_iou_types(model) 96 | coco_evaluator = CocoEvaluator(coco, iou_types) 97 | 98 | for images, targets in 
metric_logger.log_every(data_loader, 100, header): 99 | images = list(img.to(device) for img in images) 100 | 101 | if torch.cuda.is_available(): 102 | torch.cuda.synchronize() 103 | model_time = time.time() 104 | outputs = model(images) 105 | 106 | outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] 107 | model_time = time.time() - model_time 108 | 109 | res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} 110 | evaluator_time = time.time() 111 | coco_evaluator.update(res) 112 | evaluator_time = time.time() - evaluator_time 113 | metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) 114 | 115 | # gather the stats from all processes 116 | metric_logger.synchronize_between_processes() 117 | print("Averaged stats:", metric_logger) 118 | coco_evaluator.synchronize_between_processes() 119 | 120 | # accumulate predictions from all images 121 | coco_evaluator.accumulate() 122 | coco_evaluator.summarize() 123 | torch.set_num_threads(n_threads) 124 | return coco_evaluator 125 | except Exception as e: 126 | raise HelmetException(e, sys) from e 127 | -------------------------------------------------------------------------------- /helmet/ml/detection/group_by_aspect_ratio.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import copy 3 | import math 4 | from collections import defaultdict 5 | from itertools import chain, repeat 6 | 7 | import numpy as np 8 | import torch 9 | import torch.utils.data 10 | import torchvision 11 | from PIL import Image 12 | from torch.utils.data.sampler import BatchSampler, Sampler 13 | from torch.utils.model_zoo import tqdm 14 | 15 | 16 | def _repeat_to_at_least(iterable, n): 17 | repeat_times = math.ceil(n / len(iterable)) 18 | repeated = chain.from_iterable(repeat(iterable, repeat_times)) 19 | return list(repeated) 20 | 21 | 22 | class GroupedBatchSampler(BatchSampler): 23 | """ 24 | Wraps another sampler to yield a mini-batch of indices. 25 | It enforces that the batch only contain elements from the same group. 26 | It also tries to provide mini-batches which follows an ordering which is 27 | as close as possible to the ordering from the original sampler. 28 | Args: 29 | sampler (Sampler): Base sampler. 30 | group_ids (list[int]): If the sampler produces indices in range [0, N), 31 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 32 | The group ids must be a continuous set of integers starting from 33 | 0, i.e. they must be in the range [0, num_groups). 34 | batch_size (int): Size of mini-batch. 
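        Example (an illustrative sketch, mirroring how train.py wires this
        sampler; ``dataset`` and the batch size below are placeholders):

            sampler = torch.utils.data.RandomSampler(dataset)
            group_ids = create_aspect_ratio_groups(dataset, k=3)
            batch_sampler = GroupedBatchSampler(sampler, group_ids, batch_size=2)
            data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=batch_sampler)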
35 | """ 36 | 37 | def __init__(self, sampler, group_ids, batch_size): 38 | if not isinstance(sampler, Sampler): 39 | raise ValueError(f"sampler should be an instance of torch.utils.data.Sampler, but got sampler={sampler}") 40 | self.sampler = sampler 41 | self.group_ids = group_ids 42 | self.batch_size = batch_size 43 | 44 | def __iter__(self): 45 | buffer_per_group = defaultdict(list) 46 | samples_per_group = defaultdict(list) 47 | 48 | num_batches = 0 49 | for idx in self.sampler: 50 | group_id = self.group_ids[idx] 51 | buffer_per_group[group_id].append(idx) 52 | samples_per_group[group_id].append(idx) 53 | if len(buffer_per_group[group_id]) == self.batch_size: 54 | yield buffer_per_group[group_id] 55 | num_batches += 1 56 | del buffer_per_group[group_id] 57 | assert len(buffer_per_group[group_id]) < self.batch_size 58 | 59 | # now we have run out of elements that satisfy 60 | # the group criteria, let's return the remaining 61 | # elements so that the size of the sampler is 62 | # deterministic 63 | expected_num_batches = len(self) 64 | num_remaining = expected_num_batches - num_batches 65 | if num_remaining > 0: 66 | # for the remaining batches, take first the buffers with largest number 67 | # of elements 68 | for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True): 69 | remaining = self.batch_size - len(buffer_per_group[group_id]) 70 | samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) 71 | buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) 72 | assert len(buffer_per_group[group_id]) == self.batch_size 73 | yield buffer_per_group[group_id] 74 | num_remaining -= 1 75 | if num_remaining == 0: 76 | break 77 | assert num_remaining == 0 78 | 79 | def __len__(self): 80 | return len(self.sampler) // self.batch_size 81 | 82 | 83 | def _compute_aspect_ratios_slow(dataset, indices=None): 84 | print( 85 | "Your dataset doesn't support the fast path for " 86 | "computing the aspect ratios, so will iterate over " 87 | "the full dataset and load every image instead. " 88 | "This might take some time..." 
89 | ) 90 | if indices is None: 91 | indices = range(len(dataset)) 92 | 93 | class SubsetSampler(Sampler): 94 | def __init__(self, indices): 95 | self.indices = indices 96 | 97 | def __iter__(self): 98 | return iter(self.indices) 99 | 100 | def __len__(self): 101 | return len(self.indices) 102 | 103 | sampler = SubsetSampler(indices) 104 | data_loader = torch.utils.data.DataLoader( 105 | dataset, 106 | batch_size=1, 107 | sampler=sampler, 108 | num_workers=14, # you might want to increase it for faster processing 109 | collate_fn=lambda x: x[0], 110 | ) 111 | aspect_ratios = [] 112 | with tqdm(total=len(dataset)) as pbar: 113 | for _i, (img, _) in enumerate(data_loader): 114 | pbar.update(1) 115 | height, width = img.shape[-2:] 116 | aspect_ratio = float(width) / float(height) 117 | aspect_ratios.append(aspect_ratio) 118 | return aspect_ratios 119 | 120 | 121 | def _compute_aspect_ratios_custom_dataset(dataset, indices=None): 122 | if indices is None: 123 | indices = range(len(dataset)) 124 | aspect_ratios = [] 125 | for i in indices: 126 | height, width = dataset.get_height_and_width(i) 127 | aspect_ratio = float(width) / float(height) 128 | aspect_ratios.append(aspect_ratio) 129 | return aspect_ratios 130 | 131 | 132 | def _compute_aspect_ratios_coco_dataset(dataset, indices=None): 133 | if indices is None: 134 | indices = range(len(dataset)) 135 | aspect_ratios = [] 136 | for i in indices: 137 | img_info = dataset.coco.imgs[dataset.ids[i]] 138 | aspect_ratio = float(img_info["width"]) / float(img_info["height"]) 139 | aspect_ratios.append(aspect_ratio) 140 | return aspect_ratios 141 | 142 | 143 | def _compute_aspect_ratios_voc_dataset(dataset, indices=None): 144 | if indices is None: 145 | indices = range(len(dataset)) 146 | aspect_ratios = [] 147 | for i in indices: 148 | # this doesn't load the data into memory, because PIL loads it lazily 149 | width, height = Image.open(dataset.images[i]).size 150 | aspect_ratio = float(width) / float(height) 151 | aspect_ratios.append(aspect_ratio) 152 | return aspect_ratios 153 | 154 | 155 | def _compute_aspect_ratios_subset_dataset(dataset, indices=None): 156 | if indices is None: 157 | indices = range(len(dataset)) 158 | 159 | ds_indices = [dataset.indices[i] for i in indices] 160 | return compute_aspect_ratios(dataset.dataset, ds_indices) 161 | 162 | 163 | def compute_aspect_ratios(dataset, indices=None): 164 | if hasattr(dataset, "get_height_and_width"): 165 | return _compute_aspect_ratios_custom_dataset(dataset, indices) 166 | 167 | if isinstance(dataset, torchvision.datasets.CocoDetection): 168 | return _compute_aspect_ratios_coco_dataset(dataset, indices) 169 | 170 | if isinstance(dataset, torchvision.datasets.VOCDetection): 171 | return _compute_aspect_ratios_voc_dataset(dataset, indices) 172 | 173 | if isinstance(dataset, torch.utils.data.Subset): 174 | return _compute_aspect_ratios_subset_dataset(dataset, indices) 175 | 176 | # slow path 177 | return _compute_aspect_ratios_slow(dataset, indices) 178 | 179 | 180 | def _quantize(x, bins): 181 | bins = copy.deepcopy(bins) 182 | bins = sorted(bins) 183 | quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) 184 | return quantized 185 | 186 | 187 | def create_aspect_ratio_groups(dataset, k=0): 188 | aspect_ratios = compute_aspect_ratios(dataset) 189 | bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] 190 | groups = _quantize(aspect_ratios, bins) 191 | # count number of elements per group 192 | counts = np.unique(groups, return_counts=True)[1] 193 | fbins = 
[0] + bins + [np.inf] 194 | print(f"Using {fbins} as bins for aspect ratio quantization") 195 | print(f"Count of instances per bin: {counts}") 196 | return groups 197 | -------------------------------------------------------------------------------- /helmet/ml/detection/presets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import transforms as T 3 | 4 | 5 | class DetectionPresetTrain: 6 | def __init__(self, *, data_augmentation, hflip_prob=0.5, mean=(123.0, 117.0, 104.0)): 7 | if data_augmentation == "hflip": 8 | self.transforms = T.Compose( 9 | [ 10 | T.RandomHorizontalFlip(p=hflip_prob), 11 | T.PILToTensor(), 12 | T.ConvertImageDtype(torch.float), 13 | ] 14 | ) 15 | elif data_augmentation == "lsj": 16 | self.transforms = T.Compose( 17 | [ 18 | T.ScaleJitter(target_size=(1024, 1024)), 19 | T.FixedSizeCrop(size=(1024, 1024), fill=mean), 20 | T.RandomHorizontalFlip(p=hflip_prob), 21 | T.PILToTensor(), 22 | T.ConvertImageDtype(torch.float), 23 | ] 24 | ) 25 | elif data_augmentation == "multiscale": 26 | self.transforms = T.Compose( 27 | [ 28 | T.RandomShortestSize( 29 | min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333 30 | ), 31 | T.RandomHorizontalFlip(p=hflip_prob), 32 | T.PILToTensor(), 33 | T.ConvertImageDtype(torch.float), 34 | ] 35 | ) 36 | elif data_augmentation == "ssd": 37 | self.transforms = T.Compose( 38 | [ 39 | T.RandomPhotometricDistort(), 40 | T.RandomZoomOut(fill=list(mean)), 41 | T.RandomIoUCrop(), 42 | T.RandomHorizontalFlip(p=hflip_prob), 43 | T.PILToTensor(), 44 | T.ConvertImageDtype(torch.float), 45 | ] 46 | ) 47 | elif data_augmentation == "ssdlite": 48 | self.transforms = T.Compose( 49 | [ 50 | T.RandomIoUCrop(), 51 | T.RandomHorizontalFlip(p=hflip_prob), 52 | T.PILToTensor(), 53 | T.ConvertImageDtype(torch.float), 54 | ] 55 | ) 56 | else: 57 | raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"') 58 | 59 | def __call__(self, img, target): 60 | return self.transforms(img, target) 61 | 62 | 63 | class DetectionPresetEval: 64 | def __init__(self): 65 | self.transforms = T.Compose( 66 | [ 67 | T.PILToTensor(), 68 | T.ConvertImageDtype(torch.float), 69 | ] 70 | ) 71 | 72 | def __call__(self, img, target): 73 | return self.transforms(img, target) 74 | -------------------------------------------------------------------------------- /helmet/ml/detection/train.py: -------------------------------------------------------------------------------- 1 | r"""PyTorch Detection Training. 2 | 3 | To run in a multi-gpu environment, use the distributed launcher:: 4 | 5 | python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \ 6 | train.py ... --world-size $NGPU 7 | 8 | The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. 9 | --lr 0.02 --batch-size 2 --world-size 8 10 | If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. 11 | 12 | On top of that, for training Faster/Mask R-CNN, the default hyperparameters are 13 | --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3 14 | 15 | Also, if you train Keypoint R-CNN, the default hyperparameters are 16 | --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3 17 | Because the number of images is smaller in the person keypoint subset of COCO, 18 | the number of epochs should be adapted so that we have the same number of iterations. 
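    As a worked example (illustrative values only): on 2 gpus the linearly scaled
    learning rate is 0.02 / 8 * 2 = 0.005, so a Faster/Mask R-CNN run would be
    launched as

        python -m torch.distributed.launch --nproc_per_node=2 --use_env \
            train.py --world-size 2 --lr 0.005 --batch-size 2 \
            --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3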
19 | """ 20 | import datetime 21 | import os 22 | import time 23 | 24 | import presets 25 | import torch 26 | import torch.utils.data 27 | import torchvision 28 | import torchvision.models.detection 29 | import torchvision.models.detection.mask_rcnn 30 | import utils 31 | from coco_utils import get_coco, get_coco_kp 32 | from engine import evaluate, train_one_epoch 33 | from group_by_aspect_ratio import create_aspect_ratio_groups, GroupedBatchSampler 34 | from torchvision.transforms import InterpolationMode 35 | from transforms import SimpleCopyPaste 36 | 37 | 38 | def copypaste_collate_fn(batch): 39 | copypaste = SimpleCopyPaste(blending=True, resize_interpolation=InterpolationMode.BILINEAR) 40 | return copypaste(*utils.collate_fn(batch)) 41 | 42 | 43 | def get_dataset(name, image_set, transform, data_path): 44 | paths = {"coco": (data_path, get_coco, 91), "coco_kp": (data_path, get_coco_kp, 2)} 45 | p, ds_fn, num_classes = paths[name] 46 | 47 | ds = ds_fn(p, image_set=image_set, transforms=transform) 48 | return ds, num_classes 49 | 50 | 51 | def get_transform(train, args): 52 | if train: 53 | return presets.DetectionPresetTrain(data_augmentation=args.data_augmentation) 54 | elif args.weights and args.test_only: 55 | weights = torchvision.models.get_weight(args.weights) 56 | trans = weights.transforms() 57 | return lambda img, target: (trans(img), target) 58 | else: 59 | return presets.DetectionPresetEval() 60 | 61 | 62 | def get_args_parser(add_help=True): 63 | import argparse 64 | 65 | parser = argparse.ArgumentParser(description="PyTorch Detection Training", add_help=add_help) 66 | 67 | parser.add_argument("--data-path", default="/datasets01/COCO/022719/", type=str, help="dataset path") 68 | parser.add_argument("--dataset", default="coco", type=str, help="dataset name") 69 | parser.add_argument("--model", default="maskrcnn_resnet50_fpn", type=str, help="model name") 70 | parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)") 71 | parser.add_argument( 72 | "-b", "--batch-size", default=2, type=int, help="images per gpu, the total batch size is $NGPU x batch_size" 73 | ) 74 | parser.add_argument("--epochs", default=26, type=int, metavar="N", help="number of total epochs to run") 75 | parser.add_argument( 76 | "-j", "--workers", default=4, type=int, metavar="N", help="number of data loading workers (default: 4)" 77 | ) 78 | parser.add_argument("--opt", default="sgd", type=str, help="optimizer") 79 | parser.add_argument( 80 | "--lr", 81 | default=0.02, 82 | type=float, 83 | help="initial learning rate, 0.02 is the default value for training on 8 gpus and 2 images_per_gpu", 84 | ) 85 | parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") 86 | parser.add_argument( 87 | "--wd", 88 | "--weight-decay", 89 | default=1e-4, 90 | type=float, 91 | metavar="W", 92 | help="weight decay (default: 1e-4)", 93 | dest="weight_decay", 94 | ) 95 | parser.add_argument( 96 | "--norm-weight-decay", 97 | default=None, 98 | type=float, 99 | help="weight decay for Normalization layers (default: None, same value as --wd)", 100 | ) 101 | parser.add_argument( 102 | "--lr-scheduler", default="multisteplr", type=str, help="name of lr scheduler (default: multisteplr)" 103 | ) 104 | parser.add_argument( 105 | "--lr-step-size", default=8, type=int, help="decrease lr every step-size epochs (multisteplr scheduler only)" 106 | ) 107 | parser.add_argument( 108 | "--lr-steps", 109 | default=[16, 22], 110 | nargs="+", 111 | type=int, 112 | 
help="decrease lr every step-size epochs (multisteplr scheduler only)", 113 | ) 114 | parser.add_argument( 115 | "--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)" 116 | ) 117 | parser.add_argument("--print-freq", default=20, type=int, help="print frequency") 118 | parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs") 119 | parser.add_argument("--resume", default="", type=str, help="path of checkpoint") 120 | parser.add_argument("--start_epoch", default=0, type=int, help="start epoch") 121 | parser.add_argument("--aspect-ratio-group-factor", default=3, type=int) 122 | parser.add_argument("--rpn-score-thresh", default=None, type=float, help="rpn score threshold for faster-rcnn") 123 | parser.add_argument( 124 | "--trainable-backbone-layers", default=None, type=int, help="number of trainable layers of backbone" 125 | ) 126 | parser.add_argument( 127 | "--data-augmentation", default="hflip", type=str, help="data augmentation policy (default: hflip)" 128 | ) 129 | parser.add_argument( 130 | "--sync-bn", 131 | dest="sync_bn", 132 | help="Use sync batch norm", 133 | action="store_true", 134 | ) 135 | parser.add_argument( 136 | "--test-only", 137 | dest="test_only", 138 | help="Only test the model", 139 | action="store_true", 140 | ) 141 | 142 | parser.add_argument( 143 | "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." 144 | ) 145 | 146 | # distributed training parameters 147 | parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") 148 | parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training") 149 | parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load") 150 | parser.add_argument("--weights-backbone", default=None, type=str, help="the backbone weights enum name to load") 151 | 152 | # Mixed precision training parameters 153 | parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training") 154 | 155 | # Use CopyPaste augmentation training parameter 156 | parser.add_argument( 157 | "--use-copypaste", 158 | action="store_true", 159 | help="Use CopyPaste data augmentation. 
Works only with data-augmentation='lsj'.", 160 | ) 161 | 162 | return parser 163 | 164 | 165 | def main(args): 166 | if args.output_dir: 167 | utils.mkdir(args.output_dir) 168 | 169 | utils.init_distributed_mode(args) 170 | print(args) 171 | 172 | device = torch.device(args.device) 173 | 174 | if args.use_deterministic_algorithms: 175 | torch.use_deterministic_algorithms(True) 176 | 177 | # Data loading code 178 | print("Loading data") 179 | 180 | dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args), args.data_path) 181 | dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args), args.data_path) 182 | 183 | print("Creating data loaders") 184 | if args.distributed: 185 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) 186 | test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False) 187 | else: 188 | train_sampler = torch.utils.data.RandomSampler(dataset) 189 | test_sampler = torch.utils.data.SequentialSampler(dataset_test) 190 | 191 | if args.aspect_ratio_group_factor >= 0: 192 | group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor) 193 | train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) 194 | else: 195 | train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, args.batch_size, drop_last=True) 196 | 197 | train_collate_fn = utils.collate_fn 198 | if args.use_copypaste: 199 | if args.data_augmentation != "lsj": 200 | raise RuntimeError("SimpleCopyPaste algorithm currently only supports the 'lsj' data augmentation policies") 201 | 202 | train_collate_fn = copypaste_collate_fn 203 | 204 | data_loader = torch.utils.data.DataLoader( 205 | dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_collate_fn 206 | ) 207 | 208 | data_loader_test = torch.utils.data.DataLoader( 209 | dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn 210 | ) 211 | 212 | print("Creating model") 213 | kwargs = {"trainable_backbone_layers": args.trainable_backbone_layers} 214 | if args.data_augmentation in ["multiscale", "lsj"]: 215 | kwargs["_skip_resize"] = True 216 | if "rcnn" in args.model: 217 | if args.rpn_score_thresh is not None: 218 | kwargs["rpn_score_thresh"] = args.rpn_score_thresh 219 | model = torchvision.models.get_model( 220 | args.model, weights=args.weights, weights_backbone=args.weights_backbone, num_classes=num_classes, **kwargs 221 | ) 222 | model.to(device) 223 | if args.distributed and args.sync_bn: 224 | model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) 225 | 226 | model_without_ddp = model 227 | if args.distributed: 228 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) 229 | model_without_ddp = model.module 230 | 231 | if args.norm_weight_decay is None: 232 | parameters = [p for p in model.parameters() if p.requires_grad] 233 | else: 234 | param_groups = torchvision.ops._utils.split_normalization_params(model) 235 | wd_groups = [args.norm_weight_decay, args.weight_decay] 236 | parameters = [{"params": p, "weight_decay": w} for p, w in zip(param_groups, wd_groups) if p] 237 | 238 | opt_name = args.opt.lower() 239 | if opt_name.startswith("sgd"): 240 | optimizer = torch.optim.SGD( 241 | parameters, 242 | lr=args.lr, 243 | momentum=args.momentum, 244 | weight_decay=args.weight_decay, 245 | nesterov="nesterov" in opt_name, 246 | ) 247 | elif opt_name == "adamw": 248 | optimizer = 
torch.optim.AdamW(parameters, lr=args.lr, weight_decay=args.weight_decay) 249 | else: 250 | raise RuntimeError(f"Invalid optimizer {args.opt}. Only SGD and AdamW are supported.") 251 | 252 | scaler = torch.cuda.amp.GradScaler() if args.amp else None 253 | 254 | args.lr_scheduler = args.lr_scheduler.lower() 255 | if args.lr_scheduler == "multisteplr": 256 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) 257 | elif args.lr_scheduler == "cosineannealinglr": 258 | lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) 259 | else: 260 | raise RuntimeError( 261 | f"Invalid lr scheduler '{args.lr_scheduler}'. Only MultiStepLR and CosineAnnealingLR are supported." 262 | ) 263 | 264 | if args.resume: 265 | checkpoint = torch.load(args.resume, map_location="cpu") 266 | model_without_ddp.load_state_dict(checkpoint["model"]) 267 | optimizer.load_state_dict(checkpoint["optimizer"]) 268 | lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) 269 | args.start_epoch = checkpoint["epoch"] + 1 270 | if args.amp: 271 | scaler.load_state_dict(checkpoint["scaler"]) 272 | 273 | if args.test_only: 274 | torch.backends.cudnn.deterministic = True 275 | evaluate(model, data_loader_test, device=device) 276 | return 277 | 278 | print("Start training") 279 | start_time = time.time() 280 | for epoch in range(args.start_epoch, args.epochs): 281 | if args.distributed: 282 | train_sampler.set_epoch(epoch) 283 | train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, scaler) 284 | lr_scheduler.step() 285 | if args.output_dir: 286 | checkpoint = { 287 | "model": model_without_ddp.state_dict(), 288 | "optimizer": optimizer.state_dict(), 289 | "lr_scheduler": lr_scheduler.state_dict(), 290 | "args": args, 291 | "epoch": epoch, 292 | } 293 | if args.amp: 294 | checkpoint["scaler"] = scaler.state_dict() 295 | utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) 296 | utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) 297 | 298 | # evaluate after every epoch 299 | evaluate(model, data_loader_test, device=device) 300 | 301 | total_time = time.time() - start_time 302 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 303 | print(f"Training time {total_time_str}") 304 | 305 | 306 | if __name__ == "__main__": 307 | args = get_args_parser().parse_args() 308 | main(args) 309 | -------------------------------------------------------------------------------- /helmet/ml/detection/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import torch 4 | import torchvision 5 | from torch import nn, Tensor 6 | from torchvision import ops 7 | from torchvision.transforms import functional as F, InterpolationMode, transforms as T 8 | 9 | 10 | def _flip_coco_person_keypoints(kps, width): 11 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 12 | flipped_data = kps[:, flip_inds] 13 | flipped_data[..., 0] = width - flipped_data[..., 0] 14 | # Maintain COCO convention that if visibility == 0, then x, y = 0 15 | inds = flipped_data[..., 2] == 0 16 | flipped_data[inds] = 0 17 | return flipped_data 18 | 19 | 20 | class Compose: 21 | def __init__(self, transforms): 22 | self.transforms = transforms 23 | 24 | def __call__(self, image, target): 25 | for t in self.transforms: 26 | image, target = t(image, target) 27 | return 
image, target 28 | 29 | 30 | class RandomHorizontalFlip(T.RandomHorizontalFlip): 31 | def forward( 32 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 33 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 34 | if torch.rand(1) < self.p: 35 | image = F.hflip(image) 36 | if target is not None: 37 | _, _, width = F.get_dimensions(image) 38 | target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]] 39 | if "masks" in target: 40 | target["masks"] = target["masks"].flip(-1) 41 | if "keypoints" in target: 42 | keypoints = target["keypoints"] 43 | keypoints = _flip_coco_person_keypoints(keypoints, width) 44 | target["keypoints"] = keypoints 45 | return image, target 46 | 47 | 48 | class PILToTensor(nn.Module): 49 | def forward( 50 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 51 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 52 | image = F.pil_to_tensor(image) 53 | return image, target 54 | 55 | 56 | class ConvertImageDtype(nn.Module): 57 | def __init__(self, dtype: torch.dtype) -> None: 58 | super().__init__() 59 | self.dtype = dtype 60 | 61 | def forward( 62 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 63 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 64 | image = F.convert_image_dtype(image, self.dtype) 65 | return image, target 66 | 67 | 68 | class RandomIoUCrop(nn.Module): 69 | def __init__( 70 | self, 71 | min_scale: float = 0.3, 72 | max_scale: float = 1.0, 73 | min_aspect_ratio: float = 0.5, 74 | max_aspect_ratio: float = 2.0, 75 | sampler_options: Optional[List[float]] = None, 76 | trials: int = 40, 77 | ): 78 | super().__init__() 79 | # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174 80 | self.min_scale = min_scale 81 | self.max_scale = max_scale 82 | self.min_aspect_ratio = min_aspect_ratio 83 | self.max_aspect_ratio = max_aspect_ratio 84 | if sampler_options is None: 85 | sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0] 86 | self.options = sampler_options 87 | self.trials = trials 88 | 89 | def forward( 90 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 91 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 92 | if target is None: 93 | raise ValueError("The targets can't be None for this transform.") 94 | 95 | if isinstance(image, torch.Tensor): 96 | if image.ndimension() not in {2, 3}: 97 | raise ValueError(f"image should be 2/3 dimensional. 
Got {image.ndimension()} dimensions.") 98 | elif image.ndimension() == 2: 99 | image = image.unsqueeze(0) 100 | 101 | _, orig_h, orig_w = F.get_dimensions(image) 102 | 103 | while True: 104 | # sample an option 105 | idx = int(torch.randint(low=0, high=len(self.options), size=(1,))) 106 | min_jaccard_overlap = self.options[idx] 107 | if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option 108 | return image, target 109 | 110 | for _ in range(self.trials): 111 | # check the aspect ratio limitations 112 | r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2) 113 | new_w = int(orig_w * r[0]) 114 | new_h = int(orig_h * r[1]) 115 | aspect_ratio = new_w / new_h 116 | if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio): 117 | continue 118 | 119 | # check for 0 area crops 120 | r = torch.rand(2) 121 | left = int((orig_w - new_w) * r[0]) 122 | top = int((orig_h - new_h) * r[1]) 123 | right = left + new_w 124 | bottom = top + new_h 125 | if left == right or top == bottom: 126 | continue 127 | 128 | # check for any valid boxes with centers within the crop area 129 | cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2]) 130 | cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3]) 131 | is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom) 132 | if not is_within_crop_area.any(): 133 | continue 134 | 135 | # check at least 1 box with jaccard limitations 136 | boxes = target["boxes"][is_within_crop_area] 137 | ious = torchvision.ops.boxes.box_iou( 138 | boxes, torch.tensor([[left, top, right, bottom]], dtype=boxes.dtype, device=boxes.device) 139 | ) 140 | if ious.max() < min_jaccard_overlap: 141 | continue 142 | 143 | # keep only valid boxes and perform cropping 144 | target["boxes"] = boxes 145 | target["labels"] = target["labels"][is_within_crop_area] 146 | target["boxes"][:, 0::2] -= left 147 | target["boxes"][:, 1::2] -= top 148 | target["boxes"][:, 0::2].clamp_(min=0, max=new_w) 149 | target["boxes"][:, 1::2].clamp_(min=0, max=new_h) 150 | image = F.crop(image, top, left, new_h, new_w) 151 | 152 | return image, target 153 | 154 | 155 | class RandomZoomOut(nn.Module): 156 | def __init__( 157 | self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 158 | ): 159 | super().__init__() 160 | if fill is None: 161 | fill = [0.0, 0.0, 0.0] 162 | self.fill = fill 163 | self.side_range = side_range 164 | if side_range[0] < 1.0 or side_range[0] > side_range[1]: 165 | raise ValueError(f"Invalid canvas side range provided {side_range}.") 166 | self.p = p 167 | 168 | @torch.jit.unused 169 | def _get_fill_value(self, is_pil): 170 | # type: (bool) -> int 171 | # We fake the type to make it work on JIT 172 | return tuple(int(x) for x in self.fill) if is_pil else 0 173 | 174 | def forward( 175 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 176 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 177 | if isinstance(image, torch.Tensor): 178 | if image.ndimension() not in {2, 3}: 179 | raise ValueError(f"image should be 2/3 dimensional. 
Got {image.ndimension()} dimensions.") 180 | elif image.ndimension() == 2: 181 | image = image.unsqueeze(0) 182 | 183 | if torch.rand(1) >= self.p: 184 | return image, target 185 | 186 | _, orig_h, orig_w = F.get_dimensions(image) 187 | 188 | r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) 189 | canvas_width = int(orig_w * r) 190 | canvas_height = int(orig_h * r) 191 | 192 | r = torch.rand(2) 193 | left = int((canvas_width - orig_w) * r[0]) 194 | top = int((canvas_height - orig_h) * r[1]) 195 | right = canvas_width - (left + orig_w) 196 | bottom = canvas_height - (top + orig_h) 197 | 198 | if torch.jit.is_scripting(): 199 | fill = 0 200 | else: 201 | fill = self._get_fill_value(F._is_pil_image(image)) 202 | 203 | image = F.pad(image, [left, top, right, bottom], fill=fill) 204 | if isinstance(image, torch.Tensor): 205 | # PyTorch's pad supports only integers on fill. So we need to overwrite the colour 206 | v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1) 207 | image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h) :, :] = image[ 208 | ..., :, (left + orig_w) : 209 | ] = v 210 | 211 | if target is not None: 212 | target["boxes"][:, 0::2] += left 213 | target["boxes"][:, 1::2] += top 214 | 215 | return image, target 216 | 217 | 218 | class RandomPhotometricDistort(nn.Module): 219 | def __init__( 220 | self, 221 | contrast: Tuple[float, float] = (0.5, 1.5), 222 | saturation: Tuple[float, float] = (0.5, 1.5), 223 | hue: Tuple[float, float] = (-0.05, 0.05), 224 | brightness: Tuple[float, float] = (0.875, 1.125), 225 | p: float = 0.5, 226 | ): 227 | super().__init__() 228 | self._brightness = T.ColorJitter(brightness=brightness) 229 | self._contrast = T.ColorJitter(contrast=contrast) 230 | self._hue = T.ColorJitter(hue=hue) 231 | self._saturation = T.ColorJitter(saturation=saturation) 232 | self.p = p 233 | 234 | def forward( 235 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 236 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 237 | if isinstance(image, torch.Tensor): 238 | if image.ndimension() not in {2, 3}: 239 | raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.") 240 | elif image.ndimension() == 2: 241 | image = image.unsqueeze(0) 242 | 243 | r = torch.rand(7) 244 | 245 | if r[0] < self.p: 246 | image = self._brightness(image) 247 | 248 | contrast_before = r[1] < 0.5 249 | if contrast_before: 250 | if r[2] < self.p: 251 | image = self._contrast(image) 252 | 253 | if r[3] < self.p: 254 | image = self._saturation(image) 255 | 256 | if r[4] < self.p: 257 | image = self._hue(image) 258 | 259 | if not contrast_before: 260 | if r[5] < self.p: 261 | image = self._contrast(image) 262 | 263 | if r[6] < self.p: 264 | channels, _, _ = F.get_dimensions(image) 265 | permutation = torch.randperm(channels) 266 | 267 | is_pil = F._is_pil_image(image) 268 | if is_pil: 269 | image = F.pil_to_tensor(image) 270 | image = F.convert_image_dtype(image) 271 | image = image[..., permutation, :, :] 272 | if is_pil: 273 | image = F.to_pil_image(image) 274 | 275 | return image, target 276 | 277 | 278 | class ScaleJitter(nn.Module): 279 | """Randomly resizes the image and its bounding boxes within the specified scale range. 280 | The class implements the Scale Jitter augmentation as described in the paper 281 | `"Simple Copy-Paste is a Strong Data Augmentation Method for Instance Segmentation" `_. 
282 | 283 | Args: 284 | target_size (tuple of ints): The target size for the transform provided in (height, weight) format. 285 | scale_range (tuple of ints): scaling factor interval, e.g (a, b), then scale is randomly sampled from the 286 | range a <= scale <= b. 287 | interpolation (InterpolationMode): Desired interpolation enum defined by 288 | :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. 289 | """ 290 | 291 | def __init__( 292 | self, 293 | target_size: Tuple[int, int], 294 | scale_range: Tuple[float, float] = (0.1, 2.0), 295 | interpolation: InterpolationMode = InterpolationMode.BILINEAR, 296 | ): 297 | super().__init__() 298 | self.target_size = target_size 299 | self.scale_range = scale_range 300 | self.interpolation = interpolation 301 | 302 | def forward( 303 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 304 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 305 | if isinstance(image, torch.Tensor): 306 | if image.ndimension() not in {2, 3}: 307 | raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.") 308 | elif image.ndimension() == 2: 309 | image = image.unsqueeze(0) 310 | 311 | _, orig_height, orig_width = F.get_dimensions(image) 312 | 313 | scale = self.scale_range[0] + torch.rand(1) * (self.scale_range[1] - self.scale_range[0]) 314 | r = min(self.target_size[1] / orig_height, self.target_size[0] / orig_width) * scale 315 | new_width = int(orig_width * r) 316 | new_height = int(orig_height * r) 317 | 318 | image = F.resize(image, [new_height, new_width], interpolation=self.interpolation) 319 | 320 | if target is not None: 321 | target["boxes"][:, 0::2] *= new_width / orig_width 322 | target["boxes"][:, 1::2] *= new_height / orig_height 323 | if "masks" in target: 324 | target["masks"] = F.resize( 325 | target["masks"], [new_height, new_width], interpolation=InterpolationMode.NEAREST 326 | ) 327 | 328 | return image, target 329 | 330 | 331 | class FixedSizeCrop(nn.Module): 332 | def __init__(self, size, fill=0, padding_mode="constant"): 333 | super().__init__() 334 | size = tuple(T._setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")) 335 | self.crop_height = size[0] 336 | self.crop_width = size[1] 337 | self.fill = fill # TODO: Fill is currently respected only on PIL. Apply tensor patch. 
338 | self.padding_mode = padding_mode 339 | 340 | def _pad(self, img, target, padding): 341 | # Taken from the functional_tensor.py pad 342 | if isinstance(padding, int): 343 | pad_left = pad_right = pad_top = pad_bottom = padding 344 | elif len(padding) == 1: 345 | pad_left = pad_right = pad_top = pad_bottom = padding[0] 346 | elif len(padding) == 2: 347 | pad_left = pad_right = padding[0] 348 | pad_top = pad_bottom = padding[1] 349 | else: 350 | pad_left = padding[0] 351 | pad_top = padding[1] 352 | pad_right = padding[2] 353 | pad_bottom = padding[3] 354 | 355 | padding = [pad_left, pad_top, pad_right, pad_bottom] 356 | img = F.pad(img, padding, self.fill, self.padding_mode) 357 | if target is not None: 358 | target["boxes"][:, 0::2] += pad_left 359 | target["boxes"][:, 1::2] += pad_top 360 | if "masks" in target: 361 | target["masks"] = F.pad(target["masks"], padding, 0, "constant") 362 | 363 | return img, target 364 | 365 | def _crop(self, img, target, top, left, height, width): 366 | img = F.crop(img, top, left, height, width) 367 | if target is not None: 368 | boxes = target["boxes"] 369 | boxes[:, 0::2] -= left 370 | boxes[:, 1::2] -= top 371 | boxes[:, 0::2].clamp_(min=0, max=width) 372 | boxes[:, 1::2].clamp_(min=0, max=height) 373 | 374 | is_valid = (boxes[:, 0] < boxes[:, 2]) & (boxes[:, 1] < boxes[:, 3]) 375 | 376 | target["boxes"] = boxes[is_valid] 377 | target["labels"] = target["labels"][is_valid] 378 | if "masks" in target: 379 | target["masks"] = F.crop(target["masks"][is_valid], top, left, height, width) 380 | 381 | return img, target 382 | 383 | def forward(self, img, target=None): 384 | _, height, width = F.get_dimensions(img) 385 | new_height = min(height, self.crop_height) 386 | new_width = min(width, self.crop_width) 387 | 388 | if new_height != height or new_width != width: 389 | offset_height = max(height - self.crop_height, 0) 390 | offset_width = max(width - self.crop_width, 0) 391 | 392 | r = torch.rand(1) 393 | top = int(offset_height * r) 394 | left = int(offset_width * r) 395 | 396 | img, target = self._crop(img, target, top, left, new_height, new_width) 397 | 398 | pad_bottom = max(self.crop_height - new_height, 0) 399 | pad_right = max(self.crop_width - new_width, 0) 400 | if pad_bottom != 0 or pad_right != 0: 401 | img, target = self._pad(img, target, [0, 0, pad_right, pad_bottom]) 402 | 403 | return img, target 404 | 405 | 406 | class RandomShortestSize(nn.Module): 407 | def __init__( 408 | self, 409 | min_size: Union[List[int], Tuple[int], int], 410 | max_size: int, 411 | interpolation: InterpolationMode = InterpolationMode.BILINEAR, 412 | ): 413 | super().__init__() 414 | self.min_size = [min_size] if isinstance(min_size, int) else list(min_size) 415 | self.max_size = max_size 416 | self.interpolation = interpolation 417 | 418 | def forward( 419 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 420 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 421 | _, orig_height, orig_width = F.get_dimensions(image) 422 | 423 | min_size = self.min_size[torch.randint(len(self.min_size), (1,)).item()] 424 | r = min(min_size / min(orig_height, orig_width), self.max_size / max(orig_height, orig_width)) 425 | 426 | new_width = int(orig_width * r) 427 | new_height = int(orig_height * r) 428 | 429 | image = F.resize(image, [new_height, new_width], interpolation=self.interpolation) 430 | 431 | if target is not None: 432 | target["boxes"][:, 0::2] *= new_width / orig_width 433 | target["boxes"][:, 1::2] *= new_height / orig_height 434 | if "masks" in 
target: 435 | target["masks"] = F.resize( 436 | target["masks"], [new_height, new_width], interpolation=InterpolationMode.NEAREST 437 | ) 438 | 439 | return image, target 440 | 441 | 442 | def _copy_paste( 443 | image: torch.Tensor, 444 | target: Dict[str, Tensor], 445 | paste_image: torch.Tensor, 446 | paste_target: Dict[str, Tensor], 447 | blending: bool = True, 448 | resize_interpolation: F.InterpolationMode = F.InterpolationMode.BILINEAR, 449 | ) -> Tuple[torch.Tensor, Dict[str, Tensor]]: 450 | 451 | # Random paste targets selection: 452 | num_masks = len(paste_target["masks"]) 453 | 454 | if num_masks < 1: 455 | # Such degerante case with num_masks=0 can happen with LSJ 456 | # Let's just return (image, target) 457 | return image, target 458 | 459 | # We have to please torch script by explicitly specifying dtype as torch.long 460 | random_selection = torch.randint(0, num_masks, (num_masks,), device=paste_image.device) 461 | random_selection = torch.unique(random_selection).to(torch.long) 462 | 463 | paste_masks = paste_target["masks"][random_selection] 464 | paste_boxes = paste_target["boxes"][random_selection] 465 | paste_labels = paste_target["labels"][random_selection] 466 | 467 | masks = target["masks"] 468 | 469 | # We resize source and paste data if they have different sizes 470 | # This is something we introduced here as originally the algorithm works 471 | # on equal-sized data (for example, coming from LSJ data augmentations) 472 | size1 = image.shape[-2:] 473 | size2 = paste_image.shape[-2:] 474 | if size1 != size2: 475 | paste_image = F.resize(paste_image, size1, interpolation=resize_interpolation) 476 | paste_masks = F.resize(paste_masks, size1, interpolation=F.InterpolationMode.NEAREST) 477 | # resize bboxes: 478 | ratios = torch.tensor((size1[1] / size2[1], size1[0] / size2[0]), device=paste_boxes.device) 479 | paste_boxes = paste_boxes.view(-1, 2, 2).mul(ratios).view(paste_boxes.shape) 480 | 481 | paste_alpha_mask = paste_masks.sum(dim=0) > 0 482 | 483 | if blending: 484 | paste_alpha_mask = F.gaussian_blur( 485 | paste_alpha_mask.unsqueeze(0), 486 | kernel_size=(5, 5), 487 | sigma=[ 488 | 2.0, 489 | ], 490 | ) 491 | 492 | # Copy-paste images: 493 | image = (image * (~paste_alpha_mask)) + (paste_image * paste_alpha_mask) 494 | 495 | # Copy-paste masks: 496 | masks = masks * (~paste_alpha_mask) 497 | non_all_zero_masks = masks.sum((-1, -2)) > 0 498 | masks = masks[non_all_zero_masks] 499 | 500 | # Do a shallow copy of the target dict 501 | out_target = {k: v for k, v in target.items()} 502 | 503 | out_target["masks"] = torch.cat([masks, paste_masks]) 504 | 505 | # Copy-paste boxes and labels 506 | boxes = ops.masks_to_boxes(masks) 507 | out_target["boxes"] = torch.cat([boxes, paste_boxes]) 508 | 509 | labels = target["labels"][non_all_zero_masks] 510 | out_target["labels"] = torch.cat([labels, paste_labels]) 511 | 512 | # Update additional optional keys: area and iscrowd if exist 513 | if "area" in target: 514 | out_target["area"] = out_target["masks"].sum((-1, -2)).to(torch.float32) 515 | 516 | if "iscrowd" in target and "iscrowd" in paste_target: 517 | # target['iscrowd'] size can be differ from mask size (non_all_zero_masks) 518 | # For example, if previous transforms geometrically modifies masks/boxes/labels but 519 | # does not update "iscrowd" 520 | if len(target["iscrowd"]) == len(non_all_zero_masks): 521 | iscrowd = target["iscrowd"][non_all_zero_masks] 522 | paste_iscrowd = paste_target["iscrowd"][random_selection] 523 | out_target["iscrowd"] = 
torch.cat([iscrowd, paste_iscrowd]) 524 | 525 | # Check for degenerated boxes and remove them 526 | boxes = out_target["boxes"] 527 | degenerate_boxes = boxes[:, 2:] <= boxes[:, :2] 528 | if degenerate_boxes.any(): 529 | valid_targets = ~degenerate_boxes.any(dim=1) 530 | 531 | out_target["boxes"] = boxes[valid_targets] 532 | out_target["masks"] = out_target["masks"][valid_targets] 533 | out_target["labels"] = out_target["labels"][valid_targets] 534 | 535 | if "area" in out_target: 536 | out_target["area"] = out_target["area"][valid_targets] 537 | if "iscrowd" in out_target and len(out_target["iscrowd"]) == len(valid_targets): 538 | out_target["iscrowd"] = out_target["iscrowd"][valid_targets] 539 | 540 | return image, out_target 541 | 542 | 543 | class SimpleCopyPaste(torch.nn.Module): 544 | def __init__(self, blending=True, resize_interpolation=F.InterpolationMode.BILINEAR): 545 | super().__init__() 546 | self.resize_interpolation = resize_interpolation 547 | self.blending = blending 548 | 549 | def forward( 550 | self, images: List[torch.Tensor], targets: List[Dict[str, Tensor]] 551 | ) -> Tuple[List[torch.Tensor], List[Dict[str, Tensor]]]: 552 | torch._assert( 553 | isinstance(images, (list, tuple)) and all([isinstance(v, torch.Tensor) for v in images]), 554 | "images should be a list of tensors", 555 | ) 556 | torch._assert( 557 | isinstance(targets, (list, tuple)) and len(images) == len(targets), 558 | "targets should be a list of the same size as images", 559 | ) 560 | for target in targets: 561 | # Can not check for instance type dict with inside torch.jit.script 562 | # torch._assert(isinstance(target, dict), "targets item should be a dict") 563 | for k in ["masks", "boxes", "labels"]: 564 | torch._assert(k in target, f"Key {k} should be present in targets") 565 | torch._assert(isinstance(target[k], torch.Tensor), f"Value for the key {k} should be a tensor") 566 | 567 | # images = [t1, t2, ..., tN] 568 | # Let's define paste_images as shifted list of input images 569 | # paste_images = [t2, t3, ..., tN, t1] 570 | # FYI: in TF they mix data on the dataset level 571 | images_rolled = images[-1:] + images[:-1] 572 | targets_rolled = targets[-1:] + targets[:-1] 573 | 574 | output_images: List[torch.Tensor] = [] 575 | output_targets: List[Dict[str, Tensor]] = [] 576 | 577 | for image, target, paste_image, paste_target in zip(images, targets, images_rolled, targets_rolled): 578 | output_image, output_data = _copy_paste( 579 | image, 580 | target, 581 | paste_image, 582 | paste_target, 583 | blending=self.blending, 584 | resize_interpolation=self.resize_interpolation, 585 | ) 586 | output_images.append(output_image) 587 | output_targets.append(output_data) 588 | 589 | return output_images, output_targets 590 | 591 | def __repr__(self) -> str: 592 | s = f"{self.__class__.__name__}(blending={self.blending}, resize_interpolation={self.resize_interpolation})" 593 | return s 594 | -------------------------------------------------------------------------------- /helmet/ml/detection/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import errno 3 | import os 4 | import time 5 | from collections import defaultdict, deque 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | class SmoothedValue: 12 | """Track a series of values and provide access to smoothed values over a 13 | window or the global series average. 
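    A minimal illustrative sketch (the values are made up):

        meter = SmoothedValue(window_size=20)   # default fmt: "{median:.4f} ({global_avg:.4f})"
        for loss in (0.9, 0.7, 0.65):
            meter.update(loss)
        print(meter)   # -> "0.7000 (0.7500)": windowed median and global average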
14 | """ 15 | 16 | def __init__(self, window_size=20, fmt=None): 17 | if fmt is None: 18 | fmt = "{median:.4f} ({global_avg:.4f})" 19 | self.deque = deque(maxlen=window_size) 20 | self.total = 0.0 21 | self.count = 0 22 | self.fmt = fmt 23 | 24 | def update(self, value, n=1): 25 | self.deque.append(value) 26 | self.count += n 27 | self.total += value * n 28 | 29 | def synchronize_between_processes(self): 30 | """ 31 | Warning: does not synchronize the deque! 32 | """ 33 | if not is_dist_avail_and_initialized(): 34 | return 35 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") 36 | dist.barrier() 37 | dist.all_reduce(t) 38 | t = t.tolist() 39 | self.count = int(t[0]) 40 | self.total = t[1] 41 | 42 | @property 43 | def median(self): 44 | d = torch.tensor(list(self.deque)) 45 | return d.median().item() 46 | 47 | @property 48 | def avg(self): 49 | d = torch.tensor(list(self.deque), dtype=torch.float32) 50 | return d.mean().item() 51 | 52 | @property 53 | def global_avg(self): 54 | return self.total / self.count 55 | 56 | @property 57 | def max(self): 58 | return max(self.deque) 59 | 60 | @property 61 | def value(self): 62 | return self.deque[-1] 63 | 64 | def __str__(self): 65 | return self.fmt.format( 66 | median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value 67 | ) 68 | 69 | 70 | def all_gather(data): 71 | """ 72 | Run all_gather on arbitrary picklable data (not necessarily tensors) 73 | Args: 74 | data: any picklable object 75 | Returns: 76 | list[data]: list of data gathered from each rank 77 | """ 78 | world_size = get_world_size() 79 | if world_size == 1: 80 | return [data] 81 | data_list = [None] * world_size 82 | dist.all_gather_object(data_list, data) 83 | return data_list 84 | 85 | 86 | def reduce_dict(input_dict, average=True): 87 | """ 88 | Args: 89 | input_dict (dict): all the values will be reduced 90 | average (bool): whether to do average or sum 91 | Reduce the values in the dictionary from all processes so that all processes 92 | have the averaged results. Returns a dict with the same fields as 93 | input_dict, after reduction. 
94 | """ 95 | world_size = get_world_size() 96 | if world_size < 2: 97 | return input_dict 98 | with torch.inference_mode(): 99 | names = [] 100 | values = [] 101 | # sort the keys so that they are consistent across processes 102 | for k in sorted(input_dict.keys()): 103 | names.append(k) 104 | values.append(input_dict[k]) 105 | values = torch.stack(values, dim=0) 106 | dist.all_reduce(values) 107 | if average: 108 | values /= world_size 109 | reduced_dict = {k: v for k, v in zip(names, values)} 110 | return reduced_dict 111 | 112 | 113 | class MetricLogger: 114 | def __init__(self, delimiter="\t"): 115 | self.meters = defaultdict(SmoothedValue) 116 | self.delimiter = delimiter 117 | 118 | def update(self, **kwargs): 119 | for k, v in kwargs.items(): 120 | if isinstance(v, torch.Tensor): 121 | v = v.item() 122 | assert isinstance(v, (float, int)) 123 | self.meters[k].update(v) 124 | 125 | def __getattr__(self, attr): 126 | if attr in self.meters: 127 | return self.meters[attr] 128 | if attr in self.__dict__: 129 | return self.__dict__[attr] 130 | raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") 131 | 132 | def __str__(self): 133 | loss_str = [] 134 | for name, meter in self.meters.items(): 135 | loss_str.append(f"{name}: {str(meter)}") 136 | return self.delimiter.join(loss_str) 137 | 138 | def synchronize_between_processes(self): 139 | for meter in self.meters.values(): 140 | meter.synchronize_between_processes() 141 | 142 | def add_meter(self, name, meter): 143 | self.meters[name] = meter 144 | 145 | def log_every(self, iterable, print_freq, header=None): 146 | i = 0 147 | if not header: 148 | header = "" 149 | start_time = time.time() 150 | end = time.time() 151 | iter_time = SmoothedValue(fmt="{avg:.4f}") 152 | data_time = SmoothedValue(fmt="{avg:.4f}") 153 | space_fmt = ":" + str(len(str(len(iterable)))) + "d" 154 | if torch.cuda.is_available(): 155 | log_msg = self.delimiter.join( 156 | [ 157 | header, 158 | "[{0" + space_fmt + "}/{1}]", 159 | "eta: {eta}", 160 | "{meters}", 161 | "time: {time}", 162 | "data: {data}", 163 | "max mem: {memory:.0f}", 164 | ] 165 | ) 166 | else: 167 | log_msg = self.delimiter.join( 168 | [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] 169 | ) 170 | MB = 1024.0 * 1024.0 171 | for obj in iterable: 172 | data_time.update(time.time() - end) 173 | yield obj 174 | iter_time.update(time.time() - end) 175 | if i % print_freq == 0 or i == len(iterable) - 1: 176 | eta_seconds = iter_time.global_avg * (len(iterable) - i) 177 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 178 | if torch.cuda.is_available(): 179 | print( 180 | log_msg.format( 181 | i, 182 | len(iterable), 183 | eta=eta_string, 184 | meters=str(self), 185 | time=str(iter_time), 186 | data=str(data_time), 187 | memory=torch.cuda.max_memory_allocated() / MB, 188 | ) 189 | ) 190 | else: 191 | print( 192 | log_msg.format( 193 | i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) 194 | ) 195 | ) 196 | i += 1 197 | end = time.time() 198 | total_time = time.time() - start_time 199 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 200 | print(f"{header} Total time: {total_time_str} ({total_time / len(iterable):.4f} s / it)") 201 | 202 | 203 | def collate_fn(batch): 204 | return tuple(zip(*batch)) 205 | 206 | 207 | def mkdir(path): 208 | try: 209 | os.makedirs(path) 210 | except OSError as e: 211 | if e.errno != errno.EEXIST: 212 | raise 213 | 214 | 215 
| def setup_for_distributed(is_master): 216 | """ 217 | This function disables printing when not in master process 218 | """ 219 | import builtins as __builtin__ 220 | 221 | builtin_print = __builtin__.print 222 | 223 | def print(*args, **kwargs): 224 | force = kwargs.pop("force", False) 225 | if is_master or force: 226 | builtin_print(*args, **kwargs) 227 | 228 | __builtin__.print = print 229 | 230 | 231 | def is_dist_avail_and_initialized(): 232 | if not dist.is_available(): 233 | return False 234 | if not dist.is_initialized(): 235 | return False 236 | return True 237 | 238 | 239 | def get_world_size(): 240 | if not is_dist_avail_and_initialized(): 241 | return 1 242 | return dist.get_world_size() 243 | 244 | 245 | def get_rank(): 246 | if not is_dist_avail_and_initialized(): 247 | return 0 248 | return dist.get_rank() 249 | 250 | 251 | def is_main_process(): 252 | return get_rank() == 0 253 | 254 | 255 | def save_on_master(*args, **kwargs): 256 | if is_main_process(): 257 | torch.save(*args, **kwargs) 258 | 259 | 260 | def init_distributed_mode(args): 261 | if "RANK" in os.environ and "WORLD_SIZE" in os.environ: 262 | args.rank = int(os.environ["RANK"]) 263 | args.world_size = int(os.environ["WORLD_SIZE"]) 264 | args.gpu = int(os.environ["LOCAL_RANK"]) 265 | elif "SLURM_PROCID" in os.environ: 266 | args.rank = int(os.environ["SLURM_PROCID"]) 267 | args.gpu = args.rank % torch.cuda.device_count() 268 | else: 269 | print("Not using distributed mode") 270 | args.distributed = False 271 | return 272 | 273 | args.distributed = True 274 | 275 | torch.cuda.set_device(args.gpu) 276 | args.dist_backend = "nccl" 277 | print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) 278 | torch.distributed.init_process_group( 279 | backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank 280 | ) 281 | torch.distributed.barrier() 282 | setup_for_distributed(args.rank == 0) 283 | -------------------------------------------------------------------------------- /helmet/ml/feature/helmet_detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import datasets 3 | from pycocotools.coco import COCO 4 | from helmet.constants import ANNOTATIONS_COCO_JSON_FILE 5 | from helmet.exception import HelmetException 6 | import cv2 7 | import os 8 | import sys 9 | import copy 10 | 11 | 12 | class HelmetDetection(datasets.VisionDataset): 13 | 14 | def __init__(self, root, split='train', transform=None, target_transform=None, transforms=None): 15 | # the 3 transform parameters are required for datasets.VisionDataset 16 | super().__init__(root, transforms, transform, target_transform) 17 | self.split = split #train, valid, test 18 | self.coco = COCO(os.path.join(root, split, ANNOTATIONS_COCO_JSON_FILE)) # annotation stored here 19 | self.ids = list(sorted(self.coco.imgs.keys())) 20 | self.ids = [id for id in self.ids if (len(self._load_target(id)) > 0)] 21 | 22 | def _load_image(self, id: int): 23 | try: 24 | path = self.coco.loadImgs(id)[0]['file_name'] 25 | image = cv2.imread(os.path.join(self.root, self.split, path)) 26 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 27 | return image 28 | except Exception as e: 29 | raise HelmetException(e, sys) from e 30 | 31 | def _load_target(self, id): 32 | try: 33 | return self.coco.loadAnns(self.coco.getAnnIds(id)) 34 | except Exception as e: 35 | raise HelmetException(e, sys) from e 36 | 37 | def __getitem__(self, index): 38 | try: 39 | id = 
self.ids[index] 40 | image = self._load_image(id) 41 | # deep-copy the annotations so the transforms below do not mutate the cached COCO entries 42 | target = copy.deepcopy(self._load_target(id)) 43 | 44 | boxes = [t['bbox'] + [t['category_id']] for t in target] # required annotation format for albumentations 45 | if self.transforms is not None: 46 | transformed = self.transforms(image=image, bboxes=boxes) 47 | 48 | image = transformed['image'] 49 | boxes = transformed['bboxes'] 50 | 51 | new_boxes = [] # convert from xywh to xyxy 52 | for box in boxes: 53 | xmin = box[0] 54 | xmax = xmin + box[2] 55 | ymin = box[1] 56 | ymax = ymin + box[3] 57 | new_boxes.append([xmin, ymin, xmax, ymax]) 58 | 59 | boxes = torch.tensor(new_boxes, dtype=torch.float32) 60 | 61 | targ = {} # here is our transformed target 62 | targ['boxes'] = boxes 63 | targ['labels'] = torch.tensor([t['category_id'] for t in target], dtype=torch.int64) 64 | targ['image_id'] = torch.tensor([t['image_id'] for t in target]) 65 | targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) # recompute the area from the transformed xyxy boxes 66 | targ['iscrowd'] = torch.tensor([t['iscrowd'] for t in target], dtype=torch.int64) 67 | return image.div(255), targ # scale pixel values to [0, 1] 68 | 69 | except Exception as e: 70 | raise HelmetException(e, sys) from e 71 | 72 | def __len__(self): 73 | try: 74 | return len(self.ids) 75 | 76 | except Exception as e: 77 | raise HelmetException(e, sys) from e 78 | -------------------------------------------------------------------------------- /helmet/ml/models/model_optimiser.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def model_optimiser(model): 5 | params = [p for p in model.parameters() if p.requires_grad] 6 | optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, nesterov=True, weight_decay=1e-4) 7 | return optimizer 8 | -------------------------------------------------------------------------------- /helmet/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/pipeline/__init__.py -------------------------------------------------------------------------------- /helmet/pipeline/prediction_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | import sys 4 | from PIL import Image 5 | import base64 6 | from io import BytesIO 7 | from torchvision import transforms 8 | from torchvision.utils import draw_bounding_boxes 9 | from helmet.exception import HelmetException 10 | from helmet.logger import logging 11 | from helmet.configuration.s3_operations import S3Operation 12 | from helmet.constants import * 13 | import torch # needed for torch.tensor and torch.load below 14 | 15 | class PredictionPipeline: 16 | def __init__(self): 17 | self.s3 = S3Operation() 18 | self.bucket_name = BUCKET_NAME 19 | 20 | def image_loader(self, image_bytes): 21 | """Load an image from raw bytes; returns a float tensor and a uint8 copy for drawing.""" 22 | logging.info("Entered the image_loader method of PredictionPipeline class") 23 | try: 24 | # image = Image.open(io.BytesIO(image_bytes)).convert('RGB') 25 | image = Image.open(io.BytesIO(image_bytes)) 26 | convert_tensor = transforms.ToTensor() 27 | tensor_image = convert_tensor(image) 28 | # image = image[:3] 29 | image_int = torch.tensor(tensor_image * 255, dtype=torch.uint8) 30 | logging.info("Exited the image_loader method of PredictionPipeline class") 31 | return tensor_image, image_int 32 | 33 | except Exception as e: 34 | raise
HelmetException(e, sys) from e 35 | 36 | 37 | def get_model_from_s3(self) -> str: 38 | """ 39 | Method Name : get_model_from_s3 40 | Description : This method downloads the best trained model from the S3 bucket. 41 | 42 | Output : Local path of the downloaded model 43 | """ 44 | logging.info("Entered the get_model_from_s3 method of PredictionPipeline class") 45 | try: 46 | # Loading the best model from s3 bucket 47 | os.makedirs("artifacts/PredictModel", exist_ok=True) 48 | predict_model_path = os.path.join(os.getcwd(), "artifacts", "PredictModel", TRAINED_MODEL_NAME) 49 | best_model_path = self.s3.read_data_from_s3(TRAINED_MODEL_NAME, self.bucket_name, predict_model_path) 50 | logging.info("Exited the get_model_from_s3 method of PredictionPipeline class") 51 | return best_model_path 52 | 53 | except Exception as e: 54 | raise HelmetException(e, sys) from e 55 | 56 | 57 | 58 | def prediction(self, best_model_path: str, image_tensor, image_int_tensor) -> bytes: 59 | logging.info("Entered the prediction method of PredictionPipeline class") 60 | try: 61 | model = torch.load(best_model_path, map_location=torch.device(DEVICE)) 62 | model.eval() 63 | with torch.no_grad(): 64 | prediction = model([image_tensor.to(DEVICE)]) 65 | pred = prediction[0] 66 | 67 | bbox_tensor = draw_bounding_boxes(image_int_tensor, 68 | pred['boxes'][pred['scores'] > 0.8], 69 | [PREDICTION_CLASSES[i] for i in pred['labels'][pred['scores'] > 0.8].tolist()], 70 | width=4).permute(0, 2, 1) 71 | 72 | transform = transforms.ToPILImage() 73 | img = transform(bbox_tensor) 74 | buffered = BytesIO() 75 | img.save(buffered, format="JPEG") 76 | img_str = base64.b64encode(buffered.getvalue()) 77 | 78 | logging.info("Exited the prediction method of PredictionPipeline class") 79 | return img_str # base64-encoded JPEG bytes 80 | 81 | except Exception as e: 82 | raise HelmetException(e, sys) from e 83 | 84 | 85 | 86 | 87 | def run_pipeline(self, data): 88 | logging.info("Entered the run_pipeline method of PredictionPipeline class") 89 | try: 90 | image, image_int = self.image_loader(data) 91 | print(image.shape) 92 | print(image_int.shape) 93 | best_model_path: str = self.get_model_from_s3() 94 | detected_image = self.prediction(best_model_path, image, image_int) 95 | logging.info("Exited the run_pipeline method of PredictionPipeline class") 96 | return detected_image 97 | except Exception as e: 98 | raise HelmetException(e, sys) from e 99 | -------------------------------------------------------------------------------- /helmet/pipeline/train_pipeline.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from helmet.components.data_ingestion import DataIngestion 3 | from helmet.components.data_transformation import DataTransformation 4 | from helmet.components.model_trainer import ModelTrainer 5 | from helmet.components.model_evaluation import ModelEvaluation 6 | from helmet.components.model_pusher import ModelPusher 7 | from helmet.configuration.s3_operations import S3Operation 8 | from helmet.entity.config_entity import DataIngestionConfig, DataTransformationConfig, ModelTrainerConfig, ModelEvaluationConfig, ModelPusherConfig 9 | from helmet.entity.artifacts_entity import DataIngestionArtifacts, DataTransformationArtifacts, ModelTrainerArtifacts, ModelEvaluationArtifacts, ModelPusherArtifacts 10 | from helmet.logger import logging 11 | from helmet.exception import HelmetException 12 | 13 | 14 | 15 | 16 | class TrainPipeline: 17 | def __init__(self): 18 | self.data_ingestion_config = DataIngestionConfig() 19 | self.data_transformation_config = DataTransformationConfig() 20 |
self.model_trainer_config = ModelTrainerConfig() 21 | self.model_evaluation_config = ModelEvaluationConfig() 22 | self.model_pusher_config = ModelPusherConfig() 23 | self.s3_operations = S3Operation() 24 | 25 | 26 | 27 | def start_data_ingestion(self) -> DataIngestionArtifacts: 28 | logging.info("Entered the start_data_ingestion method of TrainPipeline class") 29 | try: 30 | logging.info("Getting the data from S3 bucket") 31 | data_ingestion = DataIngestion( 32 | data_ingestion_config=self.data_ingestion_config, s3_operations= S3Operation() 33 | ) 34 | data_ingestion_artifact = data_ingestion.initiate_data_ingestion() 35 | logging.info("Got the train, test and valid from s3") 36 | logging.info("Exited the start_data_ingestion method of TrainPipeline class") 37 | return data_ingestion_artifact 38 | 39 | except Exception as e: 40 | raise HelmetException(e, sys) from e 41 | 42 | 43 | 44 | def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifacts,) -> DataTransformationArtifacts: 45 | logging.info( 46 | "Entered the start_data_transformation method of TrainPipeline class" 47 | ) 48 | try: 49 | data_transformation = DataTransformation( 50 | 51 | data_ingestion_artifact=data_ingestion_artifact, 52 | data_transformation_config=self.data_transformation_config, 53 | ) 54 | data_transformation_artifact = ( 55 | data_transformation.initiate_data_transformation() 56 | ) 57 | logging.info( 58 | "Exited the start_data_transformation method of TrainPipeline class" 59 | ) 60 | return data_transformation_artifact 61 | 62 | except Exception as e: 63 | raise HelmetException(e, sys) from e 64 | 65 | 66 | def start_model_trainer(self, data_transformation_artifact: DataTransformationArtifacts) -> ModelTrainerArtifacts: 67 | logging.info( 68 | "Entered the start_model_trainer method of TrainPipeline class" 69 | ) 70 | try: 71 | model_trainer = ModelTrainer(data_transformation_artifacts=data_transformation_artifact, 72 | model_trainer_config=self.model_trainer_config 73 | ) 74 | model_trainer_artifact = model_trainer.initiate_model_trainer() 75 | logging.info("Exited the start_model_trainer method of TrainPipeline class") 76 | return model_trainer_artifact 77 | 78 | except Exception as e: 79 | raise HelmetException(e, sys) 80 | 81 | 82 | def start_model_evaluation(self, model_trainer_artifact: ModelTrainerArtifacts, data_transformation_artifact: DataTransformationArtifacts) -> ModelEvaluationArtifacts: 83 | logging.info("Entered the start_model_evaluation method of TrainPipeline class") 84 | try: 85 | model_evaluation = ModelEvaluation(data_transformation_artifacts = data_transformation_artifact, 86 | model_evaluation_config=self.model_evaluation_config, 87 | model_trainer_artifacts=model_trainer_artifact) 88 | 89 | model_evaluation_artifact = model_evaluation.initiate_model_evaluation() 90 | logging.info("Exited the start_model_evaluation method of TrainPipeline class") 91 | return model_evaluation_artifact 92 | 93 | except Exception as e: 94 | raise HelmetException(e, sys) from e 95 | 96 | 97 | 98 | def start_model_pusher(self,s3: S3Operation,) -> ModelPusherArtifacts: 99 | logging.info("Entered the start_model_pusher method of TrainPipeline class") 100 | try: 101 | model_pusher = ModelPusher( 102 | model_pusher_config=self.model_pusher_config, 103 | s3=s3, 104 | ) 105 | model_pusher_artifact = model_pusher.initiate_model_pusher() 106 | logging.info("Initiated the model pusher") 107 | logging.info("Exited the start_model_pusher method of TrainPipeline class") 108 | return 
model_pusher_artifact 109 | 110 | except Exception as e: 111 | raise HelmetException(e, sys) from e 112 | 113 | 114 | 115 | def run_pipeline(self) -> None: 116 | logging.info("Entered the run_pipeline method of TrainPipeline class") 117 | try: 118 | data_ingestion_artifact = self.start_data_ingestion() 119 | data_transformation_artifact = self.start_data_transformation( 120 | data_ingestion_artifact=data_ingestion_artifact 121 | ) 122 | model_trainer_artifact = self.start_model_trainer( 123 | data_transformation_artifact=data_transformation_artifact 124 | ) 125 | model_evaluation_artifact = self.start_model_evaluation(model_trainer_artifact=model_trainer_artifact, 126 | data_transformation_artifact=data_transformation_artifact 127 | ) 128 | if not model_evaluation_artifact.is_model_accepted: 129 | raise Exception("Trained model is not better than the best model") 130 | 131 | model_pusher_artifact = self.start_model_pusher(s3=self.s3_operations) 132 | 133 | logging.info("Exited the run_pipeline method of TrainPipeline class") 134 | 135 | except Exception as e: 136 | raise HelmetException(e, sys) from e -------------------------------------------------------------------------------- /helmet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/utils/__init__.py -------------------------------------------------------------------------------- /helmet/utils/main_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import dill 5 | import base64 6 | from helmet.logger import logging 7 | from helmet.exception import HelmetException 8 | 9 | 10 | def save_object(file_path: str, obj: object) -> None: 11 | logging.info("Entered the save_object method of utils") 12 | 13 | try: 14 | os.makedirs(os.path.dirname(file_path), exist_ok=True) 15 | with open(file_path, "wb") as file_obj: 16 | dill.dump(obj, file_obj) 17 | 18 | logging.info("Exited the save_object method of utils") 19 | 20 | except Exception as e: 21 | raise HelmetException(e, sys) from e 22 | 23 | 24 | def load_object(file_path: str) -> object: 25 | logging.info("Entered the load_object method of utils") 26 | 27 | try: 28 | 29 | with open(file_path, "rb") as file_obj: 30 | obj = dill.load(file_obj) 31 | 32 | logging.info("Exited the load_object method of utils") 33 | 34 | return obj 35 | 36 | except Exception as e: 37 | raise HelmetException(e, sys) from e 38 | 39 | 40 | def image_to_base64(image): 41 | with open(image, "rb") as img_file: 42 | my_string = base64.b64encode(img_file.read()) 43 | 44 | return my_string 45 | 46 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | notebook 3 | pandas 4 | matplotlib 5 | opencv-python 6 | albumentations 7 | ipykernel 8 | tqdm 9 | pycocotools #Comment when creating Docker Image 10 | cython 11 | from-root 12 | boto3 13 | mypy-boto3-s3 14 | Pillow 15 | torch-summary 16 | fastapi 17 | uvicorn 18 | Jinja2 19 | python-multipart 20 | PyYAML 21 | dill==0.3.5.1 22 | utils 23 | torch>=1.7.0 # see https://pytorch.org/get-started/locally/ (recommended) 24 | torchvision>=0.8.1 25 | -e . 
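Note on usage: the TrainPipeline and PredictionPipeline classes above are the project's public entry points. The sketch below shows how they are typically invoked; it is a minimal illustration only, not the project's actual app.py (which is not reproduced in this section), it assumes the S3 constants and AWS credentials are already configured, and "sample.jpg" is a placeholder path.

# Hypothetical driver script; the real web wiring lives in app.py.
from helmet.pipeline.train_pipeline import TrainPipeline
from helmet.pipeline.prediction_pipeline import PredictionPipeline

if __name__ == "__main__":
    # Full training run: ingestion -> transformation -> training -> evaluation -> push to S3
    TrainPipeline().run_pipeline()

    # Prediction: run_pipeline takes raw image bytes and returns a base64-encoded JPEG
    # with the high-confidence helmet boxes drawn on it.
    with open("sample.jpg", "rb") as img_file:
        encoded_result = PredictionPipeline().run_pipeline(img_file.read())
    print(encoded_result[:60])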
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="helmet", 5 | version="0.0.1", 6 | author="Bappy", 7 | author_email="entbappy73@gmail.com", 8 | packages=find_packages(), 9 | install_requires=[], 10 | ) 11 | -------------------------------------------------------------------------------- /tools/cmd.txt: -------------------------------------------------------------------------------- 1 | COCO conversion command: 2 | python voc2coco.py annotations output.json 3 | -------------------------------------------------------------------------------- /tools/voc2coco.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # pip install lxml 4 | 5 | import sys 6 | import os 7 | import json 8 | import xml.etree.ElementTree as ET 9 | import glob 10 | 11 | START_BOUNDING_BOX_ID = 1 12 | PRE_DEFINE_CATEGORIES = None 13 | # If necessary, pre-define category and its id 14 | # PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4, 15 | # "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9, 16 | # "cow": 10, "diningtable": 11, "dog": 12, "horse": 13, 17 | # "motorbike": 14, "person": 15, "pottedplant": 16, 18 | # "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20} 19 | 20 | 21 | def get(root, name): 22 | vars = root.findall(name) 23 | return vars 24 | 25 | 26 | def get_and_check(root, name, length): 27 | vars = root.findall(name) 28 | if len(vars) == 0: 29 | raise ValueError("Can not find %s in %s." % (name, root.tag)) 30 | if length > 0 and len(vars) != length: 31 | raise ValueError( 32 | "The size of %s is supposed to be %d, but is %d." 33 | % (name, length, len(vars)) 34 | ) 35 | if length == 1: 36 | vars = vars[0] 37 | return vars 38 | 39 | 40 | def get_filename_as_int(filename): 41 | try: 42 | filename = filename.replace("\\", "/") 43 | filename = os.path.splitext(os.path.basename(filename))[0] 44 | return str(filename) # the filename stem is used as the image id (kept as a string despite the function name) 45 | except: 46 | raise ValueError("Filename %s is supposed to be an integer." % (filename)) 47 | 48 | 49 | def get_categories(xml_files): 50 | """Generate category name to id mapping from a list of xml files. 51 | 52 | Arguments: 53 | xml_files {list} -- A list of xml file paths. 54 | 55 | Returns: 56 | dict -- category name to id mapping.
57 | """ 58 | classes_names = [] 59 | for xml_file in xml_files: 60 | tree = ET.parse(xml_file) 61 | root = tree.getroot() 62 | for member in root.findall("object"): 63 | classes_names.append(member[0].text) 64 | classes_names = list(set(classes_names)) 65 | classes_names.sort() 66 | return {name: i for i, name in enumerate(classes_names)} 67 | 68 | 69 | def convert(xml_files, json_file): 70 | json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []} 71 | if PRE_DEFINE_CATEGORIES is not None: 72 | categories = PRE_DEFINE_CATEGORIES 73 | else: 74 | categories = get_categories(xml_files) 75 | bnd_id = START_BOUNDING_BOX_ID 76 | for xml_file in xml_files: 77 | tree = ET.parse(xml_file) 78 | root = tree.getroot() 79 | path = get(root, "path") 80 | if len(path) == 1: 81 | filename = os.path.basename(path[0].text) 82 | elif len(path) == 0: 83 | filename = get_and_check(root, "filename", 1).text 84 | else: 85 | raise ValueError("%d paths found in %s" % (len(path), xml_file)) 86 | ## The filename must be a number 87 | image_id = get_filename_as_int(filename) 88 | size = get_and_check(root, "size", 1) 89 | width = int(get_and_check(size, "width", 1).text) 90 | height = int(get_and_check(size, "height", 1).text) 91 | image = { 92 | "file_name": filename, 93 | "height": height, 94 | "width": width, 95 | "id": image_id, 96 | } 97 | json_dict["images"].append(image) 98 | ## Currently we do not support segmentation. 99 | # segmented = get_and_check(root, 'segmented', 1).text 100 | # assert segmented == '0' 101 | for obj in get(root, "object"): 102 | category = get_and_check(obj, "name", 1).text 103 | if category not in categories: 104 | new_id = len(categories) 105 | categories[category] = new_id 106 | category_id = categories[category] 107 | bndbox = get_and_check(obj, "bndbox", 1) 108 | xmin = int(get_and_check(bndbox, "xmin", 1).text) - 1 109 | ymin = int(get_and_check(bndbox, "ymin", 1).text) - 1 110 | xmax = int(get_and_check(bndbox, "xmax", 1).text) 111 | ymax = int(get_and_check(bndbox, "ymax", 1).text) 112 | assert xmax > xmin 113 | assert ymax > ymin 114 | o_width = abs(xmax - xmin) 115 | o_height = abs(ymax - ymin) 116 | ann = { 117 | "area": o_width * o_height, 118 | "iscrowd": 0, 119 | "image_id": image_id, 120 | "bbox": [xmin, ymin, o_width, o_height], 121 | "category_id": category_id, 122 | "id": bnd_id, 123 | "ignore": 0, 124 | "segmentation": [], 125 | } 126 | json_dict["annotations"].append(ann) 127 | bnd_id = bnd_id + 1 128 | 129 | for cate, cid in categories.items(): 130 | cat = {"supercategory": "none", "id": cid, "name": cate} 131 | json_dict["categories"].append(cat) 132 | 133 | #os.makedirs(os.path.dirname(json_file), exist_ok=True) 134 | json_fp = open(json_file, "w") 135 | json_str = json.dumps(json_dict) 136 | json_fp.write(json_str) 137 | json_fp.close() 138 | 139 | 140 | if __name__ == "__main__": 141 | import argparse 142 | 143 | parser = argparse.ArgumentParser( 144 | description="Convert Pascal VOC annotation to COCO format." 145 | ) 146 | parser.add_argument("xml_dir", help="Directory path to xml files.", type=str) 147 | parser.add_argument("json_file", help="Output COCO format json file.", type=str) 148 | args = parser.parse_args() 149 | xml_files = glob.glob(os.path.join(args.xml_dir, "*.xml")) 150 | 151 | # If you want to do train/test split, you can pass a subset of xml files to convert function. 
152 | print("Number of xml files: {}".format(len(xml_files))) 153 | convert(xml_files, args.json_file) 154 | print("Success: {}".format(args.json_file)) 155 | --------------------------------------------------------------------------------
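As the comment near the end of voc2coco.py notes, a train/valid split can be produced by passing subsets of the XML files to convert(). A minimal sketch of that workflow follows; the directory names and output annotation filenames are assumptions, not project constants, and it assumes the script is run from the tools directory so that voc2coco is importable.

# Hypothetical split-and-convert script built on voc2coco.convert; adjust paths to your dataset layout.
import glob
import os
import random

from voc2coco import convert

xml_files = glob.glob(os.path.join("annotations", "*.xml"))
random.shuffle(xml_files)

# 80/20 split between training and validation annotations
split_idx = int(0.8 * len(xml_files))
train_files, valid_files = xml_files[:split_idx], xml_files[split_idx:]

os.makedirs("train", exist_ok=True)
os.makedirs("valid", exist_ok=True)
convert(train_files, os.path.join("train", "annotations.json"))
convert(valid_files, os.path.join("valid", "annotations.json"))

# Note: with PRE_DEFINE_CATEGORIES = None, each convert() call rebuilds the category
# mapping from the files it sees, so category ids can differ between splits if a class
# is missing from one of them; set PRE_DEFINE_CATEGORIES to keep the ids consistent.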