├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app.py ├── docker-compose.yml ├── flowcharts ├── 1_Folder Structure.png ├── 2_Training Pipeline.png ├── 3_Data Ingestion.png ├── 4_Data transformation.png ├── 5_Model trainer.png ├── 6_Model Evaluation.png ├── 7_Model pusher.png └── 8_Prediction pipeline.png ├── helmet ├── __init__.py ├── components │ ├── __init__.py │ ├── data_ingestion.py │ ├── data_transformation.py │ ├── model_evaluation.py │ ├── model_pusher.py │ └── model_trainer.py ├── configuration │ ├── __init__.py │ └── s3_operations.py ├── constants │ └── __init__.py ├── entity │ ├── __init__.py │ ├── artifacts_entity.py │ └── config_entity.py ├── exception │ └── __init__.py ├── logger │ └── __init__.py ├── ml │ ├── __init__.py │ ├── detection │ │ ├── README.md │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── engine.py │ │ ├── group_by_aspect_ratio.py │ │ ├── presets.py │ │ ├── train.py │ │ ├── transforms.py │ │ └── utils.py │ ├── feature │ │ └── helmet_detection.py │ └── models │ │ └── model_optimiser.py ├── pipeline │ ├── __init__.py │ ├── prediction_pipeline.py │ └── train_pipeline.py └── utils │ ├── __init__.py │ └── main_utils.py ├── notebooks └── Experiment.ipynb ├── requirements.txt ├── setup.py └── tools ├── cmd.txt └── voc2coco.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | data/* 131 | artifacts/* -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | COPY . /helmet 3 | WORKDIR /helmet 4 | RUN pip install --upgrade pip 5 | RUN apt-get update 6 | RUN apt-get install ffmpeg libsm6 libxext6 -y 7 | RUN pip install -r requirements.txt 8 | RUN conda install -c conda-forge pycocotools 9 | #RUN pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu 10 | RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 11 | RUN pip install -e . 12 | CMD ["python","app.py"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 BAPPY AHMED 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
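The Dockerfile above bakes the project into an image whose entry point is python app.py, and docker-compose.yml (further down in this dump) maps host port 80 to the container's port 8080, the APP_PORT defined in helmet/constants. The sketch below is not part of the repository; it assumes the container is already running locally with that port mapping and that sample.jpg is a hypothetical test image, and it shows how the /train and /predict endpoints served by app.py could be exercised from Python.

```python
# Minimal client sketch (assumptions: the service is reachable at http://localhost:80
# through the docker-compose port mapping, and "sample.jpg" is a placeholder file name).
import requests

BASE_URL = "http://localhost:80"

# GET /train kicks off the full TrainPipeline defined in app.py.
train_response = requests.get(f"{BASE_URL}/train")
print(train_response.text)

# POST /predict expects a multipart field named "image_file",
# matching the parameter declared in app.py.
with open("sample.jpg", "rb") as image:
    predict_response = requests.post(f"{BASE_URL}/predict", files={"image_file": image})
print(predict_response.status_code, predict_response.text)
```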
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Helmet-Detection-PyTorch 2 | 3 | ### Download the data from here: 4 | 5 | https://drive.google.com/file/d/1oYBdYcQKPGPfqj7n4is-10k17vL6Cmlp/view?usp=sharing 6 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, File 2 | from uvicorn import run as app_run 3 | from fastapi.middleware.cors import CORSMiddleware 4 | from fastapi.responses import Response, JSONResponse 5 | from helmet.constants import APP_HOST, APP_PORT 6 | from helmet.pipeline.train_pipeline import TrainPipeline 7 | from helmet.pipeline.prediction_pipeline import PredictionPipeline 8 | 9 | 10 | app = FastAPI() 11 | 12 | origins = ["*"] 13 | 14 | app.add_middleware( 15 | CORSMiddleware, 16 | allow_origins=origins, 17 | allow_credentials=True, 18 | allow_methods=["*"], 19 | allow_headers=["*"], 20 | ) 21 | 22 | 23 | @app.get("/train") 24 | async def training(): 25 | try: 26 | train_pipeline = TrainPipeline() 27 | 28 | train_pipeline.run_pipeline() 29 | 30 | return Response("Training successful !!") 31 | 32 | except Exception as e: 33 | return Response(f"Error Occurred! {e}") 34 | 35 | 36 | @app.post("/predict") 37 | async def prediction(image_file: bytes = File(description="A file read as bytes")): 38 | try: 39 | prediction_pipeline = PredictionPipeline() 40 | final_output = prediction_pipeline.run_pipeline(image_file) 41 | # print(final_output) 42 | # return JSONResponse(content= final_output, status_code=200) 43 | return final_output 44 | except Exception as e: 45 | return JSONResponse(content=f"Error Occurred!
{e}", status_code=500) 46 | 47 | 48 | if __name__ == "__main__": 49 | app_run(app, host=APP_HOST, port=APP_PORT) -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "1" 2 | services: 3 | web: 4 | image: ${IMAGE_NAME} 5 | ports: 6 | - "80:8080" 7 | environment: 8 | - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 9 | - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 10 | - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} -------------------------------------------------------------------------------- /flowcharts/1_Folder Structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/1_Folder Structure.png -------------------------------------------------------------------------------- /flowcharts/2_Training Pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/2_Training Pipeline.png -------------------------------------------------------------------------------- /flowcharts/3_Data Ingestion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/3_Data Ingestion.png -------------------------------------------------------------------------------- /flowcharts/4_Data transformation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/4_Data transformation.png -------------------------------------------------------------------------------- /flowcharts/5_Model trainer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/5_Model trainer.png -------------------------------------------------------------------------------- /flowcharts/6_Model Evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/6_Model Evaluation.png -------------------------------------------------------------------------------- /flowcharts/7_Model pusher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/7_Model pusher.png -------------------------------------------------------------------------------- /flowcharts/8_Prediction pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/flowcharts/8_Prediction pipeline.png -------------------------------------------------------------------------------- /helmet/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/__init__.py -------------------------------------------------------------------------------- /helmet/components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/components/__init__.py -------------------------------------------------------------------------------- /helmet/components/data_ingestion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from zipfile import ZipFile 4 | from helmet.entity.config_entity import DataIngestionConfig 5 | from helmet.entity.artifacts_entity import DataIngestionArtifacts 6 | from helmet.configuration.s3_operations import S3Operation 7 | from helmet.exception import HelmetException 8 | from helmet.logger import logging 9 | from helmet.constants import * 10 | 11 | 12 | class DataIngestion: 13 | def __init__(self, data_ingestion_config: DataIngestionConfig, s3_operations: S3Operation): 14 | self.data_ingestion_config = data_ingestion_config 15 | self.s3_operations = s3_operations 16 | 17 | 18 | def get_data_from_s3(self) -> None: 19 | try: 20 | logging.info("Entered the get_data_from_s3 method of Data ingestion class") 21 | os.makedirs(self.data_ingestion_config.DATA_INGESTION_ARTIFACTS_DIR, exist_ok=True) 22 | 23 | self.s3_operations.read_data_from_s3(self.data_ingestion_config.ZIP_FILE_NAME, 24 | self.data_ingestion_config.BUCKET_NAME, 25 | self.data_ingestion_config.ZIP_FILE_PATH) 26 | logging.info("Exited the get_data_from_s3 method of Data ingestion class") 27 | except Exception as e: 28 | raise HelmetException(e, sys) from e 29 | 30 | 31 | def unzip_and_clean(self): 32 | logging.info("Entered the unzip_and_clean method of Data ingestion class") 33 | try: 34 | with ZipFile(self.data_ingestion_config.ZIP_FILE_PATH, 'r') as zip_ref: 35 | zip_ref.extractall(self.data_ingestion_config.ZIP_FILE_DIR) 36 | logging.info("Exited the unzip_and_clean method of Data ingestion class") 37 | 38 | return self.data_ingestion_config.TRAIN_DATA_ARTIFACT_DIR, self.data_ingestion_config.TEST_DATA_ARTIFACT_DIR, self.data_ingestion_config.VALID_DATA_ARTIFACT_DIR 39 | except Exception as e: 40 | raise HelmetException(e, sys) from e 41 | 42 | 43 | 44 | def initiate_data_ingestion(self) -> DataIngestionArtifacts: 45 | logging.info("Entered the initiate_data_ingestion method of Data ingestion class") 46 | try: 47 | self.get_data_from_s3() 48 | 49 | logging.info("Fetched the data from S3 bucket") 50 | 51 | train_file_path, test_file_path, valid_file_path= self.unzip_and_clean() 52 | 53 | logging.info("Unzipped file and splited into train, test and valid") 54 | 55 | data_ingestion_artifact = DataIngestionArtifacts(train_file_path=train_file_path, 56 | test_file_path=test_file_path, 57 | valid_file_path=valid_file_path) 58 | 59 | logging.info("Exited the initiate_data_ingestion method of Data ingestion class") 60 | 61 | logging.info(f"Data ingestion artifact: {data_ingestion_artifact}") 62 | 63 | return data_ingestion_artifact 64 | 65 | except Exception as e: 66 | raise HelmetException(e, sys) from e -------------------------------------------------------------------------------- /helmet/components/data_transformation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from 
pycocotools.coco import COCO 4 | import albumentations as A 5 | from albumentations.pytorch import ToTensorV2 6 | from helmet.logger import logging 7 | from helmet.exception import HelmetException 8 | from helmet.ml.feature.helmet_detection import HelmetDetection 9 | from helmet.constants import * 10 | from helmet.utils.main_utils import save_object 11 | from helmet.entity.config_entity import DataTransformationConfig 12 | from helmet.entity.artifacts_entity import DataIngestionArtifacts, DataTransformationArtifacts 13 | 14 | 15 | class DataTransformation: 16 | def __init__(self, data_transformation_config: DataTransformationConfig, data_ingestion_artifact: DataIngestionArtifacts): 17 | self.data_transformation_config = data_transformation_config 18 | self.data_ingestion_artifact = data_ingestion_artifact 19 | 20 | 21 | 22 | def number_of_classes(self): 23 | 24 | try: 25 | 26 | coco = COCO(os.path.join(self.data_ingestion_artifact.train_file_path, ANNOTATIONS_COCO_JSON_FILE)) 27 | categories = coco.cats 28 | classes = [i[1]['name'] for i in categories.items()] 29 | n_classes = len(classes) 30 | 31 | return n_classes 32 | except Exception as e: 33 | raise HelmetException(e, sys) from e 34 | 35 | 36 | 37 | def get_transforms(self, train=False): 38 | try: 39 | if train: 40 | transform = A.Compose([ 41 | A.Resize(INPUT_SIZE, INPUT_SIZE), 42 | A.HorizontalFlip(p=HORIZONTAL_FLIP), 43 | A.VerticalFlip(p=VERTICAL_FLIP), 44 | A.RandomBrightnessContrast(p=RANDOM_BRIGHTNESS_CONTRAST), 45 | A.ColorJitter(p=COLOR_JITTER), 46 | ToTensorV2() 47 | ], bbox_params=A.BboxParams(format=BBOX_FORMAT)) 48 | else: 49 | transform = A.Compose([ 50 | A.Resize(INPUT_SIZE, INPUT_SIZE), 51 | ToTensorV2() 52 | ], bbox_params=A.BboxParams(format=BBOX_FORMAT)) 53 | return transform 54 | except Exception as e: 55 | raise HelmetException(e, sys) from e 56 | 57 | 58 | 59 | def initiate_data_transformation(self) -> DataTransformationArtifacts: 60 | 61 | try: 62 | logging.info("Entered the initiate_data_transformation method of Data transformation class") 63 | 64 | n_classes = self.number_of_classes() 65 | print(n_classes) 66 | 67 | logging.info(f"Total number of classes: {n_classes}") 68 | 69 | train_dataset = HelmetDetection(root=self.data_transformation_config.ROOT_DIR, 70 | split=self.data_transformation_config.TRAIN_SPLIT, 71 | transforms=self.get_transforms(True)) 72 | 73 | logging.info(f"Training dataset prepared") 74 | 75 | test_dataset = HelmetDetection(root=self.data_transformation_config.ROOT_DIR, 76 | split=self.data_transformation_config.TEST_SPLIT, 77 | transforms=self.get_transforms(False)) 78 | 79 | logging.info(f"Testing dataset prepared") 80 | 81 | save_object(self.data_transformation_config.TRAIN_TRANSFORM_OBJECT_FILE_PATH, train_dataset) 82 | save_object(self.data_transformation_config.TEST_TRANSFORM_OBJECT_FILE_PATH, test_dataset) 83 | 84 | logging.info("Saved the train transformed object") 85 | 86 | data_transformation_artifact = DataTransformationArtifacts( 87 | transformed_train_object=self.data_transformation_config.TRAIN_TRANSFORM_OBJECT_FILE_PATH, 88 | transformed_test_object=self.data_transformation_config.TEST_TRANSFORM_OBJECT_FILE_PATH, 89 | number_of_classes=n_classes) 90 | 91 | logging.info("Exited the initiate_data_transformation method of Data transformation class") 92 | 93 | return data_transformation_artifact 94 | 95 | except Exception as e: 96 | raise HelmetException(e, sys) from e 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- 
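DataTransformation.get_transforms above assembles the augmentation policy from the constants module (INPUT_SIZE = 416, flip probabilities of 0.3, and so on) into an albumentations Compose whose bbox_params expect COCO-style [x, y, width, height] boxes. The following self-contained sketch is not part of the repository: it uses a random dummy image and a made-up box purely to show how that train-time pipeline behaves, whereas in the project the transform is consumed by the HelmetDetection dataset rather than called directly.

```python
# Illustrative sketch of the train-time pipeline built in get_transforms(train=True).
# The image and box below are placeholders, not project data.
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2

INPUT_SIZE = 416  # mirrors helmet/constants

transform = A.Compose(
    [
        A.Resize(INPUT_SIZE, INPUT_SIZE),
        A.HorizontalFlip(p=0.3),
        A.VerticalFlip(p=0.3),
        A.RandomBrightnessContrast(p=0.1),
        A.ColorJitter(p=0.1),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="coco"),
)

image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # dummy HxWxC image
# COCO boxes are [x_min, y_min, width, height]; the trailing value is the class id,
# which albumentations passes through when no label_fields are declared.
boxes = [[50, 60, 120, 150, 1]]

augmented = transform(image=image, bboxes=boxes)
print(augmented["image"].shape)  # torch.Size([3, 416, 416])
print(augmented["bboxes"])       # box rescaled to the 416x416 frame
```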
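The model_trainer and model_evaluation components that follow both hand a tiny collate_fn, tuple(zip(*batch)), to their DataLoaders. Detection samples are (image, target) pairs whose tensors differ in shape from image to image, so the default collate, which tries to stack them, would fail; zip(*batch) simply regroups the pairs into a tuple of images and a tuple of targets. A stand-in sketch (strings and plain dicts instead of real tensors, not part of the repository):

```python
# Stand-in sketch of the collate_fn used by the trainer and evaluator DataLoaders.
# Real batches hold (image_tensor, target_dict) pairs; strings are used here
# only to make the regrouping visible.
batch = [
    ("image_a", {"boxes": [[0, 0, 10, 10]], "labels": [1]}),
    ("image_b", {"boxes": [[5, 5, 20, 30]], "labels": [2]}),
]

images, targets = tuple(zip(*batch))
print(images)   # ('image_a', 'image_b')
print(targets)  # (target dict for image_a, target dict for image_b)
```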
/helmet/components/model_evaluation.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import torch 5 | import math 6 | import pandas as pd 7 | import numpy as np 8 | from tqdm import tqdm 9 | from torch.utils.data import DataLoader 10 | from helmet.constants import * 11 | from helmet.logger import logging 12 | from helmet.exception import HelmetException 13 | from helmet.utils.main_utils import load_object 14 | from helmet.entity.config_entity import ModelEvaluationConfig 15 | from helmet.configuration.s3_operations import S3Operation 16 | from helmet.entity.artifacts_entity import ModelTrainerArtifacts, DataTransformationArtifacts, ModelEvaluationArtifacts 17 | 18 | 19 | class ModelEvaluation: 20 | 21 | def __init__(self, model_evaluation_config:ModelEvaluationConfig, 22 | data_transformation_artifacts:DataTransformationArtifacts, 23 | model_trainer_artifacts:ModelTrainerArtifacts): 24 | 25 | self.model_evaluation_config = model_evaluation_config 26 | self.data_transformation_artifacts = data_transformation_artifacts 27 | self.model_trainer_artifacts = model_trainer_artifacts 28 | self.s3 = S3Operation() 29 | self.bucket_name = BUCKET_NAME 30 | 31 | 32 | 33 | @staticmethod 34 | def collate_fn(batch): 35 | """ 36 | This is our collating function for the train dataloader, 37 | it allows us to create batches of data that can be easily pass into the model 38 | """ 39 | try: 40 | return tuple(zip(*batch)) 41 | except Exception as e: 42 | raise HelmetException(e, sys) from e 43 | 44 | 45 | def get_model_from_s3(self) -> str: 46 | """ 47 | Method Name : predict 48 | Description : This method predicts the image. 49 | 50 | Output : Predictions 51 | """ 52 | logging.info("Entered the get_model_from_s3 method of PredictionPipeline class") 53 | try: 54 | # Loading the best model from s3 bucket 55 | predict_model_path = self.model_evaluation_config.BEST_MODEL_PATH 56 | best_model_path = self.s3.read_data_from_s3(TRAINED_MODEL_NAME, self.bucket_name, predict_model_path) 57 | logging.info("Exited the get_model_from_s3 method of PredictionPipeline class") 58 | return best_model_path 59 | 60 | except Exception as e: 61 | raise HelmetException(e, sys) from e 62 | 63 | 64 | 65 | def evaluate(self, model, dataloader, device): 66 | try: 67 | model.to(device) 68 | all_losses = [] 69 | all_losses_dict = [] 70 | 71 | for images, targets in tqdm(dataloader): 72 | images = list(image.to(device) for image in images) 73 | targets = [{k: torch.tensor(v).to(device) for k, v in t.items()} for t in targets] 74 | 75 | loss_dict = model(images, targets) # the model computes the loss automatically if we pass in targets 76 | losses = sum(loss for loss in loss_dict.values()) 77 | loss_dict_append = {k: v.item() for k, v in loss_dict.items()} 78 | loss_value = losses.item() 79 | 80 | all_losses.append(loss_value) 81 | all_losses_dict.append(loss_dict_append) 82 | 83 | if not math.isfinite(loss_value): 84 | print(f"Loss is {loss_value}, stopping training") # train if loss becomes infinity 85 | print(loss_dict) 86 | sys.exit(1) 87 | 88 | losses.backward() 89 | 90 | all_losses_dict = pd.DataFrame(all_losses_dict) # for printing 91 | 92 | print("loss: {:.6f},loss_classifier: {:.6f}, loss_box: {:.6f}, loss_rpn_box: {:.6f}, loss_object: {:.6f}".format( 93 | np.mean(all_losses), 94 | all_losses_dict['loss_classifier'].mean(), 95 | all_losses_dict['loss_box_reg'].mean(), 96 | all_losses_dict['loss_rpn_box_reg'].mean(), 97 | 
all_losses_dict['loss_objectness'].mean() 98 | )) 99 | return all_losses_dict, np.mean(all_losses) 100 | 101 | except Exception as e: 102 | raise HelmetException(e, sys) from e 103 | 104 | 105 | 106 | def initiate_model_evaluation(self) -> ModelEvaluationArtifacts: 107 | """ 108 | Method Name : initiate_model_evaluation 109 | Description : This function is used to initiate all steps of the model evaluation 110 | 111 | Output : Returns model evaluation artifact 112 | On Failure : Write an exception log and then raise an exception 113 | """ 114 | 115 | try: 116 | trained_model = torch.load(self.model_trainer_artifacts.trained_model_path) 117 | 118 | test_dataset = load_object(self.data_transformation_artifacts.transformed_test_object) 119 | 120 | test_loader = DataLoader(test_dataset, 121 | batch_size=self.model_evaluation_config.BATCH, 122 | shuffle=self.model_evaluation_config.SHUFFLE, 123 | num_workers=self.model_evaluation_config.NUM_WORKERS, 124 | collate_fn=self.collate_fn 125 | ) 126 | 127 | logging.info("loaded saved model") 128 | 129 | trained_model = trained_model.to(DEVICE) 130 | 131 | all_losses_dict, all_losses = self.evaluate(trained_model, test_loader, device=DEVICE) 132 | os.makedirs(self.model_evaluation_config.EVALUATED_MODEL_DIR, exist_ok=True) 133 | all_losses_dict.to_csv(self.model_evaluation_config.EVALUATED_LOSS_CSV_PATH, index=False) 134 | 135 | s3_model = self.get_model_from_s3() 136 | s3_model = torch.load(s3_model, map_location=torch.device(DEVICE)) 137 | 138 | s3_all_losses_dict, s3_all_losses = self.evaluate(s3_model,test_loader, device=DEVICE) 139 | 140 | if s3_all_losses > all_losses: 141 | # 0.03 > 0.02 142 | is_model_accepted = True 143 | 144 | model_evaluation_artifact = ModelEvaluationArtifacts( 145 | is_model_accepted=is_model_accepted, 146 | all_losses=all_losses) 147 | 148 | else: 149 | is_model_accepted = False 150 | 151 | model_evaluation_artifact = ModelEvaluationArtifacts( 152 | is_model_accepted=is_model_accepted, 153 | all_losses=s3_all_losses) 154 | 155 | logging.info("Exited the initiate_model_evaluation method of Model Evaluation class") 156 | return model_evaluation_artifact 157 | 158 | except Exception as e: 159 | raise HelmetException(e, sys) from e 160 | -------------------------------------------------------------------------------- /helmet/components/model_pusher.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from helmet.exception import HelmetException 3 | from helmet.logger import logging 4 | from helmet.entity.config_entity import ModelPusherConfig 5 | from helmet.entity.artifacts_entity import ModelPusherArtifacts 6 | from helmet.configuration.s3_operations import S3Operation 7 | 8 | 9 | class ModelPusher: 10 | 11 | def __init__(self, model_pusher_config: ModelPusherConfig, s3: S3Operation): 12 | 13 | self.model_pusher_config = model_pusher_config 14 | self.s3 = s3 15 | 16 | 17 | def initiate_model_pusher(self) -> ModelPusherArtifacts: 18 | """ 19 | Method Name : initiate_model_pusher 20 | Description : This method initiates model pusher. 
21 | 22 | Output : Model pusher artifact 23 | """ 24 | logging.info("Entered initiate_model_pusher method of ModelTrainer class") 25 | try: 26 | # Uploading the model to s3 bucket 27 | self.s3.upload_file( 28 | self.model_pusher_config.BEST_MODEL_PATH, 29 | self.model_pusher_config.S3_MODEL_KEY_PATH, 30 | self.model_pusher_config.BUCKET_NAME, 31 | remove=False, 32 | ) 33 | logging.info("Uploaded best model to s3 bucket") 34 | 35 | # Saving the model pusher artifacts 36 | model_pusher_artifact = ModelPusherArtifacts( 37 | bucket_name=self.model_pusher_config.BUCKET_NAME, 38 | s3_model_path=self.model_pusher_config.S3_MODEL_KEY_PATH, 39 | ) 40 | logging.info("Exited the initiate_model_pusher method of ModelTrainer class") 41 | return model_pusher_artifact 42 | 43 | except Exception as e: 44 | raise HelmetException(e, sys) from e -------------------------------------------------------------------------------- /helmet/components/model_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import numpy as np 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import torch 8 | from torchvision import models 9 | from torch.utils.data import DataLoader 10 | from helmet.logger import logging 11 | from helmet.exception import HelmetException 12 | from helmet.utils.main_utils import load_object 13 | from helmet.ml.models.model_optimiser import model_optimiser 14 | from helmet.entity.config_entity import ModelTrainerConfig 15 | from helmet.entity.artifacts_entity import DataTransformationArtifacts, ModelTrainerArtifacts 16 | from helmet.ml.detection.engine import train_one_epoch, evaluate 17 | 18 | 19 | class ModelTrainer: 20 | def __init__(self, data_transformation_artifacts: DataTransformationArtifacts, 21 | model_trainer_config: ModelTrainerConfig): 22 | """ 23 | :param data_transformation_artifacts: Output reference of data transformation artifact stage 24 | :param model_trainer_config: Configuration for model trainer 25 | """ 26 | 27 | self.data_transformation_artifacts = data_transformation_artifacts 28 | self.model_trainer_config = model_trainer_config 29 | 30 | 31 | def train(self, model, optimizer, loader, device, epoch): 32 | try: 33 | model.to(device) 34 | model.train() 35 | all_losses = [] 36 | all_losses_dict = [] 37 | 38 | for images, targets in tqdm(loader): 39 | images = list(image.to(device) for image in images) 40 | targets = [{k: torch.tensor(v).to(device) for k, v in t.items()} for t in targets] 41 | 42 | loss_dict = model(images, targets) # the model computes the loss automatically if we pass in targets 43 | losses = sum(loss for loss in loss_dict.values()) 44 | loss_dict_append = {k: v.item() for k, v in loss_dict.items()} 45 | loss_value = losses.item() 46 | 47 | all_losses.append(loss_value) 48 | all_losses_dict.append(loss_dict_append) 49 | 50 | if not math.isfinite(loss_value): 51 | print(f"Loss is {loss_value}, stopping training") # train if loss becomes infinity 52 | print(loss_dict) 53 | sys.exit(1) 54 | 55 | optimizer.zero_grad() 56 | losses.backward() 57 | optimizer.step() 58 | all_losses_dict = pd.DataFrame(all_losses_dict) # for printing 59 | 60 | print("Epoch {}, lr: {:.6f}, loss: {:.6f}, loss_classifier: {:.6f}, loss_box: {:.6f}, loss_rpn_box: {:.6f}, loss_object: {:.6f}".format( 61 | epoch, optimizer.param_groups[0]['lr'], np.mean(all_losses), 62 | all_losses_dict['loss_classifier'].mean(), 63 | all_losses_dict['loss_box_reg'].mean(), 64 | 
all_losses_dict['loss_rpn_box_reg'].mean(), 65 | all_losses_dict['loss_objectness'].mean() 66 | )) 67 | 68 | except Exception as e: 69 | raise HelmetException(e, sys) from e 70 | 71 | 72 | @staticmethod 73 | def collate_fn(batch): 74 | """ 75 | This is our collating function for the train dataloader, 76 | it allows us to create batches of data that can be easily pass into the model 77 | """ 78 | try: 79 | return tuple(zip(*batch)) 80 | except Exception as e: 81 | raise HelmetException(e, sys) from e 82 | 83 | 84 | 85 | def initiate_model_trainer(self,) -> ModelTrainerArtifacts: 86 | logging.info("Entered initiate_model_trainer method of ModelTrainer class") 87 | 88 | """ 89 | Method Name : initiate_model_trainer 90 | Description : This function initiates a model trainer steps 91 | 92 | Output : Returns model trainer artifact 93 | On Failure : Write an exception log and then raise an exception 94 | """ 95 | 96 | try: 97 | train_dataset = load_object(self.data_transformation_artifacts.transformed_train_object) 98 | 99 | train_loader = DataLoader(train_dataset, 100 | batch_size=self.model_trainer_config.BATCH_SIZE, 101 | shuffle=self.model_trainer_config.SHUFFLE, 102 | num_workers=self.model_trainer_config.NUM_WORKERS, 103 | collate_fn=self.collate_fn 104 | ) 105 | 106 | test_dataset = load_object(self.data_transformation_artifacts.transformed_test_object) 107 | 108 | test_loader = DataLoader(test_dataset, 109 | batch_size=1, 110 | shuffle=self.model_trainer_config.SHUFFLE, 111 | num_workers=self.model_trainer_config.NUM_WORKERS, 112 | collate_fn=self.collate_fn 113 | ) 114 | 115 | logging.info("Loaded training data loader object") 116 | 117 | model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True) 118 | 119 | logging.info("Loaded faster Rcnn model") 120 | 121 | in_features = model.roi_heads.box_predictor.cls_score.in_features # we need to change the head 122 | 123 | model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, self.data_transformation_artifacts.number_of_classes) 124 | 125 | optimiser = model_optimiser(model) 126 | 127 | logging.info("loaded optimiser") 128 | 129 | for epoch in range(self.model_trainer_config.EPOCH): 130 | # self.train(model, optimiser, train_loader, self.model_trainer_config.DEVICE, epoch) 131 | 132 | self.train(model, optimiser, train_loader, self.model_trainer_config.DEVICE, epoch) 133 | 134 | os.makedirs(self.model_trainer_config.TRAINED_MODEL_DIR, exist_ok=True) 135 | torch.save(model, self.model_trainer_config.TRAINED_MODEL_PATH) 136 | 137 | logging.info(f"Saved the trained model") 138 | 139 | model_trainer_artifacts = ModelTrainerArtifacts( 140 | trained_model_path=self.model_trainer_config.TRAINED_MODEL_PATH 141 | ) 142 | logging.info(f"Model trainer artifact: {model_trainer_artifacts}") 143 | 144 | return model_trainer_artifacts 145 | 146 | except Exception as e: 147 | raise HelmetException(e, sys) from e 148 | 149 | 150 | -------------------------------------------------------------------------------- /helmet/configuration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/configuration/__init__.py -------------------------------------------------------------------------------- /helmet/configuration/s3_operations.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import sys 
4 | from io import StringIO 5 | from typing import List, Union 6 | from botocore.exceptions import ClientError 7 | import boto3 8 | from helmet.exception import HelmetException 9 | from helmet.logger import logging 10 | from mypy_boto3_s3.service_resource import Bucket 11 | from helmet.constants import * 12 | 13 | MODEL_SAVE_FORMAT = ".pt" 14 | 15 | 16 | class S3Operation: 17 | s3_client=None 18 | s3_resource = None 19 | def __init__(self): 20 | # self.s3_client = boto3.client("s3") 21 | 22 | # self.s3_resource = boto3.resource("s3") 23 | if S3Operation.s3_resource==None or S3Operation.s3_client==None: 24 | __access_key_id = os.getenv(AWS_ACCESS_KEY_ID_ENV_KEY, ) 25 | __secret_access_key = os.getenv(AWS_SECRET_ACCESS_KEY_ENV_KEY, ) 26 | if __access_key_id is None: 27 | raise Exception(f"Environment variable: {AWS_ACCESS_KEY_ID_ENV_KEY} is not set.") 28 | if __secret_access_key is None: 29 | raise Exception(f"Environment variable: {AWS_SECRET_ACCESS_KEY_ENV_KEY} is not set.") 30 | 31 | S3Operation.s3_resource = boto3.resource('s3', 32 | aws_access_key_id=__access_key_id, 33 | aws_secret_access_key=__secret_access_key, 34 | region_name=REGION_NAME 35 | ) 36 | S3Operation.s3_client = boto3.client('s3', 37 | aws_access_key_id=__access_key_id, 38 | aws_secret_access_key=__secret_access_key, 39 | region_name=REGION_NAME 40 | ) 41 | self.s3_resource = S3Operation.s3_resource 42 | self.s3_client = S3Operation.s3_client 43 | @staticmethod 44 | def read_object( 45 | object_name: str, decode: bool = True, make_readable: bool = False 46 | ) -> Union[StringIO, str]: 47 | """ 48 | Method Name : read_object 49 | Description : This method reads the object_name object with kwargs 50 | 51 | Output : The column name is renamed 52 | On Failure : Write an exception log and then raise an exception 53 | 54 | Version : 1.2 55 | Revisions : moved setup to cloud 56 | """ 57 | logging.info("Entered the read_object method of S3Operations class") 58 | 59 | try: 60 | func = ( 61 | lambda: object_name.get()["Body"].read().decode() 62 | if decode is True 63 | else object_name.get()["Body"].read() 64 | ) 65 | conv_func = lambda: StringIO(func()) if make_readable is True else func() 66 | 67 | logging.info("Exited the read_object method of S3Operations class") 68 | 69 | return conv_func() 70 | 71 | except Exception as e: 72 | raise HelmetException(e, sys) from e 73 | 74 | def get_bucket(self, bucket_name: str) -> Bucket: 75 | """ 76 | Method Name : get_bucket 77 | Description : This method gets the bucket object based on the bucket_name 78 | 79 | Output : Bucket object is returned based on the bucket name 80 | On Failure : Write an exception log and then raise an exception 81 | 82 | Version : 1.2 83 | Revisions : moved setup to cloud 84 | """ 85 | logging.info("Entered the get_bucket method of S3Operations class") 86 | 87 | try: 88 | bucket = self.s3_resource.Bucket(bucket_name) 89 | 90 | logging.info("Exited the get_bucket method of S3Operations class") 91 | 92 | return bucket 93 | 94 | except Exception as e: 95 | raise HelmetException(e, sys) from e 96 | 97 | def get_file_object( 98 | self, filename: str, bucket_name: str 99 | ) -> Union[List[object], object]: 100 | """ 101 | Method Name : get_file_object 102 | Description : This method gets the file object from bucket_name bucket based on filename 103 | 104 | Output : list of objects or object is returned based on filename 105 | On Failure : Write an exception log and then raise an exception 106 | 107 | Version : 1.2 108 | Revisions : moved setup to cloud 109 | """ 110 | 
logging.info("Entered the get_file_object method of S3Operations class") 111 | 112 | try: 113 | bucket = self.get_bucket(bucket_name) 114 | 115 | lst_objs = [object for object in bucket.objects.filter(Prefix=filename)] 116 | 117 | func = lambda x: x[0] if len(x) == 1 else x 118 | 119 | file_objs = func(lst_objs) 120 | 121 | logging.info("Exited the get_file_object method of S3Operations class") 122 | 123 | return file_objs 124 | 125 | except Exception as e: 126 | raise HelmetException(e, sys) from e 127 | 128 | def load_model( 129 | self, model_name: str, bucket_name: str, model_dir: str = None 130 | ) -> object: 131 | """ 132 | Method Name : load_model 133 | Description : This method loads the model_name model from bucket_name bucket with kwargs 134 | 135 | Output : list of objects or object is returned based on filename 136 | On Failure : Write an exception log and then raise an exception 137 | 138 | Version : 1.2 139 | Revisions : moved setup to cloud 140 | """ 141 | logging.info("Entered the load_model method of S3Operations class") 142 | 143 | try: 144 | func = ( 145 | lambda: model_name 146 | if model_dir is None 147 | else model_dir + "/" + model_name 148 | ) 149 | 150 | model_file = func() 151 | 152 | f_obj = self.get_file_object(model_file, bucket_name) 153 | 154 | model_obj = self.read_object(f_obj, decode=False) 155 | 156 | return model_obj 157 | logging.info("Exited the load_model method of S3Operations class") 158 | 159 | except Exception as e: 160 | raise HelmetException(e, sys) from e 161 | 162 | def create_folder(self, folder_name: str, bucket_name: str) -> None: 163 | """ 164 | Method Name : create_folder 165 | Description : This method creates a folder_name folder in bucket_name bucket 166 | 167 | Output : Folder is created in s3 bucket 168 | On Failure : Write an exception log and then raise an exception 169 | 170 | Version : 1.2 171 | Revisions : moved setup to cloud 172 | """ 173 | logging.info("Entered the create_folder method of S3Operations class") 174 | 175 | try: 176 | self.s3_resource.Object(bucket_name, folder_name).load() 177 | 178 | except ClientError as e: 179 | if e.response["Error"]["Code"] == "404": 180 | folder_obj = folder_name + "/" 181 | 182 | self.s3_client.put_object(Bucket=bucket_name, Key=folder_obj) 183 | 184 | else: 185 | pass 186 | 187 | logging.info("Exited the create_folder method of S3Operations class") 188 | 189 | def upload_file( 190 | self, 191 | from_filename: str, 192 | to_filename: str, 193 | bucket_name: str, 194 | remove: bool = True, 195 | ): 196 | """ 197 | Method Name : upload_file 198 | Description : This method uploads the from_filename file to bucket_name bucket with to_filename as bucket filename 199 | 200 | Output : Folder is created in s3 bucket 201 | On Failure : Write an exception log and then raise an exception 202 | 203 | Version : 1.2 204 | Revisions : moved setup to cloud 205 | """ 206 | logging.info("Entered the upload_file method of S3Operations class") 207 | 208 | try: 209 | logging.info( 210 | f"Uploading {from_filename} file to {to_filename} file in {bucket_name} bucket" 211 | ) 212 | 213 | self.s3_resource.meta.client.upload_file( 214 | from_filename, bucket_name, to_filename 215 | ) 216 | 217 | logging.info( 218 | f"Uploaded {from_filename} file to {to_filename} file in {bucket_name} bucket" 219 | ) 220 | 221 | if remove is True: 222 | os.remove(from_filename) 223 | 224 | logging.info(f"Remove is set to {remove}, deleted the file") 225 | 226 | else: 227 | logging.info(f"Remove is set to {remove}, not deleted the 
file") 228 | 229 | logging.info("Exited the upload_file method of S3Operations class") 230 | 231 | except Exception as e: 232 | raise HelmetException(e, sys) from e 233 | 234 | 235 | def read_data_from_s3(self, filename: str, bucket_name: str, output_filename: str): 236 | try: 237 | bucket = self.get_bucket(bucket_name) 238 | 239 | obj = bucket.download_file(Key=filename, Filename=output_filename) 240 | 241 | return output_filename 242 | 243 | except Exception as e: 244 | raise HelmetException(e, sys) from e 245 | -------------------------------------------------------------------------------- /helmet/constants/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from datetime import datetime 4 | 5 | TIMESTAMP: str = datetime.now().strftime("%m_%d_%Y_%H_%M_%S") 6 | 7 | # Data Ingestion Constants 8 | ARTIFACTS_DIR = os.path.join("artifacts", TIMESTAMP) 9 | BUCKET_NAME = 'helmet-object-detection' 10 | ZIP_FILE_NAME = 'data.zip' 11 | ANNOTATIONS_COCO_JSON_FILE = '_annotations.coco.json' 12 | 13 | INPUT_SIZE = 416 14 | HORIZONTAL_FLIP = 0.3 15 | VERTICAL_FLIP = 0.3 16 | RANDOM_BRIGHTNESS_CONTRAST = 0.1 17 | COLOR_JITTER = 0.1 18 | BBOX_FORMAT = 'coco' 19 | 20 | RAW_FILE_NAME = 'helmet' 21 | 22 | # Data ingestion constants 23 | DATA_INGESTION_ARTIFACTS_DIR = 'DataIngestionArtifacts' 24 | DATA_INGESTION_TRAIN_DIR = 'train' 25 | DATA_INGESTION_TEST_DIR = 'test' 26 | DATA_INGESTION_VALID_DIR = 'valid' 27 | 28 | # Data transformation constants 29 | DATA_TRANSFORMATION_ARTIFACTS_DIR = 'DataTransformationArtifacts' 30 | DATA_TRANSFORMATION_TRAIN_DIR = 'Train' 31 | DATA_TRANSFORMATION_TEST_DIR = 'Test' 32 | DATA_TRANSFORMATION_TRAIN_FILE_NAME = "train.pkl" 33 | DATA_TRANSFORMATION_TEST_FILE_NAME = "test.pkl" 34 | DATA_TRANSFORMATION_TRAIN_SPLIT = 'train' 35 | DATA_TRANSFORMATION_TEST_SPLIT = 'test' 36 | 37 | 38 | # Model Training Constants 39 | TRAINED_MODEL_DIR = 'TrainedModel' 40 | TRAINED_MODEL_NAME = 'model.pt' 41 | TRAINED_BATCH_SIZE = 2 42 | TRAINED_SHUFFLE = False 43 | TRAINED_NUM_WORKERS = 1 44 | EPOCH = 1 45 | 46 | 47 | # Model evaluation constants 48 | MODEL_EVALUATION_ARTIFACTS_DIR = 'ModelEvaluationArtifacts' 49 | MODEL_EVALUATION_FILE_NAME = 'loss.csv' 50 | 51 | # Common constants 52 | use_cuda = torch.cuda.is_available() 53 | DEVICE = torch.device("cuda" if use_cuda else "cpu") 54 | 55 | APP_HOST = "0.0.0.0" 56 | APP_PORT = 8080 57 | 58 | # Prediction Constants 59 | PREDICTION_CLASSES = ['With Helmet', 'Without Helmet'] 60 | 61 | 62 | 63 | # AWS CONSTANTS 64 | AWS_ACCESS_KEY_ID_ENV_KEY = "AWS_ACCESS_KEY_ID" 65 | AWS_SECRET_ACCESS_KEY_ENV_KEY = "AWS_SECRET_ACCESS_KEY" 66 | REGION_NAME = "ap-south-1" -------------------------------------------------------------------------------- /helmet/entity/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/entity/__init__.py -------------------------------------------------------------------------------- /helmet/entity/artifacts_entity.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | # Data ingestion artifacts 4 | @dataclass 5 | class DataIngestionArtifacts: 6 | train_file_path: str 7 | test_file_path: str 8 | valid_file_path: str 9 | 10 | 11 | @dataclass 12 | class DataTransformationArtifacts: 13 | transformed_train_object: str 14 | 
transformed_test_object: str 15 | number_of_classes: int 16 | 17 | 18 | @dataclass 19 | class ModelTrainerArtifacts: 20 | trained_model_path: str 21 | 22 | 23 | @dataclass 24 | class ModelEvaluationArtifacts: 25 | is_model_accepted: bool 26 | all_losses: str 27 | 28 | 29 | @dataclass 30 | class ModelPusherArtifacts: 31 | bucket_name: str 32 | s3_model_path: str 33 | -------------------------------------------------------------------------------- /helmet/entity/config_entity.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from from_root import from_root 3 | from helmet.constants import * 4 | from helmet.configuration.s3_operations import S3Operation 5 | import os 6 | 7 | 8 | @dataclass 9 | class DataIngestionConfig: 10 | def __init__(self): 11 | self.S3_OPERATION = S3Operation(), 12 | self.BUCKET_NAME: str = BUCKET_NAME 13 | self.ZIP_FILE_NAME:str = ZIP_FILE_NAME 14 | self.DATA_INGESTION_ARTIFACTS_DIR: str = os.path.join(from_root(), ARTIFACTS_DIR, DATA_INGESTION_ARTIFACTS_DIR) 15 | self.TRAIN_DATA_ARTIFACT_DIR = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, DATA_INGESTION_TRAIN_DIR) 16 | self.TEST_DATA_ARTIFACT_DIR = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, DATA_INGESTION_TEST_DIR) 17 | self.VALID_DATA_ARTIFACT_DIR = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, DATA_INGESTION_VALID_DIR) 18 | self.ZIP_FILE_DIR = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR) 19 | self.ZIP_FILE_PATH = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, self.ZIP_FILE_NAME) 20 | self.UNZIPPED_FILE_PATH = os.path.join(self.DATA_INGESTION_ARTIFACTS_DIR, RAW_FILE_NAME) 21 | 22 | 23 | 24 | @dataclass 25 | class DataTransformationConfig: 26 | def __init__(self): 27 | self.ROOT_DIR: str = os.path.join(from_root(), ARTIFACTS_DIR, DATA_INGESTION_ARTIFACTS_DIR) 28 | self.DATA_TRANSFORMATION_ARTIFACTS_DIR: str = os.path.join(from_root(),ARTIFACTS_DIR,DATA_TRANSFORMATION_ARTIFACTS_DIR) 29 | self.TRAIN_TRANSFORM_DATA_ARTIFACT_DIR = os.path.join(self.DATA_TRANSFORMATION_ARTIFACTS_DIR,DATA_TRANSFORMATION_TRAIN_DIR) 30 | self.TEST_TRANSFORM_DATA_ARTIFACT_DIR = os.path.join(self.DATA_TRANSFORMATION_ARTIFACTS_DIR,DATA_TRANSFORMATION_TEST_DIR) 31 | self.TRAIN_TRANSFORM_OBJECT_FILE_PATH = os.path.join(self.TRAIN_TRANSFORM_DATA_ARTIFACT_DIR, 32 | DATA_TRANSFORMATION_TRAIN_FILE_NAME) 33 | self.TEST_TRANSFORM_OBJECT_FILE_PATH = os.path.join(self.TEST_TRANSFORM_DATA_ARTIFACT_DIR, 34 | DATA_TRANSFORMATION_TEST_FILE_NAME) 35 | 36 | self.TRAIN_SPLIT = DATA_TRANSFORMATION_TRAIN_SPLIT 37 | self.TEST_SPLIT = DATA_TRANSFORMATION_TEST_SPLIT 38 | 39 | 40 | 41 | @dataclass 42 | class ModelTrainerConfig: 43 | def __init__(self): 44 | self.TRAINED_MODEL_DIR: str = os.path.join(from_root(), ARTIFACTS_DIR, TRAINED_MODEL_DIR) 45 | self.TRAINED_MODEL_PATH = os.path.join(self.TRAINED_MODEL_DIR, TRAINED_MODEL_NAME) 46 | self.BATCH_SIZE: int = TRAINED_BATCH_SIZE 47 | self.SHUFFLE: bool = TRAINED_SHUFFLE 48 | self.NUM_WORKERS = TRAINED_NUM_WORKERS 49 | self.EPOCH: int = EPOCH 50 | self.DEVICE = DEVICE 51 | 52 | 53 | @dataclass 54 | class ModelEvaluationConfig: 55 | def __init__(self): 56 | self.EVALUATED_MODEL_DIR: str = os.path.join(from_root(), ARTIFACTS_DIR, MODEL_EVALUATION_ARTIFACTS_DIR) 57 | self.EVALUATED_LOSS_CSV_PATH = os.path.join(self.EVALUATED_MODEL_DIR, MODEL_EVALUATION_FILE_NAME) 58 | self.BEST_MODEL_PATH = os.path.join(self.EVALUATED_MODEL_DIR, TRAINED_MODEL_NAME ) 59 | self.DEVICE = DEVICE 60 | self.BATCH: int = 1 61 | self.SHUFFLE: bool = 
TRAINED_SHUFFLE 62 | self.NUM_WORKERS = TRAINED_NUM_WORKERS 63 | 64 | 65 | 66 | @dataclass 67 | class ModelPusherConfig: 68 | def __init__(self): 69 | self.TRAINED_MODEL_DIR: str = os.path.join(from_root(),ARTIFACTS_DIR,TRAINED_MODEL_DIR) 70 | self.BEST_MODEL_PATH: str = os.path.join(self.TRAINED_MODEL_DIR,TRAINED_MODEL_NAME) 71 | self.BUCKET_NAME: str = BUCKET_NAME 72 | self.S3_MODEL_KEY_PATH: str = os.path.join(TRAINED_MODEL_NAME) -------------------------------------------------------------------------------- /helmet/exception/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def error_message_detail(error, error_detail:sys): 6 | _, _, exc_tb = error_detail.exc_info() 7 | file_name = exc_tb.tb_frame.f_code.co_filename 8 | error_message = "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format( 9 | file_name, exc_tb.tb_lineno, str(error) 10 | ) 11 | 12 | return error_message 13 | 14 | 15 | class HelmetException(Exception): 16 | def __init__(self, error_message, error_detail): 17 | """ 18 | :param error_message: error message in string format 19 | """ 20 | super().__init__(error_message) 21 | self.error_message = error_message_detail( 22 | error_message, error_detail=error_detail 23 | ) 24 | 25 | def __str__(self): 26 | return self.error_message -------------------------------------------------------------------------------- /helmet/logger/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from from_root import from_root 5 | from datetime import datetime 6 | 7 | LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log" 8 | logs_path = os.path.join(from_root(), "logs", LOG_FILE) 9 | 10 | os.makedirs(logs_path, exist_ok=True) 11 | 12 | LOG_FILE_PATH = os.path.join(logs_path, LOG_FILE) 13 | 14 | logging.basicConfig( 15 | filename=LOG_FILE_PATH, 16 | format="[ %(asctime)s ] %(name)s - %(levelname)s - %(message)s", 17 | level=logging.DEBUG, 18 | ) -------------------------------------------------------------------------------- /helmet/ml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/ml/__init__.py -------------------------------------------------------------------------------- /helmet/ml/detection/README.md: -------------------------------------------------------------------------------- 1 | # Object detection reference training scripts 2 | 3 | This folder contains reference training scripts for object detection. 4 | They serve as a log of how to train specific models, to provide baseline 5 | training and evaluation scripts to quickly bootstrap research. 6 | 7 | To execute the example commands below you must install the following: 8 | 9 | ``` 10 | cython 11 | pycocotools 12 | matplotlib 13 | ``` 14 | 15 | You must modify the following flags: 16 | 17 | `--data-path=/path/to/coco/dataset` 18 | 19 | `--nproc_per_node=` 20 | 21 | Except otherwise noted, all models have been trained on 8x V100 GPUs. 
22 | 23 | ### Faster R-CNN ResNet-50 FPN 24 | ``` 25 | torchrun --nproc_per_node=8 train.py\ 26 | --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\ 27 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 28 | ``` 29 | 30 | ### Faster R-CNN MobileNetV3-Large FPN 31 | ``` 32 | torchrun --nproc_per_node=8 train.py\ 33 | --dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26\ 34 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone MobileNet_V3_Large_Weights.IMAGENET1K_V1 35 | ``` 36 | 37 | ### Faster R-CNN MobileNetV3-Large 320 FPN 38 | ``` 39 | torchrun --nproc_per_node=8 train.py\ 40 | --dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26\ 41 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone MobileNet_V3_Large_Weights.IMAGENET1K_V1 42 | ``` 43 | 44 | ### FCOS ResNet-50 FPN 45 | ``` 46 | torchrun --nproc_per_node=8 train.py\ 47 | --dataset coco --model fcos_resnet50_fpn --epochs 26\ 48 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --amp --weights-backbone ResNet50_Weights.IMAGENET1K_V1 49 | ``` 50 | 51 | ### RetinaNet 52 | ``` 53 | torchrun --nproc_per_node=8 train.py\ 54 | --dataset coco --model retinanet_resnet50_fpn --epochs 26\ 55 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 56 | ``` 57 | 58 | ### SSD300 VGG16 59 | ``` 60 | torchrun --nproc_per_node=8 train.py\ 61 | --dataset coco --model ssd300_vgg16 --epochs 120\ 62 | --lr-steps 80 110 --aspect-ratio-group-factor 3 --lr 0.002 --batch-size 4\ 63 | --weight-decay 0.0005 --data-augmentation ssd --weights-backbone VGG16_Weights.IMAGENET1K_FEATURES 64 | ``` 65 | 66 | ### SSDlite320 MobileNetV3-Large 67 | ``` 68 | torchrun --nproc_per_node=8 train.py\ 69 | --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660\ 70 | --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\ 71 | --weight-decay 0.00004 --data-augmentation ssdlite 72 | ``` 73 | 74 | 75 | ### Mask R-CNN 76 | ``` 77 | torchrun --nproc_per_node=8 train.py\ 78 | --dataset coco --model maskrcnn_resnet50_fpn --epochs 26\ 79 | --lr-steps 16 22 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 80 | ``` 81 | 82 | 83 | ### Keypoint R-CNN 84 | ``` 85 | torchrun --nproc_per_node=8 train.py\ 86 | --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\ 87 | --lr-steps 36 43 --aspect-ratio-group-factor 3 --weights-backbone ResNet50_Weights.IMAGENET1K_V1 88 | ``` 89 | -------------------------------------------------------------------------------- /helmet/ml/detection/coco_eval.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import io 3 | from contextlib import redirect_stdout 4 | 5 | import numpy as np 6 | import pycocotools.mask as mask_util 7 | import torch 8 | import utils 9 | from pycocotools.coco import COCO 10 | from pycocotools.cocoeval import COCOeval 11 | 12 | 13 | class CocoEvaluator: 14 | def __init__(self, coco_gt, iou_types): 15 | if not isinstance(iou_types, (list, tuple)): 16 | raise TypeError(f"This constructor expects iou_types of type list or tuple, instead got {type(iou_types)}") 17 | coco_gt = copy.deepcopy(coco_gt) 18 | self.coco_gt = coco_gt 19 | 20 | self.iou_types = iou_types 21 | self.coco_eval = {} 22 | for iou_type in iou_types: 23 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 24 | 25 | self.img_ids = [] 26 | 
self.eval_imgs = {k: [] for k in iou_types} 27 | 28 | def update(self, predictions): 29 | img_ids = list(np.unique(list(predictions.keys()))) 30 | self.img_ids.extend(img_ids) 31 | 32 | for iou_type in self.iou_types: 33 | results = self.prepare(predictions, iou_type) 34 | with redirect_stdout(io.StringIO()): 35 | coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() 36 | coco_eval = self.coco_eval[iou_type] 37 | 38 | coco_eval.cocoDt = coco_dt 39 | coco_eval.params.imgIds = list(img_ids) 40 | img_ids, eval_imgs = evaluate(coco_eval) 41 | 42 | self.eval_imgs[iou_type].append(eval_imgs) 43 | 44 | def synchronize_between_processes(self): 45 | for iou_type in self.iou_types: 46 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 47 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 48 | 49 | def accumulate(self): 50 | for coco_eval in self.coco_eval.values(): 51 | coco_eval.accumulate() 52 | 53 | def summarize(self): 54 | for iou_type, coco_eval in self.coco_eval.items(): 55 | print(f"IoU metric: {iou_type}") 56 | coco_eval.summarize() 57 | 58 | def prepare(self, predictions, iou_type): 59 | if iou_type == "bbox": 60 | return self.prepare_for_coco_detection(predictions) 61 | if iou_type == "segm": 62 | return self.prepare_for_coco_segmentation(predictions) 63 | if iou_type == "keypoints": 64 | return self.prepare_for_coco_keypoint(predictions) 65 | raise ValueError(f"Unknown iou type {iou_type}") 66 | 67 | def prepare_for_coco_detection(self, predictions): 68 | coco_results = [] 69 | for original_id, prediction in predictions.items(): 70 | if len(prediction) == 0: 71 | continue 72 | 73 | boxes = prediction["boxes"] 74 | boxes = convert_to_xywh(boxes).tolist() 75 | scores = prediction["scores"].tolist() 76 | labels = prediction["labels"].tolist() 77 | 78 | coco_results.extend( 79 | [ 80 | { 81 | "image_id": original_id, 82 | "category_id": labels[k], 83 | "bbox": box, 84 | "score": scores[k], 85 | } 86 | for k, box in enumerate(boxes) 87 | ] 88 | ) 89 | return coco_results 90 | 91 | def prepare_for_coco_segmentation(self, predictions): 92 | coco_results = [] 93 | for original_id, prediction in predictions.items(): 94 | if len(prediction) == 0: 95 | continue 96 | 97 | scores = prediction["scores"] 98 | labels = prediction["labels"] 99 | masks = prediction["masks"] 100 | 101 | masks = masks > 0.5 102 | 103 | scores = prediction["scores"].tolist() 104 | labels = prediction["labels"].tolist() 105 | 106 | rles = [ 107 | mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] for mask in masks 108 | ] 109 | for rle in rles: 110 | rle["counts"] = rle["counts"].decode("utf-8") 111 | 112 | coco_results.extend( 113 | [ 114 | { 115 | "image_id": original_id, 116 | "category_id": labels[k], 117 | "segmentation": rle, 118 | "score": scores[k], 119 | } 120 | for k, rle in enumerate(rles) 121 | ] 122 | ) 123 | return coco_results 124 | 125 | def prepare_for_coco_keypoint(self, predictions): 126 | coco_results = [] 127 | for original_id, prediction in predictions.items(): 128 | if len(prediction) == 0: 129 | continue 130 | 131 | boxes = prediction["boxes"] 132 | boxes = convert_to_xywh(boxes).tolist() 133 | scores = prediction["scores"].tolist() 134 | labels = prediction["labels"].tolist() 135 | keypoints = prediction["keypoints"] 136 | keypoints = keypoints.flatten(start_dim=1).tolist() 137 | 138 | coco_results.extend( 139 | [ 140 | { 141 | "image_id": original_id, 142 | "category_id": labels[k], 143 | 
"keypoints": keypoint, 144 | "score": scores[k], 145 | } 146 | for k, keypoint in enumerate(keypoints) 147 | ] 148 | ) 149 | return coco_results 150 | 151 | 152 | def convert_to_xywh(boxes): 153 | xmin, ymin, xmax, ymax = boxes.unbind(1) 154 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 155 | 156 | 157 | def merge(img_ids, eval_imgs): 158 | all_img_ids = utils.all_gather(img_ids) 159 | all_eval_imgs = utils.all_gather(eval_imgs) 160 | 161 | merged_img_ids = [] 162 | for p in all_img_ids: 163 | merged_img_ids.extend(p) 164 | 165 | merged_eval_imgs = [] 166 | for p in all_eval_imgs: 167 | merged_eval_imgs.append(p) 168 | 169 | merged_img_ids = np.array(merged_img_ids) 170 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 171 | 172 | # keep only unique (and in sorted order) images 173 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 174 | merged_eval_imgs = merged_eval_imgs[..., idx] 175 | 176 | return merged_img_ids, merged_eval_imgs 177 | 178 | 179 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 180 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 181 | img_ids = list(img_ids) 182 | eval_imgs = list(eval_imgs.flatten()) 183 | 184 | coco_eval.evalImgs = eval_imgs 185 | coco_eval.params.imgIds = img_ids 186 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 187 | 188 | 189 | def evaluate(imgs): 190 | with redirect_stdout(io.StringIO()): 191 | imgs.evaluate() 192 | return imgs.params.imgIds, np.asarray(imgs.evalImgs).reshape(-1, len(imgs.params.areaRng), len(imgs.params.imgIds)) 193 | -------------------------------------------------------------------------------- /helmet/ml/detection/coco_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import sys 4 | 5 | import torch 6 | import torch.utils.data 7 | import torchvision 8 | import helmet.ml.detection.transforms as T 9 | from pycocotools import mask as coco_mask 10 | from pycocotools.coco import COCO 11 | from helmet.exception import HelmetException 12 | 13 | 14 | class FilterAndRemapCocoCategories: 15 | def __init__(self, categories, remap=True): 16 | self.categories = categories 17 | self.remap = remap 18 | 19 | def __call__(self, image, target): 20 | anno = target["annotations"] 21 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 22 | if not self.remap: 23 | target["annotations"] = anno 24 | return image, target 25 | anno = copy.deepcopy(anno) 26 | for obj in anno: 27 | obj["category_id"] = self.categories.index(obj["category_id"]) 28 | target["annotations"] = anno 29 | return image, target 30 | 31 | 32 | def convert_coco_poly_to_mask(segmentations, height, width): 33 | masks = [] 34 | for polygons in segmentations: 35 | rles = coco_mask.frPyObjects(polygons, height, width) 36 | mask = coco_mask.decode(rles) 37 | if len(mask.shape) < 3: 38 | mask = mask[..., None] 39 | mask = torch.as_tensor(mask, dtype=torch.uint8) 40 | mask = mask.any(dim=2) 41 | masks.append(mask) 42 | if masks: 43 | masks = torch.stack(masks, dim=0) 44 | else: 45 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 46 | return masks 47 | 48 | 49 | class ConvertCocoPolysToMask: 50 | def __call__(self, image, target): 51 | w, h = image.size 52 | 53 | image_id = target["image_id"] 54 | image_id = torch.tensor([image_id]) 55 | 56 | anno = target["annotations"] 57 | 58 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 59 | 60 | boxes = [obj["bbox"] for obj in anno] 61 | # guard against no boxes via 
resizing 62 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 63 | boxes[:, 2:] += boxes[:, :2] 64 | boxes[:, 0::2].clamp_(min=0, max=w) 65 | boxes[:, 1::2].clamp_(min=0, max=h) 66 | 67 | classes = [obj["category_id"] for obj in anno] 68 | classes = torch.tensor(classes, dtype=torch.int64) 69 | 70 | segmentations = [obj["segmentation"] for obj in anno] 71 | masks = convert_coco_poly_to_mask(segmentations, h, w) 72 | 73 | keypoints = None 74 | if anno and "keypoints" in anno[0]: 75 | keypoints = [obj["keypoints"] for obj in anno] 76 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 77 | num_keypoints = keypoints.shape[0] 78 | if num_keypoints: 79 | keypoints = keypoints.view(num_keypoints, -1, 3) 80 | 81 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 82 | boxes = boxes[keep] 83 | classes = classes[keep] 84 | masks = masks[keep] 85 | if keypoints is not None: 86 | keypoints = keypoints[keep] 87 | 88 | target = {} 89 | target["boxes"] = boxes 90 | target["labels"] = classes 91 | target["masks"] = masks 92 | target["image_id"] = image_id 93 | if keypoints is not None: 94 | target["keypoints"] = keypoints 95 | 96 | # for conversion to coco api 97 | area = torch.tensor([obj["area"] for obj in anno]) 98 | iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) 99 | target["area"] = area 100 | target["iscrowd"] = iscrowd 101 | 102 | return image, target 103 | 104 | 105 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 106 | def _has_only_empty_bbox(anno): 107 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 108 | 109 | def _count_visible_keypoints(anno): 110 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 111 | 112 | min_keypoints_per_image = 10 113 | 114 | def _has_valid_annotation(anno): 115 | # if it's empty, there is no annotation 116 | if len(anno) == 0: 117 | return False 118 | # if all boxes have close to zero area, there is no annotation 119 | if _has_only_empty_bbox(anno): 120 | return False 121 | # keypoints task have a slight different critera for considering 122 | # if an annotation is valid 123 | if "keypoints" not in anno[0]: 124 | return True 125 | # for keypoint detection tasks, only consider valid images those 126 | # containing at least min_keypoints_per_image 127 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 128 | return True 129 | return False 130 | 131 | if not isinstance(dataset, torchvision.datasets.CocoDetection): 132 | raise TypeError( 133 | f"This function expects dataset of type torchvision.datasets.CocoDetection, instead got {type(dataset)}" 134 | ) 135 | ids = [] 136 | for ds_idx, img_id in enumerate(dataset.ids): 137 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 138 | anno = dataset.coco.loadAnns(ann_ids) 139 | if cat_list: 140 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 141 | if _has_valid_annotation(anno): 142 | ids.append(ds_idx) 143 | 144 | dataset = torch.utils.data.Subset(dataset, ids) 145 | return dataset 146 | 147 | 148 | def convert_to_coco_api(ds): 149 | try: 150 | coco_ds = COCO() 151 | # annotation IDs need to start at 1, not 0, see torchvision issue #1530 152 | ann_id = 1 153 | dataset = {"images": [], "categories": [], "annotations": []} 154 | categories = set() 155 | for img_idx in range(len(ds)): 156 | # find better way to get target 157 | # targets = ds.get_annotations(img_idx) 158 | img, targets = ds[img_idx] 159 | image_id = targets["image_id"].item() 160 | img_dict = {} 161 
| img_dict["id"] = image_id 162 | img_dict["height"] = img.shape[-2] 163 | img_dict["width"] = img.shape[-1] 164 | dataset["images"].append(img_dict) 165 | bboxes = targets["boxes"].clone() 166 | bboxes[:, 2:] -= bboxes[:, :2] 167 | bboxes = bboxes.tolist() 168 | labels = targets["labels"].tolist() 169 | areas = targets["area"].tolist() 170 | iscrowd = targets["iscrowd"].tolist() 171 | if "masks" in targets: 172 | masks = targets["masks"] 173 | # make masks Fortran contiguous for coco_mask 174 | masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) 175 | if "keypoints" in targets: 176 | keypoints = targets["keypoints"] 177 | keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() 178 | num_objs = len(bboxes) 179 | for i in range(num_objs): 180 | ann = {} 181 | ann["image_id"] = image_id 182 | ann["bbox"] = bboxes[i] 183 | ann["category_id"] = labels[i] 184 | categories.add(labels[i]) 185 | ann["area"] = areas[i] 186 | ann["iscrowd"] = iscrowd[i] 187 | ann["id"] = ann_id 188 | if "masks" in targets: 189 | ann["segmentation"] = coco_mask.encode(masks[i].numpy()) 190 | if "keypoints" in targets: 191 | ann["keypoints"] = keypoints[i] 192 | ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3]) 193 | dataset["annotations"].append(ann) 194 | ann_id += 1 195 | dataset["categories"] = [{"id": i} for i in sorted(categories)] 196 | coco_ds.dataset = dataset 197 | coco_ds.createIndex() 198 | return coco_ds 199 | except Exception as e: 200 | raise HelmetException(e, sys) from e 201 | 202 | 203 | def get_coco_api_from_dataset(dataset): 204 | for _ in range(10): 205 | if isinstance(dataset, torchvision.datasets.CocoDetection): 206 | break 207 | if isinstance(dataset, torch.utils.data.Subset): 208 | dataset = dataset.dataset 209 | if isinstance(dataset, torchvision.datasets.CocoDetection): 210 | return dataset.coco 211 | return convert_to_coco_api(dataset) 212 | 213 | 214 | class CocoDetection(torchvision.datasets.CocoDetection): 215 | def __init__(self, img_folder, ann_file, transforms): 216 | super().__init__(img_folder, ann_file) 217 | self._transforms = transforms 218 | 219 | def __getitem__(self, idx): 220 | img, target = super().__getitem__(idx) 221 | image_id = self.ids[idx] 222 | target = dict(image_id=image_id, annotations=target) 223 | if self._transforms is not None: 224 | img, target = self._transforms(img, target) 225 | return img, target 226 | 227 | 228 | def get_coco(root, image_set, transforms, mode="instances"): 229 | anno_file_template = "{}_{}2017.json" 230 | PATHS = { 231 | "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), 232 | "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), 233 | # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) 234 | } 235 | 236 | t = [ConvertCocoPolysToMask()] 237 | 238 | if transforms is not None: 239 | t.append(transforms) 240 | transforms = T.Compose(t) 241 | 242 | img_folder, ann_file = PATHS[image_set] 243 | img_folder = os.path.join(root, img_folder) 244 | ann_file = os.path.join(root, ann_file) 245 | 246 | dataset = CocoDetection(img_folder, ann_file, transforms=transforms) 247 | 248 | if image_set == "train": 249 | dataset = _coco_remove_images_without_annotations(dataset) 250 | 251 | # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) 252 | 253 | return dataset 254 | 255 | 256 | def get_coco_kp(root, image_set, transforms): 257 | return get_coco(root, image_set, transforms, 
mode="person_keypoints") 258 | -------------------------------------------------------------------------------- /helmet/ml/detection/engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | import time 4 | 5 | import torch 6 | import torchvision.models.detection.mask_rcnn 7 | from helmet.exception import HelmetException 8 | from helmet.ml.detection import utils 9 | from helmet.ml.detection.coco_eval import CocoEvaluator 10 | from helmet.ml.detection.coco_utils import get_coco_api_from_dataset 11 | 12 | 13 | def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None): 14 | try: 15 | model.to(device) 16 | model.train() 17 | metric_logger = utils.MetricLogger(delimiter=" ") 18 | metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}")) 19 | header = f"Epoch: [{epoch}]" 20 | 21 | lr_scheduler = None 22 | if epoch == 0: 23 | warmup_factor = 1.0 / 1000 24 | warmup_iters = min(1000, len(data_loader) - 1) 25 | 26 | lr_scheduler = torch.optim.lr_scheduler.LinearLR( 27 | optimizer, start_factor=warmup_factor, total_iters=warmup_iters 28 | ) 29 | 30 | for images, targets in metric_logger.log_every(data_loader, print_freq, header): 31 | images = list(image.to(device) for image in images) 32 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 33 | with torch.cuda.amp.autocast(enabled=scaler is not None): 34 | loss_dict = model(images, targets) 35 | losses = sum(loss for loss in loss_dict.values()) 36 | 37 | # reduce losses over all GPUs for logging purposes 38 | loss_dict_reduced = utils.reduce_dict(loss_dict) 39 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 40 | 41 | loss_value = losses_reduced.item() 42 | 43 | if not math.isfinite(loss_value): 44 | print(f"Loss is {loss_value}, stopping training") 45 | print(loss_dict_reduced) 46 | sys.exit(1) 47 | 48 | optimizer.zero_grad() 49 | if scaler is not None: 50 | scaler.scale(losses).backward() 51 | scaler.step(optimizer) 52 | scaler.update() 53 | else: 54 | losses.backward() 55 | optimizer.step() 56 | 57 | if lr_scheduler is not None: 58 | lr_scheduler.step() 59 | 60 | metric_logger.update(loss=losses_reduced, **loss_dict_reduced) 61 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 62 | 63 | return metric_logger 64 | except Exception as e: 65 | raise HelmetException(e, sys) from e 66 | 67 | 68 | def _get_iou_types(model): 69 | try: 70 | model_without_ddp = model 71 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 72 | model_without_ddp = model.module 73 | iou_types = ["bbox"] 74 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 75 | iou_types.append("segm") 76 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 77 | iou_types.append("keypoints") 78 | return iou_types 79 | except Exception as e: 80 | raise HelmetException(e, sys) from e 81 | 82 | 83 | @torch.inference_mode() 84 | def evaluate(model, data_loader, device): 85 | try: 86 | n_threads = torch.get_num_threads() 87 | # FIXME remove this and make paste_masks_in_image run on the GPU 88 | torch.set_num_threads(1) 89 | cpu_device = torch.device("cpu") 90 | model.eval() 91 | metric_logger = utils.MetricLogger(delimiter=" ") 92 | header = "Test:" 93 | 94 | coco = get_coco_api_from_dataset(data_loader.dataset) 95 | iou_types = _get_iou_types(model) 96 | coco_evaluator = CocoEvaluator(coco, iou_types) 97 | 98 | for images, targets in 
metric_logger.log_every(data_loader, 100, header): 99 | images = list(img.to(device) for img in images) 100 | 101 | if torch.cuda.is_available(): 102 | torch.cuda.synchronize() 103 | model_time = time.time() 104 | outputs = model(images) 105 | 106 | outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] 107 | model_time = time.time() - model_time 108 | 109 | res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} 110 | evaluator_time = time.time() 111 | coco_evaluator.update(res) 112 | evaluator_time = time.time() - evaluator_time 113 | metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) 114 | 115 | # gather the stats from all processes 116 | metric_logger.synchronize_between_processes() 117 | print("Averaged stats:", metric_logger) 118 | coco_evaluator.synchronize_between_processes() 119 | 120 | # accumulate predictions from all images 121 | coco_evaluator.accumulate() 122 | coco_evaluator.summarize() 123 | torch.set_num_threads(n_threads) 124 | return coco_evaluator 125 | except Exception as e: 126 | raise HelmetException(e, sys) from e 127 | -------------------------------------------------------------------------------- /helmet/ml/detection/group_by_aspect_ratio.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import copy 3 | import math 4 | from collections import defaultdict 5 | from itertools import chain, repeat 6 | 7 | import numpy as np 8 | import torch 9 | import torch.utils.data 10 | import torchvision 11 | from PIL import Image 12 | from torch.utils.data.sampler import BatchSampler, Sampler 13 | from torch.utils.model_zoo import tqdm 14 | 15 | 16 | def _repeat_to_at_least(iterable, n): 17 | repeat_times = math.ceil(n / len(iterable)) 18 | repeated = chain.from_iterable(repeat(iterable, repeat_times)) 19 | return list(repeated) 20 | 21 | 22 | class GroupedBatchSampler(BatchSampler): 23 | """ 24 | Wraps another sampler to yield a mini-batch of indices. 25 | It enforces that the batch only contain elements from the same group. 26 | It also tries to provide mini-batches which follows an ordering which is 27 | as close as possible to the ordering from the original sampler. 28 | Args: 29 | sampler (Sampler): Base sampler. 30 | group_ids (list[int]): If the sampler produces indices in range [0, N), 31 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 32 | The group ids must be a continuous set of integers starting from 33 | 0, i.e. they must be in the range [0, num_groups). 34 | batch_size (int): Size of mini-batch. 
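        Example (an illustrative sketch, mirroring how train.py wires this
        sampler; ``dataset`` and the batch size below are placeholders):

            sampler = torch.utils.data.RandomSampler(dataset)
            group_ids = create_aspect_ratio_groups(dataset, k=3)
            batch_sampler = GroupedBatchSampler(sampler, group_ids, batch_size=2)
            data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=batch_sampler)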
35 | """ 36 | 37 | def __init__(self, sampler, group_ids, batch_size): 38 | if not isinstance(sampler, Sampler): 39 | raise ValueError(f"sampler should be an instance of torch.utils.data.Sampler, but got sampler={sampler}") 40 | self.sampler = sampler 41 | self.group_ids = group_ids 42 | self.batch_size = batch_size 43 | 44 | def __iter__(self): 45 | buffer_per_group = defaultdict(list) 46 | samples_per_group = defaultdict(list) 47 | 48 | num_batches = 0 49 | for idx in self.sampler: 50 | group_id = self.group_ids[idx] 51 | buffer_per_group[group_id].append(idx) 52 | samples_per_group[group_id].append(idx) 53 | if len(buffer_per_group[group_id]) == self.batch_size: 54 | yield buffer_per_group[group_id] 55 | num_batches += 1 56 | del buffer_per_group[group_id] 57 | assert len(buffer_per_group[group_id]) < self.batch_size 58 | 59 | # now we have run out of elements that satisfy 60 | # the group criteria, let's return the remaining 61 | # elements so that the size of the sampler is 62 | # deterministic 63 | expected_num_batches = len(self) 64 | num_remaining = expected_num_batches - num_batches 65 | if num_remaining > 0: 66 | # for the remaining batches, take first the buffers with largest number 67 | # of elements 68 | for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True): 69 | remaining = self.batch_size - len(buffer_per_group[group_id]) 70 | samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) 71 | buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) 72 | assert len(buffer_per_group[group_id]) == self.batch_size 73 | yield buffer_per_group[group_id] 74 | num_remaining -= 1 75 | if num_remaining == 0: 76 | break 77 | assert num_remaining == 0 78 | 79 | def __len__(self): 80 | return len(self.sampler) // self.batch_size 81 | 82 | 83 | def _compute_aspect_ratios_slow(dataset, indices=None): 84 | print( 85 | "Your dataset doesn't support the fast path for " 86 | "computing the aspect ratios, so will iterate over " 87 | "the full dataset and load every image instead. " 88 | "This might take some time..." 
89 | ) 90 | if indices is None: 91 | indices = range(len(dataset)) 92 | 93 | class SubsetSampler(Sampler): 94 | def __init__(self, indices): 95 | self.indices = indices 96 | 97 | def __iter__(self): 98 | return iter(self.indices) 99 | 100 | def __len__(self): 101 | return len(self.indices) 102 | 103 | sampler = SubsetSampler(indices) 104 | data_loader = torch.utils.data.DataLoader( 105 | dataset, 106 | batch_size=1, 107 | sampler=sampler, 108 | num_workers=14, # you might want to increase it for faster processing 109 | collate_fn=lambda x: x[0], 110 | ) 111 | aspect_ratios = [] 112 | with tqdm(total=len(dataset)) as pbar: 113 | for _i, (img, _) in enumerate(data_loader): 114 | pbar.update(1) 115 | height, width = img.shape[-2:] 116 | aspect_ratio = float(width) / float(height) 117 | aspect_ratios.append(aspect_ratio) 118 | return aspect_ratios 119 | 120 | 121 | def _compute_aspect_ratios_custom_dataset(dataset, indices=None): 122 | if indices is None: 123 | indices = range(len(dataset)) 124 | aspect_ratios = [] 125 | for i in indices: 126 | height, width = dataset.get_height_and_width(i) 127 | aspect_ratio = float(width) / float(height) 128 | aspect_ratios.append(aspect_ratio) 129 | return aspect_ratios 130 | 131 | 132 | def _compute_aspect_ratios_coco_dataset(dataset, indices=None): 133 | if indices is None: 134 | indices = range(len(dataset)) 135 | aspect_ratios = [] 136 | for i in indices: 137 | img_info = dataset.coco.imgs[dataset.ids[i]] 138 | aspect_ratio = float(img_info["width"]) / float(img_info["height"]) 139 | aspect_ratios.append(aspect_ratio) 140 | return aspect_ratios 141 | 142 | 143 | def _compute_aspect_ratios_voc_dataset(dataset, indices=None): 144 | if indices is None: 145 | indices = range(len(dataset)) 146 | aspect_ratios = [] 147 | for i in indices: 148 | # this doesn't load the data into memory, because PIL loads it lazily 149 | width, height = Image.open(dataset.images[i]).size 150 | aspect_ratio = float(width) / float(height) 151 | aspect_ratios.append(aspect_ratio) 152 | return aspect_ratios 153 | 154 | 155 | def _compute_aspect_ratios_subset_dataset(dataset, indices=None): 156 | if indices is None: 157 | indices = range(len(dataset)) 158 | 159 | ds_indices = [dataset.indices[i] for i in indices] 160 | return compute_aspect_ratios(dataset.dataset, ds_indices) 161 | 162 | 163 | def compute_aspect_ratios(dataset, indices=None): 164 | if hasattr(dataset, "get_height_and_width"): 165 | return _compute_aspect_ratios_custom_dataset(dataset, indices) 166 | 167 | if isinstance(dataset, torchvision.datasets.CocoDetection): 168 | return _compute_aspect_ratios_coco_dataset(dataset, indices) 169 | 170 | if isinstance(dataset, torchvision.datasets.VOCDetection): 171 | return _compute_aspect_ratios_voc_dataset(dataset, indices) 172 | 173 | if isinstance(dataset, torch.utils.data.Subset): 174 | return _compute_aspect_ratios_subset_dataset(dataset, indices) 175 | 176 | # slow path 177 | return _compute_aspect_ratios_slow(dataset, indices) 178 | 179 | 180 | def _quantize(x, bins): 181 | bins = copy.deepcopy(bins) 182 | bins = sorted(bins) 183 | quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) 184 | return quantized 185 | 186 | 187 | def create_aspect_ratio_groups(dataset, k=0): 188 | aspect_ratios = compute_aspect_ratios(dataset) 189 | bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] 190 | groups = _quantize(aspect_ratios, bins) 191 | # count number of elements per group 192 | counts = np.unique(groups, return_counts=True)[1] 193 | fbins = 
[0] + bins + [np.inf] 194 | print(f"Using {fbins} as bins for aspect ratio quantization") 195 | print(f"Count of instances per bin: {counts}") 196 | return groups 197 | -------------------------------------------------------------------------------- /helmet/ml/detection/presets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import transforms as T 3 | 4 | 5 | class DetectionPresetTrain: 6 | def __init__(self, *, data_augmentation, hflip_prob=0.5, mean=(123.0, 117.0, 104.0)): 7 | if data_augmentation == "hflip": 8 | self.transforms = T.Compose( 9 | [ 10 | T.RandomHorizontalFlip(p=hflip_prob), 11 | T.PILToTensor(), 12 | T.ConvertImageDtype(torch.float), 13 | ] 14 | ) 15 | elif data_augmentation == "lsj": 16 | self.transforms = T.Compose( 17 | [ 18 | T.ScaleJitter(target_size=(1024, 1024)), 19 | T.FixedSizeCrop(size=(1024, 1024), fill=mean), 20 | T.RandomHorizontalFlip(p=hflip_prob), 21 | T.PILToTensor(), 22 | T.ConvertImageDtype(torch.float), 23 | ] 24 | ) 25 | elif data_augmentation == "multiscale": 26 | self.transforms = T.Compose( 27 | [ 28 | T.RandomShortestSize( 29 | min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333 30 | ), 31 | T.RandomHorizontalFlip(p=hflip_prob), 32 | T.PILToTensor(), 33 | T.ConvertImageDtype(torch.float), 34 | ] 35 | ) 36 | elif data_augmentation == "ssd": 37 | self.transforms = T.Compose( 38 | [ 39 | T.RandomPhotometricDistort(), 40 | T.RandomZoomOut(fill=list(mean)), 41 | T.RandomIoUCrop(), 42 | T.RandomHorizontalFlip(p=hflip_prob), 43 | T.PILToTensor(), 44 | T.ConvertImageDtype(torch.float), 45 | ] 46 | ) 47 | elif data_augmentation == "ssdlite": 48 | self.transforms = T.Compose( 49 | [ 50 | T.RandomIoUCrop(), 51 | T.RandomHorizontalFlip(p=hflip_prob), 52 | T.PILToTensor(), 53 | T.ConvertImageDtype(torch.float), 54 | ] 55 | ) 56 | else: 57 | raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"') 58 | 59 | def __call__(self, img, target): 60 | return self.transforms(img, target) 61 | 62 | 63 | class DetectionPresetEval: 64 | def __init__(self): 65 | self.transforms = T.Compose( 66 | [ 67 | T.PILToTensor(), 68 | T.ConvertImageDtype(torch.float), 69 | ] 70 | ) 71 | 72 | def __call__(self, img, target): 73 | return self.transforms(img, target) 74 | -------------------------------------------------------------------------------- /helmet/ml/detection/train.py: -------------------------------------------------------------------------------- 1 | r"""PyTorch Detection Training. 2 | 3 | To run in a multi-gpu environment, use the distributed launcher:: 4 | 5 | python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \ 6 | train.py ... --world-size $NGPU 7 | 8 | The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. 9 | --lr 0.02 --batch-size 2 --world-size 8 10 | If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. 11 | 12 | On top of that, for training Faster/Mask R-CNN, the default hyperparameters are 13 | --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3 14 | 15 | Also, if you train Keypoint R-CNN, the default hyperparameters are 16 | --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3 17 | Because the number of images is smaller in the person keypoint subset of COCO, 18 | the number of epochs should be adapted so that we have the same number of iterations. 
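    As a worked example (illustrative values only): on 2 gpus the linearly scaled
    learning rate is 0.02 / 8 * 2 = 0.005, so a Faster/Mask R-CNN run would be
    launched as

        python -m torch.distributed.launch --nproc_per_node=2 --use_env \
            train.py --world-size 2 --lr 0.005 --batch-size 2 \
            --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3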
19 | """ 20 | import datetime 21 | import os 22 | import time 23 | 24 | import presets 25 | import torch 26 | import torch.utils.data 27 | import torchvision 28 | import torchvision.models.detection 29 | import torchvision.models.detection.mask_rcnn 30 | import utils 31 | from coco_utils import get_coco, get_coco_kp 32 | from engine import evaluate, train_one_epoch 33 | from group_by_aspect_ratio import create_aspect_ratio_groups, GroupedBatchSampler 34 | from torchvision.transforms import InterpolationMode 35 | from transforms import SimpleCopyPaste 36 | 37 | 38 | def copypaste_collate_fn(batch): 39 | copypaste = SimpleCopyPaste(blending=True, resize_interpolation=InterpolationMode.BILINEAR) 40 | return copypaste(*utils.collate_fn(batch)) 41 | 42 | 43 | def get_dataset(name, image_set, transform, data_path): 44 | paths = {"coco": (data_path, get_coco, 91), "coco_kp": (data_path, get_coco_kp, 2)} 45 | p, ds_fn, num_classes = paths[name] 46 | 47 | ds = ds_fn(p, image_set=image_set, transforms=transform) 48 | return ds, num_classes 49 | 50 | 51 | def get_transform(train, args): 52 | if train: 53 | return presets.DetectionPresetTrain(data_augmentation=args.data_augmentation) 54 | elif args.weights and args.test_only: 55 | weights = torchvision.models.get_weight(args.weights) 56 | trans = weights.transforms() 57 | return lambda img, target: (trans(img), target) 58 | else: 59 | return presets.DetectionPresetEval() 60 | 61 | 62 | def get_args_parser(add_help=True): 63 | import argparse 64 | 65 | parser = argparse.ArgumentParser(description="PyTorch Detection Training", add_help=add_help) 66 | 67 | parser.add_argument("--data-path", default="/datasets01/COCO/022719/", type=str, help="dataset path") 68 | parser.add_argument("--dataset", default="coco", type=str, help="dataset name") 69 | parser.add_argument("--model", default="maskrcnn_resnet50_fpn", type=str, help="model name") 70 | parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)") 71 | parser.add_argument( 72 | "-b", "--batch-size", default=2, type=int, help="images per gpu, the total batch size is $NGPU x batch_size" 73 | ) 74 | parser.add_argument("--epochs", default=26, type=int, metavar="N", help="number of total epochs to run") 75 | parser.add_argument( 76 | "-j", "--workers", default=4, type=int, metavar="N", help="number of data loading workers (default: 4)" 77 | ) 78 | parser.add_argument("--opt", default="sgd", type=str, help="optimizer") 79 | parser.add_argument( 80 | "--lr", 81 | default=0.02, 82 | type=float, 83 | help="initial learning rate, 0.02 is the default value for training on 8 gpus and 2 images_per_gpu", 84 | ) 85 | parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") 86 | parser.add_argument( 87 | "--wd", 88 | "--weight-decay", 89 | default=1e-4, 90 | type=float, 91 | metavar="W", 92 | help="weight decay (default: 1e-4)", 93 | dest="weight_decay", 94 | ) 95 | parser.add_argument( 96 | "--norm-weight-decay", 97 | default=None, 98 | type=float, 99 | help="weight decay for Normalization layers (default: None, same value as --wd)", 100 | ) 101 | parser.add_argument( 102 | "--lr-scheduler", default="multisteplr", type=str, help="name of lr scheduler (default: multisteplr)" 103 | ) 104 | parser.add_argument( 105 | "--lr-step-size", default=8, type=int, help="decrease lr every step-size epochs (multisteplr scheduler only)" 106 | ) 107 | parser.add_argument( 108 | "--lr-steps", 109 | default=[16, 22], 110 | nargs="+", 111 | type=int, 112 | 
help="decrease lr every step-size epochs (multisteplr scheduler only)", 113 | ) 114 | parser.add_argument( 115 | "--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)" 116 | ) 117 | parser.add_argument("--print-freq", default=20, type=int, help="print frequency") 118 | parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs") 119 | parser.add_argument("--resume", default="", type=str, help="path of checkpoint") 120 | parser.add_argument("--start_epoch", default=0, type=int, help="start epoch") 121 | parser.add_argument("--aspect-ratio-group-factor", default=3, type=int) 122 | parser.add_argument("--rpn-score-thresh", default=None, type=float, help="rpn score threshold for faster-rcnn") 123 | parser.add_argument( 124 | "--trainable-backbone-layers", default=None, type=int, help="number of trainable layers of backbone" 125 | ) 126 | parser.add_argument( 127 | "--data-augmentation", default="hflip", type=str, help="data augmentation policy (default: hflip)" 128 | ) 129 | parser.add_argument( 130 | "--sync-bn", 131 | dest="sync_bn", 132 | help="Use sync batch norm", 133 | action="store_true", 134 | ) 135 | parser.add_argument( 136 | "--test-only", 137 | dest="test_only", 138 | help="Only test the model", 139 | action="store_true", 140 | ) 141 | 142 | parser.add_argument( 143 | "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." 144 | ) 145 | 146 | # distributed training parameters 147 | parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") 148 | parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training") 149 | parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load") 150 | parser.add_argument("--weights-backbone", default=None, type=str, help="the backbone weights enum name to load") 151 | 152 | # Mixed precision training parameters 153 | parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training") 154 | 155 | # Use CopyPaste augmentation training parameter 156 | parser.add_argument( 157 | "--use-copypaste", 158 | action="store_true", 159 | help="Use CopyPaste data augmentation. 
Works only with data-augmentation='lsj'.", 160 | ) 161 | 162 | return parser 163 | 164 | 165 | def main(args): 166 | if args.output_dir: 167 | utils.mkdir(args.output_dir) 168 | 169 | utils.init_distributed_mode(args) 170 | print(args) 171 | 172 | device = torch.device(args.device) 173 | 174 | if args.use_deterministic_algorithms: 175 | torch.use_deterministic_algorithms(True) 176 | 177 | # Data loading code 178 | print("Loading data") 179 | 180 | dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args), args.data_path) 181 | dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args), args.data_path) 182 | 183 | print("Creating data loaders") 184 | if args.distributed: 185 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) 186 | test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False) 187 | else: 188 | train_sampler = torch.utils.data.RandomSampler(dataset) 189 | test_sampler = torch.utils.data.SequentialSampler(dataset_test) 190 | 191 | if args.aspect_ratio_group_factor >= 0: 192 | group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor) 193 | train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) 194 | else: 195 | train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, args.batch_size, drop_last=True) 196 | 197 | train_collate_fn = utils.collate_fn 198 | if args.use_copypaste: 199 | if args.data_augmentation != "lsj": 200 | raise RuntimeError("SimpleCopyPaste algorithm currently only supports the 'lsj' data augmentation policies") 201 | 202 | train_collate_fn = copypaste_collate_fn 203 | 204 | data_loader = torch.utils.data.DataLoader( 205 | dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_collate_fn 206 | ) 207 | 208 | data_loader_test = torch.utils.data.DataLoader( 209 | dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn 210 | ) 211 | 212 | print("Creating model") 213 | kwargs = {"trainable_backbone_layers": args.trainable_backbone_layers} 214 | if args.data_augmentation in ["multiscale", "lsj"]: 215 | kwargs["_skip_resize"] = True 216 | if "rcnn" in args.model: 217 | if args.rpn_score_thresh is not None: 218 | kwargs["rpn_score_thresh"] = args.rpn_score_thresh 219 | model = torchvision.models.get_model( 220 | args.model, weights=args.weights, weights_backbone=args.weights_backbone, num_classes=num_classes, **kwargs 221 | ) 222 | model.to(device) 223 | if args.distributed and args.sync_bn: 224 | model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) 225 | 226 | model_without_ddp = model 227 | if args.distributed: 228 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) 229 | model_without_ddp = model.module 230 | 231 | if args.norm_weight_decay is None: 232 | parameters = [p for p in model.parameters() if p.requires_grad] 233 | else: 234 | param_groups = torchvision.ops._utils.split_normalization_params(model) 235 | wd_groups = [args.norm_weight_decay, args.weight_decay] 236 | parameters = [{"params": p, "weight_decay": w} for p, w in zip(param_groups, wd_groups) if p] 237 | 238 | opt_name = args.opt.lower() 239 | if opt_name.startswith("sgd"): 240 | optimizer = torch.optim.SGD( 241 | parameters, 242 | lr=args.lr, 243 | momentum=args.momentum, 244 | weight_decay=args.weight_decay, 245 | nesterov="nesterov" in opt_name, 246 | ) 247 | elif opt_name == "adamw": 248 | optimizer = 
torch.optim.AdamW(parameters, lr=args.lr, weight_decay=args.weight_decay) 249 | else: 250 | raise RuntimeError(f"Invalid optimizer {args.opt}. Only SGD and AdamW are supported.") 251 | 252 | scaler = torch.cuda.amp.GradScaler() if args.amp else None 253 | 254 | args.lr_scheduler = args.lr_scheduler.lower() 255 | if args.lr_scheduler == "multisteplr": 256 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) 257 | elif args.lr_scheduler == "cosineannealinglr": 258 | lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) 259 | else: 260 | raise RuntimeError( 261 | f"Invalid lr scheduler '{args.lr_scheduler}'. Only MultiStepLR and CosineAnnealingLR are supported." 262 | ) 263 | 264 | if args.resume: 265 | checkpoint = torch.load(args.resume, map_location="cpu") 266 | model_without_ddp.load_state_dict(checkpoint["model"]) 267 | optimizer.load_state_dict(checkpoint["optimizer"]) 268 | lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) 269 | args.start_epoch = checkpoint["epoch"] + 1 270 | if args.amp: 271 | scaler.load_state_dict(checkpoint["scaler"]) 272 | 273 | if args.test_only: 274 | torch.backends.cudnn.deterministic = True 275 | evaluate(model, data_loader_test, device=device) 276 | return 277 | 278 | print("Start training") 279 | start_time = time.time() 280 | for epoch in range(args.start_epoch, args.epochs): 281 | if args.distributed: 282 | train_sampler.set_epoch(epoch) 283 | train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, scaler) 284 | lr_scheduler.step() 285 | if args.output_dir: 286 | checkpoint = { 287 | "model": model_without_ddp.state_dict(), 288 | "optimizer": optimizer.state_dict(), 289 | "lr_scheduler": lr_scheduler.state_dict(), 290 | "args": args, 291 | "epoch": epoch, 292 | } 293 | if args.amp: 294 | checkpoint["scaler"] = scaler.state_dict() 295 | utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) 296 | utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) 297 | 298 | # evaluate after every epoch 299 | evaluate(model, data_loader_test, device=device) 300 | 301 | total_time = time.time() - start_time 302 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 303 | print(f"Training time {total_time_str}") 304 | 305 | 306 | if __name__ == "__main__": 307 | args = get_args_parser().parse_args() 308 | main(args) 309 | -------------------------------------------------------------------------------- /helmet/ml/detection/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import torch 4 | import torchvision 5 | from torch import nn, Tensor 6 | from torchvision import ops 7 | from torchvision.transforms import functional as F, InterpolationMode, transforms as T 8 | 9 | 10 | def _flip_coco_person_keypoints(kps, width): 11 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 12 | flipped_data = kps[:, flip_inds] 13 | flipped_data[..., 0] = width - flipped_data[..., 0] 14 | # Maintain COCO convention that if visibility == 0, then x, y = 0 15 | inds = flipped_data[..., 2] == 0 16 | flipped_data[inds] = 0 17 | return flipped_data 18 | 19 | 20 | class Compose: 21 | def __init__(self, transforms): 22 | self.transforms = transforms 23 | 24 | def __call__(self, image, target): 25 | for t in self.transforms: 26 | image, target = t(image, target) 27 | return 
image, target 28 | 29 | 30 | class RandomHorizontalFlip(T.RandomHorizontalFlip): 31 | def forward( 32 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 33 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 34 | if torch.rand(1) < self.p: 35 | image = F.hflip(image) 36 | if target is not None: 37 | _, _, width = F.get_dimensions(image) 38 | target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]] 39 | if "masks" in target: 40 | target["masks"] = target["masks"].flip(-1) 41 | if "keypoints" in target: 42 | keypoints = target["keypoints"] 43 | keypoints = _flip_coco_person_keypoints(keypoints, width) 44 | target["keypoints"] = keypoints 45 | return image, target 46 | 47 | 48 | class PILToTensor(nn.Module): 49 | def forward( 50 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 51 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 52 | image = F.pil_to_tensor(image) 53 | return image, target 54 | 55 | 56 | class ConvertImageDtype(nn.Module): 57 | def __init__(self, dtype: torch.dtype) -> None: 58 | super().__init__() 59 | self.dtype = dtype 60 | 61 | def forward( 62 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 63 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 64 | image = F.convert_image_dtype(image, self.dtype) 65 | return image, target 66 | 67 | 68 | class RandomIoUCrop(nn.Module): 69 | def __init__( 70 | self, 71 | min_scale: float = 0.3, 72 | max_scale: float = 1.0, 73 | min_aspect_ratio: float = 0.5, 74 | max_aspect_ratio: float = 2.0, 75 | sampler_options: Optional[List[float]] = None, 76 | trials: int = 40, 77 | ): 78 | super().__init__() 79 | # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174 80 | self.min_scale = min_scale 81 | self.max_scale = max_scale 82 | self.min_aspect_ratio = min_aspect_ratio 83 | self.max_aspect_ratio = max_aspect_ratio 84 | if sampler_options is None: 85 | sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0] 86 | self.options = sampler_options 87 | self.trials = trials 88 | 89 | def forward( 90 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 91 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 92 | if target is None: 93 | raise ValueError("The targets can't be None for this transform.") 94 | 95 | if isinstance(image, torch.Tensor): 96 | if image.ndimension() not in {2, 3}: 97 | raise ValueError(f"image should be 2/3 dimensional. 
Got {image.ndimension()} dimensions.") 98 | elif image.ndimension() == 2: 99 | image = image.unsqueeze(0) 100 | 101 | _, orig_h, orig_w = F.get_dimensions(image) 102 | 103 | while True: 104 | # sample an option 105 | idx = int(torch.randint(low=0, high=len(self.options), size=(1,))) 106 | min_jaccard_overlap = self.options[idx] 107 | if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option 108 | return image, target 109 | 110 | for _ in range(self.trials): 111 | # check the aspect ratio limitations 112 | r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2) 113 | new_w = int(orig_w * r[0]) 114 | new_h = int(orig_h * r[1]) 115 | aspect_ratio = new_w / new_h 116 | if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio): 117 | continue 118 | 119 | # check for 0 area crops 120 | r = torch.rand(2) 121 | left = int((orig_w - new_w) * r[0]) 122 | top = int((orig_h - new_h) * r[1]) 123 | right = left + new_w 124 | bottom = top + new_h 125 | if left == right or top == bottom: 126 | continue 127 | 128 | # check for any valid boxes with centers within the crop area 129 | cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2]) 130 | cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3]) 131 | is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom) 132 | if not is_within_crop_area.any(): 133 | continue 134 | 135 | # check at least 1 box with jaccard limitations 136 | boxes = target["boxes"][is_within_crop_area] 137 | ious = torchvision.ops.boxes.box_iou( 138 | boxes, torch.tensor([[left, top, right, bottom]], dtype=boxes.dtype, device=boxes.device) 139 | ) 140 | if ious.max() < min_jaccard_overlap: 141 | continue 142 | 143 | # keep only valid boxes and perform cropping 144 | target["boxes"] = boxes 145 | target["labels"] = target["labels"][is_within_crop_area] 146 | target["boxes"][:, 0::2] -= left 147 | target["boxes"][:, 1::2] -= top 148 | target["boxes"][:, 0::2].clamp_(min=0, max=new_w) 149 | target["boxes"][:, 1::2].clamp_(min=0, max=new_h) 150 | image = F.crop(image, top, left, new_h, new_w) 151 | 152 | return image, target 153 | 154 | 155 | class RandomZoomOut(nn.Module): 156 | def __init__( 157 | self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 158 | ): 159 | super().__init__() 160 | if fill is None: 161 | fill = [0.0, 0.0, 0.0] 162 | self.fill = fill 163 | self.side_range = side_range 164 | if side_range[0] < 1.0 or side_range[0] > side_range[1]: 165 | raise ValueError(f"Invalid canvas side range provided {side_range}.") 166 | self.p = p 167 | 168 | @torch.jit.unused 169 | def _get_fill_value(self, is_pil): 170 | # type: (bool) -> int 171 | # We fake the type to make it work on JIT 172 | return tuple(int(x) for x in self.fill) if is_pil else 0 173 | 174 | def forward( 175 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 176 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 177 | if isinstance(image, torch.Tensor): 178 | if image.ndimension() not in {2, 3}: 179 | raise ValueError(f"image should be 2/3 dimensional. 
Got {image.ndimension()} dimensions.") 180 | elif image.ndimension() == 2: 181 | image = image.unsqueeze(0) 182 | 183 | if torch.rand(1) >= self.p: 184 | return image, target 185 | 186 | _, orig_h, orig_w = F.get_dimensions(image) 187 | 188 | r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) 189 | canvas_width = int(orig_w * r) 190 | canvas_height = int(orig_h * r) 191 | 192 | r = torch.rand(2) 193 | left = int((canvas_width - orig_w) * r[0]) 194 | top = int((canvas_height - orig_h) * r[1]) 195 | right = canvas_width - (left + orig_w) 196 | bottom = canvas_height - (top + orig_h) 197 | 198 | if torch.jit.is_scripting(): 199 | fill = 0 200 | else: 201 | fill = self._get_fill_value(F._is_pil_image(image)) 202 | 203 | image = F.pad(image, [left, top, right, bottom], fill=fill) 204 | if isinstance(image, torch.Tensor): 205 | # PyTorch's pad supports only integers on fill. So we need to overwrite the colour 206 | v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1) 207 | image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h) :, :] = image[ 208 | ..., :, (left + orig_w) : 209 | ] = v 210 | 211 | if target is not None: 212 | target["boxes"][:, 0::2] += left 213 | target["boxes"][:, 1::2] += top 214 | 215 | return image, target 216 | 217 | 218 | class RandomPhotometricDistort(nn.Module): 219 | def __init__( 220 | self, 221 | contrast: Tuple[float, float] = (0.5, 1.5), 222 | saturation: Tuple[float, float] = (0.5, 1.5), 223 | hue: Tuple[float, float] = (-0.05, 0.05), 224 | brightness: Tuple[float, float] = (0.875, 1.125), 225 | p: float = 0.5, 226 | ): 227 | super().__init__() 228 | self._brightness = T.ColorJitter(brightness=brightness) 229 | self._contrast = T.ColorJitter(contrast=contrast) 230 | self._hue = T.ColorJitter(hue=hue) 231 | self._saturation = T.ColorJitter(saturation=saturation) 232 | self.p = p 233 | 234 | def forward( 235 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 236 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 237 | if isinstance(image, torch.Tensor): 238 | if image.ndimension() not in {2, 3}: 239 | raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.") 240 | elif image.ndimension() == 2: 241 | image = image.unsqueeze(0) 242 | 243 | r = torch.rand(7) 244 | 245 | if r[0] < self.p: 246 | image = self._brightness(image) 247 | 248 | contrast_before = r[1] < 0.5 249 | if contrast_before: 250 | if r[2] < self.p: 251 | image = self._contrast(image) 252 | 253 | if r[3] < self.p: 254 | image = self._saturation(image) 255 | 256 | if r[4] < self.p: 257 | image = self._hue(image) 258 | 259 | if not contrast_before: 260 | if r[5] < self.p: 261 | image = self._contrast(image) 262 | 263 | if r[6] < self.p: 264 | channels, _, _ = F.get_dimensions(image) 265 | permutation = torch.randperm(channels) 266 | 267 | is_pil = F._is_pil_image(image) 268 | if is_pil: 269 | image = F.pil_to_tensor(image) 270 | image = F.convert_image_dtype(image) 271 | image = image[..., permutation, :, :] 272 | if is_pil: 273 | image = F.to_pil_image(image) 274 | 275 | return image, target 276 | 277 | 278 | class ScaleJitter(nn.Module): 279 | """Randomly resizes the image and its bounding boxes within the specified scale range. 280 | The class implements the Scale Jitter augmentation as described in the paper 281 | `"Simple Copy-Paste is a Strong Data Augmentation Method for Instance Segmentation" `_. 
282 | 283 | Args: 284 | target_size (tuple of ints): The target size for the transform provided in (height, weight) format. 285 | scale_range (tuple of ints): scaling factor interval, e.g (a, b), then scale is randomly sampled from the 286 | range a <= scale <= b. 287 | interpolation (InterpolationMode): Desired interpolation enum defined by 288 | :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. 289 | """ 290 | 291 | def __init__( 292 | self, 293 | target_size: Tuple[int, int], 294 | scale_range: Tuple[float, float] = (0.1, 2.0), 295 | interpolation: InterpolationMode = InterpolationMode.BILINEAR, 296 | ): 297 | super().__init__() 298 | self.target_size = target_size 299 | self.scale_range = scale_range 300 | self.interpolation = interpolation 301 | 302 | def forward( 303 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 304 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 305 | if isinstance(image, torch.Tensor): 306 | if image.ndimension() not in {2, 3}: 307 | raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.") 308 | elif image.ndimension() == 2: 309 | image = image.unsqueeze(0) 310 | 311 | _, orig_height, orig_width = F.get_dimensions(image) 312 | 313 | scale = self.scale_range[0] + torch.rand(1) * (self.scale_range[1] - self.scale_range[0]) 314 | r = min(self.target_size[1] / orig_height, self.target_size[0] / orig_width) * scale 315 | new_width = int(orig_width * r) 316 | new_height = int(orig_height * r) 317 | 318 | image = F.resize(image, [new_height, new_width], interpolation=self.interpolation) 319 | 320 | if target is not None: 321 | target["boxes"][:, 0::2] *= new_width / orig_width 322 | target["boxes"][:, 1::2] *= new_height / orig_height 323 | if "masks" in target: 324 | target["masks"] = F.resize( 325 | target["masks"], [new_height, new_width], interpolation=InterpolationMode.NEAREST 326 | ) 327 | 328 | return image, target 329 | 330 | 331 | class FixedSizeCrop(nn.Module): 332 | def __init__(self, size, fill=0, padding_mode="constant"): 333 | super().__init__() 334 | size = tuple(T._setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")) 335 | self.crop_height = size[0] 336 | self.crop_width = size[1] 337 | self.fill = fill # TODO: Fill is currently respected only on PIL. Apply tensor patch. 
338 | self.padding_mode = padding_mode 339 | 340 | def _pad(self, img, target, padding): 341 | # Taken from the functional_tensor.py pad 342 | if isinstance(padding, int): 343 | pad_left = pad_right = pad_top = pad_bottom = padding 344 | elif len(padding) == 1: 345 | pad_left = pad_right = pad_top = pad_bottom = padding[0] 346 | elif len(padding) == 2: 347 | pad_left = pad_right = padding[0] 348 | pad_top = pad_bottom = padding[1] 349 | else: 350 | pad_left = padding[0] 351 | pad_top = padding[1] 352 | pad_right = padding[2] 353 | pad_bottom = padding[3] 354 | 355 | padding = [pad_left, pad_top, pad_right, pad_bottom] 356 | img = F.pad(img, padding, self.fill, self.padding_mode) 357 | if target is not None: 358 | target["boxes"][:, 0::2] += pad_left 359 | target["boxes"][:, 1::2] += pad_top 360 | if "masks" in target: 361 | target["masks"] = F.pad(target["masks"], padding, 0, "constant") 362 | 363 | return img, target 364 | 365 | def _crop(self, img, target, top, left, height, width): 366 | img = F.crop(img, top, left, height, width) 367 | if target is not None: 368 | boxes = target["boxes"] 369 | boxes[:, 0::2] -= left 370 | boxes[:, 1::2] -= top 371 | boxes[:, 0::2].clamp_(min=0, max=width) 372 | boxes[:, 1::2].clamp_(min=0, max=height) 373 | 374 | is_valid = (boxes[:, 0] < boxes[:, 2]) & (boxes[:, 1] < boxes[:, 3]) 375 | 376 | target["boxes"] = boxes[is_valid] 377 | target["labels"] = target["labels"][is_valid] 378 | if "masks" in target: 379 | target["masks"] = F.crop(target["masks"][is_valid], top, left, height, width) 380 | 381 | return img, target 382 | 383 | def forward(self, img, target=None): 384 | _, height, width = F.get_dimensions(img) 385 | new_height = min(height, self.crop_height) 386 | new_width = min(width, self.crop_width) 387 | 388 | if new_height != height or new_width != width: 389 | offset_height = max(height - self.crop_height, 0) 390 | offset_width = max(width - self.crop_width, 0) 391 | 392 | r = torch.rand(1) 393 | top = int(offset_height * r) 394 | left = int(offset_width * r) 395 | 396 | img, target = self._crop(img, target, top, left, new_height, new_width) 397 | 398 | pad_bottom = max(self.crop_height - new_height, 0) 399 | pad_right = max(self.crop_width - new_width, 0) 400 | if pad_bottom != 0 or pad_right != 0: 401 | img, target = self._pad(img, target, [0, 0, pad_right, pad_bottom]) 402 | 403 | return img, target 404 | 405 | 406 | class RandomShortestSize(nn.Module): 407 | def __init__( 408 | self, 409 | min_size: Union[List[int], Tuple[int], int], 410 | max_size: int, 411 | interpolation: InterpolationMode = InterpolationMode.BILINEAR, 412 | ): 413 | super().__init__() 414 | self.min_size = [min_size] if isinstance(min_size, int) else list(min_size) 415 | self.max_size = max_size 416 | self.interpolation = interpolation 417 | 418 | def forward( 419 | self, image: Tensor, target: Optional[Dict[str, Tensor]] = None 420 | ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: 421 | _, orig_height, orig_width = F.get_dimensions(image) 422 | 423 | min_size = self.min_size[torch.randint(len(self.min_size), (1,)).item()] 424 | r = min(min_size / min(orig_height, orig_width), self.max_size / max(orig_height, orig_width)) 425 | 426 | new_width = int(orig_width * r) 427 | new_height = int(orig_height * r) 428 | 429 | image = F.resize(image, [new_height, new_width], interpolation=self.interpolation) 430 | 431 | if target is not None: 432 | target["boxes"][:, 0::2] *= new_width / orig_width 433 | target["boxes"][:, 1::2] *= new_height / orig_height 434 | if "masks" in 
target: 435 | target["masks"] = F.resize( 436 | target["masks"], [new_height, new_width], interpolation=InterpolationMode.NEAREST 437 | ) 438 | 439 | return image, target 440 | 441 | 442 | def _copy_paste( 443 | image: torch.Tensor, 444 | target: Dict[str, Tensor], 445 | paste_image: torch.Tensor, 446 | paste_target: Dict[str, Tensor], 447 | blending: bool = True, 448 | resize_interpolation: F.InterpolationMode = F.InterpolationMode.BILINEAR, 449 | ) -> Tuple[torch.Tensor, Dict[str, Tensor]]: 450 | 451 | # Random paste targets selection: 452 | num_masks = len(paste_target["masks"]) 453 | 454 | if num_masks < 1: 455 | # Such degerante case with num_masks=0 can happen with LSJ 456 | # Let's just return (image, target) 457 | return image, target 458 | 459 | # We have to please torch script by explicitly specifying dtype as torch.long 460 | random_selection = torch.randint(0, num_masks, (num_masks,), device=paste_image.device) 461 | random_selection = torch.unique(random_selection).to(torch.long) 462 | 463 | paste_masks = paste_target["masks"][random_selection] 464 | paste_boxes = paste_target["boxes"][random_selection] 465 | paste_labels = paste_target["labels"][random_selection] 466 | 467 | masks = target["masks"] 468 | 469 | # We resize source and paste data if they have different sizes 470 | # This is something we introduced here as originally the algorithm works 471 | # on equal-sized data (for example, coming from LSJ data augmentations) 472 | size1 = image.shape[-2:] 473 | size2 = paste_image.shape[-2:] 474 | if size1 != size2: 475 | paste_image = F.resize(paste_image, size1, interpolation=resize_interpolation) 476 | paste_masks = F.resize(paste_masks, size1, interpolation=F.InterpolationMode.NEAREST) 477 | # resize bboxes: 478 | ratios = torch.tensor((size1[1] / size2[1], size1[0] / size2[0]), device=paste_boxes.device) 479 | paste_boxes = paste_boxes.view(-1, 2, 2).mul(ratios).view(paste_boxes.shape) 480 | 481 | paste_alpha_mask = paste_masks.sum(dim=0) > 0 482 | 483 | if blending: 484 | paste_alpha_mask = F.gaussian_blur( 485 | paste_alpha_mask.unsqueeze(0), 486 | kernel_size=(5, 5), 487 | sigma=[ 488 | 2.0, 489 | ], 490 | ) 491 | 492 | # Copy-paste images: 493 | image = (image * (~paste_alpha_mask)) + (paste_image * paste_alpha_mask) 494 | 495 | # Copy-paste masks: 496 | masks = masks * (~paste_alpha_mask) 497 | non_all_zero_masks = masks.sum((-1, -2)) > 0 498 | masks = masks[non_all_zero_masks] 499 | 500 | # Do a shallow copy of the target dict 501 | out_target = {k: v for k, v in target.items()} 502 | 503 | out_target["masks"] = torch.cat([masks, paste_masks]) 504 | 505 | # Copy-paste boxes and labels 506 | boxes = ops.masks_to_boxes(masks) 507 | out_target["boxes"] = torch.cat([boxes, paste_boxes]) 508 | 509 | labels = target["labels"][non_all_zero_masks] 510 | out_target["labels"] = torch.cat([labels, paste_labels]) 511 | 512 | # Update additional optional keys: area and iscrowd if exist 513 | if "area" in target: 514 | out_target["area"] = out_target["masks"].sum((-1, -2)).to(torch.float32) 515 | 516 | if "iscrowd" in target and "iscrowd" in paste_target: 517 | # target['iscrowd'] size can be differ from mask size (non_all_zero_masks) 518 | # For example, if previous transforms geometrically modifies masks/boxes/labels but 519 | # does not update "iscrowd" 520 | if len(target["iscrowd"]) == len(non_all_zero_masks): 521 | iscrowd = target["iscrowd"][non_all_zero_masks] 522 | paste_iscrowd = paste_target["iscrowd"][random_selection] 523 | out_target["iscrowd"] = 
torch.cat([iscrowd, paste_iscrowd]) 524 | 525 | # Check for degenerated boxes and remove them 526 | boxes = out_target["boxes"] 527 | degenerate_boxes = boxes[:, 2:] <= boxes[:, :2] 528 | if degenerate_boxes.any(): 529 | valid_targets = ~degenerate_boxes.any(dim=1) 530 | 531 | out_target["boxes"] = boxes[valid_targets] 532 | out_target["masks"] = out_target["masks"][valid_targets] 533 | out_target["labels"] = out_target["labels"][valid_targets] 534 | 535 | if "area" in out_target: 536 | out_target["area"] = out_target["area"][valid_targets] 537 | if "iscrowd" in out_target and len(out_target["iscrowd"]) == len(valid_targets): 538 | out_target["iscrowd"] = out_target["iscrowd"][valid_targets] 539 | 540 | return image, out_target 541 | 542 | 543 | class SimpleCopyPaste(torch.nn.Module): 544 | def __init__(self, blending=True, resize_interpolation=F.InterpolationMode.BILINEAR): 545 | super().__init__() 546 | self.resize_interpolation = resize_interpolation 547 | self.blending = blending 548 | 549 | def forward( 550 | self, images: List[torch.Tensor], targets: List[Dict[str, Tensor]] 551 | ) -> Tuple[List[torch.Tensor], List[Dict[str, Tensor]]]: 552 | torch._assert( 553 | isinstance(images, (list, tuple)) and all([isinstance(v, torch.Tensor) for v in images]), 554 | "images should be a list of tensors", 555 | ) 556 | torch._assert( 557 | isinstance(targets, (list, tuple)) and len(images) == len(targets), 558 | "targets should be a list of the same size as images", 559 | ) 560 | for target in targets: 561 | # Can not check for instance type dict with inside torch.jit.script 562 | # torch._assert(isinstance(target, dict), "targets item should be a dict") 563 | for k in ["masks", "boxes", "labels"]: 564 | torch._assert(k in target, f"Key {k} should be present in targets") 565 | torch._assert(isinstance(target[k], torch.Tensor), f"Value for the key {k} should be a tensor") 566 | 567 | # images = [t1, t2, ..., tN] 568 | # Let's define paste_images as shifted list of input images 569 | # paste_images = [t2, t3, ..., tN, t1] 570 | # FYI: in TF they mix data on the dataset level 571 | images_rolled = images[-1:] + images[:-1] 572 | targets_rolled = targets[-1:] + targets[:-1] 573 | 574 | output_images: List[torch.Tensor] = [] 575 | output_targets: List[Dict[str, Tensor]] = [] 576 | 577 | for image, target, paste_image, paste_target in zip(images, targets, images_rolled, targets_rolled): 578 | output_image, output_data = _copy_paste( 579 | image, 580 | target, 581 | paste_image, 582 | paste_target, 583 | blending=self.blending, 584 | resize_interpolation=self.resize_interpolation, 585 | ) 586 | output_images.append(output_image) 587 | output_targets.append(output_data) 588 | 589 | return output_images, output_targets 590 | 591 | def __repr__(self) -> str: 592 | s = f"{self.__class__.__name__}(blending={self.blending}, resize_interpolation={self.resize_interpolation})" 593 | return s 594 | -------------------------------------------------------------------------------- /helmet/ml/detection/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import errno 3 | import os 4 | import time 5 | from collections import defaultdict, deque 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | class SmoothedValue: 12 | """Track a series of values and provide access to smoothed values over a 13 | window or the global series average. 
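    A minimal illustrative sketch (the values are made up):

        meter = SmoothedValue(window_size=20)   # default fmt: "{median:.4f} ({global_avg:.4f})"
        for loss in (0.9, 0.7, 0.65):
            meter.update(loss)
        print(meter)   # -> "0.7000 (0.7500)": windowed median and global average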
14 | """ 15 | 16 | def __init__(self, window_size=20, fmt=None): 17 | if fmt is None: 18 | fmt = "{median:.4f} ({global_avg:.4f})" 19 | self.deque = deque(maxlen=window_size) 20 | self.total = 0.0 21 | self.count = 0 22 | self.fmt = fmt 23 | 24 | def update(self, value, n=1): 25 | self.deque.append(value) 26 | self.count += n 27 | self.total += value * n 28 | 29 | def synchronize_between_processes(self): 30 | """ 31 | Warning: does not synchronize the deque! 32 | """ 33 | if not is_dist_avail_and_initialized(): 34 | return 35 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") 36 | dist.barrier() 37 | dist.all_reduce(t) 38 | t = t.tolist() 39 | self.count = int(t[0]) 40 | self.total = t[1] 41 | 42 | @property 43 | def median(self): 44 | d = torch.tensor(list(self.deque)) 45 | return d.median().item() 46 | 47 | @property 48 | def avg(self): 49 | d = torch.tensor(list(self.deque), dtype=torch.float32) 50 | return d.mean().item() 51 | 52 | @property 53 | def global_avg(self): 54 | return self.total / self.count 55 | 56 | @property 57 | def max(self): 58 | return max(self.deque) 59 | 60 | @property 61 | def value(self): 62 | return self.deque[-1] 63 | 64 | def __str__(self): 65 | return self.fmt.format( 66 | median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value 67 | ) 68 | 69 | 70 | def all_gather(data): 71 | """ 72 | Run all_gather on arbitrary picklable data (not necessarily tensors) 73 | Args: 74 | data: any picklable object 75 | Returns: 76 | list[data]: list of data gathered from each rank 77 | """ 78 | world_size = get_world_size() 79 | if world_size == 1: 80 | return [data] 81 | data_list = [None] * world_size 82 | dist.all_gather_object(data_list, data) 83 | return data_list 84 | 85 | 86 | def reduce_dict(input_dict, average=True): 87 | """ 88 | Args: 89 | input_dict (dict): all the values will be reduced 90 | average (bool): whether to do average or sum 91 | Reduce the values in the dictionary from all processes so that all processes 92 | have the averaged results. Returns a dict with the same fields as 93 | input_dict, after reduction. 
94 | """ 95 | world_size = get_world_size() 96 | if world_size < 2: 97 | return input_dict 98 | with torch.inference_mode(): 99 | names = [] 100 | values = [] 101 | # sort the keys so that they are consistent across processes 102 | for k in sorted(input_dict.keys()): 103 | names.append(k) 104 | values.append(input_dict[k]) 105 | values = torch.stack(values, dim=0) 106 | dist.all_reduce(values) 107 | if average: 108 | values /= world_size 109 | reduced_dict = {k: v for k, v in zip(names, values)} 110 | return reduced_dict 111 | 112 | 113 | class MetricLogger: 114 | def __init__(self, delimiter="\t"): 115 | self.meters = defaultdict(SmoothedValue) 116 | self.delimiter = delimiter 117 | 118 | def update(self, **kwargs): 119 | for k, v in kwargs.items(): 120 | if isinstance(v, torch.Tensor): 121 | v = v.item() 122 | assert isinstance(v, (float, int)) 123 | self.meters[k].update(v) 124 | 125 | def __getattr__(self, attr): 126 | if attr in self.meters: 127 | return self.meters[attr] 128 | if attr in self.__dict__: 129 | return self.__dict__[attr] 130 | raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") 131 | 132 | def __str__(self): 133 | loss_str = [] 134 | for name, meter in self.meters.items(): 135 | loss_str.append(f"{name}: {str(meter)}") 136 | return self.delimiter.join(loss_str) 137 | 138 | def synchronize_between_processes(self): 139 | for meter in self.meters.values(): 140 | meter.synchronize_between_processes() 141 | 142 | def add_meter(self, name, meter): 143 | self.meters[name] = meter 144 | 145 | def log_every(self, iterable, print_freq, header=None): 146 | i = 0 147 | if not header: 148 | header = "" 149 | start_time = time.time() 150 | end = time.time() 151 | iter_time = SmoothedValue(fmt="{avg:.4f}") 152 | data_time = SmoothedValue(fmt="{avg:.4f}") 153 | space_fmt = ":" + str(len(str(len(iterable)))) + "d" 154 | if torch.cuda.is_available(): 155 | log_msg = self.delimiter.join( 156 | [ 157 | header, 158 | "[{0" + space_fmt + "}/{1}]", 159 | "eta: {eta}", 160 | "{meters}", 161 | "time: {time}", 162 | "data: {data}", 163 | "max mem: {memory:.0f}", 164 | ] 165 | ) 166 | else: 167 | log_msg = self.delimiter.join( 168 | [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] 169 | ) 170 | MB = 1024.0 * 1024.0 171 | for obj in iterable: 172 | data_time.update(time.time() - end) 173 | yield obj 174 | iter_time.update(time.time() - end) 175 | if i % print_freq == 0 or i == len(iterable) - 1: 176 | eta_seconds = iter_time.global_avg * (len(iterable) - i) 177 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 178 | if torch.cuda.is_available(): 179 | print( 180 | log_msg.format( 181 | i, 182 | len(iterable), 183 | eta=eta_string, 184 | meters=str(self), 185 | time=str(iter_time), 186 | data=str(data_time), 187 | memory=torch.cuda.max_memory_allocated() / MB, 188 | ) 189 | ) 190 | else: 191 | print( 192 | log_msg.format( 193 | i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) 194 | ) 195 | ) 196 | i += 1 197 | end = time.time() 198 | total_time = time.time() - start_time 199 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 200 | print(f"{header} Total time: {total_time_str} ({total_time / len(iterable):.4f} s / it)") 201 | 202 | 203 | def collate_fn(batch): 204 | return tuple(zip(*batch)) 205 | 206 | 207 | def mkdir(path): 208 | try: 209 | os.makedirs(path) 210 | except OSError as e: 211 | if e.errno != errno.EEXIST: 212 | raise 213 | 214 | 215 
| def setup_for_distributed(is_master): 216 | """ 217 | This function disables printing when not in master process 218 | """ 219 | import builtins as __builtin__ 220 | 221 | builtin_print = __builtin__.print 222 | 223 | def print(*args, **kwargs): 224 | force = kwargs.pop("force", False) 225 | if is_master or force: 226 | builtin_print(*args, **kwargs) 227 | 228 | __builtin__.print = print 229 | 230 | 231 | def is_dist_avail_and_initialized(): 232 | if not dist.is_available(): 233 | return False 234 | if not dist.is_initialized(): 235 | return False 236 | return True 237 | 238 | 239 | def get_world_size(): 240 | if not is_dist_avail_and_initialized(): 241 | return 1 242 | return dist.get_world_size() 243 | 244 | 245 | def get_rank(): 246 | if not is_dist_avail_and_initialized(): 247 | return 0 248 | return dist.get_rank() 249 | 250 | 251 | def is_main_process(): 252 | return get_rank() == 0 253 | 254 | 255 | def save_on_master(*args, **kwargs): 256 | if is_main_process(): 257 | torch.save(*args, **kwargs) 258 | 259 | 260 | def init_distributed_mode(args): 261 | if "RANK" in os.environ and "WORLD_SIZE" in os.environ: 262 | args.rank = int(os.environ["RANK"]) 263 | args.world_size = int(os.environ["WORLD_SIZE"]) 264 | args.gpu = int(os.environ["LOCAL_RANK"]) 265 | elif "SLURM_PROCID" in os.environ: 266 | args.rank = int(os.environ["SLURM_PROCID"]) 267 | args.gpu = args.rank % torch.cuda.device_count() 268 | else: 269 | print("Not using distributed mode") 270 | args.distributed = False 271 | return 272 | 273 | args.distributed = True 274 | 275 | torch.cuda.set_device(args.gpu) 276 | args.dist_backend = "nccl" 277 | print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) 278 | torch.distributed.init_process_group( 279 | backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank 280 | ) 281 | torch.distributed.barrier() 282 | setup_for_distributed(args.rank == 0) 283 | -------------------------------------------------------------------------------- /helmet/ml/feature/helmet_detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import datasets 3 | from pycocotools.coco import COCO 4 | from helmet.constants import ANNOTATIONS_COCO_JSON_FILE 5 | from helmet.exception import HelmetException 6 | import cv2 7 | import os 8 | import sys 9 | import copy 10 | 11 | 12 | class HelmetDetection(datasets.VisionDataset): 13 | 14 | def __init__(self, root, split='train', transform=None, target_transform=None, transforms=None): 15 | # the 3 transform parameters are required for datasets.VisionDataset 16 | super().__init__(root, transforms, transform, target_transform) 17 | self.split = split #train, valid, test 18 | self.coco = COCO(os.path.join(root, split, ANNOTATIONS_COCO_JSON_FILE)) # annotation stored here 19 | self.ids = list(sorted(self.coco.imgs.keys())) 20 | self.ids = [id for id in self.ids if (len(self._load_target(id)) > 0)] 21 | 22 | def _load_image(self, id: int): 23 | try: 24 | path = self.coco.loadImgs(id)[0]['file_name'] 25 | image = cv2.imread(os.path.join(self.root, self.split, path)) 26 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 27 | return image 28 | except Exception as e: 29 | raise HelmetException(e, sys) from e 30 | 31 | def _load_target(self, id): 32 | try: 33 | return self.coco.loadAnns(self.coco.getAnnIds(id)) 34 | except Exception as e: 35 | raise HelmetException(e, sys) from e 36 | 37 | def __getitem__(self, index): 38 | try: 39 | id = 
self.ids[index] 40 | image = self._load_image(id) 41 | # deep-copy the annotations so the transforms below do not mutate the cached COCO entries 42 | target = copy.deepcopy(self._load_target(id)) 43 | 44 | boxes = [t['bbox'] + [t['category_id']] for t in target] # required annotation format for albumentations 45 | if self.transforms is not None: 46 | transformed = self.transforms(image=image, bboxes=boxes) 47 | 48 | image = transformed['image'] 49 | boxes = transformed['bboxes'] 50 | 51 | new_boxes = [] # convert from xywh to xyxy 52 | for box in boxes: 53 | xmin = box[0] 54 | xmax = xmin + box[2] 55 | ymin = box[1] 56 | ymax = ymin + box[3] 57 | new_boxes.append([xmin, ymin, xmax, ymax]) 58 | 59 | boxes = torch.tensor(new_boxes, dtype=torch.float32) 60 | 61 | targ = {} # here is our transformed target 62 | targ['boxes'] = boxes 63 | targ['labels'] = torch.tensor([t['category_id'] for t in target], dtype=torch.int64) 64 | targ['image_id'] = torch.tensor([t['image_id'] for t in target]) 65 | targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) # recompute the area from the transformed xyxy boxes 66 | targ['iscrowd'] = torch.tensor([t['iscrowd'] for t in target], dtype=torch.int64) 67 | return image.div(255), targ # scale pixel values to [0, 1] 68 | 69 | except Exception as e: 70 | raise HelmetException(e, sys) from e 71 | 72 | def __len__(self): 73 | try: 74 | return len(self.ids) 75 | 76 | except Exception as e: 77 | raise HelmetException(e, sys) from e 78 | -------------------------------------------------------------------------------- /helmet/ml/models/model_optimiser.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def model_optimiser(model): 5 | params = [p for p in model.parameters() if p.requires_grad] 6 | optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, nesterov=True, weight_decay=1e-4) 7 | return optimizer 8 | -------------------------------------------------------------------------------- /helmet/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/pipeline/__init__.py -------------------------------------------------------------------------------- /helmet/pipeline/prediction_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | import sys 4 | from PIL import Image 5 | import base64 6 | from io import BytesIO 7 | from torchvision import transforms 8 | from torchvision.utils import draw_bounding_boxes 9 | from helmet.exception import HelmetException 10 | from helmet.logger import logging 11 | from helmet.configuration.s3_operations import S3Operation 12 | from helmet.constants import * 13 | import torch # needed for torch.tensor and torch.load below 14 | 15 | class PredictionPipeline: 16 | def __init__(self): 17 | self.s3 = S3Operation() 18 | self.bucket_name = BUCKET_NAME 19 | 20 | def image_loader(self, image_bytes): 21 | """Load an image from raw bytes; returns a float tensor and a uint8 copy for drawing.""" 22 | logging.info("Entered the image_loader method of PredictionPipeline class") 23 | try: 24 | # image = Image.open(io.BytesIO(image_bytes)).convert('RGB') 25 | image = Image.open(io.BytesIO(image_bytes)) 26 | convert_tensor = transforms.ToTensor() 27 | tensor_image = convert_tensor(image) 28 | # image = image[:3] 29 | image_int = torch.tensor(tensor_image * 255, dtype=torch.uint8) 30 | logging.info("Exited the image_loader method of PredictionPipeline class") 31 | return tensor_image, image_int 32 | 33 | except Exception as e: 34 | raise
HelmetException(e, sys) from e 35 | 36 | 37 | def get_model_from_s3(self) -> str: 38 | """ 39 | Method Name : get_model_from_s3 40 | Description : This method downloads the best trained model from the S3 bucket. 41 | 42 | Output : Local path of the downloaded model 43 | """ 44 | logging.info("Entered the get_model_from_s3 method of PredictionPipeline class") 45 | try: 46 | # Loading the best model from s3 bucket 47 | os.makedirs("artifacts/PredictModel", exist_ok=True) 48 | predict_model_path = os.path.join(os.getcwd(), "artifacts", "PredictModel", TRAINED_MODEL_NAME) 49 | best_model_path = self.s3.read_data_from_s3(TRAINED_MODEL_NAME, self.bucket_name, predict_model_path) 50 | logging.info("Exited the get_model_from_s3 method of PredictionPipeline class") 51 | return best_model_path 52 | 53 | except Exception as e: 54 | raise HelmetException(e, sys) from e 55 | 56 | 57 | 58 | def prediction(self, best_model_path: str, image_tensor, image_int_tensor) -> bytes: 59 | logging.info("Entered the prediction method of PredictionPipeline class") 60 | try: 61 | model = torch.load(best_model_path, map_location=torch.device(DEVICE)) 62 | model.eval() 63 | with torch.no_grad(): 64 | prediction = model([image_tensor.to(DEVICE)]) 65 | pred = prediction[0] 66 | 67 | bbox_tensor = draw_bounding_boxes(image_int_tensor, 68 | pred['boxes'][pred['scores'] > 0.8], 69 | [PREDICTION_CLASSES[i] for i in pred['labels'][pred['scores'] > 0.8].tolist()], 70 | width=4).permute(0, 2, 1) 71 | 72 | transform = transforms.ToPILImage() 73 | img = transform(bbox_tensor) 74 | buffered = BytesIO() 75 | img.save(buffered, format="JPEG") 76 | img_str = base64.b64encode(buffered.getvalue()) 77 | 78 | logging.info("Exited the prediction method of PredictionPipeline class") 79 | return img_str # base64-encoded JPEG bytes 80 | 81 | except Exception as e: 82 | raise HelmetException(e, sys) from e 83 | 84 | 85 | 86 | 87 | def run_pipeline(self, data): 88 | logging.info("Entered the run_pipeline method of PredictionPipeline class") 89 | try: 90 | image, image_int = self.image_loader(data) 91 | print(image.shape) 92 | print(image_int.shape) 93 | best_model_path: str = self.get_model_from_s3() 94 | detected_image = self.prediction(best_model_path, image, image_int) 95 | logging.info("Exited the run_pipeline method of PredictionPipeline class") 96 | return detected_image 97 | except Exception as e: 98 | raise HelmetException(e, sys) from e 99 | -------------------------------------------------------------------------------- /helmet/pipeline/train_pipeline.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from helmet.components.data_ingestion import DataIngestion 3 | from helmet.components.data_transformation import DataTransformation 4 | from helmet.components.model_trainer import ModelTrainer 5 | from helmet.components.model_evaluation import ModelEvaluation 6 | from helmet.components.model_pusher import ModelPusher 7 | from helmet.configuration.s3_operations import S3Operation 8 | from helmet.entity.config_entity import DataIngestionConfig, DataTransformationConfig, ModelTrainerConfig, ModelEvaluationConfig, ModelPusherConfig 9 | from helmet.entity.artifacts_entity import DataIngestionArtifacts, DataTransformationArtifacts, ModelTrainerArtifacts, ModelEvaluationArtifacts, ModelPusherArtifacts 10 | from helmet.logger import logging 11 | from helmet.exception import HelmetException 12 | 13 | 14 | 15 | 16 | class TrainPipeline: 17 | def __init__(self): 18 | self.data_ingestion_config = DataIngestionConfig() 19 | self.data_transformation_config = DataTransformationConfig() 20 |
self.model_trainer_config = ModelTrainerConfig() 21 | self.model_evaluation_config = ModelEvaluationConfig() 22 | self.model_pusher_config = ModelPusherConfig() 23 | self.s3_operations = S3Operation() 24 | 25 | 26 | 27 | def start_data_ingestion(self) -> DataIngestionArtifacts: 28 | logging.info("Entered the start_data_ingestion method of TrainPipeline class") 29 | try: 30 | logging.info("Getting the data from S3 bucket") 31 | data_ingestion = DataIngestion( 32 | data_ingestion_config=self.data_ingestion_config, s3_operations= S3Operation() 33 | ) 34 | data_ingestion_artifact = data_ingestion.initiate_data_ingestion() 35 | logging.info("Got the train, test and valid from s3") 36 | logging.info("Exited the start_data_ingestion method of TrainPipeline class") 37 | return data_ingestion_artifact 38 | 39 | except Exception as e: 40 | raise HelmetException(e, sys) from e 41 | 42 | 43 | 44 | def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifacts,) -> DataTransformationArtifacts: 45 | logging.info( 46 | "Entered the start_data_transformation method of TrainPipeline class" 47 | ) 48 | try: 49 | data_transformation = DataTransformation( 50 | 51 | data_ingestion_artifact=data_ingestion_artifact, 52 | data_transformation_config=self.data_transformation_config, 53 | ) 54 | data_transformation_artifact = ( 55 | data_transformation.initiate_data_transformation() 56 | ) 57 | logging.info( 58 | "Exited the start_data_transformation method of TrainPipeline class" 59 | ) 60 | return data_transformation_artifact 61 | 62 | except Exception as e: 63 | raise HelmetException(e, sys) from e 64 | 65 | 66 | def start_model_trainer(self, data_transformation_artifact: DataTransformationArtifacts) -> ModelTrainerArtifacts: 67 | logging.info( 68 | "Entered the start_model_trainer method of TrainPipeline class" 69 | ) 70 | try: 71 | model_trainer = ModelTrainer(data_transformation_artifacts=data_transformation_artifact, 72 | model_trainer_config=self.model_trainer_config 73 | ) 74 | model_trainer_artifact = model_trainer.initiate_model_trainer() 75 | logging.info("Exited the start_model_trainer method of TrainPipeline class") 76 | return model_trainer_artifact 77 | 78 | except Exception as e: 79 | raise HelmetException(e, sys) 80 | 81 | 82 | def start_model_evaluation(self, model_trainer_artifact: ModelTrainerArtifacts, data_transformation_artifact: DataTransformationArtifacts) -> ModelEvaluationArtifacts: 83 | logging.info("Entered the start_model_evaluation method of TrainPipeline class") 84 | try: 85 | model_evaluation = ModelEvaluation(data_transformation_artifacts = data_transformation_artifact, 86 | model_evaluation_config=self.model_evaluation_config, 87 | model_trainer_artifacts=model_trainer_artifact) 88 | 89 | model_evaluation_artifact = model_evaluation.initiate_model_evaluation() 90 | logging.info("Exited the start_model_evaluation method of TrainPipeline class") 91 | return model_evaluation_artifact 92 | 93 | except Exception as e: 94 | raise HelmetException(e, sys) from e 95 | 96 | 97 | 98 | def start_model_pusher(self,s3: S3Operation,) -> ModelPusherArtifacts: 99 | logging.info("Entered the start_model_pusher method of TrainPipeline class") 100 | try: 101 | model_pusher = ModelPusher( 102 | model_pusher_config=self.model_pusher_config, 103 | s3=s3, 104 | ) 105 | model_pusher_artifact = model_pusher.initiate_model_pusher() 106 | logging.info("Initiated the model pusher") 107 | logging.info("Exited the start_model_pusher method of TrainPipeline class") 108 | return 
model_pusher_artifact 109 | 110 | except Exception as e: 111 | raise HelmetException(e, sys) from e 112 | 113 | 114 | 115 | def run_pipeline(self) -> None: 116 | logging.info("Entered the run_pipeline method of TrainPipeline class") 117 | try: 118 | data_ingestion_artifact = self.start_data_ingestion() 119 | data_transformation_artifact = self.start_data_transformation( 120 | data_ingestion_artifact=data_ingestion_artifact 121 | ) 122 | model_trainer_artifact = self.start_model_trainer( 123 | data_transformation_artifact=data_transformation_artifact 124 | ) 125 | model_evaluation_artifact = self.start_model_evaluation(model_trainer_artifact=model_trainer_artifact, 126 | data_transformation_artifact=data_transformation_artifact 127 | ) 128 | if not model_evaluation_artifact.is_model_accepted: 129 | raise Exception("Trained model is not better than the best model") 130 | 131 | model_pusher_artifact = self.start_model_pusher(s3=self.s3_operations) 132 | 133 | logging.info("Exited the run_pipeline method of TrainPipeline class") 134 | 135 | except Exception as e: 136 | raise HelmetException(e, sys) from e -------------------------------------------------------------------------------- /helmet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/Helmet-Detection-PyTorch/65cdd44a38e2a32fee00a18e84bd0fd9dae90e9c/helmet/utils/__init__.py -------------------------------------------------------------------------------- /helmet/utils/main_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import dill 5 | import base64 6 | from helmet.logger import logging 7 | from helmet.exception import HelmetException 8 | 9 | 10 | def save_object(file_path: str, obj: object) -> None: 11 | logging.info("Entered the save_object method of utils") 12 | 13 | try: 14 | os.makedirs(os.path.dirname(file_path), exist_ok=True) 15 | with open(file_path, "wb") as file_obj: 16 | dill.dump(obj, file_obj) 17 | 18 | logging.info("Exited the save_object method of utils") 19 | 20 | except Exception as e: 21 | raise HelmetException(e, sys) from e 22 | 23 | 24 | def load_object(file_path: str) -> object: 25 | logging.info("Entered the load_object method of utils") 26 | 27 | try: 28 | 29 | with open(file_path, "rb") as file_obj: 30 | obj = dill.load(file_obj) 31 | 32 | logging.info("Exited the load_object method of utils") 33 | 34 | return obj 35 | 36 | except Exception as e: 37 | raise HelmetException(e, sys) from e 38 | 39 | 40 | def image_to_base64(image): 41 | with open(image, "rb") as img_file: 42 | my_string = base64.b64encode(img_file.read()) 43 | 44 | return my_string 45 | 46 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | notebook 3 | pandas 4 | matplotlib 5 | opencv-python 6 | albumentations 7 | ipykernel 8 | tqdm 9 | pycocotools #Comment when creating Docker Image 10 | cython 11 | from-root 12 | boto3 13 | mypy-boto3-s3 14 | Pillow 15 | torch-summary 16 | fastapi 17 | uvicorn 18 | Jinja2 19 | python-multipart 20 | PyYAML 21 | dill==0.3.5.1 22 | utils 23 | torch>=1.7.0 # see https://pytorch.org/get-started/locally/ (recommended) 24 | torchvision>=0.8.1 25 | -e . 
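Note on usage: the TrainPipeline and PredictionPipeline classes above are the project's public entry points. The sketch below shows how they are typically invoked; it is a minimal illustration only, not the project's actual app.py (which is not reproduced in this section), it assumes the S3 constants and AWS credentials are already configured, and "sample.jpg" is a placeholder path.

# Hypothetical driver script; the real web wiring lives in app.py.
from helmet.pipeline.train_pipeline import TrainPipeline
from helmet.pipeline.prediction_pipeline import PredictionPipeline

if __name__ == "__main__":
    # Full training run: ingestion -> transformation -> training -> evaluation -> push to S3
    TrainPipeline().run_pipeline()

    # Prediction: run_pipeline takes raw image bytes and returns a base64-encoded JPEG
    # with the high-confidence helmet boxes drawn on it.
    with open("sample.jpg", "rb") as img_file:
        encoded_result = PredictionPipeline().run_pipeline(img_file.read())
    print(encoded_result[:60])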
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="helmet", 5 | version="0.0.1", 6 | author="Bappy", 7 | author_email="entbappy73@gmail.com", 8 | packages=find_packages(), 9 | install_requires=[], 10 | ) 11 | -------------------------------------------------------------------------------- /tools/cmd.txt: -------------------------------------------------------------------------------- 1 | COCO conversion command: 2 | python voc2coco.py annotations output.json 3 | -------------------------------------------------------------------------------- /tools/voc2coco.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # pip install lxml 4 | 5 | import sys 6 | import os 7 | import json 8 | import xml.etree.ElementTree as ET 9 | import glob 10 | 11 | START_BOUNDING_BOX_ID = 1 12 | PRE_DEFINE_CATEGORIES = None 13 | # If necessary, pre-define category and its id 14 | # PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4, 15 | # "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9, 16 | # "cow": 10, "diningtable": 11, "dog": 12, "horse": 13, 17 | # "motorbike": 14, "person": 15, "pottedplant": 16, 18 | # "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20} 19 | 20 | 21 | def get(root, name): 22 | vars = root.findall(name) 23 | return vars 24 | 25 | 26 | def get_and_check(root, name, length): 27 | vars = root.findall(name) 28 | if len(vars) == 0: 29 | raise ValueError("Can not find %s in %s." % (name, root.tag)) 30 | if length > 0 and len(vars) != length: 31 | raise ValueError( 32 | "The size of %s is supposed to be %d, but is %d." 33 | % (name, length, len(vars)) 34 | ) 35 | if length == 1: 36 | vars = vars[0] 37 | return vars 38 | 39 | 40 | def get_filename_as_int(filename): 41 | try: 42 | filename = filename.replace("\\", "/") 43 | filename = os.path.splitext(os.path.basename(filename))[0] 44 | return str(filename) # the filename stem is used as the image id (kept as a string despite the function name) 45 | except: 46 | raise ValueError("Filename %s is supposed to be an integer." % (filename)) 47 | 48 | 49 | def get_categories(xml_files): 50 | """Generate category name to id mapping from a list of xml files. 51 | 52 | Arguments: 53 | xml_files {list} -- A list of xml file paths. 54 | 55 | Returns: 56 | dict -- category name to id mapping.
57 | """ 58 | classes_names = [] 59 | for xml_file in xml_files: 60 | tree = ET.parse(xml_file) 61 | root = tree.getroot() 62 | for member in root.findall("object"): 63 | classes_names.append(member[0].text) 64 | classes_names = list(set(classes_names)) 65 | classes_names.sort() 66 | return {name: i for i, name in enumerate(classes_names)} 67 | 68 | 69 | def convert(xml_files, json_file): 70 | json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []} 71 | if PRE_DEFINE_CATEGORIES is not None: 72 | categories = PRE_DEFINE_CATEGORIES 73 | else: 74 | categories = get_categories(xml_files) 75 | bnd_id = START_BOUNDING_BOX_ID 76 | for xml_file in xml_files: 77 | tree = ET.parse(xml_file) 78 | root = tree.getroot() 79 | path = get(root, "path") 80 | if len(path) == 1: 81 | filename = os.path.basename(path[0].text) 82 | elif len(path) == 0: 83 | filename = get_and_check(root, "filename", 1).text 84 | else: 85 | raise ValueError("%d paths found in %s" % (len(path), xml_file)) 86 | ## The filename must be a number 87 | image_id = get_filename_as_int(filename) 88 | size = get_and_check(root, "size", 1) 89 | width = int(get_and_check(size, "width", 1).text) 90 | height = int(get_and_check(size, "height", 1).text) 91 | image = { 92 | "file_name": filename, 93 | "height": height, 94 | "width": width, 95 | "id": image_id, 96 | } 97 | json_dict["images"].append(image) 98 | ## Currently we do not support segmentation. 99 | # segmented = get_and_check(root, 'segmented', 1).text 100 | # assert segmented == '0' 101 | for obj in get(root, "object"): 102 | category = get_and_check(obj, "name", 1).text 103 | if category not in categories: 104 | new_id = len(categories) 105 | categories[category] = new_id 106 | category_id = categories[category] 107 | bndbox = get_and_check(obj, "bndbox", 1) 108 | xmin = int(get_and_check(bndbox, "xmin", 1).text) - 1 109 | ymin = int(get_and_check(bndbox, "ymin", 1).text) - 1 110 | xmax = int(get_and_check(bndbox, "xmax", 1).text) 111 | ymax = int(get_and_check(bndbox, "ymax", 1).text) 112 | assert xmax > xmin 113 | assert ymax > ymin 114 | o_width = abs(xmax - xmin) 115 | o_height = abs(ymax - ymin) 116 | ann = { 117 | "area": o_width * o_height, 118 | "iscrowd": 0, 119 | "image_id": image_id, 120 | "bbox": [xmin, ymin, o_width, o_height], 121 | "category_id": category_id, 122 | "id": bnd_id, 123 | "ignore": 0, 124 | "segmentation": [], 125 | } 126 | json_dict["annotations"].append(ann) 127 | bnd_id = bnd_id + 1 128 | 129 | for cate, cid in categories.items(): 130 | cat = {"supercategory": "none", "id": cid, "name": cate} 131 | json_dict["categories"].append(cat) 132 | 133 | #os.makedirs(os.path.dirname(json_file), exist_ok=True) 134 | json_fp = open(json_file, "w") 135 | json_str = json.dumps(json_dict) 136 | json_fp.write(json_str) 137 | json_fp.close() 138 | 139 | 140 | if __name__ == "__main__": 141 | import argparse 142 | 143 | parser = argparse.ArgumentParser( 144 | description="Convert Pascal VOC annotation to COCO format." 145 | ) 146 | parser.add_argument("xml_dir", help="Directory path to xml files.", type=str) 147 | parser.add_argument("json_file", help="Output COCO format json file.", type=str) 148 | args = parser.parse_args() 149 | xml_files = glob.glob(os.path.join(args.xml_dir, "*.xml")) 150 | 151 | # If you want to do train/test split, you can pass a subset of xml files to convert function. 
152 | print("Number of xml files: {}".format(len(xml_files))) 153 | convert(xml_files, args.json_file) 154 | print("Success: {}".format(args.json_file)) 155 | --------------------------------------------------------------------------------
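As the comment near the end of voc2coco.py notes, a train/valid split can be produced by passing subsets of the XML files to convert(). A minimal sketch of that workflow follows; the directory names and output annotation filenames are assumptions, not project constants, and it assumes the script is run from the tools directory so that voc2coco is importable.

# Hypothetical split-and-convert script built on voc2coco.convert; adjust paths to your dataset layout.
import glob
import os
import random

from voc2coco import convert

xml_files = glob.glob(os.path.join("annotations", "*.xml"))
random.shuffle(xml_files)

# 80/20 split between training and validation annotations
split_idx = int(0.8 * len(xml_files))
train_files, valid_files = xml_files[:split_idx], xml_files[split_idx:]

os.makedirs("train", exist_ok=True)
os.makedirs("valid", exist_ok=True)
convert(train_files, os.path.join("train", "annotations.json"))
convert(valid_files, os.path.join("valid", "annotations.json"))

# Note: with PRE_DEFINE_CATEGORIES = None, each convert() call rebuilds the category
# mapping from the files it sees, so category ids can differ between splits if a class
# is missing from one of them; set PRE_DEFINE_CATEGORIES to keep the ids consistent.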