├── .gitignore ├── LICENSE ├── README.md ├── image ├── Building_Footprint_Extraction │ ├── README.md │ ├── dataset.py │ ├── metrics.py │ ├── model.py │ ├── requirements.txt │ ├── train.py │ ├── training.py │ └── utils.py ├── Defect_Detection │ ├── README.md │ ├── datasets.py │ ├── model.py │ ├── requirements.txt │ ├── train.py │ └── utils.py ├── Road_Obstacle_Detection │ ├── README.md │ ├── dice_loss.py │ ├── eval.py │ ├── lf_loader.py │ ├── train.py │ ├── train_loss.txt │ ├── utils.py │ └── val_loss.txt └── fastlane │ ├── Image_Classification │ └── ImageClassification.ipynb │ ├── OCR │ ├── OCR.ipynb │ ├── charnet │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── defaults.py │ │ └── modeling │ │ │ ├── __init__.py │ │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── decoder.py │ │ │ ├── hourglass.py │ │ │ └── resnet.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── misc.py │ │ │ └── scale.py │ │ │ ├── model.py │ │ │ ├── postprocessing.py │ │ │ ├── rotated_nms.py │ │ │ └── utils.py │ ├── configs │ │ └── icdar2015_hourglass88.yaml │ ├── datasets │ │ └── ICDAR2015 │ │ │ └── test │ │ │ ├── GenericVocabulary.txt │ │ │ └── char_dict.txt │ ├── iou.py │ └── sample.jpg │ ├── Object_Detection │ ├── ObjectDetection.ipynb │ ├── dataset.py │ ├── models.py │ ├── skynews-boeing-737-plane_5435020.jpg │ └── utils.py │ └── README.md └── video ├── Galbladder_Segmentation ├── Detectron2StepByStep.ipynb ├── DetectronGBScript.py ├── GallbladderFiles │ ├── NOGO 1-16424 via_project_20May2021_17h2m.json │ ├── NOGO 1_16504 via_project_22May2021_9h9m.json │ ├── NOGO1_319 via_project_14May2021_13h54m.json │ ├── d018a7fb_25Apr2021_13h18m36s nogo.json │ ├── nogo 1-450 via_project_18May2021_19h36m.json │ ├── nogo1_16859 via_project_22May2021_17h54m (66).json │ ├── nogo1_16859 via_project_22May2021_17h54m.json │ ├── nogo270via_project_11May2021_18h54m.json │ ├── nogo310via_project_13May2021_16h20m.json │ ├── via_project_13May2021_23h12m_nogo.json │ ├── via_project_26Apr2021_20h18m nogo231.json │ ├── via_project_2nogo.json │ ├── via_project_3May2021_17h41m_nogo (1).json │ ├── via_project_3May2021_17h41m_nogo.json │ ├── via_project_vid04_nogo.json │ ├── via_project_vid6_nogo_100.json │ ├── via_project_vid8_nogo_100.json │ ├── via_project_video_02_nogo_1-100.json │ ├── video20_03260.nogo.json │ ├── video_18_00979.nogo.json │ └── video_24_09836_nogo.json ├── README.md ├── _launch.sh ├── _runner.sh ├── augCoords.json ├── grandproj.env ├── jsonOutput │ ├── bladder_val_coco_format.json │ ├── bladder_val_coco_format.json.lock │ ├── coco_instances_results.json │ └── instances_predictions.pth ├── parameters.txt └── runt4v1Detectron.slrm └── Traffic_Incident_Detection ├── .gitignore ├── .idea ├── .gitignore ├── deployment.xml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── vcs.xml ├── webServers.xml └── yowo.iml ├── README.md ├── backbones_2d ├── DeepLabV3PlusPytorch │ ├── LICENSE │ ├── README.md │ ├── datasets │ │ ├── __init__.py │ │ ├── cityscapes.py │ │ ├── utils.py │ │ └── voc.py │ ├── main.py │ ├── metrics │ │ ├── __init__.py │ │ └── stream_metrics.py │ ├── network │ │ ├── __init__.py │ │ ├── _deeplab.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── mobilenetv2.py │ │ │ └── resnet.py │ │ ├── modeling.py │ │ └── utils.py │ ├── predict.py │ ├── resnet_2d.ipynb │ └── utils │ │ ├── __init__.py │ │ ├── ext_transforms.py │ │ ├── loss.py │ │ ├── scheduler.py │ │ ├── utils.py │ │ └── visualizer.py └── darknet.py ├── backbones_3d ├── mobilenet.py ├── mobilenetv2.py ├── resnet.py 
├── resnext.py ├── shufflenet.py └── shufflenetv2.py ├── cfg ├── ava.yaml ├── ava_categories_count.json ├── ava_categories_ratio.json ├── custom_config.py ├── defaults.py ├── dota_config.yaml ├── dota_train.yaml ├── jhmdb.yaml ├── parser.py ├── ucf24.yaml ├── ucf24_charmed-leaf-23_copy.yaml ├── ucf24_finalAnnots.mat ├── yolo.cfg └── yolo_cfg.py ├── core ├── FocalLoss.py ├── cfam.py ├── detection_visualization.py ├── detection_visualization_obj_anom.py ├── eval_results.py ├── model.py ├── optimization.py ├── plot_ava_result.py ├── region_loss.py └── utils.py ├── dataset_factory ├── ava_dataset.py ├── ava_eval_helper.py ├── ava_evaluation │ ├── README.md │ ├── __init__.py │ ├── label_map_util.py │ ├── metrics.py │ ├── np_box_list.py │ ├── np_box_list_ops.py │ ├── np_box_mask_list.py │ ├── np_box_mask_list_ops.py │ ├── np_box_ops.py │ ├── np_mask_ops.py │ ├── object_detection_evaluation.py │ ├── per_image_evaluation.py │ └── standard_fields.py ├── ava_helper.py ├── clip.py ├── cv2_transform.py ├── dataset_utils.py ├── dota.py ├── generate_anchors.py ├── image.py ├── list_dataset.py ├── logging.py ├── meters.py └── transform.py ├── dota_anchors.py ├── dota_dl.ipynb ├── main.py ├── main_dota.py ├── test_video_ava.py └── video_mAP.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | Image_Segmentation/Defect_Detection/ckpt 3 | *__pycache__ 4 | Image_Segmentation/Defect_Detection/samples 5 | Image_Segmentation/Building_Footprint_Extraction/samples 6 | Image_Segmentation/Building_Footprint_Extraction/ckpt 7 | Image_Segmentation/Road_Obstacle_Detection/ckpt 8 | Image_Segmentation/Road_Obstacle_Detection/samples 9 | *.ipynb_checkpoints 10 | Image_Segmentation/Road_Obstacle_Detection/test.ipynb 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vector Institute 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Computer Vision Project 2 | 3 | This project involved facilitating knowledge transfer between Vector and its industry sponsors. Specifically, the objectives were following: 4 | 5 | 1. 
Learn about recent advances in deep learning for computer vision 6 | 2. Apply methods to novel use cases in industry 7 | 8 | Several use cases involving both images and videos are explored. These use-cases reflected current industry needs, participants’ interests and expertise, and opportunities to translate academic advances into real-world applications: 9 | 10 | **Image Use Cases** 11 | 1. Unsupervised defect detection in manufacturing using autoencoders 12 | 2. Building footprint extraction using semantic segmentation 13 | 3. Road Obstactle Detection using semantic segmentation 14 | 15 | **Video Use Cases** 16 | 1. Semantic segmentation of videos from cholecystectomy procedures (gallbladder surgery) 17 | 2. Traffic incident detection of videos using augment 18 | 19 | ## Additional Tooling 20 | In addition, the AI Engineering team has created a separate repository that works as a tool-kit the Computer Vision project at Vector Institute. It includes various datasets readily loadable from the shared cluster as well as useful image/video tools such as data augmentation and visualization utilities.You can find the repository at https://github.com/VectorInstitute/vector_cv_tools 21 | 22 | ## Usage 23 | Each folder corresponding to a use case includes instructions to run the experiments. It should be noted that this repository is no longer maintained and solely serves as an artifact of the project. 24 | 25 | ## Citations 26 | Please ensure you cite [Computer Vision: Applications in Manufacturing, Surgery, Traffic, Satellites, and Unlabelled Data Recognition Technical Report](https://vectorinstitute.ai/wp-content/uploads/2022/05/computer_vision_project_report_may252022.pdf) whenever you are citing this GitHub repository 27 | 28 | ## Acknowledgements 29 | Many thanks to our sponsor companies, researchers and Vector Institute staff for making this collaboration possible and providing academic support and computing infrastructure during all phases of this work. We would specifically like to thank the following individuals for their contributions. 30 | 31 | * Elham Ahmadi 32 | * Andrew Alberts-Scherer 33 | * Raghav Goyal 34 | * John Jewell 35 | * Shuja Khalid 36 | * Matthew Kowal 37 | * Andriy Levitskyy 38 | * Jinbiao Ning 39 | * Tristan Trim 40 | * Kuldeep Panjwani 41 | * Saeed Pouryazdian 42 | * Sim Sachar 43 | * Yilei Wu 44 | * An Zhou 45 | -------------------------------------------------------------------------------- /image/Building_Footprint_Extraction/README.md: -------------------------------------------------------------------------------- 1 | # Building Footprint Extraction 2 | 3 | ## Overview 4 | 5 | As high resolution satellite imagery becomes increasingly available in both the public and private domain, a number of beneficial applications that leverage this data are enabled. Extraction of building footprints in satellite imagery is a core component of many downstream applications of satellite imagery such as humanitarian assistance and disaster response. This paper offers a comparative study of methods for building footprint extraction in satellite imagery. The focus is to explore state-of-the-art semantic segmentation models in computer vision using the SpaceNet 2 Building Detection Dataset. Four high-level approaches, and six total variants, are trained and evaluated including U-Net, UNet++, Fully Convolutional Networks (FCN) and DeepLabv3. The Intersection over Union (IoU) is used to quantify the segmentation performance on a held out test set. 
In our experiments, we found that Deeplabv3 with a Resnet-101 backbone is the most accurate approach to building footprint extraction out of the surveyed methods. In general, models that leverage pretraining achieve high accuracy and require minimal training. Conversely, models that do not leverage pretraining are inaccurate and require longer training regimes. 6 | 7 | ## Dataset 8 | In order to benchmark the aforementioned approaches on building footprint extraction in satellite images, the [SpaceNet Building Detection V2 dataset](https://spacenet.ai/spacenet-buildings-dataset-v2/) is used. This dataset contains high resolution satellite imagery and corresponding labels that specify the location of building footprints. The dataset includes 302,701 Building Labels from across 10,593 multi-spectral satellite images of Vegas, Paris, Shanghai and Khartoum. The labels are binary and indicate whether each pixel is building or background. 9 | 10 |
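For reference, each training example pairs a satellite tile with a single-channel binary building mask. A minimal loading sketch is below; the file names are hypothetical and the repository's full loader is in `dataset.py` further down.

```
import numpy as np
import torch
from PIL import Image

# Hypothetical file names; images and building masks are stored as separate files.
img = Image.open("AOI_2_Vegas_img1.tif").convert("RGB")
mask = Image.open("AOI_2_Vegas_img1_mask.png").convert("1")  # mode "1" gives a binary mask

img_t = torch.from_numpy(np.array(img)).permute(2, 0, 1).float() / 255.0  # (3, H, W)
mask_t = torch.from_numpy(np.array(mask).astype(int)).unsqueeze(0)        # (1, H, W), values in {0, 1}
```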

Figure 1: An example of images (left) and labels (right) in the SpaceNet Building Detection V2 dataset.

18 | 19 | ## Experimental Setup 20 | 21 | The dataset is divided into training (80%), validating (10%) and testing (10%) sets. Images are resized from 650x650 to 384x384 using bi-cubic interpolation and normalized using the mean and standard deviation of the Imagenet dataset. 22 | The proposed semantic segmentation models are trained on the training set, while the validating set is used to determine a stopping criteria. Lastly, the trained model is evaluated on the testing set. Intersection over Union (IoU) is the metric used to evaluate the model performance and measures the overlap between the labels of the prediction and ground truth. IoU ranges from 0 to 1 where 1 denotes perfect and complete overlap. 23 | 24 | ## Results 25 | 26 |

Figure 2: IoU score on the test set for each approach.

Figure 3: A visualization of the predictions generated by each approach along with the input image (far left) and ground truth label (far right).

Figure 4: Binary cross entropy loss for the training set (top) and validation set (bottom) across epochs.

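For reference, the IoU reported in the results above is the intersection of the binarized prediction with the ground-truth mask divided by their union. A minimal sketch, assuming already-thresholded {0, 1} tensors, is shown below; the repository's autograd-based version lives in `metrics.py`.

```
import torch

def binary_iou(pred, target, eps=1e-4):
    """IoU for binary masks; pred and target contain values in {0, 1}."""
    pred, target = pred.float().view(-1), target.float().view(-1)
    inter = torch.dot(pred, target)                  # size of the intersection
    union = pred.sum() + target.sum() - inter + eps  # size of the union
    return ((inter + eps) / union).item()

# Perfect overlap gives an IoU close to 1.
print(binary_iou(torch.ones(4, 384, 384), torch.ones(4, 384, 384)))
```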
50 | 51 | ## Running Code 52 | To configure the environment to run the experiments navigate to the base of this directory and execute the following commands: 53 | 54 | ``` 55 | conda create -n new_env 56 | conda activate new_env 57 | pip install -r requirements.txt 58 | ``` 59 | 60 | To obtain results for a specific architecture simply pass the appropriate arguments to the **train.py** script: 61 | ``` 62 | python train.py --model fcn50 --epochs 10 --batch_size 4 --data_path /path/to/spacenet 63 | ``` 64 | 65 | The **train.py** script has the following arguments: 66 | - **model**: (str): Architecture variation for experiments. *required* 67 | - **data_path** (str): The root directory of the dataset. *required* 68 | - **epochs** (int): The number of epochs to train the model. Default 25 69 | - **batch_size** (int) The batch size for training, validation and testing. Default 8 70 | - **learning_rate** (float): Learning rates of model. Default .0001 71 | - **size** (int): Side length of input image. Default 384 72 | - **train_perc** (float): The proportion of samples used for train. Default .8 73 | - **val_perc** (float): The proportion of samples used for validation. Default .1 74 | -------------------------------------------------------------------------------- /image/Building_Footprint_Extraction/dataset.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | 5 | import numpy as np 6 | import torch 7 | from PIL import Image 8 | from torch.utils.data.dataset import Dataset 9 | 10 | class SpaceNet_Dataset(Dataset): 11 | def __init__(self, img_dir_list, mask_dir_list, img_transform = None, mask_transform=None): 12 | self.img_dir_list = img_dir_list 13 | self.mask_dir_list = mask_dir_list 14 | 15 | img_paths, mask_paths = [], [] 16 | 17 | for img_dir, mask_dir in zip(img_dir_list, mask_dir_list): 18 | img_paths += [f"{img_dir}/{img_file}" for img_file in os.listdir(img_dir)] 19 | mask_paths += [f"{mask_dir}/{mask_file}" for mask_file in os.listdir(mask_dir)] 20 | 21 | self.img_paths = sorted(img_paths) 22 | self.mask_paths = [f"{new_mask_path}_mask.png" for new_mask_path in sorted([mask_path[:-9] for mask_path in mask_paths])] 23 | 24 | self.img_transform = img_transform 25 | self.mask_transform = mask_transform 26 | 27 | 28 | 29 | def __len__(self): 30 | return len(self.img_paths) 31 | 32 | def __getitem__(self, index): 33 | img_path = self.img_paths[index] 34 | mask_path = self.mask_paths[index] 35 | 36 | img = Image.open(img_path) 37 | img = self.img_transform(img) 38 | 39 | mask = Image.open(mask_path).convert("1") 40 | mask = self.mask_transform(mask) 41 | mask = torch.from_numpy(np.array(mask).astype(int)).unsqueeze(0) 42 | 43 | return img, mask -------------------------------------------------------------------------------- /image/Building_Footprint_Extraction/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | 5 | class DiceCoeff(Function): 6 | """Dice coeff for individual examples""" 7 | 8 | def forward(self, input, target): 9 | self.save_for_backward(input, target) 10 | eps = 0.0001 11 | self.inter = torch.dot(input.view(-1), target.view(-1)) 12 | self.union = torch.sum(input) + torch.sum(target) + eps 13 | 14 | t = (2 * self.inter.float() + eps) / self.union.float() 15 | return t 16 | 17 | # This function has only a single output, so it gets only one gradient 18 | def backward(self, grad_output): 19 | 20 | input, target = 
self.saved_variables 21 | grad_input = grad_target = None 22 | 23 | if self.needs_input_grad[0]: 24 | grad_input = grad_output * 2 * (target * self.union - self.inter) \ 25 | / (self.union * self.union) 26 | if self.needs_input_grad[1]: 27 | grad_target = None 28 | 29 | return grad_input, grad_target 30 | 31 | 32 | def dice_coeff(input, target): 33 | """Dice coeff for batches""" 34 | if input.is_cuda: 35 | s = torch.FloatTensor(1).cuda().zero_() 36 | else: 37 | s = torch.FloatTensor(1).zero_() 38 | 39 | for i, c in enumerate(zip(input, target)): 40 | s = s + DiceCoeff().forward(c[0], c[1]) 41 | 42 | return s / (i + 1) 43 | 44 | 45 | 46 | class IoU(Function): 47 | """IoU for individual examples""" 48 | def forward(self, input, target): 49 | eps = 0.0001 50 | self.inter = torch.dot(input.view(-1), target.view(-1)) 51 | self.union = torch.sum(input) + torch.sum(target) + eps-self.inter 52 | 53 | t = (self.inter.float()+eps) / self.union.float() 54 | return t 55 | 56 | def iou(input, target): 57 | """IoU for batches""" 58 | if input.is_cuda: 59 | s = torch.FloatTensor(1).cuda().zero_() 60 | else: 61 | s = torch.FloatTensor(1).zero_() 62 | 63 | for i, c in enumerate(zip(input, target)): 64 | s = s + IoU().forward(c[0], c[1]) 65 | 66 | return s / (i + 1) -------------------------------------------------------------------------------- /image/Building_Footprint_Extraction/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.11.0 2 | alabaster==0.7.12 3 | anaconda-client==1.7.2 4 | anaconda-navigator==1.7.0 5 | anaconda-project==0.8.3 6 | appdirs==1.4.3 7 | asn1crypto==0.24.0 8 | astor==0.8.1 9 | astroid==2.2.5 10 | astropy==3.2.1 11 | astunparse==1.6.3 12 | atomicwrites==1.3.0 13 | attrs==19.1.0 14 | Automat==0.7.0 15 | Babel==2.7.0 16 | backcall==0.1.0 17 | backports.os==0.1.1 18 | backports.shutil-get-terminal-size==1.0.0 19 | beautifulsoup4==4.7.1 20 | bitarray==0.9.3 21 | bkcharts==0.2 22 | blaze==0.11.3 23 | bleach==3.1.0 24 | bokeh==1.2.0 25 | boto==2.49.0 26 | Bottleneck==1.2.1 27 | cachetools==4.2.1 28 | certifi==2019.6.16 29 | cffi==1.12.3 30 | chardet==3.0.4 31 | Click==7.0 32 | cloudpickle==1.6.0 33 | clyent==1.2.2 34 | colorama==0.4.1 35 | conda==4.7.12 36 | conda-build==3.17.6 37 | conda-package-handling==1.6.0 38 | conda-verify==3.1.1 39 | constantly==15.1.0 40 | contextlib2==0.5.5 41 | convertdate==2.3.2 42 | cryptography==2.7 43 | cycler==0.10.0 44 | Cython==0.29.12 45 | cytoolz==0.10.0 46 | dask==2.1.0 47 | dataclasses==0.8 48 | datashape==0.5.4 49 | decorator==4.4.0 50 | defusedxml==0.6.0 51 | distributed==2.1.0 52 | dm-tree==0.1.5 53 | docutils==0.14 54 | entrypoints==0.3 55 | et-xmlfile==1.0.1 56 | fastcache==1.1.0 57 | filelock==3.0.12 58 | Flask==1.1.1 59 | Flask-Cors==3.0.7 60 | flatbuffers==1.12 61 | fredapi==0.4.3 62 | future==0.17.1 63 | gast==0.3.3 64 | gevent==1.4.0 65 | glob2==0.7 66 | gluonts==0.8.1 67 | gmpy2==2.0.8 68 | google-auth==1.27.0 69 | google-auth-oauthlib==0.4.2 70 | google-pasta==0.2.0 71 | googledrivedownloader==0.4 72 | graphviz==0.8.4 73 | greenlet==0.4.15 74 | grpcio==1.32.0 75 | h5py==2.10.0 76 | heapdict==1.0.0 77 | hijri-converter==2.2.2 78 | holidays==0.11.3.1 79 | html5lib==1.0.1 80 | hyperlink==18.0.0 81 | idna==2.8 82 | imageio==2.5.0 83 | imagesize==1.1.0 84 | importlib-metadata==0.17 85 | incremental==17.5.0 86 | ipykernel==5.1.1 87 | ipython==7.6.1 88 | ipython_genutils==0.2.0 89 | ipywidgets==7.5.0 90 | isodate==0.6.0 91 | isort==4.3.21 92 | itsdangerous==1.1.0 93 | jdcal==1.4.1 
94 | jedi==0.13.3 95 | jeepney==0.4 96 | Jinja2==2.10.1 97 | joblib==0.13.2 98 | json5==0.8.4 99 | jsonschema==3.0.1 100 | jupyter==1.0.0 101 | jupyter-client==5.3.1 102 | jupyter-console==6.0.0 103 | jupyter-core==4.5.0 104 | jupyterlab==1.0.2 105 | jupyterlab-launcher==0.13.1 106 | jupyterlab-server==1.0.0 107 | Keras-Applications==1.0.8 108 | Keras-Preprocessing==1.1.2 109 | keyring==18.0.0 110 | kiwisolver==1.1.0 111 | korean-lunar-calendar==0.2.1 112 | lazy-object-proxy==1.4.1 113 | libarchive-c==2.8 114 | lief==0.9.0 115 | lightgbm==3.3.2 116 | llvmlite==0.29.0 117 | locket==0.2.0 118 | lxml==4.3.4 119 | Markdown==3.3.3 120 | MarkupSafe==1.1.1 121 | matplotlib==3.1.0 122 | mccabe==0.6.1 123 | mistune==0.8.4 124 | mkl-fft==1.0.12 125 | mkl-random==1.0.2 126 | mkl-service==2.0.2 127 | mock==3.0.5 128 | more-itertools==7.0.0 129 | mpmath==1.1.0 130 | msgpack==0.6.1 131 | multipledispatch==0.6.0 132 | mxnet-cu112==1.8.0.post0 133 | navigator-updater==0.1.0 134 | nbconvert==5.5.0 135 | nbformat==4.4.0 136 | networkx==2.3 137 | nltk==3.4.4 138 | nose==1.3.7 139 | notebook==6.0.0 140 | numba==0.45.0 141 | numexpr==2.6.9 142 | numpy==1.19.5 143 | numpydoc==0.9.1 144 | oauthlib==3.1.0 145 | odo==0.5.1 146 | olefile==0.46 147 | openpyxl==2.6.2 148 | opt-einsum==3.3.0 149 | packaging==19.0 150 | pandas==1.1.5 151 | pandocfilters==1.4.2 152 | parso==0.5.0 153 | partd==1.0.0 154 | path.py==12.0.1 155 | pathlib2==2.3.4 156 | patsy==0.5.1 157 | pep8==1.7.1 158 | pexpect==4.7.0 159 | pickleshare==0.7.5 160 | Pillow==8.2.0 161 | pkginfo==1.5.0.1 162 | plotly==5.6.0 163 | plotly-express==0.4.1 164 | pluggy==0.12.0 165 | ply==3.11 166 | prometheus-client==0.7.1 167 | promise==2.3 168 | prompt-toolkit==2.0.9 169 | protobuf==3.15.1 170 | psutil==5.6.3 171 | ptyprocess==0.6.0 172 | py==1.8.0 173 | pyasn1==0.4.4 174 | pyasn1-modules==0.2.2 175 | pycodestyle==2.5.0 176 | pycosat==0.6.3 177 | pycparser==2.19 178 | pycrypto==2.6.1 179 | pycurl==7.43.0.3 180 | pydantic==1.8.2 181 | pyflakes==2.1.1 182 | Pygments==2.4.2 183 | pylint==2.3.1 184 | PyMeeus==0.5.11 185 | pyodbc==4.0.26 186 | pyOpenSSL==19.0.0 187 | pyparsing==2.4.0 188 | pyrsistent==0.14.11 189 | PySocks==1.7.0 190 | pytest==5.0.1 191 | pytest-arraydiff==0.3 192 | pytest-astropy==0.5.0 193 | pytest-doctestplus==0.3.0 194 | pytest-openfiles==0.3.2 195 | pytest-remotedata==0.3.1 196 | python-dateutil==2.8.0 197 | pytz==2019.1 198 | PyWavelets==1.0.3 199 | PyYAML==5.1.1 200 | pyzmq==18.0.0 201 | QtAwesome==0.5.7 202 | qtconsole==4.5.1 203 | QtPy==1.8.0 204 | rdflib==5.0.0 205 | requests==2.22.0 206 | requests-oauthlib==1.3.0 207 | rope==0.14.0 208 | rsa==4.7.1 209 | ruamel_yaml==0.15.46 210 | scikit-image==0.15.0 211 | scikit-learn==0.21.2 212 | scipy==1.4.1 213 | seaborn==0.9.0 214 | SecretStorage==3.1.1 215 | Send2Trash==1.5.0 216 | service-identity==17.0.0 217 | simplegeneric==0.8.1 218 | singledispatch==3.4.0.3 219 | six==1.15.0 220 | snowballstemmer==1.9.0 221 | sortedcollections==1.1.2 222 | sortedcontainers==2.1.0 223 | soupsieve==1.8 224 | Sphinx==2.1.2 225 | sphinxcontrib-applehelp==1.0.1 226 | sphinxcontrib-devhelp==1.0.1 227 | sphinxcontrib-htmlhelp==1.0.2 228 | sphinxcontrib-jsmath==1.0.1 229 | sphinxcontrib-qthelp==1.0.2 230 | sphinxcontrib-serializinghtml==1.1.3 231 | sphinxcontrib-websupport==1.1.2 232 | spyder==3.3.6 233 | spyder-kernels==0.5.1 234 | SQLAlchemy==1.3.5 235 | statsmodels==0.10.0 236 | sympy==1.4 237 | tables==3.5.2 238 | tblib==1.4.0 239 | tenacity==8.0.1 240 | tensorboard==2.4.1 241 | tensorboard-plugin-wit==1.8.0 242 | 
tensorflow==2.4.1 243 | tensorflow-estimator==2.4.0 244 | tensorflow-gpu==2.4.1 245 | tensorflow-probability==0.12.1 246 | termcolor==1.1.0 247 | terminado==0.8.2 248 | testpath==0.4.2 249 | toolz==0.10.0 250 | torch-scatter==2.0.7 251 | tornado==6.0.3 252 | tqdm==4.32.1 253 | traitlets==4.3.2 254 | Twisted==18.7.0 255 | typed-ast==1.3.4 256 | typing==3.6.2 257 | typing-extensions==3.10.0.2 258 | unicodecsv==0.14.1 259 | urllib3==1.24.2 260 | wcwidth==0.1.7 261 | webencodings==0.5.1 262 | Werkzeug==0.15.4 263 | widgetsnbextension==3.5.0 264 | wrapt==1.12.1 265 | wurlitzer==1.0.2 266 | xlrd==1.2.0 267 | XlsxWriter==1.1.8 268 | xlwt==1.3.0 269 | yacs==0.1.8 270 | zict==1.0.0 271 | zipp==0.5.1 272 | zope.interface==4.5.0 273 | -------------------------------------------------------------------------------- /image/Building_Footprint_Extraction/training.py: -------------------------------------------------------------------------------- 1 | import torch, numpy as np 2 | from utils import save_viz 3 | from metrics import iou 4 | 5 | def get_label_dist(loader): 6 | count_list = [] 7 | for _, (_, lbl) in enumerate(loader): 8 | cnt = torch.bincount(lbl.int().flatten()) 9 | count_list.append(cnt) 10 | 11 | cnts = torch.stack(count_list, dim=0).sum(dim=0).tolist() 12 | zero_count, one_count = cnts[0], cnts[1] 13 | perc = zero_count / (zero_count + one_count) 14 | return perc 15 | 16 | 17 | def train_fn(loader, model, opt, loss_fn, device): 18 | loss_list = [] 19 | for batch_id, (data, targets) in enumerate(loader): 20 | data = data.to(device=device) 21 | targets = targets.float().to(device) 22 | predictions = model(data)['out'] 23 | loss = loss_fn(predictions, targets) 24 | opt.zero_grad() 25 | loss.backward() 26 | opt.step() 27 | loss_list.append(loss.item()) 28 | 29 | mean_loss = np.mean(loss_list) 30 | return mean_loss 31 | 32 | 33 | def val_fn(loader, model, loss_fn, device, color_map, sample_path, epoch, perc, viz): 34 | loss_list = [] 35 | for batch_id, (data, targets) in enumerate(loader): 36 | data = data.to(device=device) 37 | targets = targets.float().to(device=device) 38 | with torch.no_grad(): 39 | predictions = model(data)['out'] 40 | loss = loss_fn(predictions, targets) 41 | loss_list.append(loss.item()) 42 | if viz: 43 | save_viz(data, predictions, targets, color_map, epoch, sample_path, perc) 44 | viz = False 45 | 46 | mean_loss = np.mean(loss_list) 47 | return mean_loss 48 | 49 | 50 | def test_fn(loader, model, loss_fn, device, perc): 51 | target_list, pred_list = [], [] 52 | for batch_id, (data, targets) in enumerate(loader): 53 | data = data.to(device=device) 54 | targets = targets.float().to(device=device) 55 | with torch.no_grad(): 56 | pred = model(data)['out'] 57 | pred_list.append(pred) 58 | target_list.append(targets) 59 | 60 | pred = torch.cat(pred_list, dim=0) 61 | target = torch.cat(target_list, dim=0) 62 | thresh = np.quantile(pred.flatten().cpu().numpy(), perc) 63 | test_loss = loss_fn(pred, target).item() 64 | pred = (pred > thresh).float() 65 | test_iou = iou(pred, target).item() 66 | return ( 67 | test_loss, test_iou) -------------------------------------------------------------------------------- /image/Building_Footprint_Extraction/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.models.segmentation import fcn_resnet50, fcn_resnet101, deeplabv3_resnet50, deeplabv3_resnet101 4 | from torchvision.models.segmentation.deeplabv3 import DeepLabHead 5 | from 
torchvision.models.segmentation.fcn import FCNHead 6 | 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | from model import UNET, UNETPlus 11 | 12 | def get_model(model_type, pretrained): 13 | model = None 14 | if model_type == "fcn50": 15 | model = get_model_fcn50(pretrained) 16 | 17 | elif model_type == "fcn101": 18 | model = get_model_fcn101(pretrained) 19 | 20 | elif model_type == "dlv350": 21 | model = get_model_dlv350(pretrained) 22 | 23 | elif model_type == "dlv3101": 24 | model = get_model_dlv3101(pretrained) 25 | 26 | elif model_type == "unet": 27 | model = UNET(in_channels=3, out_channels=1) 28 | 29 | elif model_type == "unetplus": 30 | model = UNETPlus(n_channels=3, n_classes=1) 31 | 32 | return model 33 | 34 | def get_model_fcn50(pretrained=True, c_out=1): 35 | # Prepare Model and Save to Checkpoint Directory 36 | model = fcn_resnet50(pretrained=pretrained) 37 | 38 | model.classifier = FCNHead(2048, c_out) 39 | model.aux_classifier = None 40 | model = nn.DataParallel(model) 41 | return model 42 | 43 | def get_model_fcn101(pretrained=True, c_out=1): 44 | # Prepare Model and Save to Checkpoint Directory 45 | model = fcn_resnet101(pretrained=pretrained) 46 | model.classifier = FCNHead(2048, c_out) 47 | model.aux_classifier = None 48 | model = nn.DataParallel(model) 49 | return model 50 | 51 | def get_model_dlv350(pretrained=True, c_out=1): 52 | # Prepare Model and Save to Checkpoint Directory 53 | model = deeplabv3_resnet50(pretrained=pretrained) 54 | model.classifier = DeepLabHead(2048, c_out) 55 | model.aux_classifier = None 56 | model = nn.DataParallel(model) 57 | 58 | return model 59 | 60 | def get_model_dlv3101(pretrained=True, c_out=1): 61 | # Prepare Model and Save to Checkpoint Directory 62 | model = deeplabv3_resnet101(pretrained=pretrained) 63 | model.classifier = DeepLabHead(2048, c_out) 64 | model.aux_classifier = None 65 | model = nn.DataParallel(model) 66 | return model 67 | 68 | def save_checkpoint(model, opt, epoch, path, train_loss_list=[], val_loss_list=[]): 69 | """Save Checkpoint""" 70 | 71 | torch.save({ 72 | "model": model.state_dict(), 73 | "opt": opt.state_dict(), 74 | "epoch": epoch, 75 | "train_loss_list": train_loss_list, 76 | "val_loss_list": val_loss_list 77 | }, 78 | path) 79 | 80 | 81 | def save_viz(img, out, lbl, color_map, epoch, sample_path, perc): 82 | img = img.cpu().numpy() 83 | out = out.cpu().numpy() 84 | lbl = lbl.cpu().numpy() 85 | 86 | thresh = np.quantile(out, perc) 87 | 88 | print("thresh", thresh) 89 | 90 | img = (img - np.min(img)) / (np.max(img) - np.min(img)) 91 | rows = out.shape[2] 92 | cols = out.shape[3] 93 | 94 | masks = [] 95 | masks_gt = [] 96 | for index, (im, o, l) in enumerate(zip(img, out, lbl)): 97 | o, l = o.squeeze(), l.squeeze() 98 | 99 | o = (o > thresh).astype(int) 100 | 101 | 102 | mask = np.zeros((rows, cols, 3), dtype=np.uint8) 103 | mask_gt = np.zeros((rows, cols, 3), dtype=np.uint8) 104 | 105 | for j in range(rows): 106 | for i in range(cols): 107 | mask[j, i] = color_map[o[j, i]] 108 | mask_gt[j, i] = color_map[l[j, i]] 109 | 110 | 111 | 112 | f, axarr = plt.subplots(1, 3, figsize=(20, 20)) 113 | im = np.moveaxis(im, 0, -1) 114 | axarr[0].imshow(im) 115 | axarr[0].title.set_text('Image') 116 | axarr[1].imshow(mask_gt) 117 | axarr[1].title.set_text('Label') 118 | axarr[2].imshow(mask) 119 | axarr[2].title.set_text('Prediction') 120 | f.savefig( f"{sample_path}/epoch_{str(epoch)}_{str(index)}.jpg") -------------------------------------------------------------------------------- 
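Before moving on to the next use case, here is a condensed sketch of how the helpers above and in `training.py` could be wired together for a single training epoch. The paths, hyperparameters, and the choice of `BCEWithLogitsLoss` (suggested by the binary cross entropy curves in Figure 4) are illustrative assumptions rather than the project's exact configuration.

```
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

from dataset import SpaceNet_Dataset
from training import train_fn
from utils import get_model, save_checkpoint

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hypothetical directory lists; each image folder is paired with its mask folder.
img_dirs, mask_dirs = ["/path/to/spacenet/imgs"], ["/path/to/spacenet/masks"]

img_tfm = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
mask_tfm = transforms.Resize((384, 384))

loader = DataLoader(
    SpaceNet_Dataset(img_dirs, mask_dirs, img_transform=img_tfm, mask_transform=mask_tfm),
    batch_size=4, shuffle=True)

model = get_model("fcn50", pretrained=True).to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = torch.nn.BCEWithLogitsLoss()  # assumed loss; Figure 4 reports binary cross entropy

train_loss = train_fn(loader, model, opt, loss_fn, device)
save_checkpoint(model, opt, epoch=1, path="ckpt/fcn50.pth", train_loss_list=[train_loss])
```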
/image/Defect_Detection/README.md: -------------------------------------------------------------------------------- 1 | # Defect Detection 2 | 3 | ## Overview 4 | 5 | Anomaly detection is an important task in computer vision that is concerned with identifying anomalous images given a training set of only normal images. In anomaly segmentation, the concept of anomaly detection is extended to the pixel level in order to identify anomalous regions of images. There are many applications to anomaly detection including biomedical image segmentation, video surveillance and defect detection. In particular, defect detection involves detecting abnormalities in manufacturing components and so is widely used in the industry to enhance quality assurance and efficiency in the production process \cite{bergmann2019mvtec}. However, having a person manually inspect each component is not feasible in most cases. To address this, systems have been proposed to automate the detection of defective components. These approaches generally take as input an image of a component and output a label or pixel-level mask that predicts whether the image or pixel is anomalous. Although initial approaches were generally ineffective, newer, deep learning based approaches have shown very strong performance in anomaly detection and segmentation. Thus, these new methods have the potential to dramatically increase quality assurance and efficiency. In order to compare anomaly detection methods, several datasets have been proposed as benchmarks such as MNIST, CIFAR, and UCSD, whereas there are much fewer benchmark datasets for the anomaly segmentation task. To address this, the MVTec Anomaly Detection Dataset was recently introduced as a benchmark for anomaly segmentation. 6 | 7 | MVTec is focused on industrial inspection; consisting of a training set of normal images of objects and textures as well as a test set with both normal and anomalous samples along with their corresponding labels. There are over 70 different types of defects across the anomalous images that are typical in the manufacturing process. The quality and practical nature of the MVTec dataset has made it a popular benchmark for recently proposed anomaly segmentation methods. The goal of this focus phase of the project is to apply state-of-the-art methods to accurately segment anomalies in the MVTec dataset. In doing so, we compared the performance of different anomaly segmentation methods in the industrial inspection setting. Additionally, we sought to optimize the performance of the methods by altering the hyperparameters and architectures of the approaches. 8 | 9 | ## Dataset 10 | The MVTec anomaly detection dataset contains 5354 high-resolution images from 15 different object categories and includes 70 different types of defects across the anomalous images that are typical in the manufacturing process. For each object category, a training set of normal images of objects and textures as well as a test set with both normal and anomalous samples along with their corresponding labels. 11 | 12 |
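For orientation, the directory layout that the loader in `datasets.py` (further down) expects for each object category is roughly the following; the root path is a placeholder.

```
# <data_path>/<category>/train/good/*.png                        # normal training images
# <data_path>/<category>/test/good/*.png                         # normal test images
# <data_path>/<category>/test/<defect_type>/*.png                # anomalous test images
# <data_path>/<category>/ground_truth/<defect_type>/*_mask.png   # pixel-level defect masks
```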

Figure 1: An example of inlier images (left) and labels (right) for multiple object categories in the MVTec dataset.

19 | 20 | ## Experimental Setup 21 | The MVTEC dataset object categories each include a train set of normal samples and a test set of both normal and anomalous samples. Models were optimized to be able to reconstruct samples from the inlier distribution during the training phase. Subsequently, at test time, both normal and anomalous images are input to the model and the pixelwise reconstruction error of samples is used to identify anomalous regions. Specifically, the models were evaluated on the testing data for each of the object categories and the average area under the ROC curve (AUC) is reported. A small validation set of normal images is used to determine which model step yields the most optimal set of parameters. Specifically, 10\% of images were randomly removed from the train set and used as the validation set. For testing, the entire test set was used and the average AUC across object categories is reported for each method. 22 | 23 | ## Results 24 | 25 |

Figure 2: Average AUC score on the test set for each approach.

Figure 3: A visualization of the predictions generated by the network for an anomalous sample.

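A condensed sketch of the scoring step described in the Experimental Setup: the pixelwise reconstruction error is turned into a normalized anomaly map and compared against the ground-truth defect masks with ROC AUC. The full evaluation loop is in `utils.py` further down; shapes here are illustrative.

```
import torch
from sklearn.metrics import roc_auc_score

def pixel_auc(images, recons, masks):
    """images/recons: (N, 3, H, W) tensors; masks: (N, H, W) with 1 marking defective pixels."""
    error_map = ((images - recons) ** 2).mean(dim=1)  # per-pixel reconstruction error, (N, H, W)
    scores = (error_map - error_map.min()) / (error_map.max() - error_map.min() + 1e-8)
    return roc_auc_score(masks.flatten().cpu().numpy(), scores.flatten().cpu().numpy())
```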
40 | 41 | ## Running Code 42 | To configure the environment to run the experiments navigate to the base of this directory and execute the following commands: 43 | 44 | ``` 45 | conda create -n new_env 46 | conda activate new_env 47 | pip install -r requirements.txt 48 | ``` 49 | 50 | To obtain results for a specific architecture simply pass the appropriate arguments to the **train.py** script: 51 | ``` 52 | python train.py --model vae --epochs 10 --ckpt_path /path/to/checkpoint/folder --dataset_path /path/to/mvtec 53 | ``` 54 | 55 | The **train.py** script has the following arguments: 56 | - **model**: (str): Architecture variation for experiments. ae or vae. *required* 57 | - **data_path** (str): The root directory of the dataset. *required* 58 | - **ckpt_path** (str): The directory to save model checkpoints. *required* 59 | - **epochs** (int): The number of epochs to train the model. Default 100 60 | - **batch_size** (int) The batch size for trainingtesting. Default 32 61 | - **learning_rate** (float): Learning rates of model. Default .001 62 | - **size** (int): Side length of input image Default 128 63 | 64 | -------------------------------------------------------------------------------- /image/Defect_Detection/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import glob 4 | from PIL import Image 5 | import numpy as np 6 | 7 | from torch.utils.data import Dataset 8 | 9 | class MVTecADDataset(Dataset): 10 | def __init__(self, img_dir, mode, transform, size=128): 11 | self.img_dir = img_dir 12 | self.mode = mode 13 | self.size = size 14 | 15 | if self.mode == "train": 16 | self.img_paths = glob.glob(f"{self.img_dir}/train/good/*.png") 17 | else: 18 | 19 | paths = glob.glob(f"{self.img_dir}/test/*/*.png") 20 | 21 | inlier_img_paths = glob.glob(f"{self.img_dir}/test/good/*.png") 22 | outlier_img_paths = list(set(paths) - set(inlier_img_paths)) 23 | self.img_paths = inlier_img_paths + outlier_img_paths 24 | self.outlier_lbl_paths = [f"{self.img_dir}/ground_truth/{path.split('/')[-2]}/{path.split('/')[-1][:-4]}_mask.png" for path in outlier_img_paths] 25 | 26 | self.outlier_lbl = np.array([np.array(Image.open(path).convert('1').resize((self.size, self.size))) for path in self.outlier_lbl_paths]) 27 | 28 | 29 | self.inlier_lbl = np.zeros(shape=(len(inlier_img_paths), self.outlier_lbl.shape[1], self.outlier_lbl.shape[2])) 30 | 31 | self.labels = torch.from_numpy(np.concatenate([self.inlier_lbl, self.outlier_lbl])).int() 32 | 33 | 34 | self.transform = transform 35 | 36 | def __getitem__(self, index): 37 | if self.mode == "test": 38 | x = Image.open(self.img_paths[index]).convert("RGB") 39 | if self.transform is not None: 40 | x = self.transform(x) 41 | 42 | 43 | y = self.labels[index] 44 | return x, y 45 | else: 46 | x = Image.open(self.img_paths[index]).convert('RGB') 47 | if self.transform is not None: 48 | x = self.transform(x) 49 | return x 50 | 51 | def __len__(self): 52 | return len(self.img_paths) -------------------------------------------------------------------------------- /image/Defect_Detection/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | class Decoder(nn.Module): 7 | """ 8 | The model architecture is taken from https://github.com/pytorch/examples/issues/70 9 | """ 10 | 11 | def __init__(self, in_channels, dec_channels, hidden_dim): 12 | self.in_channels = in_channels 13 | self.dec_channels = 
dec_channels 14 | self.hidden_dim = hidden_dim 15 | 16 | super().__init__() 17 | self.main = nn.Sequential( 18 | # input is Z, going into a convolution 19 | nn.ConvTranspose2d(self.hidden_dim, self.dec_channels * 16, 4, 1, 0, bias=False), 20 | nn.BatchNorm2d(self.dec_channels * 16), 21 | nn.ReLU(True), 22 | # state size. (NGF*16) x 4 x 4 23 | nn.ConvTranspose2d(self.dec_channels * 16, self.dec_channels * 8, 4, 2, 1, bias=False), 24 | nn.BatchNorm2d(self.dec_channels * 8), 25 | nn.ReLU(True), 26 | # state size. (NGF*8) x 8 x 8 27 | nn.ConvTranspose2d(self.dec_channels * 8, self.dec_channels * 4, 4, 2, 1, bias=False), 28 | nn.BatchNorm2d(self.dec_channels * 4), 29 | nn.ReLU(True), 30 | # state size. (NGF*4) x 16 x 16 31 | nn.ConvTranspose2d(self.dec_channels * 4, self.dec_channels * 2, 4, 2, 1, bias=False), 32 | nn.BatchNorm2d(self.dec_channels * 2), 33 | nn.ReLU(True), 34 | # state size. (NGF*2) x 32 x 32 35 | nn.ConvTranspose2d(self.dec_channels * 2, self.dec_channels, 4, 2, 1, bias=False), 36 | nn.BatchNorm2d(self.dec_channels), 37 | nn.ReLU(True), 38 | # state size. (NGF) x 64 x 64 39 | nn.ConvTranspose2d(self.dec_channels, self.in_channels, 4, 2, 1, bias=False), 40 | nn.Sigmoid() 41 | # state size. (NC) x 128 x 128 42 | ) 43 | 44 | def forward(self, x): 45 | return self.main(x) 46 | 47 | 48 | class Encoder(nn.Module): 49 | """ 50 | The model architecture is taken from https://github.com/pytorch/examples/issues/70 51 | """ 52 | 53 | def __init__(self, in_channels, enc_channels, hidden_dim): 54 | self.in_channels = in_channels 55 | self.enc_channels = enc_channels 56 | self.hidden_dim = hidden_dim 57 | 58 | super().__init__() 59 | self.main = nn.Sequential( 60 | # input is (NC) x 128 x 128 61 | nn.Conv2d(self.in_channels, self.enc_channels, 4, stride=2, padding=1, bias=False), 62 | nn.LeakyReLU(0.2, inplace=True), 63 | # state size. (NDF) x 64 x 64 64 | nn.Conv2d(self.enc_channels, self.enc_channels * 2, 4, stride=2, padding=1, bias=False), 65 | nn.BatchNorm2d(self.enc_channels * 2), 66 | nn.LeakyReLU(0.2, inplace=True), 67 | # state size. (NDF*2) x 32 x 32 68 | nn.Conv2d(self.enc_channels * 2, self.enc_channels * 4, 4, stride=2, padding=1, bias=False), 69 | nn.BatchNorm2d(self.enc_channels * 4), 70 | nn.LeakyReLU(0.2, inplace=True), 71 | # state size. (NDF*4) x 16 x 16 72 | nn.Conv2d(self.enc_channels * 4, self.enc_channels * 8, 4, stride=2, padding=1, bias=False), 73 | nn.BatchNorm2d(self.enc_channels * 8), 74 | nn.LeakyReLU(0.2, inplace=True), 75 | # state size. (NDF*8) x 8 x 8 76 | nn.Conv2d(self.enc_channels * 8, self.enc_channels * 16, 4, stride=2, padding=1, bias=False), 77 | nn.BatchNorm2d(self.enc_channels * 16), 78 | nn.LeakyReLU(0.2, inplace=True), 79 | # state size. 
(NDF*16) x 4 x 4 80 | nn.Conv2d(self.enc_channels * 16, self.hidden_dim, 4, stride=1, padding=0, bias=False), 81 | nn.Flatten(), 82 | ) 83 | 84 | def forward(self, x): 85 | return self.main(x) 86 | 87 | def vae_loss_fn(x, recon_batch, mu, logvar): 88 | 89 | recon_loss = ae_loss_fn(x, recon_batch) 90 | 91 | KLD = torch.mean(-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(),dim=1),dim=0) 92 | 93 | return recon_loss + KLD 94 | 95 | def ae_loss_fn(x, recon_batch): 96 | """Function taken and modified from 97 | https://github.com/pytorch/examples/tree/master/vae 98 | """ 99 | MSE = ((x - recon_batch) ** 2).mean() 100 | return MSE 101 | 102 | class ConvVAE(nn.Module): 103 | 104 | def __init__(self, in_channels=3, enc_channels=128, dec_channels=128, hidden_dim=100): 105 | super().__init__() 106 | 107 | self.in_channels = in_channels 108 | self.enc_channels = enc_channels 109 | self.dec_channels = dec_channels 110 | self.hidden_dim = hidden_dim 111 | 112 | self.encoder = Encoder(self.in_channels, self.enc_channels, self.hidden_dim*2) 113 | self.decoder = Decoder(self.in_channels, self.dec_channels, self.hidden_dim) 114 | 115 | def reparameterize(self, mu, logvar): 116 | std = (0.5 * logvar).exp() 117 | eps = torch.randn_like(std) 118 | 119 | return mu + eps * std 120 | 121 | def forward(self, x): 122 | enc_out = self.encoder(x) 123 | mu, logvar = enc_out[..., :self.hidden_dim], enc_out[..., self.hidden_dim:] 124 | z = self.reparameterize(mu, logvar) 125 | recon_batch = self.decoder(z.unsqueeze(-1).unsqueeze(-1)) 126 | return recon_batch, mu, logvar 127 | 128 | class AE(nn.Module): 129 | 130 | def __init__(self, in_channels=3, enc_channels=128, dec_channels=128, hidden_dim=100): 131 | super().__init__() 132 | self.in_channels = in_channels 133 | self.enc_channels = enc_channels 134 | self.dec_channels = dec_channels 135 | self.hidden_dim = hidden_dim 136 | 137 | self.encoder = Encoder(self.in_channels, self.enc_channels, self.hidden_dim) 138 | self.decoder = Decoder(self.in_channels, self.dec_channels, self.hidden_dim) 139 | 140 | def forward(self, x): 141 | enc_out = self.encoder(x) 142 | recon_batch = self.decoder(enc_out.unsqueeze(-1).unsqueeze(-1)) 143 | return recon_batch 144 | 145 | -------------------------------------------------------------------------------- /image/Defect_Detection/requirements.txt: -------------------------------------------------------------------------------- 1 | anyio==3.7.0 2 | argon2-cffi==21.3.0 3 | argon2-cffi-bindings==21.2.0 4 | arrow==1.2.3 5 | asttokens==2.2.1 6 | attrs==23.1.0 7 | backcall==0.2.0 8 | beautifulsoup4==4.12.2 9 | bleach==6.0.0 10 | certifi==2023.5.7 11 | cffi==1.15.1 12 | charset-normalizer==3.1.0 13 | comm==0.1.3 14 | debugpy==1.6.7 15 | decorator==5.1.1 16 | defusedxml==0.7.1 17 | exceptiongroup==1.1.1 18 | executing==1.2.0 19 | fastjsonschema==2.17.1 20 | fqdn==1.5.1 21 | idna==3.4 22 | importlib-metadata==6.7.0 23 | ipykernel==6.23.3 24 | ipython==8.14.0 25 | ipython-genutils==0.2.0 26 | ipywidgets==8.0.6 27 | isoduration==20.11.0 28 | jedi==0.18.2 29 | Jinja2==3.1.2 30 | joblib==1.2.0 31 | jsonpointer==2.4 32 | jsonschema==4.17.3 33 | jupyter==1.0.0 34 | jupyter-console==6.6.3 35 | jupyter-events==0.6.3 36 | jupyter_client==8.3.0 37 | jupyter_core==5.3.1 38 | jupyter_server==2.6.0 39 | jupyter_server_terminals==0.4.4 40 | jupyterlab-pygments==0.2.2 41 | jupyterlab-widgets==3.0.7 42 | MarkupSafe==2.1.3 43 | matplotlib-inline==0.1.6 44 | mistune==3.0.1 45 | nbclassic==1.0.0 46 | nbclient==0.8.0 47 | nbconvert==7.6.0 48 | nbformat==5.9.0 
49 | nest-asyncio==1.5.6 50 | notebook==6.5.4 51 | notebook_shim==0.2.3 52 | numpy==1.25.0 53 | overrides==7.3.1 54 | packaging==23.1 55 | pandas==2.0.2 56 | pandocfilters==1.5.0 57 | parso==0.8.3 58 | pexpect==4.8.0 59 | pickleshare==0.7.5 60 | Pillow==9.5.0 61 | platformdirs==3.8.0 62 | prometheus-client==0.17.0 63 | prompt-toolkit==3.0.38 64 | psutil==5.9.5 65 | ptyprocess==0.7.0 66 | pure-eval==0.2.2 67 | pycparser==2.21 68 | Pygments==2.15.1 69 | pyrsistent==0.19.3 70 | python-dateutil==2.8.2 71 | python-json-logger==2.0.7 72 | pytz==2023.3 73 | PyYAML==6.0 74 | pyzmq==25.1.0 75 | qtconsole==5.4.3 76 | QtPy==2.3.1 77 | requests==2.31.0 78 | rfc3339-validator==0.1.4 79 | rfc3986-validator==0.1.1 80 | scikit-learn==1.2.2 81 | scipy==1.10.1 82 | Send2Trash==1.8.2 83 | six==1.16.0 84 | sniffio==1.3.0 85 | soupsieve==2.4.1 86 | stack-data==0.6.2 87 | terminado==0.17.1 88 | threadpoolctl==3.1.0 89 | tinycss2==1.2.1 90 | torch==1.11.0 91 | torchvision==0.12.0 92 | tornado==6.3.2 93 | traitlets==5.9.0 94 | typing_extensions==4.6.3 95 | tzdata==2023.3 96 | uri-template==1.3.0 97 | urllib3==2.0.3 98 | wcwidth==0.2.6 99 | webcolors==1.13 100 | webencodings==0.5.1 101 | websocket-client==1.6.1 102 | widgetsnbextension==4.0.7 103 | zipp==3.15.0 104 | -------------------------------------------------------------------------------- /image/Defect_Detection/train.py: -------------------------------------------------------------------------------- 1 | # system imports 2 | import os 3 | import logging 4 | import glob 5 | from pathlib import Path 6 | import re 7 | import argparse 8 | 9 | # external dependencies 10 | import torch 11 | import torch.nn as nn 12 | from torch.optim import Adam 13 | from torchvision import transforms 14 | from torch.utils.data import DataLoader 15 | 16 | # relative imports 17 | from model import AE, ConvVAE, ae_loss_fn, vae_loss_fn 18 | from datasets import MVTecADDataset 19 | from utils import train_step, test_step, save_checkpoint 20 | 21 | parser = argparse.ArgumentParser(description="Feature Memory for Anomaly Detection") 22 | 23 | # basic config 24 | parser.add_argument('--model', type=str, help='Architecture variation for experiments. 
ae or vae.') 25 | parser.add_argument('--epochs', type=int, default=100, help=' The number of epochs to train the model.') 26 | parser.add_argument('--batch_size', type=int, default=8, help=' The batch size for training, validation and testing.') 27 | parser.add_argument('--learning_rate', type=float, default=.001, help='Learning rates of model.') 28 | parser.add_argument('--size', type=int, default=128, help='Side length of input image') 29 | parser.add_argument('--data_path', type=str, help='The root directory of the dataset.') 30 | parser.add_argument('--ckpt_path', type=str, help='The directory to save model checkpoints.') 31 | 32 | args = parser.parse_args() 33 | 34 | # Data Paths 35 | 36 | CLASSES = ["toothbrush", 37 | "pill", 38 | "leather", 39 | "hazelnut", 40 | "capsule", 41 | "cable", 42 | "bottle", 43 | "zipper", 44 | "tile", 45 | "transistor", 46 | "wood", 47 | "metal_nut", 48 | "screw", 49 | "carpet", 50 | "grid"] 51 | 52 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 53 | 54 | def main(): 55 | 56 | transform = transforms.Compose([ 57 | transforms.ToTensor(), 58 | transforms.Resize(size=(args.size, args.size)), 59 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 60 | ]) 61 | 62 | test_auc_list = [] 63 | for inlier in CLASSES: 64 | # Prepare Data 65 | print("class", inlier) 66 | current_epoch = 0 67 | ckpt_path = f"{args.ckpt_path}/{inlier}.pth" 68 | img_dir = f"{args.data_path}/{inlier}" 69 | train_dataset = MVTecADDataset(img_dir, "train", transform) 70 | test_dataset = MVTecADDataset(img_dir, "test", transform, args.size) 71 | 72 | train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True) 73 | test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True) 74 | 75 | 76 | model = ConvVAE() if args.model == "vae" else AE() 77 | model = torch.nn.DataParallel(model) 78 | 79 | optimizer = Adam(model.parameters(), lr=args.learning_rate) 80 | save_checkpoint(model, optimizer, epoch=current_epoch, path=ckpt_path) 81 | 82 | loss_fn = vae_loss_fn if args.model == "vae" else ae_loss_fn 83 | 84 | highest_auc = 0 85 | while True: 86 | ckpt = torch.load(ckpt_path) 87 | epoch = ckpt["epoch"] 88 | 89 | if epoch == args.epochs: 90 | break 91 | 92 | model = ConvVAE() if args.model == "vae" else AE() 93 | model = nn.DataParallel(model) 94 | model.load_state_dict(ckpt["model"]) 95 | model.to(DEVICE) 96 | 97 | model.train() 98 | train_loss = train_step(train_loader, model, optimizer, loss_fn, DEVICE, args.model) 99 | 100 | model.eval() 101 | test_auc, test_loss = test_step(test_loader, model, loss_fn, DEVICE, args.model) 102 | 103 | print(f"Train Loss: {str(train_loss)} \t Test AUC: {str(test_auc)}") 104 | 105 | if test_auc > highest_auc: 106 | highest_auc = test_auc 107 | 108 | save_checkpoint(model, optimizer, epoch + 1, ckpt_path) 109 | 110 | test_auc_list.append(highest_auc) 111 | 112 | 113 | print(f"Average AUC: {str(np.mean(test_auc_list))}") 114 | 115 | ###################################################### 116 | 117 | if __name__ == "__main__": 118 | main() -------------------------------------------------------------------------------- /image/Defect_Detection/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from sklearn.metrics import roc_auc_score, roc_curve 5 | 6 | def get_auc(preds, lbls): 7 | preds = preds.flatten().cpu().numpy() 8 | lbls = lbls.flatten().cpu().numpy() 9 | 10 | auc = roc_auc_score(lbls, 
preds) 11 | return auc 12 | 13 | def save_checkpoint(model, opt, epoch, path): 14 | """Save Checkpoint""" 15 | 16 | torch.save({ 17 | "model": model.state_dict(), 18 | "opt": opt.state_dict(), 19 | "epoch": epoch 20 | }, 21 | path) 22 | 23 | 24 | def train_step(loader, model, optimizer, loss_fn, device, model_str): 25 | 26 | train_loss_list = [] 27 | 28 | for i, data in enumerate(loader): 29 | data = data.to(device) 30 | optimizer.zero_grad() 31 | if model_str == "vae": 32 | recon, mu, logvar = model(data) 33 | loss = loss_fn(data, recon, mu, logvar) 34 | else: 35 | recon = model(data) 36 | loss = loss_fn(data, recon) 37 | loss.backward() 38 | optimizer.step() 39 | train_loss_list.append(loss.item()) 40 | 41 | return np.mean(train_loss_list) 42 | 43 | def test_step(loader, model, loss_fn, device, model_str): 44 | 45 | loss_list, error_map_list, lbl_list = [], [], [] 46 | for i, (data, lbl) in enumerate(loader): 47 | data, lbl = data.to(device), lbl.to(device) 48 | 49 | with torch.no_grad(): 50 | if model_str == "vae": 51 | recon, mu, logvar = model(data) 52 | loss = loss_fn(data, recon, mu, logvar) 53 | else: 54 | recon = model(data) 55 | loss = loss_fn(data, recon) 56 | loss_list.append(loss.item()) 57 | error_map = torch.mean((data - recon)**2, dim=1).unsqueeze(1) 58 | error_map_list.append(error_map) 59 | lbl_list.append(lbl) 60 | 61 | error_maps = torch.cat(error_map_list, dim=0) 62 | lbls = torch.cat(lbl_list, dim=0) 63 | preds = (error_maps - torch.min(error_maps)) / (torch.max(error_maps) - torch.min(error_maps)) 64 | 65 | auc = get_auc(preds, lbls) 66 | loss = np.mean(loss_list) 67 | 68 | return auc, loss 69 | -------------------------------------------------------------------------------- /image/Road_Obstacle_Detection/README.md: -------------------------------------------------------------------------------- 1 | # Road Obstacle Detection 2 | 3 | ## Overview 4 | 5 | Detecting obstacles on the road/railway is a critical part of the driving task which has not been mastered by fully autonomous vehicles. Semantic segmentation plays an important role in addressing the challenges of identifying the locations of obstacles. In this phase of the project, we explore the application of semantic segmentation methods to the task of detecting road obstacles using the Lost and Found Dataset. The goal of the experiments is to determine which model architecture is the best for road obstacle detection - something that is of interest to both the practioner and researchers. 6 | 7 | ## Dataset 8 | The Lost and Found dataset was introduced to evaluate the performance of small road obstacle detection approaches. The Lost and Found Dataset includes 2k images recording from 13 different challenging street scenarios, featuring 37 different obstacles types. Each object is labeled with a unique ID, allowing for a later refinement into subcategories. An overview of the Lost and Found dataset is available below, which is refined into three classes: driveable area, non drivable area and obstacles. 9 | 10 |
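The refined labelling above is consumed as an integer class mask per pixel, and the segmentation models are trained with a standard multi-class cross entropy. A minimal sketch is below; the class count (background plus the three classes) and tensor shapes are illustrative assumptions, and the project's own training loop follows in `train.py`.

```
import torch
import torch.nn.functional as F

num_classes = 4                                         # assumed: background/void plus three classes
logits = torch.randn(2, num_classes, 128, 256)          # (B, C, H, W) model output
target = torch.randint(0, num_classes, (2, 128, 256))   # (B, H, W) integer class mask

loss = F.cross_entropy(logits, target)                  # per-pixel multi-class objective
print(loss.item())
```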

Figure 1: The Lost and Found Dataset.

17 | 18 | ## Results 19 | 20 |

Figure 2: The validation cross entropy loss for each model across epochs.

Figure 3: Visual results comparing the predictions made by each model for a test image.

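For reference, the validation cross entropy plotted in Figure 2 is the batch-averaged `F.cross_entropy` over the validation loader, as computed by `eval.py` further down. A condensed sketch:

```
import torch
import torch.nn.functional as F

@torch.no_grad()
def validation_ce(net, loader, device):
    net.eval()
    total = 0.0
    for imgs, masks in loader:
        imgs = imgs.to(device, dtype=torch.float32)
        masks = masks.to(device, dtype=torch.long)  # integer class mask, (B, H, W)
        total += F.cross_entropy(net(imgs), masks).item()
    net.train()
    return total / len(loader)
```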
35 | 36 | ## Running Code 37 | To configure the environment to run the experiments navigate to the base of this directory and execute the following commands: 38 | 39 | ``` 40 | conda create -n new_env 41 | conda activate new_env 42 | pip install -r requirements.txt 43 | ``` 44 | 45 | To obtain results for a specific architecture simply pass the appropriate arguments to the **train.py** script: 46 | ``` 47 | python train.py --epochs 10 --batch_size 4 48 | ``` 49 | 50 | The **train.py** script has the following arguments: 51 | - **epochs** (int): The number of epochs to train the memory. 52 | - **batch_size** (int) The batch size for training, validation and testing. 53 | - **learning_rate** (float): Learning rates of memory units. 54 | - **height** (int): Height of input image. 55 | - **width** (int): Width of input image. 56 | - **train_perc** (float): The proportion of samples used for train. 57 | - **data_path** (str): The root directory of the dataset. 58 | - **ckpt_path** (str): Path of checkpoint file. 59 | - **best_ckpt_path** (str): Path of checkpoint file for best performing model on the validation set. 60 | - **sample_path** (str): Path of file to save example images. 61 | 62 | 63 | -------------------------------------------------------------------------------- /image/Road_Obstacle_Detection/dice_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | 5 | class DiceCoeff(Function): 6 | """Dice coeff for individual examples""" 7 | 8 | def forward(self, input, target): 9 | self.save_for_backward(input, target) 10 | eps = 0.0001 11 | self.inter = torch.dot(input.view(-1), target.view(-1)) 12 | self.union = torch.sum(input) + torch.sum(target) + eps 13 | 14 | t = (2 * self.inter.float() + eps) / self.union.float() 15 | return t 16 | 17 | # This function has only a single output, so it gets only one gradient 18 | def backward(self, grad_output): 19 | 20 | input, target = self.saved_variables 21 | grad_input = grad_target = None 22 | 23 | if self.needs_input_grad[0]: 24 | grad_input = grad_output * 2 * (target * self.union - self.inter) \ 25 | / (self.union * self.union) 26 | if self.needs_input_grad[1]: 27 | grad_target = None 28 | 29 | return grad_input, grad_target 30 | 31 | 32 | def dice_coeff(input, target): 33 | """Dice coeff for batches""" 34 | if input.is_cuda: 35 | s = torch.FloatTensor(1).cuda().zero_() 36 | else: 37 | s = torch.FloatTensor(1).zero_() 38 | 39 | for i, c in enumerate(zip(input, target)): 40 | s = s + DiceCoeff().forward(c[0], c[1]) 41 | 42 | return s / (i + 1) 43 | -------------------------------------------------------------------------------- /image/Road_Obstacle_Detection/eval.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch 3 | from tqdm import tqdm 4 | 5 | from dice_loss import dice_coeff 6 | 7 | 8 | def eval_net(net, loader, device): 9 | """Evaluation without the densecrf with the dice coefficient""" 10 | net.eval() 11 | mask_type = torch.long 12 | n_val = len(loader) # the number of batch 13 | tot = 0 14 | 15 | with tqdm(total=n_val, desc='Validation round', unit='batch', leave=False) as pbar: 16 | for batch in loader: 17 | imgs, true_masks =batch #batch['image'], batch['mask'] 18 | #true_masks=(true_masks > 0.5).float() 19 | imgs = imgs.to(device=device, dtype=torch.float32) 20 | true_masks = true_masks.to(device=device, dtype=mask_type) 21 | 22 | with 
torch.no_grad(): 23 | mask_pred = net(imgs) 24 | 25 | 26 | tot += F.cross_entropy(mask_pred, true_masks).item() 27 | pbar.update() 28 | 29 | net.train() 30 | return tot / n_val 31 | -------------------------------------------------------------------------------- /image/Road_Obstacle_Detection/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import argparse 4 | from tqdm import tqdm 5 | import torch.nn.functional as F 6 | 7 | import numpy as np 8 | from PIL import Image 9 | import torch.utils.data as data 10 | 11 | import matplotlib.pyplot as plt 12 | 13 | import torch 14 | import torch.utils.data as data 15 | import torch.nn as nn 16 | from torch.utils.data import DataLoader 17 | from torch.nn import CrossEntropyLoss 18 | 19 | from torchvision.datasets import Cityscapes 20 | from torchvision.utils import make_grid 21 | 22 | from lf_loader import lostandfoundLoader 23 | 24 | from eval import eval_net 25 | 26 | from utils import train_step, val_step, get_model, save_viz, save_checkpoint 27 | 28 | parser = argparse.ArgumentParser(description="Feature Memory for Anomaly Detection") 29 | 30 | # basic config 31 | parser.add_argument('--epochs', type=int, default=2, help=' The number of epochs to train the memory.') 32 | parser.add_argument('--batch_size', type=int, default=4, help=' The batch size for training, validation and testing.') 33 | parser.add_argument('--learning_rate', type=float, default=3e-4, help='Learning rates of model.') 34 | parser.add_argument('--height', type=int, default=128, help='Height of input image') 35 | parser.add_argument('--width', type=int, default=256, help='Width of input image') 36 | parser.add_argument('--train_perc', type=float, default=.9, help='Proportion of samples to use in training set') 37 | parser.add_argument('--data_path', type=str, default="/scratch/ssd002/datasets/lostandfound", help='The root directory of the dataset.') 38 | parser.add_argument('--ckpt_path', type=str, default="ckpt/run_1.pth", help='The file to save model checkpoints.') 39 | parser.add_argument('--best_ckpt_path', type=str, default="ckpt/best_run_1.pth", help='The file to save best model checkpoint.') 40 | parser.add_argument('--sample_path', type=str, default="samples", help='The file to save best model checkpoint.') 41 | 42 | 43 | args = parser.parse_args() 44 | 45 | # Global Variables 46 | IMG_SIZE = (args.height, args.width) #H, W 47 | DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 48 | CURRENT_EPOCH = 0 49 | 50 | LF_MAP = { 51 | 0: (0, 0, 0), 52 | 1: (255, 0, 0), 53 | 2: (0, 255, 0), 54 | 3: (0, 0, 255), 55 | } 56 | 57 | def main(): 58 | 59 | # Prepare Dataset and Dataloader 60 | dataset = lostandfoundLoader(args.data_path, is_transform=True, augmentations=None) 61 | 62 | train_size = int(len(dataset) * args.train_perc) 63 | val_size = len(dataset) - train_size 64 | train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size]) 65 | 66 | train_dataloader = data.DataLoader(train_dataset, batch_size=args.batch_size, num_workers=2) 67 | val_dataloader = data.DataLoader(val_dataset, batch_size=args.batch_size, num_workers=2) 68 | 69 | model = get_model(pretrained=True) 70 | 71 | # Loss and Optimizer 72 | criterion = CrossEntropyLoss() 73 | opt = torch.optim.Adam(model.parameters(), lr=args.learning_rate) 74 | 75 | # Save Initial checkpoint to be subsquently restored from 76 | save_checkpoint(model, opt, epoch=CURRENT_EPOCH, path=args.ckpt_path) 77 | 78 
| train_loss_list = [] 79 | val_loss_list = [] 80 | max_val_loss = 1e10 81 | while True: 82 | # Load checkpoint 83 | ckpt = torch.load(args.ckpt_path) 84 | 85 | epoch = ckpt["epoch"] 86 | 87 | if epoch == args.epochs: 88 | break 89 | 90 | model = get_model(pretrained=False) 91 | model.load_state_dict(ckpt["model"]) 92 | model.to(DEVICE) 93 | 94 | opt = torch.optim.Adam(model.parameters(), lr=args.learning_rate) 95 | opt.load_state_dict(ckpt["opt"]) 96 | 97 | model.train() 98 | train_loss = train_step(model, opt, criterion, train_dataloader, epoch, DEVICE) 99 | train_loss_list.append(train_loss) 100 | 101 | model.eval() 102 | val_loss = val_step(model, criterion, val_dataloader, epoch, DEVICE, LF_MAP, args.sample_path) 103 | val_loss_list.append(val_loss) 104 | 105 | 106 | with open("train_loss.txt", "a") as myfile: 107 | myfile.write(f"{str(epoch)}\t{str(train_loss)}\n") 108 | 109 | with open("val_loss.txt", "a") as myfile: 110 | myfile.write(f"{str(epoch)}\t{str(val_loss)}\n") 111 | 112 | if val_loss < max_val_loss: 113 | torch.save({ 114 | "model": model.state_dict(), 115 | "opt": opt.state_dict(), 116 | "epoch": epoch, 117 | }, 118 | args.best_ckpt_path) 119 | 120 | save_checkpoint(model, opt, epoch + 1, args.ckpt_path) 121 | model.cpu() 122 | 123 | 124 | f, axarr = plt.subplots(1, 2, figsize=(20,20)) 125 | axarr[0].plot(train_loss_list) 126 | axarr[0].title.set_text("Train Loss") 127 | axarr[1].plot(val_loss_list) 128 | axarr[1].title.set_text("Validation Loss") 129 | 130 | fig_path = f"{args.sample_path}/loss_figure.jpg" 131 | f.savefig(fig_path) 132 | 133 | if __name__ == "__main__": 134 | main() -------------------------------------------------------------------------------- /image/Road_Obstacle_Detection/train_loss.txt: -------------------------------------------------------------------------------- 1 | 0 0.16359891243630725 2 | 1 0.08223168643055556 3 | -------------------------------------------------------------------------------- /image/Road_Obstacle_Detection/utils.py: -------------------------------------------------------------------------------- 1 | import tqdm 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from torchvision.models.segmentation import fcn_resnet50 10 | 11 | 12 | def train_step(model, opt, criterion, dataloader, epoch, device): 13 | losses = [] 14 | counter = 0 15 | for i, (img, lbl) in enumerate(dataloader): 16 | lbl = lbl.long() 17 | img, lbl = img.to(device), lbl.to(device) 18 | opt.zero_grad() 19 | out = model(img)["out"] 20 | loss = criterion(out, lbl) 21 | loss.backward() 22 | opt.step() 23 | losses.append(loss.item()) 24 | 25 | return np.mean(losses) 26 | 27 | def val_step(model, criterion, dataloader, epoch, device, lf_map, sample_path): 28 | losses = [] 29 | dices = [] 30 | viz = True 31 | for i, (img, lbl) in enumerate(dataloader): 32 | lbl = lbl.long() 33 | img, lbl = img.to(device), lbl.to(device) 34 | 35 | with torch.no_grad(): 36 | out = model(img)["out"] 37 | 38 | loss = criterion(out, lbl) 39 | losses.append(loss.item()) 40 | 41 | if viz: 42 | save_viz(img, out, lbl, lf_map, epoch, sample_path) 43 | viz = False 44 | 45 | return np.mean(losses) 46 | 47 | def save_viz(img, out, lbl, color_map, epoch, sample_path): 48 | img = img.cpu().numpy() 49 | out = out.cpu().numpy() 50 | lbl = lbl.cpu().numpy() 51 | rows = out.shape[2] 52 | cols = out.shape[3] 53 | 54 | masks = [] 55 | masks_gt = [] 56 | for index, (im, o, l) in enumerate(zip(img, out, lbl)): 57 | mask = np.zeros((rows, 
cols, 3), dtype=np.uint8) 58 | mask_gt = np.zeros((rows, cols, 3), dtype=np.uint8) 59 | for j in range(rows): 60 | for i in range(cols): 61 | mask[j, i] = color_map[np.argmax(o[:, j, i]-1, axis=0)] 62 | mask_gt[j, i] = color_map[l[j, i]] 63 | 64 | mask_path = f"{sample_path}/epoch_{str(epoch)}_pred_{str(index)}.jpg" 65 | lbl_path = f"{sample_path}/epoch_{str(epoch)}_lbl_{str(index)}.jpg" 66 | img_path = f"{sample_path}/epoch_{str(epoch)}_img_{str(index)}.jpg" 67 | f, axarr = plt.subplots(1, 3, figsize=(20, 20)) 68 | im = np.moveaxis(im, 0, -1) 69 | axarr[0].imshow(im) 70 | axarr[0].title.set_text('Image') 71 | axarr[1].imshow(mask_gt) 72 | axarr[1].title.set_text('Label') 73 | axarr[2].imshow(mask) 74 | axarr[2].title.set_text('Prediction') 75 | f.savefig( f"{sample_path}/epoch_{str(epoch)}_{str(index)}.jpg") 76 | 77 | def get_model(pretrained=False): 78 | # Prepare Model and Save to Checkpoint Directory 79 | model = fcn_resnet50(pretrained=pretrained) 80 | model.classifier[4] = nn.Conv2d(512, 4, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1)) 81 | model.aux_classifier = None 82 | model = nn.DataParallel(model) 83 | return model 84 | 85 | def save_checkpoint(model, opt, epoch, path): 86 | """Save Checkpoint""" 87 | 88 | torch.save({ 89 | "model": model.state_dict(), 90 | "opt": opt.state_dict(), 91 | "epoch": epoch, 92 | }, 93 | path) 94 | -------------------------------------------------------------------------------- /image/Road_Obstacle_Detection/val_loss.txt: -------------------------------------------------------------------------------- 1 | 0 0.08754148219640438 2 | 1 0.1392884962260723 3 | -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/OCR/charnet/__init__.py -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import _C as cfg 2 | -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/config/defaults.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong Technologies Co., Ltd. 2 | # All rights reserved. 3 | # 4 | # Contact: github@malong.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 
7 | 8 | from yacs.config import CfgNode as CN 9 | 10 | 11 | _C = CN() 12 | 13 | _C.INPUT_SIZE = 2280 14 | _C.SIZE_DIVISIBILITY = 1 15 | _C.WEIGHT= "" 16 | 17 | _C.CHAR_DICT_FILE = "" 18 | _C.WORD_LEXICON_PATH = "" 19 | 20 | _C.WORD_MIN_SCORE = 0.95 21 | _C.WORD_NMS_IOU_THRESH = 0.15 22 | _C.CHAR_MIN_SCORE = 0.25 23 | _C.CHAR_NMS_IOU_THRESH = 0.3 24 | _C.MAGNITUDE_THRESH = 0.2 25 | 26 | _C.WORD_STRIDE = 4 27 | _C.CHAR_STRIDE = 4 28 | _C.NUM_CHAR_CLASSES = 68 29 | 30 | _C.WORD_DETECTOR_DILATION = 1 31 | _C.RESULTS_SEPARATOR = chr(31) 32 | -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/OCR/charnet/modeling/__init__.py -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/OCR/charnet/modeling/backbone/__init__.py -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/modeling/backbone/decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong Technologies Co., Ltd. 2 | # All rights reserved. 3 | # 4 | # Contact: github@malong.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 7 | 8 | from torch import nn 9 | from collections import OrderedDict 10 | from torch.functional import F 11 | 12 | 13 | class Decoder(nn.Module): 14 | def __init__(self, in_channels_list, out_channels): 15 | super(Decoder, self).__init__() 16 | self.backbone_feature_reduction = nn.ModuleList() 17 | self.top_down_feature_reduction = nn.ModuleList() 18 | for i, in_channels in enumerate(in_channels_list[::-1]): 19 | self.backbone_feature_reduction.append( 20 | self._conv1x1_relu(in_channels, out_channels) 21 | ) 22 | if i < len(in_channels_list) - 2: 23 | self.top_down_feature_reduction.append( 24 | self._conv1x1_relu(out_channels, out_channels) 25 | ) 26 | 27 | def _conv1x1_relu(self, in_channels, out_channels): 28 | return nn.Sequential(OrderedDict([ 29 | ("conv", nn.Conv2d( 30 | in_channels, out_channels, 31 | kernel_size=1, stride=1, 32 | bias=False 33 | )), 34 | ("relu", nn.ReLU()) 35 | ])) 36 | 37 | def forward(self, x): 38 | x = x[::-1] # to lowest resolution first 39 | top_down_feature = None 40 | for i, feature in enumerate(x): 41 | feature = self.backbone_feature_reduction[i](feature) 42 | if i == 0: 43 | top_down_feature = feature 44 | else: 45 | upsampled_feature = F.interpolate( 46 | top_down_feature, 47 | size=feature.size()[-2:], 48 | mode='bilinear', 49 | align_corners=True 50 | ) 51 | if i < len(x) - 1: 52 | top_down_feature = self.top_down_feature_reduction[i - 1]( 53 | feature + upsampled_feature 54 | ) 55 | else: 56 | top_down_feature = feature + upsampled_feature 57 | return top_down_feature 58 | -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/modeling/backbone/hourglass.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong 
Technologies Co., Ltd. 2 | # All rights reserved. 3 | # 4 | # Contact: github@malong.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 7 | 8 | import torch 9 | from torch import nn 10 | import torch.nn.functional as F 11 | 12 | 13 | _norm_func = lambda num_features: nn.BatchNorm2d(num_features, eps=1e-5) 14 | 15 | 16 | def _make_layer(in_channels, out_channels, num_blocks, **kwargs): 17 | blocks = [] 18 | blocks.append(Residual(in_channels, out_channels)) 19 | for _ in range(1, num_blocks): 20 | blocks.append(Residual(out_channels, out_channels, **kwargs)) 21 | return nn.Sequential(*blocks) 22 | 23 | 24 | def _make_layer_revr(in_channels, out_channels, num_blocks, **kwargs): 25 | blocks = [] 26 | for _ in range(num_blocks - 1): 27 | blocks.append(Residual(in_channels, in_channels, **kwargs)) 28 | blocks.append(Residual(in_channels, out_channels, **kwargs)) 29 | return nn.Sequential(*blocks) 30 | 31 | 32 | class Residual(nn.Module): 33 | def __init__(self, in_channels, out_channels, stride=1): 34 | super(Residual, self).__init__() 35 | 36 | self.conv_1 = nn.Sequential( 37 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False), 38 | _norm_func(out_channels), 39 | nn.ReLU() 40 | ) 41 | self.conv_2 = nn.Sequential( 42 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=1, bias=False), 43 | _norm_func(out_channels) 44 | ) 45 | if stride != 1 or in_channels != out_channels: 46 | self.skip = nn.Sequential( 47 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False), 48 | _norm_func(out_channels) 49 | ) 50 | else: 51 | self.skip = None 52 | self.out_relu = nn.ReLU() 53 | 54 | def forward(self, x): 55 | b1 = self.conv_2(self.conv_1(x)) 56 | if self.skip is None: 57 | return self.out_relu(b1 + x) 58 | else: 59 | return self.out_relu(b1 + self.skip(x)) 60 | 61 | 62 | class HourGlassBlock(nn.Module): 63 | def __init__(self, n, channels, blocks): 64 | super(HourGlassBlock, self).__init__() 65 | 66 | self.up_1 = _make_layer(channels[0], channels[0], blocks[0]) 67 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 68 | self.low_1 = _make_layer(channels[0], channels[1], blocks[0]) 69 | if n <= 1: 70 | self.low_2 = _make_layer(channels[1], channels[1], blocks[1]) 71 | else: 72 | self.low_2 = HourGlassBlock(n - 1, channels[1:], blocks[1:]) 73 | self.low_3 = _make_layer_revr(channels[1], channels[0], blocks[0]) 74 | 75 | def forward(self, x): 76 | upsample = lambda input: F.interpolate(input, scale_factor=2, mode='bilinear', align_corners=True) 77 | up_1 = self.up_1(x) 78 | low = self.low_3(self.low_2(self.low_1(self.pool(x)))) 79 | return upsample(low) + up_1 80 | 81 | 82 | class HourGlassNet(nn.Module): 83 | def __init__(self, n, channels, blocks): 84 | super(HourGlassNet, self).__init__() 85 | self.pre = nn.Sequential( 86 | nn.Conv2d(3, 128, kernel_size=7, stride=2, padding=3, bias=False), 87 | _norm_func(128), 88 | nn.ReLU(), 89 | Residual(128, 256, stride=2) 90 | ) 91 | hourglass_blocks = [] 92 | for _ in range(2): 93 | hourglass_blocks.append( 94 | HourGlassBlock(n, channels, blocks) 95 | ) 96 | self.hourglass_blocks = nn.Sequential(*hourglass_blocks) 97 | 98 | def forward(self, x): 99 | return self.hourglass_blocks(self.pre(x)) 100 | 101 | 102 | def hourglass88(): 103 | return HourGlassNet(3, [256, 256, 256, 512], [2, 2, 2, 2]) 104 | -------------------------------------------------------------------------------- 
/image/fastlane/OCR/charnet/modeling/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong Technologies Co., Ltd. 2 | # All rights reserved. 3 | # 4 | # Contact: github@malong.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 7 | 8 | from .misc import Conv2d 9 | from .misc import ConvTranspose2d 10 | from .misc import BatchNorm2d 11 | from .misc import interpolate 12 | from .scale import Scale 13 | 14 | 15 | __all__ = [ 16 | "Conv2d", 17 | "ConvTranspose2d", 18 | "interpolate", 19 | "BatchNorm2d", 20 | "Scale" 21 | ] 22 | -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/modeling/layers/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | helper class that supports empty tensors on some nn functions. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in 6 | those functions. 7 | 8 | This can be removed once https://github.com/pytorch/pytorch/issues/12013 9 | is implemented 10 | """ 11 | 12 | import math 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | class _NewEmptyTensorOp(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, new_shape): 20 | ctx.shape = x.shape 21 | return x.new_empty(new_shape) 22 | 23 | @staticmethod 24 | def backward(ctx, grad): 25 | shape = ctx.shape 26 | return _NewEmptyTensorOp.apply(grad, shape), None 27 | 28 | 29 | class Conv2d(torch.nn.Conv2d): 30 | def forward(self, x): 31 | if x.numel() > 0: 32 | return super(Conv2d, self).forward(x) 33 | # get output shape 34 | 35 | output_shape = [ 36 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 37 | for i, p, di, k, d in zip( 38 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 39 | ) 40 | ] 41 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 42 | return _NewEmptyTensorOp.apply(x, output_shape) 43 | 44 | 45 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 46 | def forward(self, x): 47 | if x.numel() > 0: 48 | return super(ConvTranspose2d, self).forward(x) 49 | # get output shape 50 | 51 | output_shape = [ 52 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 53 | for i, p, di, k, d, op in zip( 54 | x.shape[-2:], 55 | self.padding, 56 | self.dilation, 57 | self.kernel_size, 58 | self.stride, 59 | self.output_padding, 60 | ) 61 | ] 62 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 63 | return _NewEmptyTensorOp.apply(x, output_shape) 64 | 65 | 66 | class BatchNorm2d(torch.nn.BatchNorm2d): 67 | def forward(self, x): 68 | if x.numel() > 0: 69 | return super(BatchNorm2d, self).forward(x) 70 | # get output shape 71 | output_shape = x.shape 72 | return _NewEmptyTensorOp.apply(x, output_shape) 73 | 74 | 75 | def interpolate( 76 | input, size=None, scale_factor=None, mode="nearest", align_corners=None 77 | ): 78 | if input.numel() > 0: 79 | return torch.nn.functional.interpolate( 80 | input, size, scale_factor, mode, align_corners 81 | ) 82 | 83 | def _check_size_scale_factor(dim): 84 | if size is None and scale_factor is None: 85 | raise ValueError("either size or scale_factor should be defined") 86 | if size is not None and scale_factor is not None: 87 | raise ValueError("only one of size or scale_factor should be defined") 88 | if ( 89 | scale_factor is not None 90 | and isinstance(scale_factor, 
tuple) 91 | and len(scale_factor) != dim 92 | ): 93 | raise ValueError( 94 | "scale_factor shape must match input shape. " 95 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 96 | ) 97 | 98 | def _output_size(dim): 99 | _check_size_scale_factor(dim) 100 | if size is not None: 101 | return size 102 | scale_factors = _ntuple(dim)(scale_factor) 103 | # math.floor might return float in py2.7 104 | return [ 105 | int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) 106 | ] 107 | 108 | output_shape = tuple(_output_size(2)) 109 | output_shape = input.shape[:-2] + output_shape 110 | return _NewEmptyTensorOp.apply(input, output_shape) 111 | -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/modeling/layers/scale.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong Technologies Co., Ltd. 2 | # All rights reserved. 3 | # 4 | # Contact: github@malong.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class Scale(nn.Module): 13 | def __init__(self, init_value=1.0): 14 | super(Scale, self).__init__() 15 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 16 | 17 | def forward(self, input): 18 | return input * self.scale 19 | -------------------------------------------------------------------------------- /image/fastlane/OCR/charnet/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong Technologies Co., Ltd. 2 | # All rights reserved. 3 | # 4 | # Contact: github@malong.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 
7 | 8 | import math 9 | 10 | 11 | def rotate_rect(x1, y1, x2, y2, degree, center_x, center_y): 12 | points = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]] 13 | new_points = list() 14 | for point in points: 15 | dx = point[0] - center_x 16 | dy = point[1] - center_y 17 | new_x = center_x + dx * math.cos(degree) - dy * math.sin(degree) 18 | new_y = center_y + dx * math.sin(degree) + dy * math.cos(degree) 19 | new_points.append([(new_x), (new_y)]) 20 | return new_points 21 | -------------------------------------------------------------------------------- /image/fastlane/OCR/configs/icdar2015_hourglass88.yaml: -------------------------------------------------------------------------------- 1 | INPUT_SIZE: 2280 2 | WEIGHT: "weights/icdar2015_hourglass88.pth" 3 | CHAR_DICT_FILE: "datasets/ICDAR2015/test/char_dict.txt" 4 | WORD_LEXICON_PATH: "datasets/ICDAR2015/test/GenericVocabulary.txt" 5 | RESULTS_SEPARATOR: "," 6 | SIZE_DIVISIBILITY: 128 7 | -------------------------------------------------------------------------------- /image/fastlane/OCR/datasets/ICDAR2015/test/char_dict.txt: -------------------------------------------------------------------------------- 1 | a0 2 | b1 3 | c2 4 | d3 5 | e4 6 | f5 7 | g6 8 | h7 9 | i8 10 | j9 11 | k10 12 | l11 13 | m12 14 | n13 15 | o14 16 | p15 17 | q16 18 | r17 19 | s18 20 | t19 21 | u20 22 | v21 23 | w22 24 | x23 25 | y24 26 | z25 27 | 026 28 | 127 29 | 228 30 | 329 31 | 430 32 | 531 33 | 632 34 | 733 35 | 834 36 | 935 37 | !36 38 | #37 39 | "38 40 | %39 41 | $40 42 | '41 43 | &42 44 | )43 45 | (44 46 | +45 47 | *46 48 | -47 49 | ,48 50 | /49 51 | .50 52 | ;51 53 | :52 54 | =53 55 | <54 56 | ?55 57 | >56 58 | @57 59 | [58 60 | ]59 61 | \60 62 | _61 63 | ^62 64 | `63 65 | {64 66 | }65 67 | |66 68 | ~67 69 | -------------------------------------------------------------------------------- /image/fastlane/OCR/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/OCR/sample.jpg -------------------------------------------------------------------------------- /image/fastlane/Object_Detection/dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from torch.utils.data import Dataset 3 | import pandas as pd 4 | import torch 5 | import numpy as np 6 | import os 7 | from PIL import Image 8 | from utils import iou_width_height 9 | 10 | class YOLODataset(Dataset): 11 | def __init__( 12 | self, 13 | csv_file, 14 | img_dir, 15 | label_dir, 16 | anchors, 17 | image_size=416, 18 | S=[13, 26, 52], 19 | C=20, 20 | transform=None, 21 | ): 22 | self.annotations = pd.read_csv(csv_file) 23 | self.img_dir = img_dir 24 | self.label_dir = label_dir 25 | self.image_size = image_size 26 | self.transform = transform 27 | self.S = S 28 | self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2]) # for all 3 scales 29 | self.num_anchors = self.anchors.shape[0] 30 | self.num_anchors_per_scale = self.num_anchors // 3 31 | self.C = C 32 | self.ignore_iou_thresh = 0.5 33 | 34 | def __len__(self): 35 | return len(self.annotations) 36 | 37 | def __getitem__(self, index): 38 | label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1]) 39 | bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist() 40 | img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0]) 41 | image = 
np.array(Image.open(img_path).convert("RGB")) 42 | 43 | if self.transform: 44 | augmentations = self.transform(image=image, bboxes=bboxes) 45 | image = augmentations["image"] 46 | bboxes = augmentations["bboxes"] 47 | 48 | # Below assumes 3 scale predictions (as paper) and same num of anchors per scale 49 | targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S] 50 | for box in bboxes: 51 | iou_anchors = iou_width_height(torch.tensor(box[2:4]), self.anchors) 52 | anchor_indices = iou_anchors.argsort(descending=True, dim=0) 53 | x, y, width, height, class_label = box 54 | has_anchor = [False] * 3 # each scale should have one anchor 55 | for anchor_idx in anchor_indices: 56 | scale_idx = anchor_idx // self.num_anchors_per_scale 57 | anchor_on_scale = anchor_idx % self.num_anchors_per_scale 58 | S = self.S[scale_idx] 59 | i, j = int(S * y), int(S * x) # which cell 60 | anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0] 61 | if not anchor_taken and not has_anchor[scale_idx]: 62 | targets[scale_idx][anchor_on_scale, i, j, 0] = 1 63 | x_cell, y_cell = S * x - j, S * y - i # both between [0,1] 64 | width_cell, height_cell = ( 65 | width * S, 66 | height * S, 67 | ) # can be greater than 1 since it's relative to cell 68 | box_coordinates = torch.tensor( 69 | [x_cell, y_cell, width_cell, height_cell] 70 | ) 71 | targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates 72 | targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label) 73 | has_anchor[scale_idx] = True 74 | 75 | elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh: 76 | targets[scale_idx][anchor_on_scale, i, j, 0] = -1 # ignore prediction 77 | 78 | return image, tuple(targets) 79 | -------------------------------------------------------------------------------- /image/fastlane/Object_Detection/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of YOLOv3 architecture 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | """ 9 | Information about architecture config: 10 | Tuple is structured by (filters, kernel_size, stride) 11 | Every conv is a same convolution. 
12 | List is structured by "B" indicating a residual block followed by the number of repeats 13 | "S" is for scale prediction block and computing the yolo loss 14 | "U" is for upsampling the feature map and concatenating with a previous layer 15 | """ 16 | config = [ 17 | (32, 3, 1), 18 | (64, 3, 2), 19 | ["B", 1], 20 | (128, 3, 2), 21 | ["B", 2], 22 | (256, 3, 2), 23 | ["B", 8], 24 | (512, 3, 2), 25 | ["B", 8], 26 | (1024, 3, 2), 27 | ["B", 4], # To this point is Darknet-53 28 | (512, 1, 1), 29 | (1024, 3, 1), 30 | "S", 31 | (256, 1, 1), 32 | "U", 33 | (256, 1, 1), 34 | (512, 3, 1), 35 | "S", 36 | (128, 1, 1), 37 | "U", 38 | (128, 1, 1), 39 | (256, 3, 1), 40 | "S", 41 | ] 42 | 43 | 44 | class CNNBlock(nn.Module): 45 | def __init__(self, in_channels, out_channels, bn_act=True, **kwargs): 46 | super().__init__() 47 | self.conv = nn.Conv2d(in_channels, out_channels, bias=not bn_act, **kwargs) 48 | self.bn = nn.BatchNorm2d(out_channels) 49 | self.leaky = nn.LeakyReLU(0.1) 50 | self.use_bn_act = bn_act 51 | 52 | def forward(self, x): 53 | if self.use_bn_act: 54 | return self.leaky(self.bn(self.conv(x))) 55 | else: 56 | return self.conv(x) 57 | 58 | 59 | class ResidualBlock(nn.Module): 60 | def __init__(self, channels, use_residual=True, num_repeats=1): 61 | super().__init__() 62 | self.layers = nn.ModuleList() 63 | for repeat in range(num_repeats): 64 | self.layers += [ 65 | nn.Sequential( 66 | CNNBlock(channels, channels // 2, kernel_size=1), 67 | CNNBlock(channels // 2, channels, kernel_size=3, padding=1), 68 | ) 69 | ] 70 | 71 | self.use_residual = use_residual 72 | self.num_repeats = num_repeats 73 | 74 | def forward(self, x): 75 | for layer in self.layers: 76 | if self.use_residual: 77 | x = x + layer(x) 78 | else: 79 | x = layer(x) 80 | 81 | return x 82 | 83 | 84 | class ScalePrediction(nn.Module): 85 | def __init__(self, in_channels, num_classes): 86 | super().__init__() 87 | self.pred = nn.Sequential( 88 | CNNBlock(in_channels, 2 * in_channels, kernel_size=3, padding=1), 89 | CNNBlock( 90 | 2 * in_channels, (num_classes + 5) * 3, bn_act=False, kernel_size=1 91 | ), 92 | ) 93 | self.num_classes = num_classes 94 | 95 | def forward(self, x): 96 | return ( 97 | self.pred(x) 98 | .reshape(x.shape[0], 3, self.num_classes + 5, x.shape[2], x.shape[3]) 99 | .permute(0, 1, 3, 4, 2) 100 | ) 101 | 102 | 103 | class YOLOv3(nn.Module): 104 | def __init__(self, in_channels=3, num_classes=80): 105 | super().__init__() 106 | self.num_classes = num_classes 107 | self.in_channels = in_channels 108 | self.layers = self._create_conv_layers() 109 | 110 | def forward(self, x): 111 | outputs = [] # for each scale 112 | route_connections = [] 113 | for layer in self.layers: 114 | if isinstance(layer, ScalePrediction): 115 | outputs.append(layer(x)) 116 | continue 117 | 118 | x = layer(x) 119 | 120 | if isinstance(layer, ResidualBlock) and layer.num_repeats == 8: 121 | route_connections.append(x) 122 | 123 | elif isinstance(layer, nn.Upsample): 124 | x = torch.cat([x, route_connections[-1]], dim=1) 125 | route_connections.pop() 126 | 127 | return outputs 128 | 129 | def _create_conv_layers(self): 130 | layers = nn.ModuleList() 131 | in_channels = self.in_channels 132 | 133 | for module in config: 134 | if isinstance(module, tuple): 135 | out_channels, kernel_size, stride = module 136 | layers.append( 137 | CNNBlock( 138 | in_channels, 139 | out_channels, 140 | kernel_size=kernel_size, 141 | stride=stride, 142 | padding=1 if kernel_size == 3 else 0, 143 | ) 144 | ) 145 | in_channels = out_channels 146 | 147 | 
elif isinstance(module, list): 148 | num_repeats = module[1] 149 | layers.append(ResidualBlock(in_channels, num_repeats=num_repeats,)) 150 | 151 | elif isinstance(module, str): 152 | if module == "S": 153 | layers += [ 154 | ResidualBlock(in_channels, use_residual=False, num_repeats=1), 155 | CNNBlock(in_channels, in_channels // 2, kernel_size=1), 156 | ScalePrediction(in_channels // 2, num_classes=self.num_classes), 157 | ] 158 | in_channels = in_channels // 2 159 | 160 | elif module == "U": 161 | layers.append(nn.Upsample(scale_factor=2),) 162 | in_channels = in_channels * 3 163 | 164 | return layers 165 | 166 | 167 | if __name__ == "__main__": 168 | num_classes = 20 169 | IMAGE_SIZE = 416 170 | model = YOLOv3(num_classes=num_classes) 171 | x = torch.randn((2, 3, IMAGE_SIZE, IMAGE_SIZE)) 172 | out = model(x) 173 | assert model(x)[0].shape == (2, 3, IMAGE_SIZE//32, IMAGE_SIZE//32, num_classes + 5) 174 | assert model(x)[1].shape == (2, 3, IMAGE_SIZE//16, IMAGE_SIZE//16, num_classes + 5) 175 | assert model(x)[2].shape == (2, 3, IMAGE_SIZE//8, IMAGE_SIZE//8, num_classes + 5) 176 | print("Success!") 177 | -------------------------------------------------------------------------------- /image/fastlane/Object_Detection/skynews-boeing-737-plane_5435020.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/Object_Detection/skynews-boeing-737-plane_5435020.jpg -------------------------------------------------------------------------------- /image/fastlane/README.md: -------------------------------------------------------------------------------- 1 | # Vector Fastlane 2 | 3 | You need to have conda installed on your machine. Follow these instructions: 4 | 5 | ``` 6 | conda create -n pytorch181 python=3.9 7 | conda activate pytorch181 8 | conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch 9 | pip install albumentations scikit-learn scikit-image matplotlib opencv-python yacs joblib natsort h5py tqdm 10 | pip install gdown addict future pyyaml requests scipy yapf editdistance pyclipper pandas==1.4.0 shapely==2.0.1 11 | ``` 12 | 13 | You can download the datasets and pretrained weights from this [link](https://drive.google.com/drive/folders/1qqK1uQsgkj0MT7yOhx33mTRlISy27QCA?usp=share_link). 
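If you prefer to script that download, the gdown package installed above can fetch the shared Drive folder directly. This is a minimal sketch, assuming the folder stays publicly accessible; the `./fastlane_assets` output directory is just an example:

```python
# Sketch: download the shared datasets/weights folder with gdown (installed above).
import gdown

URL = "https://drive.google.com/drive/folders/1qqK1uQsgkj0MT7yOhx33mTRlISy27QCA?usp=share_link"
gdown.download_folder(url=URL, output="./fastlane_assets", quiet=False)
```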
14 | -------------------------------------------------------------------------------- /video/Galbladder_Segmentation/GallbladderFiles/NOGO1_319 via_project_14May2021_13h54m.json: -------------------------------------------------------------------------------- 1 | {"_via_settings":{"ui":{"annotation_editor_height":25,"annotation_editor_fontsize":0.8,"leftsidebar_width":18,"image_grid":{"img_height":80,"rshape_fill":"none","rshape_fill_opacity":0.3,"rshape_stroke":"yellow","rshape_stroke_width":2,"show_region_shape":true,"show_image_policy":"all"},"image":{"region_label":"__via_region_id__","region_color":"__via_default_region_color__","region_label_font":"10px Sans","on_image_annotation_editor_placement":"NEAR_REGION"}},"core":{"buffer_size":18,"filepath":{},"default_filepath":""},"project":{"name":"NOGO1_319 via_project_14May2021_13h54m"}},"_via_img_metadata":{"frame_317_endo.png426401":{"filename":"frame_317_endo.png","size":426401,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[386,412,435,443,436,405,394],"all_points_y":[384,388,385,379,348,356,364]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[494,545,592,625,623,626,675,705,726,761,783,727,416,443,448,447,463,498],"all_points_y":[241,274,284,294,322,332,332,339,345,358,357,475,471,409,381,367,359,287]},"region_attributes":{}}],"file_attributes":{}},"frame_318_endo.png446373":{"filename":"frame_318_endo.png","size":446373,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[386,412,435,443,436,405,394],"all_points_y":[377,381,378,372,341,349,357]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[494,545,618,625,623,626,673,705,726,761,783,727,416,443,448,447,463,498],"all_points_y":[241,274,291,294,322,332,327,339,345,358,357,475,471,409,381,367,359,287]},"region_attributes":{}}],"file_attributes":{}},"frame_319_endo.png429032":{"filename":"frame_319_endo.png","size":429032,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[382,408,431,439,432,401,390],"all_points_y":[375,379,376,370,339,347,355]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[492,543,584,623,621,624,671,703,724,759,781,725,414,441,446,445,461,496],"all_points_y":[242,275,280,295,323,333,328,340,346,359,358,476,472,410,382,368,360,288]},"region_attributes":{}}],"file_attributes":{}},"frame_311_endo.png432467":{"filename":"frame_311_endo.png","size":432467,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[376,402,425,433,426,411,384],"all_points_y":[386,390,387,381,350,350,366]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[486,520,561,589,613,610,616,667,699,721,771,722,403,429,436,433,457,480],"all_points_y":[245,272,286,289,301,323,342,340,345,355,373,477,477,421,385,372,357,315]},"region_attributes":{}}],"file_attributes":{}},"frame_312_endo.png443219":{"filename":"frame_312_endo.png","size":443219,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[376,402,425,433,426,411,384],"all_points_y":[386,390,387,381,350,350,366]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[488,522,563,591,615,612,618,669,701,723,773,724,405,431,438,435,459,482],"all_points_y":[245,272,286,289,301,323,342,340,345,355,373,477,477,421,385,372,357,315]},"region_attributes":{}}],"file_attributes":{}},"frame_313_endo.png425114":{"filename":"frame_313_endo.png","size":425114,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[376,402,425,433,426,411,384],"all
_points_y":[386,390,387,381,350,350,366]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[489,524,565,593,617,614,620,671,703,725,775,726,407,433,441,437,461,484],"all_points_y":[252,273,287,290,302,324,343,341,346,356,374,478,478,422,399,373,358,316]},"region_attributes":{}}],"file_attributes":{}},"frame_314_endo.png438937":{"filename":"frame_314_endo.png","size":438937,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[379,405,428,436,429,414,387],"all_points_y":[389,393,390,384,353,353,369]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[491,526,567,595,619,616,622,673,705,727,777,728,409,435,443,439,463,486],"all_points_y":[254,275,289,292,304,326,345,343,348,358,376,480,480,424,401,375,360,318]},"region_attributes":{}}],"file_attributes":{}},"frame_315_endo.png391587":{"filename":"frame_315_endo.png","size":391587,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[379,405,428,436,429,414,387],"all_points_y":[389,393,390,384,353,353,369]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[488,539,586,619,617,620,669,699,720,755,777,724,409,437,442,441,457,492],"all_points_y":[248,281,291,301,329,339,339,346,352,365,364,475,473,416,388,374,366,294]},"region_attributes":{}}],"file_attributes":{}},"frame_316_endo.png438069":{"filename":"frame_316_endo.png","size":438069,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[379,405,428,436,429,414,387],"all_points_y":[389,393,390,384,353,353,369]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[490,541,588,621,619,622,671,701,722,757,779,723,412,439,444,443,459,494],"all_points_y":[243,276,286,296,324,334,334,341,347,360,359,477,473,411,383,369,361,289]},"region_attributes":{}}],"file_attributes":{}}},"_via_attributes":{"region":{},"file":{}},"_via_data_format_version":"2.0.10","_via_image_id_list":["frame_317_endo.png426401","frame_318_endo.png446373","frame_319_endo.png429032","frame_311_endo.png432467","frame_312_endo.png443219","frame_313_endo.png425114","frame_314_endo.png438937","frame_315_endo.png391587","frame_316_endo.png438069"]} -------------------------------------------------------------------------------- /video/Galbladder_Segmentation/README.md: -------------------------------------------------------------------------------- 1 | # Gallbladder Segmentation 2 | 3 | To work with this project, it is a prerequisite to install detectron2 and it's dependencies. The instructions for this is in the website https://github.com/facebookresearch/detectron2 4 | 5 | ## Training for classes for personal project in detectron2: 6 | 7 | Detectron2 has a prespecified workflow for common machine learning datasets such as COCO, Pascal VOC, and cityscapes. There are also arrangements for the tasks that can be performed within these datasets such as object detection, and the different types of segmentation (see "detectron2/configs/" folder). However, there are some additions required for the sake of using detectron2 in custom projects and external datasets. In our case, we are trying to use detectron2 to detect the No-Go-Zone in a laparoscopic surgery. 8 | 9 | To enable this, we first had to register the dataset under MetadataCatalog and DatasetCatalog. We need the dataset to be in a specific list-of-dictionaries format (keys=filename, imageId, height, width, annotations). 
Next, we simply had to call the DatasetCatalog and MetadataCatalog objects to register the training and evaluation splits of the dataset and the classes within them. A tutorial for this can be found in the official detectron2 Colab notebook at "https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5", in the "Train on a custom dataset" section. 10 | 11 | Also, to perform periodic evaluation during training, we followed the recommendations given in the build_evaluator method of the defaults.py module. We added a class called MyTrainer, which inherits DefaultTrainer from detectron2.engine. Another addition was the LossEvalHook class, which inherits HookBase from detectron2.engine.hooks. This lets us register our own events on which evaluation steps automatically take place during training. 12 | 13 | ## Changes made to default demo workflow: 14 | 15 | Another change was required for creating an output video. In the VisualizationDemo class of predictor.py, we have to make sure the metadata it picks up comes from our dataset, so in __init__ we set the metadata with the line 16 | 17 | `self.metadata = MetadataCatalog.get("bladder_val")` 18 | 19 | instead of the original line used to set the self.metadata variable. To keep the colour the same across all frames, we added a line in video_visualizer.py that hard-codes the colour by building a list that repeats the same RGB value; for example, we added "colors=[[0,0.502,0.502]]*10" after the line where the colour is chosen. 20 | 21 | For video output smoothing, we added code that accumulates the area of the segment predictions over an interval of frames and writes the averaged result to the video, so the predictions look stable over that interval. These changes were made in video_visualizer.py and predictor.py, and mainly consisted of a buffer value that sets the averaging interval for the prediction mask area, a mechanism to retain the masks until the buffer criterion is met, and a signal passed to the draw_instance_predictions method in video_visualizer.py. 22 | 23 | ## Instructions to run training and inference: 24 | 25 | To run the training, there is an slrm file called runt4v1Detectron.slrm. 
Essentially that file runs the command: 26 | 27 | ```python 28 | python DetectronGBScript.py 29 | --wd 30 | --ims 31 | --lr 32 | --e 33 | --roi 34 | --d 35 | ``` 36 | 37 | Example: 38 | 39 | 40 | ```python 41 | python DetectronGBScript.py 42 | --wd 0.0001 43 | --ims 8 44 | --lr 0.00001 45 | --e 30 46 | --roi 512 47 | --d 'detectron2/output/0.0001_8_0.00001_30/' 48 | ``` 49 | 50 | Also, the command in the jupyter notebook that we used from the root directory to run the inference on video and save the output is in the format: 51 | 52 | ```python 53 | %run detectron2/demo/demo.py 54 | --config-file 55 | --video-input 56 | --confidence-threshold 57 | --output 58 | --opts MODEL.WEIGHTS MODEL.ROI_HEADS.NUM_CLASSES 59 | ``` 60 | 61 | Example: 62 | 63 | ```python 64 | %run detectron2/demo/demo.py 65 | --config-file detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml 66 | --video-input video64.mp4 67 | --confidence-threshold 0.7 68 | --output video-outputBigger2.mkv 69 | --opts MODEL.WEIGHTS './output/model_final-Confident20210818.pth' MODEL.ROI_HEADS.NUM_CLASSES 1 70 | ``` 71 | 72 | ## Step By Step Tutorial: 73 | 74 | For a step by step notebook tutorial, go to Detectron2StepByStep.ipynb in this folder and run the notebook cells 75 | 76 | ## Set Up Instructions For Step-By-Step tutorial: 77 | 78 | To run the step-by-step training, 79 | - Put the JSON annotation files in the "/GallbladderFiles/" folder. 80 | - The images from the JSON file annotations need to be in the exact location as in the JSON file with respect to the root of this project. For example, there are 2 folders where the images from the JSON file are specified, "archive", and "CTC_Frames". 81 | - The segmentation masks used for training also have to be stored in the "/gallBladder/masks/" folder 82 | - Further description of the steps are in the notebook itself. 83 | 84 | -------------------------------------------------------------------------------- /video/Galbladder_Segmentation/_launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 1 4 | #SBATCH --gres=gpu:1 5 | #SBATCH -p p100 6 | #SBATCH --cpus-per-task=2 7 | #SBATCH --time=180:00:00 8 | #SBATCH --mem=12GB 9 | #SBATCH --job-name=gabgab 10 | #SBATCH --output=gabgab_job_%j.out 11 | 12 | . /etc/profile.d/lmod.sh 13 | . 
grandproj.env 14 | module use /pkgs/environment-modules/ 15 | module load pytorch1.7.1-cuda10.2-python3.6 16 | /h/skhalid/cv_vector/_runner.sh 17 | #(while true; do nvidia-smi; top -b -n 1 | head -20; sleep 10; done) & 18 | #python /h/skhalid/pytorch.py 19 | #wait 20 | -------------------------------------------------------------------------------- /video/Galbladder_Segmentation/_runner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | jupyter nbconvert dataLoaderGBPract.ipynb --to python 4 | ipython ./dataLoaderGBPract.py 5 | -------------------------------------------------------------------------------- /video/Galbladder_Segmentation/grandproj.env: -------------------------------------------------------------------------------- 1 | export PATH=/pkgs/anaconda3/bin:$PATH 2 | export PYTHONPATH=/h/$USER/grandproj:$PYTHONPATH 3 | -------------------------------------------------------------------------------- /video/Galbladder_Segmentation/jsonOutput/bladder_val_coco_format.json.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/video/Galbladder_Segmentation/jsonOutput/bladder_val_coco_format.json.lock -------------------------------------------------------------------------------- /video/Galbladder_Segmentation/jsonOutput/instances_predictions.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/video/Galbladder_Segmentation/jsonOutput/instances_predictions.pth -------------------------------------------------------------------------------- /video/Galbladder_Segmentation/runt4v1Detectron.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # The lines that start with #SBATCH are read by slurm to set up the job 4 | # any #SBATCH argument after the first non-empty/non-comment line will be ignored 5 | 6 | #SBATCH --job-name=abc123 7 | # Change this for a different type of GPU 8 | #SBATCH --partition=t4v1 9 | 10 | # Change this for a different quality of service (priority) 11 | #SBATCH --qos=normal 12 | 13 | # Change this for request different number of CPUs/GPU/Memory, they must fit on a single node 14 | #SBATCH --cpus-per-task=2 15 | #SBATCH --gres=gpu:1 16 | #SBATCH --mem=8G 17 | 18 | # stdout/err are directed to file, these two arguments speficify where they should go, %j is a formatter for the job id 19 | #SBATCH --output=./%j_testJob.out 20 | #SBATCH --error=./%j_testJob.err 21 | 22 | # Set the file mode to append, otherwise preemption resets the file and the previous output will be overwritten 23 | #SBATCH --open-mode=append 24 | 25 | 26 | if [ -z "$SLURM_JOB_ID" ] 27 | then 28 | echo ------------- FAILED ---------------- 29 | echo \$SLURM_JOB_ID is empty, did you launch the script with "sbatch" ? 
30 | exit 31 | else 32 | echo Job $SLRUM_JOB_ID is running 33 | fi 34 | 35 | 36 | module load vector_cv_project 37 | hostname 38 | which python 39 | nvidia-smi 40 | 41 | echo "This goes to stderr" 1>&2 42 | 43 | wd=0.0001 44 | ims=8 45 | lr=0.00001 46 | e=30 47 | roi=512 48 | #d='lr2e_5_ims_4_wd_1e_5' 49 | 50 | touch $SLURM_JOB_ID'_'$wd'_'$ims'_'$lr'_'$e'_.txt' 51 | 52 | python DetectronGBScript.py --wd $wd --ims $ims --lr $lr --e $e --roi $roi --d 'detectron2/output/'$wd'_'$ims'_'$lr'_'$e'/' 53 | 54 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | MANIFEST 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # trained models 34 | *.pth 35 | 36 | # log files 37 | *.log 38 | 39 | # text files 40 | *.txt 41 | 42 | # Some folders 43 | backup/ 44 | ucf_detections/ 45 | jhmdb_detections/ 46 | dota_detections/ 47 | weights/ 48 | vector_cv_tools 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # SageMath parsed files 93 | *.sage.py 94 | 95 | # Environments 96 | .env 97 | .venv 98 | env/ 99 | venv/ 100 | ENV/ 101 | env.bak/ 102 | venv.bak/ 103 | 104 | # Spyder project settings 105 | .spyderproject 106 | .spyproject 107 | 108 | # Rope project settings 109 | .ropeproject 110 | 111 | # mkdocs documentation 112 | /site 113 | 114 | # mypy 115 | .mypy_cache/ 116 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.idea/deployment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 
-------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.idea/webServers.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/.idea/yowo.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/README.md: -------------------------------------------------------------------------------- 1 | # Vector CV Project - Two Stream Traffic Accident Detection with YOWO 2 | 3 | In this project, we use ***YOWO*** (***Y**ou **O**nly **W**atch **O**nce*), a unified CNN architecture designed for real-time spatiotemporal action localization, to detect traffic accidents in video. The codebase is built off of the [YOWO PyTorch Official Repository](https://github.com/wei-tim/YOWO). 4 | 5 | The repository contains PyTorch code for accident detection on the DoTA dataset and spatiotemporal action localization AVA, UCF101-24 and JHMDB datasets! 6 | 7 | Please see the [Computer Vision Project Report - (https://vectorinstitute.ai/wp-content/uploads/2022/05/computer_vision_project_report_may252022.pdf]() for more details on the project and method. 8 | 9 | ## Installation 10 | 11 | ### Datasets 12 | 13 | * DOTA : Download from [here](https://github.com/MoonBlvd/Detection-of-Traffic-Anomaly) 14 | * AVA : Download from [here](https://github.com/cvdfoundation/ava-dataset) 15 | * UCF101-24: Download from [here](https://drive.google.com/file/d/1o2l6nYhd-0DDXGP-IPReBP4y1ffVmGSE/view?usp=sharing) 16 | * J-HMDB-21: Download from [here](http://jhmdb.is.tue.mpg.de/challenge/JHMDB/datasets) 17 | 18 | Use instructions [here](https://github.com/facebookresearch/SlowFast/blob/master/slowfast/datasets/DATASET.md) for the preparation of AVA dataset. 19 | 20 | Modify the paths in ucf24.data and jhmdb21.data under cfg directory accordingly. 21 | Download the dataset annotations from [here](https://www.dropbox.com/sh/16jv2kwzom1pmlt/AABL3cFWDfG5MuH9PwnjSJf0a?dl=0). 22 | 23 | ### Download backbone pretrained weights 24 | 25 | * Darknet-19 weights can be downloaded via: 26 | ```bash 27 | wget http://pjreddie.com/media/files/yolo.weights 28 | ``` 29 | 30 | * ResNet pretrained models can be downloaded from [here](https://drive.google.com/drive/folders/1zvl89AgFAApbH0At-gMuZSeQB_LpNP-M?usp=sharing). 31 | * For resource efficient 3D CNN architectures (ShuffleNet, ShuffleNetv2, MobileNet, MobileNetv2), pretrained models can be downloaded from [here](https://github.com/okankop/Efficient-3DCNNs). 
32 | 33 | ### Pretrained YOWO models 34 | 35 | Pretrained models for UCF101-24 and J-HMDB-21 datasets can be downloaded from [here](https://www.dropbox.com/sh/16jv2kwzom1pmlt/AABL3cFWDfG5MuH9PwnjSJf0a?dl=0). 36 | Pretrained models for AVA dataset can be downloaded from [here](https://drive.google.com/drive/folders/1g-jTfxCV9_uNFr61pjo4VxNfgDlbWLlb?usp=sharing). 37 | 38 | All materials (annotations and pretrained models) are also available in Baiduyun Disk: 39 | [here](https://pan.baidu.com/s/1yaOYqzcEx96z9gAkOhMnvQ) with password 95mm 40 | 41 | ## Running the code 42 | 43 | * All training configurations are given in cfg/dota_train.yaml cfg/ava.yaml cfg/ucf24.yaml, and cfg/jhmdb.yaml files. 44 | * DoTA training: 45 | ```bash 46 | python main_dota.py --cfg cfg/dota_train.yaml 47 | ``` 48 | * AVA training: 49 | ```bash 50 | python main.py --cfg cfg/ava.yaml 51 | ``` 52 | * UCF101-24 training: 53 | ```bash 54 | python main.py --cfg cfg/ucf24.yaml 55 | ``` 56 | * J-HMDB-21 training: 57 | ```bash 58 | python main.py --cfg cfg/jhmdb.yaml 59 | ``` 60 | 61 | ## Validating the model 62 | 63 | * For AVA dataset, after each epoch, validation is performed and frame-mAP score is provided. 64 | 65 | * Note that calculating frame-mAP with DoTA is not currently implemented and precision and recall from the validation epoch are used as the main evaluation metrics. 66 | 67 | * For DoTA, UCF101-24 and J-HMDB-21 datasets, after each validation, frame detections is recorded under 'dota_detections', 'jhmdb_detections' or 'ucf_detections'. From [here](https://www.dropbox.com/sh/16jv2kwzom1pmlt/AABL3cFWDfG5MuH9PwnjSJf0a?dl=0), 'groundtruths_jhmdb.zip' and 'groundtruths_jhmdb.zip' should be downloaded and extracted to "evaluation/Object-Detection-Metrics". Then, run the following command to calculate frame_mAP. 68 | 69 | ```bash 70 | python evaluation/Object-Detection-Metrics/pascalvoc.py --gtfolder PATH-TO-GROUNDTRUTHS-FOLDER --detfolder PATH-TO-DETECTIONS-FOLDER 71 | 72 | ``` 73 | 74 | * For video_mAP, set the pretrained model in the correct yaml file and run: 75 | ```bash 76 | python video_mAP.py --cfg cfg/ucf24.yaml 77 | ``` 78 | 79 | ## Running on a test video 80 | 81 | * You can run AVA pretrained model on any test video with the following code: 82 | ```bash 83 | python test_video_ava.py --cfg cfg/ava.yaml 84 | ``` 85 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Gongfan Fang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc import VOCSegmentation 2 | from .cityscapes import Cityscapes -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import hashlib 4 | import errno 5 | from tqdm import tqdm 6 | 7 | 8 | def gen_bar_updater(pbar): 9 | def bar_update(count, block_size, total_size): 10 | if pbar.total is None and total_size: 11 | pbar.total = total_size 12 | progress_bytes = count * block_size 13 | pbar.update(progress_bytes - pbar.n) 14 | 15 | return bar_update 16 | 17 | 18 | def check_integrity(fpath, md5=None): 19 | if md5 is None: 20 | return True 21 | if not os.path.isfile(fpath): 22 | return False 23 | md5o = hashlib.md5() 24 | with open(fpath, 'rb') as f: 25 | # read in 1MB chunks 26 | for chunk in iter(lambda: f.read(1024 * 1024), b''): 27 | md5o.update(chunk) 28 | md5c = md5o.hexdigest() 29 | if md5c != md5: 30 | return False 31 | return True 32 | 33 | 34 | def makedir_exist_ok(dirpath): 35 | """ 36 | Python2 support for os.makedirs(.., exist_ok=True) 37 | """ 38 | try: 39 | os.makedirs(dirpath) 40 | except OSError as e: 41 | if e.errno == errno.EEXIST: 42 | pass 43 | else: 44 | raise 45 | 46 | 47 | def download_url(url, root, filename=None, md5=None): 48 | """Download a file from a url and place it in root. 49 | Args: 50 | url (str): URL to download file from 51 | root (str): Directory to place downloaded file in 52 | filename (str): Name to save the file under. If None, use the basename of the URL 53 | md5 (str): MD5 checksum of the download. If None, do not check 54 | """ 55 | from six.moves import urllib 56 | 57 | root = os.path.expanduser(root) 58 | if not filename: 59 | filename = os.path.basename(url) 60 | fpath = os.path.join(root, filename) 61 | 62 | makedir_exist_ok(root) 63 | 64 | # downloads file 65 | if os.path.isfile(fpath) and check_integrity(fpath, md5): 66 | print('Using downloaded and verified file: ' + fpath) 67 | else: 68 | try: 69 | print('Downloading ' + url + ' to ' + fpath) 70 | urllib.request.urlretrieve( 71 | url, fpath, 72 | reporthook=gen_bar_updater(tqdm(unit='B', unit_scale=True)) 73 | ) 74 | except OSError: 75 | if url[:5] == 'https': 76 | url = url.replace('https:', 'http:') 77 | print('Failed download. Trying https -> http instead.' 
78 | ' Downloading ' + url + ' to ' + fpath) 79 | urllib.request.urlretrieve( 80 | url, fpath, 81 | reporthook=gen_bar_updater(tqdm(unit='B', unit_scale=True)) 82 | ) 83 | 84 | 85 | def list_dir(root, prefix=False): 86 | """List all directories at a given root 87 | Args: 88 | root (str): Path to directory whose folders need to be listed 89 | prefix (bool, optional): If true, prepends the path to each result, otherwise 90 | only returns the name of the directories found 91 | """ 92 | root = os.path.expanduser(root) 93 | directories = list( 94 | filter( 95 | lambda p: os.path.isdir(os.path.join(root, p)), 96 | os.listdir(root) 97 | ) 98 | ) 99 | 100 | if prefix is True: 101 | directories = [os.path.join(root, d) for d in directories] 102 | 103 | return directories 104 | 105 | 106 | def list_files(root, suffix, prefix=False): 107 | """List all files ending with a suffix at a given root 108 | Args: 109 | root (str): Path to directory whose folders need to be listed 110 | suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png'). 111 | It uses the Python "str.endswith" method and is passed directly 112 | prefix (bool, optional): If true, prepends the path to each result, otherwise 113 | only returns the name of the files found 114 | """ 115 | root = os.path.expanduser(root) 116 | files = list( 117 | filter( 118 | lambda p: os.path.isfile(os.path.join(root, p)) and p.endswith(suffix), 119 | os.listdir(root) 120 | ) 121 | ) 122 | 123 | if prefix is True: 124 | files = [os.path.join(root, d) for d in files] 125 | 126 | return files -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .stream_metrics import StreamSegMetrics, AverageMeter 2 | 3 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/metrics/stream_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import confusion_matrix 3 | 4 | class _StreamMetrics(object): 5 | def __init__(self): 6 | """ Overridden by subclasses """ 7 | raise NotImplementedError() 8 | 9 | def update(self, gt, pred): 10 | """ Overridden by subclasses """ 11 | raise NotImplementedError() 12 | 13 | def get_results(self): 14 | """ Overridden by subclasses """ 15 | raise NotImplementedError() 16 | 17 | def to_str(self, metrics): 18 | """ Overridden by subclasses """ 19 | raise NotImplementedError() 20 | 21 | def reset(self): 22 | """ Overridden by subclasses """ 23 | raise NotImplementedError() 24 | 25 | class StreamSegMetrics(_StreamMetrics): 26 | """ 27 | Stream Metrics for Semantic Segmentation Task 28 | """ 29 | def __init__(self, n_classes): 30 | self.n_classes = n_classes 31 | self.confusion_matrix = np.zeros((n_classes, n_classes)) 32 | 33 | def update(self, label_trues, label_preds): 34 | for lt, lp in zip(label_trues, label_preds): 35 | self.confusion_matrix += self._fast_hist( lt.flatten(), lp.flatten() ) 36 | 37 | @staticmethod 38 | def to_str(results): 39 | string = "\n" 40 | for k, v in results.items(): 41 | if k!="Class IoU": 42 | string += "%s: %f\n"%(k, v) 43 | 44 | #string+='Class IoU:\n' 45 | #for k, v in results['Class IoU'].items(): 46 | # string += "\tclass %d: %f\n"%(k, v) 47 | return string 48 | 49 | def _fast_hist(self, label_true, 
label_pred): 50 | mask = (label_true >= 0) & (label_true < self.n_classes) 51 | hist = np.bincount( 52 | self.n_classes * label_true[mask].astype(int) + label_pred[mask], 53 | minlength=self.n_classes ** 2, 54 | ).reshape(self.n_classes, self.n_classes) 55 | return hist 56 | 57 | def get_results(self): 58 | """Returns accuracy score evaluation result. 59 | - overall accuracy 60 | - mean accuracy 61 | - mean IU 62 | - fwavacc 63 | """ 64 | hist = self.confusion_matrix 65 | acc = np.diag(hist).sum() / hist.sum() 66 | acc_cls = np.diag(hist) / hist.sum(axis=1) 67 | acc_cls = np.nanmean(acc_cls) 68 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 69 | mean_iu = np.nanmean(iu) 70 | freq = hist.sum(axis=1) / hist.sum() 71 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 72 | cls_iu = dict(zip(range(self.n_classes), iu)) 73 | 74 | return { 75 | "Overall Acc": acc, 76 | "Mean Acc": acc_cls, 77 | "FreqW Acc": fwavacc, 78 | "Mean IoU": mean_iu, 79 | "Class IoU": cls_iu, 80 | } 81 | 82 | def reset(self): 83 | self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) 84 | 85 | class AverageMeter(object): 86 | """Computes average values""" 87 | def __init__(self): 88 | self.book = dict() 89 | 90 | def reset_all(self): 91 | self.book.clear() 92 | 93 | def reset(self, id): 94 | item = self.book.get(id, None) 95 | if item is not None: 96 | item[0] = 0 97 | item[1] = 0 98 | 99 | def update(self, id, val): 100 | record = self.book.get(id, None) 101 | if record is None: 102 | self.book[id] = [val, 1] 103 | else: 104 | record[0]+=val 105 | record[1]+=1 106 | 107 | def get_results(self, id): 108 | record = self.book.get(id, None) 109 | assert record is not None 110 | return record[0] / record[1] 111 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/network/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import * 2 | from ._deeplab import convert_to_separable_conv -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/network/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from . import resnet 2 | from . import mobilenetv2 3 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/network/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import torch.nn.functional as F 5 | from collections import OrderedDict 6 | 7 | class _SimpleSegmentationModel(nn.Module): 8 | def __init__(self, backbone, classifier): 9 | super(_SimpleSegmentationModel, self).__init__() 10 | self.backbone = backbone 11 | self.classifier = classifier 12 | 13 | def forward(self, x): 14 | input_shape = x.shape[-2:] 15 | features = self.backbone(x) 16 | x = self.classifier(features) 17 | x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False) 18 | return x 19 | 20 | 21 | class IntermediateLayerGetter(nn.ModuleDict): 22 | """ 23 | Module wrapper that returns intermediate layers from a model 24 | 25 | It has a strong assumption that the modules have been registered 26 | into the model in the same order as they are used. 
27 | This means that one should **not** reuse the same nn.Module 28 | twice in the forward if you want this to work. 29 | 30 | Additionally, it is only able to query submodules that are directly 31 | assigned to the model. So if `model` is passed, `model.feature1` can 32 | be returned, but not `model.feature1.layer2`. 33 | 34 | Arguments: 35 | model (nn.Module): model on which we will extract the features 36 | return_layers (Dict[name, new_name]): a dict containing the names 37 | of the modules for which the activations will be returned as 38 | the key of the dict, and the value of the dict is the name 39 | of the returned activation (which the user can specify). 40 | 41 | Examples:: 42 | 43 | >>> m = torchvision.models.resnet18(pretrained=True) 44 | >>> # extract layer1 and layer3, giving as names `feat1` and feat2` 45 | >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m, 46 | >>> {'layer1': 'feat1', 'layer3': 'feat2'}) 47 | >>> out = new_m(torch.rand(1, 3, 224, 224)) 48 | >>> print([(k, v.shape) for k, v in out.items()]) 49 | >>> [('feat1', torch.Size([1, 64, 56, 56])), 50 | >>> ('feat2', torch.Size([1, 256, 14, 14]))] 51 | """ 52 | def __init__(self, model, return_layers): 53 | if not set(return_layers).issubset([name for name, _ in model.named_children()]): 54 | raise ValueError("return_layers are not present in model") 55 | 56 | orig_return_layers = return_layers 57 | return_layers = {k: v for k, v in return_layers.items()} 58 | layers = OrderedDict() 59 | for name, module in model.named_children(): 60 | layers[name] = module 61 | if name in return_layers: 62 | del return_layers[name] 63 | if not return_layers: 64 | break 65 | 66 | super(IntermediateLayerGetter, self).__init__(layers) 67 | self.return_layers = orig_return_layers 68 | 69 | def forward(self, x): 70 | out = OrderedDict() 71 | for name, module in self.named_children(): 72 | x = module(x) 73 | if name in self.return_layers: 74 | out_name = self.return_layers[name] 75 | out[out_name] = x 76 | return out 77 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | from .visualizer import Visualizer 3 | from .scheduler import PolyLR 4 | from .loss import FocalLoss -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | 5 | class FocalLoss(nn.Module): 6 | def __init__(self, alpha=1, gamma=0, size_average=True, ignore_index=255): 7 | super(FocalLoss, self).__init__() 8 | self.alpha = alpha 9 | self.gamma = gamma 10 | self.ignore_index = ignore_index 11 | self.size_average = size_average 12 | 13 | def forward(self, inputs, targets): 14 | ce_loss = F.cross_entropy( 15 | inputs, targets, reduction='none', ignore_index=self.ignore_index) 16 | pt = torch.exp(-ce_loss) 17 | focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss 18 | if self.size_average: 19 | return focal_loss.mean() 20 | else: 21 | return focal_loss.sum() -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/utils/scheduler.py: 
-------------------------------------------------------------------------------- 1 | from torch.optim.lr_scheduler import _LRScheduler, StepLR 2 | 3 | class PolyLR(_LRScheduler): 4 | def __init__(self, optimizer, max_iters, power=0.9, last_epoch=-1, min_lr=1e-6): 5 | self.power = power 6 | self.max_iters = max_iters # avoid zero lr 7 | self.min_lr = min_lr 8 | super(PolyLR, self).__init__(optimizer, last_epoch) 9 | 10 | def get_lr(self): 11 | return [ max( base_lr * ( 1 - self.last_epoch/self.max_iters )**self.power, self.min_lr) 12 | for base_lr in self.base_lrs] -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/utils/utils.py: -------------------------------------------------------------------------------- 1 | from torchvision.transforms.functional import normalize 2 | import torch.nn as nn 3 | import numpy as np 4 | import os 5 | 6 | def denormalize(tensor, mean, std): 7 | mean = np.array(mean) 8 | std = np.array(std) 9 | 10 | _mean = -mean/std 11 | _std = 1/std 12 | return normalize(tensor, _mean, _std) 13 | 14 | class Denormalize(object): 15 | def __init__(self, mean, std): 16 | mean = np.array(mean) 17 | std = np.array(std) 18 | self._mean = -mean/std 19 | self._std = 1/std 20 | 21 | def __call__(self, tensor): 22 | if isinstance(tensor, np.ndarray): 23 | return (tensor - self._mean.reshape(-1,1,1)) / self._std.reshape(-1,1,1) 24 | return normalize(tensor, self._mean, self._std) 25 | 26 | def set_bn_momentum(model, momentum=0.1): 27 | for m in model.modules(): 28 | if isinstance(m, nn.BatchNorm2d): 29 | m.momentum = momentum 30 | 31 | def fix_bn(model): 32 | for m in model.modules(): 33 | if isinstance(m, nn.BatchNorm2d): 34 | m.eval() 35 | 36 | def mkdir(path): 37 | if not os.path.exists(path): 38 | os.mkdir(path) 39 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_2d/DeepLabV3PlusPytorch/utils/visualizer.py: -------------------------------------------------------------------------------- 1 | from visdom import Visdom 2 | import json 3 | 4 | class Visualizer(object): 5 | """ Visualizer 6 | """ 7 | def __init__(self, port='13579', env='main', id=None): 8 | #self.cur_win = {} 9 | self.vis = Visdom(port=port, env=env) 10 | self.id = id 11 | self.env = env 12 | # Restore 13 | #ori_win = self.vis.get_window_data() 14 | #ori_win = json.loads(ori_win) 15 | #print(ori_win) 16 | #self.cur_win = { v['title']: k for k, v in ori_win.items() } 17 | 18 | def vis_scalar(self, name, x, y, opts=None): 19 | if not isinstance(x, list): 20 | x = [x] 21 | if not isinstance(y, list): 22 | y = [y] 23 | 24 | if self.id is not None: 25 | name = "[%s]"%self.id + name 26 | default_opts = { 'title': name } 27 | if opts is not None: 28 | default_opts.update(opts) 29 | 30 | #win = self.cur_win.get(name, None) 31 | #if win is not None: 32 | self.vis.line( X=x, Y=y, win=name, opts=default_opts, update='append') 33 | #else: 34 | # self.cur_win[name] = self.vis.line( X=x, Y=y, opts=default_opts) 35 | 36 | def vis_image(self, name, img, env=None, opts=None): 37 | """ vis image in visdom 38 | """ 39 | if env is None: 40 | env = self.env 41 | if self.id is not None: 42 | name = "[%s]"%self.id + name 43 | #win = self.cur_win.get(name, None) 44 | default_opts = { 'title': name } 45 | if opts is not None: 46 | default_opts.update(opts) 47 | #if win is not None: 48 | self.vis.image( img=img, win=name, opts=opts, env=env ) 49 | #else: 50 | # 
self.cur_win[name] = self.vis.image( img=img, opts=default_opts, env=env ) 51 | 52 | def vis_table(self, name, tbl, opts=None): 53 | #win = self.cur_win.get(name, None) 54 | 55 | tbl_str = "<table width=\"100%\"> " 56 | tbl_str+="<tr> \ 57 | <th>Term</th> \ 58 | <th>Value</th> \ 59 | </tr>" 60 | for k, v in tbl.items(): 61 | tbl_str+= "<tr> \ 62 | <td>%s</td> \ 63 | <td>%s</td> \ 64 | </tr>"%(k, v) 65 | 66 | tbl_str+="</table>
" 67 | 68 | default_opts = { 'title': name } 69 | if opts is not None: 70 | default_opts.update(opts) 71 | #if win is not None: 72 | self.vis.text(tbl_str, win=name, opts=default_opts) 73 | #else: 74 | #self.cur_win[name] = self.vis.text(tbl_str, opts=default_opts) 75 | 76 | 77 | if __name__=='__main__': 78 | import numpy as np 79 | vis = Visualizer(port=35588, env='main') 80 | tbl = {"lr": 214, "momentum": 0.9} 81 | vis.vis_table("test_table", tbl) 82 | tbl = {"lr": 244444, "momentum": 0.9, "haha": "hoho"} 83 | vis.vis_table("test_table", tbl) 84 | 85 | vis.vis_scalar(name='loss', x=0, y=1) 86 | vis.vis_scalar(name='loss', x=2, y=4) 87 | vis.vis_scalar(name='loss', x=4, y=6) -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_3d/mobilenet.py: -------------------------------------------------------------------------------- 1 | '''MobileNet in PyTorch. 2 | 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" 4 | for more details. 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def conv_bn(inp, oup, stride): 12 | return nn.Sequential( 13 | nn.Conv3d(inp, oup, kernel_size=3, stride=stride, padding=(1,1,1), bias=False), 14 | nn.BatchNorm3d(oup), 15 | nn.ReLU(inplace=True) 16 | ) 17 | 18 | 19 | class Block(nn.Module): 20 | '''Depthwise conv + Pointwise conv''' 21 | def __init__(self, in_planes, out_planes, stride=1): 22 | super(Block, self).__init__() 23 | self.conv1 = nn.Conv3d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False) 24 | self.bn1 = nn.BatchNorm3d(in_planes) 25 | self.conv2 = nn.Conv3d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 26 | self.bn2 = nn.BatchNorm3d(out_planes) 27 | 28 | def forward(self, x): 29 | out = F.relu(self.bn1(self.conv1(x))) 30 | out = F.relu(self.bn2(self.conv2(out))) 31 | return out 32 | 33 | 34 | class MobileNet(nn.Module): 35 | def __init__(self, width_mult=1.): 36 | super(MobileNet, self).__init__() 37 | 38 | input_channel = 32 39 | last_channel = 1024 40 | input_channel = int(input_channel * width_mult) 41 | last_channel = int(last_channel * width_mult) 42 | cfg = [ 43 | # c, n, s 44 | [64, 1, (2,2,2)], 45 | [128, 2, (2,2,2)], 46 | [256, 2, (2,2,2)], 47 | [512, 6, (2,2,2)], 48 | [1024, 2, (1,1,1)], 49 | ] 50 | 51 | self.features = [conv_bn(3, input_channel, (1,2,2))] 52 | # building inverted residual blocks 53 | for c, n, s in cfg: 54 | output_channel = int(c * width_mult) 55 | for i in range(n): 56 | stride = s if i == 0 else 1 57 | self.features.append(Block(input_channel, output_channel, stride)) 58 | input_channel = output_channel 59 | # make it nn.Sequential 60 | self.features = nn.Sequential(*self.features) 61 | self.avgpool = nn.AvgPool3d((2, 1, 1), stride=1) 62 | 63 | def forward(self, x): 64 | x = self.features(x) 65 | 66 | if x.size(2) == 2: 67 | x = self.avgpool(x) 68 | 69 | return x 70 | 71 | 72 | def get_fine_tuning_parameters(model, ft_portion): 73 | if ft_portion == "complete": 74 | return model.parameters() 75 | 76 | elif ft_portion == "last_layer": 77 | ft_module_names = [] 78 | ft_module_names.append('classifier') 79 | 80 | parameters = [] 81 | for k, v in model.named_parameters(): 82 | for ft_module in ft_module_names: 83 | if ft_module in k: 84 | parameters.append({'params': v}) 85 | break 86 | else: 87 | parameters.append({'params': v, 'lr': 0.0}) 88 | return parameters 89 | 90 | else: 91 | raise 
ValueError("Unsupported ft_portion: 'complete' or 'last_layer' expected") 92 | 93 | 94 | def get_model(**kwargs): 95 | """ 96 | Returns the model. 97 | """ 98 | model = MobileNet(**kwargs) 99 | return model 100 | 101 | 102 | 103 | if __name__ == '__main__': 104 | model = get_model(width_mult=1.) 105 | model = model.cuda() 106 | model = nn.DataParallel(model, device_ids=None) 107 | print(model) 108 | 109 | input_var = Variable(torch.randn(8, 3, 16, 112, 112)) 110 | output = model(input_var) 111 | print(output.shape) 112 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_3d/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | '''MobilenetV2 in PyTorch. 2 | 3 | See the paper "MobileNetV2: Inverted Residuals and Linear Bottlenecks" for more details. 4 | ''' 5 | import torch 6 | import math 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | 11 | 12 | 13 | 14 | def conv_bn(inp, oup, stride): 15 | return nn.Sequential( 16 | nn.Conv3d(inp, oup, kernel_size=3, stride=stride, padding=(1,1,1), bias=False), 17 | nn.BatchNorm3d(oup), 18 | nn.ReLU6(inplace=True) 19 | ) 20 | 21 | 22 | def conv_1x1x1_bn(inp, oup): 23 | return nn.Sequential( 24 | nn.Conv3d(inp, oup, 1, 1, 0, bias=False), 25 | nn.BatchNorm3d(oup), 26 | nn.ReLU6(inplace=True) 27 | ) 28 | 29 | 30 | class InvertedResidual(nn.Module): 31 | def __init__(self, inp, oup, stride, expand_ratio): 32 | super(InvertedResidual, self).__init__() 33 | self.stride = stride 34 | 35 | hidden_dim = round(inp * expand_ratio) 36 | self.use_res_connect = self.stride == (1,1,1) and inp == oup 37 | 38 | if expand_ratio == 1: 39 | self.conv = nn.Sequential( 40 | # dw 41 | nn.Conv3d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 42 | nn.BatchNorm3d(hidden_dim), 43 | nn.ReLU6(inplace=True), 44 | # pw-linear 45 | nn.Conv3d(hidden_dim, oup, 1, 1, 0, bias=False), 46 | nn.BatchNorm3d(oup), 47 | ) 48 | else: 49 | self.conv = nn.Sequential( 50 | # pw 51 | nn.Conv3d(inp, hidden_dim, 1, 1, 0, bias=False), 52 | nn.BatchNorm3d(hidden_dim), 53 | nn.ReLU6(inplace=True), 54 | # dw 55 | nn.Conv3d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 56 | nn.BatchNorm3d(hidden_dim), 57 | nn.ReLU6(inplace=True), 58 | # pw-linear 59 | nn.Conv3d(hidden_dim, oup, 1, 1, 0, bias=False), 60 | nn.BatchNorm3d(oup), 61 | ) 62 | 63 | def forward(self, x): 64 | if self.use_res_connect: 65 | return x + self.conv(x) 66 | else: 67 | return self.conv(x) 68 | 69 | 70 | class MobileNetV2(nn.Module): 71 | def __init__(self, width_mult=1.): 72 | super(MobileNetV2, self).__init__() 73 | block = InvertedResidual 74 | input_channel = 32 75 | last_channel = 1280 76 | interverted_residual_setting = [ 77 | # t, c, n, s 78 | [1, 16, 1, (1,1,1)], 79 | [6, 24, 2, (2,2,2)], 80 | [6, 32, 3, (2,2,2)], 81 | [6, 64, 4, (2,2,2)], 82 | [6, 96, 3, (1,1,1)], 83 | [6, 160, 3, (2,2,2)], 84 | [6, 320, 1, (1,1,1)], 85 | ] 86 | 87 | # building first layer 88 | input_channel = int(input_channel * width_mult) 89 | self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel 90 | self.features = [conv_bn(3, input_channel, (1,2,2))] 91 | # building inverted residual blocks 92 | for t, c, n, s in interverted_residual_setting: 93 | output_channel = int(c * width_mult) 94 | for i in range(n): 95 | stride = s if i == 0 else (1,1,1) 96 | self.features.append(block(input_channel, 
output_channel, stride, expand_ratio=t)) 97 | input_channel = output_channel 98 | # building last several layers 99 | self.features.append(conv_1x1x1_bn(input_channel, self.last_channel)) 100 | # make it nn.Sequential 101 | self.features = nn.Sequential(*self.features) 102 | self.avgpool = nn.AvgPool3d((2, 1, 1), stride=1) 103 | 104 | self._initialize_weights() 105 | 106 | def forward(self, x): 107 | x = self.features(x) 108 | 109 | if x.size(2) == 2: 110 | x = self.avgpool(x) 111 | 112 | return x 113 | 114 | def _initialize_weights(self): 115 | for m in self.modules(): 116 | if isinstance(m, nn.Conv3d): 117 | n = m.kernel_size[0] * m.kernel_size[1] * m.kernel_size[2] * m.out_channels 118 | m.weight.data.normal_(0, math.sqrt(2. / n)) 119 | if m.bias is not None: 120 | m.bias.data.zero_() 121 | elif isinstance(m, nn.BatchNorm3d): 122 | m.weight.data.fill_(1) 123 | m.bias.data.zero_() 124 | elif isinstance(m, nn.Linear): 125 | n = m.weight.size(1) 126 | m.weight.data.normal_(0, 0.01) 127 | m.bias.data.zero_() 128 | 129 | 130 | def get_fine_tuning_parameters(model, ft_portion): 131 | if ft_portion == "complete": 132 | return model.parameters() 133 | 134 | elif ft_portion == "last_layer": 135 | ft_module_names = [] 136 | ft_module_names.append('classifier') 137 | 138 | parameters = [] 139 | for k, v in model.named_parameters(): 140 | for ft_module in ft_module_names: 141 | if ft_module in k: 142 | parameters.append({'params': v}) 143 | break 144 | else: 145 | parameters.append({'params': v, 'lr': 0.0}) 146 | return parameters 147 | 148 | else: 149 | raise ValueError("Unsupported ft_portion: 'complete' or 'last_layer' expected") 150 | 151 | 152 | def get_model(**kwargs): 153 | """ 154 | Returns the model. 155 | """ 156 | model = MobileNetV2(**kwargs) 157 | return model 158 | 159 | 160 | if __name__ == "__main__": 161 | model = get_model(width_mult=1.) 162 | model = model.cuda() 163 | model = nn.DataParallel(model, device_ids=None) 164 | print(model) 165 | 166 | 167 | input_var = Variable(torch.randn(8, 3, 16, 112, 112)) 168 | output = model(input_var) 169 | print(output.shape) 170 | 171 | 172 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/backbones_3d/shufflenet.py: -------------------------------------------------------------------------------- 1 | '''ShuffleNet in PyTorch. 2 | 3 | See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details. 
4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | 10 | 11 | def conv_bn(inp, oup, stride): 12 | return nn.Sequential( 13 | nn.Conv3d(inp, oup, kernel_size=3, stride=stride, padding=(1,1,1), bias=False), 14 | nn.BatchNorm3d(oup), 15 | nn.ReLU(inplace=True) 16 | ) 17 | 18 | 19 | def channel_shuffle(x, groups): 20 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' 21 | batchsize, num_channels, depth, height, width = x.data.size() 22 | channels_per_group = num_channels // groups 23 | # reshape 24 | x = x.view(batchsize, groups, 25 | channels_per_group, depth, height, width) 26 | #permute 27 | x = x.permute(0,2,1,3,4,5).contiguous() 28 | # flatten 29 | x = x.view(batchsize, num_channels, depth, height, width) 30 | return x 31 | 32 | 33 | 34 | class Bottleneck(nn.Module): 35 | def __init__(self, in_planes, out_planes, stride, groups): 36 | super(Bottleneck, self).__init__() 37 | self.stride = stride 38 | self.groups = groups 39 | mid_planes = out_planes//4 40 | if self.stride == 2: 41 | out_planes = out_planes - in_planes 42 | g = 1 if in_planes==24 else groups 43 | self.conv1 = nn.Conv3d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False) 44 | self.bn1 = nn.BatchNorm3d(mid_planes) 45 | self.conv2 = nn.Conv3d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False) 46 | self.bn2 = nn.BatchNorm3d(mid_planes) 47 | self.conv3 = nn.Conv3d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False) 48 | self.bn3 = nn.BatchNorm3d(out_planes) 49 | self.relu = nn.ReLU(inplace=True) 50 | 51 | if stride == 2: 52 | self.shortcut = nn.AvgPool3d(kernel_size=(2,3,3), stride=2, padding=(0,1,1)) 53 | 54 | 55 | def forward(self, x): 56 | out = self.relu(self.bn1(self.conv1(x))) 57 | out = channel_shuffle(out, self.groups) 58 | out = self.bn2(self.conv2(out)) 59 | out = self.bn3(self.conv3(out)) 60 | 61 | if self.stride == 2: 62 | a = self.shortcut(x) 63 | out = self.relu(torch.cat([out, self.shortcut(x)], 1)) 64 | else: 65 | out = self.relu(out + x) 66 | 67 | return out 68 | 69 | 70 | class ShuffleNet(nn.Module): 71 | def __init__(self, 72 | groups, 73 | width_mult=1, 74 | num_classes=400): 75 | super(ShuffleNet, self).__init__() 76 | self.num_classes = num_classes 77 | self.groups = groups 78 | num_blocks = [4,8,4] 79 | 80 | # index 0 is invalid and should never be called. 81 | # only used for indexing convenience. 
82 | if groups == 1: 83 | out_planes = [24, 144, 288, 567] 84 | elif groups == 2: 85 | out_planes = [24, 200, 400, 800] 86 | elif groups == 3: 87 | out_planes = [24, 240, 480, 960] 88 | elif groups == 4: 89 | out_planes = [24, 272, 544, 1088] 90 | elif groups == 8: 91 | out_planes = [24, 384, 768, 1536] 92 | else: 93 | raise ValueError( 94 | """{} groups is not supported for 95 | 1x1 Grouped Convolutions""".format(num_groups)) 96 | out_planes = [int(i * width_mult) for i in out_planes] 97 | self.in_planes = out_planes[0] 98 | self.conv1 = conv_bn(3, self.in_planes, stride=(1,2,2)) 99 | self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1) 100 | self.layer1 = self._make_layer(out_planes[1], num_blocks[0], self.groups) 101 | self.layer2 = self._make_layer(out_planes[2], num_blocks[1], self.groups) 102 | self.layer3 = self._make_layer(out_planes[3], num_blocks[2], self.groups) 103 | self.avgpool = nn.AvgPool3d((2, 1, 1), stride=1) 104 | 105 | def _make_layer(self, out_planes, num_blocks, groups): 106 | layers = [] 107 | for i in range(num_blocks): 108 | stride = 2 if i == 0 else 1 109 | layers.append(Bottleneck(self.in_planes, out_planes, stride=stride, groups=groups)) 110 | self.in_planes = out_planes 111 | return nn.Sequential(*layers) 112 | 113 | def forward(self, x): 114 | out = self.conv1(x) 115 | out = self.maxpool(out) 116 | out = self.layer1(out) 117 | out = self.layer2(out) 118 | out = self.layer3(out) 119 | 120 | if out.size(2) == 2: 121 | out = self.avgpool(out) 122 | 123 | return out 124 | 125 | def get_fine_tuning_parameters(model, ft_portion): 126 | if ft_portion == "complete": 127 | return model.parameters() 128 | 129 | elif ft_portion == "last_layer": 130 | ft_module_names = [] 131 | ft_module_names.append('classifier') 132 | 133 | parameters = [] 134 | for k, v in model.named_parameters(): 135 | for ft_module in ft_module_names: 136 | if ft_module in k: 137 | parameters.append({'params': v}) 138 | break 139 | else: 140 | parameters.append({'params': v, 'lr': 0.0}) 141 | return parameters 142 | 143 | else: 144 | raise ValueError("Unsupported ft_portion: 'complete' or 'last_layer' expected") 145 | 146 | 147 | def get_model(**kwargs): 148 | """ 149 | Returns the model. 
150 | """ 151 | model = ShuffleNet(**kwargs) 152 | return model 153 | 154 | 155 | if __name__ == "__main__": 156 | model = get_model(groups=3, num_classes=600, width_mult=1) 157 | model = model.cuda() 158 | model = nn.DataParallel(model, device_ids=None) 159 | print(model) 160 | 161 | input_var = Variable(torch.randn(8, 3, 16, 112, 112)) 162 | output = model(input_var) 163 | print(output.shape) 164 | 165 | 166 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/ava.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | RESUME_PATH: "/usr/home/sut/YOWO/backup/ava/yowo_ava_32f_s1_best_ap_01905.pth" 3 | DATASET: ava # `ava`, `ucf24` or `jhmdb21` 4 | BATCH_SIZE: 6 5 | TOTAL_BATCH_SIZE: 128 6 | LEARNING_RATE: 1e-4 7 | EVALUATE: True 8 | MODE: "val" # `train`, `test` or `val` 9 | BEGIN_EPOCH: 1 10 | END_EPOCH: 10 11 | CLASS_RATIO_FILE: "cfg/ava_categories_ratio.json" 12 | USE_GROUNDTRUTH: False 13 | USE_SLOWFAST: False 14 | DATA: 15 | NUM_FRAMES: 32 16 | SAMPLING_RATE: 1 17 | TRAIN_JITTER_SCALES: [256, 320] 18 | TRAIN_CROP_SIZE: 224 19 | TEST_CROP_SIZE: 224 20 | SOLVER: 21 | STEPS: [3, 4, 5, 6] 22 | LR_DECAY_RATE: 0.5 23 | ANCHORS: [0.71626, 2.13583, 1.28967, 4.15014, 2.12714, 5.09344, 3.27212, 5.87423, 5.16303, 6.33821] 24 | AVA: 25 | BGR: False 26 | DETECTION_SCORE_THRESH: 0.8 27 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"] 28 | MODEL: 29 | NUM_CLASSES: 80 30 | BACKBONE_3D: resnext101 31 | BACKBONE_2D: darknet 32 | WEIGHTS: 33 | BACKBONE_3D: "weights/resnext-101-kinetics.pth" 34 | BACKBONE_2D: "weights/yolo.weights" 35 | FREEZE_BACKBONE_3D: False 36 | FREEZE_BACKBONE_2D: False 37 | DATA_LOADER: 38 | NUM_WORKERS: 8 39 | PIN_MEMORY: True 40 | BACKUP_DIR: "backup/ava" 41 | DEMO: 42 | ENABLE: False 43 | OUT_PATH: "ava_detections/videos" 44 | LABEL_FILE_PATH: "/usr/home/sut/dataset_factory/AVA/annotations/ava_classnames.json" 45 | RNG_SEED: 1 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/ava_categories_count.json: -------------------------------------------------------------------------------- 1 | {"watch (a person)": 214424, "stand": 210079, "talk to (e.g., self, a person, a group)": 140020, "listen to (a person)": 136621, "sit": 129830, "carry/hold (an object)": 103526, "walk": 53041, "touch (an object)": 22537, "bend/bow (at the waist)": 10810, "lie/sleep": 7252, "ride (e.g., a bike, a car, a horse)": 6794, "dance": 4752, "answer phone": 4392, "run/jog": 4340, "eat": 4100, "smoke": 3698, "crouch/kneel": 3289, "fight/hit (a person)": 3173, "drink": 2856, "read": 2854, "grab (a person)": 2757, "martial art": 2590, "sing to (e.g., self, a person, a group)": 2232, "watch (e.g., TV)": 2190, "play musical instrument": 2079, "drive (e.g., a car, a truck)": 1749, "open (e.g., a window, a car door)": 1574, "hand clap": 1550, "hug (a person)": 1547, "get up": 1451, "give/serve (an object) to (a person)": 1415, "listen (e.g., to music)": 1072, "write": 1051, "close (e.g., a door, a box)": 998, "kiss (a person)": 918, "take (an object) from (a person)": 802, "sail boat": 797, "hand shake": 736, "put down": 659, "lift/pick up": 658, "text on/look at a cellphone": 542, "lift (a person)": 511, "pull (an object)": 479, "push (an object)": 479, "hand wave": 459, "dress/put on clothing": 454, "push (another person)": 441, "fall down": 382, "throw": 348, 
"climb (e.g., a mountain)": 344, "jump/leap": 318, "work on a computer": 287, "enter": 282, "shoot": 269, "hit (an object)": 230, "cut": 226, "take a photo": 217, "turn (e.g., a screwdriver)": 172, "swim": 146, "point to (an object)": 131} -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/ava_categories_ratio.json: -------------------------------------------------------------------------------- 1 | {"80": 0.5059319458527529, "12": 0.5005430972490094, "79": 0.3317767554377424, "74": 0.32139321745252786, "11": 0.3018567607333167, "17": 0.24206491290886498, "14": 0.12267378359756043, "59": 0.05155061034502472, "1": 0.02512088050957867, "8": 0.016115395377806128, "49": 0.014466546112115732, "10": 0.010040529196366514, "15": 0.00986601595291753, "4": 0.00982990976461774, "29": 0.009101768300571982, "54": 0.008999467433722578, "64": 0.008108848122327766, "27": 0.007025662473334075, "3": 0.006983538586984321, "48": 0.006456990007612388, "9": 0.006369733385887896, "66": 0.006026724597039895, "61": 0.0059966361067900695, "77": 0.004943538948046204, "41": 0.003902477185402268, "38": 0.0037640701302530744, "28": 0.0035745126416791783, "67": 0.003571503792654196, "6": 0.0033819463040803003, "70": 0.0033187604745556682, "65": 0.003228495003806194, "22": 0.0023649553336362243, "63": 0.002337875692411382, "51": 0.0021633624489623983, "72": 0.002148318203837486, "37": 0.0020099111486882922, "68": 0.001862477546464151, "78": 0.0018293802071893438, "47": 0.0016037165303156584, "36": 0.0015615926439659037, "57": 0.0012486723453677265, "73": 0.0012035396099929893, "46": 0.0011644245726682172, "76": 0.0010681414038687781, "69": 0.001056106007768848, "45": 0.001035044064593971, "26": 0.001029026366544006, "5": 0.0008755750662698998, "20": 0.0008063715386953028, "58": 0.0007492034072206358, "7": 0.000737168011120706, "30": 0.0006769910306210565, "52": 0.0006679644835461091, "24": 0.0005536282205967751, "56": 0.0005446016735218277, "34": 0.0005325662774218978, "62": 0.0005295574283969153, "2": 0.0004723892969222483, "42": 0.0004152211654475813, "60": 0.00041221231642259886, "75": 0.00038212382617277413, "40": 0.00035504418494793186, "13": 0.00033398224177305457, "43": 0.00029185835542329993, "44": 0.0002828318083483525, "18": 0.00024070792199859788, "33": 0.0002376990729736154, "39": 0.0002015928846738257, "53": 0.0002015928846738257, "23": 0.00019557518662386078, "50": 0.00018654863954891334, "55": 0.00018053094149894842, "21": 0.0001594689983240711, "31": 0.0001594689983240711, "71": 0.00015044245124912366, "25": 0.00012336281002428142, "19": 0.00010831856489936904, "35": 0.00010530971587438658, "16": 6.017698049964947e-05, "32": 2.1061943174877314e-05} -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/custom_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Add custom configs and default values""" 5 | 6 | 7 | def add_custom_config(_C): 8 | # Add your own customized configs. 
9 | pass 10 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/dota_config.yaml: -------------------------------------------------------------------------------- 1 | # Dataset 2 | root: '/scratch/ssd002/datasets/cv_project/Detection-of-Traffic-Anomaly/dataset' 3 | data_root: "/scratch/ssd002/datasets/cv_project/Detection-of-Traffic-Anomaly/dataset/DoTA_fol_train_data" 4 | val_data_root: "/scratch/ssd002/datasets/cv_project/Detection-of-Traffic-Anomaly/dataset/DoTA_fol_val_data"# DoTA_fol_val_data 5 | label_file: '/scratch/ssd002/datasets/cv_project/Detection-of-Traffic-Anomaly/dataset/metadata_val.json' 6 | train_split: '/scratch/ssd002/datasets/cv_project/Detection-of-Traffic-Anomaly/dataset/train_split.txt' 7 | val_split: '/scratch/ssd002/datasets/cv_project/Detection-of-Traffic-Anomaly/dataset/val_split.txt' 8 | 9 | track_dir: "./sort_output/" 10 | flow_dir: "./flownet2/" 11 | ego_motion_dir: "" 12 | img_dir: "/scratch/ssd002/dataset_factory/cv_project/Detection-of-Traffic-Anomaly/dataset/frames" 13 | 14 | # dataset arguments 15 | seed_max: 5 16 | segment_len: 10 #16 17 | 18 | device: 'cuda' 19 | # fol model parameters 20 | pred_timesteps: 5 21 | with_ego: False 22 | pred_dim: 4 23 | 24 | # dataloader parameters 25 | shuffle: True 26 | num_workers: 32 27 | GPU: 0 28 | batch_size: 16 29 | 30 | # image parameters 31 | H: 720 32 | W: 1280 33 | channels: 3 34 | 35 | flow_roi_size: [5,5,2] 36 | max_age: 10 37 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/dota_train.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | # RESUME_PATH: "backup/dota/matt_run2_SlowerLrDecay/yowo_dota_16f_best.pth" 3 | RESUME_PATH: "" 4 | DATASET: dota # `ava`, `ucf24` or `jhmdb21` 5 | BATCH_SIZE: 48 6 | TOTAL_BATCH_SIZE: 128 7 | LEARNING_RATE: 1e-3 8 | EVALUATE: False 9 | FINE_TUNE: False 10 | BEGIN_EPOCH: 1 11 | END_EPOCH: 300 12 | SOLVER: 13 | MOMENTUM: 0.9 14 | WEIGHT_DECAY: 1e-4 15 | # STEPS: [5, 10, 20, 40,] 16 | STEPS: [20, 40, 60, 80, 120, 160, 200, 280] 17 | LR_DECAY_RATE: 0.5 18 | ######################################## ANCHOR BOXES 19 | # 5x DOTA ANCHORS 224x224 20 | # ANCHORS: [0.46875,0.78125,0.9375,1.90625,1.25,2.4375,1.625,4.09375,2.59375,4.15625] 21 | # NUM_ANCHORS: 5 22 | # 9x DOTA ANCHORS 224x224 23 | ANCHORS: [0.34375,0.625,0.6875,1.09375,0.71875,1.78125,1.0625,1.96875,1.3125,2.625,1.34375,2.8125,1.6875,2.8125,2.53125,4.625,2.5625,4.6875] 24 | NUM_ANCHORS: 9 25 | # # 5x DOTA ANCHORS 1280x720 26 | # ANCHORS: [2.34375,2.6875, 5.09375, 5.84375, 7.90625, 7.375, 8.6875, 11.3125, 14.5, 13.9375] 27 | # NUM_ANCHORS: 5 28 | # # 9x DOTA ANCHORS 1280x720 29 | # ANCHORS: [1.71875, 2.0625, 3.59375, 3.25, 3.90625, 5.90625,5.96875,6.84375,6.28125,7.81258.78125,8.03125,10.1875,12.21875,12.46875,13.09375,16.25,15.21875] 30 | # NUM_ANCHORS: 5 31 | ######################################## 32 | OBJECT_SCALE: 5 33 | NOOBJECT_SCALE: 1 34 | CLASS_SCALE: 1 35 | COORD_SCALE: 1 36 | NO_ANOMALY: True 37 | COMBINED_BOX: False 38 | CE_LOSS_WEIGHT: 1.0 39 | DATA: 40 | NUM_FRAMES: 16 41 | SAMPLING_RATE: 1 42 | TRAIN_JITTER_SCALES: [256, 320] 43 | TRAIN_CROP_SIZE: 224 44 | TEST_CROP_SIZE: 224 45 | MEAN: [0.4345, 0.4051, 0.3775] 46 | STD: [0.2768, 0.2713, 0.2737] 47 | MODEL: 48 | NUM_CLASSES: 11 49 | BACKBONE_3D: shufflenetv2_2x 50 | BACKBONE_2D: darknet 51 | WEIGHTS: 52 | BACKBONE_3D: "weights/kinetics_shufflenetv2_2.0x_RGB_16_best.pth" 
53 | BACKBONE_2D: "weights/yolo.weights" 54 | FREEZE_BACKBONE_3D: False 55 | FREEZE_BACKBONE_2D: False 56 | LISTDATA: 57 | BASE_PTH: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24" 58 | TRAIN_FILE: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24/trainlist.txt" 59 | TEST_FILE: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24/testlist.txt" 60 | TEST_VIDEO_FILE: "/usr/home/sut/dataset_factory/ucf24/testlist_video.txt" 61 | MAX_OBJS: 7 62 | CLASS_NAMES: [ 63 | "Basketball", "BasketballDunk", "Biking", "CliffDiving", "CricketBowling", 64 | "Diving", "Fencing", "FloorGymnastics", "GolfSwing" 65 | ] 66 | RNG_SEED: 1 67 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/jhmdb.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | RESUME_PATH: "" 3 | DATASET: jhmdb21 # `ava`, `ucf24` or `jhmdb21` 4 | BATCH_SIZE: 18 5 | TOTAL_BATCH_SIZE: 128 6 | LEARNING_RATE: 1e-4 7 | EVALUATE: False 8 | FINE_TUNE: False 9 | BEGIN_EPOCH: 1 10 | END_EPOCH: 10 11 | SOLVER: 12 | MOMENTUM: 0.9 13 | WEIGHT_DECAY: 5e-4 14 | STEPS: [3, 4, 5, 6] 15 | LR_DECAY_RATE: 0.5 16 | ANCHORS: [0.95878, 3.10197, 1.67204, 4.0040, 1.75482, 5.64937, 3.09299, 5.80857, 4.91803, 6.25225] 17 | NUM_ANCHORS: 5 18 | OBJECT_SCALE: 5 19 | NOOBJECT_SCALE: 1 20 | CLASS_SCALE: 1 21 | COORD_SCALE: 1 22 | DATA: 23 | NUM_FRAMES: 32 24 | SAMPLING_RATE: 1 25 | TRAIN_JITTER_SCALES: [256, 320] 26 | TRAIN_CROP_SIZE: 224 27 | TEST_CROP_SIZE: 224 28 | MEAN: [0.4345, 0.4051, 0.3775] 29 | STD: [0.2768, 0.2713, 0.2737] 30 | MODEL: 31 | NUM_CLASSES: 21 32 | BACKBONE_3D: resnext101 33 | BACKBONE_2D: darknet 34 | WEIGHTS: 35 | BACKBONE_3D: "weights/resnext-101-kinetics-hmdb51_split1.pth" 36 | BACKBONE_2D: "weights/yolo.weights" 37 | FREEZE_BACKBONE_3D: True 38 | FREEZE_BACKBONE_2D: True 39 | LISTDATA: 40 | BASE_PTH: "/scratch/ssd002/dataset_factory/cv_project/yowo_jhmdb" 41 | TRAIN_FILE: "/scratch/ssd002/dataset_factory/cv_project/yowo_jhmdb/trainlist.txt" 42 | TEST_FILE: "/scratch/ssd002/dataset_factory/cv_project/yowo_jhmdb/testlist.txt" 43 | TEST_VIDEO_FILE: "/data1/jhmdb/testlist_video.txt" 44 | MAX_OBJS: 1 45 | CLASS_NAMES: [ 46 | "brush_hair", "catch", "clap", "climb_stairs", "golf", 47 | "jump", "kick_ball", "pick", "pour", "pullup", "push", 48 | "run", "shoot_ball", "shoot_bow", "shoot_gun", "sit", 49 | "stand", "swing_baseball", "throw", "walk", "wave" 50 | ] 51 | BACKUP_DIR: "backup/jhmdb" 52 | RNG_SEED: 1 53 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Argument parser functions.""" 5 | 6 | import argparse 7 | import sys 8 | 9 | from cfg.defaults import get_cfg 10 | 11 | 12 | def parse_args(): 13 | """ 14 | Args: 15 | shard_id (int): shard id for the current machine. Starts from 0 to 16 | num_shards - 1. If single machine is used, then set shard id to 0. 17 | num_shards (int): number of shards using by the job. 18 | init_method (str): initialization method to launch the job with multiple 19 | devices. Options includes TCP or shared file-system for 20 | initialization. details can be find in 21 | https://pytorch.org/docs/stable/distributed.html#tcp-initialization 22 | cfg (str): path to the config file. 
23 | opts (argument): provide addtional options from the command line, it 24 | overwrites the config loaded from file. 25 | """ 26 | parser = argparse.ArgumentParser( 27 | description="Provide YOWO video training and testing pipeline." 28 | ) 29 | parser.add_argument( # added for now 30 | '--dataset', 31 | default='ucf101-24', 32 | type=str, 33 | help='Select dataset from (ucf101-24, jhmdb-21, ava)' 34 | ) 35 | parser.add_argument( 36 | "--cfg", 37 | dest="cfg_file", 38 | help="Path to the config file", 39 | default="cfg/dota_train.yaml", 40 | type=str, 41 | ) 42 | parser.add_argument( 43 | "opts", 44 | help="See slowfast/config/defaults.py for all options", 45 | default=None, 46 | nargs=argparse.REMAINDER, 47 | ) 48 | parser.add_argument( 49 | "--save_dir", 50 | help="Save directory customized with slurm ID", 51 | default="backup", 52 | type=str, 53 | ) 54 | 55 | if len(sys.argv) == 1: 56 | parser.print_help() 57 | return parser.parse_args() 58 | 59 | 60 | def load_config(args): 61 | """ 62 | Given the arguemnts, load and initialize the configs. 63 | Args: 64 | args (argument): arguments includes `shard_id`, `num_shards`, 65 | `init_method`, `cfg_file`, and `opts`. 66 | """ 67 | # Setup cfg. 68 | cfg = get_cfg() 69 | # Load config from cfg. 70 | if args.cfg_file is not None: 71 | cfg.merge_from_file(args.cfg_file) 72 | # Load config from command line, overwrite config from opts. 73 | if args.opts is not None: 74 | cfg.merge_from_list(args.opts) 75 | 76 | # Inherit parameters from args. 77 | 78 | # Create the checkpoint dir. 79 | return cfg 80 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/ucf24.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | RESUME_PATH: "" 3 | DATASET: ucf24 # `ava`, `ucf24` or `jhmdb21` 4 | BATCH_SIZE: 12 5 | TOTAL_BATCH_SIZE: 128 6 | LEARNING_RATE: 1e-4 7 | EVALUATE: False 8 | FINE_TUNE: False 9 | BEGIN_EPOCH: 1 10 | END_EPOCH: 5 11 | SOLVER: 12 | MOMENTUM: 0.9 13 | WEIGHT_DECAY: 5e-4 14 | STEPS: [2, 3, 4, 5] 15 | LR_DECAY_RATE: 0.5 16 | ANCHORS: [0.70458, 1.18803, 1.26654, 2.55121, 1.59382, 4.08321, 2.30548, 4.94180, 3.52332, 5.91979] 17 | NUM_ANCHORS: 5 18 | OBJECT_SCALE: 5 19 | NOOBJECT_SCALE: 1 20 | CLASS_SCALE: 1 21 | COORD_SCALE: 1 22 | DATA: 23 | NUM_FRAMES: 16 24 | SAMPLING_RATE: 1 25 | TRAIN_JITTER_SCALES: [256, 320] 26 | TRAIN_CROP_SIZE: 224 27 | TEST_CROP_SIZE: 224 28 | MEAN: [0.4345, 0.4051, 0.3775] 29 | STD: [0.2768, 0.2713, 0.2737] 30 | MODEL: 31 | NUM_CLASSES: 24 32 | BACKBONE_3D: resnext101 33 | BACKBONE_2D: darknet 34 | WEIGHTS: 35 | BACKBONE_3D: "weights/resnext-101-kinetics.pth" 36 | BACKBONE_2D: "weights/yolo.weights" 37 | FREEZE_BACKBONE_3D: True 38 | FREEZE_BACKBONE_2D: True 39 | LISTDATA: 40 | BASE_PTH: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24" 41 | TRAIN_FILE: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24/trainlist.txt" 42 | TEST_FILE: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24/testlist.txt" 43 | TEST_VIDEO_FILE: "/usr/home/sut/dataset_factory/ucf24/testlist_video.txt" 44 | MAX_OBJS: 6 45 | CLASS_NAMES: [ 46 | "Basketball", "BasketballDunk", "Biking", "CliffDiving", "CricketBowling", 47 | "Diving", "Fencing", "FloorGymnastics", "GolfSwing", "HorseRiding", 48 | "IceDancing", "LongJump", "PoleVault", "RopeClimbing", "SalsaSpin", 49 | "SkateBoarding", "Skiing", "Skijet", "SoccerJuggling", "Surfing", 50 | "TennisSwing", "TrampolineJumping", "VolleyballSpiking", "WalkingWithDog" 51 | ] 52 | 
BACKUP_DIR: "backup/ucf24" 53 | RNG_SEED: 1 54 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/ucf24_charmed-leaf-23_copy.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | RESUME_PATH: "" 3 | DATASET: ucf24 # `ava`, `ucf24` or `jhmdb21` 4 | BATCH_SIZE: 18 5 | TOTAL_BATCH_SIZE: 128 6 | LEARNING_RATE: 1e-4 7 | EVALUATE: False 8 | FINE_TUNE: False 9 | BEGIN_EPOCH: 1 10 | END_EPOCH: 5 11 | SOLVER: 12 | MOMENTUM: 0.9 13 | WEIGHT_DECAY: 5e-4 14 | STEPS: [2, 3, 4, 5] 15 | LR_DECAY_RATE: 0.5 16 | ANCHORS: [0.70458, 1.18803, 1.26654, 2.55121, 1.59382, 4.08321, 2.30548, 4.94180, 3.52332, 5.91979] 17 | NUM_ANCHORS: 5 18 | OBJECT_SCALE: 5 19 | NOOBJECT_SCALE: 1 20 | CLASS_SCALE: 1 21 | COORD_SCALE: 1 22 | DATA: 23 | NUM_FRAMES: 16 24 | SAMPLING_RATE: 1 25 | TRAIN_JITTER_SCALES: [256, 320] 26 | TRAIN_CROP_SIZE: 224 27 | TEST_CROP_SIZE: 224 28 | MEAN: [0.4345, 0.4051, 0.3775] 29 | STD: [0.2768, 0.2713, 0.2737] 30 | MODEL: 31 | NUM_CLASSES: 24 32 | BACKBONE_3D: resnext101 33 | BACKBONE_2D: darknet 34 | WEIGHTS: 35 | BACKBONE_3D: "weights/resnext-101-kinetics.pth" 36 | BACKBONE_2D: "weights/yolo.weights" 37 | FREEZE_BACKBONE_3D: False 38 | FREEZE_BACKBONE_2D: False 39 | LISTDATA: 40 | BASE_PTH: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24" 41 | TRAIN_FILE: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24/trainlist.txt" 42 | TEST_FILE: "/scratch/ssd002/dataset_factory/cv_project/yowo_ucf24/testlist.txt" 43 | TEST_VIDEO_FILE: "/usr/home/sut/dataset_factory/ucf24/testlist_video.txt" 44 | MAX_OBJS: 6 45 | CLASS_NAMES: [ 46 | "Basketball", "BasketballDunk", "Biking", "CliffDiving", "CricketBowling", 47 | "Diving", "Fencing", "FloorGymnastics", "GolfSwing", "HorseRiding", 48 | "IceDancing", "LongJump", "PoleVault", "RopeClimbing", "SalsaSpin", 49 | "SkateBoarding", "Skiing", "Skijet", "SoccerJuggling", "Surfing", 50 | "TennisSwing", "TrampolineJumping", "VolleyballSpiking", "WalkingWithDog" 51 | ] 52 | BACKUP_DIR: "backup/ucf24" 53 | RNG_SEED: 1 54 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/ucf24_finalAnnots.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/video/Traffic_Incident_Detection/cfg/ucf24_finalAnnots.mat -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=224#1280#224 9 | height=224#720#224 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | 
pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/core/FocalLoss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 
-*- coding: utf-8 -*- 3 | # -------------------------------------------------------- 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Chao CHEN (chaochancs@gmail.com) 6 | # Created On: 2017-08-11 7 | # -------------------------------------------------------- 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torch.autograd import Variable 12 | 13 | class FocalLoss(nn.Module): 14 | r""" 15 | This criterion is an implementation of Focal Loss, which is proposed in 16 | Focal Loss for Dense Object Detection. 17 | 18 | Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class]) 19 | 20 | The losses are averaged across observations for each minibatch. 21 | 22 | Args: 23 | alpha(1D Tensor, Variable) : the scalar factor for this criterion 24 | gamma(float, double) : gamma > 0; reduces the relative loss for well-classified examples (p > .5), 25 | putting more focus on hard, misclassified examples 26 | size_average(bool): By default, the losses are averaged over observations for each minibatch. 27 | However, if the field size_average is set to False, the losses are 28 | instead summed for each minibatch. 29 | 30 | """ 31 | def __init__(self, class_num, alpha=None, gamma=2, size_average=True): 32 | super(FocalLoss, self).__init__() 33 | if alpha is None: 34 | self.alpha = Variable(torch.ones(class_num, 1)) 35 | else: 36 | if isinstance(alpha, Variable): 37 | self.alpha = alpha 38 | else: 39 | self.alpha = Variable(alpha) 40 | self.gamma = gamma 41 | self.class_num = class_num 42 | self.size_average = size_average 43 | 44 | def forward(self, inputs, targets): 45 | N = inputs.size(0) 46 | #print(N) 47 | C = inputs.size(1) 48 | P = F.softmax(inputs, dim=1) 49 | 50 | class_mask = inputs.data.new(N, C).fill_(0) 51 | class_mask = Variable(class_mask) 52 | ids = targets.view(-1, 1) 53 | class_mask.scatter_(1, ids, 1.)
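        # class_mask is now a one-hot encoding of targets: scatter_ writes 1. at each
        # row's target class index, so (P * class_mask).sum(1) below selects p_t, the
        # probability assigned to the true class; the loss then weights -log(p_t) by
        # alpha * (1 - p_t)**gamma, matching the formula in the class docstring.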
54 |
55 |
56 | if inputs.is_cuda and not self.alpha.is_cuda:
57 | self.alpha = self.alpha.cuda()
58 | alpha = self.alpha[ids.data.view(-1)]
59 |
60 | probs = (P*class_mask).sum(1).view(-1,1)
61 |
62 | log_p = probs.log()
63 |
64 | batch_loss = -alpha*(torch.pow((1-probs), self.gamma))*log_p
65 |
66 | if self.size_average:
67 | loss = batch_loss.mean()
68 | else:
69 | loss = batch_loss.sum()
70 | return loss
71 |
72 |
73 |
74 | if __name__ == "__main__":
75 | alpha = torch.rand(21, 1)
76 | print(alpha)
77 | FL = FocalLoss(class_num=5, gamma=0 )
78 | CE = nn.CrossEntropyLoss()
79 | N = 4
80 | C = 5
81 | inputs = torch.rand(N, C)
82 | targets = torch.LongTensor(N).random_(C)
83 | inputs_fl = Variable(inputs.clone(), requires_grad=True)
84 | targets_fl = Variable(targets.clone())
85 |
86 | inputs_ce = Variable(inputs.clone(), requires_grad=True)
87 | targets_ce = Variable(targets.clone())
88 | print('----inputs----')
89 | print(inputs)
90 | print('---target-----')
91 | print(targets)
92 |
93 | fl_loss = FL(inputs_fl, targets_fl)
94 | ce_loss = CE(inputs_ce, targets_ce)
95 | print('ce = {}, fl = {}'.format(ce_loss.item(), fl_loss.item()))
96 | fl_loss.backward()
97 | ce_loss.backward()
98 | #print(inputs_fl.grad.data)
99 | print(inputs_ce.grad.data)
100 |
101 |
102 |
103 |
104 |
-------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/core/cfam.py: --------------------------------------------------------------------------------
1 | """
2 | We thank CASIA IVA for sharing their code at 'https://github.com/junfu1115/DANet',
3 | on top of which this module is built.
4 | """
5 |
6 | import numpy as np
7 | import torch
8 | import torch.nn as nn
9 | from torch.autograd import Variable
10 |
11 |
12 | class CAM_Module(nn.Module):
13 | """ Channel attention module """
14 | def __init__(self, in_dim):
15 | super(CAM_Module, self).__init__()
16 | self.chanel_in = in_dim
17 |
18 |
19 | self.gamma = nn.Parameter(torch.zeros(1))
20 | self.softmax = nn.Softmax(dim=-1)
21 | def forward(self,x):
22 | """
23 | inputs :
24 | x : input feature maps( B X C X H X W )
25 | returns :
26 | out : attention value + input feature
27 | attention: B X C X C
28 | """
29 | m_batchsize, C, height, width = x.size()
30 | proj_query = x.view(m_batchsize, C, -1)
31 | proj_key = x.view(m_batchsize, C, -1).permute(0, 2, 1)
32 | energy = torch.bmm(proj_query, proj_key)
33 | energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy)-energy
34 | attention = self.softmax(energy_new)
35 | proj_value = x.view(m_batchsize, C, -1)
36 |
37 | out = torch.bmm(attention, proj_value)
38 | out = out.view(m_batchsize, C, height, width)
39 |
40 | out = self.gamma*out + x
41 | return out
42 |
43 |
44 | class CFAMBlock(nn.Module):
45 | def __init__(self, in_channels, out_channels):
46 | super(CFAMBlock, self).__init__()
47 | inter_channels = 1024
48 | self.conv_bn_relu1 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, kernel_size=1, bias=False),
49 | nn.BatchNorm2d(inter_channels),
50 | nn.ReLU())
51 |
52 | self.conv_bn_relu2 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, padding=1, bias=False),
53 | nn.BatchNorm2d(inter_channels),
54 | nn.ReLU())
55 |
56 | self.sc = CAM_Module(inter_channels)
57 |
58 | self.conv_bn_relu3 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, padding=1, bias=False),
59 | nn.BatchNorm2d(inter_channels),
60 | nn.ReLU())
61 |
62 | self.conv_out = nn.Sequential(nn.Dropout2d(0.1, False), nn.Conv2d(inter_channels, out_channels, 1))
63 |
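# The forward pass below applies, in order: a 1x1 conv projecting the input to
# inter_channels, a 3x3 conv, the channel attention module (self.sc), another 3x3
# conv, and a final 1x1 conv down to out_channels. In YOWO, which this repository
# builds on, the input x is the channel-wise concatenation of the 2D and 3D
# backbone feature maps (e.g. the 2473-channel tensor in the __main__ test below).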
64 | def forward(self, x): 65 | 66 | x = self.conv_bn_relu1(x) 67 | x = self.conv_bn_relu2(x) 68 | x = self.sc(x) 69 | x = self.conv_bn_relu3(x) 70 | output = self.conv_out(x) 71 | 72 | return output 73 | 74 | 75 | if __name__ == "__main__": 76 | data = torch.randn(18, 2473, 7, 7).cuda() 77 | in_channels = data.size()[1] 78 | out_channels = 145 79 | model = CFAMBlock(in_channels, out_channels).cuda() 80 | print(model) 81 | output = model(data) 82 | print(output.size()) 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/core/detection_visualization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import matplotlib.patches as patches 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | import argparse 8 | import yaml 9 | from argparse import Namespace 10 | from dataset_factory.dota import DoTA 11 | 12 | def get_detections_data(vid_id, epoch=200): 13 | detection_path = f'{detection_save_path}/detections_{epoch}/{vid_id}' 14 | f = open(detection_path, 'r') 15 | t = f.read() 16 | detections = pd.DataFrame([s.split(' ') 17 | for s in t.split('\n') if s != ''], 18 | columns=['label', 'conf', 'x1', 'y1', 'x2', 'y2']) 19 | return detections.sort_values('conf', ascending=False) 20 | 21 | 22 | def create_detection_rectangles(detections, top_k=1): 23 | rects = [] 24 | labels = [] 25 | confs = [] 26 | xs = [] 27 | ys = [] 28 | 29 | for i in range(0, top_k): 30 | if i >= len(detections): 31 | break 32 | 33 | detection = detections.iloc[i, :] 34 | x1 = int(detection['x1']) 35 | y1 = int(detection['y1']) 36 | x2 = int(detection['x2']) 37 | y2 = int(detection['y2']) 38 | conf = round(float(detection['conf']), 3) 39 | lbl = detection['label'] 40 | 41 | confs.append(conf) 42 | labels.append(lbl) 43 | xs.append(x1) 44 | ys.append(y1) 45 | 46 | if i == 0: 47 | rects.append(patches.Rectangle((x1, y1), (x2 - x1), (y2 - y1), 48 | linewidth=2, edgecolor='b', facecolor='none', linestyle='dashed')) 49 | else: 50 | rects.append(patches.Rectangle((x1, y1), (x2 - x1), (y2 - y1), 51 | linewidth=1, edgecolor='r', facecolor='none', linestyle='dashed')) 52 | return rects, labels, confs, xs, ys 53 | 54 | 55 | def visualize_detections(data_loader, epoch, n=100, top_k=4): 56 | for i in range(0, n): 57 | test_sample = data_loader[i] 58 | vid_id = test_sample[0] 59 | video = test_sample[1] 60 | gt = test_sample[2][0:5] 61 | label = gt[0] 62 | bbox = gt[1:5] 63 | last_frame = video[:, -1, :, :].permute(1, 2, 0) 64 | 65 | factor = 224 66 | cx, cy, iw, ih = bbox 67 | lx = int(factor * (cx - (iw / 2))) 68 | ly = int(factor * (cy - (ih / 2))) 69 | w = int(factor * iw) 70 | h = int(factor * ih) 71 | 72 | detections = get_detections_data(vid_id, epoch) 73 | pred_rects, labels, confs, xs, ys = create_detection_rectangles(detections, top_k) 74 | 75 | print("True", label, " | Prediction", labels[0], "(", confs[0], ")", " ", vid_id) 76 | fig, ax = plt.subplots() 77 | ax.imshow(last_frame) 78 | true_rect = patches.Rectangle((lx, ly), w, h, linewidth=2, edgecolor='g', facecolor='none', linestyle='dashed') 79 | ax.add_patch(true_rect) 80 | plt.text(lx, ly, f"GT-{label+1}", fontsize=12, color='g') 81 | 82 | for j, pred_rect in enumerate(pred_rects): 83 | if confs[j] > 0.25: 84 | 
ax.add_patch(pred_rect)
85 |
86 | if j == 0:
87 | plt.text(xs[j], ys[j], f"Pred-{labels[j]}-{confs[j]}", fontsize=10, color='b')
88 | else:
89 | plt.text(xs[j], ys[j], f"Pred-{labels[j]}-{confs[j]}", fontsize=8, color='r')
90 |
91 | if not os.path.exists("figures"):
92 | os.mkdir("figures")
93 | if not os.path.exists(f"figures/epoch_{epoch}"):
94 | os.mkdir(f"figures/epoch_{epoch}")
95 | plt.savefig(f"figures/epoch_{epoch}/{vid_id}")
96 |
97 | config_file = 'cfg/dota_config.yaml'
98 | with open(config_file, 'r') as f:
99 | dl_args = yaml.safe_load(f)
100 | dl_args = Namespace(**dl_args)
101 |
102 | if not os.path.exists(dl_args.root):
103 | print('did not find data! -------------')
104 | raise SystemExit(1)  # sys is never imported in this script, so raise SystemExit directly
105 |
106 | detection_save_path = 'dota_detections/run1'
107 |
108 | d = DoTA(dl_args, phase='test', n_frames=16, combined_bbox=True)
109 |
110 | visualize_detections(d, epoch=200, n=100, top_k=4)
-------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/core/plot_ava_result.py: --------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import operator
4 | import pdb
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 |
8 | #import _init_paths
9 | #from config.defaults import get_cfg
10 | #from utils.ava_eval_helper import read_labelmap
11 | #from dataset_factory.ava_dataset import Ava
12 |
13 |
14 | def main(json_file):
15 | with open('categories_count.json', 'r') as fb:
16 | categories_count = json.load(fb)
17 |
18 | with open(json_file, 'r') as fb:
19 | detection_result = json.load(fb)
20 |
21 | prefix = 'PascalBoxes_PerformanceByCategory/AP@0.5IOU/'
22 | categories = list(categories_count.keys())
23 | mAP_list = []
24 | for category in categories:
25 | mAP = detection_result[prefix + category]
26 | print(mAP)
27 | mAP_list.append(mAP)
28 |
29 | # width = np.diff(mAP_list).min()
30 | fig, ax = plt.subplots(figsize=(20, 8))
31 | x = list(range(len(categories)))
32 | ax.bar(x, mAP_list, align='center', width=0.8)
33 | for i, y in enumerate(mAP_list):
34 | ax.text(x[i] - 0.5, y + 0.01, '{:.2f}'.format(y), fontsize='x-small')
35 | ax.set(xticks=list(range(0, len(categories))), xticklabels=categories)
36 | plt.xticks(rotation='vertical')
37 | plt.gcf().subplots_adjust(bottom=0.34)
38 | #fig.autofmt_xdate()
39 | plt.savefig('ava_output_histogram.eps', format='eps')
40 | #plt.xticks(list(range(len(categories))), categories, rotation='vertical')
41 | #plt.show()
42 |
43 |
44 |
45 | if __name__ == '__main__':
46 | main('latest_detection.json')
47 |
-------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/ava_evaluation/README.md: --------------------------------------------------------------------------------
1 | The code under this folder is from the official [ActivityNet repo](https://github.com/activitynet/ActivityNet).
2 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/ava_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/video/Traffic_Incident_Detection/dataset_factory/ava_evaluation/__init__.py -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/ava_evaluation/np_box_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxList classes and functions.""" 17 | 18 | from __future__ import ( 19 | absolute_import, 20 | division, 21 | print_function, 22 | unicode_literals, 23 | ) 24 | import numpy as np 25 | 26 | 27 | class BoxList(object): 28 | """Box collection. 29 | 30 | BoxList represents a list of bounding boxes as numpy array, where each 31 | bounding box is represented as a row of 4 numbers, 32 | [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a 33 | given list correspond to a single image. 34 | 35 | Optionally, users can add additional related fields (such as 36 | objectness/classification scores). 37 | """ 38 | 39 | def __init__(self, data): 40 | """Constructs box collection. 41 | 42 | Args: 43 | data: a numpy array of shape [N, 4] representing box coordinates 44 | 45 | Raises: 46 | ValueError: if bbox data is not a numpy array 47 | ValueError: if invalid dimensions for bbox data 48 | """ 49 | if not isinstance(data, np.ndarray): 50 | raise ValueError("data must be a numpy array.") 51 | if len(data.shape) != 2 or data.shape[1] != 4: 52 | raise ValueError("Invalid dimensions for box data.") 53 | if data.dtype != np.float32 and data.dtype != np.float64: 54 | raise ValueError( 55 | "Invalid data type for box data: float is required." 56 | ) 57 | if not self._is_valid_boxes(data): 58 | raise ValueError( 59 | "Invalid box data. data must be a numpy array of " 60 | "N*[y_min, x_min, y_max, x_max]" 61 | ) 62 | self.data = {"boxes": data} 63 | 64 | def num_boxes(self): 65 | """Return number of boxes held in collections.""" 66 | return self.data["boxes"].shape[0] 67 | 68 | def get_extra_fields(self): 69 | """Return all non-box fields.""" 70 | return [k for k in self.data.keys() if k != "boxes"] 71 | 72 | def has_field(self, field): 73 | return field in self.data 74 | 75 | def add_field(self, field, field_data): 76 | """Add data to a specified field. 77 | 78 | Args: 79 | field: a string parameter used to speficy a related field to be accessed. 80 | field_data: a numpy array of [N, ...] representing the data associated 81 | with the field. 
82 | Raises: 83 | ValueError: if the field is already exist or the dimension of the field 84 | data does not matches the number of boxes. 85 | """ 86 | if self.has_field(field): 87 | raise ValueError("Field " + field + "already exists") 88 | if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes(): 89 | raise ValueError("Invalid dimensions for field data") 90 | self.data[field] = field_data 91 | 92 | def get(self): 93 | """Convenience function for accesssing box coordinates. 94 | 95 | Returns: 96 | a numpy array of shape [N, 4] representing box corners 97 | """ 98 | return self.get_field("boxes") 99 | 100 | def get_field(self, field): 101 | """Accesses data associated with the specified field in the box collection. 102 | 103 | Args: 104 | field: a string parameter used to speficy a related field to be accessed. 105 | 106 | Returns: 107 | a numpy 1-d array representing data of an associated field 108 | 109 | Raises: 110 | ValueError: if invalid field 111 | """ 112 | if not self.has_field(field): 113 | raise ValueError("field {} does not exist".format(field)) 114 | return self.data[field] 115 | 116 | def get_coordinates(self): 117 | """Get corner coordinates of boxes. 118 | 119 | Returns: 120 | a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max] 121 | """ 122 | box_coordinates = self.get() 123 | y_min = box_coordinates[:, 0] 124 | x_min = box_coordinates[:, 1] 125 | y_max = box_coordinates[:, 2] 126 | x_max = box_coordinates[:, 3] 127 | return [y_min, x_min, y_max, x_max] 128 | 129 | def _is_valid_boxes(self, data): 130 | """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin]. 131 | 132 | Args: 133 | data: a numpy array of shape [N, 4] representing box coordinates 134 | 135 | Returns: 136 | a boolean indicating whether all ymax of boxes are equal or greater than 137 | ymin, and all xmax of boxes are equal or greater than xmin. 138 | """ 139 | if data.shape[0] > 0: 140 | for i in range(data.shape[0]): 141 | if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]: 142 | return False 143 | return True 144 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/ava_evaluation/np_box_mask_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxMaskList classes and functions.""" 17 | 18 | from __future__ import ( 19 | absolute_import, 20 | division, 21 | print_function, 22 | unicode_literals, 23 | ) 24 | import numpy as np 25 | 26 | from . import np_box_list 27 | 28 | 29 | class BoxMaskList(np_box_list.BoxList): 30 | """Convenience wrapper for BoxList with masks. 31 | 32 | BoxMaskList extends the np_box_list.BoxList to contain masks as well. 
33 | In particular, its constructor receives both boxes and masks. Note that the 34 | masks correspond to the full image. 35 | """ 36 | 37 | def __init__(self, box_data, mask_data): 38 | """Constructs box collection. 39 | 40 | Args: 41 | box_data: a numpy array of shape [N, 4] representing box coordinates 42 | mask_data: a numpy array of shape [N, height, width] representing masks 43 | with values are in {0,1}. The masks correspond to the full 44 | image. The height and the width will be equal to image height and width. 45 | 46 | Raises: 47 | ValueError: if bbox data is not a numpy array 48 | ValueError: if invalid dimensions for bbox data 49 | ValueError: if mask data is not a numpy array 50 | ValueError: if invalid dimension for mask data 51 | """ 52 | super(BoxMaskList, self).__init__(box_data) 53 | if not isinstance(mask_data, np.ndarray): 54 | raise ValueError("Mask data must be a numpy array.") 55 | if len(mask_data.shape) != 3: 56 | raise ValueError("Invalid dimensions for mask data.") 57 | if mask_data.dtype != np.uint8: 58 | raise ValueError( 59 | "Invalid data type for mask data: uint8 is required." 60 | ) 61 | if mask_data.shape[0] != box_data.shape[0]: 62 | raise ValueError( 63 | "There should be the same number of boxes and masks." 64 | ) 65 | self.data["masks"] = mask_data 66 | 67 | def get_masks(self): 68 | """Convenience function for accessing masks. 69 | 70 | Returns: 71 | a numpy array of shape [N, height, width] representing masks 72 | """ 73 | return self.get_field("masks") 74 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/ava_evaluation/np_box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, 4] numpy arrays representing bounding boxes. 17 | 18 | Example box operations that are supported: 19 | * Areas: compute bounding box areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | from __future__ import ( 23 | absolute_import, 24 | division, 25 | print_function, 26 | unicode_literals, 27 | ) 28 | import numpy as np 29 | 30 | 31 | def area(boxes): 32 | """Computes area of boxes. 33 | 34 | Args: 35 | boxes: Numpy array with shape [N, 4] holding N boxes 36 | 37 | Returns: 38 | a numpy array with shape [N*1] representing box areas 39 | """ 40 | return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 41 | 42 | 43 | def intersection(boxes1, boxes2): 44 | """Compute pairwise intersection areas between boxes. 
45 | 46 | Args: 47 | boxes1: a numpy array with shape [N, 4] holding N boxes 48 | boxes2: a numpy array with shape [M, 4] holding M boxes 49 | 50 | Returns: 51 | a numpy array with shape [N*M] representing pairwise intersection area 52 | """ 53 | [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) 54 | [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) 55 | 56 | all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2)) 57 | all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2)) 58 | intersect_heights = np.maximum( 59 | np.zeros(all_pairs_max_ymin.shape), 60 | all_pairs_min_ymax - all_pairs_max_ymin, 61 | ) 62 | all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2)) 63 | all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2)) 64 | intersect_widths = np.maximum( 65 | np.zeros(all_pairs_max_xmin.shape), 66 | all_pairs_min_xmax - all_pairs_max_xmin, 67 | ) 68 | return intersect_heights * intersect_widths 69 | 70 | 71 | def iou(boxes1, boxes2): 72 | """Computes pairwise intersection-over-union between box collections. 73 | 74 | Args: 75 | boxes1: a numpy array with shape [N, 4] holding N boxes. 76 | boxes2: a numpy array with shape [M, 4] holding N boxes. 77 | 78 | Returns: 79 | a numpy array with shape [N, M] representing pairwise iou scores. 80 | """ 81 | intersect = intersection(boxes1, boxes2) 82 | area1 = area(boxes1) 83 | area2 = area(boxes2) 84 | union = ( 85 | np.expand_dims(area1, axis=1) 86 | + np.expand_dims(area2, axis=0) 87 | - intersect 88 | ) 89 | return intersect / union 90 | 91 | 92 | def ioa(boxes1, boxes2): 93 | """Computes pairwise intersection-over-area between box collections. 94 | 95 | Intersection-over-area (ioa) between two boxes box1 and box2 is defined as 96 | their intersection area over box2's area. Note that ioa is not symmetric, 97 | that is, IOA(box1, box2) != IOA(box2, box1). 98 | 99 | Args: 100 | boxes1: a numpy array with shape [N, 4] holding N boxes. 101 | boxes2: a numpy array with shape [M, 4] holding N boxes. 102 | 103 | Returns: 104 | a numpy array with shape [N, M] representing pairwise ioa scores. 105 | """ 106 | intersect = intersection(boxes1, boxes2) 107 | areas = np.expand_dims(area(boxes2), axis=0) 108 | return intersect / areas 109 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/ava_evaluation/np_mask_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, height, width] numpy arrays representing masks. 
17 | 18 | Example mask operations that are supported: 19 | * Areas: compute mask areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | from __future__ import ( 23 | absolute_import, 24 | division, 25 | print_function, 26 | unicode_literals, 27 | ) 28 | import numpy as np 29 | 30 | EPSILON = 1e-7 31 | 32 | 33 | def area(masks): 34 | """Computes area of masks. 35 | 36 | Args: 37 | masks: Numpy array with shape [N, height, width] holding N masks. Masks 38 | values are of type np.uint8 and values are in {0,1}. 39 | 40 | Returns: 41 | a numpy array with shape [N*1] representing mask areas. 42 | 43 | Raises: 44 | ValueError: If masks.dtype is not np.uint8 45 | """ 46 | if masks.dtype != np.uint8: 47 | raise ValueError("Masks type should be np.uint8") 48 | return np.sum(masks, axis=(1, 2), dtype=np.float32) 49 | 50 | 51 | def intersection(masks1, masks2): 52 | """Compute pairwise intersection areas between masks. 53 | 54 | Args: 55 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 56 | values are of type np.uint8 and values are in {0,1}. 57 | masks2: a numpy array with shape [M, height, width] holding M masks. Masks 58 | values are of type np.uint8 and values are in {0,1}. 59 | 60 | Returns: 61 | a numpy array with shape [N*M] representing pairwise intersection area. 62 | 63 | Raises: 64 | ValueError: If masks1 and masks2 are not of type np.uint8. 65 | """ 66 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 67 | raise ValueError("masks1 and masks2 should be of type np.uint8") 68 | n = masks1.shape[0] 69 | m = masks2.shape[0] 70 | answer = np.zeros([n, m], dtype=np.float32) 71 | for i in np.arange(n): 72 | for j in np.arange(m): 73 | answer[i, j] = np.sum( 74 | np.minimum(masks1[i], masks2[j]), dtype=np.float32 75 | ) 76 | return answer 77 | 78 | 79 | def iou(masks1, masks2): 80 | """Computes pairwise intersection-over-union between mask collections. 81 | 82 | Args: 83 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 84 | values are of type np.uint8 and values are in {0,1}. 85 | masks2: a numpy array with shape [M, height, width] holding N masks. Masks 86 | values are of type np.uint8 and values are in {0,1}. 87 | 88 | Returns: 89 | a numpy array with shape [N, M] representing pairwise iou scores. 90 | 91 | Raises: 92 | ValueError: If masks1 and masks2 are not of type np.uint8. 93 | """ 94 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 95 | raise ValueError("masks1 and masks2 should be of type np.uint8") 96 | intersect = intersection(masks1, masks2) 97 | area1 = area(masks1) 98 | area2 = area(masks2) 99 | union = ( 100 | np.expand_dims(area1, axis=1) 101 | + np.expand_dims(area2, axis=0) 102 | - intersect 103 | ) 104 | return intersect / np.maximum(union, EPSILON) 105 | 106 | 107 | def ioa(masks1, masks2): 108 | """Computes pairwise intersection-over-area between box collections. 109 | 110 | Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as 111 | their intersection area over mask2's area. Note that ioa is not symmetric, 112 | that is, IOA(mask1, mask2) != IOA(mask2, mask1). 113 | 114 | Args: 115 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 116 | values are of type np.uint8 and values are in {0,1}. 117 | masks2: a numpy array with shape [M, height, width] holding N masks. Masks 118 | values are of type np.uint8 and values are in {0,1}. 119 | 120 | Returns: 121 | a numpy array with shape [N, M] representing pairwise ioa scores. 
122 | 123 | Raises: 124 | ValueError: If masks1 and masks2 are not of type np.uint8. 125 | """ 126 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 127 | raise ValueError("masks1 and masks2 should be of type np.uint8") 128 | intersect = intersection(masks1, masks2) 129 | areas = np.expand_dims(area(masks2), axis=0) 130 | return intersect / (areas + EPSILON) 131 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/generate_anchors.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Feb 20, 2017 3 | @author: jumabek 4 | 5 | Acquired from https://github.com/Jumabek/darknet_scripts/blob/master/gen_anchors.py 6 | ''' 7 | from os import listdir 8 | from os.path import isfile, join 9 | import argparse 10 | import numpy as np 11 | import sys 12 | import os 13 | import shutil 14 | import random 15 | import math 16 | 17 | width_in_cfg_file = 224. 18 | height_in_cfg_file = 224. 19 | 20 | def IOU(x,centroids): 21 | similarities = [] 22 | k = len(centroids) 23 | for centroid in centroids: 24 | c_w,c_h = centroid 25 | w,h = x 26 | if c_w>=w and c_h>=h: 27 | similarity = w*h/(c_w*c_h) 28 | elif c_w>=w and c_h<=h: 29 | similarity = w*c_h/(w*h + (c_w-w)*c_h) 30 | elif c_w<=w and c_h>=h: 31 | similarity = c_w*h/(w*h + c_w*(c_h-h)) 32 | else: #means both w,h are bigger than c_w and c_h respectively 33 | similarity = (c_w*c_h)/(w*h) 34 | similarities.append(similarity) # will become (k,) shape 35 | return np.array(similarities) 36 | 37 | def avg_IOU(X,centroids): 38 | n,d = X.shape 39 | sum = 0. 40 | for i in range(X.shape[0]): 41 | #note IOU() will return array which contains IoU for each centroid and X[i] // slightly ineffective, but I am too lazy 42 | sum+= max(IOU(X[i],centroids)) 43 | return sum/n 44 | 45 | def write_anchors_to_file(centroids,X,anchor_file): 46 | f = open(anchor_file,'w') 47 | 48 | anchors = centroids.copy() 49 | print(anchors.shape) 50 | 51 | for i in range(anchors.shape[0]): 52 | anchors[i][0]*=width_in_cfg_file/32. 53 | anchors[i][1]*=height_in_cfg_file/32. 
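# Note on the scaling above: the centroid widths/heights are multiplied by
# width_in_cfg_file/32 (= 224/32 = 7), converting them into units of the
# 32x-downsampled feature map; darknet-style [region] layers expect their anchors
# in these grid-cell units. This assumes the dimensions read from the CSV are
# normalized to [0, 1], as in the AVA annotation format used by the default -filelist path.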
54 | 55 | 56 | widths = anchors[:,0] 57 | sorted_indices = np.argsort(widths) 58 | 59 | print('Anchors = ', anchors[sorted_indices]) 60 | 61 | for i in sorted_indices[:-1]: 62 | f.write('%0.2f,%0.2f, '%(anchors[i,0],anchors[i,1])) 63 | 64 | #there should not be comma after last anchor, that's why 65 | f.write('%0.2f,%0.2f\n'%(anchors[sorted_indices[-1:],0],anchors[sorted_indices[-1:],1])) 66 | 67 | f.write('%f\n'%(avg_IOU(X,centroids))) 68 | 69 | def kmeans(X,centroids,eps,anchor_file): 70 | 71 | N = X.shape[0] 72 | iterations = 0 73 | k,dim = centroids.shape 74 | prev_assignments = np.ones(N)*(-1) 75 | iter = 0 76 | old_D = np.zeros((N,k)) 77 | 78 | while True: 79 | D = [] 80 | iter+=1 81 | for i in range(N): 82 | d = 1 - IOU(X[i],centroids) 83 | D.append(d) 84 | D = np.array(D) # D.shape = (N,k) 85 | 86 | print("iter {}: dists = {}".format(iter,np.sum(np.abs(old_D-D)))) 87 | 88 | #assign samples to centroids 89 | assignments = np.argmin(D,axis=1) 90 | 91 | if (assignments == prev_assignments).all() : 92 | print("Centroids = ",centroids) 93 | write_anchors_to_file(centroids,X,anchor_file) 94 | return 95 | 96 | #calculate new centroids 97 | centroid_sums=np.zeros((k,dim),np.float) 98 | for i in range(N): 99 | centroid_sums[assignments[i]]+=X[i] 100 | for j in range(k): 101 | centroids[j] = centroid_sums[j]/(np.sum(assignments==j)) 102 | 103 | prev_assignments = assignments.copy() 104 | old_D = D.copy() 105 | 106 | def main(argv): 107 | parser = argparse.ArgumentParser() 108 | parser.add_argument('-filelist', default = '/usr/home/sut/dataset_factory/AVA/annotations/ava_train_v2.2.csv', 109 | help='path to filelist\n' ) 110 | parser.add_argument('-output_dir', default = '/usr/home/sut/dataset_factory/AVA/annotations', type = str, 111 | help='Output anchor directory\n' ) 112 | parser.add_argument('-num_clusters', default = 5, type = int, 113 | help='number of clusters\n' ) 114 | 115 | 116 | args = parser.parse_args() 117 | 118 | if not os.path.exists(args.output_dir): 119 | os.mkdir(args.output_dir) 120 | 121 | f = open(args.filelist) 122 | 123 | lines = [line.rstrip('\n') for line in f.readlines()] 124 | 125 | annotation_dims = [] 126 | 127 | size = np.zeros((1,1,3)) 128 | for line in lines: 129 | line_annot = line.split(',') 130 | w = float(line_annot[4]) - float(line_annot[2]) 131 | h = float(line_annot[5]) - float(line_annot[3]) 132 | annotation_dims.append(tuple(map(float,(w,h)))) 133 | 134 | annotation_dims = np.array(annotation_dims) 135 | 136 | eps = 0.005 137 | 138 | if args.num_clusters == 0: 139 | for num_clusters in range(1,11): #we make 1 through 10 clusters 140 | anchor_file = join( args.output_dir,'anchors%d.txt'%(num_clusters)) 141 | 142 | indices = [ random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)] 143 | centroids = annotation_dims[indices] 144 | kmeans(annotation_dims,centroids,eps,anchor_file) 145 | print('centroids.shape', centroids.shape) 146 | else: 147 | anchor_file = join( args.output_dir,'anchors%d.txt'%(args.num_clusters)) 148 | indices = [ random.randrange(annotation_dims.shape[0]) for i in range(args.num_clusters)] 149 | centroids = annotation_dims[indices] 150 | kmeans(annotation_dims,centroids,eps,anchor_file) 151 | print('centroids.shape', centroids.shape) 152 | 153 | if __name__=="__main__": 154 | main(sys.argv) 155 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/list_dataset.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import os 5 | import glob 6 | import random 7 | import numpy as np 8 | 9 | import torch 10 | from torch.utils.data import Dataset 11 | from PIL import Image 12 | 13 | from datasets.clip import * 14 | 15 | 16 | class UCF_JHMDB_Dataset(Dataset): 17 | 18 | # clip duration = 8, i.e, for each time 8 frames are considered together 19 | def __init__(self, base, root, dataset='ucf24', shape=None, 20 | transform=None, target_transform=None, 21 | train=False, clip_duration=16, sampling_rate=1): 22 | with open(root, 'r') as file: 23 | self.lines = file.readlines() 24 | 25 | self.base_path = base 26 | self.dataset = dataset 27 | self.nSamples = len(self.lines) 28 | self.transform = transform 29 | self.target_transform = target_transform 30 | self.train = train 31 | self.shape = shape 32 | self.clip_duration = clip_duration 33 | self.sampling_rate = sampling_rate 34 | 35 | def __len__(self): 36 | return self.nSamples 37 | 38 | def __getitem__(self, index): 39 | assert index <= len(self), 'index range error' 40 | imgpath = self.lines[index].rstrip() 41 | 42 | if self.train: # For Training 43 | jitter = 0.2 44 | hue = 0.1 45 | saturation = 1.5 46 | exposure = 1.5 47 | 48 | clip, label = load_data_detection(self.base_path, imgpath, self.train, self.clip_duration, self.sampling_rate, self.shape, self.dataset, jitter, hue, saturation, exposure) 49 | 50 | else: # For Testing 51 | frame_idx, clip, label = load_data_detection(self.base_path, imgpath, False, self.clip_duration, self.sampling_rate, self.shape, self.dataset) 52 | clip = [img.resize(self.shape) for img in clip] 53 | 54 | if self.transform is not None: 55 | clip = [self.transform(img) for img in clip] 56 | 57 | # (self.duration, -1) + self.shape = (8, -1, 224, 224) 58 | clip = torch.cat(clip, 0).view((self.clip_duration, -1) + self.shape).permute(1, 0, 2, 3) 59 | 60 | if self.target_transform is not None: 61 | label = self.target_transform(label) 62 | 63 | if self.train: 64 | return (clip, label) 65 | else: 66 | return (frame_idx, clip, label) -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/logging.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Logging.""" 5 | 6 | import time 7 | import os 8 | import logging 9 | import functools 10 | import sys 11 | from fvcore.common.file_io import PathManager 12 | 13 | 14 | @functools.lru_cache(maxsize=None) 15 | def _cached_log_stream(filename): 16 | return PathManager.open(filename, "a") 17 | 18 | 19 | def setup_logging(output_dir=None): 20 | """ 21 | Sets up the logging for multiple processes. Only enable the logging for the 22 | master process, and suppress logging for the non-master processes. 23 | """ 24 | # Set up logging format. 
25 | _FORMAT = "[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s" 26 | 27 | logging.root.handlers = [] 28 | 29 | logger = logging.getLogger() 30 | logger.setLevel(logging.DEBUG) 31 | logger.propagate = False 32 | plain_formatter = logging.Formatter( 33 | "[%(asctime)s][%(levelname)s] %(name)s: %(lineno)4d: %(message)s", 34 | datefmt="%m/%d %H:%M:%S", 35 | ) 36 | 37 | ch = logging.StreamHandler(stream=sys.stdout) 38 | ch.setLevel(logging.INFO) 39 | ch.setFormatter(plain_formatter) 40 | logger.addHandler(ch) 41 | 42 | if output_dir is not None: 43 | filename = os.path.join(output_dir, "stdout.log") 44 | fh = logging.StreamHandler(_cached_log_stream(filename)) 45 | fh.setLevel(logging.DEBUG) 46 | fh.setFormatter(plain_formatter) 47 | logger.addHandler(fh) 48 | 49 | 50 | def get_logger(name): 51 | """ 52 | Retrieve the logger with the specified name or, if name is None, return a 53 | logger which is the root logger of the hierarchy. 54 | Args: 55 | name (string): name of the logger. 56 | """ 57 | return logging.getLogger(name) 58 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dataset_factory/meters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Meters.""" 5 | 6 | import datetime 7 | import time 8 | import numpy as np 9 | import os 10 | from collections import defaultdict, deque 11 | import torch 12 | from fvcore.common.timer import Timer 13 | import json 14 | 15 | from dataset_factory import logging 16 | from dataset_factory import ava_helper 17 | from dataset_factory.ava_eval_helper import ( 18 | run_evaluation, 19 | read_csv, 20 | read_exclusions, 21 | read_labelmap, 22 | write_results 23 | ) 24 | 25 | logger = logging.get_logger(__name__) 26 | 27 | 28 | def get_ava_mini_groundtruth(full_groundtruth): 29 | """ 30 | Get the groundtruth annotations corresponding the "subset" of AVA val set. 31 | We define the subset to be the frames such that (second % 4 == 0). 32 | We optionally use subset for faster evaluation during training 33 | (in order to track training progress). 34 | Args: 35 | full_groundtruth(dict): list of groundtruth. 
36 | """ 37 | ret = [defaultdict(list), defaultdict(list), defaultdict(list)] 38 | 39 | for i in range(3): 40 | for key in full_groundtruth[i].keys(): 41 | if int(key.split(",")[1]) % 4 == 0: 42 | ret[i][key] = full_groundtruth[i][key] 43 | return ret 44 | 45 | 46 | class AVAMeter(object): 47 | def __init__(self, cfg, mode, output_json): 48 | self.cfg = cfg 49 | self.all_preds = [] 50 | self.mode = mode 51 | self.output_json = os.path.join(self.cfg.BACKUP_DIR, output_json) 52 | self.full_ava_test = cfg.AVA.FULL_TEST_ON_VAL 53 | self.excluded_keys = read_exclusions( 54 | os.path.join(cfg.AVA.ANNOTATION_DIR, cfg.AVA.EXCLUSION_FILE) 55 | ) 56 | self.categories, self.class_whitelist = read_labelmap( 57 | os.path.join(cfg.AVA.ANNOTATION_DIR, cfg.AVA.LABEL_MAP_FILE) 58 | ) 59 | gt_filename = os.path.join( 60 | cfg.AVA.ANNOTATION_DIR, cfg.AVA.GROUNDTRUTH_FILE 61 | ) 62 | self.full_groundtruth = read_csv(gt_filename, self.class_whitelist) 63 | self.mini_groundtruth = get_ava_mini_groundtruth(self.full_groundtruth) 64 | _, self.video_idx_to_name = ava_helper.load_image_lists(cfg, self.mode == 'train') 65 | 66 | def update_stats(self, preds): 67 | self.all_preds.extend(preds) 68 | 69 | def evaluate_ava(self): 70 | eval_start = time.time() 71 | detections = self.get_ava_eval_data() 72 | if self.mode == 'test' or (self.full_ava_test and self.mode == "val"): 73 | groundtruth = self.full_groundtruth 74 | else: 75 | groundtruth = self.mini_groundtruth 76 | logger.info("Evaluating with %d unique GT frames." % len(groundtruth[0])) 77 | logger.info("Evaluating with %d unique detection frames" % len(detections[0])) 78 | 79 | name = "latest" 80 | write_results(detections, os.path.join(self.cfg.BACKUP_DIR, "detections_%s.csv" % name)) 81 | write_results(groundtruth, os.path.join(self.cfg.BACKUP_DIR, "groundtruth_%s.csv" % name)) 82 | results = run_evaluation(self.categories, groundtruth, detections, self.excluded_keys) 83 | with open(self.output_json, 'w') as fp: 84 | json.dump(results, fp) 85 | logger.info("Save eval results in {}".format(self.output_json)) 86 | 87 | logger.info("AVA eval done in %f seconds." 
% (time.time() - eval_start)) 88 | 89 | return results["PascalBoxes_Precision/mAP@0.5IOU"] 90 | 91 | def get_ava_eval_data(self): 92 | out_scores = defaultdict(list) 93 | out_labels = defaultdict(list) 94 | out_boxes = defaultdict(list) 95 | count = 0 96 | 97 | # each pred is [[x1, y1, x2, y2], [scores], [video_idx, src]] 98 | for i in range(len(self.all_preds)): 99 | pred = self.all_preds[i] 100 | assert len(pred) == 3 101 | video_idx = int(np.round(pred[-1][0])) 102 | sec = int(np.round(pred[-1][1])) 103 | box = pred[0] 104 | scores = pred[1] 105 | assert len(scores) == 80 106 | # try: 107 | # assert len(scores) == len(labels) 108 | # except TypeError: 109 | # pdb.set_trace() 110 | 111 | video = self.video_idx_to_name[video_idx] 112 | key = video + ',' + "%04d" % (sec) 113 | box = [box[1], box[0], box[3], box[2]] # turn to y1,x1,y2,x2 114 | 115 | for cls_idx, score in enumerate(scores): 116 | if cls_idx + 1 in self.class_whitelist: 117 | out_scores[key].append(score) 118 | out_labels[key].append(cls_idx + 1) 119 | out_boxes[key].append(box) 120 | count += 1 121 | 122 | return out_boxes, out_labels, out_scores 123 | 124 | 125 | class AverageMeter(object): 126 | """Computes and stores the average and current value""" 127 | 128 | def __init__(self): 129 | self.reset() 130 | 131 | def reset(self): 132 | self.val = 0 133 | self.avg = 0 134 | self.sum = 0 135 | self.count = 0 136 | 137 | def update(self, val, n=1): 138 | self.val = val 139 | self.sum += val * n 140 | self.count += n 141 | self.avg = self.sum / self.count 142 | -------------------------------------------------------------------------------- /video/Traffic_Incident_Detection/dota_anchors.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import yaml 5 | from argparse import Namespace 6 | from dataset_factory.dota import DoTA 7 | import seaborn as sns 8 | 9 | 10 | num_anchors = 9 11 | # image_size_wh = (224,224) 12 | image_size_wh = (1280,720) 13 | 14 | sns.set() # for plot styling 15 | 16 | with open("cfg/dota_config.yaml", 'r') as f: 17 | args = yaml.load(f) 18 | args = Namespace(**args) 19 | 20 | if not os.path.exists('/scratch/ssd002/dataset_factory/cv_project/Detection-of-Traffic-Anomaly/dataset'): 21 | args.root = '/home/matthew/Desktop/Datasets/DoTA' 22 | args.data_root = '/home/matthew/Desktop/Datasets/DoTA/DoTA_fol_train_data' 23 | args.val_data_root = '/home/matthew/Desktop/Datasets/DoTA/DoTA_fol_val_data' 24 | args.label_file = '/home/matthew/Desktop/Datasets/DoTA/metadata_val.json' 25 | args.train_split = '/home/matthew/Desktop/Datasets/DoTA/train_split.txt' 26 | args.val_split = '/home/matthew/Desktop/Datasets/DoTA/val_split.txt' 27 | args.img_dir = '/home/matthew/Desktop/Datasets/DoTA/frames' 28 | 29 | d = DoTA(args, phase='train', n_frames=16, combined_bbox=True) 30 | 31 | 32 | # RESIZE TO 224 33 | 34 | lbls = [i[2] for i in d.data_list] 35 | 36 | w, h = [], [] 37 | 38 | for lbl in lbls: 39 | w1 = lbl[2] - lbl[0] 40 | h1 = lbl[3] - lbl[1] 41 | 42 | # RESIZE 43 | if image_size_wh[0] != 1280 or image_size_wh[1] != 720: 44 | w2 = image_size_wh[0] * w1 / 1280 45 | h2 = image_size_wh[1] * h1 / 720 46 | else: 47 | w2 = w1 48 | h2 = h1 49 | w.append(w2) 50 | h.append(h2) 51 | 52 | # w.append(lbl[2] - lbl[0]) 53 | # h.append(lbl[3] - lbl[1]) 54 | 55 | w = np.asarray(w) 56 | h = np.asarray(h) 57 | 58 | x = [w, h] 59 | x = np.asarray(x) 60 | x = x.transpose() 61 | ########################################## K- Means 
62 | ########################################## 63 | 64 | from sklearn.cluster import KMeans 65 | 66 | kmeans3 = KMeans(n_clusters=num_anchors) 67 | kmeans3.fit(x) 68 | y_kmeans3 = kmeans3.predict(x) 69 | 70 | ########################################## 71 | centers3 = kmeans3.cluster_centers_ 72 | 73 | yolo_anchor_average = [] 74 | for ind in range(num_anchors): 75 | yolo_anchor_average.append(np.mean(x[y_kmeans3 == ind], axis=0)) 76 | 77 | yolo_anchor_average = np.array(yolo_anchor_average) 78 | 79 | plt.scatter(x[:, 0], x[:, 1], c=y_kmeans3, s=2, cmap='viridis') 80 | plt.scatter(yolo_anchor_average[:, 0], yolo_anchor_average[:, 1], c='red', s=50); 81 | yoloV3anchors = yolo_anchor_average 82 | yoloV3anchors[:, 0] = yolo_anchor_average[:, 0] / 1280 * 608 83 | yoloV3anchors[:, 1] = yolo_anchor_average[:, 1] / 720 * 608 84 | yoloV3anchors = np.rint(yoloV3anchors) 85 | fig, ax = plt.subplots() 86 | for ind in range(num_anchors): 87 | rectangle = plt.Rectangle((304 - yoloV3anchors[ind, 0] / 2, 304 - yoloV3anchors[ind, 1] / 2), yoloV3anchors[ind, 0], 88 | yoloV3anchors[ind, 1], fc='b', edgecolor='b', fill=None) 89 | ax.add_patch(rectangle) 90 | ax.set_aspect(1.0) 91 | plt.axis([0, 608, 0, 608]) 92 | plt.show() 93 | yoloV3anchors.sort(axis=0) 94 | print("Your custom anchor boxes for the original image space are {}".format(yoloV3anchors)) 95 | x = np.array([[0.70458, 1.18803], [1.26654, 2.55121], [1.59382, 4.08321], [2.30548, 4.94180], [3.52332, 5.91979]]) 96 | print("YOWO boxes for 224 image space are {}".format(x*32)) 97 | print('>>>') 98 | print('>>>') 99 | print('>>>') 100 | print("Your custom anchor boxes for 224 image space to a 7x7 feature map (i.e., 32x reduction) are {}".format(yoloV3anchors/32)) 101 | # YOWO anchor boxes 102 | x = np.array([[0.70458, 1.18803], [1.26654, 2.55121], [1.59382, 4.08321], [2.30548, 4.94180], [3.52332, 5.91979]]) 103 | print("YOWO boxes for 224 image space to a 7x7 feature map (i.e., 32x reduction) are {}".format(x)) 104 | --------------------------------------------------------------------------------
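As a small appendix to the evaluation utilities above: the functions in dataset_factory/ava_evaluation/np_box_ops.py are self-contained NumPy helpers, so they can be sanity-checked in isolation. The sketch below is an illustrative addition, not part of the repository; it assumes it is run from video/Traffic_Incident_Detection/ so that the dataset_factory package is importable, and it simply builds two small sets of [y_min, x_min, y_max, x_max] boxes and inspects their pairwise IoU/IoA, the same quantities the AVA meter thresholds at 0.5 IoU.

import numpy as np
from dataset_factory.ava_evaluation import np_box_ops

# Boxes follow the BoxList convention above: [y_min, x_min, y_max, x_max], float32.
gt_boxes = np.array([[0.0, 0.0, 10.0, 10.0],
                     [5.0, 5.0, 15.0, 15.0]], dtype=np.float32)
pred_boxes = np.array([[0.0, 0.0, 10.0, 10.0],
                       [2.0, 2.0, 12.0, 12.0],
                       [20.0, 20.0, 30.0, 30.0]], dtype=np.float32)

pairwise_iou = np_box_ops.iou(gt_boxes, pred_boxes)   # shape [2, 3], pairwise IoU
pairwise_ioa = np_box_ops.ioa(gt_boxes, pred_boxes)   # intersection over each pred box's area

print(pairwise_iou)
print(pairwise_ioa)

# Greedy matching: each ground-truth box keeps its highest-IoU prediction and is
# counted as detected if that IoU clears the 0.5 threshold used by the
# PascalBoxes mAP@0.5IOU metric reported in meters.py.
best_pred = pairwise_iou.argmax(axis=1)
for g, p in enumerate(best_pred):
    matched = pairwise_iou[g, p] >= 0.5
    print('gt {} -> pred {} (IoU {:.3f}, matched: {})'.format(g, p, pairwise_iou[g, p], matched))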