├── .gitignore
├── LICENSE
├── README.md
├── image
├── Building_Footprint_Extraction
│ ├── README.md
│ ├── dataset.py
│ ├── metrics.py
│ ├── model.py
│ ├── requirements.txt
│ ├── train.py
│ ├── training.py
│ └── utils.py
├── Defect_Detection
│ ├── README.md
│ ├── datasets.py
│ ├── model.py
│ ├── requirements.txt
│ ├── train.py
│ └── utils.py
├── Road_Obstacle_Detection
│ ├── README.md
│ ├── dice_loss.py
│ ├── eval.py
│ ├── lf_loader.py
│ ├── train.py
│ ├── train_loss.txt
│ ├── utils.py
│ └── val_loss.txt
└── fastlane
│ ├── Image_Classification
│ └── ImageClassification.ipynb
│ ├── OCR
│ ├── OCR.ipynb
│ ├── charnet
│ │ ├── __init__.py
│ │ ├── config
│ │ │ ├── __init__.py
│ │ │ └── defaults.py
│ │ └── modeling
│ │ │ ├── __init__.py
│ │ │ ├── backbone
│ │ │ ├── __init__.py
│ │ │ ├── decoder.py
│ │ │ ├── hourglass.py
│ │ │ └── resnet.py
│ │ │ ├── layers
│ │ │ ├── __init__.py
│ │ │ ├── misc.py
│ │ │ └── scale.py
│ │ │ ├── model.py
│ │ │ ├── postprocessing.py
│ │ │ ├── rotated_nms.py
│ │ │ └── utils.py
│ ├── configs
│ │ └── icdar2015_hourglass88.yaml
│ ├── datasets
│ │ └── ICDAR2015
│ │ │ └── test
│ │ │ ├── GenericVocabulary.txt
│ │ │ └── char_dict.txt
│ ├── iou.py
│ └── sample.jpg
│ ├── Object_Detection
│ ├── ObjectDetection.ipynb
│ ├── dataset.py
│ ├── models.py
│ ├── skynews-boeing-737-plane_5435020.jpg
│ └── utils.py
│ └── README.md
└── video
├── Galbladder_Segmentation
├── Detectron2StepByStep.ipynb
├── DetectronGBScript.py
├── GallbladderFiles
│ ├── NOGO 1-16424 via_project_20May2021_17h2m.json
│ ├── NOGO 1_16504 via_project_22May2021_9h9m.json
│ ├── NOGO1_319 via_project_14May2021_13h54m.json
│ ├── d018a7fb_25Apr2021_13h18m36s nogo.json
│ ├── nogo 1-450 via_project_18May2021_19h36m.json
│ ├── nogo1_16859 via_project_22May2021_17h54m (66).json
│ ├── nogo1_16859 via_project_22May2021_17h54m.json
│ ├── nogo270via_project_11May2021_18h54m.json
│ ├── nogo310via_project_13May2021_16h20m.json
│ ├── via_project_13May2021_23h12m_nogo.json
│ ├── via_project_26Apr2021_20h18m nogo231.json
│ ├── via_project_2nogo.json
│ ├── via_project_3May2021_17h41m_nogo (1).json
│ ├── via_project_3May2021_17h41m_nogo.json
│ ├── via_project_vid04_nogo.json
│ ├── via_project_vid6_nogo_100.json
│ ├── via_project_vid8_nogo_100.json
│ ├── via_project_video_02_nogo_1-100.json
│ ├── video20_03260.nogo.json
│ ├── video_18_00979.nogo.json
│ └── video_24_09836_nogo.json
├── README.md
├── _launch.sh
├── _runner.sh
├── augCoords.json
├── grandproj.env
├── jsonOutput
│ ├── bladder_val_coco_format.json
│ ├── bladder_val_coco_format.json.lock
│ ├── coco_instances_results.json
│ └── instances_predictions.pth
├── parameters.txt
└── runt4v1Detectron.slrm
└── Traffic_Incident_Detection
├── .gitignore
├── .idea
├── .gitignore
├── deployment.xml
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
├── vcs.xml
├── webServers.xml
└── yowo.iml
├── README.md
├── backbones_2d
├── DeepLabV3PlusPytorch
│ ├── LICENSE
│ ├── README.md
│ ├── datasets
│ │ ├── __init__.py
│ │ ├── cityscapes.py
│ │ ├── utils.py
│ │ └── voc.py
│ ├── main.py
│ ├── metrics
│ │ ├── __init__.py
│ │ └── stream_metrics.py
│ ├── network
│ │ ├── __init__.py
│ │ ├── _deeplab.py
│ │ ├── backbone
│ │ │ ├── __init__.py
│ │ │ ├── mobilenetv2.py
│ │ │ └── resnet.py
│ │ ├── modeling.py
│ │ └── utils.py
│ ├── predict.py
│ ├── resnet_2d.ipynb
│ └── utils
│ │ ├── __init__.py
│ │ ├── ext_transforms.py
│ │ ├── loss.py
│ │ ├── scheduler.py
│ │ ├── utils.py
│ │ └── visualizer.py
└── darknet.py
├── backbones_3d
├── mobilenet.py
├── mobilenetv2.py
├── resnet.py
├── resnext.py
├── shufflenet.py
└── shufflenetv2.py
├── cfg
├── ava.yaml
├── ava_categories_count.json
├── ava_categories_ratio.json
├── custom_config.py
├── defaults.py
├── dota_config.yaml
├── dota_train.yaml
├── jhmdb.yaml
├── parser.py
├── ucf24.yaml
├── ucf24_charmed-leaf-23_copy.yaml
├── ucf24_finalAnnots.mat
├── yolo.cfg
└── yolo_cfg.py
├── core
├── FocalLoss.py
├── cfam.py
├── detection_visualization.py
├── detection_visualization_obj_anom.py
├── eval_results.py
├── model.py
├── optimization.py
├── plot_ava_result.py
├── region_loss.py
└── utils.py
├── dataset_factory
├── ava_dataset.py
├── ava_eval_helper.py
├── ava_evaluation
│ ├── README.md
│ ├── __init__.py
│ ├── label_map_util.py
│ ├── metrics.py
│ ├── np_box_list.py
│ ├── np_box_list_ops.py
│ ├── np_box_mask_list.py
│ ├── np_box_mask_list_ops.py
│ ├── np_box_ops.py
│ ├── np_mask_ops.py
│ ├── object_detection_evaluation.py
│ ├── per_image_evaluation.py
│ └── standard_fields.py
├── ava_helper.py
├── clip.py
├── cv2_transform.py
├── dataset_utils.py
├── dota.py
├── generate_anchors.py
├── image.py
├── list_dataset.py
├── logging.py
├── meters.py
└── transform.py
├── dota_anchors.py
├── dota_dl.ipynb
├── main.py
├── main_dota.py
├── test_video_ava.py
└── video_mAP.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | Image_Segmentation/Defect_Detection/ckpt
3 | *__pycache__
4 | Image_Segmentation/Defect_Detection/samples
5 | Image_Segmentation/Building_Footprint_Extraction/samples
6 | Image_Segmentation/Building_Footprint_Extraction/ckpt
7 | Image_Segmentation/Road_Obstacle_Detection/ckpt
8 | Image_Segmentation/Road_Obstacle_Detection/samples
9 | *.ipynb_checkpoints
10 | Image_Segmentation/Road_Obstacle_Detection/test.ipynb
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Vector Institute
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Computer Vision Project
2 |
3 | This project facilitated knowledge transfer between Vector and its industry sponsors. Specifically, the objectives were the following:
4 |
5 | 1. Learn about recent advances in deep learning for computer vision
6 | 2. Apply methods to novel use cases in industry
7 |
8 | Several use cases involving both images and videos are explored. These use cases reflect current industry needs, participants’ interests and expertise, and opportunities to translate academic advances into real-world applications:
9 |
10 | **Image Use Cases**
11 | 1. Unsupervised defect detection in manufacturing using autoencoders
12 | 2. Building footprint extraction using semantic segmentation
13 | 3. Road obstacle detection using semantic segmentation
14 |
15 | **Video Use Cases**
16 | 1. Semantic segmentation of videos from cholecystectomy procedures (gallbladder surgery)
17 | 2. Traffic incident detection in videos
18 |
19 | ## Additional Tooling
20 | In addition, the AI Engineering team has created a separate repository that serves as a toolkit for the Computer Vision project at Vector Institute. It includes various datasets readily loadable from the shared cluster as well as useful image/video tools such as data augmentation and visualization utilities. You can find the repository at https://github.com/VectorInstitute/vector_cv_tools
21 |
22 | ## Usage
23 | Each folder corresponding to a use case includes instructions to run the experiments. It should be noted that this repository is no longer maintained and solely serves as an artifact of the project.
24 |
25 | ## Citations
26 | Please cite the [Computer Vision: Applications in Manufacturing, Surgery, Traffic, Satellites, and Unlabelled Data Recognition Technical Report](https://vectorinstitute.ai/wp-content/uploads/2022/05/computer_vision_project_report_may252022.pdf) whenever you cite this GitHub repository.
27 |
28 | ## Acknowledgements
29 | Many thanks to our sponsor companies, researchers and Vector Institute staff for making this collaboration possible and providing academic support and computing infrastructure during all phases of this work. We would specifically like to thank the following individuals for their contributions.
30 |
31 | * Elham Ahmadi
32 | * Andrew Alberts-Scherer
33 | * Raghav Goyal
34 | * John Jewell
35 | * Shuja Khalid
36 | * Matthew Kowal
37 | * Andriy Levitskyy
38 | * Jinbiao Ning
39 | * Tristan Trim
40 | * Kuldeep Panjwani
41 | * Saeed Pouryazdian
42 | * Sim Sachar
43 | * Yilei Wu
44 | * An Zhou
45 |
--------------------------------------------------------------------------------
/image/Building_Footprint_Extraction/README.md:
--------------------------------------------------------------------------------
1 | # Building Footprint Extraction
2 |
3 | ## Overview
4 |
5 | As high-resolution satellite imagery becomes increasingly available in both the public and private domains, it enables a range of beneficial applications. Extraction of building footprints is a core component of many downstream applications of satellite imagery, such as humanitarian assistance and disaster response. This work offers a comparative study of methods for building footprint extraction in satellite imagery. The focus is to explore state-of-the-art semantic segmentation models using the SpaceNet 2 Building Detection Dataset. Four high-level approaches, and six total variants, are trained and evaluated, including U-Net, UNet++, Fully Convolutional Networks (FCN) and DeepLabv3. Intersection over Union (IoU) is used to quantify segmentation performance on a held-out test set. In our experiments, we found that DeepLabv3 with a ResNet-101 backbone is the most accurate of the surveyed approaches. In general, models that leverage pretraining achieve high accuracy and require minimal training, whereas models trained from scratch are less accurate and require longer training regimes.
6 |
7 | ## Dataset
8 | In order to benchmark the aforementioned approaches on building footprint extraction in satellite images, the [SpaceNet Building Detection V2 dataset](https://spacenet.ai/spacenet-buildings-dataset-v2/) is used. This dataset contains high-resolution satellite imagery and corresponding labels that specify the location of building footprints. The dataset includes 302,701 building labels across 10,593 multi-spectral satellite images of Las Vegas, Paris, Shanghai and Khartoum. The labels are binary and indicate whether each pixel belongs to a building or to the background.
9 |
10 |
11 |
12 |
13 |
14 | Figure 1: An example of images (left) and labels (right) in the Spacenet Building
15 | Detection V2.
16 |
17 |
18 |
19 | ## Experimental Setup
20 |
21 | The dataset is divided into training (80%), validation (10%) and test (10%) sets. Images are resized from 650x650 to 384x384 using bi-cubic interpolation and normalized using the mean and standard deviation of the ImageNet dataset.
22 | The proposed semantic segmentation models are trained on the training set, while the validation set is used to determine a stopping criterion. Lastly, the trained model is evaluated on the test set. Intersection over Union (IoU) is the metric used to evaluate model performance; it measures the overlap between the predicted and ground-truth masks and ranges from 0 to 1, where 1 denotes complete overlap.
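For reference, the preprocessing described above can be written with torchvision transforms roughly as follows (a minimal sketch, assuming torchvision ≥ 0.9; the actual transform objects are constructed in **train.py**, which is not shown here, and passed to the dataset class in **dataset.py**):

```
from torchvision import transforms

# Resize 650x650 tiles to 384x384 with bi-cubic interpolation and normalize
# with ImageNet statistics.
img_transform = transforms.Compose([
    transforms.Resize((384, 384), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Masks are resized with nearest-neighbour interpolation so the labels stay binary.
mask_transform = transforms.Resize((384, 384),
                                   interpolation=transforms.InterpolationMode.NEAREST)
```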
23 |
24 | ## Results
25 |
26 |
27 |
28 |
29 |
30 | Figure 2: IoU score on the test set for each approach.
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | Figure 3: A visualization of the predictions generated by each approach along with the input image (far left) and ground truth label (far right).
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | Figure 4: Binary cross entropy loss for training set (top) and validation set
47 | (bottom) across epochs.
48 |
49 |
50 |
51 | ## Running Code
52 | To configure the environment to run the experiments, navigate to the base of this directory and execute the following commands:
53 |
54 | ```
55 | conda create -n new_env
56 | conda activate new_env
57 | pip install -r requirements.txt
58 | ```
59 |
60 | To obtain results for a specific architecture, simply pass the appropriate arguments to the **train.py** script:
61 | ```
62 | python train.py --model fcn50 --epochs 10 --batch_size 4 --data_path /path/to/spacenet
63 | ```
64 |
65 | The **train.py** script has the following arguments:
66 | - **model** (str): Architecture variant for the experiments. *required*
67 | - **data_path** (str): The root directory of the dataset. *required*
68 | - **epochs** (int): The number of epochs to train the model. Default 25.
69 | - **batch_size** (int): The batch size for training, validation and testing. Default 8.
70 | - **learning_rate** (float): Learning rate of the model. Default .0001.
71 | - **size** (int): Side length of the input image. Default 384.
72 | - **train_perc** (float): The proportion of samples used for training. Default .8.
73 | - **val_perc** (float): The proportion of samples used for validation. Default .1.
74 |
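Based on the model factory in **utils.py**, the accepted values for **--model** are `fcn50`, `fcn101`, `dlv350`, `dlv3101`, `unet` and `unetplus`. For example, to train the DeepLabv3 ResNet-101 variant with the default schedule:

```
python train.py --model dlv3101 --data_path /path/to/spacenet
```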
--------------------------------------------------------------------------------
/image/Building_Footprint_Extraction/dataset.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import os
4 |
5 | import numpy as np
6 | import torch
7 | from PIL import Image
8 | from torch.utils.data.dataset import Dataset
9 |
10 | class SpaceNet_Dataset(Dataset):
11 | def __init__(self, img_dir_list, mask_dir_list, img_transform = None, mask_transform=None):
12 | self.img_dir_list = img_dir_list
13 | self.mask_dir_list = mask_dir_list
14 |
15 | img_paths, mask_paths = [], []
16 |
17 | for img_dir, mask_dir in zip(img_dir_list, mask_dir_list):
18 | img_paths += [f"{img_dir}/{img_file}" for img_file in os.listdir(img_dir)]
19 | mask_paths += [f"{mask_dir}/{mask_file}" for mask_file in os.listdir(mask_dir)]
20 |
21 | self.img_paths = sorted(img_paths)
22 | self.mask_paths = [f"{new_mask_path}_mask.png" for new_mask_path in sorted([mask_path[:-9] for mask_path in mask_paths])]
23 |
24 | self.img_transform = img_transform
25 | self.mask_transform = mask_transform
26 |
27 |
28 |
29 | def __len__(self):
30 | return len(self.img_paths)
31 |
32 | def __getitem__(self, index):
33 | img_path = self.img_paths[index]
34 | mask_path = self.mask_paths[index]
35 |
36 | img = Image.open(img_path)
37 | img = self.img_transform(img)
38 |
39 | mask = Image.open(mask_path).convert("1")
40 | mask = self.mask_transform(mask)
41 | mask = torch.from_numpy(np.array(mask).astype(int)).unsqueeze(0)
42 |
43 | return img, mask
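# Note added for clarity (not part of the original file): mask filenames end in the
# 9-character suffix "_mask.png". The list comprehension above strips that suffix
# before sorting so the mask paths sort in the same order as the image paths, then
# re-appends it when building self.mask_paths.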
--------------------------------------------------------------------------------
/image/Building_Footprint_Extraction/metrics.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 |
4 |
5 | class DiceCoeff(Function):
6 | """Dice coeff for individual examples"""
7 |
8 | def forward(self, input, target):
9 | self.save_for_backward(input, target)
10 | eps = 0.0001
11 | self.inter = torch.dot(input.view(-1), target.view(-1))
12 | self.union = torch.sum(input) + torch.sum(target) + eps
13 |
14 | t = (2 * self.inter.float() + eps) / self.union.float()
15 | return t
16 |
17 | # This function has only a single output, so it gets only one gradient
18 | def backward(self, grad_output):
19 |
20 | input, target = self.saved_variables
21 | grad_input = grad_target = None
22 |
23 | if self.needs_input_grad[0]:
24 | grad_input = grad_output * 2 * (target * self.union - self.inter) \
25 | / (self.union * self.union)
26 | if self.needs_input_grad[1]:
27 | grad_target = None
28 |
29 | return grad_input, grad_target
30 |
31 |
32 | def dice_coeff(input, target):
33 | """Dice coeff for batches"""
34 | if input.is_cuda:
35 | s = torch.FloatTensor(1).cuda().zero_()
36 | else:
37 | s = torch.FloatTensor(1).zero_()
38 |
39 | for i, c in enumerate(zip(input, target)):
40 | s = s + DiceCoeff().forward(c[0], c[1])
41 |
42 | return s / (i + 1)
43 |
44 |
45 |
46 | class IoU(Function):
47 | """IoU for individual examples"""
48 | def forward(self, input, target):
49 | eps = 0.0001
50 | self.inter = torch.dot(input.view(-1), target.view(-1))
51 | self.union = torch.sum(input) + torch.sum(target) + eps-self.inter
52 |
53 | t = (self.inter.float()+eps) / self.union.float()
54 | return t
55 |
56 | def iou(input, target):
57 | """IoU for batches"""
58 | if input.is_cuda:
59 | s = torch.FloatTensor(1).cuda().zero_()
60 | else:
61 | s = torch.FloatTensor(1).zero_()
62 |
63 | for i, c in enumerate(zip(input, target)):
64 | s = s + IoU().forward(c[0], c[1])
65 |
66 | return s / (i + 1)
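# Illustrative usage, added for clarity (not part of the original file):
#   `input` and `target` are tensors of shape (N, 1, H, W) with values in {0, 1};
#   iou() loops over the batch dimension and returns the mean IoU as a 1-element tensor.
#
#   binary_preds = (outputs > threshold).float()
#   batch_iou = iou(binary_preds, targets.float())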
--------------------------------------------------------------------------------
/image/Building_Footprint_Extraction/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.11.0
2 | alabaster==0.7.12
3 | anaconda-client==1.7.2
4 | anaconda-navigator==1.7.0
5 | anaconda-project==0.8.3
6 | appdirs==1.4.3
7 | asn1crypto==0.24.0
8 | astor==0.8.1
9 | astroid==2.2.5
10 | astropy==3.2.1
11 | astunparse==1.6.3
12 | atomicwrites==1.3.0
13 | attrs==19.1.0
14 | Automat==0.7.0
15 | Babel==2.7.0
16 | backcall==0.1.0
17 | backports.os==0.1.1
18 | backports.shutil-get-terminal-size==1.0.0
19 | beautifulsoup4==4.7.1
20 | bitarray==0.9.3
21 | bkcharts==0.2
22 | blaze==0.11.3
23 | bleach==3.1.0
24 | bokeh==1.2.0
25 | boto==2.49.0
26 | Bottleneck==1.2.1
27 | cachetools==4.2.1
28 | certifi==2019.6.16
29 | cffi==1.12.3
30 | chardet==3.0.4
31 | Click==7.0
32 | cloudpickle==1.6.0
33 | clyent==1.2.2
34 | colorama==0.4.1
35 | conda==4.7.12
36 | conda-build==3.17.6
37 | conda-package-handling==1.6.0
38 | conda-verify==3.1.1
39 | constantly==15.1.0
40 | contextlib2==0.5.5
41 | convertdate==2.3.2
42 | cryptography==2.7
43 | cycler==0.10.0
44 | Cython==0.29.12
45 | cytoolz==0.10.0
46 | dask==2.1.0
47 | dataclasses==0.8
48 | datashape==0.5.4
49 | decorator==4.4.0
50 | defusedxml==0.6.0
51 | distributed==2.1.0
52 | dm-tree==0.1.5
53 | docutils==0.14
54 | entrypoints==0.3
55 | et-xmlfile==1.0.1
56 | fastcache==1.1.0
57 | filelock==3.0.12
58 | Flask==1.1.1
59 | Flask-Cors==3.0.7
60 | flatbuffers==1.12
61 | fredapi==0.4.3
62 | future==0.17.1
63 | gast==0.3.3
64 | gevent==1.4.0
65 | glob2==0.7
66 | gluonts==0.8.1
67 | gmpy2==2.0.8
68 | google-auth==1.27.0
69 | google-auth-oauthlib==0.4.2
70 | google-pasta==0.2.0
71 | googledrivedownloader==0.4
72 | graphviz==0.8.4
73 | greenlet==0.4.15
74 | grpcio==1.32.0
75 | h5py==2.10.0
76 | heapdict==1.0.0
77 | hijri-converter==2.2.2
78 | holidays==0.11.3.1
79 | html5lib==1.0.1
80 | hyperlink==18.0.0
81 | idna==2.8
82 | imageio==2.5.0
83 | imagesize==1.1.0
84 | importlib-metadata==0.17
85 | incremental==17.5.0
86 | ipykernel==5.1.1
87 | ipython==7.6.1
88 | ipython_genutils==0.2.0
89 | ipywidgets==7.5.0
90 | isodate==0.6.0
91 | isort==4.3.21
92 | itsdangerous==1.1.0
93 | jdcal==1.4.1
94 | jedi==0.13.3
95 | jeepney==0.4
96 | Jinja2==2.10.1
97 | joblib==0.13.2
98 | json5==0.8.4
99 | jsonschema==3.0.1
100 | jupyter==1.0.0
101 | jupyter-client==5.3.1
102 | jupyter-console==6.0.0
103 | jupyter-core==4.5.0
104 | jupyterlab==1.0.2
105 | jupyterlab-launcher==0.13.1
106 | jupyterlab-server==1.0.0
107 | Keras-Applications==1.0.8
108 | Keras-Preprocessing==1.1.2
109 | keyring==18.0.0
110 | kiwisolver==1.1.0
111 | korean-lunar-calendar==0.2.1
112 | lazy-object-proxy==1.4.1
113 | libarchive-c==2.8
114 | lief==0.9.0
115 | lightgbm==3.3.2
116 | llvmlite==0.29.0
117 | locket==0.2.0
118 | lxml==4.3.4
119 | Markdown==3.3.3
120 | MarkupSafe==1.1.1
121 | matplotlib==3.1.0
122 | mccabe==0.6.1
123 | mistune==0.8.4
124 | mkl-fft==1.0.12
125 | mkl-random==1.0.2
126 | mkl-service==2.0.2
127 | mock==3.0.5
128 | more-itertools==7.0.0
129 | mpmath==1.1.0
130 | msgpack==0.6.1
131 | multipledispatch==0.6.0
132 | mxnet-cu112==1.8.0.post0
133 | navigator-updater==0.1.0
134 | nbconvert==5.5.0
135 | nbformat==4.4.0
136 | networkx==2.3
137 | nltk==3.4.4
138 | nose==1.3.7
139 | notebook==6.0.0
140 | numba==0.45.0
141 | numexpr==2.6.9
142 | numpy==1.19.5
143 | numpydoc==0.9.1
144 | oauthlib==3.1.0
145 | odo==0.5.1
146 | olefile==0.46
147 | openpyxl==2.6.2
148 | opt-einsum==3.3.0
149 | packaging==19.0
150 | pandas==1.1.5
151 | pandocfilters==1.4.2
152 | parso==0.5.0
153 | partd==1.0.0
154 | path.py==12.0.1
155 | pathlib2==2.3.4
156 | patsy==0.5.1
157 | pep8==1.7.1
158 | pexpect==4.7.0
159 | pickleshare==0.7.5
160 | Pillow==8.2.0
161 | pkginfo==1.5.0.1
162 | plotly==5.6.0
163 | plotly-express==0.4.1
164 | pluggy==0.12.0
165 | ply==3.11
166 | prometheus-client==0.7.1
167 | promise==2.3
168 | prompt-toolkit==2.0.9
169 | protobuf==3.15.1
170 | psutil==5.6.3
171 | ptyprocess==0.6.0
172 | py==1.8.0
173 | pyasn1==0.4.4
174 | pyasn1-modules==0.2.2
175 | pycodestyle==2.5.0
176 | pycosat==0.6.3
177 | pycparser==2.19
178 | pycrypto==2.6.1
179 | pycurl==7.43.0.3
180 | pydantic==1.8.2
181 | pyflakes==2.1.1
182 | Pygments==2.4.2
183 | pylint==2.3.1
184 | PyMeeus==0.5.11
185 | pyodbc==4.0.26
186 | pyOpenSSL==19.0.0
187 | pyparsing==2.4.0
188 | pyrsistent==0.14.11
189 | PySocks==1.7.0
190 | pytest==5.0.1
191 | pytest-arraydiff==0.3
192 | pytest-astropy==0.5.0
193 | pytest-doctestplus==0.3.0
194 | pytest-openfiles==0.3.2
195 | pytest-remotedata==0.3.1
196 | python-dateutil==2.8.0
197 | pytz==2019.1
198 | PyWavelets==1.0.3
199 | PyYAML==5.1.1
200 | pyzmq==18.0.0
201 | QtAwesome==0.5.7
202 | qtconsole==4.5.1
203 | QtPy==1.8.0
204 | rdflib==5.0.0
205 | requests==2.22.0
206 | requests-oauthlib==1.3.0
207 | rope==0.14.0
208 | rsa==4.7.1
209 | ruamel_yaml==0.15.46
210 | scikit-image==0.15.0
211 | scikit-learn==0.21.2
212 | scipy==1.4.1
213 | seaborn==0.9.0
214 | SecretStorage==3.1.1
215 | Send2Trash==1.5.0
216 | service-identity==17.0.0
217 | simplegeneric==0.8.1
218 | singledispatch==3.4.0.3
219 | six==1.15.0
220 | snowballstemmer==1.9.0
221 | sortedcollections==1.1.2
222 | sortedcontainers==2.1.0
223 | soupsieve==1.8
224 | Sphinx==2.1.2
225 | sphinxcontrib-applehelp==1.0.1
226 | sphinxcontrib-devhelp==1.0.1
227 | sphinxcontrib-htmlhelp==1.0.2
228 | sphinxcontrib-jsmath==1.0.1
229 | sphinxcontrib-qthelp==1.0.2
230 | sphinxcontrib-serializinghtml==1.1.3
231 | sphinxcontrib-websupport==1.1.2
232 | spyder==3.3.6
233 | spyder-kernels==0.5.1
234 | SQLAlchemy==1.3.5
235 | statsmodels==0.10.0
236 | sympy==1.4
237 | tables==3.5.2
238 | tblib==1.4.0
239 | tenacity==8.0.1
240 | tensorboard==2.4.1
241 | tensorboard-plugin-wit==1.8.0
242 | tensorflow==2.4.1
243 | tensorflow-estimator==2.4.0
244 | tensorflow-gpu==2.4.1
245 | tensorflow-probability==0.12.1
246 | termcolor==1.1.0
247 | terminado==0.8.2
248 | testpath==0.4.2
249 | toolz==0.10.0
250 | torch-scatter==2.0.7
251 | tornado==6.0.3
252 | tqdm==4.32.1
253 | traitlets==4.3.2
254 | Twisted==18.7.0
255 | typed-ast==1.3.4
256 | typing==3.6.2
257 | typing-extensions==3.10.0.2
258 | unicodecsv==0.14.1
259 | urllib3==1.24.2
260 | wcwidth==0.1.7
261 | webencodings==0.5.1
262 | Werkzeug==0.15.4
263 | widgetsnbextension==3.5.0
264 | wrapt==1.12.1
265 | wurlitzer==1.0.2
266 | xlrd==1.2.0
267 | XlsxWriter==1.1.8
268 | xlwt==1.3.0
269 | yacs==0.1.8
270 | zict==1.0.0
271 | zipp==0.5.1
272 | zope.interface==4.5.0
273 |
--------------------------------------------------------------------------------
/image/Building_Footprint_Extraction/training.py:
--------------------------------------------------------------------------------
1 | import torch, numpy as np
2 | from utils import save_viz
3 | from metrics import iou
4 |
5 | def get_label_dist(loader):
6 | count_list = []
7 | for _, (_, lbl) in enumerate(loader):
8 | cnt = torch.bincount(lbl.int().flatten())
9 | count_list.append(cnt)
10 |
11 | cnts = torch.stack(count_list, dim=0).sum(dim=0).tolist()
12 | zero_count, one_count = cnts[0], cnts[1]
13 | perc = zero_count / (zero_count + one_count)
14 | return perc
15 |
16 |
17 | def train_fn(loader, model, opt, loss_fn, device):
18 | loss_list = []
19 | for batch_id, (data, targets) in enumerate(loader):
20 | data = data.to(device=device)
21 | targets = targets.float().to(device)
22 | predictions = model(data)['out']
23 | loss = loss_fn(predictions, targets)
24 | opt.zero_grad()
25 | loss.backward()
26 | opt.step()
27 | loss_list.append(loss.item())
28 |
29 | mean_loss = np.mean(loss_list)
30 | return mean_loss
31 |
32 |
33 | def val_fn(loader, model, loss_fn, device, color_map, sample_path, epoch, perc, viz):
34 | loss_list = []
35 | for batch_id, (data, targets) in enumerate(loader):
36 | data = data.to(device=device)
37 | targets = targets.float().to(device=device)
38 | with torch.no_grad():
39 | predictions = model(data)['out']
40 | loss = loss_fn(predictions, targets)
41 | loss_list.append(loss.item())
42 | if viz:
43 | save_viz(data, predictions, targets, color_map, epoch, sample_path, perc)
44 | viz = False
45 |
46 | mean_loss = np.mean(loss_list)
47 | return mean_loss
48 |
49 |
50 | def test_fn(loader, model, loss_fn, device, perc):
51 | target_list, pred_list = [], []
52 | for batch_id, (data, targets) in enumerate(loader):
53 | data = data.to(device=device)
54 | targets = targets.float().to(device=device)
55 | with torch.no_grad():
56 | pred = model(data)['out']
57 | pred_list.append(pred)
58 | target_list.append(targets)
59 |
60 | pred = torch.cat(pred_list, dim=0)
61 | target = torch.cat(target_list, dim=0)
62 | thresh = np.quantile(pred.flatten().cpu().numpy(), perc)
63 | test_loss = loss_fn(pred, target).item()
64 | pred = (pred > thresh).float()
65 | test_iou = iou(pred, target).item()
66 | return (
67 | test_loss, test_iou)
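# Note added for clarity (not part of the original file): test_fn binarizes the raw
# model outputs by thresholding at the `perc` quantile of the predicted scores, where
# `perc` is the background-pixel fraction returned by get_label_dist, so the predicted
# positive rate roughly matches the empirical building-pixel rate before IoU is computed.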
--------------------------------------------------------------------------------
/image/Building_Footprint_Extraction/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torchvision.models.segmentation import fcn_resnet50, fcn_resnet101, deeplabv3_resnet50, deeplabv3_resnet101
4 | from torchvision.models.segmentation.deeplabv3 import DeepLabHead
5 | from torchvision.models.segmentation.fcn import FCNHead
6 |
7 | import numpy as np
8 | import matplotlib.pyplot as plt
9 |
10 | from model import UNET, UNETPlus
11 |
12 | def get_model(model_type, pretrained):
13 | model = None
14 | if model_type == "fcn50":
15 | model = get_model_fcn50(pretrained)
16 |
17 | elif model_type == "fcn101":
18 | model = get_model_fcn101(pretrained)
19 |
20 | elif model_type == "dlv350":
21 | model = get_model_dlv350(pretrained)
22 |
23 | elif model_type == "dlv3101":
24 | model = get_model_dlv3101(pretrained)
25 |
26 | elif model_type == "unet":
27 | model = UNET(in_channels=3, out_channels=1)
28 |
29 | elif model_type == "unetplus":
30 | model = UNETPlus(n_channels=3, n_classes=1)
31 |
32 | return model
33 |
34 | def get_model_fcn50(pretrained=True, c_out=1):
35 | # Prepare Model and Save to Checkpoint Directory
36 | model = fcn_resnet50(pretrained=pretrained)
37 |
38 | model.classifier = FCNHead(2048, c_out)
39 | model.aux_classifier = None
40 | model = nn.DataParallel(model)
41 | return model
42 |
43 | def get_model_fcn101(pretrained=True, c_out=1):
44 | # Prepare Model and Save to Checkpoint Directory
45 | model = fcn_resnet101(pretrained=pretrained)
46 | model.classifier = FCNHead(2048, c_out)
47 | model.aux_classifier = None
48 | model = nn.DataParallel(model)
49 | return model
50 |
51 | def get_model_dlv350(pretrained=True, c_out=1):
52 | # Prepare Model and Save to Checkpoint Directory
53 | model = deeplabv3_resnet50(pretrained=pretrained)
54 | model.classifier = DeepLabHead(2048, c_out)
55 | model.aux_classifier = None
56 | model = nn.DataParallel(model)
57 |
58 | return model
59 |
60 | def get_model_dlv3101(pretrained=True, c_out=1):
61 | # Prepare Model and Save to Checkpoint Directory
62 | model = deeplabv3_resnet101(pretrained=pretrained)
63 | model.classifier = DeepLabHead(2048, c_out)
64 | model.aux_classifier = None
65 | model = nn.DataParallel(model)
66 | return model
67 |
68 | def save_checkpoint(model, opt, epoch, path, train_loss_list=[], val_loss_list=[]):
69 | """Save Checkpoint"""
70 |
71 | torch.save({
72 | "model": model.state_dict(),
73 | "opt": opt.state_dict(),
74 | "epoch": epoch,
75 | "train_loss_list": train_loss_list,
76 | "val_loss_list": val_loss_list
77 | },
78 | path)
79 |
80 |
81 | def save_viz(img, out, lbl, color_map, epoch, sample_path, perc):
82 | img = img.cpu().numpy()
83 | out = out.cpu().numpy()
84 | lbl = lbl.cpu().numpy()
85 |
86 | thresh = np.quantile(out, perc)
87 |
88 | print("thresh", thresh)
89 |
90 | img = (img - np.min(img)) / (np.max(img) - np.min(img))
91 | rows = out.shape[2]
92 | cols = out.shape[3]
93 |
94 | masks = []
95 | masks_gt = []
96 | for index, (im, o, l) in enumerate(zip(img, out, lbl)):
97 | o, l = o.squeeze(), l.squeeze()
98 |
99 | o = (o > thresh).astype(int)
100 |
101 |
102 | mask = np.zeros((rows, cols, 3), dtype=np.uint8)
103 | mask_gt = np.zeros((rows, cols, 3), dtype=np.uint8)
104 |
105 | for j in range(rows):
106 | for i in range(cols):
107 | mask[j, i] = color_map[o[j, i]]
108 | mask_gt[j, i] = color_map[l[j, i]]
109 |
110 |
111 |
112 | f, axarr = plt.subplots(1, 3, figsize=(20, 20))
113 | im = np.moveaxis(im, 0, -1)
114 | axarr[0].imshow(im)
115 | axarr[0].title.set_text('Image')
116 | axarr[1].imshow(mask_gt)
117 | axarr[1].title.set_text('Label')
118 | axarr[2].imshow(mask)
119 | axarr[2].title.set_text('Prediction')
120 | f.savefig( f"{sample_path}/epoch_{str(epoch)}_{str(index)}.jpg")
--------------------------------------------------------------------------------
/image/Defect_Detection/README.md:
--------------------------------------------------------------------------------
1 | # Defect Detection
2 |
3 | ## Overview
4 |
5 | Anomaly detection is an important task in computer vision that is concerned with identifying anomalous images given a training set of only normal images. In anomaly segmentation, the concept of anomaly detection is extended to the pixel level in order to identify anomalous regions of images. There are many applications of anomaly detection, including biomedical image segmentation, video surveillance and defect detection. In particular, defect detection involves detecting abnormalities in manufacturing components and is therefore widely used in industry to enhance quality assurance and efficiency in the production process (Bergmann et al., 2019). However, having a person manually inspect each component is not feasible in most cases. To address this, systems have been proposed to automate the detection of defective components. These approaches generally take as input an image of a component and output a label or pixel-level mask that predicts whether the image or pixel is anomalous. Although initial approaches were generally ineffective, newer deep learning based approaches have shown very strong performance in anomaly detection and segmentation. Thus, these new methods have the potential to dramatically increase quality assurance and efficiency. Several datasets have been proposed as benchmarks for anomaly detection, such as MNIST, CIFAR and UCSD, whereas there are far fewer benchmark datasets for the anomaly segmentation task. To address this, the MVTec Anomaly Detection dataset was recently introduced as a benchmark for anomaly segmentation.
6 |
7 | MVTec is focused on industrial inspection; consisting of a training set of normal images of objects and textures as well as a test set with both normal and anomalous samples along with their corresponding labels. There are over 70 different types of defects across the anomalous images that are typical in the manufacturing process. The quality and practical nature of the MVTec dataset has made it a popular benchmark for recently proposed anomaly segmentation methods. The goal of this focus phase of the project is to apply state-of-the-art methods to accurately segment anomalies in the MVTec dataset. In doing so, we compared the performance of different anomaly segmentation methods in the industrial inspection setting. Additionally, we sought to optimize the performance of the methods by altering the hyperparameters and architectures of the approaches.
8 |
9 | ## Dataset
10 | The MVTec anomaly detection dataset contains 5354 high-resolution images from 15 different object and texture categories and includes 70 different types of defects, typical of the manufacturing process, across the anomalous images. For each category, there is a training set of normal images as well as a test set with both normal and anomalous samples along with their corresponding labels.
11 |
12 |
13 |
14 |
15 |
16 | Figure 1: An example of inlier images (left) and labels (right) for multiple object categories in the MVTec dataset.
17 |
18 |
19 |
20 | ## Experimental Setup
21 | The MVTec dataset object categories each include a training set of normal samples and a test set of both normal and anomalous samples. Models were optimized to reconstruct samples from the inlier distribution during the training phase. Subsequently, at test time, both normal and anomalous images are input to the model and the pixel-wise reconstruction error is used to identify anomalous regions. Specifically, the models were evaluated on the test data for each object category and the average area under the ROC curve (AUC) is reported. A small validation set of normal images is used to determine which model checkpoint yields the best set of parameters; specifically, 10% of images were randomly removed from the training set and used as the validation set. For testing, the entire test set was used and the average AUC across object categories is reported for each method.
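As a rough sketch of this scoring scheme (assuming a trained reconstruction model `model`, a batch of test images `x` and their binary defect masks `masks`, all hypothetical names; the actual evaluation loop lives in **utils.py**):

```
import torch
from sklearn.metrics import roc_auc_score

with torch.no_grad():
    recon = model(x)                             # autoencoder reconstruction of x
error_map = torch.mean((x - recon) ** 2, dim=1)  # (N, H, W) pixel-wise squared error

# Pixel-level AUC of the reconstruction error against the ground-truth masks.
auc = roc_auc_score(masks.flatten().cpu().numpy(),
                    error_map.flatten().cpu().numpy())
```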
22 |
23 | ## Results
24 |
25 |
26 |
27 |
28 |
29 | Figure 2: Average AUC score on the test set for each approach.
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | Figure 3: A visualization of the predictions generated by the network for an anomalous sample.
38 |
39 |
40 |
41 | ## Running Code
42 | To configure the environment to run the experiments, navigate to the base of this directory and execute the following commands:
43 |
44 | ```
45 | conda create -n new_env
46 | conda activate new_env
47 | pip install -r requirements.txt
48 | ```
49 |
50 | To obtain results for a specific architecture, simply pass the appropriate arguments to the **train.py** script:
51 | ```
52 | python train.py --model vae --epochs 10 --ckpt_path /path/to/checkpoint/folder --data_path /path/to/mvtec
53 | ```
54 |
55 | The **train.py** script has the following arguments:
56 | - **model** (str): Architecture variant for the experiments: ae or vae. *required*
57 | - **data_path** (str): The root directory of the dataset. *required*
58 | - **ckpt_path** (str): The directory to save model checkpoints. *required*
59 | - **epochs** (int): The number of epochs to train the model. Default 100.
60 | - **batch_size** (int): The batch size for training and testing. Default 8.
61 | - **learning_rate** (float): Learning rate of the model. Default .001.
62 | - **size** (int): Side length of the input image. Default 128.
63 |
64 |
--------------------------------------------------------------------------------
/image/Defect_Detection/datasets.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import glob
4 | from PIL import Image
5 | import numpy as np
6 |
7 | from torch.utils.data import Dataset
8 |
9 | class MVTecADDataset(Dataset):
10 | def __init__(self, img_dir, mode, transform, size=128):
11 | self.img_dir = img_dir
12 | self.mode = mode
13 | self.size = size
14 |
15 | if self.mode == "train":
16 | self.img_paths = glob.glob(f"{self.img_dir}/train/good/*.png")
17 | else:
18 |
19 | paths = glob.glob(f"{self.img_dir}/test/*/*.png")
20 |
21 | inlier_img_paths = glob.glob(f"{self.img_dir}/test/good/*.png")
22 | outlier_img_paths = list(set(paths) - set(inlier_img_paths))
23 | self.img_paths = inlier_img_paths + outlier_img_paths
24 | self.outlier_lbl_paths = [f"{self.img_dir}/ground_truth/{path.split('/')[-2]}/{path.split('/')[-1][:-4]}_mask.png" for path in outlier_img_paths]
25 |
26 | self.outlier_lbl = np.array([np.array(Image.open(path).convert('1').resize((self.size, self.size))) for path in self.outlier_lbl_paths])
27 |
28 |
29 | self.inlier_lbl = np.zeros(shape=(len(inlier_img_paths), self.outlier_lbl.shape[1], self.outlier_lbl.shape[2]))
30 |
31 | self.labels = torch.from_numpy(np.concatenate([self.inlier_lbl, self.outlier_lbl])).int()
32 |
33 |
34 | self.transform = transform
35 |
36 | def __getitem__(self, index):
37 | if self.mode == "test":
38 | x = Image.open(self.img_paths[index]).convert("RGB")
39 | if self.transform is not None:
40 | x = self.transform(x)
41 |
42 |
43 | y = self.labels[index]
44 | return x, y
45 | else:
46 | x = Image.open(self.img_paths[index]).convert('RGB')
47 | if self.transform is not None:
48 | x = self.transform(x)
49 | return x
50 |
51 | def __len__(self):
52 | return len(self.img_paths)
--------------------------------------------------------------------------------
/image/Defect_Detection/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 |
6 | class Decoder(nn.Module):
7 | """
8 | The model architecture is taken from https://github.com/pytorch/examples/issues/70
9 | """
10 |
11 | def __init__(self, in_channels, dec_channels, hidden_dim):
12 | self.in_channels = in_channels
13 | self.dec_channels = dec_channels
14 | self.hidden_dim = hidden_dim
15 |
16 | super().__init__()
17 | self.main = nn.Sequential(
18 | # input is Z, going into a convolution
19 | nn.ConvTranspose2d(self.hidden_dim, self.dec_channels * 16, 4, 1, 0, bias=False),
20 | nn.BatchNorm2d(self.dec_channels * 16),
21 | nn.ReLU(True),
22 | # state size. (NGF*16) x 4 x 4
23 | nn.ConvTranspose2d(self.dec_channels * 16, self.dec_channels * 8, 4, 2, 1, bias=False),
24 | nn.BatchNorm2d(self.dec_channels * 8),
25 | nn.ReLU(True),
26 | # state size. (NGF*8) x 8 x 8
27 | nn.ConvTranspose2d(self.dec_channels * 8, self.dec_channels * 4, 4, 2, 1, bias=False),
28 | nn.BatchNorm2d(self.dec_channels * 4),
29 | nn.ReLU(True),
30 | # state size. (NGF*4) x 16 x 16
31 | nn.ConvTranspose2d(self.dec_channels * 4, self.dec_channels * 2, 4, 2, 1, bias=False),
32 | nn.BatchNorm2d(self.dec_channels * 2),
33 | nn.ReLU(True),
34 | # state size. (NGF*2) x 32 x 32
35 | nn.ConvTranspose2d(self.dec_channels * 2, self.dec_channels, 4, 2, 1, bias=False),
36 | nn.BatchNorm2d(self.dec_channels),
37 | nn.ReLU(True),
38 | # state size. (NGF) x 64 x 64
39 | nn.ConvTranspose2d(self.dec_channels, self.in_channels, 4, 2, 1, bias=False),
40 | nn.Sigmoid()
41 | # state size. (NC) x 128 x 128
42 | )
43 |
44 | def forward(self, x):
45 | return self.main(x)
46 |
47 |
48 | class Encoder(nn.Module):
49 | """
50 | The model architecture is taken from https://github.com/pytorch/examples/issues/70
51 | """
52 |
53 | def __init__(self, in_channels, enc_channels, hidden_dim):
54 | self.in_channels = in_channels
55 | self.enc_channels = enc_channels
56 | self.hidden_dim = hidden_dim
57 |
58 | super().__init__()
59 | self.main = nn.Sequential(
60 | # input is (NC) x 128 x 128
61 | nn.Conv2d(self.in_channels, self.enc_channels, 4, stride=2, padding=1, bias=False),
62 | nn.LeakyReLU(0.2, inplace=True),
63 | # state size. (NDF) x 64 x 64
64 | nn.Conv2d(self.enc_channels, self.enc_channels * 2, 4, stride=2, padding=1, bias=False),
65 | nn.BatchNorm2d(self.enc_channels * 2),
66 | nn.LeakyReLU(0.2, inplace=True),
67 | # state size. (NDF*2) x 32 x 32
68 | nn.Conv2d(self.enc_channels * 2, self.enc_channels * 4, 4, stride=2, padding=1, bias=False),
69 | nn.BatchNorm2d(self.enc_channels * 4),
70 | nn.LeakyReLU(0.2, inplace=True),
71 | # state size. (NDF*4) x 16 x 16
72 | nn.Conv2d(self.enc_channels * 4, self.enc_channels * 8, 4, stride=2, padding=1, bias=False),
73 | nn.BatchNorm2d(self.enc_channels * 8),
74 | nn.LeakyReLU(0.2, inplace=True),
75 | # state size. (NDF*8) x 8 x 8
76 | nn.Conv2d(self.enc_channels * 8, self.enc_channels * 16, 4, stride=2, padding=1, bias=False),
77 | nn.BatchNorm2d(self.enc_channels * 16),
78 | nn.LeakyReLU(0.2, inplace=True),
79 | # state size. (NDF*16) x 4 x 4
80 | nn.Conv2d(self.enc_channels * 16, self.hidden_dim, 4, stride=1, padding=0, bias=False),
81 | nn.Flatten(),
82 | )
83 |
84 | def forward(self, x):
85 | return self.main(x)
86 |
87 | def vae_loss_fn(x, recon_batch, mu, logvar):
88 |
89 | recon_loss = ae_loss_fn(x, recon_batch)
90 |
91 | KLD = torch.mean(-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(),dim=1),dim=0)
92 |
93 | return recon_loss + KLD
94 |
95 | def ae_loss_fn(x, recon_batch):
96 | """Function taken and modified from
97 | https://github.com/pytorch/examples/tree/master/vae
98 | """
99 | MSE = ((x - recon_batch) ** 2).mean()
100 | return MSE
101 |
102 | class ConvVAE(nn.Module):
103 |
104 | def __init__(self, in_channels=3, enc_channels=128, dec_channels=128, hidden_dim=100):
105 | super().__init__()
106 |
107 | self.in_channels = in_channels
108 | self.enc_channels = enc_channels
109 | self.dec_channels = dec_channels
110 | self.hidden_dim = hidden_dim
111 |
112 | self.encoder = Encoder(self.in_channels, self.enc_channels, self.hidden_dim*2)
113 | self.decoder = Decoder(self.in_channels, self.dec_channels, self.hidden_dim)
114 |
115 | def reparameterize(self, mu, logvar):
116 | std = (0.5 * logvar).exp()
117 | eps = torch.randn_like(std)
118 |
119 | return mu + eps * std
120 |
121 | def forward(self, x):
122 | enc_out = self.encoder(x)
123 | mu, logvar = enc_out[..., :self.hidden_dim], enc_out[..., self.hidden_dim:]
124 | z = self.reparameterize(mu, logvar)
125 | recon_batch = self.decoder(z.unsqueeze(-1).unsqueeze(-1))
126 | return recon_batch, mu, logvar
127 |
128 | class AE(nn.Module):
129 |
130 | def __init__(self, in_channels=3, enc_channels=128, dec_channels=128, hidden_dim=100):
131 | super().__init__()
132 | self.in_channels = in_channels
133 | self.enc_channels = enc_channels
134 | self.dec_channels = dec_channels
135 | self.hidden_dim = hidden_dim
136 |
137 | self.encoder = Encoder(self.in_channels, self.enc_channels, self.hidden_dim)
138 | self.decoder = Decoder(self.in_channels, self.dec_channels, self.hidden_dim)
139 |
140 | def forward(self, x):
141 | enc_out = self.encoder(x)
142 | recon_batch = self.decoder(enc_out.unsqueeze(-1).unsqueeze(-1))
143 | return recon_batch
144 |
145 |
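# Illustrative usage, added for clarity (not part of the original file): with the five
# stride-2 stages above, both networks expect 128x128 RGB inputs.
#
#   x = torch.randn(8, 3, 128, 128)
#   recon, mu, logvar = ConvVAE()(x)   # recon: (8, 3, 128, 128)
#   recon = AE()(x)
#
# The KLD term in vae_loss_fn is the closed-form KL divergence between the approximate
# posterior N(mu, exp(logvar)) and the standard normal prior.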
--------------------------------------------------------------------------------
/image/Defect_Detection/requirements.txt:
--------------------------------------------------------------------------------
1 | anyio==3.7.0
2 | argon2-cffi==21.3.0
3 | argon2-cffi-bindings==21.2.0
4 | arrow==1.2.3
5 | asttokens==2.2.1
6 | attrs==23.1.0
7 | backcall==0.2.0
8 | beautifulsoup4==4.12.2
9 | bleach==6.0.0
10 | certifi==2023.5.7
11 | cffi==1.15.1
12 | charset-normalizer==3.1.0
13 | comm==0.1.3
14 | debugpy==1.6.7
15 | decorator==5.1.1
16 | defusedxml==0.7.1
17 | exceptiongroup==1.1.1
18 | executing==1.2.0
19 | fastjsonschema==2.17.1
20 | fqdn==1.5.1
21 | idna==3.4
22 | importlib-metadata==6.7.0
23 | ipykernel==6.23.3
24 | ipython==8.14.0
25 | ipython-genutils==0.2.0
26 | ipywidgets==8.0.6
27 | isoduration==20.11.0
28 | jedi==0.18.2
29 | Jinja2==3.1.2
30 | joblib==1.2.0
31 | jsonpointer==2.4
32 | jsonschema==4.17.3
33 | jupyter==1.0.0
34 | jupyter-console==6.6.3
35 | jupyter-events==0.6.3
36 | jupyter_client==8.3.0
37 | jupyter_core==5.3.1
38 | jupyter_server==2.6.0
39 | jupyter_server_terminals==0.4.4
40 | jupyterlab-pygments==0.2.2
41 | jupyterlab-widgets==3.0.7
42 | MarkupSafe==2.1.3
43 | matplotlib-inline==0.1.6
44 | mistune==3.0.1
45 | nbclassic==1.0.0
46 | nbclient==0.8.0
47 | nbconvert==7.6.0
48 | nbformat==5.9.0
49 | nest-asyncio==1.5.6
50 | notebook==6.5.4
51 | notebook_shim==0.2.3
52 | numpy==1.25.0
53 | overrides==7.3.1
54 | packaging==23.1
55 | pandas==2.0.2
56 | pandocfilters==1.5.0
57 | parso==0.8.3
58 | pexpect==4.8.0
59 | pickleshare==0.7.5
60 | Pillow==9.5.0
61 | platformdirs==3.8.0
62 | prometheus-client==0.17.0
63 | prompt-toolkit==3.0.38
64 | psutil==5.9.5
65 | ptyprocess==0.7.0
66 | pure-eval==0.2.2
67 | pycparser==2.21
68 | Pygments==2.15.1
69 | pyrsistent==0.19.3
70 | python-dateutil==2.8.2
71 | python-json-logger==2.0.7
72 | pytz==2023.3
73 | PyYAML==6.0
74 | pyzmq==25.1.0
75 | qtconsole==5.4.3
76 | QtPy==2.3.1
77 | requests==2.31.0
78 | rfc3339-validator==0.1.4
79 | rfc3986-validator==0.1.1
80 | scikit-learn==1.2.2
81 | scipy==1.10.1
82 | Send2Trash==1.8.2
83 | six==1.16.0
84 | sniffio==1.3.0
85 | soupsieve==2.4.1
86 | stack-data==0.6.2
87 | terminado==0.17.1
88 | threadpoolctl==3.1.0
89 | tinycss2==1.2.1
90 | torch==1.11.0
91 | torchvision==0.12.0
92 | tornado==6.3.2
93 | traitlets==5.9.0
94 | typing_extensions==4.6.3
95 | tzdata==2023.3
96 | uri-template==1.3.0
97 | urllib3==2.0.3
98 | wcwidth==0.2.6
99 | webcolors==1.13
100 | webencodings==0.5.1
101 | websocket-client==1.6.1
102 | widgetsnbextension==4.0.7
103 | zipp==3.15.0
104 |
--------------------------------------------------------------------------------
/image/Defect_Detection/train.py:
--------------------------------------------------------------------------------
1 | # system imports
2 | import os
3 | import logging
4 | import glob
5 | from pathlib import Path
6 | import re
7 | import argparse
8 |
9 | # external dependencies
10 | import torch
11 | import torch.nn as nn
12 | from torch.optim import Adam
13 | from torchvision import transforms
14 | from torch.utils.data import DataLoader
15 |
16 | # relative imports
17 | from model import AE, ConvVAE, ae_loss_fn, vae_loss_fn
18 | from datasets import MVTecADDataset
19 | from utils import train_step, test_step, save_checkpoint
20 |
21 | parser = argparse.ArgumentParser(description="Feature Memory for Anomaly Detection")
22 |
23 | # basic config
24 | parser.add_argument('--model', type=str, help='Architecture variation for experiments. ae or vae.')
25 | parser.add_argument('--epochs', type=int, default=100, help=' The number of epochs to train the model.')
26 | parser.add_argument('--batch_size', type=int, default=8, help=' The batch size for training, validation and testing.')
27 | parser.add_argument('--learning_rate', type=float, default=.001, help='Learning rates of model.')
28 | parser.add_argument('--size', type=int, default=128, help='Side length of input image')
29 | parser.add_argument('--data_path', type=str, help='The root directory of the dataset.')
30 | parser.add_argument('--ckpt_path', type=str, help='The directory to save model checkpoints.')
31 |
32 | args = parser.parse_args()
33 |
34 | # Data Paths
35 |
36 | CLASSES = ["toothbrush",
37 | "pill",
38 | "leather",
39 | "hazelnut",
40 | "capsule",
41 | "cable",
42 | "bottle",
43 | "zipper",
44 | "tile",
45 | "transistor",
46 | "wood",
47 | "metal_nut",
48 | "screw",
49 | "carpet",
50 | "grid"]
51 |
52 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
53 |
54 | def main():
55 |
56 | transform = transforms.Compose([
57 | transforms.ToTensor(),
58 | transforms.Resize(size=(args.size, args.size)),
59 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
60 | ])
61 |
62 | test_auc_list = []
63 | for inlier in CLASSES:
64 | # Prepare Data
65 | print("class", inlier)
66 | current_epoch = 0
67 | ckpt_path = f"{args.ckpt_path}/{inlier}.pth"
68 | img_dir = f"{args.data_path}/{inlier}"
69 | train_dataset = MVTecADDataset(img_dir, "train", transform)
70 | test_dataset = MVTecADDataset(img_dir, "test", transform, args.size)
71 |
72 | train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
73 | test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True)
74 |
75 |
76 | model = ConvVAE() if args.model == "vae" else AE()
77 | model = torch.nn.DataParallel(model)
78 |
79 | optimizer = Adam(model.parameters(), lr=args.learning_rate)
80 | save_checkpoint(model, optimizer, epoch=current_epoch, path=ckpt_path)
81 |
82 | loss_fn = vae_loss_fn if args.model == "vae" else ae_loss_fn
83 |
84 | highest_auc = 0
85 | while True:
86 | ckpt = torch.load(ckpt_path)
87 | epoch = ckpt["epoch"]
88 |
89 | if epoch == args.epochs:
90 | break
91 |
92 | model = ConvVAE() if args.model == "vae" else AE()
93 | model = nn.DataParallel(model)
94 | model.load_state_dict(ckpt["model"])
95 | model.to(DEVICE)
96 |
97 | model.train()
98 | train_loss = train_step(train_loader, model, optimizer, loss_fn, DEVICE, args.model)
99 |
100 | model.eval()
101 | test_auc, test_loss = test_step(test_loader, model, loss_fn, DEVICE, args.model)
102 |
103 | print(f"Train Loss: {str(train_loss)} \t Test AUC: {str(test_auc)}")
104 |
105 | if test_auc > highest_auc:
106 | highest_auc = test_auc
107 |
108 | save_checkpoint(model, optimizer, epoch + 1, ckpt_path)
109 |
110 | test_auc_list.append(highest_auc)
111 |
112 |
113 |     print(f"Average AUC: {sum(test_auc_list) / len(test_auc_list)}")
114 |
115 | ######################################################
116 |
117 | if __name__ == "__main__":
118 | main()
--------------------------------------------------------------------------------
/image/Defect_Detection/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 | from sklearn.metrics import roc_auc_score, roc_curve
5 |
6 | def get_auc(preds, lbls):
7 | preds = preds.flatten().cpu().numpy()
8 | lbls = lbls.flatten().cpu().numpy()
9 |
10 | auc = roc_auc_score(lbls, preds)
11 | return auc
12 |
13 | def save_checkpoint(model, opt, epoch, path):
14 | """Save Checkpoint"""
15 |
16 | torch.save({
17 | "model": model.state_dict(),
18 | "opt": opt.state_dict(),
19 | "epoch": epoch
20 | },
21 | path)
22 |
23 |
24 | def train_step(loader, model, optimizer, loss_fn, device, model_str):
25 |
26 | train_loss_list = []
27 |
28 | for i, data in enumerate(loader):
29 | data = data.to(device)
30 | optimizer.zero_grad()
31 | if model_str == "vae":
32 | recon, mu, logvar = model(data)
33 | loss = loss_fn(data, recon, mu, logvar)
34 | else:
35 | recon = model(data)
36 | loss = loss_fn(data, recon)
37 | loss.backward()
38 | optimizer.step()
39 | train_loss_list.append(loss.item())
40 |
41 | return np.mean(train_loss_list)
42 |
43 | def test_step(loader, model, loss_fn, device, model_str):
44 |
45 | loss_list, error_map_list, lbl_list = [], [], []
46 | for i, (data, lbl) in enumerate(loader):
47 | data, lbl = data.to(device), lbl.to(device)
48 |
49 | with torch.no_grad():
50 | if model_str == "vae":
51 | recon, mu, logvar = model(data)
52 | loss = loss_fn(data, recon, mu, logvar)
53 | else:
54 | recon = model(data)
55 | loss = loss_fn(data, recon)
56 | loss_list.append(loss.item())
57 | error_map = torch.mean((data - recon)**2, dim=1).unsqueeze(1)
58 | error_map_list.append(error_map)
59 | lbl_list.append(lbl)
60 |
61 | error_maps = torch.cat(error_map_list, dim=0)
62 | lbls = torch.cat(lbl_list, dim=0)
63 | preds = (error_maps - torch.min(error_maps)) / (torch.max(error_maps) - torch.min(error_maps))
64 |
65 | auc = get_auc(preds, lbls)
66 | loss = np.mean(loss_list)
67 |
68 | return auc, loss
69 |
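# Note added for clarity (not part of the original file): the min-max scaling of the
# error maps above is a monotonic transform, so it does not change the ROC AUC; it only
# keeps the anomaly scores in [0, 1] for easier inspection and visualization.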
--------------------------------------------------------------------------------
/image/Road_Obstacle_Detection/README.md:
--------------------------------------------------------------------------------
1 | # Road Obstacle Detection
2 |
3 | ## Overview
4 |
5 | Detecting obstacles on the road or railway is a critical part of the driving task that has not yet been mastered by fully autonomous vehicles. Semantic segmentation plays an important role in addressing the challenge of identifying the locations of obstacles. In this phase of the project, we explore the application of semantic segmentation methods to the task of detecting road obstacles using the Lost and Found dataset. The goal of the experiments is to determine which model architecture is best suited to road obstacle detection, a question of interest to both practitioners and researchers.
6 |
7 | ## Dataset
8 | The Lost and Found dataset was introduced to evaluate the performance of small road obstacle detection approaches. It includes 2k images recorded in 13 different challenging street scenarios and features 37 different obstacle types. Each object is labeled with a unique ID, allowing for later refinement into subcategories. An overview of the Lost and Found dataset is shown below; the labels are refined into three classes: drivable area, non-drivable area and obstacles.
9 |
10 |
11 |
12 |
13 |
14 | Figure 1: The Lost and Found Dataset.
15 |
16 |
17 |
18 | ## Results
19 |
20 |
21 |
22 |
23 |
24 | Figure 2: The validation cross entropy loss for each model across epochs.
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 | Figure 3: Visual results comparing prediction made by each model for a test image.
33 |
34 |
35 |
36 | ## Running Code
37 | To configure the environment to run the experiments, navigate to the base of this directory and execute the following commands:
38 |
39 | ```
40 | conda create -n new_env
41 | conda activate new_env
42 | pip install -r requirements.txt
43 | ```
44 |
45 | To obtain results for a specific architecture, simply pass the appropriate arguments to the **train.py** script:
46 | ```
47 | python train.py --epochs 10 --batch_size 4
48 | ```
49 |
50 | The **train.py** script has the following arguments:
51 | - **epochs** (int): The number of epochs to train the model.
52 | - **batch_size** (int): The batch size for training, validation and testing.
53 | - **learning_rate** (float): Learning rate of the model.
54 | - **height** (int): Height of the input image.
55 | - **width** (int): Width of the input image.
56 | - **train_perc** (float): The proportion of samples used for training.
57 | - **data_path** (str): The root directory of the dataset.
58 | - **ckpt_path** (str): Path of the checkpoint file.
59 | - **best_ckpt_path** (str): Path of the checkpoint file for the best-performing model on the validation set.
60 | - **sample_path** (str): Directory in which to save example images.
61 |
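The defaults in **train.py** point at the shared-cluster copy of the Lost and Found dataset; a typical invocation with a local copy looks like the following (paths are placeholders):

```
python train.py --epochs 10 --batch_size 4 --data_path /path/to/lostandfound --ckpt_path ckpt/run_1.pth --sample_path samples
```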
62 |
63 |
--------------------------------------------------------------------------------
/image/Road_Obstacle_Detection/dice_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 |
4 |
5 | class DiceCoeff(Function):
6 | """Dice coeff for individual examples"""
7 |
8 | def forward(self, input, target):
9 | self.save_for_backward(input, target)
10 | eps = 0.0001
11 | self.inter = torch.dot(input.view(-1), target.view(-1))
12 | self.union = torch.sum(input) + torch.sum(target) + eps
13 |
14 | t = (2 * self.inter.float() + eps) / self.union.float()
15 | return t
16 |
17 | # This function has only a single output, so it gets only one gradient
18 | def backward(self, grad_output):
19 |
20 | input, target = self.saved_variables
21 | grad_input = grad_target = None
22 |
23 | if self.needs_input_grad[0]:
24 | grad_input = grad_output * 2 * (target * self.union - self.inter) \
25 | / (self.union * self.union)
26 | if self.needs_input_grad[1]:
27 | grad_target = None
28 |
29 | return grad_input, grad_target
30 |
31 |
32 | def dice_coeff(input, target):
33 | """Dice coeff for batches"""
34 | if input.is_cuda:
35 | s = torch.FloatTensor(1).cuda().zero_()
36 | else:
37 | s = torch.FloatTensor(1).zero_()
38 |
39 | for i, c in enumerate(zip(input, target)):
40 | s = s + DiceCoeff().forward(c[0], c[1])
41 |
42 | return s / (i + 1)
43 |
--------------------------------------------------------------------------------
/image/Road_Obstacle_Detection/eval.py:
--------------------------------------------------------------------------------
1 | import torch.nn.functional as F
2 | import torch
3 | from tqdm import tqdm
4 |
5 | from dice_loss import dice_coeff
6 |
7 |
8 | def eval_net(net, loader, device):
9 | """Evaluation without the densecrf with the dice coefficient"""
10 | net.eval()
11 | mask_type = torch.long
12 |     n_val = len(loader)  # the number of batches
13 | tot = 0
14 |
15 | with tqdm(total=n_val, desc='Validation round', unit='batch', leave=False) as pbar:
16 | for batch in loader:
17 |             imgs, true_masks = batch  # the loader yields (image, mask) pairs
18 |             # true_masks = (true_masks > 0.5).float()
19 | imgs = imgs.to(device=device, dtype=torch.float32)
20 | true_masks = true_masks.to(device=device, dtype=mask_type)
21 |
22 | with torch.no_grad():
23 | mask_pred = net(imgs)
24 |
25 |
26 | tot += F.cross_entropy(mask_pred, true_masks).item()
27 | pbar.update()
28 |
29 | net.train()
30 | return tot / n_val
31 |
--------------------------------------------------------------------------------
/image/Road_Obstacle_Detection/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import argparse
4 | from tqdm import tqdm
5 | import torch.nn.functional as F
6 |
7 | import numpy as np
8 | from PIL import Image
9 | import torch.utils.data as data
10 |
11 | import matplotlib.pyplot as plt
12 |
13 | import torch
14 | import torch.utils.data as data
15 | import torch.nn as nn
16 | from torch.utils.data import DataLoader
17 | from torch.nn import CrossEntropyLoss
18 |
19 | from torchvision.datasets import Cityscapes
20 | from torchvision.utils import make_grid
21 |
22 | from lf_loader import lostandfoundLoader
23 |
24 | from eval import eval_net
25 |
26 | from utils import train_step, val_step, get_model, save_viz, save_checkpoint
27 |
28 | parser = argparse.ArgumentParser(description="Feature Memory for Anomaly Detection")
29 |
30 | # basic config
31 | parser.add_argument('--epochs', type=int, default=2, help=' The number of epochs to train the memory.')
32 | parser.add_argument('--batch_size', type=int, default=4, help=' The batch size for training, validation and testing.')
33 | parser.add_argument('--learning_rate', type=float, default=3e-4, help='Learning rates of model.')
34 | parser.add_argument('--height', type=int, default=128, help='Height of input image')
35 | parser.add_argument('--width', type=int, default=256, help='Width of input image')
36 | parser.add_argument('--train_perc', type=float, default=.9, help='Proportion of samples to use in training set')
37 | parser.add_argument('--data_path', type=str, default="/scratch/ssd002/datasets/lostandfound", help='The root directory of the dataset.')
38 | parser.add_argument('--ckpt_path', type=str, default="ckpt/run_1.pth", help='The file to save model checkpoints.')
39 | parser.add_argument('--best_ckpt_path', type=str, default="ckpt/best_run_1.pth", help='The file to save best model checkpoint.')
40 | parser.add_argument('--sample_path', type=str, default="samples", help='The directory in which to save example images.')
41 |
42 |
43 | args = parser.parse_args()
44 |
45 | # Global Variables
46 | IMG_SIZE = (args.height, args.width) #H, W
47 | DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
48 | CURRENT_EPOCH = 0
49 |
50 | LF_MAP = {
51 | 0: (0, 0, 0),
52 | 1: (255, 0, 0),
53 | 2: (0, 255, 0),
54 | 3: (0, 0, 255),
55 | }
56 |
57 | def main():
58 |
59 | # Prepare Dataset and Dataloader
60 | dataset = lostandfoundLoader(args.data_path, is_transform=True, augmentations=None)
61 |
62 | train_size = int(len(dataset) * args.train_perc)
63 | val_size = len(dataset) - train_size
64 | train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
65 |
66 | train_dataloader = data.DataLoader(train_dataset, batch_size=args.batch_size, num_workers=2)
67 | val_dataloader = data.DataLoader(val_dataset, batch_size=args.batch_size, num_workers=2)
68 |
69 | model = get_model(pretrained=True)
70 |
71 | # Loss and Optimizer
72 | criterion = CrossEntropyLoss()
73 | opt = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
74 |
75 |     # Save initial checkpoint to be subsequently restored from
76 | save_checkpoint(model, opt, epoch=CURRENT_EPOCH, path=args.ckpt_path)
77 |
78 | train_loss_list = []
79 | val_loss_list = []
80 | max_val_loss = 1e10
81 | while True:
82 | # Load checkpoint
83 | ckpt = torch.load(args.ckpt_path)
84 |
85 | epoch = ckpt["epoch"]
86 |
87 | if epoch == args.epochs:
88 | break
89 |
90 | model = get_model(pretrained=False)
91 | model.load_state_dict(ckpt["model"])
92 | model.to(DEVICE)
93 |
94 | opt = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
95 | opt.load_state_dict(ckpt["opt"])
96 |
97 | model.train()
98 | train_loss = train_step(model, opt, criterion, train_dataloader, epoch, DEVICE)
99 | train_loss_list.append(train_loss)
100 |
101 | model.eval()
102 | val_loss = val_step(model, criterion, val_dataloader, epoch, DEVICE, LF_MAP, args.sample_path)
103 | val_loss_list.append(val_loss)
104 |
105 |
106 | with open("train_loss.txt", "a") as myfile:
107 | myfile.write(f"{str(epoch)}\t{str(train_loss)}\n")
108 |
109 | with open("val_loss.txt", "a") as myfile:
110 | myfile.write(f"{str(epoch)}\t{str(val_loss)}\n")
111 |
112 |         if val_loss < max_val_loss:  # keep the checkpoint with the lowest validation loss
113 |             max_val_loss = val_loss
114 |             torch.save({
115 |                 "model": model.state_dict(),
116 |                 "opt": opt.state_dict(),
117 |                 "epoch": epoch,
118 |             }, args.best_ckpt_path)
119 |
120 | save_checkpoint(model, opt, epoch + 1, args.ckpt_path)
121 | model.cpu()
122 |
123 |
124 | f, axarr = plt.subplots(1, 2, figsize=(20,20))
125 | axarr[0].plot(train_loss_list)
126 | axarr[0].title.set_text("Train Loss")
127 | axarr[1].plot(val_loss_list)
128 | axarr[1].title.set_text("Validation Loss")
129 |
130 | fig_path = f"{args.sample_path}/loss_figure.jpg"
131 | f.savefig(fig_path)
132 |
133 | if __name__ == "__main__":
134 | main()
--------------------------------------------------------------------------------
/image/Road_Obstacle_Detection/train_loss.txt:
--------------------------------------------------------------------------------
1 | 0 0.16359891243630725
2 | 1 0.08223168643055556
3 |
--------------------------------------------------------------------------------
/image/Road_Obstacle_Detection/utils.py:
--------------------------------------------------------------------------------
1 | import tqdm
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 |
6 | import torch
7 | import torch.nn as nn
8 |
9 | from torchvision.models.segmentation import fcn_resnet50
10 |
11 |
12 | def train_step(model, opt, criterion, dataloader, epoch, device):
13 | losses = []
14 | counter = 0
15 | for i, (img, lbl) in enumerate(dataloader):
16 | lbl = lbl.long()
17 | img, lbl = img.to(device), lbl.to(device)
18 | opt.zero_grad()
19 | out = model(img)["out"]
20 | loss = criterion(out, lbl)
21 | loss.backward()
22 | opt.step()
23 | losses.append(loss.item())
24 |
25 | return np.mean(losses)
26 |
27 | def val_step(model, criterion, dataloader, epoch, device, lf_map, sample_path):
28 | losses = []
29 | dices = []
30 | viz = True
31 | for i, (img, lbl) in enumerate(dataloader):
32 | lbl = lbl.long()
33 | img, lbl = img.to(device), lbl.to(device)
34 |
35 | with torch.no_grad():
36 | out = model(img)["out"]
37 |
38 | loss = criterion(out, lbl)
39 | losses.append(loss.item())
40 |
41 | if viz:
42 | save_viz(img, out, lbl, lf_map, epoch, sample_path)
43 | viz = False
44 |
45 | return np.mean(losses)
46 |
47 | def save_viz(img, out, lbl, color_map, epoch, sample_path):
48 | img = img.cpu().numpy()
49 | out = out.cpu().numpy()
50 | lbl = lbl.cpu().numpy()
51 | rows = out.shape[2]
52 | cols = out.shape[3]
53 |
54 | masks = []
55 | masks_gt = []
56 | for index, (im, o, l) in enumerate(zip(img, out, lbl)):
57 | mask = np.zeros((rows, cols, 3), dtype=np.uint8)
58 | mask_gt = np.zeros((rows, cols, 3), dtype=np.uint8)
59 | for j in range(rows):
60 | for i in range(cols):
61 |                 mask[j, i] = color_map[np.argmax(o[:, j, i], axis=0)]  # predicted class index per pixel
62 | mask_gt[j, i] = color_map[l[j, i]]
63 |
64 | mask_path = f"{sample_path}/epoch_{str(epoch)}_pred_{str(index)}.jpg"
65 | lbl_path = f"{sample_path}/epoch_{str(epoch)}_lbl_{str(index)}.jpg"
66 | img_path = f"{sample_path}/epoch_{str(epoch)}_img_{str(index)}.jpg"
67 | f, axarr = plt.subplots(1, 3, figsize=(20, 20))
68 | im = np.moveaxis(im, 0, -1)
69 | axarr[0].imshow(im)
70 | axarr[0].title.set_text('Image')
71 | axarr[1].imshow(mask_gt)
72 | axarr[1].title.set_text('Label')
73 | axarr[2].imshow(mask)
74 | axarr[2].title.set_text('Prediction')
75 | f.savefig( f"{sample_path}/epoch_{str(epoch)}_{str(index)}.jpg")
76 |
77 | def get_model(pretrained=False):
78 | # Prepare Model and Save to Checkpoint Directory
79 | model = fcn_resnet50(pretrained=pretrained)
80 |     model.classifier[4] = nn.Conv2d(512, 4, kernel_size=(1, 1), stride=(1, 1))  # 4-class head; a 1x1 conv needs no padding
81 | model.aux_classifier = None
82 | model = nn.DataParallel(model)
83 | return model
84 |
85 | def save_checkpoint(model, opt, epoch, path):
86 | """Save Checkpoint"""
87 |
88 | torch.save({
89 | "model": model.state_dict(),
90 | "opt": opt.state_dict(),
91 | "epoch": epoch,
92 | },
93 | path)
94 |
--------------------------------------------------------------------------------
/image/Road_Obstacle_Detection/val_loss.txt:
--------------------------------------------------------------------------------
1 | 0 0.08754148219640438
2 | 1 0.1392884962260723
3 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/OCR/charnet/__init__.py
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .defaults import _C as cfg
2 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/config/defaults.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Malong Technologies Co., Ltd.
2 | # All rights reserved.
3 | #
4 | # Contact: github@malong.com
5 | #
6 | # This source code is licensed under the LICENSE file in the root directory of this source tree.
7 |
8 | from yacs.config import CfgNode as CN
9 |
10 |
11 | _C = CN()
12 |
13 | _C.INPUT_SIZE = 2280
14 | _C.SIZE_DIVISIBILITY = 1
15 | _C.WEIGHT= ""
16 |
17 | _C.CHAR_DICT_FILE = ""
18 | _C.WORD_LEXICON_PATH = ""
19 |
20 | _C.WORD_MIN_SCORE = 0.95
21 | _C.WORD_NMS_IOU_THRESH = 0.15
22 | _C.CHAR_MIN_SCORE = 0.25
23 | _C.CHAR_NMS_IOU_THRESH = 0.3
24 | _C.MAGNITUDE_THRESH = 0.2
25 |
26 | _C.WORD_STRIDE = 4
27 | _C.CHAR_STRIDE = 4
28 | _C.NUM_CHAR_CLASSES = 68
29 |
30 | _C.WORD_DETECTOR_DILATION = 1
31 | _C.RESULTS_SEPARATOR = chr(31)
32 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/OCR/charnet/modeling/__init__.py
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/OCR/charnet/modeling/backbone/__init__.py
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/modeling/backbone/decoder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Malong Technologies Co., Ltd.
2 | # All rights reserved.
3 | #
4 | # Contact: github@malong.com
5 | #
6 | # This source code is licensed under the LICENSE file in the root directory of this source tree.
7 |
8 | from torch import nn
9 | from collections import OrderedDict
10 | from torch.functional import F
11 |
12 |
13 | class Decoder(nn.Module):
14 | def __init__(self, in_channels_list, out_channels):
15 | super(Decoder, self).__init__()
16 | self.backbone_feature_reduction = nn.ModuleList()
17 | self.top_down_feature_reduction = nn.ModuleList()
18 | for i, in_channels in enumerate(in_channels_list[::-1]):
19 | self.backbone_feature_reduction.append(
20 | self._conv1x1_relu(in_channels, out_channels)
21 | )
22 | if i < len(in_channels_list) - 2:
23 | self.top_down_feature_reduction.append(
24 | self._conv1x1_relu(out_channels, out_channels)
25 | )
26 |
27 | def _conv1x1_relu(self, in_channels, out_channels):
28 | return nn.Sequential(OrderedDict([
29 | ("conv", nn.Conv2d(
30 | in_channels, out_channels,
31 | kernel_size=1, stride=1,
32 | bias=False
33 | )),
34 | ("relu", nn.ReLU())
35 | ]))
36 |
37 | def forward(self, x):
38 | x = x[::-1] # to lowest resolution first
39 | top_down_feature = None
40 | for i, feature in enumerate(x):
41 | feature = self.backbone_feature_reduction[i](feature)
42 | if i == 0:
43 | top_down_feature = feature
44 | else:
45 | upsampled_feature = F.interpolate(
46 | top_down_feature,
47 | size=feature.size()[-2:],
48 | mode='bilinear',
49 | align_corners=True
50 | )
51 | if i < len(x) - 1:
52 | top_down_feature = self.top_down_feature_reduction[i - 1](
53 | feature + upsampled_feature
54 | )
55 | else:
56 | top_down_feature = feature + upsampled_feature
57 | return top_down_feature
58 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/modeling/backbone/hourglass.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Malong Technologies Co., Ltd.
2 | # All rights reserved.
3 | #
4 | # Contact: github@malong.com
5 | #
6 | # This source code is licensed under the LICENSE file in the root directory of this source tree.
7 |
8 | import torch
9 | from torch import nn
10 | import torch.nn.functional as F
11 |
12 |
13 | _norm_func = lambda num_features: nn.BatchNorm2d(num_features, eps=1e-5)
14 |
15 |
16 | def _make_layer(in_channels, out_channels, num_blocks, **kwargs):
17 | blocks = []
18 | blocks.append(Residual(in_channels, out_channels))
19 | for _ in range(1, num_blocks):
20 | blocks.append(Residual(out_channels, out_channels, **kwargs))
21 | return nn.Sequential(*blocks)
22 |
23 |
24 | def _make_layer_revr(in_channels, out_channels, num_blocks, **kwargs):
25 | blocks = []
26 | for _ in range(num_blocks - 1):
27 | blocks.append(Residual(in_channels, in_channels, **kwargs))
28 | blocks.append(Residual(in_channels, out_channels, **kwargs))
29 | return nn.Sequential(*blocks)
30 |
31 |
32 | class Residual(nn.Module):
33 | def __init__(self, in_channels, out_channels, stride=1):
34 | super(Residual, self).__init__()
35 |
36 | self.conv_1 = nn.Sequential(
37 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False),
38 | _norm_func(out_channels),
39 | nn.ReLU()
40 | )
41 | self.conv_2 = nn.Sequential(
42 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=1, bias=False),
43 | _norm_func(out_channels)
44 | )
45 | if stride != 1 or in_channels != out_channels:
46 | self.skip = nn.Sequential(
47 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False),
48 | _norm_func(out_channels)
49 | )
50 | else:
51 | self.skip = None
52 | self.out_relu = nn.ReLU()
53 |
54 | def forward(self, x):
55 | b1 = self.conv_2(self.conv_1(x))
56 | if self.skip is None:
57 | return self.out_relu(b1 + x)
58 | else:
59 | return self.out_relu(b1 + self.skip(x))
60 |
61 |
62 | class HourGlassBlock(nn.Module):
63 | def __init__(self, n, channels, blocks):
64 | super(HourGlassBlock, self).__init__()
65 |
66 | self.up_1 = _make_layer(channels[0], channels[0], blocks[0])
67 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
68 | self.low_1 = _make_layer(channels[0], channels[1], blocks[0])
69 | if n <= 1:
70 | self.low_2 = _make_layer(channels[1], channels[1], blocks[1])
71 | else:
72 | self.low_2 = HourGlassBlock(n - 1, channels[1:], blocks[1:])
73 | self.low_3 = _make_layer_revr(channels[1], channels[0], blocks[0])
74 |
75 | def forward(self, x):
76 | upsample = lambda input: F.interpolate(input, scale_factor=2, mode='bilinear', align_corners=True)
77 | up_1 = self.up_1(x)
78 | low = self.low_3(self.low_2(self.low_1(self.pool(x))))
79 | return upsample(low) + up_1
80 |
81 |
82 | class HourGlassNet(nn.Module):
83 | def __init__(self, n, channels, blocks):
84 | super(HourGlassNet, self).__init__()
85 | self.pre = nn.Sequential(
86 | nn.Conv2d(3, 128, kernel_size=7, stride=2, padding=3, bias=False),
87 | _norm_func(128),
88 | nn.ReLU(),
89 | Residual(128, 256, stride=2)
90 | )
91 | hourglass_blocks = []
92 | for _ in range(2):
93 | hourglass_blocks.append(
94 | HourGlassBlock(n, channels, blocks)
95 | )
96 | self.hourglass_blocks = nn.Sequential(*hourglass_blocks)
97 |
98 | def forward(self, x):
99 | return self.hourglass_blocks(self.pre(x))
100 |
101 |
102 | def hourglass88():
103 | return HourGlassNet(3, [256, 256, 256, 512], [2, 2, 2, 2])
104 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/modeling/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Malong Technologies Co., Ltd.
2 | # All rights reserved.
3 | #
4 | # Contact: github@malong.com
5 | #
6 | # This source code is licensed under the LICENSE file in the root directory of this source tree.
7 |
8 | from .misc import Conv2d
9 | from .misc import ConvTranspose2d
10 | from .misc import BatchNorm2d
11 | from .misc import interpolate
12 | from .scale import Scale
13 |
14 |
15 | __all__ = [
16 | "Conv2d",
17 | "ConvTranspose2d",
18 | "interpolate",
19 | "BatchNorm2d",
20 | "Scale"
21 | ]
22 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/modeling/layers/misc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | """
3 | helper class that supports empty tensors on some nn functions.
4 |
5 | Ideally, add support directly in PyTorch to empty tensors in
6 | those functions.
7 |
8 | This can be removed once https://github.com/pytorch/pytorch/issues/12013
9 | is implemented
10 | """
11 |
12 | import math
13 | import torch
14 | from torch.nn.modules.utils import _ntuple
15 |
16 |
17 | class _NewEmptyTensorOp(torch.autograd.Function):
18 | @staticmethod
19 | def forward(ctx, x, new_shape):
20 | ctx.shape = x.shape
21 | return x.new_empty(new_shape)
22 |
23 | @staticmethod
24 | def backward(ctx, grad):
25 | shape = ctx.shape
26 | return _NewEmptyTensorOp.apply(grad, shape), None
27 |
28 |
29 | class Conv2d(torch.nn.Conv2d):
30 | def forward(self, x):
31 | if x.numel() > 0:
32 | return super(Conv2d, self).forward(x)
33 | # get output shape
34 |
35 | output_shape = [
36 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1
37 | for i, p, di, k, d in zip(
38 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride
39 | )
40 | ]
41 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape
42 | return _NewEmptyTensorOp.apply(x, output_shape)
43 |
44 |
45 | class ConvTranspose2d(torch.nn.ConvTranspose2d):
46 | def forward(self, x):
47 | if x.numel() > 0:
48 | return super(ConvTranspose2d, self).forward(x)
49 | # get output shape
50 |
51 | output_shape = [
52 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op
53 | for i, p, di, k, d, op in zip(
54 | x.shape[-2:],
55 | self.padding,
56 | self.dilation,
57 | self.kernel_size,
58 | self.stride,
59 | self.output_padding,
60 | )
61 | ]
62 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape
63 | return _NewEmptyTensorOp.apply(x, output_shape)
64 |
65 |
66 | class BatchNorm2d(torch.nn.BatchNorm2d):
67 | def forward(self, x):
68 | if x.numel() > 0:
69 | return super(BatchNorm2d, self).forward(x)
70 | # get output shape
71 | output_shape = x.shape
72 | return _NewEmptyTensorOp.apply(x, output_shape)
73 |
74 |
75 | def interpolate(
76 | input, size=None, scale_factor=None, mode="nearest", align_corners=None
77 | ):
78 | if input.numel() > 0:
79 | return torch.nn.functional.interpolate(
80 | input, size, scale_factor, mode, align_corners
81 | )
82 |
83 | def _check_size_scale_factor(dim):
84 | if size is None and scale_factor is None:
85 | raise ValueError("either size or scale_factor should be defined")
86 | if size is not None and scale_factor is not None:
87 | raise ValueError("only one of size or scale_factor should be defined")
88 | if (
89 | scale_factor is not None
90 | and isinstance(scale_factor, tuple)
91 | and len(scale_factor) != dim
92 | ):
93 | raise ValueError(
94 | "scale_factor shape must match input shape. "
95 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor))
96 | )
97 |
98 | def _output_size(dim):
99 | _check_size_scale_factor(dim)
100 | if size is not None:
101 | return size
102 | scale_factors = _ntuple(dim)(scale_factor)
103 | # math.floor might return float in py2.7
104 | return [
105 | int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim)
106 | ]
107 |
108 | output_shape = tuple(_output_size(2))
109 | output_shape = input.shape[:-2] + output_shape
110 | return _NewEmptyTensorOp.apply(input, output_shape)
111 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/modeling/layers/scale.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Malong Technologies Co., Ltd.
2 | # All rights reserved.
3 | #
4 | # Contact: github@malong.com
5 | #
6 | # This source code is licensed under the LICENSE file in the root directory of this source tree.
7 |
8 | import torch
9 | from torch import nn
10 |
11 |
12 | class Scale(nn.Module):
13 | def __init__(self, init_value=1.0):
14 | super(Scale, self).__init__()
15 | self.scale = nn.Parameter(torch.FloatTensor([init_value]))
16 |
17 | def forward(self, input):
18 | return input * self.scale
19 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/charnet/modeling/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Malong Technologies Co., Ltd.
2 | # All rights reserved.
3 | #
4 | # Contact: github@malong.com
5 | #
6 | # This source code is licensed under the LICENSE file in the root directory of this source tree.
7 |
8 | import math
9 |
10 |
11 | def rotate_rect(x1, y1, x2, y2, degree, center_x, center_y):
12 | points = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
13 | new_points = list()
14 | for point in points:
15 | dx = point[0] - center_x
16 | dy = point[1] - center_y
17 | new_x = center_x + dx * math.cos(degree) - dy * math.sin(degree)
18 | new_y = center_y + dx * math.sin(degree) + dy * math.cos(degree)
19 | new_points.append([(new_x), (new_y)])
20 | return new_points
21 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/configs/icdar2015_hourglass88.yaml:
--------------------------------------------------------------------------------
1 | INPUT_SIZE: 2280
2 | WEIGHT: "weights/icdar2015_hourglass88.pth"
3 | CHAR_DICT_FILE: "datasets/ICDAR2015/test/char_dict.txt"
4 | WORD_LEXICON_PATH: "datasets/ICDAR2015/test/GenericVocabulary.txt"
5 | RESULTS_SEPARATOR: ","
6 | SIZE_DIVISIBILITY: 128
7 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/datasets/ICDAR2015/test/char_dict.txt:
--------------------------------------------------------------------------------
1 | a0
2 | b1
3 | c2
4 | d3
5 | e4
6 | f5
7 | g6
8 | h7
9 | i8
10 | j9
11 | k10
12 | l11
13 | m12
14 | n13
15 | o14
16 | p15
17 | q16
18 | r17
19 | s18
20 | t19
21 | u20
22 | v21
23 | w22
24 | x23
25 | y24
26 | z25
27 | 026
28 | 127
29 | 228
30 | 329
31 | 430
32 | 531
33 | 632
34 | 733
35 | 834
36 | 935
37 | !36
38 | #37
39 | "38
40 | %39
41 | $40
42 | '41
43 | &42
44 | )43
45 | (44
46 | +45
47 | *46
48 | -47
49 | ,48
50 | /49
51 | .50
52 | ;51
53 | :52
54 | =53
55 | <54
56 | ?55
57 | >56
58 | @57
59 | [58
60 | ]59
61 | \60
62 | _61
63 | ^62
64 | `63
65 | {64
66 | }65
67 | |66
68 | ~67
69 |
--------------------------------------------------------------------------------
/image/fastlane/OCR/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/OCR/sample.jpg
--------------------------------------------------------------------------------
/image/fastlane/Object_Detection/dataset.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from torch.utils.data import Dataset
3 | import pandas as pd
4 | import torch
5 | import numpy as np
6 | import os
7 | from PIL import Image
8 | from utils import iou_width_height
9 |
10 | class YOLODataset(Dataset):
11 | def __init__(
12 | self,
13 | csv_file,
14 | img_dir,
15 | label_dir,
16 | anchors,
17 | image_size=416,
18 | S=[13, 26, 52],
19 | C=20,
20 | transform=None,
21 | ):
22 | self.annotations = pd.read_csv(csv_file)
23 | self.img_dir = img_dir
24 | self.label_dir = label_dir
25 | self.image_size = image_size
26 | self.transform = transform
27 | self.S = S
28 | self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2]) # for all 3 scales
29 | self.num_anchors = self.anchors.shape[0]
30 | self.num_anchors_per_scale = self.num_anchors // 3
31 | self.C = C
32 | self.ignore_iou_thresh = 0.5
33 |
34 | def __len__(self):
35 | return len(self.annotations)
36 |
37 | def __getitem__(self, index):
38 | label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
39 | bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
40 | img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
41 | image = np.array(Image.open(img_path).convert("RGB"))
42 |
43 | if self.transform:
44 | augmentations = self.transform(image=image, bboxes=bboxes)
45 | image = augmentations["image"]
46 | bboxes = augmentations["bboxes"]
47 |
48 | # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
49 | targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
50 | for box in bboxes:
51 | iou_anchors = iou_width_height(torch.tensor(box[2:4]), self.anchors)
52 | anchor_indices = iou_anchors.argsort(descending=True, dim=0)
53 | x, y, width, height, class_label = box
54 | has_anchor = [False] * 3 # each scale should have one anchor
55 | for anchor_idx in anchor_indices:
56 | scale_idx = anchor_idx // self.num_anchors_per_scale
57 | anchor_on_scale = anchor_idx % self.num_anchors_per_scale
58 | S = self.S[scale_idx]
59 | i, j = int(S * y), int(S * x) # which cell
60 | anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
61 | if not anchor_taken and not has_anchor[scale_idx]:
62 | targets[scale_idx][anchor_on_scale, i, j, 0] = 1
63 | x_cell, y_cell = S * x - j, S * y - i # both between [0,1]
64 | width_cell, height_cell = (
65 | width * S,
66 | height * S,
67 | ) # can be greater than 1 since it's relative to cell
68 | box_coordinates = torch.tensor(
69 | [x_cell, y_cell, width_cell, height_cell]
70 | )
71 | targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
72 | targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
73 | has_anchor[scale_idx] = True
74 |
75 | elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
76 | targets[scale_idx][anchor_on_scale, i, j, 0] = -1 # ignore prediction
77 |
78 | return image, tuple(targets)
79 |
--------------------------------------------------------------------------------
/image/fastlane/Object_Detection/models.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of YOLOv3 architecture
3 | """
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | """
9 | Information about architecture config:
10 | Tuple is structured by (filters, kernel_size, stride)
11 | Every conv is a same convolution.
12 | List is structured by "B" indicating a residual block followed by the number of repeats
13 | "S" is for scale prediction block and computing the yolo loss
14 | "U" is for upsampling the feature map and concatenating with a previous layer
15 | """
16 | config = [
17 | (32, 3, 1),
18 | (64, 3, 2),
19 | ["B", 1],
20 | (128, 3, 2),
21 | ["B", 2],
22 | (256, 3, 2),
23 | ["B", 8],
24 | (512, 3, 2),
25 | ["B", 8],
26 | (1024, 3, 2),
27 | ["B", 4], # To this point is Darknet-53
28 | (512, 1, 1),
29 | (1024, 3, 1),
30 | "S",
31 | (256, 1, 1),
32 | "U",
33 | (256, 1, 1),
34 | (512, 3, 1),
35 | "S",
36 | (128, 1, 1),
37 | "U",
38 | (128, 1, 1),
39 | (256, 3, 1),
40 | "S",
41 | ]
42 |
43 |
44 | class CNNBlock(nn.Module):
45 | def __init__(self, in_channels, out_channels, bn_act=True, **kwargs):
46 | super().__init__()
47 | self.conv = nn.Conv2d(in_channels, out_channels, bias=not bn_act, **kwargs)
48 | self.bn = nn.BatchNorm2d(out_channels)
49 | self.leaky = nn.LeakyReLU(0.1)
50 | self.use_bn_act = bn_act
51 |
52 | def forward(self, x):
53 | if self.use_bn_act:
54 | return self.leaky(self.bn(self.conv(x)))
55 | else:
56 | return self.conv(x)
57 |
58 |
59 | class ResidualBlock(nn.Module):
60 | def __init__(self, channels, use_residual=True, num_repeats=1):
61 | super().__init__()
62 | self.layers = nn.ModuleList()
63 | for repeat in range(num_repeats):
64 | self.layers += [
65 | nn.Sequential(
66 | CNNBlock(channels, channels // 2, kernel_size=1),
67 | CNNBlock(channels // 2, channels, kernel_size=3, padding=1),
68 | )
69 | ]
70 |
71 | self.use_residual = use_residual
72 | self.num_repeats = num_repeats
73 |
74 | def forward(self, x):
75 | for layer in self.layers:
76 | if self.use_residual:
77 | x = x + layer(x)
78 | else:
79 | x = layer(x)
80 |
81 | return x
82 |
83 |
84 | class ScalePrediction(nn.Module):
85 | def __init__(self, in_channels, num_classes):
86 | super().__init__()
87 | self.pred = nn.Sequential(
88 | CNNBlock(in_channels, 2 * in_channels, kernel_size=3, padding=1),
89 | CNNBlock(
90 | 2 * in_channels, (num_classes + 5) * 3, bn_act=False, kernel_size=1
91 | ),
92 | )
93 | self.num_classes = num_classes
94 |
95 | def forward(self, x):
96 | return (
97 | self.pred(x)
98 | .reshape(x.shape[0], 3, self.num_classes + 5, x.shape[2], x.shape[3])
99 | .permute(0, 1, 3, 4, 2)
100 | )
101 |
102 |
103 | class YOLOv3(nn.Module):
104 | def __init__(self, in_channels=3, num_classes=80):
105 | super().__init__()
106 | self.num_classes = num_classes
107 | self.in_channels = in_channels
108 | self.layers = self._create_conv_layers()
109 |
110 | def forward(self, x):
111 | outputs = [] # for each scale
112 | route_connections = []
113 | for layer in self.layers:
114 | if isinstance(layer, ScalePrediction):
115 | outputs.append(layer(x))
116 | continue
117 |
118 | x = layer(x)
119 |
120 | if isinstance(layer, ResidualBlock) and layer.num_repeats == 8:
121 | route_connections.append(x)
122 |
123 | elif isinstance(layer, nn.Upsample):
124 | x = torch.cat([x, route_connections[-1]], dim=1)
125 | route_connections.pop()
126 |
127 | return outputs
128 |
129 | def _create_conv_layers(self):
130 | layers = nn.ModuleList()
131 | in_channels = self.in_channels
132 |
133 | for module in config:
134 | if isinstance(module, tuple):
135 | out_channels, kernel_size, stride = module
136 | layers.append(
137 | CNNBlock(
138 | in_channels,
139 | out_channels,
140 | kernel_size=kernel_size,
141 | stride=stride,
142 | padding=1 if kernel_size == 3 else 0,
143 | )
144 | )
145 | in_channels = out_channels
146 |
147 | elif isinstance(module, list):
148 | num_repeats = module[1]
149 | layers.append(ResidualBlock(in_channels, num_repeats=num_repeats,))
150 |
151 | elif isinstance(module, str):
152 | if module == "S":
153 | layers += [
154 | ResidualBlock(in_channels, use_residual=False, num_repeats=1),
155 | CNNBlock(in_channels, in_channels // 2, kernel_size=1),
156 | ScalePrediction(in_channels // 2, num_classes=self.num_classes),
157 | ]
158 | in_channels = in_channels // 2
159 |
160 | elif module == "U":
161 | layers.append(nn.Upsample(scale_factor=2),)
162 | in_channels = in_channels * 3
163 |
164 | return layers
165 |
166 |
167 | if __name__ == "__main__":
168 | num_classes = 20
169 | IMAGE_SIZE = 416
170 | model = YOLOv3(num_classes=num_classes)
171 | x = torch.randn((2, 3, IMAGE_SIZE, IMAGE_SIZE))
172 | out = model(x)
173 | assert model(x)[0].shape == (2, 3, IMAGE_SIZE//32, IMAGE_SIZE//32, num_classes + 5)
174 | assert model(x)[1].shape == (2, 3, IMAGE_SIZE//16, IMAGE_SIZE//16, num_classes + 5)
175 | assert model(x)[2].shape == (2, 3, IMAGE_SIZE//8, IMAGE_SIZE//8, num_classes + 5)
176 | print("Success!")
177 |
--------------------------------------------------------------------------------
/image/fastlane/Object_Detection/skynews-boeing-737-plane_5435020.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VectorInstitute/Computer_Vision_Project/337d2dd041b575a31304c2052370b816bf92b2be/image/fastlane/Object_Detection/skynews-boeing-737-plane_5435020.jpg
--------------------------------------------------------------------------------
/image/fastlane/README.md:
--------------------------------------------------------------------------------
1 | # Vector Fastlane
2 |
3 | You need to have conda installed on your machine. Follow these instructions:
4 |
5 | ```
6 | conda create -n pytorch181 python=3.9
7 | conda activate pytorch181
8 | conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
9 | pip install albumentations scikit-learn scikit-image matplotlib opencv-python yacs joblib natsort h5py tqdm
10 | pip install gdown addict future pyyaml requests scipy yapf editdistance pyclipper pandas==1.4.0 shapely==2.0.1
11 | ```
12 |
13 | You can download the datasets and pretrained weights from this [link](https://drive.google.com/drive/folders/1qqK1uQsgkj0MT7yOhx33mTRlISy27QCA?usp=share_link).
14 |
--------------------------------------------------------------------------------
/video/Galbladder_Segmentation/GallbladderFiles/NOGO1_319 via_project_14May2021_13h54m.json:
--------------------------------------------------------------------------------
1 | {"_via_settings":{"ui":{"annotation_editor_height":25,"annotation_editor_fontsize":0.8,"leftsidebar_width":18,"image_grid":{"img_height":80,"rshape_fill":"none","rshape_fill_opacity":0.3,"rshape_stroke":"yellow","rshape_stroke_width":2,"show_region_shape":true,"show_image_policy":"all"},"image":{"region_label":"__via_region_id__","region_color":"__via_default_region_color__","region_label_font":"10px Sans","on_image_annotation_editor_placement":"NEAR_REGION"}},"core":{"buffer_size":18,"filepath":{},"default_filepath":""},"project":{"name":"NOGO1_319 via_project_14May2021_13h54m"}},"_via_img_metadata":{"frame_317_endo.png426401":{"filename":"frame_317_endo.png","size":426401,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[386,412,435,443,436,405,394],"all_points_y":[384,388,385,379,348,356,364]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[494,545,592,625,623,626,675,705,726,761,783,727,416,443,448,447,463,498],"all_points_y":[241,274,284,294,322,332,332,339,345,358,357,475,471,409,381,367,359,287]},"region_attributes":{}}],"file_attributes":{}},"frame_318_endo.png446373":{"filename":"frame_318_endo.png","size":446373,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[386,412,435,443,436,405,394],"all_points_y":[377,381,378,372,341,349,357]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[494,545,618,625,623,626,673,705,726,761,783,727,416,443,448,447,463,498],"all_points_y":[241,274,291,294,322,332,327,339,345,358,357,475,471,409,381,367,359,287]},"region_attributes":{}}],"file_attributes":{}},"frame_319_endo.png429032":{"filename":"frame_319_endo.png","size":429032,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[382,408,431,439,432,401,390],"all_points_y":[375,379,376,370,339,347,355]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[492,543,584,623,621,624,671,703,724,759,781,725,414,441,446,445,461,496],"all_points_y":[242,275,280,295,323,333,328,340,346,359,358,476,472,410,382,368,360,288]},"region_attributes":{}}],"file_attributes":{}},"frame_311_endo.png432467":{"filename":"frame_311_endo.png","size":432467,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[376,402,425,433,426,411,384],"all_points_y":[386,390,387,381,350,350,366]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[486,520,561,589,613,610,616,667,699,721,771,722,403,429,436,433,457,480],"all_points_y":[245,272,286,289,301,323,342,340,345,355,373,477,477,421,385,372,357,315]},"region_attributes":{}}],"file_attributes":{}},"frame_312_endo.png443219":{"filename":"frame_312_endo.png","size":443219,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[376,402,425,433,426,411,384],"all_points_y":[386,390,387,381,350,350,366]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[488,522,563,591,615,612,618,669,701,723,773,724,405,431,438,435,459,482],"all_points_y":[245,272,286,289,301,323,342,340,345,355,373,477,477,421,385,372,357,315]},"region_attributes":{}}],"file_attributes":{}},"frame_313_endo.png425114":{"filename":"frame_313_endo.png","size":425114,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[376,402,425,433,426,411,384],"all_points_y":[386,390,387,381,350,350,366]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[489,524,565,593,617,614,620,671,703,725,775,726,407,433,441,437,461,484],"all_points_y":[252,273,287,290,302,324,343,341,346,356,374,478,478
,422,399,373,358,316]},"region_attributes":{}}],"file_attributes":{}},"frame_314_endo.png438937":{"filename":"frame_314_endo.png","size":438937,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[379,405,428,436,429,414,387],"all_points_y":[389,393,390,384,353,353,369]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[491,526,567,595,619,616,622,673,705,727,777,728,409,435,443,439,463,486],"all_points_y":[254,275,289,292,304,326,345,343,348,358,376,480,480,424,401,375,360,318]},"region_attributes":{}}],"file_attributes":{}},"frame_315_endo.png391587":{"filename":"frame_315_endo.png","size":391587,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[379,405,428,436,429,414,387],"all_points_y":[389,393,390,384,353,353,369]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[488,539,586,619,617,620,669,699,720,755,777,724,409,437,442,441,457,492],"all_points_y":[248,281,291,301,329,339,339,346,352,365,364,475,473,416,388,374,366,294]},"region_attributes":{}}],"file_attributes":{}},"frame_316_endo.png438069":{"filename":"frame_316_endo.png","size":438069,"regions":[{"shape_attributes":{"name":"polygon","all_points_x":[379,405,428,436,429,414,387],"all_points_y":[389,393,390,384,353,353,369]},"region_attributes":{}},{"shape_attributes":{"name":"polygon","all_points_x":[490,541,588,621,619,622,671,701,722,757,779,723,412,439,444,443,459,494],"all_points_y":[243,276,286,296,324,334,334,341,347,360,359,477,473,411,383,369,361,289]},"region_attributes":{}}],"file_attributes":{}}},"_via_attributes":{"region":{},"file":{}},"_via_data_format_version":"2.0.10","_via_image_id_list":["frame_317_endo.png426401","frame_318_endo.png446373","frame_319_endo.png429032","frame_311_endo.png432467","frame_312_endo.png443219","frame_313_endo.png425114","frame_314_endo.png438937","frame_315_endo.png391587","frame_316_endo.png438069"]}
--------------------------------------------------------------------------------
/video/Galbladder_Segmentation/README.md:
--------------------------------------------------------------------------------
1 | # Gallbladder Segmentation
2 |
3 | To work with this project, it is a prerequisite to install detectron2 and its dependencies. The instructions for this are on the project website: https://github.com/facebookresearch/detectron2
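For reference, one common way to install detectron2 at the time of writing is a pip install straight from the repository; consult the linked instructions for the build matching your PyTorch and CUDA versions:

```
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
```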
4 |
5 | ## Training on custom classes for a personal project in detectron2:
6 |
7 | Detectron2 has a prespecified workflow for common machine learning datasets such as COCO, Pascal VOC, and Cityscapes. It also provides configurations for the tasks that can be performed on these datasets, such as object detection and the different types of segmentation (see the "detectron2/configs/" folder). However, some additions are required to use detectron2 with custom projects and external datasets. In our case, we are using detectron2 to detect the No-Go-Zone in laparoscopic surgery.
8 |
9 | To enable this, we first had to register the dataset under MetadataCatalog and DatasetCatalog. The dataset must be in a specific list-of-dictionaries format (keys: file_name, image_id, height, width, annotations). Next, we simply had to call the DatasetCatalog and MetadataCatalog objects to register the training and evaluation splits of the dataset and the classes within it. A tutorial for this can be found in the official detectron2 Colab notebook at "https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5", in the "Train on a custom dataset" section. A rough sketch of the registration step is shown below.
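As a rough illustration only (the `get_bladder_dicts` helper, the class name, and the exact dataset names are placeholders rather than this project's actual code; only `bladder_val` appears later in this README), the detectron2 calls look roughly like this:

```python
from detectron2.data import DatasetCatalog, MetadataCatalog


def get_bladder_dicts(split):
    # Hypothetical loader returning the list-of-dicts format described above:
    # each record carries file_name, image_id, height, width and an
    # "annotations" list of {bbox, bbox_mode, segmentation, category_id} dicts.
    ...


for split in ["train", "val"]:
    DatasetCatalog.register(f"bladder_{split}", lambda split=split: get_bladder_dicts(split))
    MetadataCatalog.get(f"bladder_{split}").set(thing_classes=["nogo"])
```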
10 |
11 | Also, to perform periodic evaluation during training, we followed the recommendations given for the build_evaluator method in detectron2's defaults.py module. We added a class called MyTrainer, which inherits DefaultTrainer from detectron2.engine. Another addition was the LossEvalHook class, which inherits HookBase from detectron2.engine.hooks. This lets us register our own events on which evaluation steps automatically take place during training; a sketch of the trainer is shown below.
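A minimal sketch of that trainer customization, using detectron2's standard APIs (the LossEvalHook call is left as a comment because its constructor is project-specific):

```python
import os

from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator


class MyTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        # Periodic COCO-style evaluation on the registered validation split.
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        return COCOEvaluator(dataset_name, output_dir=output_folder)

    def build_hooks(self):
        hooks = super().build_hooks()
        # Insert the custom validation-loss hook before the final writer, e.g.
        # hooks.insert(-1, LossEvalHook(...))
        return hooks
```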
12 |
13 | ## Changes made to default demo workflow:
14 |
15 | Another change required was for creating an output video. In the VisualizationDemo class of predictor.py, we have to make sure the metadata it picks up is from our dataset, so we had to use the line
16 |
17 | `self.metadata = MetadataCatalog.get("bladder_val")`
18 |
19 | instead of the old line used to set the self.metadata variable in the __init__ method. To keep the colour the same across all frames, we had to add a line in video_visualizer.py which hard-codes the colour by building a list that repeats the same RGB code. For example, we added `colors = [[0, 0.502, 0.502]] * 10` after the line where the colour is obtained.
20 |
21 | For video output smoothing, we added some lines of code that accumulate the area of the segmentation predictions over an interval and output the result to the video, so the predictions look stable for that number of frames. These changes were made in video_visualizer.py and predictor.py. They mainly consist of a buffer value that sets the interval over which the prediction mask area is averaged, a way to retain the masks until the buffer criterion is met, and a signal to the draw_instance_predictions method in video_visualizer.py. The underlying idea is sketched below.
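The smoothing code itself is specific to this project, but the general idea can be sketched independently of detectron2's classes (the buffer size and threshold below are purely illustrative):

```python
from collections import deque

import numpy as np


class MaskSmoother:
    """Average binary masks over the last `buffer` frames so that the drawn
    prediction stays stable for that many frames."""

    def __init__(self, buffer=10):
        self.masks = deque(maxlen=buffer)

    def update(self, mask):
        self.masks.append(mask.astype(np.float32))
        # Re-threshold the running average to obtain a stable binary mask.
        return np.mean(self.masks, axis=0) > 0.5
```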
22 |
23 | ## Instructions to run training and inference:
24 |
25 | To run the training, there is a SLURM batch file called runt4v1Detectron.slrm. Essentially, that file runs the command:
26 |
27 | ```python
28 | python DetectronGBScript.py
29 | --wd
30 | --ims
31 | --lr
32 | --e
33 | --roi
34 | --d