├── .dockerignore
├── .gitignore
├── LICENSE
├── README.md
├── configs
    ├── VoVNet
    │   ├── Base-Panoptic-FPN.yaml
    │   ├── Base-RCNN-MobileNet-FPN.yaml
    │   ├── Base-RCNN-VoVNet-FPN.yaml
    │   ├── faster_rcnn_Mv2_FPNLite_3x.yaml
    │   ├── faster_rcnn_V_19_FPNLite_3x.yaml
    │   ├── faster_rcnn_V_19_FPN_3x.yaml
    │   ├── faster_rcnn_V_19_dw_FPNLite_3x.yaml
    │   ├── faster_rcnn_V_19_slim_FPNLite_3x.yaml
    │   ├── faster_rcnn_V_19_slim_dw_FPNLite_3x.yaml
    │   ├── faster_rcnn_V_39_FPN_3x.yaml
    │   ├── faster_rcnn_V_57_FPN_3x.yaml
    │   ├── faster_rcnn_V_99_FPN_3x.yaml
    │   ├── mask_rcnn_V_19_FPNLite_3x.yaml
    │   ├── mask_rcnn_V_19_FPN_3x.yaml
    │   ├── mask_rcnn_V_39_FPN_3x.yaml
    │   ├── mask_rcnn_V_57_FPN_3x.yaml
    │   ├── mask_rcnn_V_99_FPN_3x.yaml
    │   ├── panoptic_fpn_V_39_FPN_3x.yaml
    │   └── panoptic_fpn_V_57_FPN_3x.yaml
    ├── aruco-detector.yaml
    ├── deepformable-main.yaml
    └── detectron2
    │   ├── Base-RCNN-C4.yaml
    │   ├── Base-RCNN-DilatedC5.yaml
    │   ├── Base-RCNN-FPN.yaml
    │   └── Base-RetinaNet.yaml
├── deepformable
    ├── __init__.py
    ├── data
    │   ├── __init__.py
    │   ├── build.py
    │   ├── dataset_mapper.py
    │   └── register_datasets.py
    ├── engine
    │   ├── __init__.py
    │   └── trainers.py
    ├── evaluation
    │   ├── __init__.py
    │   └── evaluation.py
    ├── layers
    │   ├── __init__.py
    │   ├── adaptive_loss.py
    │   └── dist_ops.py
    ├── modeling
    │   ├── __init__.py
    │   ├── backbone
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── vovnet.py
    │   ├── intermediate_augmentor
    │   │   ├── __init__.py
    │   │   ├── build.py
    │   │   ├── color_augmentations.py
    │   │   ├── jpeg_augmentor.py
    │   │   ├── perspective_augmentor.py
    │   │   ├── radial_distortion_augmentor.py
    │   │   └── tps_augmentor.py
    │   ├── marker_generator
    │   │   ├── __init__.py
    │   │   ├── april_generator.py
    │   │   ├── aruco_generator.py
    │   │   ├── build.py
    │   │   └── generalized_generator.py
    │   ├── marker_roi_heads
    │   │   ├── __init__.py
    │   │   ├── corner_head.py
    │   │   ├── decoder_head.py
    │   │   ├── marker_roi_heads.py
    │   │   ├── naive_transform_head.py
    │   │   └── transformer_head.py
    │   ├── meta_arch
    │   │   ├── __init__.py
    │   │   ├── classical_detector.py
    │   │   ├── render_rcnn.py
    │   │   └── utils.py
    │   ├── renderer.py
    │   └── rpn.py
    └── utils
    │   ├── __init__.py
    │   ├── aruco_utils.py
    │   ├── board_utils.py
    │   ├── config.py
    │   ├── env.py
    │   ├── general_utils.py
    │   ├── image_utils.py
    │   ├── inpaint_utils.py
    │   └── visualize_utils.py
├── docker
    ├── Cpu.Dockerfile
    ├── DeepfillInpaint.Dockerfile
    ├── Dockerfile
    ├── env_cpu.yml
    └── env_gpu.yml
├── files
    ├── example_boards
    │   ├── charuco.pdf
    │   ├── markers_regular.pdf
    │   ├── random_board1.pdf
    │   ├── random_board2.pdf
    │   └── two_markers_large.pdf
    ├── example_input.png
    ├── example_video.mov
    ├── prediction_output.gif
    ├── teaser.jpg
    └── template_config.json
├── inpaint
    ├── .gitignore
    ├── deepfill.py
    └── deepfill_ops.py
├── jupyter
    └── deepformable_tutorial.ipynb
├── setup.cfg
├── setup.py
└── tools
    ├── README.md
    ├── calibrate.py
    ├── generate_board_json.py
    ├── generate_board_pdf.py
    ├── predictor_demo.py
    ├── preprocess_dataset.py
    ├── train.py
    └── training_visualizer.py


/.dockerignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /models
3 | files/boards
4 | /output


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | 
 3 | *.diff
 4 | 
 5 | # compilation and distribution
 6 | __pycache__
 7 | _ext
 8 | *.pyc
 9 | *.pyd
10 | *.so
11 | *.egg-info/
12 | build/
13 | dist/
14 | wheels/
15 | 
16 | # pytorch/python/numpy formats
17 | *.pth
18 | *.pkl
19 | *.npy
20 | 
21 | # ipython/jupyter notebooks
22 | **/.ipynb_checkpoints/
23 | 
24 | # Editor temporaries
25 | *.swn
26 | *.swo
27 | *.swp
28 | *~
29 | 
30 | # editor settings
31 | .idea
32 | .vscode
33 | 
34 | # project dirs
35 | /models
36 | /output


--------------------------------------------------------------------------------
/configs/VoVNet/Base-Panoptic-FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   MASK_ON: True
 5 |   SEM_SEG_HEAD:
 6 |     LOSS_WEIGHT: 0.5
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_train_panoptic_separated",)
 9 |   TEST: ("coco_2017_val_panoptic_separated",)
10 | 


--------------------------------------------------------------------------------
/configs/VoVNet/Base-RCNN-MobileNet-FPN.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     NAME: "build_mobilenetv2_fpn_backbone"
 5 |     FREEZE_AT: 0
 6 |   RESNETS:
 7 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
 8 |   FPN:
 9 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
10 |   ANCHOR_GENERATOR:
11 |     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
12 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
13 |   RPN:
14 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
15 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
16 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
17 |     # Detectron1 uses 2000 proposals per-batch,
18 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
19 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
20 |     POST_NMS_TOPK_TRAIN: 1000
21 |     POST_NMS_TOPK_TEST: 1000
22 |   ROI_HEADS:
23 |     NAME: "StandardROIHeads"
24 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
25 |   ROI_BOX_HEAD:
26 |     NAME: "FastRCNNConvFCHead"
27 |     NUM_FC: 2
28 |     POOLER_RESOLUTION: 7
29 |   ROI_MASK_HEAD:
30 |     NAME: "MaskRCNNConvUpsampleHead"
31 |     NUM_CONV: 4
32 |     POOLER_RESOLUTION: 14
33 | DATASETS:
34 |   TRAIN: ("coco_2017_train",)
35 |   TEST: ("coco_2017_val",)
36 | SOLVER:
37 |   IMS_PER_BATCH: 16
38 |   BASE_LR: 0.02
39 |   STEPS: (60000, 80000)
40 |   MAX_ITER: 90000
41 | INPUT:
42 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
43 | 


--------------------------------------------------------------------------------
/configs/VoVNet/Base-RCNN-VoVNet-FPN.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     NAME: "build_vovnet_fpn_backbone"
 5 |     FREEZE_AT: 0
 6 |   VOVNET:
 7 |     OUT_FEATURES: ["stage2", "stage3", "stage4", "stage5"]
 8 |   FPN:
 9 |     IN_FEATURES: ["stage2", "stage3", "stage4", "stage5"]
10 |   ANCHOR_GENERATOR:
11 |     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
12 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
13 |   RPN:
14 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
15 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
16 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
17 |     # Detectron1 uses 2000 proposals per-batch,
18 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
19 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
20 |     POST_NMS_TOPK_TRAIN: 1000
21 |     POST_NMS_TOPK_TEST: 1000
22 |   ROI_HEADS:
23 |     NAME: "StandardROIHeads"
24 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
25 |   ROI_BOX_HEAD:
26 |     NAME: "FastRCNNConvFCHead"
27 |     NUM_FC: 2
28 |     POOLER_RESOLUTION: 7
29 |   ROI_MASK_HEAD:
30 |     NAME: "MaskRCNNConvUpsampleHead"
31 |     NUM_CONV: 4
32 |     POOLER_RESOLUTION: 14
33 | DATASETS:
34 |   TRAIN: ("coco_2017_train",)
35 |   TEST: ("coco_2017_val",)
36 | SOLVER:
37 |   IMS_PER_BATCH: 16
38 |   BASE_LR: 0.02
39 |   STEPS: (60000, 80000)
40 |   MAX_ITER: 90000
41 | INPUT:
42 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
43 | 


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_Mv2_FPNLite_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-MobileNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/yduxbc13s3ip6qn/mobilenet_v2_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   FPN:
 6 |     OUT_CHANNELS: 128
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/FRCN-MNv2-FPNLite-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_V_19_FPNLite_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   VOVNET:
 6 |     CONV_BODY : "V-19-eSE"
 7 |   FPN:
 8 |     OUT_CHANNELS: 128
 9 | SOLVER:
10 |   STEPS: (210000, 250000)
11 |   MAX_ITER: 270000
12 | OUTPUT_DIR: "checkpoints/FRCN-V2-FPNLite-19-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_V_19_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   VOVNET:
 6 |     CONV_BODY : "V-19-eSE"
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/FRCN-V2-19-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_V_19_dw_FPNLite_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/9awvl0mxye3nqz1/vovnet19_ese_dw_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   VOVNET:
 6 |     CONV_BODY : "V-19-dw-eSE"
 7 |   FPN:
 8 |     OUT_CHANNELS: 128
 9 | SOLVER:
10 |   STEPS: (210000, 250000)
11 |   MAX_ITER: 270000
12 | OUTPUT_DIR: "checkpoints/FRCN-V2-19-dw-FPNLite-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_V_19_slim_FPNLite_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/8h5ybmi4ftbcom0/vovnet19_ese_slim_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   VOVNET:
 6 |     CONV_BODY : "V-19-slim-eSE"
 7 |   FPN:
 8 |     OUT_CHANNELS: 128
 9 | SOLVER:
10 |   STEPS: (210000, 250000)
11 |   MAX_ITER: 270000
12 | OUTPUT_DIR: "checkpoints/FRCN-V2-19-slim-FPNLite-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_V_19_slim_dw_FPNLite_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/f3s7ospitqoals1/vovnet19_ese_slim_dw_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   VOVNET:
 6 |     CONV_BODY : "V-19-slim-dw-eSE"
 7 |   FPN:
 8 |     OUT_CHANNELS: 128
 9 | SOLVER:
10 |   STEPS: (210000, 250000)
11 |   MAX_ITER: 270000
12 | OUTPUT_DIR: "checkpoints/FRCN-V2-19-slim-dw-FPNLite-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_V_39_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   VOVNET:
 6 |     CONV_BODY : "V-39-eSE"
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/FRCN-V2-39-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_V_57_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   VOVNET:
 6 |     CONV_BODY : "V-57-eSE"
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/FRCN-V2-57-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/faster_rcnn_V_99_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: False
 5 |   VOVNET:
 6 |     CONV_BODY : "V-99-eSE"
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/FRCN-V2-99-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/mask_rcnn_V_19_FPNLite_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: True
 5 |   VOVNET:
 6 |     CONV_BODY : "V-19-eSE"
 7 |   FPN:
 8 |     OUT_CHANNELS: 128
 9 | SOLVER:
10 |   STEPS: (210000, 250000)
11 |   MAX_ITER: 270000
12 | OUTPUT_DIR: "checkpoints/MRCN-V2-19-FPNLite-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/mask_rcnn_V_19_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: True
 5 |   VOVNET:
 6 |     CONV_BODY : "V-19-eSE"
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/MRCN-V2-19-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/mask_rcnn_V_39_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: True
 5 |   VOVNET:
 6 |     CONV_BODY : "V-39-eSE"
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/MRCN-V2-39-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/mask_rcnn_V_57_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: True
 5 |   VOVNET:
 6 |     CONV_BODY : "V-57-eSE"
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/MRCN-V2-57-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/mask_rcnn_V_99_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1"
 4 |   MASK_ON: True
 5 |   VOVNET:
 6 |     CONV_BODY : "V-99-eSE"
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | OUTPUT_DIR: "checkpoints/MRCN-V2-99-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/panoptic_fpn_V_39_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1"
4 |   VOVNET:
5 |     CONV_BODY : "V-39-eSE"
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | OUTPUT_DIR: "checkpoints/Panoptic-FPN-V2-39-3x"


--------------------------------------------------------------------------------
/configs/VoVNet/panoptic_fpn_V_57_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1"
4 |   VOVNET:
5 |     CONV_BODY : "V-57-eSE"
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | OUTPUT_DIR: "checkpoints/Panoptic-FPN-V2-57-3x"


--------------------------------------------------------------------------------
/configs/aruco-detector.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "ClassicalDetector"
 3 |   MARKER_GENERATOR:
 4 |     NAME: "ArucoGenerator"
 5 |     MARKER_SIZE: (8, 8)
 6 |     BORDER_BITS: 1
 7 |     ARUCO_DICT: "6x6_100"
 8 |     NUM_GENERATION_BITS: 36
 9 |   ROI_HEADS:
10 |     NUM_CLASSES: 96
11 |     # NUM_CLASSES: 1000
12 | DATASETS:
13 |   TRAIN: ("deepformable-train",)
14 |   TEST: ("deepformable-test",)
15 | INPUT:
16 |   MIN_SIZE_TEST: 0
17 |   MAX_SIZE_TEST: 1920
18 |   RANDOM_FLIP: "none" # Markers will also be mirror flipped in this case, so we disable it
19 |   MASK_FORMAT: "polygon"
20 | VERSION: 2


--------------------------------------------------------------------------------
/configs/deepformable-main.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "VoVNet/faster_rcnn_V_19_dw_FPNLite_3x.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "GeneralizedRCNN_RenderInput"
 4 |   # WEIGHTS: "/host/models/deepformable_model.pth"
 5 |   WEIGHTS: "https://www.dropbox.com/s/7h6zn0owumucs48/faster_rcnn_V_19_eSE_dw_FPNLite_ms_3x.pth?dl=1"
 6 |   MASK_ON: True
 7 |   MARKER_GENERATOR:
 8 |     NAME: "GeneralizedGenerator"
 9 |     CONV_DIMS: [[8], [6], [6]]
10 |     FINAL_CONV_KERNEL_SIZE: 1
11 |     NUM_GENERATION_BITS: 36
12 |     MARKER_SIZE: (32, 32)
13 |   VOVNET:
14 |     OUT_FEATURES: ["stem", "stage2", "stage3", "stage4", "stage5"]
15 |   ROI_HEADS:
16 |     NAME: "MarkerROIHeads"
17 |     NUM_CLASSES: 96
18 |     NMS_THRESH_TEST: 0.1
19 |     SCORE_THRESH_TEST: 0.8
20 |   PROPOSAL_GENERATOR:
21 |     NAME: "RPN_AdaptiveLoss"
22 |   RPN:
23 |     PRE_NMS_TOPK_TEST: 5000
24 |     POST_NMS_TOPK_TEST: 1000
25 | INTERMEDIATE_AUGMENTOR:
26 |   AUG_LIST: [
27 |     "PerspectiveAugmentor", "TpsAugmentor", "RadialDistortionAugmentor",
28 |     "DefocusBlurAugmentor", "MotionBlurAugmentor", "HueShiftAugmentor", 
29 |     "BrightnessAugmentor", "NoiseAugmentor", "GammaAugmentor", 
30 |     "GammaCorrector", "JPEGAugmentor"]
31 |   EXEC_PROBA_LIST: [0.4, 0.5, 0.3, 0.4, 0.4, 0.4, 0.4, 0.45, 0.3, 1.0, 0.35]
32 | RENDERER:
33 |   # NAME: "MarkerRenderer"
34 |   SHADING_METHOD: "cook-torrance"
35 | DATASETS:
36 |   TRAIN: ("deepformable-rendered-train",)
37 |   TEST: ("deepformable-rendered-test1", "deepformable-rendered-aug-test1",)
38 |   # TEST: (
39 |   #   "deepformable-rendered-test1", "deepformable-rendered-aug-test1", 
40 |   #   "deepformable_flat-real-load_markers-test2", "deepformable_flat-real-load_markers-aug-test2",
41 |   #   "deepformable_deformation-real-load_markers-test3", "deepformable_deformation-real-load_markers-aug-test3",)
42 | TEST:
43 |   EVAL_PERIOD: 2500
44 |   DETECTIONS_PER_IMAGE: 1000
45 | SOLVER:
46 |   BASE_LR: 0.02
47 |   IMS_PER_BATCH: 16
48 |   STEPS: (20000,25000)
49 |   MAX_ITER: 30000
50 |   CLIP_GRADIENTS:
51 |     ENABLED: True
52 | VIS_PERIOD: 500
53 | INPUT:
54 |   MIN_SIZE_TEST: 0
55 |   MIN_SIZE_TRAIN: (640, 672, 704, 720, 736, 768, 800, 1080)
56 |   MAX_SIZE_TRAIN: 1920
57 |   MAX_SIZE_TEST: 1920
58 |   RANDOM_FLIP: "none" # Markers will also be mirror flipped in this case, so we disable it
59 |   MASK_FORMAT: "polygon"
60 | DATALOADER:
61 |   NUM_WORKERS: 8
62 | SEED: 13
63 | OUTPUT_DIR: "/Data/Experiments/test"
64 | VERSION: 2
65 | 


--------------------------------------------------------------------------------
/configs/detectron2/Base-RCNN-C4.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   RPN:
 4 |     PRE_NMS_TOPK_TEST: 6000
 5 |     POST_NMS_TOPK_TEST: 1000
 6 |   ROI_HEADS:
 7 |     NAME: "Res5ROIHeads"
 8 | DATASETS:
 9 |   TRAIN: ("coco_2017_train",)
10 |   TEST: ("coco_2017_val",)
11 | SOLVER:
12 |   IMS_PER_BATCH: 16
13 |   BASE_LR: 0.02
14 |   STEPS: (60000, 80000)
15 |   MAX_ITER: 90000
16 | INPUT:
17 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
18 | VERSION: 2
19 | 


--------------------------------------------------------------------------------
/configs/detectron2/Base-RCNN-DilatedC5.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   RESNETS:
 4 |     OUT_FEATURES: ["res5"]
 5 |     RES5_DILATION: 2
 6 |   RPN:
 7 |     IN_FEATURES: ["res5"]
 8 |     PRE_NMS_TOPK_TEST: 6000
 9 |     POST_NMS_TOPK_TEST: 1000
10 |   ROI_HEADS:
11 |     NAME: "StandardROIHeads"
12 |     IN_FEATURES: ["res5"]
13 |   ROI_BOX_HEAD:
14 |     NAME: "FastRCNNConvFCHead"
15 |     NUM_FC: 2
16 |     POOLER_RESOLUTION: 7
17 |   ROI_MASK_HEAD:
18 |     NAME: "MaskRCNNConvUpsampleHead"
19 |     NUM_CONV: 4
20 |     POOLER_RESOLUTION: 14
21 | DATASETS:
22 |   TRAIN: ("coco_2017_train",)
23 |   TEST: ("coco_2017_val",)
24 | SOLVER:
25 |   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 |   STEPS: (60000, 80000)
28 |   MAX_ITER: 90000
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
31 | VERSION: 2
32 | 


--------------------------------------------------------------------------------
/configs/detectron2/Base-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     NAME: "build_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
 7 |   FPN:
 8 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
 9 |   ANCHOR_GENERATOR:
10 |     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
11 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
12 |   RPN:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
15 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
16 |     # Detectron1 uses 2000 proposals per-batch,
17 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 |     POST_NMS_TOPK_TRAIN: 1000
20 |     POST_NMS_TOPK_TEST: 1000
21 |   ROI_HEADS:
22 |     NAME: "StandardROIHeads"
23 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
24 |   ROI_BOX_HEAD:
25 |     NAME: "FastRCNNConvFCHead"
26 |     NUM_FC: 2
27 |     POOLER_RESOLUTION: 7
28 |   ROI_MASK_HEAD:
29 |     NAME: "MaskRCNNConvUpsampleHead"
30 |     NUM_CONV: 4
31 |     POOLER_RESOLUTION: 14
32 | DATASETS:
33 |   TRAIN: ("coco_2017_train",)
34 |   TEST: ("coco_2017_val",)
35 | SOLVER:
36 |   IMS_PER_BATCH: 16
37 |   BASE_LR: 0.02
38 |   STEPS: (60000, 80000)
39 |   MAX_ITER: 90000
40 | INPUT:
41 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
42 | VERSION: 2
43 | 


--------------------------------------------------------------------------------
/configs/detectron2/Base-RetinaNet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "RetinaNet"
 3 |   BACKBONE:
 4 |     NAME: "build_retinanet_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res3", "res4", "res5"]
 7 |   ANCHOR_GENERATOR:
 8 |     SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
 9 |   FPN:
10 |     IN_FEATURES: ["res3", "res4", "res5"]
11 |   RETINANET:
12 |     IOU_THRESHOLDS: [0.4, 0.5]
13 |     IOU_LABELS: [0, -1, 1]
14 |     SMOOTH_L1_LOSS_BETA: 0.0
15 | DATASETS:
16 |   TRAIN: ("coco_2017_train",)
17 |   TEST: ("coco_2017_val",)
18 | SOLVER:
19 |   IMS_PER_BATCH: 16
20 |   BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
21 |   STEPS: (60000, 80000)
22 |   MAX_ITER: 90000
23 | INPUT:
24 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
25 | VERSION: 2
26 | 


--------------------------------------------------------------------------------
/deepformable/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | # Edited by Mustafa B. Yaldiz (VCLAB, KAIST)
 3 | 
 4 | from .utils.env import setup_environment
 5 | 
 6 | setup_environment()
 7 | 
 8 | 
 9 | # This line will be programatically read/write by setup.py.
10 | # Leave them at the bottom of this file and don't touch them.
11 | __version__ = "0.1.0"


--------------------------------------------------------------------------------
/deepformable/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .register_datasets import register_deepformable_dataset
3 | from .dataset_mapper import DeepformableMapper, DetectronMapperWAnn
4 | from .build import build_detection_train_loader


--------------------------------------------------------------------------------
/deepformable/data/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | import logging
 3 | import random
 4 | 
 5 | import torch
 6 | import torch.utils.data as torchdata
 7 | 
 8 | from detectron2.config import configurable
 9 | from detectron2.data.build import _train_loader_from_config, build_batch_data_loader
10 | from detectron2.data.common import _MapIterableDataset, DatasetFromList
11 | from detectron2.data.samplers import TrainingSampler
12 | from detectron2.utils.serialize import PicklableWrapper
13 | 
14 | 
15 | class MapDataset(torchdata.Dataset):
16 |     """
17 |     This method tries several times to map given data, if cannot picks
18 |     another data. It is modified from original MapDataset located at Detectron2.    
19 |     """
20 |     def __init__(self, dataset, map_func, retry_count=10):
21 |         self._dataset = dataset
22 |         self._map_func = PicklableWrapper(map_func)  # wrap so that a lambda will work
23 |         self._rng = random.Random(42)
24 |         self.retry_count = retry_count
25 | 
26 |     def __new__(cls, dataset, map_func):
27 |         is_iterable = isinstance(dataset, torchdata.IterableDataset)
28 |         if is_iterable:
29 |             return _MapIterableDataset(dataset, map_func)
30 |         else:
31 |             return super().__new__(cls)
32 | 
33 |     def __getnewargs__(self):
34 |         return self._dataset, self._map_func
35 | 
36 |     def __len__(self):
37 |         return len(self._dataset)
38 | 
39 |     def __getitem__(self, idx):
40 |         cur_idx = int(idx)
41 |         d = self._dataset[cur_idx]
42 | 
43 |         for _ in range(self.retry_count):
44 |             data = self._map_func(d)
45 |             if data is not None:
46 |                 return data
47 |         
48 |         logger = logging.getLogger(__name__)
49 |         # This id should be same as image_id
50 |         warn_str = "Failed to apply `_map_func` for idx: {}".format(idx)    
51 |         logger.warning(warn_str)
52 |             
53 |         return self.__getitem__(self._rng.randint(0, self.__len__()-1))
54 |         
55 | 
56 | @configurable(from_config=_train_loader_from_config)
57 | def build_detection_train_loader(
58 |     dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0
59 | ):
60 |     """
61 |     This method is modified to use our MapDataset implementation.
62 |     """
63 |     if isinstance(dataset, list):
64 |         dataset = DatasetFromList(dataset, copy=False)
65 |     if mapper is not None:
66 |         dataset = MapDataset(dataset, mapper)
67 | 
68 |     """
69 |     TODO: Can get output class id's and thing classes 
70 |     from mapper and change metadata to eliminate problems here.
71 |     """
72 | 
73 |     if isinstance(dataset, torchdata.IterableDataset):
74 |         assert sampler is None, "sampler must be None if dataset is IterableDataset"
75 |     else:
76 |         if sampler is None:
77 |             sampler = TrainingSampler(len(dataset))
78 |         assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}"
79 |     return build_batch_data_loader(
80 |         dataset,
81 |         sampler,
82 |         total_batch_size,
83 |         aspect_ratio_grouping=aspect_ratio_grouping,
84 |         num_workers=num_workers,
85 |     )
86 | 


--------------------------------------------------------------------------------
/deepformable/data/register_datasets.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This code is modified from detectron2 implementation to load additional labels from datasets.
  3 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  4 | """
  5 | import contextlib
  6 | import io
  7 | import logging
  8 | import numpy as np
  9 | import os
 10 | import pycocotools.mask as mask_util
 11 | from fvcore.common.file_io import PathManager
 12 | from fvcore.common.timer import Timer
 13 | 
 14 | from detectron2.structures import Boxes, BoxMode, PolygonMasks
 15 | 
 16 | from detectron2.data import DatasetCatalog, MetadataCatalog
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | def load_deepformable_json(
 21 |     json_file, image_root, dataset_name=None, extra_annotation_keys=None
 22 | ):
 23 |     """
 24 |     This method modified to support loading the marker instances.
 25 |     """
 26 |     from pycocotools.coco import COCO
 27 | 
 28 |     timer = Timer()
 29 |     json_file = PathManager.get_local_path(json_file)
 30 |     with contextlib.redirect_stdout(io.StringIO()):
 31 |         coco_api = COCO(json_file)
 32 |     if timer.seconds() > 1:
 33 |         logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
 34 | 
 35 |     category_info = None
 36 |     id_map = None
 37 |     if dataset_name is not None:
 38 |         meta = MetadataCatalog.get(dataset_name)
 39 |         cat_ids = sorted(coco_api.getCatIds())
 40 |         cats = coco_api.loadCats(cat_ids)
 41 |         if "thing_classes" not in meta.as_dict():
 42 |             # The categories in a custom json file may not be sorted.
 43 |             thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
 44 |             meta.thing_classes = thing_classes
 45 |         
 46 |         if "marker_ids" in cats[0]:
 47 |             category_info = {c["id"]: c for c in cats}
 48 | 
 49 |         # In COCO, certain category ids are artificially removed,
 50 |         # and by convention they are always ignored.
 51 |         # We deal with COCO's id issue and translate
 52 |         # the category ids to contiguous ids in [0, 80).
 53 | 
 54 |         # It works by looking at the "categories" field in the json, therefore
 55 |         # if users' own json also have incontiguous ids, we'll
 56 |         # apply this mapping as well but print a warning.
 57 |         if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
 58 |             if "coco" not in dataset_name:
 59 |                 logger.warning(
 60 |                     """
 61 | Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
 62 | """
 63 |                 )
 64 |         if "thing_dataset_id_to_contiguous_id" not in meta.as_dict():
 65 |             id_map = {v: i for i, v in enumerate(cat_ids)}
 66 |             meta.thing_dataset_id_to_contiguous_id = id_map
 67 |         else:
 68 |             id_map = meta.thing_dataset_id_to_contiguous_id
 69 | 
 70 |     # sort indices for reproducible results
 71 |     img_ids = sorted(coco_api.imgs.keys())
 72 |     # imgs is a list of dicts, each looks something like:
 73 |     # {'license': 4,
 74 |     #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
 75 |     #  'file_name': 'COCO_val2014_000000001268.jpg',
 76 |     #  'height': 427,
 77 |     #  'width': 640,
 78 |     #  'date_captured': '2013-11-17 05:57:24',
 79 |     #  'id': 1268}
 80 |     imgs = coco_api.loadImgs(img_ids)
 81 |     # anns is a list[list[dict]], where each dict is an annotation
 82 |     # record for an object. The inner list enumerates the objects in an image
 83 |     # and the outer list enumerates over images. Example of anns[0]:
 84 |     # [{'segmentation': [[192.81,
 85 |     #     247.09,
 86 |     #     ...
 87 |     #     219.03,
 88 |     #     249.06]],
 89 |     #   'area': 1035.749,
 90 |     #   'iscrowd': 0,
 91 |     #   'image_id': 1268,
 92 |     #   'bbox': [192.81, 224.8, 74.73, 33.43],
 93 |     #   'category_id': 16,
 94 |     #   'id': 42986},
 95 |     #  ...]
 96 |     anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
 97 |     total_num_valid_anns = sum([len(x) for x in anns])
 98 |     total_num_anns = len(coco_api.anns)
 99 |     if total_num_valid_anns < total_num_anns:
100 |         logger.warning(
101 |             f"{json_file} contains {total_num_anns} annotations, but only "
102 |             f"{total_num_valid_anns} of them match to images in the file."
103 |         )
104 | 
105 |     if "minival" not in json_file:
106 |         # The popular valminusminival & minival annotations for COCO2014 contain this bug.
107 |         # However the ratio of buggy annotations there is tiny and does not affect accuracy.
108 |         # Therefore we explicitly white-list them.
109 |         ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
110 |         assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
111 |             json_file
112 |         )
113 | 
114 |     imgs_anns = list(zip(imgs, anns))
115 |     logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))
116 | 
117 |     dataset_dicts = []
118 | 
119 |     ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])
120 | 
121 |     # Load camera dictionary
122 |     if "cameras" in coco_api.dataset:
123 |         cam_dict = {i["id"]: i for i in coco_api.dataset["cameras"]}
124 |     else:
125 |         cam_dict = None
126 |     
127 |     num_instances_without_valid_segmentation = 0
128 | 
129 |     for (img_dict, anno_dict_list) in imgs_anns:
130 |         record = {}
131 |         record["file_name"] = os.path.join(image_root, img_dict["file_name"])
132 |         record["height"] = img_dict["height"]
133 |         record["width"] = img_dict["width"]
134 |         image_id = record["image_id"] = img_dict["id"]
135 |         # Add camera to record
136 |         if cam_dict:
137 |             record["camera"] = cam_dict[img_dict["camera_id"]]
138 | 
139 |         objs = []
140 |         for anno in anno_dict_list:
141 |             # Check that the image_id in this annotation is the same as
142 |             # the image_id we're looking at.
143 |             # This fails only when the data parsing logic or the annotation file is buggy.
144 | 
145 |             # The original COCO valminusminival2014 & minival2014 annotation files
146 |             # actually contains bugs that, together with certain ways of using COCO API,
147 |             # can trigger this assertion.
148 |             assert anno["image_id"] == image_id
149 | 
150 |             assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'
151 | 
152 |             if category_info:
153 |                 for key, value in category_info[anno["category_id"]].items():
154 |                     if key in extra_annotation_keys:
155 |                         anno[key] = value
156 | 
157 |             obj = {key: anno[key] for key in ann_keys if key in anno}
158 |             if "bbox" in obj and len(obj["bbox"]) == 0:
159 |                 raise ValueError(
160 |                     f"One annotation of image {image_id} contains empty 'bbox' value! "
161 |                     "This json does not have valid COCO format."
162 |                 )
163 | 
164 |             segm = anno.get("segmentation", None)
165 |             if segm:  # either list[list[float]] or dict(RLE)
166 |                 if isinstance(segm, dict):
167 |                     if isinstance(segm["counts"], list):
168 |                         # convert to compressed RLE
169 |                         segm = mask_util.frPyObjects(segm, *segm["size"])
170 |                 else:
171 |                     # filter out invalid polygons (< 3 points)
172 |                     segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
173 |                     if len(segm) == 0:
174 |                         num_instances_without_valid_segmentation += 1
175 |                         continue  # ignore this instance
176 |                 obj["segmentation"] = segm
177 | 
178 |             keypts = anno.get("keypoints", None)
179 |             if keypts:  # list[int]
180 |                 for idx, v in enumerate(keypts):
181 |                     if idx % 3 != 2:
182 |                         # COCO's segmentation coordinates are floating points in [0, H or W],
183 |                         # but keypoint coordinates are integers in [0, H-1 or W-1]
184 |                         # Therefore we assume the coordinates are "pixel indices" and
185 |                         # add 0.5 to convert to floating point coordinates.
186 |                         keypts[idx] = v + 0.5
187 |                 obj["keypoints"] = keypts
188 | 
189 |             obj["bbox_mode"] = BoxMode.XYWH_ABS
190 |             if id_map:
191 |                 annotation_category_id = obj["category_id"]
192 |                 try:
193 |                     obj["category_id"] = id_map[annotation_category_id]
194 |                 except KeyError as e:
195 |                     raise KeyError(
196 |                         f"Encountered category_id={annotation_category_id} "
197 |                         "but this id does not exist in 'categories' of the json file."
198 |                     ) from e
199 |             objs.append(obj)
200 |         record["annotations"] = objs
201 |         dataset_dicts.append(record)
202 | 
203 |     if num_instances_without_valid_segmentation > 0:
204 |         logger.warning(
205 |             "Filtered out {} instances without valid segmentation. ".format(
206 |                 num_instances_without_valid_segmentation
207 |             )
208 |             + "There might be issues in your dataset generation process.  Please "
209 |             "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully"
210 |         )
211 |     return dataset_dicts
212 | 
213 | def register_deepformable_dataset(
214 |     name, metadata, json_file, 
215 |     image_root, load_markers=True,
216 | ):
217 |     assert isinstance(name, str), name
218 |     assert isinstance(json_file, (str, os.PathLike)), json_file
219 |     assert isinstance(image_root, (str, os.PathLike)), image_root
220 |     extra_annotation_keys = [
221 |         "rvec", "tvec", "normal", "refl_dir", "view_dir",
222 |         "brightness_max", "avg_color", "board_world"]
223 |     if load_markers: extra_annotation_keys += ["marker_ids", "markers_world"]
224 |     # 1. register a function which returns dicts
225 |     DatasetCatalog.register(
226 |         name, lambda: load_deepformable_json(
227 |             json_file, image_root, name, extra_annotation_keys))
228 | 
229 |     # 2. Optionally, add metadata about this dataset,
230 |     # since they might be useful in evaluation, visualization or logging
231 |     MetadataCatalog.get(name).set(
232 |         json_file=json_file, image_root=image_root, evaluator_type="deepformable", **metadata
233 |     )
234 | 


--------------------------------------------------------------------------------
/deepformable/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .trainers import DeepformableTrainer


--------------------------------------------------------------------------------
/deepformable/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .evaluation import DeepformableEvaluator
3 | 


--------------------------------------------------------------------------------
/deepformable/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .adaptive_loss import AdaptiveLoss
3 | from .dist_ops import MarkerGatherFunc, AllReduce


--------------------------------------------------------------------------------
/deepformable/layers/adaptive_loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | import torch
 3 | import torch.distributed as dist
 4 | from torch import nn
 5 | from torch.nn import functional as F
 6 | 
 7 | from detectron2.utils.comm import get_world_size, is_main_process
 8 | from .dist_ops import AllReduce
 9 | 
10 | 
11 | class AdaptiveLoss(nn.Module):
12 |     """
13 |     This class is implemented to filter the loss values that cause exploding gradients.
14 |     """
15 |     def __init__(
16 |         self,
17 |         n=3.0,
18 |         beta=0.995,
19 |         beta2=0.999,
20 |         loss_type='bce',
21 |         adaptive_on=True,
22 |     ):
23 |         super().__init__()
24 |         self.register_buffer("n", torch.tensor(n, dtype=torch.float32))
25 |         self.register_buffer("beta", torch.tensor(beta, dtype=torch.float32))
26 |         self.register_buffer("beta2", torch.tensor(beta2, dtype=torch.float32))
27 |         self.register_buffer("running_mean", torch.tensor(-1, dtype=torch.float32))
28 |         self.register_buffer("running_std", torch.tensor(-1, dtype=torch.float32))
29 |         if loss_type == 'bce':   
30 |             self.loss = F.binary_cross_entropy_with_logits
31 |         elif loss_type == 'l1':
32 |             self.loss = F.l1_loss
33 |         elif loss_type == 'l2' or loss_type == 'mse':
34 |             self.loss = F.mse_loss
35 |         else:
36 |             raise "Unknown loss type!"
37 |         self.adaptive_on = adaptive_on
38 |     
39 |     def forward(self, input, target):
40 |         if not self.adaptive_on:
41 |             return self.loss(input, target, reduction='sum')
42 |         loss_values = self.loss(input, target, reduction='none')
43 |         
44 |         threshold = self.running_mean + self.n * self.running_std
45 |         #loss_filtered = loss_values[loss_values > threshold].detach()
46 |         loss_filtered = torch.clamp(loss_values[loss_values > threshold], 0, threshold.item())
47 |         loss_passed = loss_values[loss_values <= threshold]
48 |         loss_final = torch.sum(loss_filtered) + torch.sum(loss_passed)
49 | 
50 |         mean, meansqr = loss_values.mean(), torch.mean(loss_values * loss_values)
51 |         world_size = get_world_size()
52 |         if world_size != 1:
53 |             vec = torch.cat([mean.view(1), meansqr.view(1)])
54 |             mean, meansqr = (AllReduce.apply(vec) * (1.0 / world_size)).detach()
55 |         std = torch.sqrt(meansqr - mean * mean)
56 |         
57 |         if self.running_mean > 0:
58 |             mean_step = min(mean * (1.0 - self.beta), self.running_std * 0.75)
59 |             self.running_mean = self.running_mean * self.beta + mean_step
60 |             std_step =  min(std * (1.0 - self.beta2), self.running_std * 0.25)
61 |             self.running_std = self.running_std * self.beta2 + std_step
62 |         else:
63 |             self.running_mean = mean * 1.5
64 |             self.running_std = std
65 | 
66 |         # if is_main_process():
67 |         #     print("Mean:", self.running_mean, "Std:", self.running_std)
68 |         #     if len(loss_filtered) != 0:
69 |         #         print("Filtered:", loss_filtered, 
70 |         #             "-- Running_val:", self.running_mean, self.running_std,
71 |         #             "-- Calc val:", mean, std,)
72 |         # loss_final = F.binary_cross_entropy_with_logits(
73 |         #     input, target, reduction='sum')
74 |         return loss_final
75 | 


--------------------------------------------------------------------------------
/deepformable/layers/dist_ops.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This code references https://github.com/ag14774/diffdist/blob/b5c17c7354bbbe98b6e8a791ea78614861b4997a/diffdist/
 3 | It is primarily used to distribute marker generation task across GPUs.
 4 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 5 | """
 6 | import torch
 7 | import torch.distributed as dist
 8 | from torch.autograd import Function
 9 | 
10 | from detectron2.utils.comm import get_world_size, get_rank
11 | 
12 | class MarkerGatherFunc(Function):
13 |     @staticmethod
14 |     def forward(ctx, markers, rank, group, world_size, backend):
15 |         ctx.backend, ctx.marker_size = backend, len(markers)
16 |         ctx.world_size, ctx.rank, ctx.group = world_size, rank, group
17 |         if world_size == 1:
18 |             return markers
19 |         gather_list = [torch.zeros_like(markers, device=markers.device) for _ in range(world_size)]
20 |         if backend == 'nccl':
21 |             gather_list = [gather_list]
22 |             dist.all_gather_multigpu(gather_list, [markers], group=group)
23 |             gather_list = gather_list[0]
24 |         else:
25 |             dist.all_gather(gather_list, markers, group=group)
26 |             gather_list = [i.to(markers.device) for i in gather_list]
27 |         return torch.cat(gather_list, dim=0)
28 | 
29 |     @staticmethod
30 |     def backward(ctx, markers_grad):
31 |         if ctx.world_size == 1:
32 |             return markers_grad, None, None, None, None
33 |         if ctx.backend == 'nccl':
34 |             markers_grad = [markers_grad]
35 |             dist.all_reduce_multigpu(markers_grad, group=ctx.group)
36 |             markers_grad = markers_grad[0]
37 |         else:
38 |             dist.all_reduce(markers_grad, group=ctx.group)
39 |         return markers_grad[ctx.marker_size*ctx.rank:ctx.marker_size*(ctx.rank+1)], None, None, None, None
40 | 
41 | 
42 | class AllReduce(Function):
43 |     @staticmethod
44 |     def forward(ctx, input):
45 |         input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())]
46 |         # Use allgather instead of allreduce since I don't trust in-place operations ..
47 |         dist.all_gather(input_list, input, async_op=False)
48 |         inputs = torch.stack(input_list, dim=0)
49 |         return torch.sum(inputs, dim=0)
50 | 
51 |     @staticmethod
52 |     def backward(ctx, grad_output):
53 |         dist.all_reduce(grad_output, async_op=False)
54 |         return grad_output


--------------------------------------------------------------------------------
/deepformable/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .intermediate_augmentor import *
3 | from .marker_generator import *
4 | from .backbone import *
5 | from .marker_roi_heads import *
6 | from .meta_arch import *
7 | 
8 | from .renderer import MarkerRenderer
9 | from .rpn import RPN_AdaptiveLoss


--------------------------------------------------------------------------------
/deepformable/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .vovnet import build_vovnet_fpn_backbone, build_vovnet_backbone


--------------------------------------------------------------------------------
/deepformable/modeling/backbone/utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | This code modifies FPN implementation from detectron2 to output stem features.
 4 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 5 | """
 6 | import torch
 7 | import torch.nn.functional as F
 8 | 
 9 | from detectron2.modeling.backbone.fpn import FPN as FPN_detectron2
10 | 
11 | class FPN(FPN_detectron2):
12 |     def forward(self, x):
13 |         # Reverse feature maps into top-down order (from low to high resolution)
14 |         bottom_up_features = self.bottom_up(x)
15 |         x = [bottom_up_features[f] for f in self.in_features[::-1]]
16 |         results = []
17 |         prev_features = self.lateral_convs[0](x[0])
18 |         results.append(self.output_convs[0](prev_features))
19 |         for features, lateral_conv, output_conv in zip(
20 |             x[1:], self.lateral_convs[1:], self.output_convs[1:]
21 |         ):
22 |             top_down_features = F.interpolate(prev_features, scale_factor=2, mode="nearest")
23 |             lateral_features = lateral_conv(features)
24 |             prev_features = lateral_features + top_down_features
25 |             if self._fuse_type == "avg":
26 |                 prev_features /= 2
27 |             results.insert(0, output_conv(prev_features))
28 | 
29 |         if self.top_block is not None:
30 |             top_block_in_feature = bottom_up_features.get(self.top_block.in_feature, None)
31 |             if top_block_in_feature is None:
32 |                 top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)]
33 |             results.extend(self.top_block(top_block_in_feature))
34 |         assert len(self._out_features) == len(results)
35 |         out = dict(zip(self._out_features, results))
36 |         
37 |         # -- MODIFICATION: Make sure out includes stem features added in output of backbone --
38 |         for key in bottom_up_features.keys():
39 |             if key not in self.in_features and key in self.bottom_up.output_shape():
40 |                 out[key] = bottom_up_features[key]
41 |         
42 |         return out
43 | 


--------------------------------------------------------------------------------
/deepformable/modeling/intermediate_augmentor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | from .build import (
 3 |     INTERMEDIATE_AUGMENTOR_REGISTRY, 
 4 |     build_intermediate_augmentations,
 5 |     IntermediateAugmentor
 6 | )
 7 | 
 8 | from .color_augmentations import (
 9 |     GammaAugmentor, GammaCorrector, DefocusBlurAugmentor,
10 |     MotionBlurAugmentor, HueShiftAugmentor, 
11 |     BrightnessAugmentor, NoiseAugmentor)
12 | 
13 | from .jpeg_augmentor import JPEGAugmentor
14 | 
15 | from .perspective_augmentor import PerspectiveAugmentor
16 | from .radial_distortion_augmentor import RadialDistortionAugmentor
17 | from .tps_augmentor import TpsAugmentor
18 | 


--------------------------------------------------------------------------------
/deepformable/modeling/intermediate_augmentor/build.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  3 | TODO: 
  4 | - Support batch operations for the images. Currently the input is (C, H, W).
  5 | """
  6 | 
  7 | import torch
  8 | from torch import nn
  9 | 
 10 | from detectron2.utils.registry import Registry
 11 | from abc import ABCMeta, abstractmethod
 12 | 
 13 | INTERMEDIATE_AUGMENTOR_REGISTRY = Registry("INTERMEDIATE_AUGMENTOR") 
 14 | INTERMEDIATE_AUGMENTOR_REGISTRY.__doc__ = """
 15 | Registry for the differentiable intermediate augmentations after rendering
 16 | """
 17 | 
 18 | 
 19 | def build_intermediate_augmentations(cfg):
 20 |     """
 21 |     Build the intermediate augmentor, defined by ``cfg.INTERMEDIATE_AUGMENTOR``.
 22 |     """
 23 |     augmentations = []
 24 |     for aug_name in cfg.INTERMEDIATE_AUGMENTOR.AUG_LIST:
 25 |         aug = INTERMEDIATE_AUGMENTOR_REGISTRY.get(aug_name)(cfg)
 26 |         aug.to(torch.device(cfg.MODEL.DEVICE))
 27 |         augmentations.append(aug)
 28 |      
 29 |     return augmentations
 30 | 
 31 | 
 32 | class IntermediateAugmentor(nn.Module, metaclass=ABCMeta):
 33 |     """
 34 |     Abstract base class for intermediate augmentors.
 35 |     apply_image transformations needs to be differentiable!
 36 |     """
 37 |     def __init__(
 38 |         self,
 39 |         skip_coords=False,
 40 |     ):
 41 |         super().__init__()
 42 |         self.skip_coords = skip_coords
 43 |     
 44 |     @abstractmethod
 45 |     def apply_image(self, image):
 46 |         """
 47 |         Apply transformation to the images
 48 |         """
 49 |         pass
 50 |     
 51 |     def apply_coords(self, coords):
 52 |         """
 53 |         Apply transformation to the coordinates of the labels
 54 |         """
 55 |         return coords
 56 |     
 57 |     def generate_params(self, image, gt_instances, strength=None):
 58 |         """
 59 |         Generates random numbers required to apply augmentations
 60 |         """
 61 |         return
 62 | 
 63 |     def apply_instances(self, gt_instances):
 64 |         if self.skip_coords or gt_instances is None:
 65 |             return gt_instances
 66 |         
 67 |         with torch.no_grad():
 68 |             if gt_instances.has("gt_sample_locs"):
 69 |                 sample_loc_shape = gt_instances.gt_sample_locs.shape
 70 |                 gt_instances.gt_sample_locs = self.apply_coords(gt_instances.gt_sample_locs.view(-1,2)).view(sample_loc_shape)
 71 |             if gt_instances.has("gt_segm"):
 72 |                 gt_instances.gt_segm = self.apply_coords(gt_instances.gt_segm.view(-1,2)).view(-1,8,2)
 73 |             elif gt_instances.has("gt_masks"):
 74 |                 device, dtype = gt_instances.gt_boxes.device, gt_instances.gt_boxes.tensor.dtype
 75 |                 polygons = torch.as_tensor(gt_instances.gt_masks.polygons, dtype=dtype, device=device)
 76 |                 polygons = self.apply_coords(polygons.view(-1,2)).view(-1,8)
 77 |                 gt_instances.gt_masks.polygons = [[i.cpu().numpy()] for i in polygons]
 78 |             elif gt_instances.has("gt_boxes"):
 79 |                 gt_instances.gt_boxes.tensor = self.apply_coords(gt_instances.gt_boxes.tensor.view(-1,2)).view(-1,4)
 80 |             
 81 |         return gt_instances
 82 |     
 83 |     @classmethod
 84 |     def fix_instances(cls, gt_instances):
 85 |         if gt_instances.has("gt_segm"):
 86 |             min_c, max_c = torch.min(gt_instances.gt_segm, dim=1)[0], torch.max(gt_instances.gt_segm, dim=1)[0]
 87 |             gt_instances.gt_boxes.tensor = torch.cat([min_c, max_c], dim=1)
 88 |             gt_instances.gt_corners = gt_instances.gt_segm[:,[0,2,4,6]]
 89 |         elif gt_instances.has("gt_masks"):
 90 |             device, dtype = gt_instances.gt_boxes.device, gt_instances.gt_boxes.tensor.dtype
 91 |             polygons = torch.as_tensor(gt_instances.gt_masks.polygons, dtype=dtype, device=device).view(-1,4,2)
 92 |             min_c, max_c = torch.min(polygons, dim=1)[0], torch.max(polygons, dim=1)[0]
 93 |             gt_instances.gt_boxes.tensor = torch.cat([min_c, max_c], dim=1)
 94 |         
 95 |         # # Convert segmentation to polygon masks
 96 |         # segm = gt_instances.gt_segm.flatten(start_dim=1)
 97 |         # polygons_per_instance = torch.chunk(segm, segm.shape[0])
 98 |         # polygon_masks = []
 99 |         # for instance in polygons_per_instance:
100 |         #     polygon_masks.append([instance.squeeze().cpu()])
101 |         # gt_instances._fields["gt_masks"] = PolygonMasks(polygon_masks)
102 |         # gt_instances.remove("gt_segm")
103 |         return gt_instances
104 | 
105 |     def forward(self, image, gt_instances):
106 |         # image.shape is (C, H, W)
107 |         self.generate_params(image, gt_instances)
108 |         image = self.apply_image(image)
109 |         if not self.skip_coords:
110 |             gt_instances = self.apply_instances(gt_instances)
111 |         return image, gt_instances
112 | 


--------------------------------------------------------------------------------
/deepformable/modeling/intermediate_augmentor/color_augmentations.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This code implemented by Andreas Meulueman and Mustafa B. Yaldiz
  3 | Copyright (c) (VCLAB, KAIST) All Rights Reserved.
  4 | """
  5 | import torch
  6 | from torch import nn
  7 | import torch.nn.functional as F
  8 | import kornia
  9 | import numpy as np
 10 | 
 11 | import detectron2
 12 | from detectron2.config import configurable
 13 | 
 14 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor
 15 | from deepformable.utils import (
 16 |     get_disk_blur_kernel, sample_param, 
 17 |     rgb_to_hls, hls_to_rgb,
 18 | )
 19 | 
 20 | 
 21 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
 22 | class GammaAugmentor(IntermediateAugmentor):
 23 |     @configurable
 24 |     def __init__(
 25 |         self,
 26 |         *,        
 27 |         gamma_range,
 28 |     ):
 29 |         super().__init__(True)
 30 |         self.gamma_range = gamma_range
 31 | 
 32 |     @classmethod
 33 |     def from_config(cls, cfg):
 34 |         return {
 35 |             "gamma_range": cfg.INTERMEDIATE_AUGMENTOR.GammaAugmentor.GAMMA_RANGE,
 36 |         }
 37 | 
 38 |     def apply_image(self, image):
 39 |         return (F.relu(image) + 1e-8) ** self.gamma
 40 | 
 41 |     def generate_params(self, image, gt_instances, strength=None):
 42 |         self.gamma = sample_param(
 43 |             self.gamma_range, strength=strength, 
 44 |             training=self.training, device=image.device)
 45 | 
 46 | 
 47 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
 48 | class GammaCorrector(IntermediateAugmentor):
 49 |     @configurable
 50 |     def __init__(
 51 |         self,
 52 |         gamma,
 53 |     ):
 54 |         super().__init__(True)
 55 |         self.register_buffer("gamma", torch.tensor(1.0/gamma), False)
 56 | 
 57 |     @classmethod
 58 |     def from_config(cls, cfg):
 59 |         return {"gamma": cfg.RENDERER.GAMMA}
 60 | 
 61 |     def apply_image(self, image):
 62 |         return torch.clamp((F.relu(image) + 1e-8) ** self.gamma.to(image.device), 0, 1)
 63 | 
 64 | 
 65 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
 66 | class DefocusBlurAugmentor(IntermediateAugmentor):
 67 |     @configurable
 68 |     def __init__(
 69 |         self,
 70 |         *,
 71 |         blur_radius_range,
 72 |     ):
 73 |         super().__init__(True)
 74 |         self.blur_radius_range = blur_radius_range
 75 | 
 76 |     @classmethod
 77 |     def from_config(cls, cfg):
 78 |         return {
 79 |             "blur_radius_range": cfg.INTERMEDIATE_AUGMENTOR.DefocusBlurAugmentor.BLUR_RADIUS_RANGE,
 80 |         }
 81 | 
 82 |     def apply_image(self, image):
 83 |         pad = self.kernel.size(-1)//2
 84 |         padded_image = F.pad(image.unsqueeze(0),
 85 |             pad=(pad, pad, pad, pad),
 86 |             mode="reflect")
 87 |         return F.conv2d(
 88 |             padded_image,
 89 |             self.kernel.expand(3,1,self.kernel.shape[-1], self.kernel.shape[-1]),
 90 |             groups=3, padding=0)[0]
 91 | 
 92 |     def generate_params(self, image, gt_instances, strength=None):
 93 |         device = image.device
 94 |         blur_radius = sample_param(
 95 |             self.blur_radius_range, strength=strength, 
 96 |             training=self.training, device=device)
 97 |         self.kernel = get_disk_blur_kernel(blur_radius, device=device)
 98 | 
 99 | 
100 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
101 | class MotionBlurAugmentor(IntermediateAugmentor):
102 |     @configurable
103 |     def __init__(
104 |         self,
105 |         *,
106 |         blur_radius_range,
107 |     ):
108 |         super().__init__(True)
109 |         self.blur_radius_range = blur_radius_range
110 | 
111 |     @classmethod
112 |     def from_config(cls, cfg):
113 |         return {
114 |             "blur_radius_range": cfg.INTERMEDIATE_AUGMENTOR.MotionBlurAugmentor.BLUR_RADIUS_RANGE,
115 |         }
116 | 
117 |     def apply_image(self, image):
118 |         return kornia.filters.motion_blur(
119 |             image.unsqueeze(0), self.blur_radius, self.angle, 
120 |             self.direction, border_type='replicate', mode='bilinear')[0]
121 | 
122 |     def generate_params(self, image, gt_instances, strength=None):
123 |         device = image.device
124 |         blur_radius = sample_param(
125 |             self.blur_radius_range, shape=(1,), strength=strength, 
126 |             training=self.training, device=device)
127 |         self.blur_radius = (torch.round(blur_radius).int()*2+1).item()
128 |         self.angle = sample_param(
129 |             (0,180,30), shape=(1,),
130 |             training=self.training, device=device) # Blur at 30° for testing
131 |         self.direction = torch.zeros(1, device=image.device)
132 | 
133 | 
134 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
135 | class HueShiftAugmentor(IntermediateAugmentor):
136 |     @configurable
137 |     def __init__(
138 |         self,
139 |         *,
140 |         hue_shift_range,
141 |     ):
142 |         super().__init__(True)
143 |         self.hue_shift_range = hue_shift_range
144 | 
145 |     @classmethod
146 |     def from_config(cls, cfg):
147 |         return {
148 |             "hue_shift_range": cfg.INTERMEDIATE_AUGMENTOR.HueShiftAugmentor.HUE_SHIFT_RANGE,
149 |         }
150 | 
151 |     def apply_image(self, image):
152 |         image = torch.clamp(image, 0, 1 - 1e-6)
153 |         hsv = rgb_to_hls(image)
154 |         hsv[0, :, :] = torch.fmod(hsv[0, :, :] + self.hue_shift * kornia.constants.pi, 2*kornia.constants.pi)
155 |         return hls_to_rgb(hsv)
156 | 
157 |     def generate_params(self, image, gt_instances, strength=None):
158 |         device = image.device
159 |         self.hue_shift = sample_param(
160 |             self.hue_shift_range, strength=strength, 
161 |             training=self.training, device=device)
162 | 
163 | 
164 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
165 | class BrightnessAugmentor(IntermediateAugmentor):
166 |     @configurable
167 |     def __init__(
168 |         self,
169 |         *,
170 |         brightness_range,
171 |     ):
172 |         super().__init__(True)
173 |         self.brightness_range = brightness_range
174 | 
175 |     @classmethod
176 |     def from_config(cls, cfg):
177 |         return {
178 |             "brightness_range": cfg.INTERMEDIATE_AUGMENTOR.BrightnessAugmentor.BRIGHTNESS_RANGE, 
179 |         }
180 |     def apply_image(self, image):
181 |         return image * self.factor
182 |     
183 |     def generate_params(self, image, gt_instances, strength=None):
184 |         device = image.device
185 |         if strength == None:
186 |             self.factor = sample_param(
187 |                 self.brightness_range, training=self.training, device=device)
188 |         else:
189 |             brightness_range = (self.brightness_range[0], 1.0, 0.4)
190 |             self.factor = sample_param(
191 |                 brightness_range, strength=1.0 - strength,
192 |                 training=self.training, device=device)
193 | 
194 | 
195 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
196 | class NoiseAugmentor(IntermediateAugmentor):
197 |     @configurable
198 |     def __init__(
199 |         self,
200 |         *,       
201 |         noise_range,
202 |     ):
203 |         super().__init__(True)
204 |         self.noise_range=noise_range
205 | 
206 |     @classmethod
207 |     def from_config(cls, cfg):
208 |         return {
209 |             "noise_range": cfg.INTERMEDIATE_AUGMENTOR.NoiseAugmentor.NOISE_RANGE,
210 |         }
211 | 
212 |     def apply_image(self, image):
213 |         return image + self.sigma * torch.randn(image.shape, device=image.device)
214 | 
215 |     def generate_params(self, image, gt_instances, strength=None):
216 |         self.sigma = sample_param(
217 |             self.noise_range, strength=strength, 
218 |             training=self.training, device=image.device)
219 | 


--------------------------------------------------------------------------------
/deepformable/modeling/intermediate_augmentor/jpeg_augmentor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | JPEG compression augmentation, see https://github.com/ando-khachatryan/HiDDeN
  3 | Modified by Andreas Meulueman and Mustafa B. Yaldiz.
  4 | Copyright (c) 2018 ando-khachatryan
  5 | """
  6 | import torch
  7 | from torch import nn
  8 | import torch.nn.functional as F
  9 | import kornia
 10 | import numpy as np
 11 | 
 12 | import detectron2
 13 | from detectron2.config import configurable
 14 | 
 15 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor
 16 | from deepformable.utils import sample_param
 17 | 
 18 | 
 19 | def gen_filters(size_x: int, size_y: int, dct_or_idct_fun: callable) -> np.ndarray:
 20 |     tile_size_x = 8
 21 |     filters = np.zeros((size_x * size_y, size_x, size_y))
 22 |     for k_y in range(size_y):
 23 |         for k_x in range(size_x):
 24 |             for n_y in range(size_y):
 25 |                 for n_x in range(size_x):
 26 |                     filters[k_y * tile_size_x + k_x, n_y, n_x] = dct_or_idct_fun(n_y, k_y, size_y) * dct_or_idct_fun(n_x,
 27 |                                                                                                             k_x,
 28 |                                                                                                             size_x)
 29 |     return filters
 30 | 
 31 | def create_jpeg_masks(min_keep:int=1, max_keep:int=64):
 32 |     index_order = np.array(
 33 |         sorted(((x, y) for x in range(8) for y in range(8)),
 34 |         key=lambda p: (p[0] + p[1], -p[1] if (p[0] + p[1]) % 2 else p[1]))
 35 |     )
 36 |     masks = []
 37 |     for keep_count in range(min_keep, max_keep):
 38 |         mask = np.zeros((8, 8))
 39 |         mask[index_order[:keep_count,0], index_order[:keep_count,1]] = 1
 40 |         masks.append(mask)
 41 |     return np.stack(masks,axis=0)
 42 | 
 43 | def dct_coeff(n, k, N):
 44 |     return np.cos(np.pi / N * (n + 1. / 2.) * k)
 45 | 
 46 | def idct_coeff(n, k, N):
 47 |     return (int(0 == n) * (- 1 / 2) + np.cos(
 48 |         np.pi / N * (k + 1. / 2.) * n)) * np.sqrt(1 / (2. * N))
 49 | 
 50 | 
 51 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
 52 | class JPEGAugmentor(IntermediateAugmentor):
 53 |     @configurable
 54 |     def __init__(
 55 |         self,
 56 |         *,
 57 |         y_range,
 58 |         uv_range,
 59 |         max_image_size,
 60 |     ):
 61 |         super().__init__(True)
 62 |         self.y_range = y_range
 63 |         self.uv_range = uv_range
 64 |         self.register_buffer("dct_conv_weights",
 65 |             torch.tensor(gen_filters(8, 8, dct_coeff), dtype=torch.float32).unsqueeze(1), False)
 66 |         self.register_buffer("idct_conv_weights",
 67 |             torch.tensor(gen_filters(8, 8, idct_coeff), dtype=torch.float32).view(64,64,1,1), False)
 68 |         self.register_buffer("jpeg_masks",
 69 |             torch.tensor(create_jpeg_masks(), dtype=torch.float32), False)
 70 | 
 71 |     @property
 72 |     def device(self):
 73 |         return self.dct_conv_weights.device
 74 | 
 75 |     @classmethod
 76 |     def from_config(cls, cfg):
 77 |         return {
 78 |             "y_range": cfg.INTERMEDIATE_AUGMENTOR.JPEGAugmentor.Y_QUALITY_RANGE,
 79 |             "uv_range": cfg.INTERMEDIATE_AUGMENTOR.JPEGAugmentor.UV_QUALITY_RANGE,
 80 |             "max_image_size": cfg.INTERMEDIATE_AUGMENTOR.MAX_IMAGE_SIZE,
 81 |         }
 82 | 
 83 |     def apply_image(self, image):
 84 |         image = image.unsqueeze(0)
 85 |         N, C, H, W = image.shape
 86 | 
 87 |         mask = self.jpeg_masks[None, self.mask_keep_weights].view(1,C,8,8,1,1)
 88 | 
 89 |         # Convert to YUV
 90 |         if C == 3:
 91 |             image = kornia.color.rgb_to_yuv(image)
 92 |         # Pad image
 93 |         image_padded = F.pad(image, (0, (8 - W) % 8, 0, (8 - H) % 8), 'replicate')
 94 |         H_pad, W_pad = image_padded.shape[-2:]
 95 |         
 96 |         # Apply dct transform
 97 |         image_dct = F.conv2d(
 98 |             image_padded, self.dct_conv_weights.repeat(C,1,1,1), 
 99 |             stride=8, groups=C)
100 |         image_dct = image_dct.view(N,C,8,8,*image_dct.shape[-2:])
101 |         # Mask in dct domain
102 |         image_dct_masked = image_dct * mask
103 |         # Convert back to idct
104 |         image_idct = F.conv2d(
105 |             image_dct_masked.view(N, C*64, *image_dct.shape[-2:]),
106 |             self.idct_conv_weights.repeat(C,1,1,1), groups=C)
107 |         image_idct = image_idct.view(N,3,8,8,*image_dct.shape[-2:])\
108 |                         .permute(0,1,4,2,5,3).contiguous().view(-1,C,H_pad,W_pad)
109 | 
110 |         # Convert back to RGB
111 |         if C == 3:
112 |             image_idct = kornia.color.yuv_to_rgb(image_idct)
113 |         
114 |         return torch.clamp(image_idct[0,:,:H,:W], 0, 1)
115 |     
116 |     def generate_params(self, image, gt_instances, strength=None):
117 |         if image.device != self.device:
118 |             self.to(image.device)
119 |         y_weight = sample_param(
120 |             self.y_range, strength=strength, 
121 |             training=self.training, device=self.device)
122 |         if image.shape[0] == 3:
123 |             u_weight, v_weight = sample_param(
124 |                 self.uv_range, shape=(2,), strength=strength, 
125 |                 training=self.training, device=self.device)
126 |             self.mask_keep_weights = (int(y_weight), int(u_weight.item()), int(v_weight.item()))
127 |         else:
128 |             self.mask_keep_weights = (int(y_weight),) * image.shape[0]


--------------------------------------------------------------------------------
/deepformable/modeling/intermediate_augmentor/perspective_augmentor.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This code implemented by Andreas Meulueman and Mustafa B. Yaldiz
 3 | Copyright (c) (VCLAB, KAIST) All Rights Reserved.
 4 | """
 5 | import itertools
 6 | 
 7 | import torch
 8 | from torch import nn
 9 | 
10 | import kornia
11 | 
12 | from detectron2.config import configurable
13 | 
14 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor
15 | from deepformable.utils import sample_param
16 | 
17 | 
18 | def create_perspective_sampling_grid(image_size, target_corners, device):
19 |     target_corners = torch.tensor([
20 |         [target_corners[0, 0], target_corners[0, 1]],
21 |         [target_corners[1, 0], 1 - target_corners[1, 1]],
22 |         [1 - target_corners[2, 0], target_corners[2, 1]],
23 |         [1 - target_corners[3, 0], 1 - target_corners[3, 1]]], device=device)
24 |     
25 |     target_corners[:, 0] = target_corners[:, 0] * (image_size[1])
26 |     target_corners[:, 1] = target_corners[:, 1] * (image_size[0])
27 | 
28 |     source_corners = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], device=device).float()
29 |     source_corners[:, 0] = source_corners[:, 0] * (image_size[1])
30 |     source_corners[:, 1] = source_corners[:, 1] * (image_size[0])
31 | 
32 |     homography = kornia.geometry.find_homography_dlt(source_corners.unsqueeze(0), 
33 |         target_corners.unsqueeze(0), 
34 |         torch.ones(1, 4, device=device))
35 | 
36 |     x, y = torch.meshgrid([torch.arange(0, image_size[0], device=device), 
37 |         torch.arange(0, image_size[1], device=device)])
38 | 
39 |     x = x.float() + 0.5
40 |     y = y.float() + 0.5
41 |     coord = torch.cat((y.unsqueeze(-1), x.unsqueeze(-1)), 2).view(-1, 2).float()
42 | 
43 |     grid = kornia.geometry.linalg.transform_points(
44 |         torch.inverse(homography), coord).view(image_size[0], image_size[1], 2)
45 |     grid = (grid / torch.tensor([image_size[1], image_size[0]], device=device).view(1, 1, 2)) * 2 - 1
46 |     
47 |     return grid, homography
48 | 
49 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
50 | class PerspectiveAugmentor(IntermediateAugmentor):
51 |     @configurable
52 |     def __init__(
53 |         self,
54 |         *,
55 |         corner_shift_range,
56 |     ):
57 |         super().__init__(False)
58 |         self.corner_shift_range=corner_shift_range
59 | 
60 |     @classmethod
61 |     def from_config(cls, cfg):
62 |         return {
63 |             "corner_shift_range": cfg.INTERMEDIATE_AUGMENTOR.PerspectiveAugmentor.CORNER_SHIFT_RANGE,
64 |         }
65 | 
66 |     def apply_image(self, image):
67 |         return torch.nn.functional.grid_sample(image.unsqueeze(0), self.grid.unsqueeze(0), align_corners=False)[0]
68 |     
69 |     def apply_coords(self, coords):
70 |         return kornia.geometry.linalg.transform_points(self.homography, coords.unsqueeze(0))[0]
71 | 
72 |     def generate_params(self, image, gt_instances, strength=None):
73 |         device = image.device
74 |         if self.training:
75 |             target_corners = sample_param(
76 |                 self.corner_shift_range, shape=(4,2),
77 |                 strength=strength, device=device)
78 |         else:
79 |             target_corners = torch.zeros((4, 2), device=device)
80 |             target_corners[[0,1],:] = sample_param(
81 |                 self.corner_shift_range, strength=strength, 
82 |                 training=False, device=device)
83 |         self.grid, self.homography = create_perspective_sampling_grid(
84 |             image.shape[-2:], target_corners, device=device)


--------------------------------------------------------------------------------
/deepformable/modeling/intermediate_augmentor/radial_distortion_augmentor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This code implemented by Andreas Meulueman and Mustafa B. Yaldiz
  3 | Copyright (c) (VCLAB, KAIST) All Rights Reserved.
  4 | """
  5 | import itertools
  6 | 
  7 | import torch
  8 | import torch.nn.functional as F
  9 | 
 10 | from detectron2.config import configurable
 11 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor
 12 | from deepformable.utils import sample_param
 13 | 
 14 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
 15 | class RadialDistortionAugmentor(IntermediateAugmentor):
 16 |     @configurable
 17 |     def __init__(
 18 |         self,
 19 |         *,
 20 |         undistort_iter,
 21 |         focal_length_range,
 22 |         center_shift_range,
 23 |         distortion_range
 24 |     ):
 25 |         super().__init__(False)
 26 |         self.undistort_iter=undistort_iter
 27 |         self.focal_length_range = focal_length_range
 28 |         self.center_shift_range = center_shift_range
 29 |         self.distortion_range = distortion_range
 30 | 
 31 |     @classmethod
 32 |     def from_config(cls, cfg):
 33 |         return {
 34 |             "undistort_iter": cfg.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.UNDISTORT_ITER,
 35 |             "focal_length_range": cfg.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.FOCAL_LENGTH_RANGE,
 36 |             "center_shift_range": cfg.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.CENTER_SHIFT_RANGE,
 37 |             "distortion_range": cfg.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.DISTORTION_RANGE,
 38 |         }
 39 | 
 40 |     def apply_image(self, image):
 41 |         return F.grid_sample(image.unsqueeze(0), self.grid.unsqueeze(0), align_corners=False)[0]
 42 |     
 43 |     def distort(self, coord):
 44 |         xy = (coord.float() - self.center.view(1, 2)) / self.focal_length.view(1, 2)
 45 | 
 46 |         r2 = xy[:, 0]**2 + xy[:, 1]**2
 47 | 
 48 |         r2_distorted = 1
 49 |         for i in range(self.k.shape[0]):
 50 |             r2_distorted = r2_distorted + self.k[i] * r2**(i+1)
 51 | 
 52 |         xy_distorted = xy * r2_distorted.unsqueeze(-1)
 53 | 
 54 |         return xy_distorted * self.focal_length.view(1, 2) + self.center.view(1, 2)
 55 | 
 56 |     def undistort(self, coord):
 57 |         xy = (coord.float() - self.center.view(1, 2)) / self.focal_length.view(1, 2)
 58 |         xy0 = xy.clone()
 59 | 
 60 |         for iteration in range(self.undistort_iter):
 61 |             r2 = (xy[:, 0])**2 + (xy[:, 1])**2
 62 | 
 63 |             # This works up to the third order 
 64 |             #r2_undistorted = (1+((self.k[5]*r2 + self.k[4])*r2 + self.k[3])*r2)/(1 + ((self.k[2]*r2 + self.k[1])*r2 + self.k[0])*r2)
 65 |             r2_undistorted = (1 + ((self.k[2]*r2 + self.k[1])*r2 + self.k[0])*r2)
 66 | 
 67 |             xy = xy0 / r2_undistorted.unsqueeze(-1)
 68 |             x = xy[:, 0]
 69 |             x[r2_undistorted < 0] = -1
 70 |             y = xy[:, 1]
 71 |             y[r2_undistorted < 0] = -1
 72 | 
 73 |         return xy * self.focal_length.view(1, 2) + self.center.view(1, 2)
 74 | 
 75 |     def apply_coords(self, coords):
 76 |         return self.distort(coords)
 77 | 
 78 |     def generate_params(self, image, gt_instances, strength=None):
 79 |         image_size, device = image.shape[-2:], image.device
 80 |         image_size_xy = torch.tensor([image_size[1], image_size[0]], device=device)
 81 | 
 82 |         self.focal_length = sample_param(
 83 |             self.focal_length_range, shape=(2,), 
 84 |             strength = None if strength == None else 1.0-strength,
 85 |             training=self.training, device=device) * torch.max(image_size_xy)
 86 |         center_shift = sample_param(
 87 |             self.center_shift_range, shape=(2,), strength=strength,
 88 |             training=self.training, device=device)
 89 |         if self.training:
 90 |             center_shift *= 1 if torch.randn(1) > 0.5 else -1
 91 |         self.center = (0.5 + center_shift) * image_size_xy
 92 |         self.k = -sample_param(
 93 |             self.distortion_range, shape=(3,), strength=strength,
 94 |             training=self.training, device=device)
 95 | 
 96 |         x, y = torch.meshgrid([torch.arange(0, image_size[0], device=device), 
 97 |             torch.arange(0, image_size[1], device=device)])
 98 |         x = x.float() + 0.5
 99 |         y = y.float() + 0.5
100 |         coord = torch.cat((y.unsqueeze(-1), x.unsqueeze(-1)), 2).view(-1, 2)
101 | 
102 |         grid = self.undistort(coord).view(image_size[0], image_size[1], 2)
103 |         self.grid = (grid / image_size_xy.view(1, 1, 2)) * 2 - 1
104 | 


--------------------------------------------------------------------------------
/deepformable/modeling/intermediate_augmentor/tps_augmentor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This code implemented by Andreas Meulueman and Mustafa B. Yaldiz
  3 | Copyright (c) (VCLAB, KAIST) All Rights Reserved.
  4 | """
  5 | import itertools
  6 | 
  7 | import torch
  8 | from torch import nn
  9 | import torch.nn.functional as F
 10 | 
 11 | from detectron2.config import configurable
 12 | 
 13 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor
 14 | from deepformable.utils import sample_param
 15 | 
 16 | # phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2
 17 | def compute_partial_repr(input_points, control_points):
 18 |     N = input_points.size(0)
 19 |     M = control_points.size(0)
 20 |     pairwise_diff = input_points.view(N, 1, 2) - control_points.view(1, M, 2)
 21 |     # original implementation, very slow
 22 |     # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance
 23 |     pairwise_diff_square = pairwise_diff * pairwise_diff
 24 |     del pairwise_diff
 25 |     pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, 1]
 26 |     del pairwise_diff_square
 27 | 
 28 |     repr_matrix = 0.5 * pairwise_dist * torch.log(pairwise_dist)
 29 |     del pairwise_dist
 30 |     # fix numerical error for 0 * log(0), substitute all nan with 0
 31 |     # mask = repr_matrix != repr_matrix
 32 |     repr_matrix.masked_fill_(repr_matrix != repr_matrix, 0)
 33 | 
 34 |     return repr_matrix
 35 | 
 36 | class TPSGridGen(nn.Module):
 37 |     def __init__(self, target_height, target_width, target_control_points):
 38 |         super(TPSGridGen, self).__init__()
 39 |         assert target_control_points.ndimension() == 2
 40 |         assert target_control_points.size(1) == 2
 41 |         N = target_control_points.size(0)
 42 |         self.num_points = N
 43 |         self.height = target_height
 44 |         self.width = target_width
 45 | 
 46 |         # create padded kernel matrix
 47 |         forward_kernel = torch.zeros(N + 3, N + 3)
 48 |         target_control_partial_repr = compute_partial_repr(target_control_points, target_control_points)
 49 |         forward_kernel[:N, :N].copy_(target_control_partial_repr)
 50 |         forward_kernel[:N, -3].fill_(1)
 51 |         forward_kernel[-3, :N].fill_(1)
 52 |         forward_kernel[:N, -2:].copy_(target_control_points)
 53 |         forward_kernel[-2:, :N].copy_(target_control_points.transpose(0, 1))
 54 |         # compute inverse matrix
 55 |         inverse_kernel = torch.inverse(forward_kernel)
 56 | 
 57 |         # create target coordinate matrix
 58 |         HW = target_height * target_width
 59 |         y, x = torch.meshgrid([torch.arange(0, target_height), 
 60 |             torch.arange(0, target_width)])
 61 |         x = x.reshape(HW, 1).float() + 0.5
 62 |         y = y.reshape(HW, 1).float() + 0.5
 63 |         y = y * 2 / (target_height) - 1
 64 |         x = x * 2 / (target_width) - 1
 65 |         target_coordinate = torch.cat([x, y], dim = 1) # convert from (y, x) to (x, y)
 66 |         # print(x.shape)
 67 |         del x
 68 |         del y
 69 |         target_coordinate_partial_repr = compute_partial_repr(target_coordinate, target_control_points)
 70 |         target_coordinate_repr = torch.cat([
 71 |             target_coordinate_partial_repr, torch.ones(HW, 1), target_coordinate
 72 |         ], dim = 1)
 73 | 
 74 |         padding_matrix = torch.zeros(3, 2)
 75 | 
 76 |         self.inverse_kernel = inverse_kernel
 77 |         self.padding_matrix = padding_matrix
 78 |         self.target_coordinate_repr = target_coordinate_repr
 79 | 
 80 |     @property
 81 |     def device(self):
 82 |         return self.inverse_kernel.device
 83 | 
 84 |     def _apply(self, fn):
 85 |         super(TPSGridGen, self)._apply(fn)
 86 |         self.inverse_kernel = fn(self.inverse_kernel)
 87 |         self.padding_matrix = fn(self.padding_matrix)
 88 |         self.target_coordinate_repr = fn(self.target_coordinate_repr)
 89 |         return self
 90 | 
 91 |     def forward(self, source_control_points):
 92 |         assert source_control_points.ndimension() == 3
 93 |         assert source_control_points.size(1) == self.num_points
 94 |         assert source_control_points.size(2) == 2
 95 |         batch_size = source_control_points.size(0)
 96 | 
 97 |         Y = torch.cat([source_control_points, (self.padding_matrix.expand(batch_size, 3, 2))], 1)
 98 |         mapping_matrix = torch.matmul((self.inverse_kernel), Y)
 99 |         source_coordinate = torch.matmul((self.target_coordinate_repr), mapping_matrix)
100 |         return source_coordinate
101 | 
102 | 
103 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register()
104 | class TpsAugmentor(IntermediateAugmentor):
105 |     """
106 |     Transformation with thin plate spline
107 |     """
108 |     @configurable
109 |     def __init__(
110 |         self,
111 |         *,
112 |         ctrl_pts_size,
113 |         max_image_size,
114 |         warp_range,
115 |         stop_threshold,
116 |         max_iter,
117 |     ):
118 |         super().__init__(False)
119 | 
120 |         target_control_points = torch.Tensor(list(itertools.product(
121 |             torch.arange(-1.0, 1.000001, 2.0 / ctrl_pts_size[0]),
122 |             torch.arange(-1.0, 1.000001, 2.0 / ctrl_pts_size[1]),
123 |         ))).float()
124 | 
125 |         self.warp_range = warp_range
126 |         self.stop_threshold = stop_threshold
127 |         self.max_iter = max_iter
128 |         self.max_image_size = max_image_size
129 |         self.max_image_size_xy = torch.tensor([max_image_size[1], max_image_size[0]])
130 | 
131 |         self.tps_grid_generator = TPSGridGen(*max_image_size, target_control_points)
132 |         self.target_control_points = target_control_points
133 |     
134 |     def _apply(self, fn):
135 |         super(TpsAugmentor, self)._apply(fn)
136 |         self.tps_grid_generator = self.tps_grid_generator._apply(fn)
137 |         self.target_control_points = fn(self.target_control_points)
138 |         self.max_image_size_xy = fn(self.max_image_size_xy)
139 |         return self
140 |     
141 |     @classmethod
142 |     def from_config(cls, cfg):
143 |         return {
144 |             "ctrl_pts_size": cfg.INTERMEDIATE_AUGMENTOR.TpsTransformer.CTRL_PTS_SIZE,
145 |             "max_image_size": cfg.INTERMEDIATE_AUGMENTOR.MAX_IMAGE_SIZE,
146 |             "warp_range": cfg.INTERMEDIATE_AUGMENTOR.TpsTransformer.WARP_RANGE,
147 |             "stop_threshold": cfg.INTERMEDIATE_AUGMENTOR.TpsTransformer.STOP_THRESHOLD,
148 |             "max_iter": cfg.INTERMEDIATE_AUGMENTOR.TpsTransformer.MAX_ITER,
149 |         }
150 | 
151 |     def apply_image(self, image):
152 |         return F.grid_sample(image.unsqueeze(0), self.grid, align_corners=False)[0]
153 |     
154 |     def apply_coords(self, coords):
155 |         device = coords.device
156 |         coords = coords * (2.0 / self.max_image_size_xy) - 1.0
157 | 
158 |         warped_coords0 = coords.clone()
159 |         converged, i = False, 0
160 |         while not converged:
161 |             coords_partial_repr = compute_partial_repr(warped_coords0, self.target_control_points)
162 |             coords_repr = torch.cat([
163 |                 coords_partial_repr, torch.ones(coords_partial_repr.shape[0], 1, device=device), warped_coords0
164 |             ], dim = 1)
165 | 
166 |             Y = torch.cat([self.source_control_points.unsqueeze(0), (self.tps_grid_generator.padding_matrix.expand(1, 3, 2))], 1)
167 |             mapping_matrix = torch.matmul((self.tps_grid_generator.inverse_kernel), Y)
168 |             warped_coords1 = torch.matmul((coords_repr), mapping_matrix)[0]
169 |             coord_dev = warped_coords1 - coords
170 |             warped_coords0 = warped_coords0 - coord_dev
171 |             i+=1
172 |             converged = i > self.max_iter or torch.max(torch.abs(coord_dev)) * max(self.max_image_size) < self.stop_threshold
173 | 
174 |         if i > self.max_iter:
175 |             print("Failed to converge. l_inf norm is: ", (torch.max(torch.abs(coord_dev)) * max(self.max_image_size)).item())
176 | 
177 |         return (warped_coords0 + 1.0) * (self.max_image_size_xy / 2.0)
178 | 
179 |     def generate_params(self, image, gt_instances, strength=None):
180 |         image_size, device = image.shape[-2:], image.device
181 |         self.image_size_xy = torch.tensor(
182 |             [image_size[1], image_size[0]], device=device)
183 |         if device != self.tps_grid_generator.device:
184 |             print(device, "is not equal to", self.tps_grid_generator.device)
185 |             self.to(device)
186 | 
187 |         ctrl_pts_displacement = sample_param(
188 |             self.warp_range, strength=strength,
189 |             training=self.training, device=device)
190 | 
191 |         source_control_points = self.target_control_points \
192 |             + (torch.rand(self.target_control_points.size(), device=device) * 2 - 1) * ctrl_pts_displacement
193 |         source_control_points[self.target_control_points <= -1 + ctrl_pts_displacement] = -1 - ctrl_pts_displacement
194 |         source_control_points[self.target_control_points >= 1 - ctrl_pts_displacement] = 1 + ctrl_pts_displacement
195 |         self.source_control_points = source_control_points
196 | 
197 |         source_coordinate = self.tps_grid_generator(torch.unsqueeze(source_control_points, 0))
198 |         grid_cropped = source_coordinate.view(1, *self.max_image_size, 2)[:,:image_size[0], :image_size[1]]
199 |         # Renormalize the grid
200 |         self.grid = ((grid_cropped + 1.0) * (self.max_image_size_xy / (self.image_size_xy * 2.0))) * 2.0 - 1.0
201 | 


--------------------------------------------------------------------------------
/deepformable/modeling/marker_generator/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .build import MARKER_GENERATOR_REGISTRY, build_marker_generator
3 | from .generalized_generator import GeneralizedGenerator, KDTreeClassPredictor
4 | from .aruco_generator import ArucoGenerator
5 | # You need to install AprilTag, described in Dockerfile
6 | # from .april_generator import AprilGenerator
7 | 


--------------------------------------------------------------------------------
/deepformable/modeling/marker_generator/april_generator.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | import logging
 3 | import numpy as np
 4 | import cv2
 5 | from cv2 import aruco
 6 | from apriltag import apriltag
 7 | 
 8 | from detectron2.config import configurable
 9 | 
10 | from .build import MARKER_GENERATOR_REGISTRY
11 | from deepformable.utils import get_aruco_dict
12 | from .aruco_generator import ArucoGenerator
13 | 
14 | 
15 | @MARKER_GENERATOR_REGISTRY.register()
16 | class AprilGenerator(ArucoGenerator):
17 |     @configurable
18 |     def __init__(
19 |         self,
20 |         *,
21 |         april_dict,
22 |         border_bits,
23 |         num_classes,
24 |         shuffling,
25 |         vis_period=0
26 |     ):
27 |         super().__init__(
28 |             aruco_dict=april_dict, 
29 |             border_bits=border_bits, 
30 |             num_classes=num_classes, 
31 |             shuffling=shuffling, 
32 |             vis_period=vis_period)
33 |         self.detector = apriltag("tag36h11")
34 |         
35 | 
36 |     @classmethod
37 |     def from_config(cls, cfg):
38 |         april_dict = get_aruco_dict(cfg.MODEL.MARKER_GENERATOR.ARUCO_DICT, default=aruco.DICT_APRILTAG_36h11)
39 |         shuffling = cfg.MODEL.ROI_DECODER_HEAD.DECODER_ON
40 |         return {
41 |             "april_dict": april_dict,
42 |             "border_bits": cfg.MODEL.MARKER_GENERATOR.BORDER_BITS,
43 |             "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES,
44 |             "shuffling": shuffling,
45 |             "vis_period": cfg.VIS_PERIOD,
46 |         }
47 | 
48 |     def recognize(self, img):
49 |         if len(img.shape) == 3:
50 |             img_ = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
51 |         elif len(img.shape) == 2:
52 |             img_ = img
53 | 
54 |         ### opencv
55 |         # marker_corners, ids, _ = cv2.aruco.detectMarkers(img, cv2.aruco.getPredefinedDictionary(
56 |         #     cv2.aruco.DICT_APRILTAG_36H11))
57 |         ###
58 | 
59 |         ### AprilRobotics
60 |         detections = self.detector.detect(img_)
61 |         marker_corners = []
62 |         ids = []
63 |         for i in detections:
64 |             corners = np.array(i['lb-rb-rt-lt'])
65 |             corners_ = corners.copy()
66 | 
67 |             corners_[0] = corners[1]
68 |             corners_[1] = corners[0]
69 |             corners_[2] = corners[3]
70 |             corners_[3] = corners[2]
71 | 
72 |             marker_corners.append(np.array([corners_], dtype=np.float32))
73 |             ids.append(i["id"])
74 |         ids = np.array(ids)
75 |         ###
76 | 
77 |         marker_corners = np.array(marker_corners).reshape(-1,4,2)
78 |         return marker_corners, ids


--------------------------------------------------------------------------------
/deepformable/modeling/marker_generator/aruco_generator.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import logging
  3 | import numpy as np
  4 | import cv2
  5 | from cv2 import aruco
  6 | 
  7 | import torch
  8 | from torch import nn
  9 | import torch.nn.functional as F
 10 | 
 11 | from detectron2.config import configurable
 12 | from detectron2.utils.comm import all_gather, is_main_process
 13 | from detectron2.utils.events import get_event_storage
 14 | 
 15 | from .build import MARKER_GENERATOR_REGISTRY, MarkerGenerator
 16 | from deepformable.utils import get_aruco_dict
 17 | 
 18 | 
 19 | @MARKER_GENERATOR_REGISTRY.register()
 20 | class FixedGenerator(MarkerGenerator):
 21 |     @configurable
 22 |     def __init__(
 23 |         self,
 24 |         *,
 25 |         marker_size,
 26 |         border_bits,
 27 |         num_classes,
 28 |         init_method="uniform",
 29 |         init_std=1.4,
 30 |         out_channels=3,
 31 |         vis_period=0,
 32 |     ):
 33 |         super().__init__(num_classes, 1, vis_period)
 34 |         self.marker_size = marker_size
 35 |         self.border_bits = border_bits
 36 |         generation_size = self.marker_size - self.border_bits * 2
 37 |         self.markers = nn.Parameter(
 38 |             torch.empty(
 39 |                 self.num_classes, out_channels, generation_size, generation_size, 
 40 |                 requires_grad=True))
 41 |         if init_method == "uniform":
 42 |             nn.init.uniform(self.markers, -init_std, init_std)
 43 |         elif init_method == "kaiming":
 44 |             nn.init.kaiming_normal_(self.markers, mode="fan_out", nonlinearity="sigmoid")
 45 |         else:
 46 |             nn.init.normal_(self.markers, std=init_std)
 47 | 
 48 |         nn.init.normal_(self.markers, std=init_std)
 49 |         self.activation = nn.Sigmoid()
 50 | 
 51 |     @classmethod
 52 |     def from_config(cls, cfg):
 53 |         return {
 54 |             "marker_size": cfg.MODEL.MARKER_GENERATOR.MARKER_SIZE[0],
 55 |             "border_bits": cfg.MODEL.MARKER_GENERATOR.BORDER_BITS,
 56 |             "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES,
 57 |             "init_method": cfg.MODEL.MARKER_GENERATOR.INIT_METHOD.lower(),
 58 |             "init_std": cfg.MODEL.MARKER_GENERATOR.INIT_STD,
 59 |             "vis_period": cfg.VIS_PERIOD,
 60 |         }
 61 | 
 62 |     def message_generator(self):
 63 |         return None
 64 | 
 65 |     def batch_marker_generator(self, gt_classes_batch):
 66 |         markers = self.activation(self.markers)
 67 |         markers = F.pad(markers, [self.border_bits]*4)    # Add padding
 68 | 
 69 |         markers_batch, messages_batch = [], []
 70 |         for classes in gt_classes_batch:
 71 |             markers_batch.append(markers[classes])
 72 |             messages_batch.append(torch.zeros(len(classes)))
 73 |         
 74 |         return markers_batch, messages_batch, {}
 75 | 
 76 | 
 77 | @MARKER_GENERATOR_REGISTRY.register()
 78 | class ArucoGenerator(MarkerGenerator):
 79 |     @configurable
 80 |     def __init__(
 81 |         self,
 82 |         *,
 83 |         aruco_dict,
 84 |         border_bits,
 85 |         num_classes,
 86 |         shuffling,
 87 |         vis_period=0
 88 |     ):
 89 |         super().__init__(num_classes, aruco_dict.markerSize * aruco_dict.markerSize, vis_period)
 90 |         marker_size = aruco_dict.markerSize + 2 * border_bits
 91 |         self.aruco_dict = aruco_dict
 92 |         self.border_bits = border_bits
 93 |         self.num_markers = len(aruco_dict.bytesList)
 94 |         self.marker_size = marker_size
 95 |         self.shuffling = shuffling
 96 |         self.detect_params = aruco.DetectorParameters_create()
 97 |         self.subpix_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.00001)
 98 |         
 99 |         markers_binary = []
100 |         for i in range(self.num_markers):
101 |             marker = aruco_dict.drawMarker(i, marker_size, borderBits=1)
102 |             markers_binary.append(marker[1:-1, 1:-1].reshape(-1))
103 |             
104 |         # self.register_buffer("markers", torch.tensor(
105 |         #     markers, dtype=torch.float32).view(num_classes, 3, marker_size, marker_size)/255.0)
106 |         self.register_buffer("markers_binary", torch.tensor(markers_binary, dtype=torch.float32)/255.0, False)
107 |         self.register_buffer("messages", self.markers_binary[:num_classes], False)
108 | 
109 |     @classmethod
110 |     def from_config(cls, cfg):
111 |         aruco_dict = get_aruco_dict(cfg.MODEL.MARKER_GENERATOR.ARUCO_DICT)
112 |         shuffling = cfg.MODEL.ROI_DECODER_HEAD.DECODER_ON
113 |         return {
114 |             "aruco_dict": aruco_dict,
115 |             "border_bits": cfg.MODEL.MARKER_GENERATOR.BORDER_BITS,
116 |             "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES,
117 |             "shuffling": shuffling,
118 |             "vis_period": cfg.VIS_PERIOD,
119 |         }
120 | 
121 |     def visualize_training(self):
122 |         if self.training and self.vis_period != 0:
123 |             storage = get_event_storage()
124 |             if storage.iter % self.vis_period == 0:
125 |                 marker = np.uint8(self.get_markers_numpy(0) * 255)
126 |                 marker = marker.transpose(2, 0, 1)
127 |                 storage.put_image("Generated Marker", marker)
128 |     
129 |     def message_generator(self):
130 |         if self.shuffling and self.training:
131 |             message_indexes = None
132 |             if is_main_process():
133 |                 message_indexes = torch.randperm(self.num_markers)[:self.num_classes]
134 |             message_indexes = all_gather(message_indexes)[0].to(self.device)
135 |             return self.markers_binary[message_indexes]
136 |         return self.markers_binary[:self.num_classes]
137 | 
138 |     def batch_marker_generator(self, gt_classes_batch):
139 |         markers_batch, messages_batch = [], []
140 |         for classes in gt_classes_batch:
141 |             messages = self.messages[classes]
142 |             messages_batch.append(messages)
143 |             
144 |             markers = torch.repeat_interleave(
145 |                 messages.view(-1, 1, self.aruco_dict.markerSize, self.aruco_dict.markerSize), 3, dim=1)
146 |             markers = F.pad(markers, [self.border_bits]*4)
147 |             markers_batch.append(markers)
148 |         
149 |         return markers_batch, messages_batch, {}
150 |     
151 |     def recognize(self, img):
152 |         marker_corners, ids, _ = cv2.aruco.detectMarkers(
153 |             img, self.aruco_dict, parameters=self.detect_params)
154 |         if len(img.shape) == 3:
155 |             gray_undistorted = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
156 |         else:
157 |             gray_undistorted = img
158 |         for corners in marker_corners:
159 |             cv2.cornerSubPix(gray_undistorted, corners,
160 |                                 winSize=(3, 3),
161 |                                 zeroZone=(-1, -1),
162 |                                 criteria=self.subpix_criteria)
163 | 
164 |         marker_corners = np.array(marker_corners).reshape(-1,4,2)+0.5
165 |         ids = ids.reshape(-1) if ids is not None else np.ones(len(marker_corners))
166 |         return marker_corners, ids


--------------------------------------------------------------------------------
/deepformable/modeling/marker_generator/build.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import torch
  3 | from torch import nn
  4 | import numpy as np
  5 | 
  6 | from detectron2.utils.registry import Registry
  7 | from detectron2.utils.events import get_event_storage
  8 | from abc import ABCMeta, abstractmethod
  9 | 
 10 | MARKER_GENERATOR_REGISTRY = Registry("MARKER_GENERATOR")  # noqa F401 isort:skip
 11 | MARKER_GENERATOR_REGISTRY.__doc__ = """
 12 | Registry for the marker generator architecture
 13 | """
 14 | 
 15 | 
 16 | def build_marker_generator(cfg):
 17 |     """
 18 |     Build the marker generator pipeline, defined by ``cfg.MODEL.MARKER_GENERATOR``.
 19 |     """
 20 |     marker_generator = cfg.MODEL.MARKER_GENERATOR.NAME
 21 |     generator = MARKER_GENERATOR_REGISTRY.get(marker_generator)(cfg)
 22 |     generator.to(torch.device(cfg.MODEL.DEVICE))
 23 |     return generator
 24 | 
 25 | 
 26 | class MarkerGenerator(nn.Module, metaclass=ABCMeta):
 27 |     def __init__(self, num_classes, num_bits, vis_period=0):
 28 |         super().__init__()
 29 |         self.num_classes = num_classes
 30 |         self.num_bits = num_bits
 31 |         self.vis_period = vis_period
 32 |         self.register_buffer("gamma", torch.tensor((1/2.2), dtype=torch.float32), False)
 33 |         # self.register_buffer("messages", torch.zeros(num_classes, num_bits, dtype=torch.float32), False)
 34 | 
 35 |     @property
 36 |     def device(self):
 37 |         return self.gamma.device
 38 | 
 39 |     @torch.no_grad()
 40 |     def get_markers_numpy(self, classes):
 41 |         if isinstance(classes, int):
 42 |             classes = [classes]
 43 |         classes = torch.tensor(classes, device=self.device)
 44 |         markers_batch, _, _ = self.batch_marker_generator([classes])
 45 |         markers = markers_batch[0] ** self.gamma
 46 |         markers = markers.permute(0,2,3,1)[...,[2,1,0]].cpu().numpy()
 47 |         if len(markers) == 1:
 48 |             return markers[0]
 49 |         return markers
 50 | 
 51 |     @abstractmethod
 52 |     def message_generator(self):
 53 |         """
 54 |         This method should save the internal variables required for batch_marker_generator
 55 |         """
 56 |         pass
 57 | 
 58 |     @abstractmethod
 59 |     def batch_marker_generator(self, gt_classes_batch):
 60 |         """
 61 |         This method returns markers and messages in list form
 62 |         """
 63 |         pass
 64 | 
 65 |     def postprocessing(self, pred_instances):
 66 |         return pred_instances
 67 | 
 68 |     def visualize_training(self):
 69 |         if self.training and self.vis_period != 0:
 70 |             storage = get_event_storage()
 71 |             if storage.iter % self.vis_period == 0:
 72 |                 if self.messages is not None:
 73 |                     messages = torch.zeros(2, self.num_bits, dtype=torch.float32)
 74 |                     messages[0, range(0,self.num_bits,2)] = 1.0
 75 |                     messages[1, range(1,self.num_bits,2)] = 1.0
 76 |                     old_messages = self.messages[:2].clone()
 77 |                     self.messages[:2] = messages
 78 |                     markers = np.uint8(self.get_markers_numpy([0, 1]) * 255)
 79 |                     self.messages[:2] = old_messages
 80 |                     marker1, marker2 = markers[0].transpose(2,0,1), markers[1].transpose(2,0,1)
 81 |                 else:
 82 |                     # TODO: What is this? :)
 83 |                     markers = np.uint8(self.get_markers_numpy([0, 1]) * 255)
 84 |                     marker1, marker2 = markers[0].transpose(2,0,1), markers[1].transpose(2,0,1)
 85 |                 storage.put_image("Marker (101010..)", marker1)
 86 |                 storage.put_image("Marker (010101..)", marker2)
 87 | 
 88 |     def forward(self, batch_instances):
 89 |         with torch.no_grad():
 90 |             self.visualize_training()
 91 |             self.messages = self.message_generator()
 92 |             if self.messages is not None:
 93 |                 self.messages = self.messages.to(self.device)
 94 |         
 95 |         gt_classes_batch = [i.gt_classes for i in batch_instances]
 96 |         markers_batch, messages_batch, loss = self.batch_marker_generator(gt_classes_batch)
 97 |         
 98 |         for messages, instances in zip(messages_batch, batch_instances):
 99 |             instances.gt_message = messages
100 | 
101 |         return markers_batch, loss


--------------------------------------------------------------------------------
/deepformable/modeling/marker_roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .marker_roi_heads import MarkerROIHeads
3 | from .naive_transform_head import NaiveTransformHead, ROI_TRANSFORM_HEAD_REGISTRY
4 | from .corner_head import ROI_CORNER_HEAD_REGISTRY, CornerHead, CornerHeadV2
5 | from .decoder_head import DecoderHead, ROI_DECODER_HEAD_REGISTRY
6 | from .transformer_head import SpatialTransformerHead, SpatialTransformerHeadV2
7 | 


--------------------------------------------------------------------------------
/deepformable/modeling/marker_roi_heads/decoder_head.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import numpy as np
  3 | from typing import List
  4 | import fvcore.nn.weight_init as weight_init
  5 | import torch
  6 | from torch import nn
  7 | from torch.nn import functional as F
  8 | 
  9 | from detectron2.config import configurable
 10 | from detectron2.layers import Conv2d, Linear, get_norm, ShapeSpec, cat, nonzero_tuple
 11 | from detectron2.utils.registry import Registry
 12 | from detectron2.structures import Instances
 13 | 
 14 | from deepformable.layers import AdaptiveLoss
 15 | 
 16 | __all__ = ["DecoderHead", "build_decoder_head", "ROI_DECODER_HEAD_REGISTRY"]
 17 | 
 18 | ROI_DECODER_HEAD_REGISTRY = Registry("ROI_DECODER_HEAD")
 19 | ROI_DECODER_HEAD_REGISTRY.__doc__ = """
 20 | Registry for corner heads
 21 | """
 22 | 
 23 | def build_decoder_head(cfg, input_shape):
 24 |     name = cfg.MODEL.ROI_DECODER_HEAD.NAME
 25 |     return ROI_DECODER_HEAD_REGISTRY.get(name)(cfg, input_shape)
 26 | 
 27 | 
 28 | 
 29 | @ROI_DECODER_HEAD_REGISTRY.register()
 30 | class DecoderHead(nn.Module):
 31 |     @configurable
 32 |     def __init__(
 33 |         self,
 34 |         input_shape: ShapeSpec,
 35 |         *,
 36 |         num_classes: int,
 37 |         num_bits: int,
 38 |         conv_dims: List[int],
 39 |         conv_norm="",
 40 |         fc_dims: List[int], 
 41 |         with_decoder: bool = True,
 42 |         decoding_loss_type: str = 'mse',
 43 |         decoding_loss_weight: float = 1.0,
 44 |         class_loss_weight: float = 1.0,
 45 |     ):
 46 |         super().__init__()
 47 |         if isinstance(input_shape, int):  # some backward compatibility
 48 |             input_shape = ShapeSpec(channels=input_shape)
 49 |         
 50 |         output_size = (input_shape.channels, (input_shape.height or 1), (input_shape.width or 1))
 51 | 
 52 |         self.conv_norm_relus = []
 53 |         for k, conv_dim in enumerate(conv_dims):
 54 |             conv = Conv2d(
 55 |                 output_size[0],
 56 |                 conv_dim,
 57 |                 kernel_size=3,
 58 |                 padding=0,
 59 |                 bias=not conv_norm,
 60 |                 norm=get_norm(conv_norm, conv_dim),
 61 |                 activation=F.relu,
 62 |             )
 63 |             self.add_module("conv{}".format(k + 1), conv)
 64 |             self.conv_norm_relus.append(conv)
 65 |             output_size = (conv_dim, output_size[1]-2, output_size[2]-2)
 66 |         for layer in self.conv_norm_relus:
 67 |             weight_init.c2_msra_fill(layer)
 68 | 
 69 |         self.fcs = []
 70 |         for k, fc_dim in enumerate(fc_dims):
 71 |             fc = Linear(np.prod(output_size), fc_dim)
 72 |             self.add_module("fc{}".format(k + 1), fc)
 73 |             self.fcs.append(fc)
 74 |             output_size = fc_dim
 75 |         for layer in self.fcs:
 76 |             weight_init.c2_xavier_fill(layer)
 77 |         
 78 |         self.with_decoder = with_decoder
 79 |         self.num_classes, self.num_bits = num_classes, num_bits
 80 |         if with_decoder:
 81 |             output_size = np.prod(output_size)
 82 |             self.decoder = Linear(output_size, num_bits)
 83 |             nn.init.normal_(self.decoder.weight, std=0.01)
 84 |             nn.init.constant_(self.decoder.bias, 0)
 85 |             self.cls_score = Linear(output_size, 1)
 86 |         else:
 87 |             self.cls_score = Linear(output_size, num_classes + 1)
 88 | 
 89 |         nn.init.normal_(self.cls_score.weight, std=0.01)
 90 |         nn.init.constant_(self.cls_score.bias, 0)
 91 |         
 92 |         self.decoding_loss_func = AdaptiveLoss(loss_type=decoding_loss_type)
 93 |         self.decoding_loss_weight = decoding_loss_weight
 94 |         self.objectness_loss_func = AdaptiveLoss(loss_type='bce')
 95 |         self.class_loss_weight = class_loss_weight
 96 | 
 97 |     @classmethod
 98 |     def from_config(cls, cfg, input_shape):
 99 |         return {
100 |             "input_shape": input_shape,
101 |             "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES,
102 |             "num_bits": cfg.MODEL.MARKER_GENERATOR.NUM_GENERATION_BITS,
103 |             "with_decoder": cfg.MODEL.ROI_DECODER_HEAD.DECODER_ON,
104 |             "decoding_loss_type": cfg.MODEL.ROI_DECODER_HEAD.LOSS_TYPE,
105 |             "decoding_loss_weight": cfg.MODEL.ROI_DECODER_HEAD.DECODING_LOSS_WEIGHT,
106 |             "class_loss_weight": cfg.MODEL.ROI_DECODER_HEAD.CLASS_LOSS_WEIGHT,
107 |             "conv_dims": cfg.MODEL.ROI_DECODER_HEAD.CONV_DIMS,
108 |             "fc_dims": cfg.MODEL.ROI_DECODER_HEAD.FC_DIMS
109 |         }
110 | 
111 |     def forward(self, x: torch.Tensor, proposals_sampled: List[Instances]):
112 |         if self.training:
113 |             gt_classes = cat([p.gt_classes for p in proposals_sampled], dim=0)
114 |             fg_list = (gt_classes >= 0) & (gt_classes < self.num_classes)
115 |             fg_inds = nonzero_tuple(fg_list)[0]
116 |             gt_objectness = fg_list.to(torch.float32).view(-1,1)
117 | 
118 |         # Apply conv and relus
119 |         for layer in self.conv_norm_relus:
120 |             x = layer(x)
121 |         
122 |         if x.dim() > 2:
123 |             x = torch.flatten(x, start_dim=1)
124 |         
125 |         if len(self.fcs):    
126 |             for layer in self.fcs:
127 |                 x = F.relu(layer(x))
128 |         
129 |         obj_scores = self.cls_score(x)
130 |         
131 |         decoded_message = None
132 |         if self.with_decoder:
133 |             if self.training:
134 |                 x = x[fg_inds]
135 |             decoded_message = torch.sigmoid(self.decoder(x))
136 | 
137 |         if self.training:
138 |             if self.with_decoder:
139 |                 objectness_loss = self.objectness_loss_func(obj_scores, gt_objectness)
140 |                 gt_message = cat([p.gt_message for p in proposals_sampled], dim=0)[fg_inds]
141 |                 decoding_loss = self.decoding_loss_func(decoded_message, gt_message)
142 |                 div_factor = max((gt_classes.numel() * self.num_bits), 1)
143 |                 losses = {
144 |                     'objectness_loss': objectness_loss * self.class_loss_weight / obj_scores.size(0),
145 |                     'decoding_loss': decoding_loss * self.decoding_loss_weight / div_factor
146 |                 }
147 |             else:
148 |                 # TODO: Modify for adaptive clipping
149 |                 loss_cls = F.cross_entropy(
150 |                     obj_scores, gt_classes, reduction="mean") * self.class_loss_weight
151 |                 losses = {'loss_cls': loss_cls}
152 |             return losses
153 |         
154 |         i, score_batches, message_batches = 0, [], []
155 |         for p in proposals_sampled:
156 |             data_len = len(p.proposal_boxes)
157 |             score_batches.append(obj_scores[i:i+data_len])
158 |             if self.with_decoder:
159 |                 message_batches.append(decoded_message[i:i+data_len])
160 |             else:
161 |                 message_batches.append(None)
162 |             i += data_len
163 | 
164 |         return score_batches, message_batches
165 | 


--------------------------------------------------------------------------------
/deepformable/modeling/marker_roi_heads/marker_roi_heads.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | import numpy as np
 3 | from typing import Dict, List, Optional, Tuple, Union
 4 | import torch
 5 | from torch import nn
 6 | 
 7 | from detectron2.config import configurable
 8 | from detectron2.structures import ImageList, Instances
 9 | 
10 | from detectron2.modeling import ROI_HEADS_REGISTRY, ROIHeads
11 | 
12 | from .naive_transform_head import build_transform_head
13 | from .corner_head import build_corner_head
14 | from .decoder_head import build_decoder_head
15 | 
16 | 
17 | 
18 | @ROI_HEADS_REGISTRY.register()
19 | class MarkerROIHeads(ROIHeads):
20 |     """
21 |     This class implements the corner prediction and decoding tasks.
22 |     It returns a dictionary of outputs that later converted to
23 |     instances after postprocessing.
24 |     """
25 |     @configurable
26 |     def __init__(
27 |         self,
28 |         *,
29 |         transform_head: nn.Module,
30 |         corner_head: nn.Module,
31 |         decoder_head: nn.Module,
32 |         **kwargs
33 |     ):
34 |         super().__init__(**kwargs)
35 |         self.transform_head = transform_head
36 |         self.corner_head = corner_head
37 |         self.decoder_head = decoder_head
38 | 
39 |     @classmethod
40 |     def from_config(cls, cfg, input_shape):
41 |         ret = super().from_config(cfg)
42 |         transform_head = build_transform_head(cfg, input_shape)
43 |         ret["transform_head"] = transform_head
44 |         corner_input_shape, decoder_input_shape = transform_head.output_shape
45 |         ret["corner_head"] = build_corner_head(cfg, corner_input_shape)
46 |         ret["decoder_head"] = build_decoder_head(cfg, decoder_input_shape)
47 |         return ret
48 | 
49 |     @property
50 |     def device(self):
51 |         return self.corner_head.device
52 |     
53 |     def forward(
54 |         self,
55 |         images: ImageList,
56 |         features: Dict[str, torch.Tensor],
57 |         proposals: List[Instances],
58 |         targets: Optional[List[Instances]] = None,
59 |     ) -> Tuple[List[Dict], Dict]:
60 |         # del images
61 |         if self.training:
62 |             assert targets
63 |             proposals = self.label_and_sample_proposals(proposals, targets)
64 |         # del targets
65 |         
66 |         if self.training:
67 |             corner_features, decoding_features, sample_locations_batch, losses = self.transform_head(images, features, proposals, targets)
68 |             losses.update(self.corner_head(corner_features, proposals))
69 |             losses.update(self.decoder_head(decoding_features, proposals))
70 |             del images, targets
71 |             return [], losses
72 |         
73 |         corner_features, decoding_features, sample_locations_batch, _ = self.transform_head(images, features, proposals, targets)
74 |         corners_batch = self.corner_head(corner_features, proposals)
75 |         obj_scores_batch, decoded_messages_batch = self.decoder_head(decoding_features, proposals)
76 | 
77 |         results = []
78 |         for i in range(len(proposals)):
79 |             output = {
80 |                 "corners": corners_batch[i], "obj_scores": obj_scores_batch[i],
81 |                 "decoded_messages": decoded_messages_batch[i],
82 |                 "image_shape": proposals[i].image_size}
83 |             if sample_locations_batch:
84 |                 output["sample_locations"] = sample_locations_batch[i]
85 |             results.append(output)
86 | 
87 |         return results, {}


--------------------------------------------------------------------------------
/deepformable/modeling/marker_roi_heads/naive_transform_head.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import numpy as np
  3 | from typing import Dict, List, Optional, Tuple, Union
  4 | import fvcore.nn.weight_init as weight_init
  5 | import torch
  6 | from torch import nn
  7 | from torch.nn import functional as F
  8 | 
  9 | from detectron2.structures import ImageList, Instances
 10 | from detectron2.config import configurable
 11 | from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm
 12 | from detectron2.utils.registry import Registry
 13 | from detectron2.modeling.poolers import ROIPooler
 14 | 
 15 | __all__ = ["NaiveTransformHead", "build_transform_head", "ROI_TRANSFORM_HEAD_REGISTRY"]
 16 | 
 17 | ROI_TRANSFORM_HEAD_REGISTRY = Registry("ROI_TRANSFORM_HEAD")
 18 | ROI_TRANSFORM_HEAD_REGISTRY.__doc__ = """
 19 | Registry for transform heads, which transforms features into normalized
 20 | space for corner and class prediction.
 21 | """
 22 | 
 23 | def build_transform_head(cfg, input_shape):
 24 |     """
 25 |     Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`.
 26 |     """
 27 |     name = cfg.MODEL.ROI_TRANSFORM_HEAD.NAME
 28 |     return ROI_TRANSFORM_HEAD_REGISTRY.get(name)(cfg, input_shape)
 29 | 
 30 | 
 31 | @ROI_TRANSFORM_HEAD_REGISTRY.register()
 32 | class NaiveTransformHead(nn.Module):
 33 |     @configurable
 34 |     def __init__(
 35 |         self,
 36 |         input_shape: ShapeSpec,
 37 |         *,
 38 |         in_features,
 39 |         pooler: ROIPooler,
 40 |         conv_dims: List[int],
 41 |         fc_dims: List[int], 
 42 |         conv_norm=""
 43 |     ):
 44 |         super().__init__()
 45 |         assert len(conv_dims) + len(fc_dims) > 0
 46 | 
 47 |         self.in_features = in_features
 48 |         self.pooler = pooler
 49 | 
 50 |         output_size = (input_shape.channels, input_shape.height, input_shape.width)
 51 | 
 52 |         self.conv_norm_relus = []
 53 |         for k, conv_dim in enumerate(conv_dims):
 54 |             conv = Conv2d(
 55 |                 output_size[0],
 56 |                 conv_dim,
 57 |                 kernel_size=3,
 58 |                 padding=1,
 59 |                 bias=not conv_norm,
 60 |                 norm=get_norm(conv_norm, conv_dim),
 61 |                 activation=F.relu,
 62 |             )
 63 |             self.add_module("conv{}".format(k + 1), conv)
 64 |             self.conv_norm_relus.append(conv)
 65 |             output_size = (conv_dim, output_size[1], output_size[2])
 66 |         for layer in self.conv_norm_relus:
 67 |             weight_init.c2_msra_fill(layer)
 68 | 
 69 |         self.fcs = []
 70 |         for k, fc_dim in enumerate(fc_dims):
 71 |             fc = Linear(np.prod(output_size), fc_dim)
 72 |             self.add_module("fc{}".format(k + 1), fc)
 73 |             self.fcs.append(fc)
 74 |             output_size = fc_dim
 75 |         for layer in self.fcs:
 76 |             weight_init.c2_xavier_fill(layer)
 77 |         self._output_size = output_size
 78 | 
 79 |     @classmethod
 80 |     def from_config(cls, cfg, input_shape):
 81 |         # TODO: Create new parameters for transform head in the config
 82 |         in_features       = cfg.MODEL.ROI_HEADS.IN_FEATURES
 83 |         pooler_resolution = cfg.MODEL.ROI_TRANSFORM_HEAD.POOLER_RESOLUTION
 84 |         pooler_scales     = tuple(1.0 / input_shape[k].stride for k in in_features)
 85 |         sampling_ratio    = cfg.MODEL.ROI_TRANSFORM_HEAD.POOLER_SAMPLING_RATIO
 86 |         pooler_type       = cfg.MODEL.ROI_TRANSFORM_HEAD.POOLER_TYPE
 87 | 
 88 |         num_conv = cfg.MODEL.ROI_TRANSFORM_HEAD.NUM_CONV
 89 |         conv_dim = cfg.MODEL.ROI_TRANSFORM_HEAD.CONV_DIM
 90 |         num_fc = cfg.MODEL.ROI_TRANSFORM_HEAD.NUM_FC
 91 |         fc_dim = cfg.MODEL.ROI_TRANSFORM_HEAD.FC_DIM
 92 | 
 93 |         in_channels = [input_shape[f].channels for f in in_features]
 94 |         # Check all channel counts are equal
 95 |         assert len(set(in_channels)) == 1, in_channels
 96 |         in_channels = in_channels[0]
 97 | 
 98 |         pooler = ROIPooler(
 99 |             output_size=pooler_resolution,
100 |             scales=pooler_scales,
101 |             sampling_ratio=sampling_ratio,
102 |             pooler_type=pooler_type,
103 |         )
104 | 
105 |         return {
106 |             "input_shape": ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution),
107 |             "in_features": in_features,
108 |             "pooler": pooler,
109 |             "conv_dims": [conv_dim] * num_conv,
110 |             "fc_dims": [fc_dim] * num_fc,
111 |             "conv_norm": cfg.MODEL.ROI_TRANSFORM_HEAD.NORM,
112 |         }
113 | 
114 |     def forward(
115 |         self,
116 |         images: ImageList,
117 |         features: Dict[str, torch.Tensor],
118 |         proposals: List[Instances],
119 |         targets: Optional[List[Instances]] = None,
120 |     ):
121 |         features = [features[f] for f in self.in_features]
122 |         x = self.pooler(features, [p.proposal_boxes for p in proposals])
123 |         
124 |         for layer in self.conv_norm_relus:
125 |             x = layer(x)
126 |         if len(self.fcs):
127 |             if x.dim() > 2:
128 |                 x = torch.flatten(x, start_dim=1)
129 |             for layer in self.fcs:
130 |                 x = F.relu(layer(x))
131 |         return x, x, None, {}
132 | 
133 |     @property
134 |     def output_shape(self):
135 |         """
136 |         Returns:
137 |             ShapeSpec: the output feature shape
138 |         """
139 |         out = ShapeSpec(channels=self._output_size)
140 |         return out, out


--------------------------------------------------------------------------------
/deepformable/modeling/meta_arch/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
2 | from .utils import ImageList
3 | from .render_rcnn import GeneralizedRCNN_RenderInput
4 | from .classical_detector import ClassicalDetector


--------------------------------------------------------------------------------
/deepformable/modeling/meta_arch/classical_detector.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import numpy as np
  3 | from typing import List, Dict
  4 | import torch
  5 | from torch import nn
  6 | 
  7 | import detectron2
  8 | from detectron2.structures import ImageList, Instances, Boxes
  9 | from detectron2.modeling import META_ARCH_REGISTRY
 10 | 
 11 | from ..marker_generator import build_marker_generator
 12 | 
 13 | 
 14 | @META_ARCH_REGISTRY.register()
 15 | class ClassicalDetector(nn.Module):
 16 |     def __init__(self, cfg):
 17 |         super().__init__()
 18 |         self.add_module("marker_generator", build_marker_generator(cfg))
 19 |         self.test_topk_per_image = 0
 20 |         self.test_sort_instances = False
 21 |         self.test_apply_nms = False
 22 |         self.nms_score_criteria = "none"
 23 |         self.marker_postprocessing = False
 24 | 
 25 |     def inference(self, images):
 26 |         results = []
 27 |         for img in images:
 28 |             img = img.permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8)
 29 |             marker_corners, ids = self.marker_generator.recognize(img)
 30 |             result = {
 31 |                 "corners": torch.tensor(marker_corners, dtype=torch.float32),
 32 |                 "image_shape": img.shape[:2],
 33 |                 "obj_scores": torch.as_tensor(np.ones(len(marker_corners)), dtype=torch.float32)}
 34 |             if len(marker_corners) != 0:
 35 |                 result["pred_classes"] = torch.as_tensor(ids, dtype=torch.int64)
 36 |             results.append(result)
 37 |             
 38 |         return results
 39 | 
 40 |     def postprocess_single(self, result: dict, output_height: int, output_width: int):
 41 |         if isinstance(output_width, torch.Tensor):
 42 |             # This shape might (but not necessarily) be tensors during tracing.
 43 |             # Converts integer tensors to float temporaries to ensure true
 44 |             # division is performed when computing scale_x and scale_y.
 45 |             output_width_tmp = output_width.float()
 46 |             output_height_tmp = output_height.float()
 47 |             new_size = torch.stack([output_height, output_width])
 48 |         else:
 49 |             new_size = (output_height, output_width)
 50 |             output_width_tmp = output_width
 51 |             output_height_tmp = output_height
 52 |  
 53 |         scale_x, scale_y = (
 54 |             output_width_tmp / result["image_shape"][1],
 55 |             output_height_tmp / result["image_shape"][0],
 56 |         )
 57 | 
 58 |         pred_instances = Instances(new_size)
 59 |         corners = result["corners"]
 60 |         if corners.shape[0] == 0:
 61 |             pred_instances.pred_corners = corners
 62 |             return pred_instances
 63 |       
 64 |         pred_instances.scores = result["obj_scores"]
 65 |         pred_instances.pred_classes = result["pred_classes"]
 66 | 
 67 |         # Scale corners and sample_locations
 68 |         scale_tensor = torch.tensor([scale_x, scale_y], device=corners.device)
 69 |         corners = corners * scale_tensor
 70 | 
 71 |         # Recalculate boxes
 72 |         min_c, max_c = torch.min(corners, dim=1)[0], torch.max(corners, dim=1)[0]
 73 |         boxes = torch.cat([min_c, max_c], dim=1)
 74 |         valid_mask = torch.isfinite(boxes).all(dim=1)
 75 |         
 76 |         # Add predictions to the instances, filter valid ones
 77 |         pred_instances.pred_boxes = Boxes(boxes)
 78 |         pred_instances.pred_corners = corners
 79 |         pred_instances = pred_instances[valid_mask]
 80 | 
 81 |         return pred_instances
 82 |     
 83 |     def postprocess(self, instances, batched_inputs: List[Dict[str, torch.Tensor]], image_sizes):
 84 |         # Rescale the output instances to the target size.
 85 |         processed_results = []
 86 |         for results_per_image, input_per_image, image_size in zip(
 87 |             instances, batched_inputs, image_sizes
 88 |         ):
 89 |             height = input_per_image.get("height", image_size[0])
 90 |             width = input_per_image.get("width", image_size[1])
 91 |             r = self.postprocess_single(results_per_image, height, width)
 92 |             processed_results.append({"instances": r})
 93 |         return processed_results
 94 | 
 95 |     def forward(self, batched_inputs, do_postprocess=True):
 96 |         images = ImageList.from_tensors([x["image"] for x in batched_inputs], 1)
 97 |         results = self.inference(images)
 98 | 
 99 |         if do_postprocess:
100 |             assert not torch.jit.is_scripting(), "Scripting is not supported for postprocess."
101 |             return self.postprocess(results, batched_inputs, images.image_sizes)
102 |         else:
103 |             return results


--------------------------------------------------------------------------------
/deepformable/modeling/meta_arch/utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This code is modified from detectron2 implementation, 
 3 | changes are logged in comments. 
 4 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 5 | """
 6 | from __future__ import division
 7 | from typing import List, Dict, Optional, Tuple
 8 | import numpy as np
 9 | 
10 | import torch
11 | from torch.nn import functional as F
12 | 
13 | from detectron2.layers.wrappers import shapes_to_tensor
14 | from detectron2.structures import ImageList as Detectron2_Imagelist
15 | 
16 | 
17 | class ImageList(Detectron2_Imagelist):
18 |     @staticmethod
19 |     def from_tensors(
20 |         tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0
21 |     ) -> "ImageList":
22 |         """
23 |         Detectron2's ImageList implementation modified 
24 |         to allow proper gradient flow.
25 |         """
26 |         assert len(tensors) > 0
27 |         assert isinstance(tensors, (tuple, list))
28 |         for t in tensors:
29 |             assert isinstance(t, torch.Tensor), type(t)
30 |             assert t.shape[:-2] == tensors[0].shape[:-2], t.shape
31 | 
32 |         image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors]
33 |         image_sizes_tensor = [shapes_to_tensor(x) for x in image_sizes]
34 |         max_size = torch.stack(image_sizes_tensor).max(0).values
35 | 
36 |         if size_divisibility > 1:
37 |             stride = size_divisibility
38 |             # the last two dims are H,W, both subject to divisibility requirement
39 |             max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride
40 | 
41 |         # handle weirdness of scripting and tracing ...
42 |         if torch.jit.is_scripting():
43 |             max_size: List[int] = max_size.to(dtype=torch.long).tolist()
44 |         else:
45 |             if torch.jit.is_tracing():
46 |                 image_sizes = image_sizes_tensor
47 | 
48 |         batched_imgs = []
49 |         for img, image_size in zip(tensors, image_sizes):
50 |             padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]]
51 |             batched_imgs.append(F.pad(img, padding_size, value=pad_value))
52 |         batched_imgs = torch.stack(batched_imgs, dim=0)
53 | 
54 |         return ImageList(batched_imgs.contiguous(), image_sizes)


--------------------------------------------------------------------------------
/deepformable/modeling/rpn.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This code is modified from detectron2 implementation, 
  3 | to add adaptive loss to region proposal network.
  4 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  5 | """
  6 | from typing import Dict, List, Optional, Tuple, Union
  7 | import torch
  8 | from torch import nn
  9 | 
 10 | from fvcore.nn import giou_loss
 11 | from detectron2.layers import cat, ShapeSpec
 12 | from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY
 13 | from detectron2.structures import Boxes
 14 | from detectron2.utils.events import get_event_storage
 15 | from detectron2.modeling.proposal_generator import RPN
 16 | 
 17 | from deepformable.layers import AdaptiveLoss
 18 | 
 19 | 
 20 | @PROPOSAL_GENERATOR_REGISTRY.register()
 21 | class RPN_AdaptiveLoss(RPN):
 22 |     def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
 23 |         super().__init__(cfg, input_shape)
 24 |         adaptive_loss = cfg.MODEL.PROPOSAL_GENERATOR.ADAPTIVE_LOSS
 25 |         self.bbox_loss_function = AdaptiveLoss(loss_type='l1') if adaptive_loss else nn.L1Loss(reduction='sum')
 26 |         self.class_loss_function = AdaptiveLoss(loss_type='bce') if adaptive_loss else nn.BCELoss(reduction='sum')
 27 |     
 28 |     @torch.jit.unused
 29 |     def losses(
 30 |         self,
 31 |         anchors: List[Boxes],
 32 |         pred_objectness_logits: List[torch.Tensor],
 33 |         gt_labels: List[torch.Tensor],
 34 |         pred_anchor_deltas: List[torch.Tensor],
 35 |         gt_boxes: List[torch.Tensor],
 36 |     ) -> Dict[str, torch.Tensor]:
 37 |         """
 38 |         Return the losses from a set of RPN predictions and their associated ground-truth.
 39 | 
 40 |         Args:
 41 |             anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each
 42 |                 has shape (Hi*Wi*A, B), where B is box dimension (4 or 5).
 43 |             pred_objectness_logits (list[Tensor]): A list of L elements.
 44 |                 Element i is a tensor of shape (N, Hi*Wi*A) representing
 45 |                 the predicted objectness logits for all anchors.
 46 |             gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`.
 47 |             pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape
 48 |                 (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors
 49 |                 to proposals.
 50 |             gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`.
 51 | 
 52 |         Returns:
 53 |             dict[loss name -> loss value]: A dict mapping from loss name to loss value.
 54 |                 Loss names are: `loss_rpn_cls` for objectness classification and
 55 |                 `loss_rpn_loc` for proposal localization.
 56 |         """
 57 |         num_images = len(gt_labels)
 58 |         gt_labels = torch.stack(gt_labels)  # (N, sum(Hi*Wi*Ai))
 59 | 
 60 |         # Log the number of positive/negative anchors per-image that's used in training
 61 |         pos_mask = gt_labels == 1
 62 |         num_pos_anchors = pos_mask.sum().item()
 63 |         num_neg_anchors = (gt_labels == 0).sum().item()
 64 |         storage = get_event_storage()
 65 |         storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images)
 66 |         storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images)
 67 | 
 68 |         if self.box_reg_loss_type == "smooth_l1":
 69 |             anchors = type(anchors[0]).cat(anchors).tensor  # Ax(4 or 5)
 70 |             gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in gt_boxes]
 71 |             gt_anchor_deltas = torch.stack(gt_anchor_deltas)  # (N, sum(Hi*Wi*Ai), 4 or 5)
 72 |             # ====CHANGE_ON_LOSS====
 73 |             localization_loss = self.bbox_loss_function(
 74 |                 cat(pred_anchor_deltas, dim=1)[pos_mask], gt_anchor_deltas[pos_mask])
 75 |             # localization_loss = smooth_l1_loss(
 76 |             #     cat(pred_anchor_deltas, dim=1)[pos_mask],
 77 |             #     gt_anchor_deltas[pos_mask],
 78 |             #     self.smooth_l1_beta,
 79 |             #     reduction="sum",
 80 |             # )
 81 |             # ======================
 82 |         elif self.box_reg_loss_type == "giou":
 83 |             pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
 84 |             pred_proposals = cat(pred_proposals, dim=1)
 85 |             pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
 86 |             pos_mask = pos_mask.view(-1)
 87 |             localization_loss = giou_loss(
 88 |                 pred_proposals[pos_mask], cat(gt_boxes)[pos_mask], reduction="sum"
 89 |             )
 90 |         else:
 91 |             raise ValueError(f"Invalid rpn box reg loss type '{self.box_reg_loss_type}'")
 92 | 
 93 |         valid_mask = gt_labels >= 0
 94 |         # ====CHANGE_ON_LOSS====
 95 |         objectness_loss = self.class_loss_function(
 96 |             cat(pred_objectness_logits, dim=1)[valid_mask],
 97 |             gt_labels[valid_mask].to(torch.float32)
 98 |         )
 99 |         # ======================
100 |         normalizer = self.batch_size_per_image * num_images
101 |         return {
102 |             "loss_rpn_cls": objectness_loss / normalizer,
103 |             "loss_rpn_loc": localization_loss / normalizer,
104 |         }


--------------------------------------------------------------------------------
/deepformable/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | from .config import get_cfg
 3 | 
 4 | from .board_utils import (
 5 |     calculate_board_dims,
 6 |     is_polygon_intersects,
 7 |     marker_placer,
 8 |     marker_metadata_loader
 9 | )
10 | 
11 | from .aruco_utils import (
12 |     get_aruco_dict,
13 |     detect_aruco_markers
14 | )
15 | 
16 | from .general_utils import (
17 |     if_continue_execution,
18 |     img_flexible_reader
19 | )
20 | 
21 | from .inpaint_utils import (
22 |     NoInpaint,
23 |     OpenCVInpaint
24 | )
25 | 
26 | from .visualize_utils import (
27 |     convert_mapped_instances,
28 |     DeepformableVisualizer,
29 |     VisualizationDemo,
30 |     ModifiedPredictor
31 | )
32 | 
33 | from .image_utils import (
34 |     sample_param, 
35 |     get_disk_blur_kernel,
36 |     hls_to_rgb,
37 |     rgb_to_hls,
38 | )
39 | 
40 | from .env import (
41 |     load_seed_info,
42 |     save_seed_info
43 | )


--------------------------------------------------------------------------------
/deepformable/utils/aruco_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | import numpy as np
 3 | import cv2
 4 | from cv2 import aruco
 5 | 
 6 | def get_aruco_dict(name, default=aruco.DICT_5X5_100):
 7 |     """
 8 |     For a given string returns corresponding aruco dictionary if exists.
 9 |     Check cv2.aruco.__dict__ keys for supported markers.
10 |     """
11 |     name = 'DICT_' + name.upper()
12 |     d = default
13 |     if name in aruco.__dict__:
14 |         d = aruco.__dict__[name]
15 |     return aruco.Dictionary_get(d)
16 | 
17 | def detect_aruco_markers(
18 |     img,
19 |     aruco_dict,
20 |     mtx=None,
21 |     detect_params=aruco.DetectorParameters_create(),
22 |     subpix_criteria=(cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.00001),
23 |     max_winsize=9,
24 |     min_winsize=2,
25 | ):
26 |     """
27 |     Detects aruco markers and refines corners in subpixel accuracy.
28 |     """
29 |     marker_corners, ids, tmp = cv2.aruco.detectMarkers(
30 |         img, aruco_dict,
31 |         parameters=detect_params,
32 |         cameraMatrix=mtx)
33 |     gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.shape[-1]==3 else img
34 |     if len(marker_corners) > 0 and subpix_criteria is not None:
35 |         for corners in marker_corners:
36 |             dif = corners[0]- np.roll(corners[0],2)
37 |             dist_avg = np.average(np.linalg.norm(dif, axis=1))
38 |             win_size = min(max(min_winsize, int(dist_avg/12)), max_winsize)
39 |             cv2.cornerSubPix(
40 |                 gray_img, corners,
41 |                 winSize=(win_size, win_size),
42 |                 zeroZone=(-1, -1),
43 |                 criteria=subpix_criteria)
44 |     return marker_corners, ids, tmp
45 | 
46 | 


--------------------------------------------------------------------------------
/deepformable/utils/board_utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import json
  3 | import numpy as np
  4 | from pathlib import Path
  5 | 
  6 | import shapely
  7 | from shapely.geometry import MultiPolygon, Polygon
  8 | 
  9 | 
 10 | paper_sizes = {
 11 |     "a2": (420.0, 420.0 * (2.0 ** 0.5)),
 12 |     "a3": (210.0 * (2.0 ** 0.5), 420.0), # 296, 420
 13 |     "a4": (210.0, 210.0 * (2.0 ** 0.5)), # 210, 296
 14 |     "a5": (210.0 / (2.0 ** 0.5), 210.0),
 15 |     "a6": (105.0, 105.0 * (2.0 ** 0.5)),
 16 |     "a3-s": (305.0, 457.0),
 17 | }
 18 | 
 19 | 
 20 | def calculate_board_dims(board):
 21 |     if isinstance(board['paper_type'], str):
 22 |         paper_size = paper_sizes.get(board['paper_type'], 'a4')
 23 |     elif isinstance(board['paper_type'], list):
 24 |         paper_size = board['paper_type']
 25 |     margins = board.get('paper_margins', 10.5)
 26 |     board_dims = (paper_size[0] - margins, paper_size[1] - margins)
 27 |     return board_dims
 28 | 
 29 | 
 30 | def is_polygon_intersects(src_poly, polygons):
 31 |     if len(polygons) == 0:
 32 |         return False
 33 |     src_poly = Polygon(src_poly)
 34 |     polygons = MultiPolygon([Polygon(i) for i in polygons])
 35 |     return polygons.intersects(src_poly)
 36 | 
 37 | 
 38 | def marker_placer(
 39 |     board_size=(210,296),
 40 |     marker_min=40,
 41 |     marker_max=140,
 42 |     num_classes=64,
 43 |     class_array=[],
 44 |     safety_size=4,
 45 |     random_trials=75,
 46 |     p_reg=[0.2, 0.3, 0.2, 0.15, 0, 0.15],
 47 |     p_reg_rand=[0.57, 0.37, 0.06],
 48 | ):
 49 |     """
 50 |     TODO: This method requires bug-fix and clean-up!!
 51 |     """
 52 |     def place_random(marker_min, marker_max, board_size):
 53 |         polygons = np.empty((0,4,2))
 54 |         val_range = range(marker_min,marker_max+1)
 55 |         p = np.flip(np.array(val_range))
 56 |         p = p / np.sum(p)
 57 |         for _ in range(random_trials):
 58 |             marker_size = np.random.choice(val_range, p=p)
 59 |             src_poly_margin = np.array([[0,0],[1,0],[1,1],[0,1]]) * (marker_size+8)
 60 |             theta = np.random.uniform(0, np.pi)
 61 |             rotMatrix = np.array([[np.cos(theta), -np.sin(theta)], 
 62 |                                 [np.sin(theta),  np.cos(theta)]])
 63 |             src_poly_margin = np.matmul(src_poly_margin, rotMatrix.T)
 64 |             src_poly_margin -= np.amin(src_poly_margin, axis=0)
 65 |             bounding_box = np.amax(src_poly_margin, axis=0)
 66 |             offset = np.random.uniform((0,0), board_size - bounding_box)
 67 |             src_poly_margin += offset
 68 |             if not is_polygon_intersects(src_poly_margin, polygons):
 69 |                 src_poly = np.array([[0,0],[1,0],[1,1],[0,1]]) * marker_size
 70 |                 src_poly = np.matmul(src_poly, rotMatrix.T)
 71 |                 src_poly -= np.amin(src_poly, axis=0)
 72 |                 src_poly += offset + (bounding_box - np.amax(src_poly, axis=0))/2
 73 |                 polygons = np.append(polygons, [src_poly], axis=0)
 74 |         polygons -= np.amin(polygons, axis=(0,1))
 75 |         return polygons
 76 |     
 77 |     def place_regular(marker_min, marker_max, board_size):
 78 |         regular_max = marker_min + (marker_max-marker_min)//3 + 1
 79 |         norm_dims = np.array([[-1,-1],[1,-1],[1,1],[-1,1]]) * 0.5
 80 |         marker_size = np.random.randint(marker_min, regular_max)
 81 |         
 82 |         def checkerboard_regular():
 83 |             angle = lambda: np.random.choice([0,1,2,3])*np.pi/2
 84 |             r, c = marker_size, marker_size
 85 |             ofs = marker_size
 86 |             return r, c, ofs, angle
 87 | 
 88 |         def checkerboard_random():
 89 |             r, c, ofs, _ = checkerboard_regular()
 90 |             angle = lambda: np.random.uniform(0, np.pi)
 91 |             return r, c, ofs, angle
 92 | 
 93 |         def checkerboard_dense():
 94 |             _, _, ofs, angle = checkerboard_regular()
 95 |             r = np.random.randint(marker_size//2+3, marker_size+4)
 96 |             ofs += np.random.randint(safety_size, 15)
 97 |             c = ofs + np.random.randint(safety_size, 15)
 98 |             return r, c, ofs, angle
 99 | 
100 |         def grid_regular():
101 |             r, c, _, angle = checkerboard_regular()
102 |             ofs = 0
103 |             r += np.random.randint(safety_size, marker_size)
104 |             c = np.random.randint(safety_size, marker_size)
105 |             return r, c, ofs, angle
106 |         
107 |         def grid_regular2():
108 |             r, c, _, angle = checkerboard_regular()
109 |             angle = lambda: 0
110 |             ofs = 0
111 |             # r += 10
112 |             # c = 10
113 |             r += marker_size/3
114 |             c = marker_size/3
115 |             return r, c, ofs, angle
116 | 
117 |         def grid_skewed():
118 |             r, c, ofs, angle = checkerboard_regular()
119 |             r += np.random.randint(safety_size, marker_size)
120 |             c = ofs
121 |             return r, c, ofs, angle
122 | 
123 |         row_gap, column_gap, even_row_offset, angle_choice = np.random.choice([
124 |             checkerboard_regular, checkerboard_random, checkerboard_dense,
125 |             grid_regular, grid_regular2, grid_skewed],
126 |             p=p_reg
127 |         )()
128 |         
129 |         polygons = np.empty((0,4,2))
130 | 
131 |         cur_pos, index = np.array([0.0, 0.0]), 0
132 |         while np.all(cur_pos + marker_size < board_size):
133 |             while np.all(cur_pos + marker_size < board_size):
134 |                 theta = angle_choice()
135 |                 rotMatrix = np.array([[np.cos(theta), -np.sin(theta)], 
136 |                                     [np.sin(theta),  np.cos(theta)]])
137 |                 cur_poly = np.matmul(norm_dims, rotMatrix.T) * marker_size
138 |                 cur_poly += marker_size/2
139 |                 polygons = np.append(polygons, [cur_pos + cur_poly], axis=0)
140 |                 cur_pos += [marker_size+column_gap, 0]
141 |             cur_pos = np.array([0 if index%2 else even_row_offset, cur_pos[1]+row_gap])
142 |             index += 1
143 |         polygons -= np.amin(polygons, axis=(0,1))
144 |         pol_max = np.amax(polygons, axis=(0,1))
145 |         large_index = (pol_max > board_size)
146 |         if large_index.any():
147 |             polygons *= (np.array(board_size)[large_index] / pol_max[large_index])
148 |         return polygons
149 |     
150 |     def place_single(marker_min, marker_max, board_size):
151 |         marker_size = min(board_size) * np.random.uniform(0.8, 0.999)
152 |         polygon = np.array([[0,0],[1,0],[1,1],[0,1]]) * marker_size
153 |         return polygon.reshape(1,4,2)
154 |     
155 |     placer = np.random.choice([place_regular, place_random, place_single], p=p_reg_rand)
156 |     polygons = placer(marker_min, marker_max, board_size)
157 |     polygons += (board_size - np.amax(polygons, axis=(0,1)))/2
158 |     markers = np.dstack([polygons, np.zeros((*polygons.shape[:2],1))])[:,[0,1,3,2]]
159 | 
160 |     if len(class_array) == 0:
161 |         classes = np.random.randint(0, num_classes, size=len(markers))
162 |         # return [], []
163 |     else:
164 |         classes = []
165 |         for _ in range(len(markers)):
166 |             if len(class_array) == 0:
167 |                 break
168 |             # val = random.choice(class_array)
169 |             val = class_array[0]
170 |             class_array.remove(val)
171 |             classes.append(val)
172 |     return markers[:len(classes)], classes
173 | 
174 | def image_placer(
175 |     board_size=(210,296),
176 |     marker_ratio=(4,3),
177 |     margin_ratio=0.8,
178 |     marker_min=40,
179 |     marker_max=140,
180 |     num_classes=64,
181 |     class_array=[],
182 |     safety_size=4,
183 |     random_trials=75,
184 |     p_reg=[0.2, 0.3, 0.2, 0.15, 0, 0.15],
185 |     p_reg_rand=[0.57, 0.37, 0.06],
186 | ):
187 |     swp = False
188 |     if board_size[0] < board_size[1]:
189 |         board_size = (board_size[1], board_size[0])
190 |         swp = True
191 |     
192 |     mx, my = board_size[0] / 2, board_size[1] / 2
193 |     limx, limy = board_size[0] * margin_ratio, board_size[1] * margin_ratio
194 |     mulx, muly = (limx - mx) / marker_ratio[0], (limy - my) / marker_ratio[1]
195 |     mul = min(mulx, muly)
196 |     ux, uy = mx - mul * marker_ratio[0], my - mul * marker_ratio[1]
197 |     bx, by = mx + mul * marker_ratio[0], my + mul * marker_ratio[1]
198 |     markers = []
199 |     if not swp:
200 |         markers = [[[ux, uy, 0.0], [bx, uy, 0.0], [bx, by, 0.0], [ux, by, 0.0]]]
201 |     else:
202 |         markers = [[[by, ux, 0.0], [by, bx, 0.0], [uy, bx, 0.0], [uy, ux, 0.0]]]
203 |     
204 |     flip = np.random.randint(2, size=1).astype(np.bool).item()
205 |     if flip:
206 |         markers = [markers[0][2:], markers[0][:2]]
207 | 
208 |     classes = [np.random.randint(num_classes, size=1).item()]
209 |     return markers, classes
210 | 
211 | def marker_metadata_loader(cfg, marker_config_file):
212 |     from detectron2.data import MetadataCatalog
213 |     marker_config_path = Path(marker_config_file)
214 |     if marker_config_path.exists():
215 |         with open(marker_config_path) as cfg_file:
216 |             marker_config = json.load(cfg_file)
217 | 
218 |         markers = sorted(marker_config['markers'], key=lambda x: x['marker_id'])
219 |         binary_messages = [[float(i) for i in m['binary']] for m in markers]
220 |         marker_classes = [m['text'] for m in markers]
221 |         id_map = {m['marker_id']+1: m['marker_id'] for m in markers}
222 | 
223 |         cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(marker_classes)
224 | 
225 |         for dataset_name in cfg.DATASETS.TEST:
226 |             MetadataCatalog.get(dataset_name).set(
227 |                 messages=binary_messages, thing_classes=marker_classes,
228 |                 thing_dataset_id_to_contiguous_id=id_map,
229 |             )
230 |     else:
231 |         return False
232 |     return True


--------------------------------------------------------------------------------
/deepformable/utils/config.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | from detectron2.config import CfgNode as CN
  3 | 
  4 | def add_marker_generator_config(cfg: CN):
  5 |     _C = cfg
  6 |     _C.MODEL.MARKER_GENERATOR = CN()
  7 |     _C.MODEL.MARKER_GENERATOR.TRAINABLE = True
  8 |     _C.MODEL.MARKER_GENERATOR.NAME = "GeneralizedGenerator"
  9 |     _C.MODEL.MARKER_GENERATOR.MARKER_SIZE = (16, 16)
 10 |     _C.MODEL.MARKER_GENERATOR.ARUCO_DICT = "6x6_1000"
 11 |     _C.MODEL.MARKER_GENERATOR.BORDER_BITS = 0
 12 |     _C.MODEL.MARKER_GENERATOR.NUM_GENERATION_BITS = 36
 13 |     _C.MODEL.MARKER_GENERATOR.INIT_STD = 1.4
 14 |     _C.MODEL.MARKER_GENERATOR.INIT_METHOD = "uniform"
 15 |     _C.MODEL.MARKER_GENERATOR.CONV_DIMS = [[8],[6]]
 16 |     _C.MODEL.MARKER_GENERATOR.FC_DIMS = [256,256]
 17 |     _C.MODEL.MARKER_GENERATOR.UPSAMPLE_TYPE = "bilinear"
 18 |     _C.MODEL.MARKER_GENERATOR.UPSAMPLE_SCALE = 2
 19 |     _C.MODEL.MARKER_GENERATOR.INITIAL_SIZE = 4
 20 |     _C.MODEL.MARKER_GENERATOR.NORM_TYPE = "adain"
 21 |     _C.MODEL.MARKER_GENERATOR.ACTIVATION_TYPE = "leaky"
 22 |     _C.MODEL.MARKER_GENERATOR.RESIDUAL = False
 23 |     _C.MODEL.MARKER_GENERATOR.EQUALIZED = False
 24 |     _C.MODEL.MARKER_GENERATOR.PADDING_MODE = "zeros"
 25 |     _C.MODEL.MARKER_GENERATOR.FINAL_CONV_KERNEL_SIZE = 3
 26 |     _C.MODEL.MARKER_GENERATOR.MARKERS_FILE_LOCATION = "data/e2e_markers.npz"
 27 |     _C.MODEL.MARKER_GENERATOR.DATASET_ROOT = "/Data/Datasets/mirflickr_images1"
 28 | 
 29 | 
 30 | def add_intermediate_augmentor_config(cfg: CN):
 31 |     _C = cfg
 32 |     _C.INTERMEDIATE_AUGMENTOR = CN()
 33 |     # _C.INTERMEDIATE_AUGMENTOR.AUG_LIST = [
 34 |     # "PerspectiveAugmentor", "RadialDistortionAugmentor", "TpsTransformer", "ImageResize", 
 35 |     # "DefocusBlurAugmentor", "MotionBlurAugmentor", "HueShiftAugmentor", 
 36 |     # "BrightnessAugmentor", "NoiseAugmentor", "GammaAugmentor", "GammaCorrector", "JPEGAugmentor"]   # Make sure the correct order of augmentations
 37 |     # _C.INTERMEDIATE_AUGMENTOR.EXEC_PROBA_LIST = [0.55, 0.55, 0.55, 1.0, 0.4, 0.4, 0.4, 0.4, 0.45, 0.4, 1.0, 0.4]   # Make sure the correct order of augmentations
 38 |     # _C.INTERMEDIATE_AUGMENTOR.TEST_STRENGTH_LIST = [0.6, 0.6, 0.6, 1.0, 0.3, 0.3, 0.3, 0.3, 0.4, 0.1, 1.0, 0.4]
 39 |     _C.INTERMEDIATE_AUGMENTOR.AUG_LIST = ["GammaCorrector"]
 40 |     _C.INTERMEDIATE_AUGMENTOR.EXEC_PROBA_LIST = [1.0]
 41 |     _C.INTERMEDIATE_AUGMENTOR.PerspectiveAugmentor = CN()
 42 |     _C.INTERMEDIATE_AUGMENTOR.PerspectiveAugmentor.CORNER_SHIFT_RANGE = (0.0, 0.2, 0.12)
 43 |     _C.INTERMEDIATE_AUGMENTOR.GammaAugmentor = CN()
 44 |     _C.INTERMEDIATE_AUGMENTOR.GammaAugmentor.GAMMA_RANGE = (0.85, 1.15, 1.0)    # Original is ~0.75
 45 |     _C.INTERMEDIATE_AUGMENTOR.DefocusBlurAugmentor = CN()
 46 |     _C.INTERMEDIATE_AUGMENTOR.DefocusBlurAugmentor.BLUR_RADIUS_RANGE = (0.5, 2.0, 1.5) # Original is not continuous
 47 |     _C.INTERMEDIATE_AUGMENTOR.MotionBlurAugmentor = CN()
 48 |     _C.INTERMEDIATE_AUGMENTOR.MotionBlurAugmentor.BLUR_RADIUS_RANGE = (0.51, 3.0, 2.0)
 49 |     _C.INTERMEDIATE_AUGMENTOR.HueShiftAugmentor = CN()
 50 |     _C.INTERMEDIATE_AUGMENTOR.HueShiftAugmentor.HUE_SHIFT_RANGE = (0.0, 0.1, 0.04) # Original 0.15
 51 |     _C.INTERMEDIATE_AUGMENTOR.BrightnessAugmentor = CN()
 52 |     _C.INTERMEDIATE_AUGMENTOR.BrightnessAugmentor.BRIGHTNESS_RANGE = (0.2, 1.2, 0.4)
 53 |     _C.INTERMEDIATE_AUGMENTOR.NoiseAugmentor = CN()
 54 |     _C.INTERMEDIATE_AUGMENTOR.NoiseAugmentor.NOISE_RANGE = (0.0, 0.012, 0.05)
 55 |     
 56 |     # Jpeg range is starts from 1 coefficient to 63, ranging from (0-62)
 57 |     _C.INTERMEDIATE_AUGMENTOR.JPEGAugmentor = CN()
 58 |     _C.INTERMEDIATE_AUGMENTOR.JPEGAugmentor.Y_QUALITY_RANGE = (12, 61, 20) # Andreas_prev (10,20)
 59 |     _C.INTERMEDIATE_AUGMENTOR.JPEGAugmentor.UV_QUALITY_RANGE = (10, 60, 15) # Andreas_prev (4,8)
 60 |     
 61 |     _C.INTERMEDIATE_AUGMENTOR.MAX_IMAGE_SIZE = (1080, 1920) # Andreas_prev (4,8)
 62 |     _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor = CN()
 63 |     _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.UNDISTORT_ITER = 20
 64 |     _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.FOCAL_LENGTH_RANGE = (1.4, 2.0, 1.75)
 65 |     _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.CENTER_SHIFT_RANGE = (0.0, 0.1, 0.06)
 66 |     _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.DISTORTION_RANGE = (0.0, 1.25, 0.5)
 67 |     _C.INTERMEDIATE_AUGMENTOR.TpsTransformer = CN()
 68 |     # Number of control points (vertical,horizontal). 
 69 |     # More points yields increase computations and smaller scale warping patterns
 70 |     _C.INTERMEDIATE_AUGMENTOR.TpsTransformer.CTRL_PTS_SIZE = (16, 20)
 71 |     # Maximum displacement of the control points. Should be bellow 2 / max(CTRL_PTS_HEIGHT, CTRL_PTS_WIDTH) to prevent unrealistic behaviour
 72 |     _C.INTERMEDIATE_AUGMENTOR.TpsTransformer.WARP_RANGE = (0, 0.02, 0.012)
 73 |     # coordinates location maximum error in pixel, as we iteratively optimize their location
 74 |     _C.INTERMEDIATE_AUGMENTOR.TpsTransformer.STOP_THRESHOLD = 0.05
 75 |     # Maximum number of iterations if the threshold is not reached
 76 |     _C.INTERMEDIATE_AUGMENTOR.TpsTransformer.MAX_ITER = 1000
 77 | 
 78 | 
 79 | def add_roi_head_config(cfg: CN):
 80 |     _C = cfg
 81 |     _C.MODEL.ROI_TRANSFORM_HEAD = CN()
 82 |     _C.MODEL.ROI_TRANSFORM_HEAD.NAME = "SpatialTransformerHeadV2"
 83 |     _C.MODEL.ROI_TRANSFORM_HEAD.NORM = ""
 84 |     _C.MODEL.ROI_TRANSFORM_HEAD.POOLER_SAMPLING_RATIO = 0
 85 |     _C.MODEL.ROI_TRANSFORM_HEAD.POOLER_TYPE = "ROIAlignV2"
 86 |     _C.MODEL.ROI_TRANSFORM_HEAD.POOLER_RESOLUTION = 12
 87 |     _C.MODEL.ROI_TRANSFORM_HEAD.TRANSFORMER_RESOLUTION = 8
 88 |     _C.MODEL.ROI_TRANSFORM_HEAD.NUM_FC = 2
 89 |     _C.MODEL.ROI_TRANSFORM_HEAD.FC_DIM = 512
 90 |     _C.MODEL.ROI_TRANSFORM_HEAD.NUM_CONV = 0
 91 |     _C.MODEL.ROI_TRANSFORM_HEAD.CONV_DIM = 256
 92 |     _C.MODEL.ROI_TRANSFORM_HEAD.LOSS_WEIGHT = 1.0
 93 |     _C.MODEL.ROI_TRANSFORM_HEAD.FC_COMMON_DIMS = [256]
 94 |     _C.MODEL.ROI_TRANSFORM_HEAD.FC_CORNER_DIMS = [128]
 95 |     _C.MODEL.ROI_TRANSFORM_HEAD.FC_RESAMPLE_DIMS = [128]
 96 |     _C.MODEL.ROI_TRANSFORM_HEAD.AFFINE_PREDICTOR_ON = False
 97 | 
 98 |     _C.MODEL.ROI_CORNER_HEAD = CN()
 99 |     _C.MODEL.ROI_CORNER_HEAD.NAME = "CornerHeadV2"
100 |     _C.MODEL.ROI_CORNER_HEAD.SMOOTH_L1_BETA = 0.0
101 |     _C.MODEL.ROI_CORNER_HEAD.LOSS_WEIGHT = 0.1 # 1.2 for CornerHead
102 |     _C.MODEL.ROI_CORNER_HEAD.REGRESSION_WEIGHTS = (10.0, 10.0)
103 |     _C.MODEL.ROI_CORNER_HEAD.SAMPLE_RESOLUTION = 8
104 |     _C.MODEL.ROI_CORNER_HEAD.CONV_DIMS = [32]
105 |     _C.MODEL.ROI_CORNER_HEAD.FC_DIMS = [128, 64]
106 | 
107 |     _C.MODEL.ROI_DECODER_HEAD = CN()
108 |     _C.MODEL.ROI_DECODER_HEAD.NAME = "DecoderHead"
109 |     _C.MODEL.ROI_DECODER_HEAD.DECODER_ON = True
110 |     _C.MODEL.ROI_DECODER_HEAD.LOSS_TYPE = "mse"
111 |     _C.MODEL.ROI_DECODER_HEAD.CONV_DIMS = []
112 |     _C.MODEL.ROI_DECODER_HEAD.FC_DIMS = [512, 256]
113 |     _C.MODEL.ROI_DECODER_HEAD.DECODING_LOSS_WEIGHT = 10.0
114 |     _C.MODEL.ROI_DECODER_HEAD.CLASS_LOSS_WEIGHT = 0.5
115 | 
116 |     _C.MODEL.PROPOSAL_GENERATOR.ADAPTIVE_LOSS = True
117 |     
118 |     _C.TEST.SORT_INSTANCES = True
119 |     _C.TEST.APPLY_NMS = True
120 |     _C.TEST.DECODING_SCORE_BY_MESSAGE_CONFIDENCE = True # Otherwise uses objectness score
121 |     _C.TEST.MARKER_POSTPROCESSING = True
122 |     _C.TEST.LOAD_MESSAGES = True
123 |     # This option choses which scoring criteria to use for NMS. Options are:
124 |     # "bit_similarity" uses the distance of predictions to the provided class of messages [used option in the paper]
125 |     # "message_confidence" uses the confidence of how each bit is predicted
126 |     # "objectness" uses the predicted objectness 
127 |     # "mc_obj_product" uses the product of "message_confidence" and "objectness"
128 |     # "mc_obj_bs_product" uses the product of "message_confidence", "bit_similarity" and "objectness"
129 |     _C.TEST.NMS_SCORE_CRITERIA = "mc_obj_bs_product" 
130 | 
131 | 
132 | def add_vovnet_config(cfg: CN):
133 |     _C = cfg
134 |     _C.MODEL.VOVNET = CN()
135 |     _C.MODEL.VOVNET.CONV_BODY = "V-39-eSE"
136 |     _C.MODEL.VOVNET.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"]
137 | 
138 |     # Options: FrozenBN, GN, "SyncBN", "BN"
139 |     _C.MODEL.VOVNET.NORM = "FrozenBN"
140 |     _C.MODEL.VOVNET.OUT_CHANNELS = 256
141 |     _C.MODEL.VOVNET.BACKBONE_OUT_CHANNELS = 256
142 | 
143 | 
144 | def add_model_other_config(cfg: CN):
145 |     _C = cfg
146 |     _C.MODEL.PREDICTIONS_PATH = 'datasets/evaluation/e2etags/flat/all.json'
147 |     _C.MODEL.SAVE_RENDERED = False
148 |     _C.MODEL.SAVE_RENDERED_DIR = 'datasets/evaluation/e2etags/rendered_outputs/'
149 | 
150 |     _C.INPUT.PREDICTOR_RESIZE = False
151 |     _C.INPUT.FILTER_BOX_THRESHOLD = 30
152 |     _C.INPUT.FILTER_BOX_THRESHOLD_TEST = 5
153 |     _C.INPUT.PLACEMENT_MARKER_MINMAX = (40, 190)
154 |     _C.INPUT.MAX_MARKERS_PER_IMAGE = 128
155 |     _C.INPUT.MARKER_TEST_SIZE = 50
156 | 
157 |     _C.RENDERER = CN()
158 |     _C.RENDERER.NAME = "MarkerRenderer"
159 |     _C.RENDERER.SHADING_METHOD = "cook-torrance"
160 |     _C.RENDERER.GAMMA = 2.2
161 |     _C.RENDERER.EPSILON = 1e-8
162 |     _C.RENDERER.BLUR_RANGE = (1.5, 2.0, 1.0)    # Third parameter is testing value
163 |     _C.RENDERER.ROUGHNESS_RANGE = (0.14, 0.6, 0.25)
164 |     _C.RENDERER.DIFFUSE_RANGE = (0.9, 1.0, 0.94)
165 |     _C.RENDERER.NORMAL_NOISE_RANGE = (0.0, 0.015, 0.005)
166 |     _C.RENDERER.SPECULAR_RANGE = (0.02, 1.0, 0.35)
167 | 
168 |     _C.DEMO = CN()
169 |     _C.DEMO.DRAW_MASK = False
170 |     _C.DEMO.DRAW_BBOX = True
171 |     _C.DEMO.DRAW_CORNERS = True
172 |     _C.DEMO.COLOR_REDGREEN_THRESHOLD = 0.0
173 | 
174 | def get_cfg() -> CN:
175 |     from detectron2.config.defaults import _C
176 |     cfg = _C.clone()
177 |     add_marker_generator_config(cfg)
178 |     add_intermediate_augmentor_config(cfg)
179 |     add_roi_head_config(cfg)
180 |     add_vovnet_config(cfg)
181 |     add_model_other_config(cfg)
182 |     return cfg


--------------------------------------------------------------------------------
/deepformable/utils/env.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implemented by Facebook, Inc. and its affiliates.
 3 | Edited by Mustafa B. Yaldiz
 4 | """
 5 | import torch
 6 | import detectron2
 7 | import numpy as np
 8 | import random
 9 | 
10 | 
11 | _DEEPFORMABLE_ENV_SETUP_DONE = False
12 | 
13 | def setup_environment():
14 |     # Perform environment setup work.
15 |     global _DEEPFORMABLE_ENV_SETUP_DONE
16 |     if _DEEPFORMABLE_ENV_SETUP_DONE:
17 |         return
18 |     _DEEPFORMABLE_ENV_SETUP_DONE = True
19 | 
20 |     def get_version(module, digit=2):
21 |         return tuple(map(int, module.__version__.split(".")[:digit]))
22 |     
23 |     # fmt: off
24 |     assert get_version(torch) >= (1, 6), "Requires torch>=1.6"
25 |     assert get_version(detectron2, digit=3) >= (0, 4, 1), "Requires detectron2>=0.4.1"
26 |     # assert get_version(shapely) >= (1, 7, 1), "Requires shapely>=1.7.1"
27 |     # import shapely
28 |     # fmt: on
29 | 
30 | 
31 | # Use detectron2.utils.env.seed_all_rng to set the seed
32 | # to specified value.
33 | 
34 | def save_seed_info():
35 |     # Stores random seed states
36 |     return {
37 |         "torch": torch.get_rng_state(),
38 |         "numpy": np.random.get_state(),
39 |         "random": random.getstate()
40 |     }
41 | 
42 | def load_seed_info(seed_info):
43 |     # Loads seed states
44 |     torch.set_rng_state(seed_info["torch"])
45 |     np.random.set_state(seed_info["numpy"])
46 |     random.setstate(seed_info["random"])
47 | 


--------------------------------------------------------------------------------
/deepformable/utils/general_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | import os
 3 | import pathlib
 4 | import cv2
 5 | from tqdm import tqdm
 6 | from os.path import isfile, join
 7 | 
 8 | def img_flexible_reader(directory, num_skip_frames=0, tqdm_on=False):
 9 |     """
10 |     Reads videos in sorted order for a give directory, 
11 |     if path is a file tries to read it.
12 |     """
13 |     directory = pathlib.Path(directory)
14 |     directory = str(directory.resolve())
15 | 
16 |     files = [directory]
17 |     if not isfile(directory):
18 |         files = [join(directory, f) for f in sorted(os.listdir(directory)) if isfile(join(directory, f))]
19 |     
20 |     total_frames = 0
21 |     for f in files:
22 |         cap = cv2.VideoCapture(f)
23 |         frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
24 |         frame_count = frame_count if frame_count >= 0 else 1
25 |         total_frames += int(frame_count)
26 | 
27 |     skip_count = 0
28 |     if tqdm_on: pbar = tqdm(total=total_frames, smoothing=0)
29 |     for f in files:
30 |         cap = cv2.VideoCapture(f)
31 |         num_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
32 |         num_frames = int(num_frames if num_frames >= 0 else 1)
33 |         prev_frame, frame = None, None
34 |         for _ in range(num_frames):
35 |             prev_frame = frame if frame is not None else prev_frame
36 |             success, frame = cap.read()
37 |             if tqdm_on: pbar.update()
38 |             skip_count += 1
39 |             if success and skip_count == num_skip_frames+1:
40 |                 skip_count = 0
41 |                 yield frame
42 |     if tqdm_on: pbar.close()
43 | 
44 | def if_continue_execution(message="Continue (yes/no): "):
45 |     while True:
46 |         answer = str(input(message)).lower()
47 |         if answer in ["yes", "y", ""]:
48 |             return True
49 |         elif answer in ["no", "n"]:
50 |             return False


--------------------------------------------------------------------------------
/deepformable/utils/image_utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import math
  3 | from typing import List, Optional
  4 | import numpy as np
  5 | 
  6 | import kornia
  7 | import torch
  8 | import  torch.nn.functional as F
  9 | 
 10 | def sample_param(
 11 |     param_range, shape=1, strength=None,
 12 |     training=True, device=torch.device("cpu")
 13 | ):
 14 |     min_v, max_v, test_v = param_range
 15 |     if training or strength != None:
 16 |         if strength:
 17 |             rand_val = torch.ones(shape, device=device) * strength
 18 |         else:
 19 |             rand_val = torch.rand(shape, device=device)
 20 |         rand_val = min_v + (max_v-min_v) * rand_val
 21 |     else:
 22 |         rand_val = torch.ones(shape, device=device) * test_v
 23 |     return rand_val.item() if shape==1 else rand_val
 24 | 
 25 | @torch.jit.script
 26 | def get_disk_blur_kernel(
 27 |     kernel_radius: float, upscale_factor: int = 4, 
 28 |     device: torch.device=torch.device("cpu")
 29 | ):
 30 |     # TODO: Approximate implementation, fix for exact one
 31 |     r = kernel_radius * upscale_factor
 32 |     kernel_scaled_size = (math.ceil(kernel_radius)*2+1)*upscale_factor
 33 |     kernel = torch.ones(kernel_scaled_size, kernel_scaled_size, device=device)
 34 |     x, y = torch.meshgrid([
 35 |         torch.linspace(
 36 |             -(kernel_scaled_size-1.0)/2.0, (kernel_scaled_size-1.0)/2.0, kernel_scaled_size, device=device)]*2)
 37 |     kernel[x**2 + y**2 > r**2] = 0
 38 |     kernel = F.avg_pool2d(kernel.unsqueeze(0), (upscale_factor, upscale_factor))
 39 |     return (kernel / torch.sum(kernel, (1, 2)))
 40 | 
 41 | def rgb_to_hls(image: torch.Tensor) -> torch.Tensor:
 42 |     r"""Convert an RGB image to HLS
 43 |     The image data is assumed to be in the range of (0, 1).
 44 | 
 45 |     Args:
 46 |         input (torch.Tensor): RGB Image to be converted to HLS.
 47 | 
 48 | 
 49 |     Returns:
 50 |         torch.Tensor: HLS version of the image.
 51 |     """
 52 | 
 53 |     if not torch.is_tensor(image):
 54 |         raise TypeError("Input type is not a torch.Tensor. Got {}".format(
 55 |             type(image)))
 56 | 
 57 |     if len(image.shape) < 3 or image.shape[-3] != 3:
 58 |         raise ValueError("Input size must have a shape of (*, 3, H, W). Got {}"
 59 |                          .format(image.shape))
 60 | 
 61 |     r: torch.Tensor = image[..., 0, :, :]
 62 |     g: torch.Tensor = image[..., 1, :, :]
 63 |     b: torch.Tensor = image[..., 2, :, :]
 64 | 
 65 |     maxc: torch.Tensor = image.max(-3)[0]
 66 |     minc: torch.Tensor = image.min(-3)[0]
 67 | 
 68 |     imax: torch.Tensor = image.max(-3)[1]
 69 | 
 70 |     l: torch.Tensor = (maxc + minc) / 2  # luminance
 71 |     l2 = maxc + minc + 1e-8
 72 | 
 73 |     deltac: torch.Tensor = maxc - minc
 74 | 
 75 |     s: torch.Tensor = torch.where(l < 0.5, deltac / (l2), deltac /
 76 |                                   (torch.tensor(2.) - (l2)))  # saturation
 77 | 
 78 |     deltac = deltac + 1e-8
 79 | 
 80 |     hi: torch.Tensor = torch.zeros_like(deltac)
 81 | 
 82 |     hi[imax == 0] = (((g - b) / deltac) % 6)[imax == 0]
 83 |     hi[imax == 1] = (((b - r) / deltac) + 2)[imax == 1]
 84 |     hi[imax == 2] = (((r - g) / deltac) + 4)[imax == 2]
 85 | 
 86 |     h: torch.Tensor = 2. * kornia.constants.pi.to(image.device) * (60. * hi) / 360.  # hue [0, 2*pi]
 87 | 
 88 |     image_hls: torch.Tensor = torch.stack([h, l, s], dim=-3)
 89 | 
 90 |     image_hls[torch.isnan(image_hls)] = 0.
 91 | 
 92 |     return image_hls
 93 | 
 94 | def hls_to_rgb(image: torch.Tensor) -> torch.Tensor:
 95 |     r"""Convert an HLS image to RGB
 96 |     The image data is assumed to be in the range of (0, 1).
 97 | 
 98 |     Args:
 99 |         input (torch.Tensor): HLS Image to be converted to RGB.
100 | 
101 | 
102 |     Returns:
103 |         torch.Tensor: RGB version of the image.
104 |     """
105 | 
106 |     if not torch.is_tensor(image):
107 |         raise TypeError("Input type is not a torch.Tensor. Got {}".format(
108 |             type(image)))
109 | 
110 |     if len(image.shape) < 3 or image.shape[-3] != 3:
111 |         raise ValueError("Input size must have a shape of (*, 3, H, W). Got {}"
112 |                          .format(image.shape))
113 | 
114 |     h: torch.Tensor = image[..., 0, :, :] * 360 / (2 * kornia.constants.pi.to(image.device))
115 |     l: torch.Tensor = image[..., 1, :, :]
116 |     s: torch.Tensor = image[..., 2, :, :]
117 | 
118 |     kr = (0 + h / 30) % 12
119 |     kg = (8 + h / 30) % 12
120 |     kb = (4 + h / 30) % 12
121 |     a = s * torch.min(l, torch.tensor(1.) - l)
122 | 
123 |     ones_k = torch.ones_like(kr)
124 | 
125 |     fr: torch.Tensor = l - a * torch.max(torch.min(torch.min(kr - torch.tensor(3.),
126 |                                                              torch.tensor(9.) - kr), ones_k), -1 * ones_k)
127 |     fg: torch.Tensor = l - a * torch.max(torch.min(torch.min(kg - torch.tensor(3.),
128 |                                                              torch.tensor(9.) - kg), ones_k), -1 * ones_k)
129 |     fb: torch.Tensor = l - a * torch.max(torch.min(torch.min(kb - torch.tensor(3.),
130 |                                                              torch.tensor(9.) - kb), ones_k), -1 * ones_k)
131 | 
132 |     out: torch.Tensor = torch.stack([fr, fg, fb], dim=-3)
133 | 
134 |     return out


--------------------------------------------------------------------------------
/deepformable/utils/inpaint_utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import numpy as np
  3 | import cv2
  4 | import multiprocessing, queue
  5 | import threading
  6 | 
  7 | class WorkerBase:
  8 |     def __init__(
  9 |         self,
 10 |         processing_workers=1,
 11 |         writing_workers=1,
 12 |         max_task_size=32,
 13 |         max_write_size=16,
 14 |         use_multiprocessing=False,
 15 |     ):
 16 |         self.use_multiprocessing = use_multiprocessing
 17 |         if use_multiprocessing:
 18 |             self.tasks = multiprocessing.JoinableQueue(maxsize=max_task_size)
 19 |             self.results = multiprocessing.JoinableQueue(maxsize=max_write_size)
 20 |             self.processing_workers = [
 21 |                 multiprocessing.Process(target=self.processing_worker) for _ in range(processing_workers)]
 22 |             self.writing_workers = [
 23 |                 multiprocessing.Process(target=self.writing_worker) for _ in range(writing_workers)]
 24 |         else:
 25 |             self.tasks = queue.Queue(maxsize=max_task_size)
 26 |             self.results = queue.Queue(maxsize=max_write_size)
 27 |             self.processing_workers = [
 28 |                 threading.Thread(target=self.processing_worker) for _ in range(processing_workers)]
 29 |             self.writing_workers = [
 30 |                 threading.Thread(target=self.writing_worker) for _ in range(writing_workers)]
 31 |         for p in [*self.processing_workers, *self.writing_workers]:
 32 |             p.start()
 33 |     
 34 |     def wait_finish(self):
 35 |         for _ in self.processing_workers:
 36 |             self.tasks.put(None)
 37 |         self.tasks.join()
 38 |         for _ in self.writing_workers:
 39 |             self.results.put(None)
 40 |         self.results.join()
 41 |         for p in [*self.processing_workers, *self.writing_workers]:
 42 |             p.join()  
 43 |     
 44 |     def processing_worker(self):
 45 |         while True:
 46 |             data = self.tasks.get()
 47 |             if data is None:
 48 |                 self.tasks.task_done()
 49 |                 break
 50 |             self.results.put(self.process_data(data))
 51 |             self.tasks.task_done()
 52 |     
 53 |     def writing_worker(self):
 54 |         while True:
 55 |             result = self.results.get()
 56 |             if result is None:
 57 |                 self.results.task_done()
 58 |                 break
 59 |             self.write_result(result)
 60 |             self.results.task_done()
 61 |     
 62 |     def process_data(self, data):
 63 |         return data
 64 | 
 65 |     def write_result(self, result):
 66 |         print(result)
 67 | 
 68 |     def __call__(self, data):
 69 |         if len(self.processing_workers) == 0:
 70 |             if len(self.writing_workers) == 0:
 71 |                 self.write_result(self.process_data(data))
 72 |             else:
 73 |                 self.results.put(self.process_data(data))
 74 |             return
 75 |         self.tasks.put(data)
 76 | 
 77 | 
 78 | class NoInpaint(WorkerBase):
 79 |     def __init__(
 80 |         self,
 81 |         processing_workers=0,
 82 |         writing_workers=8,
 83 |         max_task_size=24,
 84 |         max_write_size=24,
 85 |         use_multiprocessing=False,
 86 |     ):
 87 |         super().__init__(
 88 |             processing_workers, writing_workers, max_task_size,
 89 |             max_write_size, use_multiprocessing)
 90 | 
 91 |     def process_data(self, data):
 92 |         file_path, undistorted_frame, _, _, _ = data
 93 |         return file_path, undistorted_frame
 94 |     
 95 |     def write_result(self, result):
 96 |         cv2.imwrite(*result)
 97 | 
 98 | 
 99 | def generate_marker_mask(
100 |     undistorted_frame, 
101 |     cur_annotations, 
102 |     markers_world, 
103 |     mtx,
104 |     margin_ratio=10,
105 | ):
106 |     mask = np.zeros(undistorted_frame.shape)
107 |     for ann, markers in zip(cur_annotations, markers_world): 
108 |         markersw_margin = (markers - np.roll(markers, 2, 1))/margin_ratio + markers
109 |         markersw_margin = cv2.projectPoints(
110 |             markersw_margin.reshape(-1,3), 
111 |             np.array(ann['rvec']), np.array(ann['tvec']),
112 |                 mtx, None)[0].reshape(-1, 4, 2)
113 |         for p in markersw_margin:
114 |             cv2.fillConvexPoly(mask, np.int32(p), (1.0, 1.0, 1.0), cv2.LINE_4)
115 |     return mask
116 | 
117 | 
118 | class OpenCVInpaint(NoInpaint):
119 |     def __init__(
120 |         self,
121 |         processing_workers=8,
122 |         writing_workers=8,
123 |         max_task_size=24,
124 |         max_write_size=24,
125 |         use_multiprocessing=False,
126 |     ):
127 |         super().__init__(
128 |             processing_workers, writing_workers, max_task_size,
129 |             max_write_size, use_multiprocessing)
130 | 
131 |     def process_data(self, data):
132 |         file_path, undistorted_frame, cur_annotations, markers_world, mtx = data
133 |         mask = generate_marker_mask(undistorted_frame, cur_annotations, markers_world, mtx)
134 |         inpainted_frame = cv2.inpaint(
135 |             np.uint8(undistorted_frame), 
136 |             np.uint8(mask[...,0]*255), 
137 |             5, cv2.INPAINT_TELEA)
138 |         return file_path, inpainted_frame
139 | 
140 | if __name__ == '__main__':
141 |     test_worker = WorkerBase(
142 |         processing_workers=4, use_multiprocessing=True)
143 |     for i in range(10, 23):
144 |         # print(i)
145 |         test_worker(i)
146 |     test_worker.wait_finish()


--------------------------------------------------------------------------------
/docker/Cpu.Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | 
 3 | ENV DEBIAN_FRONTEND noninteractive
 4 | 
 5 | RUN apt-get update && apt-get install -y --no-install-recommends \
 6 | 		ca-certificates git wget vim cmake ninja-build build-essential curl \
 7 |     	libjpeg-dev libpng-dev x11-apps v4l-utils unzip \
 8 | 		rsync ffmpeg psmisc libcairo2-dev libgif-dev libpango1.0-dev \
 9 |   	&& rm -rf /var/lib/apt/lists/*
10 | 
11 | WORKDIR /opt
12 | ENV LC_ALL C.UTF-8
13 | ENV LANG C.UTF-8
14 | 
15 | RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
16 |      && chmod +x ~/miniconda.sh \
17 |      && ~/miniconda.sh -b -p /opt/conda \
18 |      && rm ~/miniconda.sh \
19 |      && /opt/conda/bin/conda install -y python=3.7 \
20 | 	 && /opt/conda/bin/conda install -y pytorch=1.8.2 torchvision=0.9.2 cpuonly -c pytorch-lts \
21 | 	 &&  /opt/conda/bin/conda install -y tensorboard pycairo matplotlib scikit-image jupyter ninja cython \
22 | 	 		typing future pytest scipy scikit-learn \
23 | 	 && /opt/conda/bin/conda install -y -c conda-forge plyfile \
24 | 	 && /opt/conda/bin/python -m pip install -U  pycocotools==2.0.4 \
25 | 	 		shapely==1.8.0 opencv-contrib-python==4.5.5.62 kornia==0.6.2 \
26 | 			glfw ipympl pyrr future-fstrings PyOpenGL PyOpenGL_accelerate \
27 | 	 && /opt/conda/bin/conda clean -ya
28 | 
29 | ENV PATH /opt/conda/bin:$PATH
30 | ENV PATH /root/.local/bin:$PATH
31 | ENV FVCORE_CACHE="/tmp"
32 | 
33 | RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo \
34 |      && cd detectron2_repo && git checkout "v0.6" \
35 |      && MAX_JOBS=1 python -m pip install -e .
36 | 
37 | # Installation command for original AprilTag implementation
38 | # RUN git clone --recurse-submodules https://github.com/AprilRobotics/apriltag.git \
39 | # 	&& cd apriltag && mkdir /root/.local && ln -s /opt/conda/lib /root/.local/lib \
40 | #     && mkdir build && cd build \
41 | # 	&& cmake -DPYTHON_EXECUTABLE=/opt/conda/bin/python -DPYTHON_PACKAGES_PATH=/opt/conda/lib/python3.8/site-packages/ \
42 | #      -DCMAKE_INSTALL_PREFIX=/opt/conda/ -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())")  \
43 | #      -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") ..\
44 | #     && make -j8 && make install
45 | 
46 | WORKDIR /


--------------------------------------------------------------------------------
/docker/DeepfillInpaint.Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM tensorflow/tensorflow:1.15.2-gpu-py3-jupyter
 2 | 
 3 | ENV DEBIAN_FRONTEND noninteractive
 4 | RUN apt-get update && apt-get install -y --no-install-recommends \
 5 |     python-opencv wget git cmake \
 6 |     libcairo2-dev libjpeg-dev libpango1.0-dev \
 7 |     libgif-dev build-essential
 8 | 
 9 | RUN python -m pip install git+https://github.com/JiahuiYu/neuralgym \
10 |     && python -m pip uninstall -y enum34 \
11 |     && python -m pip install pyyaml opencv-python \
12 |         opencv-contrib-python tqdm Pillow pycairo shapely
13 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04
 2 | # If Cuda version is never than your host, you must modify above image with 
 3 | # the supported one and cudatoolkit=$cuda_version below for pytorch installation.
 4 | 
 5 | ENV DEBIAN_FRONTEND noninteractive
 6 | 
 7 | RUN apt-get update && apt-get install -y --no-install-recommends \
 8 | 		ca-certificates git wget vim cmake ninja-build build-essential curl \
 9 |     	libjpeg-dev libpng-dev x11-apps v4l-utils unzip \
10 | 		rsync ffmpeg psmisc libcairo2-dev libgif-dev libpango1.0-dev \
11 |   	&& rm -rf /var/lib/apt/lists/*
12 | 
13 | WORKDIR /opt
14 | ENV LC_ALL C.UTF-8
15 | ENV LANG C.UTF-8
16 | 
17 | RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
18 |      && chmod +x ~/miniconda.sh \
19 |      && ~/miniconda.sh -b -p /opt/conda \
20 |      && rm ~/miniconda.sh \
21 |      && /opt/conda/bin/conda install -y python=3.7 \
22 | 	 && /opt/conda/bin/conda install -y pytorch=1.8.2 torchvision=0.9.2 cudatoolkit=11.1 -c pytorch-lts -c nvidia \
23 | 	 &&  /opt/conda/bin/conda install -y tensorboard pycairo matplotlib scikit-image jupyter ninja cython \
24 | 	 		typing future pytest scipy scikit-learn \
25 | 	 && /opt/conda/bin/conda install -y -c conda-forge plyfile \
26 | 	 && /opt/conda/bin/python -m pip install -U  pycocotools==2.0.4 \
27 | 	 		shapely==1.8.0 opencv-contrib-python==4.5.5.62 kornia==0.6.2 \
28 | 			glfw ipympl pyrr future-fstrings PyOpenGL PyOpenGL_accelerate \
29 | 	 && /opt/conda/bin/conda clean -ya
30 | 
31 | ENV PATH /opt/conda/bin:$PATH
32 | ENV PATH /root/.local/bin:$PATH
33 | ENV FORCE_CUDA="1"
34 | ARG TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing"
35 | ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
36 | ENV FVCORE_CACHE="/tmp"
37 | 
38 | RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo \
39 |      && cd detectron2_repo && git checkout "v0.6" \
40 |      && python -m pip install -e .
41 | 
42 | # Installation command for original AprilTag implementation
43 | # RUN git clone --recurse-submodules https://github.com/AprilRobotics/apriltag.git \
44 | # 	&& cd apriltag && mkdir /root/.local && ln -s /opt/conda/lib /root/.local/lib \
45 | #     && mkdir build && cd build \
46 | # 	&& cmake -DPYTHON_EXECUTABLE=/opt/conda/bin/python -DPYTHON_PACKAGES_PATH=/opt/conda/lib/python3.7/site-packages/ \
47 | #      -DCMAKE_INSTALL_PREFIX=/opt/conda/ -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())")  \
48 | #      -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") ..\
49 | #     && make -j8 && make install
50 | 
51 | WORKDIR /


--------------------------------------------------------------------------------
/docker/env_cpu.yml:
--------------------------------------------------------------------------------
 1 | name: base
 2 | channels:
 3 |   - pytorch-lts
 4 |   - conda-forge
 5 |   - defaults
 6 | dependencies:
 7 |   - pytorch=1.8.2=py3.7_cpu_0
 8 |   - torchvision=0.9.2=py37_cpu
 9 |   - cpuonly=1.0=0
10 |   - tensorboard
11 |   - scikit-image
12 |   - scikit-learn
13 |   - scipy
14 |   - pycairo
15 |   - matplotlib
16 |   - jupyter
17 |   - jupyterlab
18 |   - notebook
19 |   - ninja
20 |   - cython
21 |   - typing
22 |   - future
23 |   - pytest
24 |   - plyfile
25 |   - pip:
26 |     - opencv-contrib-python==4.5.5.62
27 |     - kornia==0.6.2
28 |     - pycocotools==2.0.4
29 |     - shapely==1.8.0
30 |     - glfw
31 |     - ipympl
32 |     - pyrr
33 |     - future-fstrings
34 |     - pyopengl
35 |     - pyopengl-accelerate
36 | 


--------------------------------------------------------------------------------
/docker/env_gpu.yml:
--------------------------------------------------------------------------------
 1 | name: base
 2 | channels:
 3 |   - pytorch-lts
 4 |   - nvidia
 5 |   - conda-forge
 6 |   - defaults
 7 | dependencies:
 8 |   - pytorch=1.8.2=py3.7_cuda11.1_cudnn8.0.5_0
 9 |   - torchvision=0.9.2=py37_cu111
10 |   - cudatoolkit=11.1.74=h6bb024c_0
11 |   - tensorboard
12 |   - scikit-image
13 |   - scikit-learn
14 |   - scipy
15 |   - pycairo
16 |   - matplotlib
17 |   - jupyter
18 |   - jupyterlab
19 |   - notebook
20 |   - ninja
21 |   - cython
22 |   - typing
23 |   - future
24 |   - pytest
25 |   - plyfile
26 |   - pip:
27 |     - opencv-contrib-python==4.5.5.62
28 |     - kornia==0.6.2
29 |     - pycocotools==2.0.4
30 |     - shapely==1.8.0
31 |     - glfw
32 |     - ipympl
33 |     - pyrr
34 |     - future-fstrings
35 |     - pyopengl
36 |     - pyopengl-accelerate


--------------------------------------------------------------------------------
/files/example_boards/charuco.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/charuco.pdf


--------------------------------------------------------------------------------
/files/example_boards/markers_regular.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/markers_regular.pdf


--------------------------------------------------------------------------------
/files/example_boards/random_board1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/random_board1.pdf


--------------------------------------------------------------------------------
/files/example_boards/random_board2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/random_board2.pdf


--------------------------------------------------------------------------------
/files/example_boards/two_markers_large.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/two_markers_large.pdf


--------------------------------------------------------------------------------
/files/example_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_input.png


--------------------------------------------------------------------------------
/files/example_video.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_video.mov


--------------------------------------------------------------------------------
/files/prediction_output.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/prediction_output.gif


--------------------------------------------------------------------------------
/files/teaser.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/teaser.jpg


--------------------------------------------------------------------------------
/inpaint/.gitignore:
--------------------------------------------------------------------------------
1 | inpaint_weights/


--------------------------------------------------------------------------------
/inpaint/deepfill.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | import numpy as np
 3 | import sys
 4 | from pathlib import Path
 5 | import tensorflow as tf
 6 | from deepfill_ops import init_inpaint_network, get_gpu_list
 7 | 
 8 | # Import classes from deepformable library
 9 | root_path = Path(__file__).parent.resolve()
10 | sys.path.insert(0, str(root_path.parent / "deepformable/utils"))
11 | from inpaint_utils import NoInpaint, generate_marker_mask
12 | 
13 | class DeepfillInpaint(NoInpaint):
14 |     def __init__(
15 |         self,
16 |         processing_workers=1,
17 |         writing_workers=8,
18 |         max_task_size=32,
19 |         max_write_size=24,
20 |     ):
21 |         config=tf.ConfigProto()
22 |         tf.reset_default_graph()
23 |         self.sess = tf.Session(config=config)
24 |         self.gpu_list = get_gpu_list()
25 |         self.inpaint_inputs, self.inpaint_outputs = init_inpaint_network(self.sess, self.gpu_list)
26 |         super().__init__(
27 |             processing_workers, writing_workers, max_task_size,
28 |             max_write_size, use_multiprocessing=False)
29 |     
30 |     def process_data(self, data):
31 |         feed_dict = {}
32 |         for i, (_, undistorted_frame, cur_annotations, markers_world, mtx) in enumerate(data):
33 |             mask = generate_marker_mask(undistorted_frame, cur_annotations, markers_world, mtx)
34 |             feed_dict[self.inpaint_inputs[i]] = (np.expand_dims(undistorted_frame, 0), np.expand_dims(mask, 0))
35 |         net_out = self.sess.run(self.inpaint_outputs[:len(feed_dict)], feed_dict=feed_dict)
36 |         return [(p[0], img[0][...,[2,1,0]]) for p, img in zip(data, net_out)]
37 |         # return [(p[0], p[1]) for p in data]
38 |     
39 |     # Modified this for multi-gpu batched input
40 |     def processing_worker(self):
41 |         worker_exit = False
42 |         while not worker_exit:
43 |             data = []
44 |             for _ in self.gpu_list:
45 |                 cur_task = self.tasks.get()
46 |                 if cur_task is None:
47 |                     worker_exit = True
48 |                     break
49 |                 else:
50 |                     data.append(cur_task)
51 |             for result in self.process_data(data):
52 |                 self.results.put(result)
53 |                 self.tasks.task_done()
54 |         self.tasks.task_done()


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | 
 2 | [isort]
 3 | line_length=100
 4 | multi_line_output=3
 5 | include_trailing_comma=True
 6 | known_standard_library=numpy,setuptools,mock
 7 | skip=./datasets,docs
 8 | skip_glob=*/__init__.py,**/configs/**,tests/config/**
 9 | known_myself=deepformable
10 | known_third_party=cv2,torch,torchvision,PIL,tqdm,scipy,pkg_resources,shapely,detectron2
11 | no_lines_before=STDLIB,THIRDPARTY
12 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER
13 | default_section=FIRSTPARTY
14 | 
15 | [mypy]
16 | python_version=3.6
17 | ignore_missing_imports = True
18 | warn_unused_configs = True
19 | disallow_untyped_defs = True
20 | check_untyped_defs = True
21 | warn_unused_ignores = True
22 | warn_redundant_casts = True
23 | show_column_numbers = True
24 | follow_imports = silent
25 | allow_redefinition = True
26 | ; Require all functions to be annotated
27 | disallow_incomplete_defs = True


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """
 2 | !/usr/bin/env python
 3 | Copyright (c) Facebook, Inc. and its affiliates.
 4 | Edited by Mustafa B. Yaldiz (VCLAB, KAIST)
 5 | """
 6 | import glob
 7 | import os
 8 | import shutil
 9 | from os import path
10 | from setuptools import find_packages, setup
11 | from typing import List
12 | import torch
13 | # from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
14 | 
15 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]]
16 | assert torch_ver >= [1, 6], "Requires PyTorch >= 1.6"
17 | 
18 | 
19 | def get_version():
20 |     init_py_path = path.join(path.abspath(path.dirname(__file__)), "deepformable", "__init__.py")
21 |     init_py = open(init_py_path, "r").readlines()
22 |     version_line = [l.strip() for l in init_py if l.startswith("__version__")][0]
23 |     version = version_line.split("=")[-1].strip().strip("'\"")
24 | 
25 |     # The following is used to build release packages.
26 |     # Users should never use it.
27 |     suffix = os.getenv("D2_VERSION_SUFFIX", "")
28 |     version = version + suffix
29 |     if os.getenv("BUILD_NIGHTLY", "0") == "1":
30 |         from datetime import datetime
31 | 
32 |         date_str = datetime.today().strftime("%y%m%d")
33 |         version = version + ".dev" + date_str
34 | 
35 |         new_init_py = [l for l in init_py if not l.startswith("__version__")]
36 |         new_init_py.append('__version__ = "{}"\n'.format(version))
37 |         with open(init_py_path, "w") as f:
38 |             f.write("".join(new_init_py))
39 |     return version
40 | 
41 | # PROJECTS = {}
42 | 
43 | setup(
44 |     name="deepformable",
45 |     version=get_version(),
46 |     author="Mustafa B. YALDIZ",
47 |     url="https://github.com/KAIST-VCLAB/DeepFormableTag",
48 |     description="DeepformableTag is data-driven fiducial marker system.",
49 |     packages=find_packages(),
50 |     python_requires=">=3.7",
51 |     # install_requires=[
52 |     #     "detectron2>=0.4.1",
53 |     #     "shapely>=1.7.1",
54 |     # ],
55 | )


--------------------------------------------------------------------------------
/tools/README.md:
--------------------------------------------------------------------------------
  1 | # DeepFormableTag Tools Instructions
  2 | 
  3 | The **tools** folder includes some of the necessary tools and 
  4 | components to interact with the marker system. 
  5 | It provides functionalities to create dataset, evaluate models, and
  6 | visualize the predictions.
  7 | Later we will provide the training code here in the near future.
  8 | 
  9 | Here is a short summary of what each file does:
 10 | - [preprocess_data.py](#preprocessing-videos-into-dataset) uses folder of videos and
 11 | preprocess it into the modified COCO format for the training.
 12 | - [generate_board_json.py](#marker-config-file) generates json files with random 
 13 | board arrangement.
 14 | - [generate_board_pdf.py](#generating-board-pdfs) generates vector graphics pdfs
 15 | of boards drawn.
 16 | - [calibrate.py](#calibration) uses video frames to calibrate cameras which is later
 17 | used to rectify the images while generating the COCO formatted dataset.
 18 | - [predictor_demo.py](../README.md/#predictor-demo) visualizes the predictions.
 19 | - [eval.py](../README.md/#evaluation-on-test-data) evaluates model given the dataset.
 20 | 
 21 | ## Preprocessing Videos into Dataset
 22 | 
 23 | In the preprocessing step, our `tools/preprocess_data.py` applies the following steps to create the dataset.
 24 | 1. Loads videos from provided directories
 25 | 2. For a frame from video, detects the markers and board position, creates annotations
 26 | 3. Inpaints the markers (optional)
 27 | 4. Saves the processed frames and combines annotations in the COCO format
 28 | 
 29 | - There are three different inpainting methods, to use the DeepFill method, which we use to create our training and testing dataset, build and run the environment:
 30 |   ```bash
 31 |   # Creates inpainting environment
 32 |   docker build -t deepfill-inpaint -f docker/DeepfillInpaint.Dockerfile .   
 33 |   # Runs the preprocessing code
 34 |   docker run --rm -it --runtime=nvidia --ipc=host -v $PWD:/host -v /home/myaldiz/Data/Deepformable:/Data deepfill-inpaint \
 35 |     /bin/sh -c 'cd /host; python tools/preprocess_dataset.py -v -i /Data/Dataset/train-raw/ -o /Data/Dataset/train --inpaint-method deepfill'	
 36 |   ```
 37 | - `preprocess_dataset.py` file has several options you might want to use:
 38 |   - `--least-pose-markers` option ignores detected board if provided number of markers are not detected for that board.
 39 |   - `--skip-frames` skips frames for processing. You can use this option to generate toy dataset.
 40 |   - `-i` specifies input folder for dataset, `-o` is the output folder. 
 41 |   - `-v` option is prints the progress to terminal.
 42 | - You can use OpenCV inpainting method as well but the inpainting quality is not as good, so we recommend deepfill. 
 43 | However, this option requires larger GPU memory (>12GB).
 44 | - You need to [download weights](https://drive.google.com/drive/folders/1y7Irxm3HSHGvp546hZdAZwuNmhLUVcjO) for deepfill into `inpaint/inpaint_weights` folder.
 45 | 
 46 | You can download training and testing videos from [this link](https://drive.google.com/drive/folders/1picphIb6Hbj6pM3Wu_Vxu53wzKBV0jdV?usp=sharing).
 47 | 
 48 | ## Preparing a Custom Dataset
 49 | 
 50 | Dataset preparation requires variety of scripts to process the video frames into the final COCO format dataset.
 51 | In summary you need to:
 52 | - Generate the `config.json` file with the boards
 53 | - Generate PDFs for the boards and capture datasets
 54 | - Calibrate cameras with charuco board and save it to config
 55 | - Capture and preprocess videos into frames.
 56 | 
 57 | ### Marker Config File
 58 | 
 59 | Marker config file is a file in the `json` format to specify board arrangements.
 60 | We provide a [template config file](../files/template_config.json).
 61 | Here are some information about they keys:
 62 | - `aruco_dict` is used to generate aruco markers for the board.
 63 | - `video_dir` is the folder to search for video files.
 64 | - `calib_video` is the video that will be used to calibrate the camera.
 65 | - `boards` provides information about the boards to be detected.
 66 |   - You can provide a name for the board to be created, paper margin sizes and
 67 |   descriptions of (ie: location, id) markers to be placed.
 68 | - `markers` provide a text for class id, binary message and name for the markers 
 69 | used in visualization demos.
 70 |   ```json
 71 |   {
 72 |       "marker_id": 0, 
 73 |       "binary": "111011011001000111101111100011011011",
 74 |       "text": "informational"
 75 |   }
 76 |   ``` 
 77 | 
 78 | `tools/generate_board_json.py` code reads the board sizes written in the config file and replaces them with random marker configurations. You need to enter the board names, type and dims. An example template config file is given at [template_config.json](../files/template_config.json).
 79 | Here is an example script:
 80 | ```bash
 81 | python tools/generate_board_json.py -i files/template_config.json -o output/config.json
 82 | ```
 83 | 
 84 | ### Generating Board PDFs
 85 | 
 86 | Here is an example script to generate pdfs of boards:
 87 | ```bash
 88 | python tools/generate_board_pdf.py -i tools/config.json -o tools/boards
 89 | ```
 90 | In the config file board descriptions, if the type of marker is `aruco` then it will produce aruco markers.
 91 | However, if the type is `marker`, then supplied model will be used to generate the markers like below:
 92 | ```bash
 93 | python tools/generate_board_pdf.py -i files/template_config.json -o output/boards \
 94 |   --marker-config-file configs/deepformable-main.yaml \
 95 |   --model-weights models/deepformable_model.pth
 96 | ```
 97 | Try to print the board pdfs without scaling. This way dimensions specified in the `location` key for each marker will match the printed size.
 98 | 
 99 | ### Calibration
100 | 
101 | Python script for calibration is located at `tools/calibrate.py`. Config file must include the relative path to calibration video or folder such as `calib_video: "../../calib/canon_28mm_5x5.MOV"`. Following script will calculate camera calibration parameters:
102 | ```bash
103 | python tools/calibrate.py -i /Data/Datasets/PlacementDataset_Nov2/train-raw/28mm/config.json
104 | ```
105 | Notes:
106 | - To save the parameters into json file use `-s` option.
107 | - If using datasets we provided, they most likely include calibration parameters, no need to run the scripts again.
108 | - Our code thresholds blurry frames. Depending on video length, it may take longer time. You can change the ratio of the selected frames using arguments.
109 | 
110 | ## Miscellaneous
111 | 
112 | ### Running a Docker Container with X11 Window Support on Mac
113 | In order to create windows from docker on Mac, you can follow below, taken from [stackoverflow](https://stackoverflow.com/questions/37826094/xt-error-cant-open-display-if-using-default-display).
114 | ```bash
115 | # Below will install required things to the host
116 | brew install socat
117 | brew install --cask xquartz
118 | # From xquartz Preferences/Security allow connections from network clients
119 | 
120 | # Create port for display
121 | socat TCP-LISTEN:6000,reuseaddr,fork UNIX-CLIENT:\"$DISPLAY\" 
122 | 
123 | # In another terminal run below to create the container
124 | docker run --rm -it --privileged --ipc=host -v $PWD:/host -e DISPLAY=docker.for.mac.host.internal:0  -v /tmp/.X11-unix:/tmp/.X11-unix deepformable /bin/sh -c 'cd /host; python -m pip install -e .; bash'
125 | ```
126 | 


--------------------------------------------------------------------------------
/tools/calibrate.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import json, argparse, random
  3 | import cv2
  4 | from cv2 import aruco
  5 | import numpy as np
  6 | from pathlib import Path
  7 | 
  8 | from deepformable.utils import (
  9 |     img_flexible_reader, get_aruco_dict, detect_aruco_markers, calculate_board_dims)
 10 | 
 11 | def detect_charuco_corners(
 12 |     img,
 13 |     aruco_dict,
 14 |     charuco_board,
 15 |     min_corners=5,
 16 | ):
 17 |     gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.shape[-1]==3 else img
 18 |     marker_corners, ids, _ = detect_aruco_markers(gray_img, aruco_dict)
 19 |     num_corners, img_corners, corner_ids = cv2.aruco.interpolateCornersCharuco(
 20 |         marker_corners, ids, gray_img, charuco_board)
 21 |     if num_corners >= min_corners:
 22 |         row_size = charuco_board.getChessboardSize()[0]
 23 |         corner_world = np.zeros((num_corners, 3), np.float32)
 24 |         for i, index in enumerate(corner_ids):
 25 |             corner_world[i, 0] = index[0] % (row_size - 1)
 26 |             corner_world[i, 1] = index[0] // (row_size - 1)
 27 |         return num_corners, np.squeeze(img_corners), np.squeeze(corner_world)
 28 |     return 0, None, None
 29 | 
 30 | 
 31 | def calculate_sharpness(img, pts):
 32 |     gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.shape[-1]==3 else img
 33 |     mask = np.zeros(gray_img.shape, dtype=np.uint8)
 34 |     cv2.fillConvexPoly(mask, pts, 255)
 35 |     masked_values = cv2.Laplacian(gray_img, cv2.CV_64F)[mask == 255]
 36 |     return masked_values.std() * (masked_values.shape[0] ** 0.5)
 37 | 
 38 | 
 39 | def calculate_reprojection_error(frames_info, mtx, dist):
 40 |     repr_dist = []
 41 |     for _, img_corners, corner_world in frames_info:
 42 |         retval, rvec, tvec = cv2.solvePnP(corner_world, img_corners, mtx, dist)
 43 |         projected_points, _ = cv2.projectPoints(corner_world, rvec, tvec, mtx, dist)
 44 |         dif = projected_points.squeeze() - img_corners.squeeze()
 45 |         repr_dist.append(np.linalg.norm(dif, axis=1))
 46 |     
 47 |     repr_dist = np.concatenate(repr_dist)
 48 |     return np.average(repr_dist), np.std(repr_dist)
 49 | 
 50 | 
 51 | def setup_argparse():
 52 |     parser = argparse.ArgumentParser()
 53 |     parser.add_argument(
 54 |         '-i', '--input', type=str, default='config.json', help='Input config file')
 55 |     parser.add_argument(
 56 |         '-s', '--save-params', action='store_true', help='Saves calibration params to provided json file')
 57 |     parser.add_argument(
 58 |         '--sharpness-cut-ratio', type=float, default=0.7, help='Ignores remaining portion of frames, sorted by sharpnes')
 59 |     parser.add_argument(
 60 |         '--random-cut-ratio', type=float, default=0.5, help='Ignores close frames by provided extent')
 61 |     parser.add_argument(
 62 |         '-v', '--verbose', action='store_true', help='Show steps if enabled')
 63 |     parser.add_argument(
 64 |         '-t', '--test-params', action='store_true', help='Test reprojection error with current params')
 65 |     return parser
 66 | 
 67 | 
 68 | if __name__ == '__main__':
 69 |     args = setup_argparse().parse_args()
 70 | 
 71 |     # Read the config json into python format
 72 |     data_json_path = Path(args.input)
 73 |     with open(data_json_path) as cfg_file:
 74 |         cfg = json.load(cfg_file)
 75 |     if args.verbose: print("Config loaded!")
 76 | 
 77 |     # Get necessary values for calibration
 78 |     calib_video_path = (data_json_path.parent / cfg["calib_video"]).resolve()
 79 |     aruco_dict = get_aruco_dict(cfg['aruco_dict'])
 80 |     boards_dict = {i['board_name']: i for i in cfg['boards']}
 81 |     charuco_board_info = boards_dict['charuco']['descriptions'][0]
 82 | 
 83 |     board_dims = calculate_board_dims(boards_dict['charuco'])
 84 |     charuco_scale = float(min([board_dims[i] / charuco_board_info['size'][i] for i in range(2)]) // 1)
 85 |     dims = (charuco_scale * charuco_board_info['size'][1], charuco_scale * charuco_board_info['size'][0])
 86 | 
 87 |     tag_length = aruco_dict.markerSize + 2 * cfg['border_bits']
 88 |     square_length = 2 * charuco_board_info['tag_border'] + tag_length
 89 |     charuco_board = aruco.CharucoBoard_create(*charuco_board_info['size'], square_length, tag_length, aruco_dict)
 90 |     board_size = charuco_board.getChessboardSize()
 91 | 
 92 |     if args.verbose: print("Dims parsed, detecting corners")
 93 |     frames_info = []
 94 |     for frame in img_flexible_reader(calib_video_path, tqdm_on=args.verbose):
 95 |         frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
 96 |         num_corners, img_corners, corner_world = detect_charuco_corners(
 97 |             frame_gray, aruco_dict, charuco_board, 
 98 |             min_corners=(board_size[0] - 1) * (board_size[1] - 1))  # Accept only if all corners are detected
 99 |         
100 |         if num_corners == 0: continue
101 |         
102 |         pts = np.array([
103 |             img_corners[0], img_corners[board_size[0] - 2],
104 |             img_corners[-1], img_corners[-board_size[0] + 1]], dtype=np.int32)  # Provide corners of the board
105 |         sharpness_value = calculate_sharpness(frame_gray, pts)
106 |         frames_info.append((sharpness_value, img_corners, corner_world * charuco_scale))
107 | 
108 |     # Works better than previous calibration implementation 
109 |     if args.verbose: print("Thresholding frames.")
110 |     cut_index = int(len(frames_info) * args.sharpness_cut_ratio)
111 |     thresh_frames_info = sorted(frames_info, reverse=True, key=lambda x: x[0])[:cut_index]
112 |     random_select_index = int(len(thresh_frames_info) * args.random_cut_ratio)
113 |     random.shuffle(thresh_frames_info)
114 |     thresh_frames_info = thresh_frames_info[:random_select_index]
115 | 
116 |     image_points = np.array([i[1] for i in thresh_frames_info])
117 |     world_points = np.array([i[2] for i in thresh_frames_info])
118 | 
119 |     if not args.test_params:
120 |         if args.verbose: print("Calibrating...")
121 |         ret, mtx, dist, _, _ = cv2.calibrateCamera(
122 |             world_points,image_points,(frame.shape[1], frame.shape[0]), None, None)
123 |         if args.verbose:
124 |             avg, std = calculate_reprojection_error(thresh_frames_info, mtx, dist)
125 |             print("Training(selected frames) Reprojection error:{:.4f}, std: {:.4f} for selected frames".format(avg, std))
126 |             avg, std = calculate_reprojection_error(frames_info, mtx, dist)
127 |             print("Testing    (all frames)   Reprojection error:{:.4f}, std: {:.4f} for selected frames".format(avg, std))
128 |     else:
129 |         mtx, dist = np.array(cfg["calib_mtx"]), np.array(cfg["calib_dist"])
130 |         avg, std = calculate_reprojection_error(frames_info, mtx, dist)
131 |         print("Testing    (all frames)   Reprojection error:{:.4f}, std: {:.4f} for selected frames".format(avg, std))
132 |     
133 |     if args.save_params:
134 |         cfg["calib_mtx"] = mtx.tolist()
135 |         cfg["calib_dist"] = dist.tolist()
136 |         with open(data_json_path, 'w') as cfg_file:
137 |             json.dump(cfg, cfg_file, indent=4)
138 |         if args.verbose: print("Config saved!")


--------------------------------------------------------------------------------
/tools/generate_board_json.py:
--------------------------------------------------------------------------------
  1 | """
  2 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  3 | This code creates random board configurations.
  4 | """
  5 | import json, argparse
  6 | from pathlib import Path
  7 | import numpy as np
  8 | import requests
  9 | 
 10 | from deepformable.utils.general_utils import if_continue_execution
 11 | from deepformable.utils import (
 12 |     if_continue_execution, get_aruco_dict,
 13 |     calculate_board_dims, marker_placer
 14 | )
 15 | 
 16 | def setup_argparse():
 17 |     parser = argparse.ArgumentParser()
 18 |     parser.add_argument(
 19 |         '-i', '--input', type=str, default='config.json', help='Input config file')
 20 |     parser.add_argument(
 21 |         '-o', '--output', type=str, default='out.json', help='Output config file')
 22 |     parser.add_argument(
 23 |         '--random-trials', type=int, default=200, help='Number of random trials to place non-overlapping markers')
 24 |     parser.add_argument(
 25 |         '--marker-min', type=int, default=25, help="Smallest marker size in cm's")
 26 |     parser.add_argument(
 27 |         '--marker-max', type=int, default=110, help="Biggest marker size in cm's")
 28 |     parser.add_argument(
 29 |         '--safety-size', type=int, default=10, help="Marker safety distance to each other")
 30 |     parser.add_argument(
 31 |         '--generate-aruco', action='store_true', help='Marker type will be aruco instead of general markers')
 32 |     parser.add_argument(
 33 |         '--num-bits', type=int, default=36, help="Number of bits markers encode")
 34 |     parser.add_argument(
 35 |         '--num-markers', type=int, default=0, help="Number of markers to be used")
 36 |     
 37 |     return parser
 38 | 
 39 | 
 40 | if __name__ == '__main__':
 41 |     args = setup_argparse().parse_args()
 42 | 
 43 |     # Read the config json into python format
 44 |     data_json_path = Path(args.input)
 45 |     with open(data_json_path) as cfg_file:
 46 |         cfg = json.load(cfg_file)
 47 |     print("Config loaded!")
 48 | 
 49 |     aruco_dict = get_aruco_dict(cfg['aruco_dict'])
 50 |     
 51 |     num_markers, num_bits = args.num_markers, args.num_bits
 52 |     if args.generate_aruco and num_markers == 0:
 53 |         num_markers = len(aruco_dict.bytesList)
 54 |     assert num_markers > 0, "Enter positive number for the number of markers"
 55 | 
 56 |     # Standard settings
 57 |     p_reg, p_reg_rand=[0.0,0.25,0.25,0.25,0.25,0.0], [0.5,0.5,0.0]
 58 |     # p_reg, p_reg_rand = [1/6,1/6,1/6,1/6,1/6,1/6], [1/3, 1/3, 1/3])
 59 |     # p_reg, p_reg_rand=[0.1,0.2,0.2,0.2,0.2,0.1], [0.4,0.4,0.2]
 60 |     # p_reg, p_reg_rand = [0,0,0,0,1,0], [1.0,0.0,0.0]
 61 |     
 62 |     # Generate markers
 63 |     if not args.generate_aruco:
 64 |         # Generate unique binary messages
 65 |         binary_messages = np.unique(
 66 |             np.random.randint(0, 2, (num_markers, num_bits)), axis=0)
 67 |         while len(binary_messages) != num_markers:
 68 |             additional_messages = np.random.randint(
 69 |                 0, 2, (num_markers-len(binary_messages), num_bits))
 70 |             binary_messages = np.concatenate([binary_messages, additional_messages], axis=0)
 71 |             binary_messages = np.unique(binary_messages, axis=0)
 72 |         
 73 |         # Load some random english words from web as a message
 74 |         word_site = "https://www.mit.edu/~ecprice/wordlist.10000"
 75 |         response = requests.get(word_site)
 76 |         words = [
 77 |             word.decode("utf-8") for word in response.content.splitlines() 
 78 |             if len(word) > 3
 79 |         ] if response.ok else None
 80 |         text_messages = np.random.choice(words, num_markers, replace=False)
 81 | 
 82 |         # Generate information and save it to config
 83 |         markers = []
 84 |         for index in range(num_markers):
 85 |             markers.append({
 86 |                 "marker_id": index,
 87 |                 "binary": "".join([str(i) for i in binary_messages[index]]),
 88 |                 "text": text_messages[index] if text_messages is not None else ""
 89 |             })
 90 |         cfg['markers'] = markers
 91 | 
 92 |     
 93 |     # Generate boards
 94 |     class_indexes = list(range(num_markers))
 95 |     for board in cfg['boards']:
 96 |         if board['board_name'] == 'charuco':
 97 |             continue
 98 |         board_dims = calculate_board_dims(board)
 99 |         markers, marker_indexes = marker_placer(
100 |             board_dims,
101 |             random_trials=args.random_trials,
102 |             marker_min=args.marker_min, marker_max=args.marker_max,
103 |             class_array=class_indexes, safety_size=args.safety_size, 
104 |             p_reg=p_reg, p_reg_rand=p_reg_rand)
105 |         
106 |         descriptions = []
107 |         for marker, marker_id in zip(markers, marker_indexes):
108 |             description = {
109 |                 "type": "aruco" if args.generate_aruco else "marker",
110 |                 "location": marker.tolist(),
111 |                 "marker_id": int(marker_id),
112 |             }
113 |             descriptions.append(description)
114 |         
115 |         board['descriptions'] = descriptions
116 | 
117 |         if len(class_indexes) == 0:
118 |             break
119 | 
120 |     if args.output != '':
121 |         if args.input == args.output and not if_continue_execution(
122 |             "This will override input file, continue? (yes/no): "):
123 |             exit(0)
124 |         print("Saving confing!")
125 |         with open(args.output, 'w') as cfg_file:
126 |             json.dump(cfg, cfg_file, indent=4)
127 | 


--------------------------------------------------------------------------------
/tools/generate_board_pdf.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | """
  3 | This code creates pdfs for given configs.
  4 | """
  5 | import os, json, argparse
  6 | from pathlib import Path
  7 | from collections import OrderedDict
  8 | import cairo
  9 | import numpy as np
 10 | from cv2 import aruco
 11 | import torch
 12 | 
 13 | from deepformable.utils import (
 14 |     get_aruco_dict, calculate_board_dims, get_cfg)
 15 | from deepformable.modeling import build_marker_generator
 16 | 
 17 | def setup_cairo(board, output_file):
 18 |     board_dims = calculate_board_dims(board)
 19 |     width, height = [i * 7.2 / 2.54 for i in board_dims]
 20 |     cairo_surface = cairo.PDFSurface(output_file, width, height)
 21 |     cairo_ctx = cairo.Context(cairo_surface)
 22 |     cairo_ctx.scale(7.2 / 2.54, 7.2 / 2.54)  # Scale to mm back
 23 |     return cairo_surface, cairo_ctx
 24 |     # print("Cairo setup is done, paper scales are: {:.2f}mm-{:2f}mm".format(paper_size[0], paper_size[1]))
 25 |     # print("Please don't select scale to fit option during printing for accurate board scales")
 26 | 
 27 | def draw_marker(cairo_ctx, marker, locations):
 28 |     marker = np.array(marker)
 29 |     if marker.shape[-1] != 3:
 30 |         marker = np.repeat(marker, 3, axis=-1).reshape(*marker.shape, 3)
 31 |     locations = np.array(locations)[:,:2]
 32 |     x_tick = (locations[1] - locations[0])/marker.shape[0]
 33 |     y_tick = (locations[2] - locations[0])/marker.shape[1]
 34 |     for i in range(marker.shape[1]):
 35 |         for j in range(marker.shape[0]):
 36 |             if (marker[j,i] == [1,1,1]).all():
 37 |                 continue
 38 |             pos = (locations[0] + i * x_tick + j * y_tick)
 39 |             cairo_ctx.move_to(*(locations[0] + i * x_tick + j * y_tick))
 40 |             cairo_ctx.line_to(*(locations[0] + (i+1) * x_tick + j * y_tick))
 41 |             cairo_ctx.line_to(*(locations[0] + (i+1) * x_tick + (j+1) * y_tick))
 42 |             cairo_ctx.line_to(*(locations[0] + (i) * x_tick + (j+1) * y_tick))
 43 |             cairo_ctx.close_path()
 44 |             cairo_ctx.set_source_rgb(*marker[j,i])
 45 |             cairo_ctx.fill_preserve()
 46 |             cairo_ctx.set_line_width (0.001)
 47 |             cairo_ctx.set_source_rgb(*marker[j,i])
 48 |             cairo_ctx.stroke()
 49 | 
 50 | def draw_rectangle(cairo_ctx, locations, color=(0.0,0.0,0.0)):
 51 |     return draw_marker(cairo_ctx, np.array([[color]]), locations)
 52 | 
 53 | def draw_cutlines(cairo_ctx, locations, margin, color=(0.0,0.0,0.0)):
 54 |     locations = np.array(locations)[:,:2]
 55 |     x_tick = (locations[1] - locations[0])
 56 |     x_tick /= np.linalg.norm(x_tick)
 57 |     y_tick = (locations[2] - locations[0])
 58 |     y_tick /= np.linalg.norm(y_tick)
 59 | 
 60 |     locations[0] -= (x_tick+y_tick)*margin
 61 |     locations[1] += margin*(x_tick-y_tick)
 62 |     locations[3] += margin*(x_tick+y_tick)
 63 |     locations[2] -= margin*(x_tick-y_tick)
 64 |     
 65 |     cairo_ctx.set_line_width(0.1)
 66 |     cairo_ctx.move_to(*locations[0])
 67 |     cairo_ctx.line_to(*locations[1])
 68 |     cairo_ctx.line_to(*locations[3])
 69 |     cairo_ctx.line_to(*locations[2])
 70 |     
 71 |     cairo_ctx.close_path()
 72 |     cairo_ctx.set_source_rgb(*color)
 73 |     cairo_ctx.stroke()
 74 | 
 75 | def draw_marker_board(cairo_ctx, board, markers):
 76 |     aruco_markers, markers = markers
 77 |     for d in board["descriptions"]:
 78 |         # Get location for the object to draw
 79 |         loc = d['location']
 80 |         if d['type'] == 'marker':
 81 |             draw_marker(cairo_ctx, markers[d['marker_id']], loc)
 82 |         elif d['type'] == 'aruco':
 83 |             draw_marker(cairo_ctx, aruco_markers[d['marker_id']], loc)
 84 |         elif d['type'] == 'rectangle':
 85 |             draw_rectangle(cairo_ctx, loc, d['color'])
 86 |         else:
 87 |             raise ValueError("Unknown type of element, possible ones: marker, aruco, rectangle")
 88 | 
 89 | def draw_cutlines_board(cairo_ctx, board):
 90 |     for d in board["descriptions"]:
 91 |         loc = d['location']
 92 |         if d['type'] == 'marker':
 93 |             draw_cutlines(cairo_ctx, loc, d.get('cutline_margins', 5))
 94 |         elif d['type'] == 'aruco':
 95 |             draw_cutlines(cairo_ctx, loc, d.get('cutline_margins', 5))
 96 |         elif d['type'] == 'rectangle':
 97 |             draw_cutlines(cairo_ctx, loc, d.get('cutline_margins', 0))
 98 |         else:
 99 |             raise ValueError("Unknown type of element, possible ones: marker, aruco, rectangle")
100 | 
101 | def setup_argparse():
102 |     parser = argparse.ArgumentParser()
103 |     parser.add_argument(
104 |         '-i', '--input', type=str, default='config.json', help='Input config file')
105 |     parser.add_argument(
106 |         '-o', '--output', type=str, default='boards', help='Output board directory')
107 |     parser.add_argument(
108 |         '-d', '--draw-cutlines', action='store_true', help='Creates cutline pdfs')
109 |     parser.add_argument(
110 |         '--marker-config-file', type=str, default='config.yaml', help='marker model config file')
111 |     parser.add_argument(
112 |         '--model-weights', type=str, default='weights.pth', help='marker model config file')
113 |     return parser
114 | 
115 | 
116 | if __name__ == '__main__':
117 |     args = setup_argparse().parse_args()
118 | 
119 |     # Read the config json into python format
120 |     data_json_path = Path(args.input)
121 |     with open(data_json_path) as cfg_file:
122 |         cfg = json.load(cfg_file)
123 |     print("Config loaded!")
124 | 
125 |     # Load the aruco markers
126 |     aruco_dict = get_aruco_dict(cfg['aruco_dict'])
127 |     tag_length = aruco_dict.markerSize + 2 * cfg['border_bits']
128 |     markers_aruco = []
129 |     for i in range(aruco_dict.bytesList.shape[0]):
130 |         markers_aruco.append(aruco_dict.drawMarker(i, tag_length, borderBits=cfg['border_bits']))
131 |     
132 |     # Load the model markers
133 |     markers_model = None
134 |     config_path = Path(args.marker_config_file)
135 |     model_weights_path = Path(args.model_weights)
136 |     if 'markers' in cfg and config_path.exists() and model_weights_path.exists():
137 |         # Load config
138 |         model_cfg = get_cfg()
139 |         model_cfg.merge_from_file(config_path)
140 | 
141 |         # Change default device if GPU is not available
142 |         if not torch.cuda.is_available():
143 |             model_cfg.MODEL.DEVICE = "cpu"
144 | 
145 |         # Get markers into tensor
146 |         markers = sorted(cfg['markers'], key=lambda x: x['marker_id'])
147 |         binary_messages = torch.tensor([[float(i) for i in m['binary']] for m in markers])
148 |         # Construct generator and load weights
149 |         model_cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(binary_messages)
150 |         marker_generator = build_marker_generator(model_cfg)
151 |         weights = torch.load(model_weights_path, map_location=model_cfg.MODEL.DEVICE)
152 |         state_dict = marker_generator.state_dict()
153 |         converted_weights = OrderedDict()
154 |         for key, value in weights['model'].items():
155 |             if 'marker_generator' in key:
156 |                 items = key.split('.')
157 |                 param_name = ".".join(items[items.index('marker_generator')+1:])
158 |                 if param_name in state_dict:
159 |                     converted_weights[param_name] = value
160 |                 else:
161 |                     print("- Ignoring:", param_name)
162 |         marker_generator.load_state_dict(converted_weights)
163 |         marker_generator.messages = binary_messages.to(marker_generator.device)
164 |         print("Model loaded!")
165 |         markers_model = marker_generator.get_markers_numpy([i for i in range(len(binary_messages))])
166 |     else:
167 |         print("WARNING: Could not load the model!")
168 | 
169 |     markers = np.array(markers_aruco), markers_model
170 | 
171 |     os.makedirs(args.output, exist_ok=True)
172 |     output_path = Path(args.output)
173 | 
174 |     charuco_board = None
175 |     for board in cfg['boards']:
176 |         if board['board_name'] == "charuco": 
177 |             charuco_board = board
178 |             continue
179 | 
180 |         output_file = str(output_path / f"{board['board_name']}.pdf")
181 |         cairo_surface, cairo_ctx = setup_cairo(board, output_file)
182 |         draw_marker_board(cairo_ctx, board, markers)
183 |         
184 |         cairo_surface.flush()
185 |         cairo_surface.finish()
186 | 
187 |         # Also create cutlines if requested
188 |         if args.draw_cutlines:
189 |             output_file = str(output_path / f"{board['board_name']}_cutline.pdf")
190 |             cairo_surface, cairo_ctx = setup_cairo(board, output_file)
191 |             draw_cutlines_board(cairo_ctx, board)
192 |             cairo_surface.flush()
193 |             cairo_surface.finish()
194 | 
195 |     # Draw charuco board if exists
196 |     if charuco_board:
197 |         # Calculate paper dimensions for the charuco
198 |         charuco_board_info = charuco_board['descriptions'][0]
199 |         dims = charuco_board_info['dims']
200 |         if dims == "max":
201 |             board_dims = calculate_board_dims(charuco_board)
202 |             charuco_scale = float(min([board_dims[i] / charuco_board_info['size'][i] for i in range(2)]) // 1)
203 |             dims = (charuco_scale * charuco_board_info['size'][1], charuco_scale * charuco_board_info['size'][0])
204 |         elif dims == 2:
205 |             charuco_scale = dims[0] / charuco_board_info['size'][1]
206 |         else:
207 |             print("Please provide 2 dimensional size for the charuco dimensions")
208 |             raise
209 |         print("Charuco unit size(length of two neighbouring corners) is calculated as {}mm".format(charuco_scale))
210 | 
211 |         cairo_surface, cairo_ctx = setup_cairo(
212 |             charuco_board, str(output_path / "charuco.pdf"))
213 | 
214 |         # Create the board
215 |         square_length = 2 * charuco_board_info['tag_border'] + tag_length
216 |         charuco_board = aruco.CharucoBoard_create(*charuco_board_info['size'], square_length, tag_length, aruco_dict)
217 |         
218 |         # Draw the board
219 |         draw_size = tuple([int(i * charuco_board.getSquareLength()) for i in charuco_board.getChessboardSize()])
220 |         board_svg = charuco_board.draw(draw_size)
221 |         loc = [[0,0], [dims[0], 0], [0, dims[1]], dims]
222 |         draw_marker(cairo_ctx, board_svg, loc) 
223 |         cairo_surface.flush()
224 |         cairo_surface.finish()
225 | 


--------------------------------------------------------------------------------
/tools/predictor_demo.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import argparse
  3 | import glob
  4 | from pathlib import Path
  5 | import multiprocessing as mp
  6 | import os
  7 | import tempfile
  8 | import time
  9 | import warnings
 10 | import tqdm
 11 | import json
 12 | import cv2
 13 | import numpy as np
 14 | import torch
 15 | 
 16 | from detectron2.data.detection_utils import read_image
 17 | from detectron2.utils.logger import setup_logger
 18 | 
 19 | import deepformable
 20 | import deepformable.modeling
 21 | from deepformable.utils import get_cfg, VisualizationDemo, marker_metadata_loader
 22 | from detectron2.data import MetadataCatalog
 23 | 
 24 | # constants
 25 | WINDOW_NAME = "COCO detections"
 26 | 
 27 | setup_logger(name="fvcore")
 28 | logger = setup_logger()
 29 | 
 30 | def setup_environment(args):
 31 |     # Setup Config
 32 |     # load config from file and command-line arguments
 33 |     cfg = get_cfg()
 34 |     # To use demo for Panoptic-DeepLab, please uncomment the following two lines.
 35 |     # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config  # noqa
 36 |     # add_panoptic_deeplab_config(cfg)
 37 |     cfg.merge_from_file(args.config_file)
 38 |     cfg.merge_from_list(args.opts)
 39 |     # Set score_threshold for builtin models
 40 |     if args.confidence_threshold > 0.0:
 41 |         cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
 42 |         cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
 43 |         cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
 44 |     if not torch.cuda.is_available():
 45 |         cfg.MODEL.DEVICE = "cpu"
 46 |         logger.info("CUDA not found, Will run the inference on CPU")
 47 | 
 48 |     # Handle metadata
 49 |     if marker_metadata_loader(cfg, args.marker_config_file):
 50 |         logger.info("Config loaded!")
 51 |     else:
 52 |         logger.info("Config loading failed!")
 53 | 
 54 |     cfg.freeze()
 55 |     return cfg
 56 | 
 57 | 
 58 | def get_parser():
 59 |     parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
 60 |     parser.add_argument(
 61 |         "--config-file",
 62 |         default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
 63 |         metavar="FILE",
 64 |         help="path to config file",
 65 |     )
 66 |     parser.add_argument(
 67 |         "--marker-config-file",
 68 |         default="tools/config.json",
 69 |         metavar="FILE",
 70 |         help="path to marker config file for the metadata",
 71 |     )
 72 | 
 73 |     parser.add_argument("--parallel", action="store_true", help="Runs prediction in parallel.")
 74 |     parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
 75 |     parser.add_argument("--video-input", help="Path to video file.")
 76 |     parser.add_argument(
 77 |         "--input",
 78 |         nargs="+",
 79 |         help="A list of space separated input images; "
 80 |         "or a single glob pattern such as 'directory/*.jpg'",
 81 |     )
 82 |     parser.add_argument(
 83 |         "--output",
 84 |         help="A file or directory to save output visualizations. "
 85 |         "If not given, will show output in an OpenCV window.",
 86 |     )
 87 |     parser.add_argument(
 88 |         "--confidence-threshold",
 89 |         type=float,
 90 |         default=-1.0,
 91 |         help="Minimum score for instance predictions to be shown",
 92 |     )
 93 |     parser.add_argument(
 94 |         "--opts",
 95 |         help="Modify config options using the command-line 'KEY VALUE' pairs",
 96 |         default=[],
 97 |         nargs=argparse.REMAINDER,
 98 |     )
 99 |     return parser
100 | 
101 | 
102 | def test_opencv_video_format(codec, file_ext):
103 |     with tempfile.TemporaryDirectory(prefix="video_format_test") as dir:
104 |         filename = os.path.join(dir, "test_file" + file_ext)
105 |         writer = cv2.VideoWriter(
106 |             filename=filename,
107 |             fourcc=cv2.VideoWriter_fourcc(*codec),
108 |             fps=float(30),
109 |             frameSize=(10, 10),
110 |             isColor=True,
111 |         )
112 |         [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)]
113 |         writer.release()
114 |         if os.path.isfile(filename):
115 |             return True
116 |         return False
117 | 
118 | 
119 | if __name__ == "__main__":
120 |     mp.set_start_method("spawn", force=True)
121 |     args = get_parser().parse_args()
122 |     logger.info("Arguments: " + str(args))
123 | 
124 |     cfg = setup_environment(args)
125 | 
126 |     demo = VisualizationDemo(cfg, parallel=args.parallel)
127 | 
128 |     if args.input:
129 |         if len(args.input) == 1:
130 |             args.input = glob.glob(os.path.expanduser(args.input[0]))
131 |             assert args.input, "The input path(s) was not found"
132 |         for path in tqdm.tqdm(args.input, disable=not args.output):
133 |             # use PIL, to be consistent with evaluation
134 |             img = read_image(path, format="BGR")
135 |             start_time = time.time()
136 |             predictions, visualized_output = demo.run_on_image(img)
137 |             logger.info(
138 |                 "{}: {} in {:.2f}s".format(
139 |                     path,
140 |                     "detected {} instances".format(len(predictions["instances"]))
141 |                     if "instances" in predictions
142 |                     else "finished",
143 |                     time.time() - start_time,
144 |                 )
145 |             )
146 | 
147 |             if args.output:
148 |                 if os.path.isdir(args.output):
149 |                     assert os.path.isdir(args.output), args.output
150 |                     out_filename = os.path.join(args.output, os.path.basename(path))
151 |                 else:
152 |                     assert len(args.input) == 1, "Please specify a directory with args.output"
153 |                     out_filename = args.output
154 |                 visualized_output.save(out_filename)
155 |             else:
156 |                 cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
157 |                 cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
158 |                 if cv2.waitKey(0) == 27:
159 |                     break  # esc to quit
160 |     elif args.webcam:
161 |         assert args.input is None, "Cannot have both --input and --webcam!"
162 |         assert args.output is None, "output not yet supported with --webcam!"
163 |         cam = cv2.VideoCapture(0)
164 |         for vis in tqdm.tqdm(demo.run_on_video(cam)):
165 |             cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
166 |             cv2.imshow(WINDOW_NAME, vis)
167 |             if cv2.waitKey(1) == 27:
168 |                 break  # esc to quit
169 |         cam.release()
170 |         cv2.destroyAllWindows()
171 |     elif args.video_input:
172 |         video = cv2.VideoCapture(args.video_input)
173 |         width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
174 |         height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
175 |         frames_per_second = video.get(cv2.CAP_PROP_FPS)
176 |         num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
177 |         basename = os.path.basename(args.video_input)
178 |         codec, file_ext = (
179 |             ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
180 |         )
181 |         if codec == ".mp4v":
182 |             warnings.warn("x264 codec not available, switching to mp4v")
183 |         if args.output:
184 |             if os.path.isdir(args.output):
185 |                 output_fname = os.path.join(args.output, basename)
186 |                 output_fname = os.path.splitext(output_fname)[0] + file_ext
187 |             else:
188 |                 output_fname = args.output
189 |             assert not os.path.isfile(output_fname), output_fname
190 |             output_file = cv2.VideoWriter(
191 |                 filename=output_fname,
192 |                 # some installation of opencv may not support x264 (due to its license),
193 |                 # you can try other format (e.g. MPEG)
194 |                 fourcc=cv2.VideoWriter_fourcc(*codec),
195 |                 fps=float(frames_per_second),
196 |                 frameSize=(width, height),
197 |                 isColor=True,
198 |             )
199 |         assert os.path.isfile(args.video_input)
200 |         for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
201 |             if args.output:
202 |                 output_file.write(vis_frame)
203 |             else:
204 |                 cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
205 |                 cv2.imshow(basename, vis_frame)
206 |                 if cv2.waitKey(1) == 27:
207 |                     break  # esc to quit
208 |         video.release()
209 |         if args.output:
210 |             output_file.release()
211 |         else:
212 |             cv2.destroyAllWindows()


--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
 2 | from pathlib import Path
 3 | import json
 4 | 
 5 | import torch
 6 | import detectron2
 7 | import detectron2.utils.comm as comm
 8 | from detectron2.engine import default_argument_parser, default_setup, launch
 9 | 
10 | import deepformable
11 | from deepformable.engine import DeepformableTrainer
12 | from deepformable.utils import get_cfg, marker_metadata_loader
13 | from deepformable.data import register_deepformable_dataset
14 | 
15 | 
16 | def setup(args):
17 |     cfg = get_cfg()
18 |     cfg.merge_from_file(args.config_file)
19 |     cfg.merge_from_list(args.opts)
20 |     
21 |     # Register datasets
22 |     dataset_dir = Path(args.dataset_train_dir)
23 |     register_deepformable_dataset(
24 |         cfg.DATASETS.TRAIN[0], {},
25 |         str(dataset_dir / "annotations.json"),
26 |         str(dataset_dir),
27 |         load_markers=False)
28 | 
29 |     for test_dataset in cfg.DATASETS.TEST:
30 |         # Check if need to load markers
31 |         load_markers = True if "load_markers" in test_dataset.lower() else False
32 |         # Select proper dataset path
33 |         dataset_suffix = test_dataset.split("-")[-1].lower()
34 |         dataset_dir = args.dataset_test1_dir
35 |         if dataset_suffix == "test2":
36 |             dataset_dir = args.dataset_test2_dir
37 |         elif dataset_suffix == "test3":
38 |             dataset_dir = args.dataset_test3_dir
39 |         # Reguster dataset
40 |         dataset_dir = Path(dataset_dir)
41 |         register_deepformable_dataset(
42 |             test_dataset, {},
43 |             str(dataset_dir / "annotations.json"),
44 |             str(dataset_dir),
45 |             load_markers=load_markers     # if this option is false, mapper should create marker locations
46 |         )                         # based on board location information. For more info check the mapper
47 |     # Load metadata
48 |     if not marker_metadata_loader(cfg, args.marker_config_file):
49 |         print("Failed to load marker metadata")
50 | 
51 |     default_setup(cfg, args)
52 |     return cfg
53 | 
54 | 
55 | def main(args):
56 |     cfg = setup(args)
57 |     trainer = DeepformableTrainer(cfg, verbose=True)
58 |     trainer.resume_or_load(resume=args.resume)
59 |     
60 |     if args.eval_only:
61 |         res = trainer.test(cfg)
62 |         if comm.is_main_process():
63 |             result_path = Path(cfg.OUTPUT_DIR) / "results.json"
64 |             with open(result_path, 'w') as result_file:
65 |                 json.dump(res, result_file, indent=4)
66 |         return res
67 |     return trainer.train()
68 | 
69 | 
70 | if __name__ == "__main__":
71 |     # Use --eval-only to skip training and only run evaluation
72 |     arg_parser = default_argument_parser()
73 |     arg_parser.add_argument(
74 |         '--dataset-train-dir', type=str, default='/Data/Datasets/train', help='Provide train dataset path')
75 |     arg_parser.add_argument(
76 |         '--dataset-test1-dir', type=str, default='/Data/Datasets/test-inpainted', help='Provide test1 dataset path')
77 |     arg_parser.add_argument(
78 |         '--dataset-test2-dir', type=str, default='/Data/Datasets/test-realworld/flat', help='Provide test2 dataset path')
79 |     arg_parser.add_argument(
80 |         '--dataset-test3-dir', type=str, default='/Data/Datasets/test-realworld/deformation', help='Provide test3 dataset path')
81 |     arg_parser.add_argument(
82 |         "--marker-config-file", default='/Data/Datasets/marker_config.json', metavar="FILE",
83 |         help="path to marker config file for the metadata")
84 |     args = arg_parser.parse_args()
85 | 
86 |     launch(
87 |         main,
88 |         args.num_gpus,
89 |         num_machines=args.num_machines,
90 |         machine_rank=args.machine_rank,
91 |         dist_url=args.dist_url,
92 |         args=(args,),
93 |     )
94 | 


--------------------------------------------------------------------------------
/tools/training_visualizer.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
  2 | import cv2
  3 | from cv2 import aruco
  4 | import random
  5 | import matplotlib.pyplot as plt
  6 | import json
  7 | import numpy as np
  8 | import argparse
  9 | import math
 10 | from pathlib import Path
 11 | from typing import Union
 12 | from copy import deepcopy
 13 | import datetime
 14 | import os
 15 | import random
 16 | from tqdm.notebook import tqdm
 17 | 
 18 | import torch
 19 | import torch.nn.functional as F
 20 | 
 21 | import detectron2
 22 | from detectron2.data import MetadataCatalog, DatasetCatalog
 23 | from detectron2.utils.events import EventStorage
 24 | from detectron2.config import configurable
 25 | 
 26 | from deepformable.modeling import MarkerRendererDiffrast
 27 | from deepformable.modeling import MarkerRenderer
 28 | 
 29 | import deepformable
 30 | from deepformable.utils import DeepformableVisualizer
 31 | from deepformable.data import (
 32 |     register_deepformable_dataset, DeepformableMapper,
 33 |     DetectronMapperWAnn
 34 | )
 35 | from deepformable.utils import (
 36 |     get_cfg, convert_mapped_instances,
 37 |     DeepformableVisualizer,
 38 |     marker_metadata_loader
 39 | )
 40 | from deepformable.modeling import MarkerRendererDiffrast, IntermediateAugmentor
 41 | from deepformable.engine import DeepformableTrainer
 42 | 
 43 | 
 44 | def make_config(
 45 |     config_path="/host/configs/deepformable-main.yaml",
 46 |     weights="/Data/Models/deepformable_model.pth",
 47 | ):
 48 |     # Setup Config
 49 |     cfg = get_cfg()
 50 |     cfg.OUTPUT_DIR = "/root"
 51 |     cfg.merge_from_file(config_path)
 52 |     cfg.MODEL.WEIGHTS = weights
 53 |     cfg.SOLVER.IMS_PER_BATCH = 1
 54 |     cfg.INPUT.MIN_SIZE_TRAIN = (640, 672, 704, 720, 736, 768, 800, 960)
 55 | 
 56 |     # cfg.INTERMEDIATE_AUGMENTOR.AUG_LIST = [
 57 |     #     "PerspectiveAugmentor", "TpsAugmentor", "RadialDistortionAugmentor",
 58 |     #     "DefocusBlurAugmentor", "MotionBlurAugmentor", "HueShiftAugmentor", 
 59 |     #     "BrightnessAugmentor", "NoiseAugmentor", "GammaAugmentor", 
 60 |     #     "GammaCorrector", "JPEGAugmentor"]
 61 |     # cfg.INTERMEDIATE_AUGMENTOR.EXEC_PROBA_LIST = [0.4, 0.5, 0.3, 0.4, 0.4, 0.4, 0.4, 0.45, 0.3, 1.0, 0.35]
 62 |     # cfg.INTERMEDIATE_AUGMENTOR.AUG_LIST = ["HueShiftAugmentor", "GammaCorrector"]
 63 |     # cfg.INTERMEDIATE_AUGMENTOR.EXEC_PROBA_LIST = [1.0] * len(cfg.INTERMEDIATE_AUGMENTOR.AUG_LIST)
 64 | 
 65 |     # cfg.RENDERER.SHADING_METHOD = "diffuse"
 66 |     return cfg
 67 | 
 68 | 
 69 | def register_datasets(cfg, data_root=Path("/Data/Datasets")):
 70 |     # Register datasets
 71 |     cur_data_root = data_root / "train-reduced"
 72 |     # cur_data_root = data_root / "test-inpainted"
 73 |     register_deepformable_dataset(
 74 |         "deepformable-rendered-train", {},
 75 |         str(cur_data_root / "annotations.json"),
 76 |         str(cur_data_root),
 77 |         load_markers=False)
 78 | 
 79 |     # Test1: rendered test
 80 |     cur_data_root = data_root / "test-inpainted"
 81 |     register_deepformable_dataset(
 82 |         "deepformable-rendered-test1", {},
 83 |         str(cur_data_root / "annotations.json"),
 84 |         str(cur_data_root),
 85 |         load_markers=False)
 86 |     register_deepformable_dataset(
 87 |         "deepformable-rendered-aug-test1", {},
 88 |         str(cur_data_root / "annotations.json"),
 89 |         str(cur_data_root),
 90 |         load_markers=False)
 91 | 
 92 |     # Test2: real-flat test
 93 |     cur_data_root = data_root / "test-realworld/flat"
 94 |     register_deepformable_dataset(
 95 |         "deepformable_flat-real-load_markers-test2", {},
 96 |         str(cur_data_root / "annotations.json"),
 97 |         str(cur_data_root),
 98 |         load_markers=True)
 99 |     register_deepformable_dataset(
100 |         "deepformable_flat-real-load_markers-aug-test2", {},
101 |         str(cur_data_root / "annotations.json"),
102 |         str(cur_data_root),
103 |         load_markers=True)
104 | 
105 |     # Test2: real-deformation test
106 |     cur_data_root = data_root / "test-realworld/deformation"
107 |     register_deepformable_dataset(
108 |         "deepformable_deformation-real-load_markers-test3", {},
109 |         str(cur_data_root / "annotations.json"),
110 |         str(cur_data_root),
111 |         load_markers=True)
112 |     register_deepformable_dataset(
113 |         "deepformable_deformation-real-load_markers-aug-test3", {},
114 |         str(cur_data_root / "annotations.json"),
115 |         str(cur_data_root),
116 |         load_markers=True)
117 | 
118 |     # Load metadata
119 |     if not marker_metadata_loader(cfg, data_root / "test-realworld/marker_config.json"):
120 |         print("Failed to load marker metadata")
121 | 
122 | if __name__ == "__main__":
123 |     enable_aug = True
124 |     show_labels = True
125 |     config_path = "configs/aruco-learnable-mit.yaml"
126 |     
127 |     cfg = make_config(config_path, weights="")
128 |     register_datasets(cfg)
129 |     dataset_names = cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
130 | 
131 |     # Load datasets
132 |     datasets, datasets_metadata, datasets_mapper = [], [], []
133 |     for dataset_name in dataset_names:
134 |         dataset = DatasetCatalog.get(dataset_name)
135 |         is_train = "train" in dataset_name
136 |         print(f"{dataset_name} length: {len(dataset)}")
137 |         datasets.append(dataset)
138 |         datasets_metadata.append(MetadataCatalog.get(dataset_name))
139 |         mapper = DeepformableMapper(cfg, is_train) \
140 |             if "rendered" in dataset_name else DetectronMapperWAnn(cfg, is_train)
141 |         datasets_mapper.append(mapper)
142 | 
143 |     trainer = DeepformableTrainer(cfg, False)
144 |     trainer.resume_or_load(resume=False)
145 |     self = trainer.model
146 |     show_with_renderers = {}
147 |     # show_with_renderers = {cfg.RENDERER.NAME: self.renderer}
148 |     # show_with_renderers = {
149 |     #     "nvdiffrast": MarkerRendererDiffrast(cfg).to(self.device), 
150 |     #     # "homography": MarkerRenderer(cfg).to(self.device),
151 |     # }
152 | 
153 |     print("Input dataset index:")
154 |     for index, dataset_name in enumerate(dataset_names):
155 |         print(f" - {index}): {dataset_name}")
156 |     
157 |     d_idx = 0 # int(input())
158 |     dataset, metadata = datasets[d_idx], datasets_metadata[d_idx]
159 |     dataset_name, mapper = dataset_names[d_idx], datasets_mapper[d_idx]
160 |     visualizer = DeepformableVisualizer(metadata)
161 | 
162 |     while(1):
163 |         d = random.sample(dataset, 1)[0]
164 |         dataset_dict = mapper(d)
165 | 
166 |         if dataset_dict is None:
167 |             continue
168 | 
169 |         converted_dict = convert_mapped_instances(dataset_dict)
170 |         img = dataset_dict['image'].permute(1,2,0).cpu().numpy()
171 | 
172 |         # %matplotlib inline 
173 |         fig = plt.figure(figsize=(12,8))
174 |         vis_out = visualizer.draw_instance_predictions(img, converted_dict)
175 |         cv2.imshow("original", vis_out.get_image())
176 |         
177 |         with EventStorage(), torch.no_grad():
178 |             data = self.carry_to_gpu([deepcopy(dataset_dict)])
179 |             
180 |             for d in data:
181 |                 d["image"] = (d["image"] / 255.0) ** self.gamma
182 | 
183 |             markers_batch, marker_loss = self.marker_generator(
184 |                 [d["instances"] for d in data])
185 | 
186 |             for renderer_name, renderer in show_with_renderers.items():
187 |                 d, markers = data[0], markers_batch[0]
188 |                 image = (renderer(d, markers) ** (1/self.gamma)) * 255.0
189 |                 image = image.permute(1,2,0).cpu().numpy()
190 | 
191 |                 if not show_labels:
192 |                     converted_dict = {}
193 |                 vis_out = visualizer.draw_instance_predictions(image, converted_dict)
194 |                 cv2.imshow(renderer_name, vis_out.get_image())
195 | 
196 |             for d, markers in zip(data, markers_batch):
197 |                 d["image"] = self.renderer(d, markers)
198 | 
199 |             # data, marker_loss = self.render_data(data)
200 |             
201 |             # Apply Augmentations
202 |             for d in data:
203 |                 if enable_aug:
204 |                     probabilities = torch.rand(self.aug_prob.shape, device=self.device)
205 |                     indexes = (probabilities < self.aug_prob).nonzero(as_tuple=True)[0].tolist()
206 |                     selected_augmentations = [self.intermediate_augmentations[i] for i in indexes]
207 |                 
208 |                     for aug in selected_augmentations:
209 |                         d["image"], d["instances"] = aug(d["image"], d["instances"])
210 |                     d["instances"] = IntermediateAugmentor.fix_instances(d["instances"])
211 |                     
212 |                     print(selected_augmentations)
213 |                 else:
214 |                     d["image"] = d["image"] ** (1/self.gamma)
215 |                 d["image"] = d["image"] * 255.0    
216 |                 
217 |             image_np = data[0]["image"].permute(1,2,0).cpu().numpy()
218 |             instances = data[0]["instances"]
219 | 
220 |             fig = plt.figure(figsize=(12,8))
221 |             converted_dict = convert_mapped_instances(data[0])
222 |             # converted_dict = {}
223 |             if not show_labels:
224 |                 converted_dict = {}
225 |             vis_out = visualizer.draw_instance_predictions(image_np, converted_dict)
226 |             cv2.imshow("final_image", vis_out.get_image())
227 | 
228 |         k = cv2.waitKey(0)
229 |         # Esc key to stop
230 |         if k==27:    
231 |             break
232 |         else:
233 |             continue
234 |     


--------------------------------------------------------------------------------