├── .dockerignore ├── .gitignore ├── LICENSE ├── README.md ├── configs ├── VoVNet │ ├── Base-Panoptic-FPN.yaml │ ├── Base-RCNN-MobileNet-FPN.yaml │ ├── Base-RCNN-VoVNet-FPN.yaml │ ├── faster_rcnn_Mv2_FPNLite_3x.yaml │ ├── faster_rcnn_V_19_FPNLite_3x.yaml │ ├── faster_rcnn_V_19_FPN_3x.yaml │ ├── faster_rcnn_V_19_dw_FPNLite_3x.yaml │ ├── faster_rcnn_V_19_slim_FPNLite_3x.yaml │ ├── faster_rcnn_V_19_slim_dw_FPNLite_3x.yaml │ ├── faster_rcnn_V_39_FPN_3x.yaml │ ├── faster_rcnn_V_57_FPN_3x.yaml │ ├── faster_rcnn_V_99_FPN_3x.yaml │ ├── mask_rcnn_V_19_FPNLite_3x.yaml │ ├── mask_rcnn_V_19_FPN_3x.yaml │ ├── mask_rcnn_V_39_FPN_3x.yaml │ ├── mask_rcnn_V_57_FPN_3x.yaml │ ├── mask_rcnn_V_99_FPN_3x.yaml │ ├── panoptic_fpn_V_39_FPN_3x.yaml │ └── panoptic_fpn_V_57_FPN_3x.yaml ├── aruco-detector.yaml ├── deepformable-main.yaml └── detectron2 │ ├── Base-RCNN-C4.yaml │ ├── Base-RCNN-DilatedC5.yaml │ ├── Base-RCNN-FPN.yaml │ └── Base-RetinaNet.yaml ├── deepformable ├── __init__.py ├── data │ ├── __init__.py │ ├── build.py │ ├── dataset_mapper.py │ └── register_datasets.py ├── engine │ ├── __init__.py │ └── trainers.py ├── evaluation │ ├── __init__.py │ └── evaluation.py ├── layers │ ├── __init__.py │ ├── adaptive_loss.py │ └── dist_ops.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── utils.py │ │ └── vovnet.py │ ├── intermediate_augmentor │ │ ├── __init__.py │ │ ├── build.py │ │ ├── color_augmentations.py │ │ ├── jpeg_augmentor.py │ │ ├── perspective_augmentor.py │ │ ├── radial_distortion_augmentor.py │ │ └── tps_augmentor.py │ ├── marker_generator │ │ ├── __init__.py │ │ ├── april_generator.py │ │ ├── aruco_generator.py │ │ ├── build.py │ │ └── generalized_generator.py │ ├── marker_roi_heads │ │ ├── __init__.py │ │ ├── corner_head.py │ │ ├── decoder_head.py │ │ ├── marker_roi_heads.py │ │ ├── naive_transform_head.py │ │ └── transformer_head.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── classical_detector.py │ │ ├── render_rcnn.py │ │ └── utils.py │ ├── renderer.py │ └── rpn.py └── utils │ ├── __init__.py │ ├── aruco_utils.py │ ├── board_utils.py │ ├── config.py │ ├── env.py │ ├── general_utils.py │ ├── image_utils.py │ ├── inpaint_utils.py │ └── visualize_utils.py ├── docker ├── Cpu.Dockerfile ├── DeepfillInpaint.Dockerfile ├── Dockerfile ├── env_cpu.yml └── env_gpu.yml ├── files ├── example_boards │ ├── charuco.pdf │ ├── markers_regular.pdf │ ├── random_board1.pdf │ ├── random_board2.pdf │ └── two_markers_large.pdf ├── example_input.png ├── example_video.mov ├── prediction_output.gif ├── teaser.jpg └── template_config.json ├── inpaint ├── .gitignore ├── deepfill.py └── deepfill_ops.py ├── jupyter └── deepformable_tutorial.ipynb ├── setup.cfg ├── setup.py └── tools ├── README.md ├── calibrate.py ├── generate_board_json.py ├── generate_board_pdf.py ├── predictor_demo.py ├── preprocess_dataset.py ├── train.py └── training_visualizer.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /models 3 | files/boards 4 | /output -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | *.diff 4 | 5 | # compilation and distribution 6 | __pycache__ 7 | _ext 8 | *.pyc 9 | *.pyd 10 | *.so 11 | *.egg-info/ 12 | build/ 13 | dist/ 14 | wheels/ 15 | 16 | # pytorch/python/numpy formats 17 | *.pth 18 | *.pkl 19 | *.npy 20 | 21 | # ipython/jupyter notebooks 22 | **/.ipynb_checkpoints/ 23 | 24 | # 
Editor temporaries 25 | *.swn 26 | *.swo 27 | *.swp 28 | *~ 29 | 30 | # editor settings 31 | .idea 32 | .vscode 33 | 34 | # project dirs 35 | /models 36 | /output -------------------------------------------------------------------------------- /configs/VoVNet/Base-Panoptic-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | MASK_ON: True 5 | SEM_SEG_HEAD: 6 | LOSS_WEIGHT: 0.5 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_separated",) 9 | TEST: ("coco_2017_val_panoptic_separated",) 10 | -------------------------------------------------------------------------------- /configs/VoVNet/Base-RCNN-MobileNet-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_mobilenetv2_fpn_backbone" 5 | FREEZE_AT: 0 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | ANCHOR_GENERATOR: 11 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 12 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 13 | RPN: 14 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 15 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 16 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 17 | # Detectron1 uses 2000 proposals per-batch, 18 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 19 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 20 | POST_NMS_TOPK_TRAIN: 1000 21 | POST_NMS_TOPK_TEST: 1000 22 | ROI_HEADS: 23 | NAME: "StandardROIHeads" 24 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 25 | ROI_BOX_HEAD: 26 | NAME: "FastRCNNConvFCHead" 27 | NUM_FC: 2 28 | POOLER_RESOLUTION: 7 29 | ROI_MASK_HEAD: 30 | NAME: "MaskRCNNConvUpsampleHead" 31 | NUM_CONV: 4 32 | POOLER_RESOLUTION: 14 33 | DATASETS: 34 | TRAIN: ("coco_2017_train",) 35 | TEST: ("coco_2017_val",) 36 | SOLVER: 37 | IMS_PER_BATCH: 16 38 | BASE_LR: 0.02 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 90000 41 | INPUT: 42 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 43 | -------------------------------------------------------------------------------- /configs/VoVNet/Base-RCNN-VoVNet-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_vovnet_fpn_backbone" 5 | FREEZE_AT: 0 6 | VOVNET: 7 | OUT_FEATURES: ["stage2", "stage3", "stage4", "stage5"] 8 | FPN: 9 | IN_FEATURES: ["stage2", "stage3", "stage4", "stage5"] 10 | ANCHOR_GENERATOR: 11 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 12 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 13 | RPN: 14 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 15 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 16 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 17 | # Detectron1 uses 2000 proposals per-batch, 18 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 19 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
20 | POST_NMS_TOPK_TRAIN: 1000 21 | POST_NMS_TOPK_TEST: 1000 22 | ROI_HEADS: 23 | NAME: "StandardROIHeads" 24 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 25 | ROI_BOX_HEAD: 26 | NAME: "FastRCNNConvFCHead" 27 | NUM_FC: 2 28 | POOLER_RESOLUTION: 7 29 | ROI_MASK_HEAD: 30 | NAME: "MaskRCNNConvUpsampleHead" 31 | NUM_CONV: 4 32 | POOLER_RESOLUTION: 14 33 | DATASETS: 34 | TRAIN: ("coco_2017_train",) 35 | TEST: ("coco_2017_val",) 36 | SOLVER: 37 | IMS_PER_BATCH: 16 38 | BASE_LR: 0.02 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 90000 41 | INPUT: 42 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 43 | -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_Mv2_FPNLite_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-MobileNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/yduxbc13s3ip6qn/mobilenet_v2_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | FPN: 6 | OUT_CHANNELS: 128 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/FRCN-MNv2-FPNLite-3x" -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_V_19_FPNLite_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | VOVNET: 6 | CONV_BODY : "V-19-eSE" 7 | FPN: 8 | OUT_CHANNELS: 128 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "checkpoints/FRCN-V2-FPNLite-19-3x" -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_V_19_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | VOVNET: 6 | CONV_BODY : "V-19-eSE" 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/FRCN-V2-19-3x" -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_V_19_dw_FPNLite_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/9awvl0mxye3nqz1/vovnet19_ese_dw_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | VOVNET: 6 | CONV_BODY : "V-19-dw-eSE" 7 | FPN: 8 | OUT_CHANNELS: 128 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "checkpoints/FRCN-V2-19-dw-FPNLite-3x" -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_V_19_slim_FPNLite_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/8h5ybmi4ftbcom0/vovnet19_ese_slim_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | VOVNET: 6 | CONV_BODY : "V-19-slim-eSE" 7 | FPN: 8 | OUT_CHANNELS: 128 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "checkpoints/FRCN-V2-19-slim-FPNLite-3x" -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_V_19_slim_dw_FPNLite_3x.yaml: -------------------------------------------------------------------------------- 1 | 
_BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/f3s7ospitqoals1/vovnet19_ese_slim_dw_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | VOVNET: 6 | CONV_BODY : "V-19-slim-dw-eSE" 7 | FPN: 8 | OUT_CHANNELS: 128 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "checkpoints/FRCN-V2-19-slim-dw-FPNLite-3x" -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_V_39_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | VOVNET: 6 | CONV_BODY : "V-39-eSE" 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/FRCN-V2-39-3x" -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_V_57_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | VOVNET: 6 | CONV_BODY : "V-57-eSE" 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/FRCN-V2-57-3x" -------------------------------------------------------------------------------- /configs/VoVNet/faster_rcnn_V_99_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" 4 | MASK_ON: False 5 | VOVNET: 6 | CONV_BODY : "V-99-eSE" 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/FRCN-V2-99-3x" -------------------------------------------------------------------------------- /configs/VoVNet/mask_rcnn_V_19_FPNLite_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1" 4 | MASK_ON: True 5 | VOVNET: 6 | CONV_BODY : "V-19-eSE" 7 | FPN: 8 | OUT_CHANNELS: 128 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "checkpoints/MRCN-V2-19-FPNLite-3x" -------------------------------------------------------------------------------- /configs/VoVNet/mask_rcnn_V_19_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1" 4 | MASK_ON: True 5 | VOVNET: 6 | CONV_BODY : "V-19-eSE" 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/MRCN-V2-19-3x" -------------------------------------------------------------------------------- /configs/VoVNet/mask_rcnn_V_39_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" 4 | MASK_ON: True 5 | VOVNET: 6 | CONV_BODY : "V-39-eSE" 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/MRCN-V2-39-3x" 
-------------------------------------------------------------------------------- /configs/VoVNet/mask_rcnn_V_57_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" 4 | MASK_ON: True 5 | VOVNET: 6 | CONV_BODY : "V-57-eSE" 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/MRCN-V2-57-3x" -------------------------------------------------------------------------------- /configs/VoVNet/mask_rcnn_V_99_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-VoVNet-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" 4 | MASK_ON: True 5 | VOVNET: 6 | CONV_BODY : "V-99-eSE" 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | OUTPUT_DIR: "checkpoints/MRCN-V2-99-3x" -------------------------------------------------------------------------------- /configs/VoVNet/panoptic_fpn_V_39_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" 4 | VOVNET: 5 | CONV_BODY : "V-39-eSE" 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "checkpoints/Panoptic-FPN-V2-39-3x" -------------------------------------------------------------------------------- /configs/VoVNet/panoptic_fpn_V_57_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" 4 | VOVNET: 5 | CONV_BODY : "V-57-eSE" 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "checkpoints/Panoptic-FPN-V2-57-3x" -------------------------------------------------------------------------------- /configs/aruco-detector.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "ClassicalDetector" 3 | MARKER_GENERATOR: 4 | NAME: "ArucoGenerator" 5 | MARKER_SIZE: (8, 8) 6 | BORDER_BITS: 1 7 | ARUCO_DICT: "6x6_100" 8 | NUM_GENERATION_BITS: 36 9 | ROI_HEADS: 10 | NUM_CLASSES: 96 11 | # NUM_CLASSES: 1000 12 | DATASETS: 13 | TRAIN: ("deepformable-train",) 14 | TEST: ("deepformable-test",) 15 | INPUT: 16 | MIN_SIZE_TEST: 0 17 | MAX_SIZE_TEST: 1920 18 | RANDOM_FLIP: "none" # Markers will also be mirror flipped in this case, so we disable it 19 | MASK_FORMAT: "polygon" 20 | VERSION: 2 -------------------------------------------------------------------------------- /configs/deepformable-main.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "VoVNet/faster_rcnn_V_19_dw_FPNLite_3x.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN_RenderInput" 4 | # WEIGHTS: "/host/models/deepformable_model.pth" 5 | WEIGHTS: "https://www.dropbox.com/s/7h6zn0owumucs48/faster_rcnn_V_19_eSE_dw_FPNLite_ms_3x.pth?dl=1" 6 | MASK_ON: True 7 | MARKER_GENERATOR: 8 | NAME: "GeneralizedGenerator" 9 | CONV_DIMS: [[8], [6], [6]] 10 | FINAL_CONV_KERNEL_SIZE: 1 11 | NUM_GENERATION_BITS: 36 12 | MARKER_SIZE: (32, 32) 13 | VOVNET: 14 | OUT_FEATURES: ["stem", "stage2", "stage3", "stage4", "stage5"] 15 | ROI_HEADS: 16 | NAME: "MarkerROIHeads" 17 | 
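  # (Editor's note, not in the original file; an assumption worth flagging) NUM_CLASSES below
  # appears to be the number of distinct markers the ROI head can classify, matching the
  # 96-class setting of the classical ArUco baseline in configs/aruco-detector.yaml, whose
  # 6x6 dictionary also motivates NUM_GENERATION_BITS: 36 above.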
NUM_CLASSES: 96 18 | NMS_THRESH_TEST: 0.1 19 | SCORE_THRESH_TEST: 0.8 20 | PROPOSAL_GENERATOR: 21 | NAME: "RPN_AdaptiveLoss" 22 | RPN: 23 | PRE_NMS_TOPK_TEST: 5000 24 | POST_NMS_TOPK_TEST: 1000 25 | INTERMEDIATE_AUGMENTOR: 26 | AUG_LIST: [ 27 | "PerspectiveAugmentor", "TpsAugmentor", "RadialDistortionAugmentor", 28 | "DefocusBlurAugmentor", "MotionBlurAugmentor", "HueShiftAugmentor", 29 | "BrightnessAugmentor", "NoiseAugmentor", "GammaAugmentor", 30 | "GammaCorrector", "JPEGAugmentor"] 31 | EXEC_PROBA_LIST: [0.4, 0.5, 0.3, 0.4, 0.4, 0.4, 0.4, 0.45, 0.3, 1.0, 0.35] 32 | RENDERER: 33 | # NAME: "MarkerRenderer" 34 | SHADING_METHOD: "cook-torrance" 35 | DATASETS: 36 | TRAIN: ("deepformable-rendered-train",) 37 | TEST: ("deepformable-rendered-test1", "deepformable-rendered-aug-test1",) 38 | # TEST: ( 39 | # "deepformable-rendered-test1", "deepformable-rendered-aug-test1", 40 | # "deepformable_flat-real-load_markers-test2", "deepformable_flat-real-load_markers-aug-test2", 41 | # "deepformable_deformation-real-load_markers-test3", "deepformable_deformation-real-load_markers-aug-test3",) 42 | TEST: 43 | EVAL_PERIOD: 2500 44 | DETECTIONS_PER_IMAGE: 1000 45 | SOLVER: 46 | BASE_LR: 0.02 47 | IMS_PER_BATCH: 16 48 | STEPS: (20000,25000) 49 | MAX_ITER: 30000 50 | CLIP_GRADIENTS: 51 | ENABLED: True 52 | VIS_PERIOD: 500 53 | INPUT: 54 | MIN_SIZE_TEST: 0 55 | MIN_SIZE_TRAIN: (640, 672, 704, 720, 736, 768, 800, 1080) 56 | MAX_SIZE_TRAIN: 1920 57 | MAX_SIZE_TEST: 1920 58 | RANDOM_FLIP: "none" # Markers will also be mirror flipped in this case, so we disable it 59 | MASK_FORMAT: "polygon" 60 | DATALOADER: 61 | NUM_WORKERS: 8 62 | SEED: 13 63 | OUTPUT_DIR: "/Data/Experiments/test" 64 | VERSION: 2 65 | -------------------------------------------------------------------------------- /configs/detectron2/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | VERSION: 2 19 | -------------------------------------------------------------------------------- /configs/detectron2/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | VERSION: 2 32 | -------------------------------------------------------------------------------- /configs/detectron2/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | 
NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/detectron2/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | SMOOTH_L1_LOSS_BETA: 0.0 15 | DATASETS: 16 | TRAIN: ("coco_2017_train",) 17 | TEST: ("coco_2017_val",) 18 | SOLVER: 19 | IMS_PER_BATCH: 16 20 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 21 | STEPS: (60000, 80000) 22 | MAX_ITER: 90000 23 | INPUT: 24 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 25 | VERSION: 2 26 | -------------------------------------------------------------------------------- /deepformable/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Edited by Mustafa B. Yaldiz (VCLAB, KAIST) 3 | 4 | from .utils.env import setup_environment 5 | 6 | setup_environment() 7 | 8 | 9 | # This line will be programatically read/write by setup.py. 10 | # Leave them at the bottom of this file and don't touch them. 11 | __version__ = "0.1.0" -------------------------------------------------------------------------------- /deepformable/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | from .register_datasets import register_deepformable_dataset 3 | from .dataset_mapper import DeepformableMapper, DetectronMapperWAnn 4 | from .build import build_detection_train_loader -------------------------------------------------------------------------------- /deepformable/data/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. 
Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import logging 3 | import random 4 | 5 | import torch 6 | import torch.utils.data as torchdata 7 | 8 | from detectron2.config import configurable 9 | from detectron2.data.build import _train_loader_from_config, build_batch_data_loader 10 | from detectron2.data.common import _MapIterableDataset, DatasetFromList 11 | from detectron2.data.samplers import TrainingSampler 12 | from detectron2.utils.serialize import PicklableWrapper 13 | 14 | 15 | class MapDataset(torchdata.Dataset): 16 | """ 17 | This method tries several times to map given data, if cannot picks 18 | another data. It is modified from original MapDataset located at Detectron2. 19 | """ 20 | def __init__(self, dataset, map_func, retry_count=10): 21 | self._dataset = dataset 22 | self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work 23 | self._rng = random.Random(42) 24 | self.retry_count = retry_count 25 | 26 | def __new__(cls, dataset, map_func): 27 | is_iterable = isinstance(dataset, torchdata.IterableDataset) 28 | if is_iterable: 29 | return _MapIterableDataset(dataset, map_func) 30 | else: 31 | return super().__new__(cls) 32 | 33 | def __getnewargs__(self): 34 | return self._dataset, self._map_func 35 | 36 | def __len__(self): 37 | return len(self._dataset) 38 | 39 | def __getitem__(self, idx): 40 | cur_idx = int(idx) 41 | d = self._dataset[cur_idx] 42 | 43 | for _ in range(self.retry_count): 44 | data = self._map_func(d) 45 | if data is not None: 46 | return data 47 | 48 | logger = logging.getLogger(__name__) 49 | # This id should be same as image_id 50 | warn_str = "Failed to apply `_map_func` for idx: {}".format(idx) 51 | logger.warning(warn_str) 52 | 53 | return self.__getitem__(self._rng.randint(0, self.__len__()-1)) 54 | 55 | 56 | @configurable(from_config=_train_loader_from_config) 57 | def build_detection_train_loader( 58 | dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0 59 | ): 60 | """ 61 | This method is modified to use our MapDataset implementation. 62 | """ 63 | if isinstance(dataset, list): 64 | dataset = DatasetFromList(dataset, copy=False) 65 | if mapper is not None: 66 | dataset = MapDataset(dataset, mapper) 67 | 68 | """ 69 | TODO: Can get output class id's and thing classes 70 | from mapper and change metadata to eliminate problems here. 71 | """ 72 | 73 | if isinstance(dataset, torchdata.IterableDataset): 74 | assert sampler is None, "sampler must be None if dataset is IterableDataset" 75 | else: 76 | if sampler is None: 77 | sampler = TrainingSampler(len(dataset)) 78 | assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}" 79 | return build_batch_data_loader( 80 | dataset, 81 | sampler, 82 | total_batch_size, 83 | aspect_ratio_grouping=aspect_ratio_grouping, 84 | num_workers=num_workers, 85 | ) 86 | -------------------------------------------------------------------------------- /deepformable/data/register_datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is modified from detectron2 implementation to load additional labels from datasets. 3 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
4 | """ 5 | import contextlib 6 | import io 7 | import logging 8 | import numpy as np 9 | import os 10 | import pycocotools.mask as mask_util 11 | from fvcore.common.file_io import PathManager 12 | from fvcore.common.timer import Timer 13 | 14 | from detectron2.structures import Boxes, BoxMode, PolygonMasks 15 | 16 | from detectron2.data import DatasetCatalog, MetadataCatalog 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | def load_deepformable_json( 21 | json_file, image_root, dataset_name=None, extra_annotation_keys=None 22 | ): 23 | """ 24 | This method modified to support loading the marker instances. 25 | """ 26 | from pycocotools.coco import COCO 27 | 28 | timer = Timer() 29 | json_file = PathManager.get_local_path(json_file) 30 | with contextlib.redirect_stdout(io.StringIO()): 31 | coco_api = COCO(json_file) 32 | if timer.seconds() > 1: 33 | logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) 34 | 35 | category_info = None 36 | id_map = None 37 | if dataset_name is not None: 38 | meta = MetadataCatalog.get(dataset_name) 39 | cat_ids = sorted(coco_api.getCatIds()) 40 | cats = coco_api.loadCats(cat_ids) 41 | if "thing_classes" not in meta.as_dict(): 42 | # The categories in a custom json file may not be sorted. 43 | thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] 44 | meta.thing_classes = thing_classes 45 | 46 | if "marker_ids" in cats[0]: 47 | category_info = {c["id"]: c for c in cats} 48 | 49 | # In COCO, certain category ids are artificially removed, 50 | # and by convention they are always ignored. 51 | # We deal with COCO's id issue and translate 52 | # the category ids to contiguous ids in [0, 80). 53 | 54 | # It works by looking at the "categories" field in the json, therefore 55 | # if users' own json also have incontiguous ids, we'll 56 | # apply this mapping as well but print a warning. 57 | if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): 58 | if "coco" not in dataset_name: 59 | logger.warning( 60 | """ 61 | Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. 62 | """ 63 | ) 64 | if "thing_dataset_id_to_contiguous_id" not in meta.as_dict(): 65 | id_map = {v: i for i, v in enumerate(cat_ids)} 66 | meta.thing_dataset_id_to_contiguous_id = id_map 67 | else: 68 | id_map = meta.thing_dataset_id_to_contiguous_id 69 | 70 | # sort indices for reproducible results 71 | img_ids = sorted(coco_api.imgs.keys()) 72 | # imgs is a list of dicts, each looks something like: 73 | # {'license': 4, 74 | # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', 75 | # 'file_name': 'COCO_val2014_000000001268.jpg', 76 | # 'height': 427, 77 | # 'width': 640, 78 | # 'date_captured': '2013-11-17 05:57:24', 79 | # 'id': 1268} 80 | imgs = coco_api.loadImgs(img_ids) 81 | # anns is a list[list[dict]], where each dict is an annotation 82 | # record for an object. The inner list enumerates the objects in an image 83 | # and the outer list enumerates over images. Example of anns[0]: 84 | # [{'segmentation': [[192.81, 85 | # 247.09, 86 | # ... 87 | # 219.03, 88 | # 249.06]], 89 | # 'area': 1035.749, 90 | # 'iscrowd': 0, 91 | # 'image_id': 1268, 92 | # 'bbox': [192.81, 224.8, 74.73, 33.43], 93 | # 'category_id': 16, 94 | # 'id': 42986}, 95 | # ...] 
96 | anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] 97 | total_num_valid_anns = sum([len(x) for x in anns]) 98 | total_num_anns = len(coco_api.anns) 99 | if total_num_valid_anns < total_num_anns: 100 | logger.warning( 101 | f"{json_file} contains {total_num_anns} annotations, but only " 102 | f"{total_num_valid_anns} of them match to images in the file." 103 | ) 104 | 105 | if "minival" not in json_file: 106 | # The popular valminusminival & minival annotations for COCO2014 contain this bug. 107 | # However the ratio of buggy annotations there is tiny and does not affect accuracy. 108 | # Therefore we explicitly white-list them. 109 | ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] 110 | assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( 111 | json_file 112 | ) 113 | 114 | imgs_anns = list(zip(imgs, anns)) 115 | logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) 116 | 117 | dataset_dicts = [] 118 | 119 | ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) 120 | 121 | # Load camera dictionary 122 | if "cameras" in coco_api.dataset: 123 | cam_dict = {i["id"]: i for i in coco_api.dataset["cameras"]} 124 | else: 125 | cam_dict = None 126 | 127 | num_instances_without_valid_segmentation = 0 128 | 129 | for (img_dict, anno_dict_list) in imgs_anns: 130 | record = {} 131 | record["file_name"] = os.path.join(image_root, img_dict["file_name"]) 132 | record["height"] = img_dict["height"] 133 | record["width"] = img_dict["width"] 134 | image_id = record["image_id"] = img_dict["id"] 135 | # Add camera to record 136 | if cam_dict: 137 | record["camera"] = cam_dict[img_dict["camera_id"]] 138 | 139 | objs = [] 140 | for anno in anno_dict_list: 141 | # Check that the image_id in this annotation is the same as 142 | # the image_id we're looking at. 143 | # This fails only when the data parsing logic or the annotation file is buggy. 144 | 145 | # The original COCO valminusminival2014 & minival2014 annotation files 146 | # actually contains bugs that, together with certain ways of using COCO API, 147 | # can trigger this assertion. 148 | assert anno["image_id"] == image_id 149 | 150 | assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' 151 | 152 | if category_info: 153 | for key, value in category_info[anno["category_id"]].items(): 154 | if key in extra_annotation_keys: 155 | anno[key] = value 156 | 157 | obj = {key: anno[key] for key in ann_keys if key in anno} 158 | if "bbox" in obj and len(obj["bbox"]) == 0: 159 | raise ValueError( 160 | f"One annotation of image {image_id} contains empty 'bbox' value! " 161 | "This json does not have valid COCO format." 
162 | ) 163 | 164 | segm = anno.get("segmentation", None) 165 | if segm: # either list[list[float]] or dict(RLE) 166 | if isinstance(segm, dict): 167 | if isinstance(segm["counts"], list): 168 | # convert to compressed RLE 169 | segm = mask_util.frPyObjects(segm, *segm["size"]) 170 | else: 171 | # filter out invalid polygons (< 3 points) 172 | segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] 173 | if len(segm) == 0: 174 | num_instances_without_valid_segmentation += 1 175 | continue # ignore this instance 176 | obj["segmentation"] = segm 177 | 178 | keypts = anno.get("keypoints", None) 179 | if keypts: # list[int] 180 | for idx, v in enumerate(keypts): 181 | if idx % 3 != 2: 182 | # COCO's segmentation coordinates are floating points in [0, H or W], 183 | # but keypoint coordinates are integers in [0, H-1 or W-1] 184 | # Therefore we assume the coordinates are "pixel indices" and 185 | # add 0.5 to convert to floating point coordinates. 186 | keypts[idx] = v + 0.5 187 | obj["keypoints"] = keypts 188 | 189 | obj["bbox_mode"] = BoxMode.XYWH_ABS 190 | if id_map: 191 | annotation_category_id = obj["category_id"] 192 | try: 193 | obj["category_id"] = id_map[annotation_category_id] 194 | except KeyError as e: 195 | raise KeyError( 196 | f"Encountered category_id={annotation_category_id} " 197 | "but this id does not exist in 'categories' of the json file." 198 | ) from e 199 | objs.append(obj) 200 | record["annotations"] = objs 201 | dataset_dicts.append(record) 202 | 203 | if num_instances_without_valid_segmentation > 0: 204 | logger.warning( 205 | "Filtered out {} instances without valid segmentation. ".format( 206 | num_instances_without_valid_segmentation 207 | ) 208 | + "There might be issues in your dataset generation process. Please " 209 | "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully" 210 | ) 211 | return dataset_dicts 212 | 213 | def register_deepformable_dataset( 214 | name, metadata, json_file, 215 | image_root, load_markers=True, 216 | ): 217 | assert isinstance(name, str), name 218 | assert isinstance(json_file, (str, os.PathLike)), json_file 219 | assert isinstance(image_root, (str, os.PathLike)), image_root 220 | extra_annotation_keys = [ 221 | "rvec", "tvec", "normal", "refl_dir", "view_dir", 222 | "brightness_max", "avg_color", "board_world"] 223 | if load_markers: extra_annotation_keys += ["marker_ids", "markers_world"] 224 | # 1. register a function which returns dicts 225 | DatasetCatalog.register( 226 | name, lambda: load_deepformable_json( 227 | json_file, image_root, name, extra_annotation_keys)) 228 | 229 | # 2. Optionally, add metadata about this dataset, 230 | # since they might be useful in evaluation, visualization or logging 231 | MetadataCatalog.get(name).set( 232 | json_file=json_file, image_root=image_root, evaluator_type="deepformable", **metadata 233 | ) 234 | -------------------------------------------------------------------------------- /deepformable/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | from .trainers import DeepformableTrainer -------------------------------------------------------------------------------- /deepformable/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
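# --- Editor's sketch (not part of this file): registering a dataset -------------------
# register_deepformable_dataset() in data/register_datasets.py above is how the dataset
# names referenced by the configs (e.g. "deepformable-rendered-train") are exposed to
# detectron2's DatasetCatalog/MetadataCatalog. The JSON/image paths and the empty metadata
# dict below are hypothetical placeholders.
from deepformable.data import register_deepformable_dataset

register_deepformable_dataset(
    name="deepformable-rendered-train",
    metadata={},
    json_file="/Data/deepformable/rendered-train/annotations.json",  # hypothetical path
    image_root="/Data/deepformable/rendered-train/images",           # hypothetical path
    load_markers=True,  # also loads the "marker_ids"/"markers_world" extra keys
)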
2 | from .evaluation import DeepformableEvaluator 3 | -------------------------------------------------------------------------------- /deepformable/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | from .adaptive_loss import AdaptiveLoss 3 | from .dist_ops import MarkerGatherFunc, AllReduce -------------------------------------------------------------------------------- /deepformable/layers/adaptive_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import torch 3 | import torch.distributed as dist 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from detectron2.utils.comm import get_world_size, is_main_process 8 | from .dist_ops import AllReduce 9 | 10 | 11 | class AdaptiveLoss(nn.Module): 12 | """ 13 | This class is implemented to filter the loss values that cause exploding gradients. 14 | """ 15 | def __init__( 16 | self, 17 | n=3.0, 18 | beta=0.995, 19 | beta2=0.999, 20 | loss_type='bce', 21 | adaptive_on=True, 22 | ): 23 | super().__init__() 24 | self.register_buffer("n", torch.tensor(n, dtype=torch.float32)) 25 | self.register_buffer("beta", torch.tensor(beta, dtype=torch.float32)) 26 | self.register_buffer("beta2", torch.tensor(beta2, dtype=torch.float32)) 27 | self.register_buffer("running_mean", torch.tensor(-1, dtype=torch.float32)) 28 | self.register_buffer("running_std", torch.tensor(-1, dtype=torch.float32)) 29 | if loss_type == 'bce': 30 | self.loss = F.binary_cross_entropy_with_logits 31 | elif loss_type == 'l1': 32 | self.loss = F.l1_loss 33 | elif loss_type == 'l2' or loss_type == 'mse': 34 | self.loss = F.mse_loss 35 | else: 36 | raise "Unknown loss type!" 
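        # (Editor's notes on the surrounding code, not part of the original file)
        # 1. `raise "Unknown loss type!"` above is a bug: raising a bare string is a TypeError in
        #    Python 3; it should be something like `raise ValueError(f"Unknown loss type: {loss_type}")`.
        # 2. In forward() below, loss elements above running_mean + n * running_std are treated as
        #    outliers and clamped to that threshold before summation; the running statistics are
        #    exponential moving averages (beta for the mean, beta2 for the std) and are averaged
        #    across GPUs via AllReduce so every worker filters with the same threshold.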
37 | self.adaptive_on = adaptive_on 38 | 39 | def forward(self, input, target): 40 | if not self.adaptive_on: 41 | return self.loss(input, target, reduction='sum') 42 | loss_values = self.loss(input, target, reduction='none') 43 | 44 | threshold = self.running_mean + self.n * self.running_std 45 | #loss_filtered = loss_values[loss_values > threshold].detach() 46 | loss_filtered = torch.clamp(loss_values[loss_values > threshold], 0, threshold.item()) 47 | loss_passed = loss_values[loss_values <= threshold] 48 | loss_final = torch.sum(loss_filtered) + torch.sum(loss_passed) 49 | 50 | mean, meansqr = loss_values.mean(), torch.mean(loss_values * loss_values) 51 | world_size = get_world_size() 52 | if world_size != 1: 53 | vec = torch.cat([mean.view(1), meansqr.view(1)]) 54 | mean, meansqr = (AllReduce.apply(vec) * (1.0 / world_size)).detach() 55 | std = torch.sqrt(meansqr - mean * mean) 56 | 57 | if self.running_mean > 0: 58 | mean_step = min(mean * (1.0 - self.beta), self.running_std * 0.75) 59 | self.running_mean = self.running_mean * self.beta + mean_step 60 | std_step = min(std * (1.0 - self.beta2), self.running_std * 0.25) 61 | self.running_std = self.running_std * self.beta2 + std_step 62 | else: 63 | self.running_mean = mean * 1.5 64 | self.running_std = std 65 | 66 | # if is_main_process(): 67 | # print("Mean:", self.running_mean, "Std:", self.running_std) 68 | # if len(loss_filtered) != 0: 69 | # print("Filtered:", loss_filtered, 70 | # "-- Running_val:", self.running_mean, self.running_std, 71 | # "-- Calc val:", mean, std,) 72 | # loss_final = F.binary_cross_entropy_with_logits( 73 | # input, target, reduction='sum') 74 | return loss_final 75 | -------------------------------------------------------------------------------- /deepformable/layers/dist_ops.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code references https://github.com/ag14774/diffdist/blob/b5c17c7354bbbe98b6e8a791ea78614861b4997a/diffdist/ 3 | It is primarily used to distribute marker generation task across GPUs. 4 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
5 | """ 6 | import torch 7 | import torch.distributed as dist 8 | from torch.autograd import Function 9 | 10 | from detectron2.utils.comm import get_world_size, get_rank 11 | 12 | class MarkerGatherFunc(Function): 13 | @staticmethod 14 | def forward(ctx, markers, rank, group, world_size, backend): 15 | ctx.backend, ctx.marker_size = backend, len(markers) 16 | ctx.world_size, ctx.rank, ctx.group = world_size, rank, group 17 | if world_size == 1: 18 | return markers 19 | gather_list = [torch.zeros_like(markers, device=markers.device) for _ in range(world_size)] 20 | if backend == 'nccl': 21 | gather_list = [gather_list] 22 | dist.all_gather_multigpu(gather_list, [markers], group=group) 23 | gather_list = gather_list[0] 24 | else: 25 | dist.all_gather(gather_list, markers, group=group) 26 | gather_list = [i.to(markers.device) for i in gather_list] 27 | return torch.cat(gather_list, dim=0) 28 | 29 | @staticmethod 30 | def backward(ctx, markers_grad): 31 | if ctx.world_size == 1: 32 | return markers_grad, None, None, None, None 33 | if ctx.backend == 'nccl': 34 | markers_grad = [markers_grad] 35 | dist.all_reduce_multigpu(markers_grad, group=ctx.group) 36 | markers_grad = markers_grad[0] 37 | else: 38 | dist.all_reduce(markers_grad, group=ctx.group) 39 | return markers_grad[ctx.marker_size*ctx.rank:ctx.marker_size*(ctx.rank+1)], None, None, None, None 40 | 41 | 42 | class AllReduce(Function): 43 | @staticmethod 44 | def forward(ctx, input): 45 | input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())] 46 | # Use allgather instead of allreduce since I don't trust in-place operations .. 47 | dist.all_gather(input_list, input, async_op=False) 48 | inputs = torch.stack(input_list, dim=0) 49 | return torch.sum(inputs, dim=0) 50 | 51 | @staticmethod 52 | def backward(ctx, grad_output): 53 | dist.all_reduce(grad_output, async_op=False) 54 | return grad_output -------------------------------------------------------------------------------- /deepformable/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | from .intermediate_augmentor import * 3 | from .marker_generator import * 4 | from .backbone import * 5 | from .marker_roi_heads import * 6 | from .meta_arch import * 7 | 8 | from .renderer import MarkerRenderer 9 | from .rpn import RPN_AdaptiveLoss -------------------------------------------------------------------------------- /deepformable/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | from .vovnet import build_vovnet_fpn_backbone, build_vovnet_backbone -------------------------------------------------------------------------------- /deepformable/modeling/backbone/utils.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | This code modifies FPN implementation from detectron2 to output stem features. 4 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
5 | """ 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | from detectron2.modeling.backbone.fpn import FPN as FPN_detectron2 10 | 11 | class FPN(FPN_detectron2): 12 | def forward(self, x): 13 | # Reverse feature maps into top-down order (from low to high resolution) 14 | bottom_up_features = self.bottom_up(x) 15 | x = [bottom_up_features[f] for f in self.in_features[::-1]] 16 | results = [] 17 | prev_features = self.lateral_convs[0](x[0]) 18 | results.append(self.output_convs[0](prev_features)) 19 | for features, lateral_conv, output_conv in zip( 20 | x[1:], self.lateral_convs[1:], self.output_convs[1:] 21 | ): 22 | top_down_features = F.interpolate(prev_features, scale_factor=2, mode="nearest") 23 | lateral_features = lateral_conv(features) 24 | prev_features = lateral_features + top_down_features 25 | if self._fuse_type == "avg": 26 | prev_features /= 2 27 | results.insert(0, output_conv(prev_features)) 28 | 29 | if self.top_block is not None: 30 | top_block_in_feature = bottom_up_features.get(self.top_block.in_feature, None) 31 | if top_block_in_feature is None: 32 | top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] 33 | results.extend(self.top_block(top_block_in_feature)) 34 | assert len(self._out_features) == len(results) 35 | out = dict(zip(self._out_features, results)) 36 | 37 | # -- MODIFICATION: Make sure out includes stem features added in output of backbone -- 38 | for key in bottom_up_features.keys(): 39 | if key not in self.in_features and key in self.bottom_up.output_shape(): 40 | out[key] = bottom_up_features[key] 41 | 42 | return out 43 | -------------------------------------------------------------------------------- /deepformable/modeling/intermediate_augmentor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | from .build import ( 3 | INTERMEDIATE_AUGMENTOR_REGISTRY, 4 | build_intermediate_augmentations, 5 | IntermediateAugmentor 6 | ) 7 | 8 | from .color_augmentations import ( 9 | GammaAugmentor, GammaCorrector, DefocusBlurAugmentor, 10 | MotionBlurAugmentor, HueShiftAugmentor, 11 | BrightnessAugmentor, NoiseAugmentor) 12 | 13 | from .jpeg_augmentor import JPEGAugmentor 14 | 15 | from .perspective_augmentor import PerspectiveAugmentor 16 | from .radial_distortion_augmentor import RadialDistortionAugmentor 17 | from .tps_augmentor import TpsAugmentor 18 | -------------------------------------------------------------------------------- /deepformable/modeling/intermediate_augmentor/build.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 3 | TODO: 4 | - Support batch operations for the images. Currently the input is (C, H, W). 5 | """ 6 | 7 | import torch 8 | from torch import nn 9 | 10 | from detectron2.utils.registry import Registry 11 | from abc import ABCMeta, abstractmethod 12 | 13 | INTERMEDIATE_AUGMENTOR_REGISTRY = Registry("INTERMEDIATE_AUGMENTOR") 14 | INTERMEDIATE_AUGMENTOR_REGISTRY.__doc__ = """ 15 | Registry for the differentiable intermediate augmentations after rendering 16 | """ 17 | 18 | 19 | def build_intermediate_augmentations(cfg): 20 | """ 21 | Build the intermediate augmentor, defined by ``cfg.INTERMEDIATE_AUGMENTOR``. 
22 | """ 23 | augmentations = [] 24 | for aug_name in cfg.INTERMEDIATE_AUGMENTOR.AUG_LIST: 25 | aug = INTERMEDIATE_AUGMENTOR_REGISTRY.get(aug_name)(cfg) 26 | aug.to(torch.device(cfg.MODEL.DEVICE)) 27 | augmentations.append(aug) 28 | 29 | return augmentations 30 | 31 | 32 | class IntermediateAugmentor(nn.Module, metaclass=ABCMeta): 33 | """ 34 | Abstract base class for intermediate augmentors. 35 | apply_image transformations needs to be differentiable! 36 | """ 37 | def __init__( 38 | self, 39 | skip_coords=False, 40 | ): 41 | super().__init__() 42 | self.skip_coords = skip_coords 43 | 44 | @abstractmethod 45 | def apply_image(self, image): 46 | """ 47 | Apply transformation to the images 48 | """ 49 | pass 50 | 51 | def apply_coords(self, coords): 52 | """ 53 | Apply transformation to the coordinates of the labels 54 | """ 55 | return coords 56 | 57 | def generate_params(self, image, gt_instances, strength=None): 58 | """ 59 | Generates random numbers required to apply augmentations 60 | """ 61 | return 62 | 63 | def apply_instances(self, gt_instances): 64 | if self.skip_coords or gt_instances is None: 65 | return gt_instances 66 | 67 | with torch.no_grad(): 68 | if gt_instances.has("gt_sample_locs"): 69 | sample_loc_shape = gt_instances.gt_sample_locs.shape 70 | gt_instances.gt_sample_locs = self.apply_coords(gt_instances.gt_sample_locs.view(-1,2)).view(sample_loc_shape) 71 | if gt_instances.has("gt_segm"): 72 | gt_instances.gt_segm = self.apply_coords(gt_instances.gt_segm.view(-1,2)).view(-1,8,2) 73 | elif gt_instances.has("gt_masks"): 74 | device, dtype = gt_instances.gt_boxes.device, gt_instances.gt_boxes.tensor.dtype 75 | polygons = torch.as_tensor(gt_instances.gt_masks.polygons, dtype=dtype, device=device) 76 | polygons = self.apply_coords(polygons.view(-1,2)).view(-1,8) 77 | gt_instances.gt_masks.polygons = [[i.cpu().numpy()] for i in polygons] 78 | elif gt_instances.has("gt_boxes"): 79 | gt_instances.gt_boxes.tensor = self.apply_coords(gt_instances.gt_boxes.tensor.view(-1,2)).view(-1,4) 80 | 81 | return gt_instances 82 | 83 | @classmethod 84 | def fix_instances(cls, gt_instances): 85 | if gt_instances.has("gt_segm"): 86 | min_c, max_c = torch.min(gt_instances.gt_segm, dim=1)[0], torch.max(gt_instances.gt_segm, dim=1)[0] 87 | gt_instances.gt_boxes.tensor = torch.cat([min_c, max_c], dim=1) 88 | gt_instances.gt_corners = gt_instances.gt_segm[:,[0,2,4,6]] 89 | elif gt_instances.has("gt_masks"): 90 | device, dtype = gt_instances.gt_boxes.device, gt_instances.gt_boxes.tensor.dtype 91 | polygons = torch.as_tensor(gt_instances.gt_masks.polygons, dtype=dtype, device=device).view(-1,4,2) 92 | min_c, max_c = torch.min(polygons, dim=1)[0], torch.max(polygons, dim=1)[0] 93 | gt_instances.gt_boxes.tensor = torch.cat([min_c, max_c], dim=1) 94 | 95 | # # Convert segmentation to polygon masks 96 | # segm = gt_instances.gt_segm.flatten(start_dim=1) 97 | # polygons_per_instance = torch.chunk(segm, segm.shape[0]) 98 | # polygon_masks = [] 99 | # for instance in polygons_per_instance: 100 | # polygon_masks.append([instance.squeeze().cpu()]) 101 | # gt_instances._fields["gt_masks"] = PolygonMasks(polygon_masks) 102 | # gt_instances.remove("gt_segm") 103 | return gt_instances 104 | 105 | def forward(self, image, gt_instances): 106 | # image.shape is (C, H, W) 107 | self.generate_params(image, gt_instances) 108 | image = self.apply_image(image) 109 | if not self.skip_coords: 110 | gt_instances = self.apply_instances(gt_instances) 111 | return image, gt_instances 112 | 
-------------------------------------------------------------------------------- /deepformable/modeling/intermediate_augmentor/color_augmentations.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code implemented by Andreas Meulueman and Mustafa B. Yaldiz 3 | Copyright (c) (VCLAB, KAIST) All Rights Reserved. 4 | """ 5 | import torch 6 | from torch import nn 7 | import torch.nn.functional as F 8 | import kornia 9 | import numpy as np 10 | 11 | import detectron2 12 | from detectron2.config import configurable 13 | 14 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor 15 | from deepformable.utils import ( 16 | get_disk_blur_kernel, sample_param, 17 | rgb_to_hls, hls_to_rgb, 18 | ) 19 | 20 | 21 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 22 | class GammaAugmentor(IntermediateAugmentor): 23 | @configurable 24 | def __init__( 25 | self, 26 | *, 27 | gamma_range, 28 | ): 29 | super().__init__(True) 30 | self.gamma_range = gamma_range 31 | 32 | @classmethod 33 | def from_config(cls, cfg): 34 | return { 35 | "gamma_range": cfg.INTERMEDIATE_AUGMENTOR.GammaAugmentor.GAMMA_RANGE, 36 | } 37 | 38 | def apply_image(self, image): 39 | return (F.relu(image) + 1e-8) ** self.gamma 40 | 41 | def generate_params(self, image, gt_instances, strength=None): 42 | self.gamma = sample_param( 43 | self.gamma_range, strength=strength, 44 | training=self.training, device=image.device) 45 | 46 | 47 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 48 | class GammaCorrector(IntermediateAugmentor): 49 | @configurable 50 | def __init__( 51 | self, 52 | gamma, 53 | ): 54 | super().__init__(True) 55 | self.register_buffer("gamma", torch.tensor(1.0/gamma), False) 56 | 57 | @classmethod 58 | def from_config(cls, cfg): 59 | return {"gamma": cfg.RENDERER.GAMMA} 60 | 61 | def apply_image(self, image): 62 | return torch.clamp((F.relu(image) + 1e-8) ** self.gamma.to(image.device), 0, 1) 63 | 64 | 65 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 66 | class DefocusBlurAugmentor(IntermediateAugmentor): 67 | @configurable 68 | def __init__( 69 | self, 70 | *, 71 | blur_radius_range, 72 | ): 73 | super().__init__(True) 74 | self.blur_radius_range = blur_radius_range 75 | 76 | @classmethod 77 | def from_config(cls, cfg): 78 | return { 79 | "blur_radius_range": cfg.INTERMEDIATE_AUGMENTOR.DefocusBlurAugmentor.BLUR_RADIUS_RANGE, 80 | } 81 | 82 | def apply_image(self, image): 83 | pad = self.kernel.size(-1)//2 84 | padded_image = F.pad(image.unsqueeze(0), 85 | pad=(pad, pad, pad, pad), 86 | mode="reflect") 87 | return F.conv2d( 88 | padded_image, 89 | self.kernel.expand(3,1,self.kernel.shape[-1], self.kernel.shape[-1]), 90 | groups=3, padding=0)[0] 91 | 92 | def generate_params(self, image, gt_instances, strength=None): 93 | device = image.device 94 | blur_radius = sample_param( 95 | self.blur_radius_range, strength=strength, 96 | training=self.training, device=device) 97 | self.kernel = get_disk_blur_kernel(blur_radius, device=device) 98 | 99 | 100 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 101 | class MotionBlurAugmentor(IntermediateAugmentor): 102 | @configurable 103 | def __init__( 104 | self, 105 | *, 106 | blur_radius_range, 107 | ): 108 | super().__init__(True) 109 | self.blur_radius_range = blur_radius_range 110 | 111 | @classmethod 112 | def from_config(cls, cfg): 113 | return { 114 | "blur_radius_range": cfg.INTERMEDIATE_AUGMENTOR.MotionBlurAugmentor.BLUR_RADIUS_RANGE, 115 | } 116 | 117 | def apply_image(self, image): 118 | return 
kornia.filters.motion_blur( 119 | image.unsqueeze(0), self.blur_radius, self.angle, 120 | self.direction, border_type='replicate', mode='bilinear')[0] 121 | 122 | def generate_params(self, image, gt_instances, strength=None): 123 | device = image.device 124 | blur_radius = sample_param( 125 | self.blur_radius_range, shape=(1,), strength=strength, 126 | training=self.training, device=device) 127 | self.blur_radius = (torch.round(blur_radius).int()*2+1).item() 128 | self.angle = sample_param( 129 | (0,180,30), shape=(1,), 130 | training=self.training, device=device) # Blur at 30° for testing 131 | self.direction = torch.zeros(1, device=image.device) 132 | 133 | 134 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 135 | class HueShiftAugmentor(IntermediateAugmentor): 136 | @configurable 137 | def __init__( 138 | self, 139 | *, 140 | hue_shift_range, 141 | ): 142 | super().__init__(True) 143 | self.hue_shift_range = hue_shift_range 144 | 145 | @classmethod 146 | def from_config(cls, cfg): 147 | return { 148 | "hue_shift_range": cfg.INTERMEDIATE_AUGMENTOR.HueShiftAugmentor.HUE_SHIFT_RANGE, 149 | } 150 | 151 | def apply_image(self, image): 152 | image = torch.clamp(image, 0, 1 - 1e-6) 153 | hsv = rgb_to_hls(image) 154 | hsv[0, :, :] = torch.fmod(hsv[0, :, :] + self.hue_shift * kornia.constants.pi, 2*kornia.constants.pi) 155 | return hls_to_rgb(hsv) 156 | 157 | def generate_params(self, image, gt_instances, strength=None): 158 | device = image.device 159 | self.hue_shift = sample_param( 160 | self.hue_shift_range, strength=strength, 161 | training=self.training, device=device) 162 | 163 | 164 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 165 | class BrightnessAugmentor(IntermediateAugmentor): 166 | @configurable 167 | def __init__( 168 | self, 169 | *, 170 | brightness_range, 171 | ): 172 | super().__init__(True) 173 | self.brightness_range = brightness_range 174 | 175 | @classmethod 176 | def from_config(cls, cfg): 177 | return { 178 | "brightness_range": cfg.INTERMEDIATE_AUGMENTOR.BrightnessAugmentor.BRIGHTNESS_RANGE, 179 | } 180 | def apply_image(self, image): 181 | return image * self.factor 182 | 183 | def generate_params(self, image, gt_instances, strength=None): 184 | device = image.device 185 | if strength == None: 186 | self.factor = sample_param( 187 | self.brightness_range, training=self.training, device=device) 188 | else: 189 | brightness_range = (self.brightness_range[0], 1.0, 0.4) 190 | self.factor = sample_param( 191 | brightness_range, strength=1.0 - strength, 192 | training=self.training, device=device) 193 | 194 | 195 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 196 | class NoiseAugmentor(IntermediateAugmentor): 197 | @configurable 198 | def __init__( 199 | self, 200 | *, 201 | noise_range, 202 | ): 203 | super().__init__(True) 204 | self.noise_range=noise_range 205 | 206 | @classmethod 207 | def from_config(cls, cfg): 208 | return { 209 | "noise_range": cfg.INTERMEDIATE_AUGMENTOR.NoiseAugmentor.NOISE_RANGE, 210 | } 211 | 212 | def apply_image(self, image): 213 | return image + self.sigma * torch.randn(image.shape, device=image.device) 214 | 215 | def generate_params(self, image, gt_instances, strength=None): 216 | self.sigma = sample_param( 217 | self.noise_range, strength=strength, 218 | training=self.training, device=image.device) 219 | -------------------------------------------------------------------------------- /deepformable/modeling/intermediate_augmentor/jpeg_augmentor.py: -------------------------------------------------------------------------------- 1 | """ 2 | JPEG 
compression augmentation, see https://github.com/ando-khachatryan/HiDDeN 3 | Modified by Andreas Meulueman and Mustafa B. Yaldiz. 4 | Copyright (c) 2018 ando-khachatryan 5 | """ 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | import kornia 10 | import numpy as np 11 | 12 | import detectron2 13 | from detectron2.config import configurable 14 | 15 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor 16 | from deepformable.utils import sample_param 17 | 18 | 19 | def gen_filters(size_x: int, size_y: int, dct_or_idct_fun: callable) -> np.ndarray: 20 | tile_size_x = 8 21 | filters = np.zeros((size_x * size_y, size_x, size_y)) 22 | for k_y in range(size_y): 23 | for k_x in range(size_x): 24 | for n_y in range(size_y): 25 | for n_x in range(size_x): 26 | filters[k_y * tile_size_x + k_x, n_y, n_x] = dct_or_idct_fun(n_y, k_y, size_y) * dct_or_idct_fun(n_x, 27 | k_x, 28 | size_x) 29 | return filters 30 | 31 | def create_jpeg_masks(min_keep:int=1, max_keep:int=64): 32 | index_order = np.array( 33 | sorted(((x, y) for x in range(8) for y in range(8)), 34 | key=lambda p: (p[0] + p[1], -p[1] if (p[0] + p[1]) % 2 else p[1])) 35 | ) 36 | masks = [] 37 | for keep_count in range(min_keep, max_keep): 38 | mask = np.zeros((8, 8)) 39 | mask[index_order[:keep_count,0], index_order[:keep_count,1]] = 1 40 | masks.append(mask) 41 | return np.stack(masks,axis=0) 42 | 43 | def dct_coeff(n, k, N): 44 | return np.cos(np.pi / N * (n + 1. / 2.) * k) 45 | 46 | def idct_coeff(n, k, N): 47 | return (int(0 == n) * (- 1 / 2) + np.cos( 48 | np.pi / N * (k + 1. / 2.) * n)) * np.sqrt(1 / (2. * N)) 49 | 50 | 51 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 52 | class JPEGAugmentor(IntermediateAugmentor): 53 | @configurable 54 | def __init__( 55 | self, 56 | *, 57 | y_range, 58 | uv_range, 59 | max_image_size, 60 | ): 61 | super().__init__(True) 62 | self.y_range = y_range 63 | self.uv_range = uv_range 64 | self.register_buffer("dct_conv_weights", 65 | torch.tensor(gen_filters(8, 8, dct_coeff), dtype=torch.float32).unsqueeze(1), False) 66 | self.register_buffer("idct_conv_weights", 67 | torch.tensor(gen_filters(8, 8, idct_coeff), dtype=torch.float32).view(64,64,1,1), False) 68 | self.register_buffer("jpeg_masks", 69 | torch.tensor(create_jpeg_masks(), dtype=torch.float32), False) 70 | 71 | @property 72 | def device(self): 73 | return self.dct_conv_weights.device 74 | 75 | @classmethod 76 | def from_config(cls, cfg): 77 | return { 78 | "y_range": cfg.INTERMEDIATE_AUGMENTOR.JPEGAugmentor.Y_QUALITY_RANGE, 79 | "uv_range": cfg.INTERMEDIATE_AUGMENTOR.JPEGAugmentor.UV_QUALITY_RANGE, 80 | "max_image_size": cfg.INTERMEDIATE_AUGMENTOR.MAX_IMAGE_SIZE, 81 | } 82 | 83 | def apply_image(self, image): 84 | image = image.unsqueeze(0) 85 | N, C, H, W = image.shape 86 | 87 | mask = self.jpeg_masks[None, self.mask_keep_weights].view(1,C,8,8,1,1) 88 | 89 | # Convert to YUV 90 | if C == 3: 91 | image = kornia.color.rgb_to_yuv(image) 92 | # Pad image 93 | image_padded = F.pad(image, (0, (8 - W) % 8, 0, (8 - H) % 8), 'replicate') 94 | H_pad, W_pad = image_padded.shape[-2:] 95 | 96 | # Apply dct transform 97 | image_dct = F.conv2d( 98 | image_padded, self.dct_conv_weights.repeat(C,1,1,1), 99 | stride=8, groups=C) 100 | image_dct = image_dct.view(N,C,8,8,*image_dct.shape[-2:]) 101 | # Mask in dct domain 102 | image_dct_masked = image_dct * mask 103 | # Convert back to idct 104 | image_idct = F.conv2d( 105 | image_dct_masked.view(N, C*64, *image_dct.shape[-2:]), 106 | 
self.idct_conv_weights.repeat(C,1,1,1), groups=C) 107 | image_idct = image_idct.view(N,C,8,8,*image_dct.shape[-2:])\ 108 | .permute(0,1,4,2,5,3).contiguous().view(-1,C,H_pad,W_pad) 109 | 110 | # Convert back to RGB 111 | if C == 3: 112 | image_idct = kornia.color.yuv_to_rgb(image_idct) 113 | 114 | return torch.clamp(image_idct[0,:,:H,:W], 0, 1) 115 | 116 | def generate_params(self, image, gt_instances, strength=None): 117 | if image.device != self.device: 118 | self.to(image.device) 119 | y_weight = sample_param( 120 | self.y_range, strength=strength, 121 | training=self.training, device=self.device) 122 | if image.shape[0] == 3: 123 | u_weight, v_weight = sample_param( 124 | self.uv_range, shape=(2,), strength=strength, 125 | training=self.training, device=self.device) 126 | self.mask_keep_weights = (int(y_weight), int(u_weight.item()), int(v_weight.item())) 127 | else: 128 | self.mask_keep_weights = (int(y_weight),) * image.shape[0] -------------------------------------------------------------------------------- /deepformable/modeling/intermediate_augmentor/perspective_augmentor.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code implemented by Andreas Meulueman and Mustafa B. Yaldiz 3 | Copyright (c) (VCLAB, KAIST) All Rights Reserved. 4 | """ 5 | import itertools 6 | 7 | import torch 8 | from torch import nn 9 | 10 | import kornia 11 | 12 | from detectron2.config import configurable 13 | 14 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor 15 | from deepformable.utils import sample_param 16 | 17 | 18 | def create_perspective_sampling_grid(image_size, target_corners, device): 19 | target_corners = torch.tensor([ 20 | [target_corners[0, 0], target_corners[0, 1]], 21 | [target_corners[1, 0], 1 - target_corners[1, 1]], 22 | [1 - target_corners[2, 0], target_corners[2, 1]], 23 | [1 - target_corners[3, 0], 1 - target_corners[3, 1]]], device=device) 24 | 25 | target_corners[:, 0] = target_corners[:, 0] * (image_size[1]) 26 | target_corners[:, 1] = target_corners[:, 1] * (image_size[0]) 27 | 28 | source_corners = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], device=device).float() 29 | source_corners[:, 0] = source_corners[:, 0] * (image_size[1]) 30 | source_corners[:, 1] = source_corners[:, 1] * (image_size[0]) 31 | 32 | homography = kornia.geometry.find_homography_dlt(source_corners.unsqueeze(0), 33 | target_corners.unsqueeze(0), 34 | torch.ones(1, 4, device=device)) 35 | 36 | x, y = torch.meshgrid([torch.arange(0, image_size[0], device=device), 37 | torch.arange(0, image_size[1], device=device)]) 38 | 39 | x = x.float() + 0.5 40 | y = y.float() + 0.5 41 | coord = torch.cat((y.unsqueeze(-1), x.unsqueeze(-1)), 2).view(-1, 2).float() 42 | 43 | grid = kornia.geometry.linalg.transform_points( 44 | torch.inverse(homography), coord).view(image_size[0], image_size[1], 2) 45 | grid = (grid / torch.tensor([image_size[1], image_size[0]], device=device).view(1, 1, 2)) * 2 - 1 46 | 47 | return grid, homography 48 | 49 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 50 | class PerspectiveAugmentor(IntermediateAugmentor): 51 | @configurable 52 | def __init__( 53 | self, 54 | *, 55 | corner_shift_range, 56 | ): 57 | super().__init__(False) 58 | self.corner_shift_range=corner_shift_range 59 | 60 | @classmethod 61 | def from_config(cls, cfg): 62 | return { 63 | "corner_shift_range": cfg.INTERMEDIATE_AUGMENTOR.PerspectiveAugmentor.CORNER_SHIFT_RANGE, 64 | } 65 | 66 | def apply_image(self, image): 67 | return 
torch.nn.functional.grid_sample(image.unsqueeze(0), self.grid.unsqueeze(0), align_corners=False)[0] 68 | 69 | def apply_coords(self, coords): 70 | return kornia.geometry.linalg.transform_points(self.homography, coords.unsqueeze(0))[0] 71 | 72 | def generate_params(self, image, gt_instances, strength=None): 73 | device = image.device 74 | if self.training: 75 | target_corners = sample_param( 76 | self.corner_shift_range, shape=(4,2), 77 | strength=strength, device=device) 78 | else: 79 | target_corners = torch.zeros((4, 2), device=device) 80 | target_corners[[0,1],:] = sample_param( 81 | self.corner_shift_range, strength=strength, 82 | training=False, device=device) 83 | self.grid, self.homography = create_perspective_sampling_grid( 84 | image.shape[-2:], target_corners, device=device) -------------------------------------------------------------------------------- /deepformable/modeling/intermediate_augmentor/radial_distortion_augmentor.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code implemented by Andreas Meulueman and Mustafa B. Yaldiz 3 | Copyright (c) (VCLAB, KAIST) All Rights Reserved. 4 | """ 5 | import itertools 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from detectron2.config import configurable 11 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor 12 | from deepformable.utils import sample_param 13 | 14 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 15 | class RadialDistortionAugmentor(IntermediateAugmentor): 16 | @configurable 17 | def __init__( 18 | self, 19 | *, 20 | undistort_iter, 21 | focal_length_range, 22 | center_shift_range, 23 | distortion_range 24 | ): 25 | super().__init__(False) 26 | self.undistort_iter=undistort_iter 27 | self.focal_length_range = focal_length_range 28 | self.center_shift_range = center_shift_range 29 | self.distortion_range = distortion_range 30 | 31 | @classmethod 32 | def from_config(cls, cfg): 33 | return { 34 | "undistort_iter": cfg.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.UNDISTORT_ITER, 35 | "focal_length_range": cfg.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.FOCAL_LENGTH_RANGE, 36 | "center_shift_range": cfg.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.CENTER_SHIFT_RANGE, 37 | "distortion_range": cfg.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.DISTORTION_RANGE, 38 | } 39 | 40 | def apply_image(self, image): 41 | return F.grid_sample(image.unsqueeze(0), self.grid.unsqueeze(0), align_corners=False)[0] 42 | 43 | def distort(self, coord): 44 | xy = (coord.float() - self.center.view(1, 2)) / self.focal_length.view(1, 2) 45 | 46 | r2 = xy[:, 0]**2 + xy[:, 1]**2 47 | 48 | r2_distorted = 1 49 | for i in range(self.k.shape[0]): 50 | r2_distorted = r2_distorted + self.k[i] * r2**(i+1) 51 | 52 | xy_distorted = xy * r2_distorted.unsqueeze(-1) 53 | 54 | return xy_distorted * self.focal_length.view(1, 2) + self.center.view(1, 2) 55 | 56 | def undistort(self, coord): 57 | xy = (coord.float() - self.center.view(1, 2)) / self.focal_length.view(1, 2) 58 | xy0 = xy.clone() 59 | 60 | for iteration in range(self.undistort_iter): 61 | r2 = (xy[:, 0])**2 + (xy[:, 1])**2 62 | 63 | # This works up to the third order 64 | #r2_undistorted = (1+((self.k[5]*r2 + self.k[4])*r2 + self.k[3])*r2)/(1 + ((self.k[2]*r2 + self.k[1])*r2 + self.k[0])*r2) 65 | r2_undistorted = (1 + ((self.k[2]*r2 + self.k[1])*r2 + self.k[0])*r2) 66 | 67 | xy = xy0 / r2_undistorted.unsqueeze(-1) 68 | x = xy[:, 0] 69 | x[r2_undistorted < 0] = -1 70 | y = xy[:, 1] 71 | 
y[r2_undistorted < 0] = -1 72 | 73 | return xy * self.focal_length.view(1, 2) + self.center.view(1, 2) 74 | 75 | def apply_coords(self, coords): 76 | return self.distort(coords) 77 | 78 | def generate_params(self, image, gt_instances, strength=None): 79 | image_size, device = image.shape[-2:], image.device 80 | image_size_xy = torch.tensor([image_size[1], image_size[0]], device=device) 81 | 82 | self.focal_length = sample_param( 83 | self.focal_length_range, shape=(2,), 84 | strength = None if strength == None else 1.0-strength, 85 | training=self.training, device=device) * torch.max(image_size_xy) 86 | center_shift = sample_param( 87 | self.center_shift_range, shape=(2,), strength=strength, 88 | training=self.training, device=device) 89 | if self.training: 90 | center_shift *= 1 if torch.randn(1) > 0.5 else -1 91 | self.center = (0.5 + center_shift) * image_size_xy 92 | self.k = -sample_param( 93 | self.distortion_range, shape=(3,), strength=strength, 94 | training=self.training, device=device) 95 | 96 | x, y = torch.meshgrid([torch.arange(0, image_size[0], device=device), 97 | torch.arange(0, image_size[1], device=device)]) 98 | x = x.float() + 0.5 99 | y = y.float() + 0.5 100 | coord = torch.cat((y.unsqueeze(-1), x.unsqueeze(-1)), 2).view(-1, 2) 101 | 102 | grid = self.undistort(coord).view(image_size[0], image_size[1], 2) 103 | self.grid = (grid / image_size_xy.view(1, 1, 2)) * 2 - 1 104 | -------------------------------------------------------------------------------- /deepformable/modeling/intermediate_augmentor/tps_augmentor.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code implemented by Andreas Meulueman and Mustafa B. Yaldiz 3 | Copyright (c) (VCLAB, KAIST) All Rights Reserved. 4 | """ 5 | import itertools 6 | 7 | import torch 8 | from torch import nn 9 | import torch.nn.functional as F 10 | 11 | from detectron2.config import configurable 12 | 13 | from .build import INTERMEDIATE_AUGMENTOR_REGISTRY, IntermediateAugmentor 14 | from deepformable.utils import sample_param 15 | 16 | # phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2 17 | def compute_partial_repr(input_points, control_points): 18 | N = input_points.size(0) 19 | M = control_points.size(0) 20 | pairwise_diff = input_points.view(N, 1, 2) - control_points.view(1, M, 2) 21 | # original implementation, very slow 22 | # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance 23 | pairwise_diff_square = pairwise_diff * pairwise_diff 24 | del pairwise_diff 25 | pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, 1] 26 | del pairwise_diff_square 27 | 28 | repr_matrix = 0.5 * pairwise_dist * torch.log(pairwise_dist) 29 | del pairwise_dist 30 | # fix numerical error for 0 * log(0), substitute all nan with 0 31 | # mask = repr_matrix != repr_matrix 32 | repr_matrix.masked_fill_(repr_matrix != repr_matrix, 0) 33 | 34 | return repr_matrix 35 | 36 | class TPSGridGen(nn.Module): 37 | def __init__(self, target_height, target_width, target_control_points): 38 | super(TPSGridGen, self).__init__() 39 | assert target_control_points.ndimension() == 2 40 | assert target_control_points.size(1) == 2 41 | N = target_control_points.size(0) 42 | self.num_points = N 43 | self.height = target_height 44 | self.width = target_width 45 | 46 | # create padded kernel matrix 47 | forward_kernel = torch.zeros(N + 3, N + 3) 48 | target_control_partial_repr = compute_partial_repr(target_control_points, target_control_points) 49 | forward_kernel[:N, 
:N].copy_(target_control_partial_repr) 50 | forward_kernel[:N, -3].fill_(1) 51 | forward_kernel[-3, :N].fill_(1) 52 | forward_kernel[:N, -2:].copy_(target_control_points) 53 | forward_kernel[-2:, :N].copy_(target_control_points.transpose(0, 1)) 54 | # compute inverse matrix 55 | inverse_kernel = torch.inverse(forward_kernel) 56 | 57 | # create target coordinate matrix 58 | HW = target_height * target_width 59 | y, x = torch.meshgrid([torch.arange(0, target_height), 60 | torch.arange(0, target_width)]) 61 | x = x.reshape(HW, 1).float() + 0.5 62 | y = y.reshape(HW, 1).float() + 0.5 63 | y = y * 2 / (target_height) - 1 64 | x = x * 2 / (target_width) - 1 65 | target_coordinate = torch.cat([x, y], dim = 1) # convert from (y, x) to (x, y) 66 | # print(x.shape) 67 | del x 68 | del y 69 | target_coordinate_partial_repr = compute_partial_repr(target_coordinate, target_control_points) 70 | target_coordinate_repr = torch.cat([ 71 | target_coordinate_partial_repr, torch.ones(HW, 1), target_coordinate 72 | ], dim = 1) 73 | 74 | padding_matrix = torch.zeros(3, 2) 75 | 76 | self.inverse_kernel = inverse_kernel 77 | self.padding_matrix = padding_matrix 78 | self.target_coordinate_repr = target_coordinate_repr 79 | 80 | @property 81 | def device(self): 82 | return self.inverse_kernel.device 83 | 84 | def _apply(self, fn): 85 | super(TPSGridGen, self)._apply(fn) 86 | self.inverse_kernel = fn(self.inverse_kernel) 87 | self.padding_matrix = fn(self.padding_matrix) 88 | self.target_coordinate_repr = fn(self.target_coordinate_repr) 89 | return self 90 | 91 | def forward(self, source_control_points): 92 | assert source_control_points.ndimension() == 3 93 | assert source_control_points.size(1) == self.num_points 94 | assert source_control_points.size(2) == 2 95 | batch_size = source_control_points.size(0) 96 | 97 | Y = torch.cat([source_control_points, (self.padding_matrix.expand(batch_size, 3, 2))], 1) 98 | mapping_matrix = torch.matmul((self.inverse_kernel), Y) 99 | source_coordinate = torch.matmul((self.target_coordinate_repr), mapping_matrix) 100 | return source_coordinate 101 | 102 | 103 | @INTERMEDIATE_AUGMENTOR_REGISTRY.register() 104 | class TpsAugmentor(IntermediateAugmentor): 105 | """ 106 | Transformation with thin plate spline 107 | """ 108 | @configurable 109 | def __init__( 110 | self, 111 | *, 112 | ctrl_pts_size, 113 | max_image_size, 114 | warp_range, 115 | stop_threshold, 116 | max_iter, 117 | ): 118 | super().__init__(False) 119 | 120 | target_control_points = torch.Tensor(list(itertools.product( 121 | torch.arange(-1.0, 1.000001, 2.0 / ctrl_pts_size[0]), 122 | torch.arange(-1.0, 1.000001, 2.0 / ctrl_pts_size[1]), 123 | ))).float() 124 | 125 | self.warp_range = warp_range 126 | self.stop_threshold = stop_threshold 127 | self.max_iter = max_iter 128 | self.max_image_size = max_image_size 129 | self.max_image_size_xy = torch.tensor([max_image_size[1], max_image_size[0]]) 130 | 131 | self.tps_grid_generator = TPSGridGen(*max_image_size, target_control_points) 132 | self.target_control_points = target_control_points 133 | 134 | def _apply(self, fn): 135 | super(TpsAugmentor, self)._apply(fn) 136 | self.tps_grid_generator = self.tps_grid_generator._apply(fn) 137 | self.target_control_points = fn(self.target_control_points) 138 | self.max_image_size_xy = fn(self.max_image_size_xy) 139 | return self 140 | 141 | @classmethod 142 | def from_config(cls, cfg): 143 | return { 144 | "ctrl_pts_size": cfg.INTERMEDIATE_AUGMENTOR.TpsTransformer.CTRL_PTS_SIZE, 145 | "max_image_size": 
cfg.INTERMEDIATE_AUGMENTOR.MAX_IMAGE_SIZE, 146 | "warp_range": cfg.INTERMEDIATE_AUGMENTOR.TpsTransformer.WARP_RANGE, 147 | "stop_threshold": cfg.INTERMEDIATE_AUGMENTOR.TpsTransformer.STOP_THRESHOLD, 148 | "max_iter": cfg.INTERMEDIATE_AUGMENTOR.TpsTransformer.MAX_ITER, 149 | } 150 | 151 | def apply_image(self, image): 152 | return F.grid_sample(image.unsqueeze(0), self.grid, align_corners=False)[0] 153 | 154 | def apply_coords(self, coords): 155 | device = coords.device 156 | coords = coords * (2.0 / self.max_image_size_xy) - 1.0 157 | 158 | warped_coords0 = coords.clone() 159 | converged, i = False, 0 160 | while not converged: 161 | coords_partial_repr = compute_partial_repr(warped_coords0, self.target_control_points) 162 | coords_repr = torch.cat([ 163 | coords_partial_repr, torch.ones(coords_partial_repr.shape[0], 1, device=device), warped_coords0 164 | ], dim = 1) 165 | 166 | Y = torch.cat([self.source_control_points.unsqueeze(0), (self.tps_grid_generator.padding_matrix.expand(1, 3, 2))], 1) 167 | mapping_matrix = torch.matmul((self.tps_grid_generator.inverse_kernel), Y) 168 | warped_coords1 = torch.matmul((coords_repr), mapping_matrix)[0] 169 | coord_dev = warped_coords1 - coords 170 | warped_coords0 = warped_coords0 - coord_dev 171 | i+=1 172 | converged = i > self.max_iter or torch.max(torch.abs(coord_dev)) * max(self.max_image_size) < self.stop_threshold 173 | 174 | if i > self.max_iter: 175 | print("Failed to converge. l_inf norm is: ", (torch.max(torch.abs(coord_dev)) * max(self.max_image_size)).item()) 176 | 177 | return (warped_coords0 + 1.0) * (self.max_image_size_xy / 2.0) 178 | 179 | def generate_params(self, image, gt_instances, strength=None): 180 | image_size, device = image.shape[-2:], image.device 181 | self.image_size_xy = torch.tensor( 182 | [image_size[1], image_size[0]], device=device) 183 | if device != self.tps_grid_generator.device: 184 | print(device, "is not equal to", self.tps_grid_generator.device) 185 | self.to(device) 186 | 187 | ctrl_pts_displacement = sample_param( 188 | self.warp_range, strength=strength, 189 | training=self.training, device=device) 190 | 191 | source_control_points = self.target_control_points \ 192 | + (torch.rand(self.target_control_points.size(), device=device) * 2 - 1) * ctrl_pts_displacement 193 | source_control_points[self.target_control_points <= -1 + ctrl_pts_displacement] = -1 - ctrl_pts_displacement 194 | source_control_points[self.target_control_points >= 1 - ctrl_pts_displacement] = 1 + ctrl_pts_displacement 195 | self.source_control_points = source_control_points 196 | 197 | source_coordinate = self.tps_grid_generator(torch.unsqueeze(source_control_points, 0)) 198 | grid_cropped = source_coordinate.view(1, *self.max_image_size, 2)[:,:image_size[0], :image_size[1]] 199 | # Renormalize the grid 200 | self.grid = ((grid_cropped + 1.0) * (self.max_image_size_xy / (self.image_size_xy * 2.0))) * 2.0 - 1.0 201 | -------------------------------------------------------------------------------- /deepformable/modeling/marker_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
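# Illustrative usage (a sketch, not part of the original file): the generators
# exported below are looked up by name through the registry in build.py, so a
# caller normally only touches the config. Here `cfg` is assumed to come from
# deepformable.utils.get_cfg() and `batch_instances` from the data loader:
#
#   from deepformable.modeling.marker_generator import build_marker_generator
#   cfg.MODEL.MARKER_GENERATOR.NAME = "ArucoGenerator"
#   generator = build_marker_generator(cfg)                  # moved to cfg.MODEL.DEVICE
#   markers_batch, gen_losses = generator(batch_instances)   # per-image marker tensors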
2 | from .build import MARKER_GENERATOR_REGISTRY, build_marker_generator 3 | from .generalized_generator import GeneralizedGenerator, KDTreeClassPredictor 4 | from .aruco_generator import ArucoGenerator 5 | # You need to install AprilTag, described in Dockerfile 6 | # from .april_generator import AprilGenerator 7 | -------------------------------------------------------------------------------- /deepformable/modeling/marker_generator/april_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import logging 3 | import numpy as np 4 | import cv2 5 | from cv2 import aruco 6 | from apriltag import apriltag 7 | 8 | from detectron2.config import configurable 9 | 10 | from .build import MARKER_GENERATOR_REGISTRY 11 | from deepformable.utils import get_aruco_dict 12 | from .aruco_generator import ArucoGenerator 13 | 14 | 15 | @MARKER_GENERATOR_REGISTRY.register() 16 | class AprilGenerator(ArucoGenerator): 17 | @configurable 18 | def __init__( 19 | self, 20 | *, 21 | april_dict, 22 | border_bits, 23 | num_classes, 24 | shuffling, 25 | vis_period=0 26 | ): 27 | super().__init__( 28 | aruco_dict=april_dict, 29 | border_bits=border_bits, 30 | num_classes=num_classes, 31 | shuffling=shuffling, 32 | vis_period=vis_period) 33 | self.detector = apriltag("tag36h11") 34 | 35 | 36 | @classmethod 37 | def from_config(cls, cfg): 38 | april_dict = get_aruco_dict(cfg.MODEL.MARKER_GENERATOR.ARUCO_DICT, default=aruco.DICT_APRILTAG_36h11) 39 | shuffling = cfg.MODEL.ROI_DECODER_HEAD.DECODER_ON 40 | return { 41 | "april_dict": april_dict, 42 | "border_bits": cfg.MODEL.MARKER_GENERATOR.BORDER_BITS, 43 | "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, 44 | "shuffling": shuffling, 45 | "vis_period": cfg.VIS_PERIOD, 46 | } 47 | 48 | def recognize(self, img): 49 | if len(img.shape) == 3: 50 | img_ = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 51 | elif len(img.shape) == 2: 52 | img_ = img 53 | 54 | ### opencv 55 | # marker_corners, ids, _ = cv2.aruco.detectMarkers(img, cv2.aruco.getPredefinedDictionary( 56 | # cv2.aruco.DICT_APRILTAG_36H11)) 57 | ### 58 | 59 | ### AprilRobotics 60 | detections = self.detector.detect(img_) 61 | marker_corners = [] 62 | ids = [] 63 | for i in detections: 64 | corners = np.array(i['lb-rb-rt-lt']) 65 | corners_ = corners.copy() 66 | 67 | corners_[0] = corners[1] 68 | corners_[1] = corners[0] 69 | corners_[2] = corners[3] 70 | corners_[3] = corners[2] 71 | 72 | marker_corners.append(np.array([corners_], dtype=np.float32)) 73 | ids.append(i["id"]) 74 | ids = np.array(ids) 75 | ### 76 | 77 | marker_corners = np.array(marker_corners).reshape(-1,4,2) 78 | return marker_corners, ids -------------------------------------------------------------------------------- /deepformable/modeling/marker_generator/aruco_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
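# Clarifying note (added; a sketch of intended use): FixedGenerator below learns
# free-form marker textures as an nn.Parameter, while ArucoGenerator reproduces
# the binary ArUco patterns and can also recognize them classically with OpenCV.
# An assumed, illustrative call for the classical path, where `img` is a uint8
# BGR or grayscale numpy image:
#
#   corners, ids = generator.recognize(img)   # corners: (N, 4, 2) float32, ids: (N,)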
2 | import logging 3 | import numpy as np 4 | import cv2 5 | from cv2 import aruco 6 | 7 | import torch 8 | from torch import nn 9 | import torch.nn.functional as F 10 | 11 | from detectron2.config import configurable 12 | from detectron2.utils.comm import all_gather, is_main_process 13 | from detectron2.utils.events import get_event_storage 14 | 15 | from .build import MARKER_GENERATOR_REGISTRY, MarkerGenerator 16 | from deepformable.utils import get_aruco_dict 17 | 18 | 19 | @MARKER_GENERATOR_REGISTRY.register() 20 | class FixedGenerator(MarkerGenerator): 21 | @configurable 22 | def __init__( 23 | self, 24 | *, 25 | marker_size, 26 | border_bits, 27 | num_classes, 28 | init_method="uniform", 29 | init_std=1.4, 30 | out_channels=3, 31 | vis_period=0, 32 | ): 33 | super().__init__(num_classes, 1, vis_period) 34 | self.marker_size = marker_size 35 | self.border_bits = border_bits 36 | generation_size = self.marker_size - self.border_bits * 2 37 | self.markers = nn.Parameter( 38 | torch.empty( 39 | self.num_classes, out_channels, generation_size, generation_size, 40 | requires_grad=True)) 41 | if init_method == "uniform": 42 | nn.init.uniform_(self.markers, -init_std, init_std) 43 | elif init_method == "kaiming": 44 | nn.init.kaiming_normal_(self.markers, mode="fan_out", nonlinearity="sigmoid") 45 | else: 46 | nn.init.normal_(self.markers, std=init_std) 47 | 48 | 49 | self.activation = nn.Sigmoid() 50 | 51 | @classmethod 52 | def from_config(cls, cfg): 53 | return { 54 | "marker_size": cfg.MODEL.MARKER_GENERATOR.MARKER_SIZE[0], 55 | "border_bits": cfg.MODEL.MARKER_GENERATOR.BORDER_BITS, 56 | "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, 57 | "init_method": cfg.MODEL.MARKER_GENERATOR.INIT_METHOD.lower(), 58 | "init_std": cfg.MODEL.MARKER_GENERATOR.INIT_STD, 59 | "vis_period": cfg.VIS_PERIOD, 60 | } 61 | 62 | def message_generator(self): 63 | return None 64 | 65 | def batch_marker_generator(self, gt_classes_batch): 66 | markers = self.activation(self.markers) 67 | markers = F.pad(markers, [self.border_bits]*4) # Add padding 68 | 69 | markers_batch, messages_batch = [], [] 70 | for classes in gt_classes_batch: 71 | markers_batch.append(markers[classes]) 72 | messages_batch.append(torch.zeros(len(classes))) 73 | 74 | return markers_batch, messages_batch, {} 75 | 76 | 77 | @MARKER_GENERATOR_REGISTRY.register() 78 | class ArucoGenerator(MarkerGenerator): 79 | @configurable 80 | def __init__( 81 | self, 82 | *, 83 | aruco_dict, 84 | border_bits, 85 | num_classes, 86 | shuffling, 87 | vis_period=0 88 | ): 89 | super().__init__(num_classes, aruco_dict.markerSize * aruco_dict.markerSize, vis_period) 90 | marker_size = aruco_dict.markerSize + 2 * border_bits 91 | self.aruco_dict = aruco_dict 92 | self.border_bits = border_bits 93 | self.num_markers = len(aruco_dict.bytesList) 94 | self.marker_size = marker_size 95 | self.shuffling = shuffling 96 | self.detect_params = aruco.DetectorParameters_create() 97 | self.subpix_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.00001) 98 | 99 | markers_binary = [] 100 | for i in range(self.num_markers): 101 | marker = aruco_dict.drawMarker(i, marker_size, borderBits=1) 102 | markers_binary.append(marker[1:-1, 1:-1].reshape(-1)) 103 | 104 | # self.register_buffer("markers", torch.tensor( 105 | # markers, dtype=torch.float32).view(num_classes, 3, marker_size, marker_size)/255.0) 106 | self.register_buffer("markers_binary", torch.tensor(markers_binary, dtype=torch.float32)/255.0, False) 107 | 
self.register_buffer("messages", self.markers_binary[:num_classes], False) 108 | 109 | @classmethod 110 | def from_config(cls, cfg): 111 | aruco_dict = get_aruco_dict(cfg.MODEL.MARKER_GENERATOR.ARUCO_DICT) 112 | shuffling = cfg.MODEL.ROI_DECODER_HEAD.DECODER_ON 113 | return { 114 | "aruco_dict": aruco_dict, 115 | "border_bits": cfg.MODEL.MARKER_GENERATOR.BORDER_BITS, 116 | "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, 117 | "shuffling": shuffling, 118 | "vis_period": cfg.VIS_PERIOD, 119 | } 120 | 121 | def visualize_training(self): 122 | if self.training and self.vis_period != 0: 123 | storage = get_event_storage() 124 | if storage.iter % self.vis_period == 0: 125 | marker = np.uint8(self.get_markers_numpy(0) * 255) 126 | marker = marker.transpose(2, 0, 1) 127 | storage.put_image("Generated Marker", marker) 128 | 129 | def message_generator(self): 130 | if self.shuffling and self.training: 131 | message_indexes = None 132 | if is_main_process(): 133 | message_indexes = torch.randperm(self.num_markers)[:self.num_classes] 134 | message_indexes = all_gather(message_indexes)[0].to(self.device) 135 | return self.markers_binary[message_indexes] 136 | return self.markers_binary[:self.num_classes] 137 | 138 | def batch_marker_generator(self, gt_classes_batch): 139 | markers_batch, messages_batch = [], [] 140 | for classes in gt_classes_batch: 141 | messages = self.messages[classes] 142 | messages_batch.append(messages) 143 | 144 | markers = torch.repeat_interleave( 145 | messages.view(-1, 1, self.aruco_dict.markerSize, self.aruco_dict.markerSize), 3, dim=1) 146 | markers = F.pad(markers, [self.border_bits]*4) 147 | markers_batch.append(markers) 148 | 149 | return markers_batch, messages_batch, {} 150 | 151 | def recognize(self, img): 152 | marker_corners, ids, _ = cv2.aruco.detectMarkers( 153 | img, self.aruco_dict, parameters=self.detect_params) 154 | if len(img.shape) == 3: 155 | gray_undistorted = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 156 | else: 157 | gray_undistorted = img 158 | for corners in marker_corners: 159 | cv2.cornerSubPix(gray_undistorted, corners, 160 | winSize=(3, 3), 161 | zeroZone=(-1, -1), 162 | criteria=self.subpix_criteria) 163 | 164 | marker_corners = np.array(marker_corners).reshape(-1,4,2)+0.5 165 | ids = ids.reshape(-1) if ids is not None else np.ones(len(marker_corners)) 166 | return marker_corners, ids -------------------------------------------------------------------------------- /deepformable/modeling/marker_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | import numpy as np 5 | 6 | from detectron2.utils.registry import Registry 7 | from detectron2.utils.events import get_event_storage 8 | from abc import ABCMeta, abstractmethod 9 | 10 | MARKER_GENERATOR_REGISTRY = Registry("MARKER_GENERATOR") # noqa F401 isort:skip 11 | MARKER_GENERATOR_REGISTRY.__doc__ = """ 12 | Registry for the marker generator architecture 13 | """ 14 | 15 | 16 | def build_marker_generator(cfg): 17 | """ 18 | Build the marker generator pipeline, defined by ``cfg.MODEL.MARKER_GENERATOR``. 
19 | """ 20 | marker_generator = cfg.MODEL.MARKER_GENERATOR.NAME 21 | generator = MARKER_GENERATOR_REGISTRY.get(marker_generator)(cfg) 22 | generator.to(torch.device(cfg.MODEL.DEVICE)) 23 | return generator 24 | 25 | 26 | class MarkerGenerator(nn.Module, metaclass=ABCMeta): 27 | def __init__(self, num_classes, num_bits, vis_period=0): 28 | super().__init__() 29 | self.num_classes = num_classes 30 | self.num_bits = num_bits 31 | self.vis_period = vis_period 32 | self.register_buffer("gamma", torch.tensor((1/2.2), dtype=torch.float32), False) 33 | # self.register_buffer("messages", torch.zeros(num_classes, num_bits, dtype=torch.float32), False) 34 | 35 | @property 36 | def device(self): 37 | return self.gamma.device 38 | 39 | @torch.no_grad() 40 | def get_markers_numpy(self, classes): 41 | if isinstance(classes, int): 42 | classes = [classes] 43 | classes = torch.tensor(classes, device=self.device) 44 | markers_batch, _, _ = self.batch_marker_generator([classes]) 45 | markers = markers_batch[0] ** self.gamma 46 | markers = markers.permute(0,2,3,1)[...,[2,1,0]].cpu().numpy() 47 | if len(markers) == 1: 48 | return markers[0] 49 | return markers 50 | 51 | @abstractmethod 52 | def message_generator(self): 53 | """ 54 | This method should save the internal variables required for batch_marker_generator 55 | """ 56 | pass 57 | 58 | @abstractmethod 59 | def batch_marker_generator(self, gt_classes_batch): 60 | """ 61 | This method returns markers and messages in list form 62 | """ 63 | pass 64 | 65 | def postprocessing(self, pred_instances): 66 | return pred_instances 67 | 68 | def visualize_training(self): 69 | if self.training and self.vis_period != 0: 70 | storage = get_event_storage() 71 | if storage.iter % self.vis_period == 0: 72 | if self.messages is not None: 73 | messages = torch.zeros(2, self.num_bits, dtype=torch.float32) 74 | messages[0, range(0,self.num_bits,2)] = 1.0 75 | messages[1, range(1,self.num_bits,2)] = 1.0 76 | old_messages = self.messages[:2].clone() 77 | self.messages[:2] = messages 78 | markers = np.uint8(self.get_markers_numpy([0, 1]) * 255) 79 | self.messages[:2] = old_messages 80 | marker1, marker2 = markers[0].transpose(2,0,1), markers[1].transpose(2,0,1) 81 | else: 82 | # TODO: What is this? :) 83 | markers = np.uint8(self.get_markers_numpy([0, 1]) * 255) 84 | marker1, marker2 = markers[0].transpose(2,0,1), markers[1].transpose(2,0,1) 85 | storage.put_image("Marker (101010..)", marker1) 86 | storage.put_image("Marker (010101..)", marker2) 87 | 88 | def forward(self, batch_instances): 89 | with torch.no_grad(): 90 | self.visualize_training() 91 | self.messages = self.message_generator() 92 | if self.messages is not None: 93 | self.messages = self.messages.to(self.device) 94 | 95 | gt_classes_batch = [i.gt_classes for i in batch_instances] 96 | markers_batch, messages_batch, loss = self.batch_marker_generator(gt_classes_batch) 97 | 98 | for messages, instances in zip(messages_batch, batch_instances): 99 | instances.gt_message = messages 100 | 101 | return markers_batch, loss -------------------------------------------------------------------------------- /deepformable/modeling/marker_roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
2 | from .marker_roi_heads import MarkerROIHeads 3 | from .naive_transform_head import NaiveTransformHead, ROI_TRANSFORM_HEAD_REGISTRY 4 | from .corner_head import ROI_CORNER_HEAD_REGISTRY, CornerHead, CornerHeadV2 5 | from .decoder_head import DecoderHead, ROI_DECODER_HEAD_REGISTRY 6 | from .transformer_head import SpatialTransformerHead, SpatialTransformerHeadV2 7 | -------------------------------------------------------------------------------- /deepformable/modeling/marker_roi_heads/decoder_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import numpy as np 3 | from typing import List 4 | import fvcore.nn.weight_init as weight_init 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from detectron2.config import configurable 10 | from detectron2.layers import Conv2d, Linear, get_norm, ShapeSpec, cat, nonzero_tuple 11 | from detectron2.utils.registry import Registry 12 | from detectron2.structures import Instances 13 | 14 | from deepformable.layers import AdaptiveLoss 15 | 16 | __all__ = ["DecoderHead", "build_decoder_head", "ROI_DECODER_HEAD_REGISTRY"] 17 | 18 | ROI_DECODER_HEAD_REGISTRY = Registry("ROI_DECODER_HEAD") 19 | ROI_DECODER_HEAD_REGISTRY.__doc__ = """ 20 | Registry for decoder heads 21 | """ 22 | 23 | def build_decoder_head(cfg, input_shape): 24 | name = cfg.MODEL.ROI_DECODER_HEAD.NAME 25 | return ROI_DECODER_HEAD_REGISTRY.get(name)(cfg, input_shape) 26 | 27 | 28 | 29 | @ROI_DECODER_HEAD_REGISTRY.register() 30 | class DecoderHead(nn.Module): 31 | @configurable 32 | def __init__( 33 | self, 34 | input_shape: ShapeSpec, 35 | *, 36 | num_classes: int, 37 | num_bits: int, 38 | conv_dims: List[int], 39 | conv_norm="", 40 | fc_dims: List[int], 41 | with_decoder: bool = True, 42 | decoding_loss_type: str = 'mse', 43 | decoding_loss_weight: float = 1.0, 44 | class_loss_weight: float = 1.0, 45 | ): 46 | super().__init__() 47 | if isinstance(input_shape, int): # some backward compatibility 48 | input_shape = ShapeSpec(channels=input_shape) 49 | 50 | output_size = (input_shape.channels, (input_shape.height or 1), (input_shape.width or 1)) 51 | 52 | self.conv_norm_relus = [] 53 | for k, conv_dim in enumerate(conv_dims): 54 | conv = Conv2d( 55 | output_size[0], 56 | conv_dim, 57 | kernel_size=3, 58 | padding=0, 59 | bias=not conv_norm, 60 | norm=get_norm(conv_norm, conv_dim), 61 | activation=F.relu, 62 | ) 63 | self.add_module("conv{}".format(k + 1), conv) 64 | self.conv_norm_relus.append(conv) 65 | output_size = (conv_dim, output_size[1]-2, output_size[2]-2) 66 | for layer in self.conv_norm_relus: 67 | weight_init.c2_msra_fill(layer) 68 | 69 | self.fcs = [] 70 | for k, fc_dim in enumerate(fc_dims): 71 | fc = Linear(np.prod(output_size), fc_dim) 72 | self.add_module("fc{}".format(k + 1), fc) 73 | self.fcs.append(fc) 74 | output_size = fc_dim 75 | for layer in self.fcs: 76 | weight_init.c2_xavier_fill(layer) 77 | 78 | self.with_decoder = with_decoder 79 | self.num_classes, self.num_bits = num_classes, num_bits 80 | if with_decoder: 81 | output_size = np.prod(output_size) 82 | self.decoder = Linear(output_size, num_bits) 83 | nn.init.normal_(self.decoder.weight, std=0.01) 84 | nn.init.constant_(self.decoder.bias, 0) 85 | self.cls_score = Linear(output_size, 1) 86 | else: 87 | self.cls_score = Linear(output_size, num_classes + 1) 88 | 89 | nn.init.normal_(self.cls_score.weight, std=0.01) 90 | nn.init.constant_(self.cls_score.bias, 0) 91 
| 92 | self.decoding_loss_func = AdaptiveLoss(loss_type=decoding_loss_type) 93 | self.decoding_loss_weight = decoding_loss_weight 94 | self.objectness_loss_func = AdaptiveLoss(loss_type='bce') 95 | self.class_loss_weight = class_loss_weight 96 | 97 | @classmethod 98 | def from_config(cls, cfg, input_shape): 99 | return { 100 | "input_shape": input_shape, 101 | "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, 102 | "num_bits": cfg.MODEL.MARKER_GENERATOR.NUM_GENERATION_BITS, 103 | "with_decoder": cfg.MODEL.ROI_DECODER_HEAD.DECODER_ON, 104 | "decoding_loss_type": cfg.MODEL.ROI_DECODER_HEAD.LOSS_TYPE, 105 | "decoding_loss_weight": cfg.MODEL.ROI_DECODER_HEAD.DECODING_LOSS_WEIGHT, 106 | "class_loss_weight": cfg.MODEL.ROI_DECODER_HEAD.CLASS_LOSS_WEIGHT, 107 | "conv_dims": cfg.MODEL.ROI_DECODER_HEAD.CONV_DIMS, 108 | "fc_dims": cfg.MODEL.ROI_DECODER_HEAD.FC_DIMS 109 | } 110 | 111 | def forward(self, x: torch.Tensor, proposals_sampled: List[Instances]): 112 | if self.training: 113 | gt_classes = cat([p.gt_classes for p in proposals_sampled], dim=0) 114 | fg_list = (gt_classes >= 0) & (gt_classes < self.num_classes) 115 | fg_inds = nonzero_tuple(fg_list)[0] 116 | gt_objectness = fg_list.to(torch.float32).view(-1,1) 117 | 118 | # Apply conv and relus 119 | for layer in self.conv_norm_relus: 120 | x = layer(x) 121 | 122 | if x.dim() > 2: 123 | x = torch.flatten(x, start_dim=1) 124 | 125 | if len(self.fcs): 126 | for layer in self.fcs: 127 | x = F.relu(layer(x)) 128 | 129 | obj_scores = self.cls_score(x) 130 | 131 | decoded_message = None 132 | if self.with_decoder: 133 | if self.training: 134 | x = x[fg_inds] 135 | decoded_message = torch.sigmoid(self.decoder(x)) 136 | 137 | if self.training: 138 | if self.with_decoder: 139 | objectness_loss = self.objectness_loss_func(obj_scores, gt_objectness) 140 | gt_message = cat([p.gt_message for p in proposals_sampled], dim=0)[fg_inds] 141 | decoding_loss = self.decoding_loss_func(decoded_message, gt_message) 142 | div_factor = max((gt_classes.numel() * self.num_bits), 1) 143 | losses = { 144 | 'objectness_loss': objectness_loss * self.class_loss_weight / obj_scores.size(0), 145 | 'decoding_loss': decoding_loss * self.decoding_loss_weight / div_factor 146 | } 147 | else: 148 | # TODO: Modify for adaptive clipping 149 | loss_cls = F.cross_entropy( 150 | obj_scores, gt_classes, reduction="mean") * self.class_loss_weight 151 | losses = {'loss_cls': loss_cls} 152 | return losses 153 | 154 | i, score_batches, message_batches = 0, [], [] 155 | for p in proposals_sampled: 156 | data_len = len(p.proposal_boxes) 157 | score_batches.append(obj_scores[i:i+data_len]) 158 | if self.with_decoder: 159 | message_batches.append(decoded_message[i:i+data_len]) 160 | else: 161 | message_batches.append(None) 162 | i += data_len 163 | 164 | return score_batches, message_batches 165 | -------------------------------------------------------------------------------- /deepformable/modeling/marker_roi_heads/marker_roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
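# Overview comment (added for clarity): MarkerROIHeads below chains three
# sub-heads. The transform head pools and normalizes RoI features, the corner
# head regresses marker corners from one branch, and the decoder head (defined
# above) predicts an objectness score and, when DECODER_ON is set, a per-bit
# message from the other. At inference the per-image outputs are returned as
# plain dictionaries and converted to Instances later during postprocessing.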
2 | import numpy as np 3 | from typing import Dict, List, Optional, Tuple, Union 4 | import torch 5 | from torch import nn 6 | 7 | from detectron2.config import configurable 8 | from detectron2.structures import ImageList, Instances 9 | 10 | from detectron2.modeling import ROI_HEADS_REGISTRY, ROIHeads 11 | 12 | from .naive_transform_head import build_transform_head 13 | from .corner_head import build_corner_head 14 | from .decoder_head import build_decoder_head 15 | 16 | 17 | 18 | @ROI_HEADS_REGISTRY.register() 19 | class MarkerROIHeads(ROIHeads): 20 | """ 21 | This class implements the corner prediction and decoding tasks. 22 | It returns a dictionary of outputs that later converted to 23 | instances after postprocessing. 24 | """ 25 | @configurable 26 | def __init__( 27 | self, 28 | *, 29 | transform_head: nn.Module, 30 | corner_head: nn.Module, 31 | decoder_head: nn.Module, 32 | **kwargs 33 | ): 34 | super().__init__(**kwargs) 35 | self.transform_head = transform_head 36 | self.corner_head = corner_head 37 | self.decoder_head = decoder_head 38 | 39 | @classmethod 40 | def from_config(cls, cfg, input_shape): 41 | ret = super().from_config(cfg) 42 | transform_head = build_transform_head(cfg, input_shape) 43 | ret["transform_head"] = transform_head 44 | corner_input_shape, decoder_input_shape = transform_head.output_shape 45 | ret["corner_head"] = build_corner_head(cfg, corner_input_shape) 46 | ret["decoder_head"] = build_decoder_head(cfg, decoder_input_shape) 47 | return ret 48 | 49 | @property 50 | def device(self): 51 | return self.corner_head.device 52 | 53 | def forward( 54 | self, 55 | images: ImageList, 56 | features: Dict[str, torch.Tensor], 57 | proposals: List[Instances], 58 | targets: Optional[List[Instances]] = None, 59 | ) -> Tuple[List[Dict], Dict]: 60 | # del images 61 | if self.training: 62 | assert targets 63 | proposals = self.label_and_sample_proposals(proposals, targets) 64 | # del targets 65 | 66 | if self.training: 67 | corner_features, decoding_features, sample_locations_batch, losses = self.transform_head(images, features, proposals, targets) 68 | losses.update(self.corner_head(corner_features, proposals)) 69 | losses.update(self.decoder_head(decoding_features, proposals)) 70 | del images, targets 71 | return [], losses 72 | 73 | corner_features, decoding_features, sample_locations_batch, _ = self.transform_head(images, features, proposals, targets) 74 | corners_batch = self.corner_head(corner_features, proposals) 75 | obj_scores_batch, decoded_messages_batch = self.decoder_head(decoding_features, proposals) 76 | 77 | results = [] 78 | for i in range(len(proposals)): 79 | output = { 80 | "corners": corners_batch[i], "obj_scores": obj_scores_batch[i], 81 | "decoded_messages": decoded_messages_batch[i], 82 | "image_shape": proposals[i].image_size} 83 | if sample_locations_batch: 84 | output["sample_locations"] = sample_locations_batch[i] 85 | results.append(output) 86 | 87 | return results, {} -------------------------------------------------------------------------------- /deepformable/modeling/marker_roi_heads/naive_transform_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
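# Contract note (added for clarity): a transform head takes
# (images, features, proposals, targets) and returns the 4-tuple
# (corner_features, decoding_features, sample_locations, losses); its
# output_shape property reports the two ShapeSpecs consumed by the corner and
# decoder heads. NaiveTransformHead below simply pools RoI features and returns
# the same tensor for both branches, with no sample locations.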
2 | import numpy as np 3 | from typing import Dict, List, Optional, Tuple, Union 4 | import fvcore.nn.weight_init as weight_init 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from detectron2.structures import ImageList, Instances 10 | from detectron2.config import configurable 11 | from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm 12 | from detectron2.utils.registry import Registry 13 | from detectron2.modeling.poolers import ROIPooler 14 | 15 | __all__ = ["NaiveTransformHead", "build_transform_head", "ROI_TRANSFORM_HEAD_REGISTRY"] 16 | 17 | ROI_TRANSFORM_HEAD_REGISTRY = Registry("ROI_TRANSFORM_HEAD") 18 | ROI_TRANSFORM_HEAD_REGISTRY.__doc__ = """ 19 | Registry for transform heads, which transform features into a normalized 20 | space for corner and class prediction. 21 | """ 22 | 23 | def build_transform_head(cfg, input_shape): 24 | """ 25 | Build a transform head defined by `cfg.MODEL.ROI_TRANSFORM_HEAD.NAME`. 26 | """ 27 | name = cfg.MODEL.ROI_TRANSFORM_HEAD.NAME 28 | return ROI_TRANSFORM_HEAD_REGISTRY.get(name)(cfg, input_shape) 29 | 30 | 31 | @ROI_TRANSFORM_HEAD_REGISTRY.register() 32 | class NaiveTransformHead(nn.Module): 33 | @configurable 34 | def __init__( 35 | self, 36 | input_shape: ShapeSpec, 37 | *, 38 | in_features, 39 | pooler: ROIPooler, 40 | conv_dims: List[int], 41 | fc_dims: List[int], 42 | conv_norm="" 43 | ): 44 | super().__init__() 45 | assert len(conv_dims) + len(fc_dims) > 0 46 | 47 | self.in_features = in_features 48 | self.pooler = pooler 49 | 50 | output_size = (input_shape.channels, input_shape.height, input_shape.width) 51 | 52 | self.conv_norm_relus = [] 53 | for k, conv_dim in enumerate(conv_dims): 54 | conv = Conv2d( 55 | output_size[0], 56 | conv_dim, 57 | kernel_size=3, 58 | padding=1, 59 | bias=not conv_norm, 60 | norm=get_norm(conv_norm, conv_dim), 61 | activation=F.relu, 62 | ) 63 | self.add_module("conv{}".format(k + 1), conv) 64 | self.conv_norm_relus.append(conv) 65 | output_size = (conv_dim, output_size[1], output_size[2]) 66 | for layer in self.conv_norm_relus: 67 | weight_init.c2_msra_fill(layer) 68 | 69 | self.fcs = [] 70 | for k, fc_dim in enumerate(fc_dims): 71 | fc = Linear(np.prod(output_size), fc_dim) 72 | self.add_module("fc{}".format(k + 1), fc) 73 | self.fcs.append(fc) 74 | output_size = fc_dim 75 | for layer in self.fcs: 76 | weight_init.c2_xavier_fill(layer) 77 | self._output_size = output_size 78 | 79 | @classmethod 80 | def from_config(cls, cfg, input_shape): 81 | # TODO: Create new parameters for transform head in the config 82 | in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES 83 | pooler_resolution = cfg.MODEL.ROI_TRANSFORM_HEAD.POOLER_RESOLUTION 84 | pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) 85 | sampling_ratio = cfg.MODEL.ROI_TRANSFORM_HEAD.POOLER_SAMPLING_RATIO 86 | pooler_type = cfg.MODEL.ROI_TRANSFORM_HEAD.POOLER_TYPE 87 | 88 | num_conv = cfg.MODEL.ROI_TRANSFORM_HEAD.NUM_CONV 89 | conv_dim = cfg.MODEL.ROI_TRANSFORM_HEAD.CONV_DIM 90 | num_fc = cfg.MODEL.ROI_TRANSFORM_HEAD.NUM_FC 91 | fc_dim = cfg.MODEL.ROI_TRANSFORM_HEAD.FC_DIM 92 | 93 | in_channels = [input_shape[f].channels for f in in_features] 94 | # Check all channel counts are equal 95 | assert len(set(in_channels)) == 1, in_channels 96 | in_channels = in_channels[0] 97 | 98 | pooler = ROIPooler( 99 | output_size=pooler_resolution, 100 | scales=pooler_scales, 101 | sampling_ratio=sampling_ratio, 102 | pooler_type=pooler_type, 103 | ) 104 | 105 | return { 106 | "input_shape": 
ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution), 107 | "in_features": in_features, 108 | "pooler": pooler, 109 | "conv_dims": [conv_dim] * num_conv, 110 | "fc_dims": [fc_dim] * num_fc, 111 | "conv_norm": cfg.MODEL.ROI_TRANSFORM_HEAD.NORM, 112 | } 113 | 114 | def forward( 115 | self, 116 | images: ImageList, 117 | features: Dict[str, torch.Tensor], 118 | proposals: List[Instances], 119 | targets: Optional[List[Instances]] = None, 120 | ): 121 | features = [features[f] for f in self.in_features] 122 | x = self.pooler(features, [p.proposal_boxes for p in proposals]) 123 | 124 | for layer in self.conv_norm_relus: 125 | x = layer(x) 126 | if len(self.fcs): 127 | if x.dim() > 2: 128 | x = torch.flatten(x, start_dim=1) 129 | for layer in self.fcs: 130 | x = F.relu(layer(x)) 131 | return x, x, None, {} 132 | 133 | @property 134 | def output_shape(self): 135 | """ 136 | Returns: 137 | ShapeSpec: the output feature shape 138 | """ 139 | out = ShapeSpec(channels=self._output_size) 140 | return out, out -------------------------------------------------------------------------------- /deepformable/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | from .utils import ImageList 3 | from .render_rcnn import GeneralizedRCNN_RenderInput 4 | from .classical_detector import ClassicalDetector -------------------------------------------------------------------------------- /deepformable/modeling/meta_arch/classical_detector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import numpy as np 3 | from typing import List, Dict 4 | import torch 5 | from torch import nn 6 | 7 | import detectron2 8 | from detectron2.structures import ImageList, Instances, Boxes 9 | from detectron2.modeling import META_ARCH_REGISTRY 10 | 11 | from ..marker_generator import build_marker_generator 12 | 13 | 14 | @META_ARCH_REGISTRY.register() 15 | class ClassicalDetector(nn.Module): 16 | def __init__(self, cfg): 17 | super().__init__() 18 | self.add_module("marker_generator", build_marker_generator(cfg)) 19 | self.test_topk_per_image = 0 20 | self.test_sort_instances = False 21 | self.test_apply_nms = False 22 | self.nms_score_criteria = "none" 23 | self.marker_postprocessing = False 24 | 25 | def inference(self, images): 26 | results = [] 27 | for img in images: 28 | img = img.permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8) 29 | marker_corners, ids = self.marker_generator.recognize(img) 30 | result = { 31 | "corners": torch.tensor(marker_corners, dtype=torch.float32), 32 | "image_shape": img.shape[:2], 33 | "obj_scores": torch.as_tensor(np.ones(len(marker_corners)), dtype=torch.float32)} 34 | if len(marker_corners) != 0: 35 | result["pred_classes"] = torch.as_tensor(ids, dtype=torch.int64) 36 | results.append(result) 37 | 38 | return results 39 | 40 | def postprocess_single(self, result: dict, output_height: int, output_width: int): 41 | if isinstance(output_width, torch.Tensor): 42 | # This shape might (but not necessarily) be tensors during tracing. 43 | # Converts integer tensors to float temporaries to ensure true 44 | # division is performed when computing scale_x and scale_y. 
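# Worked example (illustrative numbers): if detection ran on a 750x1000 copy of
# a 1500x2000 input, then scale_x = 2000 / 1000 = 2.0 and scale_y = 1500 / 750 = 2.0,
# and every predicted corner below is multiplied element-wise by (scale_x, scale_y).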
45 | output_width_tmp = output_width.float() 46 | output_height_tmp = output_height.float() 47 | new_size = torch.stack([output_height, output_width]) 48 | else: 49 | new_size = (output_height, output_width) 50 | output_width_tmp = output_width 51 | output_height_tmp = output_height 52 | 53 | scale_x, scale_y = ( 54 | output_width_tmp / result["image_shape"][1], 55 | output_height_tmp / result["image_shape"][0], 56 | ) 57 | 58 | pred_instances = Instances(new_size) 59 | corners = result["corners"] 60 | if corners.shape[0] == 0: 61 | pred_instances.pred_corners = corners 62 | return pred_instances 63 | 64 | pred_instances.scores = result["obj_scores"] 65 | pred_instances.pred_classes = result["pred_classes"] 66 | 67 | # Scale corners and sample_locations 68 | scale_tensor = torch.tensor([scale_x, scale_y], device=corners.device) 69 | corners = corners * scale_tensor 70 | 71 | # Recalculate boxes 72 | min_c, max_c = torch.min(corners, dim=1)[0], torch.max(corners, dim=1)[0] 73 | boxes = torch.cat([min_c, max_c], dim=1) 74 | valid_mask = torch.isfinite(boxes).all(dim=1) 75 | 76 | # Add predictions to the instances, filter valid ones 77 | pred_instances.pred_boxes = Boxes(boxes) 78 | pred_instances.pred_corners = corners 79 | pred_instances = pred_instances[valid_mask] 80 | 81 | return pred_instances 82 | 83 | def postprocess(self, instances, batched_inputs: List[Dict[str, torch.Tensor]], image_sizes): 84 | # Rescale the output instances to the target size. 85 | processed_results = [] 86 | for results_per_image, input_per_image, image_size in zip( 87 | instances, batched_inputs, image_sizes 88 | ): 89 | height = input_per_image.get("height", image_size[0]) 90 | width = input_per_image.get("width", image_size[1]) 91 | r = self.postprocess_single(results_per_image, height, width) 92 | processed_results.append({"instances": r}) 93 | return processed_results 94 | 95 | def forward(self, batched_inputs, do_postprocess=True): 96 | images = ImageList.from_tensors([x["image"] for x in batched_inputs], 1) 97 | results = self.inference(images) 98 | 99 | if do_postprocess: 100 | assert not torch.jit.is_scripting(), "Scripting is not supported for postprocess." 101 | return self.postprocess(results, batched_inputs, images.image_sizes) 102 | else: 103 | return results -------------------------------------------------------------------------------- /deepformable/modeling/meta_arch/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is modified from detectron2 implementation, 3 | changes are logged in comments. 4 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 5 | """ 6 | from __future__ import division 7 | from typing import List, Dict, Optional, Tuple 8 | import numpy as np 9 | 10 | import torch 11 | from torch.nn import functional as F 12 | 13 | from detectron2.layers.wrappers import shapes_to_tensor 14 | from detectron2.structures import ImageList as Detectron2_Imagelist 15 | 16 | 17 | class ImageList(Detectron2_Imagelist): 18 | @staticmethod 19 | def from_tensors( 20 | tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 21 | ) -> "ImageList": 22 | """ 23 | Detectron2's ImageList implementation modified 24 | to allow proper gradient flow. 
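Example (illustrative; the tensor sizes below are assumptions, not taken from
the original code):
    >>> imgs = [torch.rand(3, 480, 640), torch.rand(3, 512, 512)]
    >>> batched = ImageList.from_tensors(imgs, size_divisibility=32)
    >>> tuple(batched.tensor.shape)  # padded to the per-dimension maximum, stride-aligned
    (2, 3, 512, 640)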
25 | """ 26 | assert len(tensors) > 0 27 | assert isinstance(tensors, (tuple, list)) 28 | for t in tensors: 29 | assert isinstance(t, torch.Tensor), type(t) 30 | assert t.shape[:-2] == tensors[0].shape[:-2], t.shape 31 | 32 | image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] 33 | image_sizes_tensor = [shapes_to_tensor(x) for x in image_sizes] 34 | max_size = torch.stack(image_sizes_tensor).max(0).values 35 | 36 | if size_divisibility > 1: 37 | stride = size_divisibility 38 | # the last two dims are H,W, both subject to divisibility requirement 39 | max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride 40 | 41 | # handle weirdness of scripting and tracing ... 42 | if torch.jit.is_scripting(): 43 | max_size: List[int] = max_size.to(dtype=torch.long).tolist() 44 | else: 45 | if torch.jit.is_tracing(): 46 | image_sizes = image_sizes_tensor 47 | 48 | batched_imgs = [] 49 | for img, image_size in zip(tensors, image_sizes): 50 | padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] 51 | batched_imgs.append(F.pad(img, padding_size, value=pad_value)) 52 | batched_imgs = torch.stack(batched_imgs, dim=0) 53 | 54 | return ImageList(batched_imgs.contiguous(), image_sizes) -------------------------------------------------------------------------------- /deepformable/modeling/rpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is modified from detectron2 implementation, 3 | to add adaptive loss to region proposal network. 4 | Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 5 | """ 6 | from typing import Dict, List, Optional, Tuple, Union 7 | import torch 8 | from torch import nn 9 | 10 | from fvcore.nn import giou_loss 11 | from detectron2.layers import cat, ShapeSpec 12 | from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY 13 | from detectron2.structures import Boxes 14 | from detectron2.utils.events import get_event_storage 15 | from detectron2.modeling.proposal_generator import RPN 16 | 17 | from deepformable.layers import AdaptiveLoss 18 | 19 | 20 | @PROPOSAL_GENERATOR_REGISTRY.register() 21 | class RPN_AdaptiveLoss(RPN): 22 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 23 | super().__init__(cfg, input_shape) 24 | adaptive_loss = cfg.MODEL.PROPOSAL_GENERATOR.ADAPTIVE_LOSS 25 | self.bbox_loss_function = AdaptiveLoss(loss_type='l1') if adaptive_loss else nn.L1Loss(reduction='sum') 26 | self.class_loss_function = AdaptiveLoss(loss_type='bce') if adaptive_loss else nn.BCELoss(reduction='sum') 27 | 28 | @torch.jit.unused 29 | def losses( 30 | self, 31 | anchors: List[Boxes], 32 | pred_objectness_logits: List[torch.Tensor], 33 | gt_labels: List[torch.Tensor], 34 | pred_anchor_deltas: List[torch.Tensor], 35 | gt_boxes: List[torch.Tensor], 36 | ) -> Dict[str, torch.Tensor]: 37 | """ 38 | Return the losses from a set of RPN predictions and their associated ground-truth. 39 | 40 | Args: 41 | anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each 42 | has shape (Hi*Wi*A, B), where B is box dimension (4 or 5). 43 | pred_objectness_logits (list[Tensor]): A list of L elements. 44 | Element i is a tensor of shape (N, Hi*Wi*A) representing 45 | the predicted objectness logits for all anchors. 46 | gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`. 47 | pred_anchor_deltas (list[Tensor]): A list of L elements. 
Element i is a tensor of shape 48 | (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors 49 | to proposals. 50 | gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`. 51 | 52 | Returns: 53 | dict[loss name -> loss value]: A dict mapping from loss name to loss value. 54 | Loss names are: `loss_rpn_cls` for objectness classification and 55 | `loss_rpn_loc` for proposal localization. 56 | """ 57 | num_images = len(gt_labels) 58 | gt_labels = torch.stack(gt_labels) # (N, sum(Hi*Wi*Ai)) 59 | 60 | # Log the number of positive/negative anchors per-image that's used in training 61 | pos_mask = gt_labels == 1 62 | num_pos_anchors = pos_mask.sum().item() 63 | num_neg_anchors = (gt_labels == 0).sum().item() 64 | storage = get_event_storage() 65 | storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images) 66 | storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images) 67 | 68 | if self.box_reg_loss_type == "smooth_l1": 69 | anchors = type(anchors[0]).cat(anchors).tensor # Ax(4 or 5) 70 | gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in gt_boxes] 71 | gt_anchor_deltas = torch.stack(gt_anchor_deltas) # (N, sum(Hi*Wi*Ai), 4 or 5) 72 | # ====CHANGE_ON_LOSS==== 73 | localization_loss = self.bbox_loss_function( 74 | cat(pred_anchor_deltas, dim=1)[pos_mask], gt_anchor_deltas[pos_mask]) 75 | # localization_loss = smooth_l1_loss( 76 | # cat(pred_anchor_deltas, dim=1)[pos_mask], 77 | # gt_anchor_deltas[pos_mask], 78 | # self.smooth_l1_beta, 79 | # reduction="sum", 80 | # ) 81 | # ====================== 82 | elif self.box_reg_loss_type == "giou": 83 | pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) 84 | pred_proposals = cat(pred_proposals, dim=1) 85 | pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1]) 86 | pos_mask = pos_mask.view(-1) 87 | localization_loss = giou_loss( 88 | pred_proposals[pos_mask], cat(gt_boxes)[pos_mask], reduction="sum" 89 | ) 90 | else: 91 | raise ValueError(f"Invalid rpn box reg loss type '{self.box_reg_loss_type}'") 92 | 93 | valid_mask = gt_labels >= 0 94 | # ====CHANGE_ON_LOSS==== 95 | objectness_loss = self.class_loss_function( 96 | cat(pred_objectness_logits, dim=1)[valid_mask], 97 | gt_labels[valid_mask].to(torch.float32) 98 | ) 99 | # ====================== 100 | normalizer = self.batch_size_per_image * num_images 101 | return { 102 | "loss_rpn_cls": objectness_loss / normalizer, 103 | "loss_rpn_loc": localization_loss / normalizer, 104 | } -------------------------------------------------------------------------------- /deepformable/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
2 | from .config import get_cfg 3 | 4 | from .board_utils import ( 5 | calculate_board_dims, 6 | is_polygon_intersects, 7 | marker_placer, 8 | marker_metadata_loader 9 | ) 10 | 11 | from .aruco_utils import ( 12 | get_aruco_dict, 13 | detect_aruco_markers 14 | ) 15 | 16 | from .general_utils import ( 17 | if_continue_execution, 18 | img_flexible_reader 19 | ) 20 | 21 | from .inpaint_utils import ( 22 | NoInpaint, 23 | OpenCVInpaint 24 | ) 25 | 26 | from .visualize_utils import ( 27 | convert_mapped_instances, 28 | DeepformableVisualizer, 29 | VisualizationDemo, 30 | ModifiedPredictor 31 | ) 32 | 33 | from .image_utils import ( 34 | sample_param, 35 | get_disk_blur_kernel, 36 | hls_to_rgb, 37 | rgb_to_hls, 38 | ) 39 | 40 | from .env import ( 41 | load_seed_info, 42 | save_seed_info 43 | ) -------------------------------------------------------------------------------- /deepformable/utils/aruco_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import numpy as np 3 | import cv2 4 | from cv2 import aruco 5 | 6 | def get_aruco_dict(name, default=aruco.DICT_5X5_100): 7 | """ 8 | For a given string returns corresponding aruco dictionary if exists. 9 | Check cv2.aruco.__dict__ keys for supported markers. 10 | """ 11 | name = 'DICT_' + name.upper() 12 | d = default 13 | if name in aruco.__dict__: 14 | d = aruco.__dict__[name] 15 | return aruco.Dictionary_get(d) 16 | 17 | def detect_aruco_markers( 18 | img, 19 | aruco_dict, 20 | mtx=None, 21 | detect_params=aruco.DetectorParameters_create(), 22 | subpix_criteria=(cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.00001), 23 | max_winsize=9, 24 | min_winsize=2, 25 | ): 26 | """ 27 | Detects aruco markers and refines corners in subpixel accuracy. 28 | """ 29 | marker_corners, ids, tmp = cv2.aruco.detectMarkers( 30 | img, aruco_dict, 31 | parameters=detect_params, 32 | cameraMatrix=mtx) 33 | gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.shape[-1]==3 else img 34 | if len(marker_corners) > 0 and subpix_criteria is not None: 35 | for corners in marker_corners: 36 | dif = corners[0]- np.roll(corners[0],2) 37 | dist_avg = np.average(np.linalg.norm(dif, axis=1)) 38 | win_size = min(max(min_winsize, int(dist_avg/12)), max_winsize) 39 | cv2.cornerSubPix( 40 | gray_img, corners, 41 | winSize=(win_size, win_size), 42 | zeroZone=(-1, -1), 43 | criteria=subpix_criteria) 44 | return marker_corners, ids, tmp 45 | 46 | -------------------------------------------------------------------------------- /deepformable/utils/board_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
2 | import json 3 | import numpy as np 4 | from pathlib import Path 5 | 6 | import shapely 7 | from shapely.geometry import MultiPolygon, Polygon 8 | 9 | 10 | paper_sizes = { 11 | "a2": (420.0, 420.0 * (2.0 ** 0.5)), 12 | "a3": (210.0 * (2.0 ** 0.5), 420.0), # 296, 420 13 | "a4": (210.0, 210.0 * (2.0 ** 0.5)), # 210, 296 14 | "a5": (210.0 / (2.0 ** 0.5), 210.0), 15 | "a6": (105.0, 105.0 * (2.0 ** 0.5)), 16 | "a3-s": (305.0, 457.0), 17 | } 18 | 19 | 20 | def calculate_board_dims(board): 21 | if isinstance(board['paper_type'], str): 22 | paper_size = paper_sizes.get(board['paper_type'], 'a4') 23 | elif isinstance(board['paper_type'], list): 24 | paper_size = board['paper_type'] 25 | margins = board.get('paper_margins', 10.5) 26 | board_dims = (paper_size[0] - margins, paper_size[1] - margins) 27 | return board_dims 28 | 29 | 30 | def is_polygon_intersects(src_poly, polygons): 31 | if len(polygons) == 0: 32 | return False 33 | src_poly = Polygon(src_poly) 34 | polygons = MultiPolygon([Polygon(i) for i in polygons]) 35 | return polygons.intersects(src_poly) 36 | 37 | 38 | def marker_placer( 39 | board_size=(210,296), 40 | marker_min=40, 41 | marker_max=140, 42 | num_classes=64, 43 | class_array=[], 44 | safety_size=4, 45 | random_trials=75, 46 | p_reg=[0.2, 0.3, 0.2, 0.15, 0, 0.15], 47 | p_reg_rand=[0.57, 0.37, 0.06], 48 | ): 49 | """ 50 | TODO: This method requires bug-fix and clean-up!! 51 | """ 52 | def place_random(marker_min, marker_max, board_size): 53 | polygons = np.empty((0,4,2)) 54 | val_range = range(marker_min,marker_max+1) 55 | p = np.flip(np.array(val_range)) 56 | p = p / np.sum(p) 57 | for _ in range(random_trials): 58 | marker_size = np.random.choice(val_range, p=p) 59 | src_poly_margin = np.array([[0,0],[1,0],[1,1],[0,1]]) * (marker_size+8) 60 | theta = np.random.uniform(0, np.pi) 61 | rotMatrix = np.array([[np.cos(theta), -np.sin(theta)], 62 | [np.sin(theta), np.cos(theta)]]) 63 | src_poly_margin = np.matmul(src_poly_margin, rotMatrix.T) 64 | src_poly_margin -= np.amin(src_poly_margin, axis=0) 65 | bounding_box = np.amax(src_poly_margin, axis=0) 66 | offset = np.random.uniform((0,0), board_size - bounding_box) 67 | src_poly_margin += offset 68 | if not is_polygon_intersects(src_poly_margin, polygons): 69 | src_poly = np.array([[0,0],[1,0],[1,1],[0,1]]) * marker_size 70 | src_poly = np.matmul(src_poly, rotMatrix.T) 71 | src_poly -= np.amin(src_poly, axis=0) 72 | src_poly += offset + (bounding_box - np.amax(src_poly, axis=0))/2 73 | polygons = np.append(polygons, [src_poly], axis=0) 74 | polygons -= np.amin(polygons, axis=(0,1)) 75 | return polygons 76 | 77 | def place_regular(marker_min, marker_max, board_size): 78 | regular_max = marker_min + (marker_max-marker_min)//3 + 1 79 | norm_dims = np.array([[-1,-1],[1,-1],[1,1],[-1,1]]) * 0.5 80 | marker_size = np.random.randint(marker_min, regular_max) 81 | 82 | def checkerboard_regular(): 83 | angle = lambda: np.random.choice([0,1,2,3])*np.pi/2 84 | r, c = marker_size, marker_size 85 | ofs = marker_size 86 | return r, c, ofs, angle 87 | 88 | def checkerboard_random(): 89 | r, c, ofs, _ = checkerboard_regular() 90 | angle = lambda: np.random.uniform(0, np.pi) 91 | return r, c, ofs, angle 92 | 93 | def checkerboard_dense(): 94 | _, _, ofs, angle = checkerboard_regular() 95 | r = np.random.randint(marker_size//2+3, marker_size+4) 96 | ofs += np.random.randint(safety_size, 15) 97 | c = ofs + np.random.randint(safety_size, 15) 98 | return r, c, ofs, angle 99 | 100 | def grid_regular(): 101 | r, c, _, angle = checkerboard_regular() 102 | 
ofs = 0 103 | r += np.random.randint(safety_size, marker_size) 104 | c = np.random.randint(safety_size, marker_size) 105 | return r, c, ofs, angle 106 | 107 | def grid_regular2(): 108 | r, c, _, angle = checkerboard_regular() 109 | angle = lambda: 0 110 | ofs = 0 111 | # r += 10 112 | # c = 10 113 | r += marker_size/3 114 | c = marker_size/3 115 | return r, c, ofs, angle 116 | 117 | def grid_skewed(): 118 | r, c, ofs, angle = checkerboard_regular() 119 | r += np.random.randint(safety_size, marker_size) 120 | c = ofs 121 | return r, c, ofs, angle 122 | 123 | row_gap, column_gap, even_row_offset, angle_choice = np.random.choice([ 124 | checkerboard_regular, checkerboard_random, checkerboard_dense, 125 | grid_regular, grid_regular2, grid_skewed], 126 | p=p_reg 127 | )() 128 | 129 | polygons = np.empty((0,4,2)) 130 | 131 | cur_pos, index = np.array([0.0, 0.0]), 0 132 | while np.all(cur_pos + marker_size < board_size): 133 | while np.all(cur_pos + marker_size < board_size): 134 | theta = angle_choice() 135 | rotMatrix = np.array([[np.cos(theta), -np.sin(theta)], 136 | [np.sin(theta), np.cos(theta)]]) 137 | cur_poly = np.matmul(norm_dims, rotMatrix.T) * marker_size 138 | cur_poly += marker_size/2 139 | polygons = np.append(polygons, [cur_pos + cur_poly], axis=0) 140 | cur_pos += [marker_size+column_gap, 0] 141 | cur_pos = np.array([0 if index%2 else even_row_offset, cur_pos[1]+row_gap]) 142 | index += 1 143 | polygons -= np.amin(polygons, axis=(0,1)) 144 | pol_max = np.amax(polygons, axis=(0,1)) 145 | large_index = (pol_max > board_size) 146 | if large_index.any(): 147 | polygons *= (np.array(board_size)[large_index] / pol_max[large_index]) 148 | return polygons 149 | 150 | def place_single(marker_min, marker_max, board_size): 151 | marker_size = min(board_size) * np.random.uniform(0.8, 0.999) 152 | polygon = np.array([[0,0],[1,0],[1,1],[0,1]]) * marker_size 153 | return polygon.reshape(1,4,2) 154 | 155 | placer = np.random.choice([place_regular, place_random, place_single], p=p_reg_rand) 156 | polygons = placer(marker_min, marker_max, board_size) 157 | polygons += (board_size - np.amax(polygons, axis=(0,1)))/2 158 | markers = np.dstack([polygons, np.zeros((*polygons.shape[:2],1))])[:,[0,1,3,2]] 159 | 160 | if len(class_array) == 0: 161 | classes = np.random.randint(0, num_classes, size=len(markers)) 162 | # return [], [] 163 | else: 164 | classes = [] 165 | for _ in range(len(markers)): 166 | if len(class_array) == 0: 167 | break 168 | # val = random.choice(class_array) 169 | val = class_array[0] 170 | class_array.remove(val) 171 | classes.append(val) 172 | return markers[:len(classes)], classes 173 | 174 | def image_placer( 175 | board_size=(210,296), 176 | marker_ratio=(4,3), 177 | margin_ratio=0.8, 178 | marker_min=40, 179 | marker_max=140, 180 | num_classes=64, 181 | class_array=[], 182 | safety_size=4, 183 | random_trials=75, 184 | p_reg=[0.2, 0.3, 0.2, 0.15, 0, 0.15], 185 | p_reg_rand=[0.57, 0.37, 0.06], 186 | ): 187 | swp = False 188 | if board_size[0] < board_size[1]: 189 | board_size = (board_size[1], board_size[0]) 190 | swp = True 191 | 192 | mx, my = board_size[0] / 2, board_size[1] / 2 193 | limx, limy = board_size[0] * margin_ratio, board_size[1] * margin_ratio 194 | mulx, muly = (limx - mx) / marker_ratio[0], (limy - my) / marker_ratio[1] 195 | mul = min(mulx, muly) 196 | ux, uy = mx - mul * marker_ratio[0], my - mul * marker_ratio[1] 197 | bx, by = mx + mul * marker_ratio[0], my + mul * marker_ratio[1] 198 | markers = [] 199 | if not swp: 200 | markers = [[[ux, uy, 0.0], [bx, 
uy, 0.0], [bx, by, 0.0], [ux, by, 0.0]]] 201 | else: 202 | markers = [[[by, ux, 0.0], [by, bx, 0.0], [uy, bx, 0.0], [uy, ux, 0.0]]] 203 | 204 | flip = np.random.randint(2, size=1).astype(np.bool).item() 205 | if flip: 206 | markers = [markers[0][2:], markers[0][:2]] 207 | 208 | classes = [np.random.randint(num_classes, size=1).item()] 209 | return markers, classes 210 | 211 | def marker_metadata_loader(cfg, marker_config_file): 212 | from detectron2.data import MetadataCatalog 213 | marker_config_path = Path(marker_config_file) 214 | if marker_config_path.exists(): 215 | with open(marker_config_path) as cfg_file: 216 | marker_config = json.load(cfg_file) 217 | 218 | markers = sorted(marker_config['markers'], key=lambda x: x['marker_id']) 219 | binary_messages = [[float(i) for i in m['binary']] for m in markers] 220 | marker_classes = [m['text'] for m in markers] 221 | id_map = {m['marker_id']+1: m['marker_id'] for m in markers} 222 | 223 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(marker_classes) 224 | 225 | for dataset_name in cfg.DATASETS.TEST: 226 | MetadataCatalog.get(dataset_name).set( 227 | messages=binary_messages, thing_classes=marker_classes, 228 | thing_dataset_id_to_contiguous_id=id_map, 229 | ) 230 | else: 231 | return False 232 | return True -------------------------------------------------------------------------------- /deepformable/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | from detectron2.config import CfgNode as CN 3 | 4 | def add_marker_generator_config(cfg: CN): 5 | _C = cfg 6 | _C.MODEL.MARKER_GENERATOR = CN() 7 | _C.MODEL.MARKER_GENERATOR.TRAINABLE = True 8 | _C.MODEL.MARKER_GENERATOR.NAME = "GeneralizedGenerator" 9 | _C.MODEL.MARKER_GENERATOR.MARKER_SIZE = (16, 16) 10 | _C.MODEL.MARKER_GENERATOR.ARUCO_DICT = "6x6_1000" 11 | _C.MODEL.MARKER_GENERATOR.BORDER_BITS = 0 12 | _C.MODEL.MARKER_GENERATOR.NUM_GENERATION_BITS = 36 13 | _C.MODEL.MARKER_GENERATOR.INIT_STD = 1.4 14 | _C.MODEL.MARKER_GENERATOR.INIT_METHOD = "uniform" 15 | _C.MODEL.MARKER_GENERATOR.CONV_DIMS = [[8],[6]] 16 | _C.MODEL.MARKER_GENERATOR.FC_DIMS = [256,256] 17 | _C.MODEL.MARKER_GENERATOR.UPSAMPLE_TYPE = "bilinear" 18 | _C.MODEL.MARKER_GENERATOR.UPSAMPLE_SCALE = 2 19 | _C.MODEL.MARKER_GENERATOR.INITIAL_SIZE = 4 20 | _C.MODEL.MARKER_GENERATOR.NORM_TYPE = "adain" 21 | _C.MODEL.MARKER_GENERATOR.ACTIVATION_TYPE = "leaky" 22 | _C.MODEL.MARKER_GENERATOR.RESIDUAL = False 23 | _C.MODEL.MARKER_GENERATOR.EQUALIZED = False 24 | _C.MODEL.MARKER_GENERATOR.PADDING_MODE = "zeros" 25 | _C.MODEL.MARKER_GENERATOR.FINAL_CONV_KERNEL_SIZE = 3 26 | _C.MODEL.MARKER_GENERATOR.MARKERS_FILE_LOCATION = "data/e2e_markers.npz" 27 | _C.MODEL.MARKER_GENERATOR.DATASET_ROOT = "/Data/Datasets/mirflickr_images1" 28 | 29 | 30 | def add_intermediate_augmentor_config(cfg: CN): 31 | _C = cfg 32 | _C.INTERMEDIATE_AUGMENTOR = CN() 33 | # _C.INTERMEDIATE_AUGMENTOR.AUG_LIST = [ 34 | # "PerspectiveAugmentor", "RadialDistortionAugmentor", "TpsTransformer", "ImageResize", 35 | # "DefocusBlurAugmentor", "MotionBlurAugmentor", "HueShiftAugmentor", 36 | # "BrightnessAugmentor", "NoiseAugmentor", "GammaAugmentor", "GammaCorrector", "JPEGAugmentor"] # Make sure the correct order of augmentations 37 | # _C.INTERMEDIATE_AUGMENTOR.EXEC_PROBA_LIST = [0.55, 0.55, 0.55, 1.0, 0.4, 0.4, 0.4, 0.4, 0.45, 0.4, 1.0, 0.4] # Make sure the correct order of augmentations 38 | # _C.INTERMEDIATE_AUGMENTOR.TEST_STRENGTH_LIST = [0.6, 0.6, 0.6, 
1.0, 0.3, 0.3, 0.3, 0.3, 0.4, 0.1, 1.0, 0.4] 39 | _C.INTERMEDIATE_AUGMENTOR.AUG_LIST = ["GammaCorrector"] 40 | _C.INTERMEDIATE_AUGMENTOR.EXEC_PROBA_LIST = [1.0] 41 | _C.INTERMEDIATE_AUGMENTOR.PerspectiveAugmentor = CN() 42 | _C.INTERMEDIATE_AUGMENTOR.PerspectiveAugmentor.CORNER_SHIFT_RANGE = (0.0, 0.2, 0.12) 43 | _C.INTERMEDIATE_AUGMENTOR.GammaAugmentor = CN() 44 | _C.INTERMEDIATE_AUGMENTOR.GammaAugmentor.GAMMA_RANGE = (0.85, 1.15, 1.0) # Original is ~0.75 45 | _C.INTERMEDIATE_AUGMENTOR.DefocusBlurAugmentor = CN() 46 | _C.INTERMEDIATE_AUGMENTOR.DefocusBlurAugmentor.BLUR_RADIUS_RANGE = (0.5, 2.0, 1.5) # Original is not continuous 47 | _C.INTERMEDIATE_AUGMENTOR.MotionBlurAugmentor = CN() 48 | _C.INTERMEDIATE_AUGMENTOR.MotionBlurAugmentor.BLUR_RADIUS_RANGE = (0.51, 3.0, 2.0) 49 | _C.INTERMEDIATE_AUGMENTOR.HueShiftAugmentor = CN() 50 | _C.INTERMEDIATE_AUGMENTOR.HueShiftAugmentor.HUE_SHIFT_RANGE = (0.0, 0.1, 0.04) # Original 0.15 51 | _C.INTERMEDIATE_AUGMENTOR.BrightnessAugmentor = CN() 52 | _C.INTERMEDIATE_AUGMENTOR.BrightnessAugmentor.BRIGHTNESS_RANGE = (0.2, 1.2, 0.4) 53 | _C.INTERMEDIATE_AUGMENTOR.NoiseAugmentor = CN() 54 | _C.INTERMEDIATE_AUGMENTOR.NoiseAugmentor.NOISE_RANGE = (0.0, 0.012, 0.05) 55 | 56 | # The JPEG quality range starts from coefficient 1 up to 63, i.e. values in (0-62) 57 | _C.INTERMEDIATE_AUGMENTOR.JPEGAugmentor = CN() 58 | _C.INTERMEDIATE_AUGMENTOR.JPEGAugmentor.Y_QUALITY_RANGE = (12, 61, 20) # Andreas_prev (10,20) 59 | _C.INTERMEDIATE_AUGMENTOR.JPEGAugmentor.UV_QUALITY_RANGE = (10, 60, 15) # Andreas_prev (4,8) 60 | 61 | _C.INTERMEDIATE_AUGMENTOR.MAX_IMAGE_SIZE = (1080, 1920) # Andreas_prev (4,8) 62 | _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor = CN() 63 | _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.UNDISTORT_ITER = 20 64 | _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.FOCAL_LENGTH_RANGE = (1.4, 2.0, 1.75) 65 | _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.CENTER_SHIFT_RANGE = (0.0, 0.1, 0.06) 66 | _C.INTERMEDIATE_AUGMENTOR.RadialDistortionAugmentor.DISTORTION_RANGE = (0.0, 1.25, 0.5) 67 | _C.INTERMEDIATE_AUGMENTOR.TpsTransformer = CN() 68 | # Number of control points (vertical, horizontal). 69 | # More points increase computation and yield smaller-scale warping patterns 70 | _C.INTERMEDIATE_AUGMENTOR.TpsTransformer.CTRL_PTS_SIZE = (16, 20) 71 | # Maximum displacement of the control points. 
Should be below 2 / max(CTRL_PTS_HEIGHT, CTRL_PTS_WIDTH) to prevent unrealistic behaviour 72 | _C.INTERMEDIATE_AUGMENTOR.TpsTransformer.WARP_RANGE = (0, 0.02, 0.012) 73 | # Maximum error of the coordinate locations in pixels, as we iteratively optimize their locations 74 | _C.INTERMEDIATE_AUGMENTOR.TpsTransformer.STOP_THRESHOLD = 0.05 75 | # Maximum number of iterations if the threshold is not reached 76 | _C.INTERMEDIATE_AUGMENTOR.TpsTransformer.MAX_ITER = 1000 77 | 78 | 79 | def add_roi_head_config(cfg: CN): 80 | _C = cfg 81 | _C.MODEL.ROI_TRANSFORM_HEAD = CN() 82 | _C.MODEL.ROI_TRANSFORM_HEAD.NAME = "SpatialTransformerHeadV2" 83 | _C.MODEL.ROI_TRANSFORM_HEAD.NORM = "" 84 | _C.MODEL.ROI_TRANSFORM_HEAD.POOLER_SAMPLING_RATIO = 0 85 | _C.MODEL.ROI_TRANSFORM_HEAD.POOLER_TYPE = "ROIAlignV2" 86 | _C.MODEL.ROI_TRANSFORM_HEAD.POOLER_RESOLUTION = 12 87 | _C.MODEL.ROI_TRANSFORM_HEAD.TRANSFORMER_RESOLUTION = 8 88 | _C.MODEL.ROI_TRANSFORM_HEAD.NUM_FC = 2 89 | _C.MODEL.ROI_TRANSFORM_HEAD.FC_DIM = 512 90 | _C.MODEL.ROI_TRANSFORM_HEAD.NUM_CONV = 0 91 | _C.MODEL.ROI_TRANSFORM_HEAD.CONV_DIM = 256 92 | _C.MODEL.ROI_TRANSFORM_HEAD.LOSS_WEIGHT = 1.0 93 | _C.MODEL.ROI_TRANSFORM_HEAD.FC_COMMON_DIMS = [256] 94 | _C.MODEL.ROI_TRANSFORM_HEAD.FC_CORNER_DIMS = [128] 95 | _C.MODEL.ROI_TRANSFORM_HEAD.FC_RESAMPLE_DIMS = [128] 96 | _C.MODEL.ROI_TRANSFORM_HEAD.AFFINE_PREDICTOR_ON = False 97 | 98 | _C.MODEL.ROI_CORNER_HEAD = CN() 99 | _C.MODEL.ROI_CORNER_HEAD.NAME = "CornerHeadV2" 100 | _C.MODEL.ROI_CORNER_HEAD.SMOOTH_L1_BETA = 0.0 101 | _C.MODEL.ROI_CORNER_HEAD.LOSS_WEIGHT = 0.1 # 1.2 for CornerHead 102 | _C.MODEL.ROI_CORNER_HEAD.REGRESSION_WEIGHTS = (10.0, 10.0) 103 | _C.MODEL.ROI_CORNER_HEAD.SAMPLE_RESOLUTION = 8 104 | _C.MODEL.ROI_CORNER_HEAD.CONV_DIMS = [32] 105 | _C.MODEL.ROI_CORNER_HEAD.FC_DIMS = [128, 64] 106 | 107 | _C.MODEL.ROI_DECODER_HEAD = CN() 108 | _C.MODEL.ROI_DECODER_HEAD.NAME = "DecoderHead" 109 | _C.MODEL.ROI_DECODER_HEAD.DECODER_ON = True 110 | _C.MODEL.ROI_DECODER_HEAD.LOSS_TYPE = "mse" 111 | _C.MODEL.ROI_DECODER_HEAD.CONV_DIMS = [] 112 | _C.MODEL.ROI_DECODER_HEAD.FC_DIMS = [512, 256] 113 | _C.MODEL.ROI_DECODER_HEAD.DECODING_LOSS_WEIGHT = 10.0 114 | _C.MODEL.ROI_DECODER_HEAD.CLASS_LOSS_WEIGHT = 0.5 115 | 116 | _C.MODEL.PROPOSAL_GENERATOR.ADAPTIVE_LOSS = True 117 | 118 | _C.TEST.SORT_INSTANCES = True 119 | _C.TEST.APPLY_NMS = True 120 | _C.TEST.DECODING_SCORE_BY_MESSAGE_CONFIDENCE = True # Otherwise uses objectness score 121 | _C.TEST.MARKER_POSTPROCESSING = True 122 | _C.TEST.LOAD_MESSAGES = True 123 | # This option chooses which scoring criterion to use for NMS. 
Options are: 124 | # "bit_similarity" uses the distance of predictions to the provided class of messages [used option in the paper] 125 | # "message_confidence" uses the confidence of how each bit is predicted 126 | # "objectness" uses the predicted objectness 127 | # "mc_obj_product" uses the product of "message_confidence" and "objectness" 128 | # "mc_obj_bs_product" uses the product of "message_confidence", "bit_similarity" and "objectness" 129 | _C.TEST.NMS_SCORE_CRITERIA = "mc_obj_bs_product" 130 | 131 | 132 | def add_vovnet_config(cfg: CN): 133 | _C = cfg 134 | _C.MODEL.VOVNET = CN() 135 | _C.MODEL.VOVNET.CONV_BODY = "V-39-eSE" 136 | _C.MODEL.VOVNET.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"] 137 | 138 | # Options: FrozenBN, GN, "SyncBN", "BN" 139 | _C.MODEL.VOVNET.NORM = "FrozenBN" 140 | _C.MODEL.VOVNET.OUT_CHANNELS = 256 141 | _C.MODEL.VOVNET.BACKBONE_OUT_CHANNELS = 256 142 | 143 | 144 | def add_model_other_config(cfg: CN): 145 | _C = cfg 146 | _C.MODEL.PREDICTIONS_PATH = 'datasets/evaluation/e2etags/flat/all.json' 147 | _C.MODEL.SAVE_RENDERED = False 148 | _C.MODEL.SAVE_RENDERED_DIR = 'datasets/evaluation/e2etags/rendered_outputs/' 149 | 150 | _C.INPUT.PREDICTOR_RESIZE = False 151 | _C.INPUT.FILTER_BOX_THRESHOLD = 30 152 | _C.INPUT.FILTER_BOX_THRESHOLD_TEST = 5 153 | _C.INPUT.PLACEMENT_MARKER_MINMAX = (40, 190) 154 | _C.INPUT.MAX_MARKERS_PER_IMAGE = 128 155 | _C.INPUT.MARKER_TEST_SIZE = 50 156 | 157 | _C.RENDERER = CN() 158 | _C.RENDERER.NAME = "MarkerRenderer" 159 | _C.RENDERER.SHADING_METHOD = "cook-torrance" 160 | _C.RENDERER.GAMMA = 2.2 161 | _C.RENDERER.EPSILON = 1e-8 162 | _C.RENDERER.BLUR_RANGE = (1.5, 2.0, 1.0) # Third parameter is testing value 163 | _C.RENDERER.ROUGHNESS_RANGE = (0.14, 0.6, 0.25) 164 | _C.RENDERER.DIFFUSE_RANGE = (0.9, 1.0, 0.94) 165 | _C.RENDERER.NORMAL_NOISE_RANGE = (0.0, 0.015, 0.005) 166 | _C.RENDERER.SPECULAR_RANGE = (0.02, 1.0, 0.35) 167 | 168 | _C.DEMO = CN() 169 | _C.DEMO.DRAW_MASK = False 170 | _C.DEMO.DRAW_BBOX = True 171 | _C.DEMO.DRAW_CORNERS = True 172 | _C.DEMO.COLOR_REDGREEN_THRESHOLD = 0.0 173 | 174 | def get_cfg() -> CN: 175 | from detectron2.config.defaults import _C 176 | cfg = _C.clone() 177 | add_marker_generator_config(cfg) 178 | add_intermediate_augmentor_config(cfg) 179 | add_roi_head_config(cfg) 180 | add_vovnet_config(cfg) 181 | add_model_other_config(cfg) 182 | return cfg -------------------------------------------------------------------------------- /deepformable/utils/env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implemented by Facebook, Inc. and its affiliates. 3 | Edited by Mustafa B. Yaldiz 4 | """ 5 | import torch 6 | import detectron2 7 | import numpy as np 8 | import random 9 | 10 | 11 | _DEEPFORMABLE_ENV_SETUP_DONE = False 12 | 13 | def setup_environment(): 14 | # Perform environment setup work. 15 | global _DEEPFORMABLE_ENV_SETUP_DONE 16 | if _DEEPFORMABLE_ENV_SETUP_DONE: 17 | return 18 | _DEEPFORMABLE_ENV_SETUP_DONE = True 19 | 20 | def get_version(module, digit=2): 21 | return tuple(map(int, module.__version__.split(".")[:digit])) 22 | 23 | # fmt: off 24 | assert get_version(torch) >= (1, 6), "Requires torch>=1.6" 25 | assert get_version(detectron2, digit=3) >= (0, 4, 1), "Requires detectron2>=0.4.1" 26 | # assert get_version(shapely) >= (1, 7, 1), "Requires shapely>=1.7.1" 27 | # import shapely 28 | # fmt: on 29 | 30 | 31 | # Use detectron2.utils.env.seed_all_rng to set the seed 32 | # to specified value. 
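#
# Illustrative usage sketch for the two helpers defined below (an assumed,
# typical usage pattern, not part of the original pipeline): snapshot the RNG
# states before a stochastic step and restore them later to replay the same
# random sequence.
#
#   from detectron2.utils.env import seed_all_rng
#   seed_all_rng(42)               # fix the global seed first
#   seed_info = save_seed_info()   # snapshot torch / numpy / random states
#   ...                            # code that consumes random numbers
#   load_seed_info(seed_info)      # restore the snapshot to reproduce the run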
33 | 34 | def save_seed_info(): 35 | # Stores random seed states 36 | return { 37 | "torch": torch.get_rng_state(), 38 | "numpy": np.random.get_state(), 39 | "random": random.getstate() 40 | } 41 | 42 | def load_seed_info(seed_info): 43 | # Loads seed states 44 | torch.set_rng_state(seed_info["torch"]) 45 | np.random.set_state(seed_info["numpy"]) 46 | random.setstate(seed_info["random"]) 47 | -------------------------------------------------------------------------------- /deepformable/utils/general_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import os 3 | import pathlib 4 | import cv2 5 | from tqdm import tqdm 6 | from os.path import isfile, join 7 | 8 | def img_flexible_reader(directory, num_skip_frames=0, tqdm_on=False): 9 | """ 10 | Reads videos in sorted order for a given directory; 11 | if the path is a file, tries to read it directly. 12 | """ 13 | directory = pathlib.Path(directory) 14 | directory = str(directory.resolve()) 15 | 16 | files = [directory] 17 | if not isfile(directory): 18 | files = [join(directory, f) for f in sorted(os.listdir(directory)) if isfile(join(directory, f))] 19 | 20 | total_frames = 0 21 | for f in files: 22 | cap = cv2.VideoCapture(f) 23 | frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) 24 | frame_count = frame_count if frame_count >= 0 else 1 25 | total_frames += int(frame_count) 26 | 27 | skip_count = 0 28 | if tqdm_on: pbar = tqdm(total=total_frames, smoothing=0) 29 | for f in files: 30 | cap = cv2.VideoCapture(f) 31 | num_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT) 32 | num_frames = int(num_frames if num_frames >= 0 else 1) 33 | prev_frame, frame = None, None 34 | for _ in range(num_frames): 35 | prev_frame = frame if frame is not None else prev_frame 36 | success, frame = cap.read() 37 | if tqdm_on: pbar.update() 38 | skip_count += 1 39 | if success and skip_count == num_skip_frames+1: 40 | skip_count = 0 41 | yield frame 42 | if tqdm_on: pbar.close() 43 | 44 | def if_continue_execution(message="Continue (yes/no): "): 45 | while True: 46 | answer = str(input(message)).lower() 47 | if answer in ["yes", "y", ""]: 48 | return True 49 | elif answer in ["no", "n"]: 50 | return False -------------------------------------------------------------------------------- /deepformable/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
2 | import math 3 | from typing import List, Optional 4 | import numpy as np 5 | 6 | import kornia 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | def sample_param( 11 | param_range, shape=1, strength=None, 12 | training=True, device=torch.device("cpu") 13 | ): 14 | min_v, max_v, test_v = param_range 15 | if training or strength != None: 16 | if strength: 17 | rand_val = torch.ones(shape, device=device) * strength 18 | else: 19 | rand_val = torch.rand(shape, device=device) 20 | rand_val = min_v + (max_v-min_v) * rand_val 21 | else: 22 | rand_val = torch.ones(shape, device=device) * test_v 23 | return rand_val.item() if shape==1 else rand_val 24 | 25 | @torch.jit.script 26 | def get_disk_blur_kernel( 27 | kernel_radius: float, upscale_factor: int = 4, 28 | device: torch.device=torch.device("cpu") 29 | ): 30 | # TODO: Approximate implementation, fix for exact one 31 | r = kernel_radius * upscale_factor 32 | kernel_scaled_size = (math.ceil(kernel_radius)*2+1)*upscale_factor 33 | kernel = torch.ones(kernel_scaled_size, kernel_scaled_size, device=device) 34 | x, y = torch.meshgrid([ 35 | torch.linspace( 36 | -(kernel_scaled_size-1.0)/2.0, (kernel_scaled_size-1.0)/2.0, kernel_scaled_size, device=device)]*2) 37 | kernel[x**2 + y**2 > r**2] = 0 38 | kernel = F.avg_pool2d(kernel.unsqueeze(0), (upscale_factor, upscale_factor)) 39 | return (kernel / torch.sum(kernel, (1, 2))) 40 | 41 | def rgb_to_hls(image: torch.Tensor) -> torch.Tensor: 42 | r"""Convert an RGB image to HLS 43 | The image data is assumed to be in the range of (0, 1). 44 | 45 | Args: 46 | input (torch.Tensor): RGB Image to be converted to HLS. 47 | 48 | 49 | Returns: 50 | torch.Tensor: HLS version of the image. 51 | """ 52 | 53 | if not torch.is_tensor(image): 54 | raise TypeError("Input type is not a torch.Tensor. Got {}".format( 55 | type(image))) 56 | 57 | if len(image.shape) < 3 or image.shape[-3] != 3: 58 | raise ValueError("Input size must have a shape of (*, 3, H, W). Got {}" 59 | .format(image.shape)) 60 | 61 | r: torch.Tensor = image[..., 0, :, :] 62 | g: torch.Tensor = image[..., 1, :, :] 63 | b: torch.Tensor = image[..., 2, :, :] 64 | 65 | maxc: torch.Tensor = image.max(-3)[0] 66 | minc: torch.Tensor = image.min(-3)[0] 67 | 68 | imax: torch.Tensor = image.max(-3)[1] 69 | 70 | l: torch.Tensor = (maxc + minc) / 2 # luminance 71 | l2 = maxc + minc + 1e-8 72 | 73 | deltac: torch.Tensor = maxc - minc 74 | 75 | s: torch.Tensor = torch.where(l < 0.5, deltac / (l2), deltac / 76 | (torch.tensor(2.) - (l2))) # saturation 77 | 78 | deltac = deltac + 1e-8 79 | 80 | hi: torch.Tensor = torch.zeros_like(deltac) 81 | 82 | hi[imax == 0] = (((g - b) / deltac) % 6)[imax == 0] 83 | hi[imax == 1] = (((b - r) / deltac) + 2)[imax == 1] 84 | hi[imax == 2] = (((r - g) / deltac) + 4)[imax == 2] 85 | 86 | h: torch.Tensor = 2. * kornia.constants.pi.to(image.device) * (60. * hi) / 360. # hue [0, 2*pi] 87 | 88 | image_hls: torch.Tensor = torch.stack([h, l, s], dim=-3) 89 | 90 | image_hls[torch.isnan(image_hls)] = 0. 91 | 92 | return image_hls 93 | 94 | def hls_to_rgb(image: torch.Tensor) -> torch.Tensor: 95 | r"""Convert an HLS image to RGB 96 | The image data is assumed to be in the range of (0, 1). 97 | 98 | Args: 99 | input (torch.Tensor): HLS Image to be converted to RGB. 100 | 101 | 102 | Returns: 103 | torch.Tensor: RGB version of the image. 104 | """ 105 | 106 | if not torch.is_tensor(image): 107 | raise TypeError("Input type is not a torch.Tensor. 
Got {}".format( 108 | type(image))) 109 | 110 | if len(image.shape) < 3 or image.shape[-3] != 3: 111 | raise ValueError("Input size must have a shape of (*, 3, H, W). Got {}" 112 | .format(image.shape)) 113 | 114 | h: torch.Tensor = image[..., 0, :, :] * 360 / (2 * kornia.constants.pi.to(image.device)) 115 | l: torch.Tensor = image[..., 1, :, :] 116 | s: torch.Tensor = image[..., 2, :, :] 117 | 118 | kr = (0 + h / 30) % 12 119 | kg = (8 + h / 30) % 12 120 | kb = (4 + h / 30) % 12 121 | a = s * torch.min(l, torch.tensor(1.) - l) 122 | 123 | ones_k = torch.ones_like(kr) 124 | 125 | fr: torch.Tensor = l - a * torch.max(torch.min(torch.min(kr - torch.tensor(3.), 126 | torch.tensor(9.) - kr), ones_k), -1 * ones_k) 127 | fg: torch.Tensor = l - a * torch.max(torch.min(torch.min(kg - torch.tensor(3.), 128 | torch.tensor(9.) - kg), ones_k), -1 * ones_k) 129 | fb: torch.Tensor = l - a * torch.max(torch.min(torch.min(kb - torch.tensor(3.), 130 | torch.tensor(9.) - kb), ones_k), -1 * ones_k) 131 | 132 | out: torch.Tensor = torch.stack([fr, fg, fb], dim=-3) 133 | 134 | return out -------------------------------------------------------------------------------- /deepformable/utils/inpaint_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import numpy as np 3 | import cv2 4 | import multiprocessing, queue 5 | import threading 6 | 7 | class WorkerBase: 8 | def __init__( 9 | self, 10 | processing_workers=1, 11 | writing_workers=1, 12 | max_task_size=32, 13 | max_write_size=16, 14 | use_multiprocessing=False, 15 | ): 16 | self.use_multiprocessing = use_multiprocessing 17 | if use_multiprocessing: 18 | self.tasks = multiprocessing.JoinableQueue(maxsize=max_task_size) 19 | self.results = multiprocessing.JoinableQueue(maxsize=max_write_size) 20 | self.processing_workers = [ 21 | multiprocessing.Process(target=self.processing_worker) for _ in range(processing_workers)] 22 | self.writing_workers = [ 23 | multiprocessing.Process(target=self.writing_worker) for _ in range(writing_workers)] 24 | else: 25 | self.tasks = queue.Queue(maxsize=max_task_size) 26 | self.results = queue.Queue(maxsize=max_write_size) 27 | self.processing_workers = [ 28 | threading.Thread(target=self.processing_worker) for _ in range(processing_workers)] 29 | self.writing_workers = [ 30 | threading.Thread(target=self.writing_worker) for _ in range(writing_workers)] 31 | for p in [*self.processing_workers, *self.writing_workers]: 32 | p.start() 33 | 34 | def wait_finish(self): 35 | for _ in self.processing_workers: 36 | self.tasks.put(None) 37 | self.tasks.join() 38 | for _ in self.writing_workers: 39 | self.results.put(None) 40 | self.results.join() 41 | for p in [*self.processing_workers, *self.writing_workers]: 42 | p.join() 43 | 44 | def processing_worker(self): 45 | while True: 46 | data = self.tasks.get() 47 | if data is None: 48 | self.tasks.task_done() 49 | break 50 | self.results.put(self.process_data(data)) 51 | self.tasks.task_done() 52 | 53 | def writing_worker(self): 54 | while True: 55 | result = self.results.get() 56 | if result is None: 57 | self.results.task_done() 58 | break 59 | self.write_result(result) 60 | self.results.task_done() 61 | 62 | def process_data(self, data): 63 | return data 64 | 65 | def write_result(self, result): 66 | print(result) 67 | 68 | def __call__(self, data): 69 | if len(self.processing_workers) == 0: 70 | if len(self.writing_workers) == 0: 71 | 
self.write_result(self.process_data(data)) 72 | else: 73 | self.results.put(self.process_data(data)) 74 | return 75 | self.tasks.put(data) 76 | 77 | 78 | class NoInpaint(WorkerBase): 79 | def __init__( 80 | self, 81 | processing_workers=0, 82 | writing_workers=8, 83 | max_task_size=24, 84 | max_write_size=24, 85 | use_multiprocessing=False, 86 | ): 87 | super().__init__( 88 | processing_workers, writing_workers, max_task_size, 89 | max_write_size, use_multiprocessing) 90 | 91 | def process_data(self, data): 92 | file_path, undistorted_frame, _, _, _ = data 93 | return file_path, undistorted_frame 94 | 95 | def write_result(self, result): 96 | cv2.imwrite(*result) 97 | 98 | 99 | def generate_marker_mask( 100 | undistorted_frame, 101 | cur_annotations, 102 | markers_world, 103 | mtx, 104 | margin_ratio=10, 105 | ): 106 | mask = np.zeros(undistorted_frame.shape) 107 | for ann, markers in zip(cur_annotations, markers_world): 108 | markersw_margin = (markers - np.roll(markers, 2, 1))/margin_ratio + markers 109 | markersw_margin = cv2.projectPoints( 110 | markersw_margin.reshape(-1,3), 111 | np.array(ann['rvec']), np.array(ann['tvec']), 112 | mtx, None)[0].reshape(-1, 4, 2) 113 | for p in markersw_margin: 114 | cv2.fillConvexPoly(mask, np.int32(p), (1.0, 1.0, 1.0), cv2.LINE_4) 115 | return mask 116 | 117 | 118 | class OpenCVInpaint(NoInpaint): 119 | def __init__( 120 | self, 121 | processing_workers=8, 122 | writing_workers=8, 123 | max_task_size=24, 124 | max_write_size=24, 125 | use_multiprocessing=False, 126 | ): 127 | super().__init__( 128 | processing_workers, writing_workers, max_task_size, 129 | max_write_size, use_multiprocessing) 130 | 131 | def process_data(self, data): 132 | file_path, undistorted_frame, cur_annotations, markers_world, mtx = data 133 | mask = generate_marker_mask(undistorted_frame, cur_annotations, markers_world, mtx) 134 | inpainted_frame = cv2.inpaint( 135 | np.uint8(undistorted_frame), 136 | np.uint8(mask[...,0]*255), 137 | 5, cv2.INPAINT_TELEA) 138 | return file_path, inpainted_frame 139 | 140 | if __name__ == '__main__': 141 | test_worker = WorkerBase( 142 | processing_workers=4, use_multiprocessing=True) 143 | for i in range(10, 23): 144 | # print(i) 145 | test_worker(i) 146 | test_worker.wait_finish() -------------------------------------------------------------------------------- /docker/Cpu.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | ENV DEBIAN_FRONTEND noninteractive 4 | 5 | RUN apt-get update && apt-get install -y --no-install-recommends \ 6 | ca-certificates git wget vim cmake ninja-build build-essential curl \ 7 | libjpeg-dev libpng-dev x11-apps v4l-utils unzip \ 8 | rsync ffmpeg psmisc libcairo2-dev libgif-dev libpango1.0-dev \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | WORKDIR /opt 12 | ENV LC_ALL C.UTF-8 13 | ENV LANG C.UTF-8 14 | 15 | RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 16 | && chmod +x ~/miniconda.sh \ 17 | && ~/miniconda.sh -b -p /opt/conda \ 18 | && rm ~/miniconda.sh \ 19 | && /opt/conda/bin/conda install -y python=3.7 \ 20 | && /opt/conda/bin/conda install -y pytorch=1.8.2 torchvision=0.9.2 cpuonly -c pytorch-lts \ 21 | && /opt/conda/bin/conda install -y tensorboard pycairo matplotlib scikit-image jupyter ninja cython \ 22 | typing future pytest scipy scikit-learn \ 23 | && /opt/conda/bin/conda install -y -c conda-forge plyfile \ 24 | && /opt/conda/bin/python -m pip install -U pycocotools==2.0.4 \ 25 | 
shapely==1.8.0 opencv-contrib-python==4.5.5.62 kornia==0.6.2 \ 26 | glfw ipympl pyrr future-fstrings PyOpenGL PyOpenGL_accelerate \ 27 | && /opt/conda/bin/conda clean -ya 28 | 29 | ENV PATH /opt/conda/bin:$PATH 30 | ENV PATH /root/.local/bin:$PATH 31 | ENV FVCORE_CACHE="/tmp" 32 | 33 | RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo \ 34 | && cd detectron2_repo && git checkout "v0.6" \ 35 | && MAX_JOBS=1 python -m pip install -e . 36 | 37 | # Installation command for original AprilTag implementation 38 | # RUN git clone --recurse-submodules https://github.com/AprilRobotics/apriltag.git \ 39 | # && cd apriltag && mkdir /root/.local && ln -s /opt/conda/lib /root/.local/lib \ 40 | # && mkdir build && cd build \ 41 | # && cmake -DPYTHON_EXECUTABLE=/opt/conda/bin/python -DPYTHON_PACKAGES_PATH=/opt/conda/lib/python3.8/site-packages/ \ 42 | # -DCMAKE_INSTALL_PREFIX=/opt/conda/ -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ 43 | # -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") ..\ 44 | # && make -j8 && make install 45 | 46 | WORKDIR / -------------------------------------------------------------------------------- /docker/DeepfillInpaint.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:1.15.2-gpu-py3-jupyter 2 | 3 | ENV DEBIAN_FRONTEND noninteractive 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 | python-opencv wget git cmake \ 6 | libcairo2-dev libjpeg-dev libpango1.0-dev \ 7 | libgif-dev build-essential 8 | 9 | RUN python -m pip install git+https://github.com/JiahuiYu/neuralgym \ 10 | && python -m pip uninstall -y enum34 \ 11 | && python -m pip install pyyaml opencv-python \ 12 | opencv-contrib-python tqdm Pillow pycairo shapely 13 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04 2 | # If this CUDA version is newer than your host supports, you must modify the above image to 3 | # a supported one and set cudatoolkit=$cuda_version below for the PyTorch installation. 
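# For example, an illustrative (untested) sketch of such a change -- the exact
# base image and cudatoolkit build must match your host driver and a PyTorch
# release that supports them:
#   FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
#   ... && /opt/conda/bin/conda install -y pytorch torchvision cudatoolkit=11.3 -c pytorch ...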
4 | 5 | ENV DEBIAN_FRONTEND noninteractive 6 | 7 | RUN apt-get update && apt-get install -y --no-install-recommends \ 8 | ca-certificates git wget vim cmake ninja-build build-essential curl \ 9 | libjpeg-dev libpng-dev x11-apps v4l-utils unzip \ 10 | rsync ffmpeg psmisc libcairo2-dev libgif-dev libpango1.0-dev \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | WORKDIR /opt 14 | ENV LC_ALL C.UTF-8 15 | ENV LANG C.UTF-8 16 | 17 | RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 18 | && chmod +x ~/miniconda.sh \ 19 | && ~/miniconda.sh -b -p /opt/conda \ 20 | && rm ~/miniconda.sh \ 21 | && /opt/conda/bin/conda install -y python=3.7 \ 22 | && /opt/conda/bin/conda install -y pytorch=1.8.2 torchvision=0.9.2 cudatoolkit=11.1 -c pytorch-lts -c nvidia \ 23 | && /opt/conda/bin/conda install -y tensorboard pycairo matplotlib scikit-image jupyter ninja cython \ 24 | typing future pytest scipy scikit-learn \ 25 | && /opt/conda/bin/conda install -y -c conda-forge plyfile \ 26 | && /opt/conda/bin/python -m pip install -U pycocotools==2.0.4 \ 27 | shapely==1.8.0 opencv-contrib-python==4.5.5.62 kornia==0.6.2 \ 28 | glfw ipympl pyrr future-fstrings PyOpenGL PyOpenGL_accelerate \ 29 | && /opt/conda/bin/conda clean -ya 30 | 31 | ENV PATH /opt/conda/bin:$PATH 32 | ENV PATH /root/.local/bin:$PATH 33 | ENV FORCE_CUDA="1" 34 | ARG TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" 35 | ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" 36 | ENV FVCORE_CACHE="/tmp" 37 | 38 | RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo \ 39 | && cd detectron2_repo && git checkout "v0.6" \ 40 | && python -m pip install -e . 41 | 42 | # Installation command for original AprilTag implementation 43 | # RUN git clone --recurse-submodules https://github.com/AprilRobotics/apriltag.git \ 44 | # && cd apriltag && mkdir /root/.local && ln -s /opt/conda/lib /root/.local/lib \ 45 | # && mkdir build && cd build \ 46 | # && cmake -DPYTHON_EXECUTABLE=/opt/conda/bin/python -DPYTHON_PACKAGES_PATH=/opt/conda/lib/python3.7/site-packages/ \ 47 | # -DCMAKE_INSTALL_PREFIX=/opt/conda/ -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ 48 | # -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") ..\ 49 | # && make -j8 && make install 50 | 51 | WORKDIR / -------------------------------------------------------------------------------- /docker/env_cpu.yml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - pytorch-lts 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - pytorch=1.8.2=py3.7_cpu_0 8 | - torchvision=0.9.2=py37_cpu 9 | - cpuonly=1.0=0 10 | - tensorboard 11 | - scikit-image 12 | - scikit-learn 13 | - scipy 14 | - pycairo 15 | - matplotlib 16 | - jupyter 17 | - jupyterlab 18 | - notebook 19 | - ninja 20 | - cython 21 | - typing 22 | - future 23 | - pytest 24 | - plyfile 25 | - pip: 26 | - opencv-contrib-python==4.5.5.62 27 | - kornia==0.6.2 28 | - pycocotools==2.0.4 29 | - shapely==1.8.0 30 | - glfw 31 | - ipympl 32 | - pyrr 33 | - future-fstrings 34 | - pyopengl 35 | - pyopengl-accelerate 36 | -------------------------------------------------------------------------------- /docker/env_gpu.yml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - pytorch-lts 4 | - nvidia 5 | - conda-forge 
6 | - defaults 7 | dependencies: 8 | - pytorch=1.8.2=py3.7_cuda11.1_cudnn8.0.5_0 9 | - torchvision=0.9.2=py37_cu111 10 | - cudatoolkit=11.1.74=h6bb024c_0 11 | - tensorboard 12 | - scikit-image 13 | - scikit-learn 14 | - scipy 15 | - pycairo 16 | - matplotlib 17 | - jupyter 18 | - jupyterlab 19 | - notebook 20 | - ninja 21 | - cython 22 | - typing 23 | - future 24 | - pytest 25 | - plyfile 26 | - pip: 27 | - opencv-contrib-python==4.5.5.62 28 | - kornia==0.6.2 29 | - pycocotools==2.0.4 30 | - shapely==1.8.0 31 | - glfw 32 | - ipympl 33 | - pyrr 34 | - future-fstrings 35 | - pyopengl 36 | - pyopengl-accelerate -------------------------------------------------------------------------------- /files/example_boards/charuco.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/charuco.pdf -------------------------------------------------------------------------------- /files/example_boards/markers_regular.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/markers_regular.pdf -------------------------------------------------------------------------------- /files/example_boards/random_board1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/random_board1.pdf -------------------------------------------------------------------------------- /files/example_boards/random_board2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/random_board2.pdf -------------------------------------------------------------------------------- /files/example_boards/two_markers_large.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_boards/two_markers_large.pdf -------------------------------------------------------------------------------- /files/example_input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_input.png -------------------------------------------------------------------------------- /files/example_video.mov: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/example_video.mov -------------------------------------------------------------------------------- /files/prediction_output.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/prediction_output.gif -------------------------------------------------------------------------------- /files/teaser.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KAIST-VCLAB/DeepFormableTag/c2039d2e3fe8069d49212b8d9fafefcf997a265d/files/teaser.jpg -------------------------------------------------------------------------------- /inpaint/.gitignore: -------------------------------------------------------------------------------- 1 | inpaint_weights/ -------------------------------------------------------------------------------- /inpaint/deepfill.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import numpy as np 3 | import sys 4 | from pathlib import Path 5 | import tensorflow as tf 6 | from deepfill_ops import init_inpaint_network, get_gpu_list 7 | 8 | # Import classes from deepformable library 9 | root_path = Path(__file__).parent.resolve() 10 | sys.path.insert(0, str(root_path.parent / "deepformable/utils")) 11 | from inpaint_utils import NoInpaint, generate_marker_mask 12 | 13 | class DeepfillInpaint(NoInpaint): 14 | def __init__( 15 | self, 16 | processing_workers=1, 17 | writing_workers=8, 18 | max_task_size=32, 19 | max_write_size=24, 20 | ): 21 | config=tf.ConfigProto() 22 | tf.reset_default_graph() 23 | self.sess = tf.Session(config=config) 24 | self.gpu_list = get_gpu_list() 25 | self.inpaint_inputs, self.inpaint_outputs = init_inpaint_network(self.sess, self.gpu_list) 26 | super().__init__( 27 | processing_workers, writing_workers, max_task_size, 28 | max_write_size, use_multiprocessing=False) 29 | 30 | def process_data(self, data): 31 | feed_dict = {} 32 | for i, (_, undistorted_frame, cur_annotations, markers_world, mtx) in enumerate(data): 33 | mask = generate_marker_mask(undistorted_frame, cur_annotations, markers_world, mtx) 34 | feed_dict[self.inpaint_inputs[i]] = (np.expand_dims(undistorted_frame, 0), np.expand_dims(mask, 0)) 35 | net_out = self.sess.run(self.inpaint_outputs[:len(feed_dict)], feed_dict=feed_dict) 36 | return [(p[0], img[0][...,[2,1,0]]) for p, img in zip(data, net_out)] 37 | # return [(p[0], p[1]) for p in data] 38 | 39 | # Modified this for multi-gpu batched input 40 | def processing_worker(self): 41 | worker_exit = False 42 | while not worker_exit: 43 | data = [] 44 | for _ in self.gpu_list: 45 | cur_task = self.tasks.get() 46 | if cur_task is None: 47 | worker_exit = True 48 | break 49 | else: 50 | data.append(cur_task) 51 | for result in self.process_data(data): 52 | self.results.put(result) 53 | self.tasks.task_done() 54 | self.tasks.task_done() -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | [isort] 3 | line_length=100 4 | multi_line_output=3 5 | include_trailing_comma=True 6 | known_standard_library=numpy,setuptools,mock 7 | skip=./datasets,docs 8 | skip_glob=*/__init__.py,**/configs/**,tests/config/** 9 | known_myself=deepformable 10 | known_third_party=cv2,torch,torchvision,PIL,tqdm,scipy,pkg_resources,shapely,detectron2 11 | no_lines_before=STDLIB,THIRDPARTY 12 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 13 | default_section=FIRSTPARTY 14 | 15 | [mypy] 16 | python_version=3.6 17 | ignore_missing_imports = True 18 | warn_unused_configs = True 19 | disallow_untyped_defs = True 20 | check_untyped_defs = True 21 | warn_unused_ignores = True 22 | warn_redundant_casts = True 23 | show_column_numbers = True 24 | follow_imports = silent 25 | allow_redefinition = True 26 | ; Require all functions to be annotated 27 
| disallow_incomplete_defs = True -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | !/usr/bin/env python 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | Edited by Mustafa B. Yaldiz (VCLAB, KAIST) 5 | """ 6 | import glob 7 | import os 8 | import shutil 9 | from os import path 10 | from setuptools import find_packages, setup 11 | from typing import List 12 | import torch 13 | # from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 14 | 15 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 16 | assert torch_ver >= [1, 6], "Requires PyTorch >= 1.6" 17 | 18 | 19 | def get_version(): 20 | init_py_path = path.join(path.abspath(path.dirname(__file__)), "deepformable", "__init__.py") 21 | init_py = open(init_py_path, "r").readlines() 22 | version_line = [l.strip() for l in init_py if l.startswith("__version__")][0] 23 | version = version_line.split("=")[-1].strip().strip("'\"") 24 | 25 | # The following is used to build release packages. 26 | # Users should never use it. 27 | suffix = os.getenv("D2_VERSION_SUFFIX", "") 28 | version = version + suffix 29 | if os.getenv("BUILD_NIGHTLY", "0") == "1": 30 | from datetime import datetime 31 | 32 | date_str = datetime.today().strftime("%y%m%d") 33 | version = version + ".dev" + date_str 34 | 35 | new_init_py = [l for l in init_py if not l.startswith("__version__")] 36 | new_init_py.append('__version__ = "{}"\n'.format(version)) 37 | with open(init_py_path, "w") as f: 38 | f.write("".join(new_init_py)) 39 | return version 40 | 41 | # PROJECTS = {} 42 | 43 | setup( 44 | name="deepformable", 45 | version=get_version(), 46 | author="Mustafa B. YALDIZ", 47 | url="https://github.com/KAIST-VCLAB/DeepFormableTag", 48 | description="DeepformableTag is a data-driven fiducial marker system.", 49 | packages=find_packages(), 50 | python_requires=">=3.7", 51 | # install_requires=[ 52 | # "detectron2>=0.4.1", 53 | # "shapely>=1.7.1", 54 | # ], 55 | ) -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # DeepFormableTag Tools Instructions 2 | 3 | The **tools** folder includes some of the necessary tools and 4 | components to interact with the marker system. 5 | It provides functionality to create datasets, evaluate models, and 6 | visualize the predictions. 7 | We will provide the training code here in the near future. 8 | 9 | Here is a short summary of what each file does: 10 | - [preprocess_dataset.py](#preprocessing-videos-into-dataset) takes a folder of videos and 11 | preprocesses it into the modified COCO format for training. 12 | - [generate_board_json.py](#marker-config-file) generates json files with random 13 | board arrangements. 14 | - [generate_board_pdf.py](#generating-board-pdfs) generates vector-graphics pdfs 15 | of the boards. 16 | - [calibrate.py](#calibration) uses video frames to calibrate cameras; the calibration is later 17 | used to rectify the images while generating the COCO-formatted dataset. 18 | - [predictor_demo.py](../README.md/#predictor-demo) visualizes the predictions. 19 | - [eval.py](../README.md/#evaluation-on-test-data) evaluates a model given the dataset. 20 | 21 | ## Preprocessing Videos into Dataset 22 | 23 | In the preprocessing step, our `tools/preprocess_dataset.py` applies the following steps to create the dataset. 24 | 1. 
Loads videos from the provided directories 25 | 2. For each video frame, detects the markers and board positions, and creates annotations 26 | 3. Inpaints the markers (optional) 27 | 4. Saves the processed frames and combines annotations in the COCO format 28 | 29 | - There are three different inpainting methods. To use the DeepFill method, which we used to create our training and testing datasets, build and run the environment: 30 | ```bash 31 | # Creates inpainting environment 32 | docker build -t deepfill-inpaint -f docker/DeepfillInpaint.Dockerfile . 33 | # Runs the preprocessing code 34 | docker run --rm -it --runtime=nvidia --ipc=host -v $PWD:/host -v /home/myaldiz/Data/Deepformable:/Data deepfill-inpaint \ 35 | /bin/sh -c 'cd /host; python tools/preprocess_dataset.py -v -i /Data/Dataset/train-raw/ -o /Data/Dataset/train --inpaint-method deepfill' 36 | ``` 37 | - The `preprocess_dataset.py` file has several options you might want to use: 38 | - The `--least-pose-markers` option ignores a detected board if the provided number of markers is not detected for that board. 39 | - `--skip-frames` skips frames during processing. You can use this option to generate a toy dataset. 40 | - `-i` specifies the input folder for the dataset; `-o` is the output folder. 41 | - The `-v` option prints the progress to the terminal. 42 | - You can use the OpenCV inpainting method as well, but the inpainting quality is not as good, so we recommend deepfill. 43 | However, deepfill requires more GPU memory (>12GB). 44 | - You need to [download weights](https://drive.google.com/drive/folders/1y7Irxm3HSHGvp546hZdAZwuNmhLUVcjO) for deepfill into the `inpaint/inpaint_weights` folder. 45 | 46 | You can download training and testing videos from [this link](https://drive.google.com/drive/folders/1picphIb6Hbj6pM3Wu_Vxu53wzKBV0jdV?usp=sharing). 47 | 48 | ## Preparing a Custom Dataset 49 | 50 | Dataset preparation requires a variety of scripts to process the video frames into the final COCO-format dataset. 51 | In summary, you need to: 52 | - Generate the `config.json` file with the boards 53 | - Generate PDFs for the boards and capture datasets 54 | - Calibrate cameras with the charuco board and save the parameters to the config 55 | - Capture and preprocess videos into frames. 56 | 57 | ### Marker Config File 58 | 59 | The marker config file is a `json`-format file that specifies board arrangements. 60 | We provide a [template config file](../files/template_config.json). 61 | Here is some information about the keys: 62 | - `aruco_dict` is used to generate aruco markers for the board. 63 | - `video_dir` is the folder to search for video files. 64 | - `calib_video` is the video that will be used to calibrate the camera. 65 | - `boards` provides information about the boards to be detected. 66 | - You can provide a name for the board to be created, paper margin sizes, and 67 | descriptions (i.e., location, id) of the markers to be placed. 68 | - `markers` provides the class-id text, binary message, and name for the markers 69 | used in the visualization demos. 70 | ```json 71 | { 72 | "marker_id": 0, 73 | "binary": "111011011001000111101111100011011011", 74 | "text": "informational" 75 | } 76 | ``` 77 | 78 | The `tools/generate_board_json.py` script reads the board sizes written in the config file and replaces them with random marker configurations. You need to enter the board names, type, and dims. An example template config file is given at [template_config.json](../files/template_config.json). 
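For illustration only, a board entry in this config roughly follows the sketch below. The `board_name`, `paper_type`, `paper_margins`, and `descriptions` keys appear in the loader code, but the fields inside each description entry are hypothetical here; consult the template for the authoritative schema:
```json
{
  "board_name": "my_board",
  "paper_type": "a4",
  "paper_margins": 10.5,
  "descriptions": [
    {"type": "marker", "marker_id": 0, "location": [20.0, 30.0], "size": [60, 60]}
  ]
}
```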
79 | Here is an example script: 80 | ```bash 81 | python tools/generate_board_json.py -i files/template_config.json -o output/config.json 82 | ``` 83 | 84 | ### Generating Board PDFs 85 | 86 | Here is an example script to generate PDFs of the boards: 87 | ```bash 88 | python tools/generate_board_pdf.py -i tools/config.json -o tools/boards 89 | ``` 90 | In the board descriptions of the config file, if the marker type is `aruco`, the script will produce aruco markers. 91 | However, if the type is `marker`, the supplied model will be used to generate the markers, as below: 92 | ```bash 93 | python tools/generate_board_pdf.py -i files/template_config.json -o output/boards \ 94 | --marker-config-file configs/deepformable-main.yaml \ 95 | --model-weights models/deepformable_model.pth 96 | ``` 97 | Print the board PDFs without scaling; this way the dimensions specified in the `location` key for each marker will match the printed size. 98 | 99 | ### Calibration 100 | 101 | The calibration script is located at `tools/calibrate.py`. The config file must include the relative path to the calibration video or folder, such as `calib_video: "../../calib/canon_28mm_5x5.MOV"`. The following script will calculate the camera calibration parameters: 102 | ```bash 103 | python tools/calibrate.py -i /Data/Datasets/PlacementDataset_Nov2/train-raw/28mm/config.json 104 | ``` 105 | Notes: 106 | - To save the parameters into the json file, use the `-s` option. 107 | - If you are using the datasets we provided, they most likely already include calibration parameters, so there is no need to run the script again. 108 | - Our code thresholds blurry frames. Depending on the video length, this may take a long time. You can change the ratio of selected frames using the arguments. 109 | 110 | ## Miscellaneous 111 | 112 | ### Running a Docker Container with X11 Window Support on Mac 113 | In order to create windows from Docker on Mac, you can follow the steps below, taken from [stackoverflow](https://stackoverflow.com/questions/37826094/xt-error-cant-open-display-if-using-default-display). 114 | ```bash 115 | # Install the required tools on the host 116 | brew install socat 117 | brew install --cask xquartz 118 | # From XQuartz Preferences/Security allow connections from network clients 119 | 120 | # Create a port for the display 121 | socat TCP-LISTEN:6000,reuseaddr,fork UNIX-CLIENT:\"$DISPLAY\" 122 | 123 | # In another terminal, run below to create the container 124 | docker run --rm -it --privileged --ipc=host -v $PWD:/host -e DISPLAY=docker.for.mac.host.internal:0 -v /tmp/.X11-unix:/tmp/.X11-unix deepformable /bin/sh -c 'cd /host; python -m pip install -e .; bash' 125 | ``` 126 | -------------------------------------------------------------------------------- /tools/calibrate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved.
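"""
Camera calibration from a charuco calibration video.

Given a dataset config json (-i), this script reads the video referenced by the
`calib_video` key, detects charuco corners in each frame, keeps only the sharpest
frames in which the whole board is detected (controlled by --sharpness-cut-ratio
and --random-cut-ratio), and estimates the camera matrix and distortion
coefficients with cv2.calibrateCamera. Use -s to write the resulting
`calib_mtx` / `calib_dist` entries back into the same config file, or -t to
re-evaluate the reprojection error of parameters already stored in the config.
"""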
2 | import json, argparse, random 3 | import cv2 4 | from cv2 import aruco 5 | import numpy as np 6 | from pathlib import Path 7 | 8 | from deepformable.utils import ( 9 | img_flexible_reader, get_aruco_dict, detect_aruco_markers, calculate_board_dims) 10 | 11 | def detect_charuco_corners( 12 | img, 13 | aruco_dict, 14 | charuco_board, 15 | min_corners=5, 16 | ): 17 | gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.shape[-1]==3 else img 18 | marker_corners, ids, _ = detect_aruco_markers(gray_img, aruco_dict) 19 | num_corners, img_corners, corner_ids = cv2.aruco.interpolateCornersCharuco( 20 | marker_corners, ids, gray_img, charuco_board) 21 | if num_corners >= min_corners: 22 | row_size = charuco_board.getChessboardSize()[0] 23 | corner_world = np.zeros((num_corners, 3), np.float32) 24 | for i, index in enumerate(corner_ids): 25 | corner_world[i, 0] = index[0] % (row_size - 1) 26 | corner_world[i, 1] = index[0] // (row_size - 1) 27 | return num_corners, np.squeeze(img_corners), np.squeeze(corner_world) 28 | return 0, None, None 29 | 30 | 31 | def calculate_sharpness(img, pts): 32 | gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.shape[-1]==3 else img 33 | mask = np.zeros(gray_img.shape, dtype=np.uint8) 34 | cv2.fillConvexPoly(mask, pts, 255) 35 | masked_values = cv2.Laplacian(gray_img, cv2.CV_64F)[mask == 255] 36 | return masked_values.std() * (masked_values.shape[0] ** 0.5) 37 | 38 | 39 | def calculate_reprojection_error(frames_info, mtx, dist): 40 | repr_dist = [] 41 | for _, img_corners, corner_world in frames_info: 42 | retval, rvec, tvec = cv2.solvePnP(corner_world, img_corners, mtx, dist) 43 | projected_points, _ = cv2.projectPoints(corner_world, rvec, tvec, mtx, dist) 44 | dif = projected_points.squeeze() - img_corners.squeeze() 45 | repr_dist.append(np.linalg.norm(dif, axis=1)) 46 | 47 | repr_dist = np.concatenate(repr_dist) 48 | return np.average(repr_dist), np.std(repr_dist) 49 | 50 | 51 | def setup_argparse(): 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument( 54 | '-i', '--input', type=str, default='config.json', help='Input config file') 55 | parser.add_argument( 56 | '-s', '--save-params', action='store_true', help='Saves calibration params to provided json file') 57 | parser.add_argument( 58 | '--sharpness-cut-ratio', type=float, default=0.7, help='Ignores remaining portion of frames, sorted by sharpnes') 59 | parser.add_argument( 60 | '--random-cut-ratio', type=float, default=0.5, help='Ignores close frames by provided extent') 61 | parser.add_argument( 62 | '-v', '--verbose', action='store_true', help='Show steps if enabled') 63 | parser.add_argument( 64 | '-t', '--test-params', action='store_true', help='Test reprojection error with current params') 65 | return parser 66 | 67 | 68 | if __name__ == '__main__': 69 | args = setup_argparse().parse_args() 70 | 71 | # Read the config json into python format 72 | data_json_path = Path(args.input) 73 | with open(data_json_path) as cfg_file: 74 | cfg = json.load(cfg_file) 75 | if args.verbose: print("Config loaded!") 76 | 77 | # Get necessary values for calibration 78 | calib_video_path = (data_json_path.parent / cfg["calib_video"]).resolve() 79 | aruco_dict = get_aruco_dict(cfg['aruco_dict']) 80 | boards_dict = {i['board_name']: i for i in cfg['boards']} 81 | charuco_board_info = boards_dict['charuco']['descriptions'][0] 82 | 83 | board_dims = calculate_board_dims(boards_dict['charuco']) 84 | charuco_scale = float(min([board_dims[i] / charuco_board_info['size'][i] for i in range(2)]) // 1) 85 | dims 
= (charuco_scale * charuco_board_info['size'][1], charuco_scale * charuco_board_info['size'][0]) 86 | 87 | tag_length = aruco_dict.markerSize + 2 * cfg['border_bits'] 88 | square_length = 2 * charuco_board_info['tag_border'] + tag_length 89 | charuco_board = aruco.CharucoBoard_create(*charuco_board_info['size'], square_length, tag_length, aruco_dict) 90 | board_size = charuco_board.getChessboardSize() 91 | 92 | if args.verbose: print("Dims parsed, detecting corners") 93 | frames_info = [] 94 | for frame in img_flexible_reader(calib_video_path, tqdm_on=args.verbose): 95 | frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 96 | num_corners, img_corners, corner_world = detect_charuco_corners( 97 | frame_gray, aruco_dict, charuco_board, 98 | min_corners=(board_size[0] - 1) * (board_size[1] - 1)) # Accept only if all corners are detected 99 | 100 | if num_corners == 0: continue 101 | 102 | pts = np.array([ 103 | img_corners[0], img_corners[board_size[0] - 2], 104 | img_corners[-1], img_corners[-board_size[0] + 1]], dtype=np.int32) # Provide corners of the board 105 | sharpness_value = calculate_sharpness(frame_gray, pts) 106 | frames_info.append((sharpness_value, img_corners, corner_world * charuco_scale)) 107 | 108 | # Works better than previous calibration implementation 109 | if args.verbose: print("Thresholding frames.") 110 | cut_index = int(len(frames_info) * args.sharpness_cut_ratio) 111 | thresh_frames_info = sorted(frames_info, reverse=True, key=lambda x: x[0])[:cut_index] 112 | random_select_index = int(len(thresh_frames_info) * args.random_cut_ratio) 113 | random.shuffle(thresh_frames_info) 114 | thresh_frames_info = thresh_frames_info[:random_select_index] 115 | 116 | image_points = np.array([i[1] for i in thresh_frames_info]) 117 | world_points = np.array([i[2] for i in thresh_frames_info]) 118 | 119 | if not args.test_params: 120 | if args.verbose: print("Calibrating...") 121 | ret, mtx, dist, _, _ = cv2.calibrateCamera( 122 | world_points,image_points,(frame.shape[1], frame.shape[0]), None, None) 123 | if args.verbose: 124 | avg, std = calculate_reprojection_error(thresh_frames_info, mtx, dist) 125 | print("Training(selected frames) Reprojection error:{:.4f}, std: {:.4f} for selected frames".format(avg, std)) 126 | avg, std = calculate_reprojection_error(frames_info, mtx, dist) 127 | print("Testing (all frames) Reprojection error:{:.4f}, std: {:.4f} for selected frames".format(avg, std)) 128 | else: 129 | mtx, dist = np.array(cfg["calib_mtx"]), np.array(cfg["calib_dist"]) 130 | avg, std = calculate_reprojection_error(frames_info, mtx, dist) 131 | print("Testing (all frames) Reprojection error:{:.4f}, std: {:.4f} for selected frames".format(avg, std)) 132 | 133 | if args.save_params: 134 | cfg["calib_mtx"] = mtx.tolist() 135 | cfg["calib_dist"] = dist.tolist() 136 | with open(data_json_path, 'w') as cfg_file: 137 | json.dump(cfg, cfg_file, indent=4) 138 | if args.verbose: print("Config saved!") -------------------------------------------------------------------------------- /tools/generate_board_json.py: -------------------------------------------------------------------------------- 1 | """ 2 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 3 | This code creates random board configurations. 
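For each non-charuco board in the input config it samples random, non-overlapping
marker placements (see --random-trials, --marker-min/--marker-max and --safety-size),
and unless --generate-aruco is given it also assigns unique binary messages plus
random English words as human-readable labels to the markers.

Example:
    python tools/generate_board_json.py -i files/template_config.json -o output/config.json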
4 | """ 5 | import json, argparse 6 | from pathlib import Path 7 | import numpy as np 8 | import requests 9 | 10 | from deepformable.utils.general_utils import if_continue_execution 11 | from deepformable.utils import ( 12 | if_continue_execution, get_aruco_dict, 13 | calculate_board_dims, marker_placer 14 | ) 15 | 16 | def setup_argparse(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument( 19 | '-i', '--input', type=str, default='config.json', help='Input config file') 20 | parser.add_argument( 21 | '-o', '--output', type=str, default='out.json', help='Output config file') 22 | parser.add_argument( 23 | '--random-trials', type=int, default=200, help='Number of random trials to place non-overlapping markers') 24 | parser.add_argument( 25 | '--marker-min', type=int, default=25, help="Smallest marker size in cm's") 26 | parser.add_argument( 27 | '--marker-max', type=int, default=110, help="Biggest marker size in cm's") 28 | parser.add_argument( 29 | '--safety-size', type=int, default=10, help="Marker safety distance to each other") 30 | parser.add_argument( 31 | '--generate-aruco', action='store_true', help='Marker type will be aruco instead of general markers') 32 | parser.add_argument( 33 | '--num-bits', type=int, default=36, help="Number of bits markers encode") 34 | parser.add_argument( 35 | '--num-markers', type=int, default=0, help="Number of markers to be used") 36 | 37 | return parser 38 | 39 | 40 | if __name__ == '__main__': 41 | args = setup_argparse().parse_args() 42 | 43 | # Read the config json into python format 44 | data_json_path = Path(args.input) 45 | with open(data_json_path) as cfg_file: 46 | cfg = json.load(cfg_file) 47 | print("Config loaded!") 48 | 49 | aruco_dict = get_aruco_dict(cfg['aruco_dict']) 50 | 51 | num_markers, num_bits = args.num_markers, args.num_bits 52 | if args.generate_aruco and num_markers == 0: 53 | num_markers = len(aruco_dict.bytesList) 54 | assert num_markers > 0, "Enter positive number for the number of markers" 55 | 56 | # Standard settings 57 | p_reg, p_reg_rand=[0.0,0.25,0.25,0.25,0.25,0.0], [0.5,0.5,0.0] 58 | # p_reg, p_reg_rand = [1/6,1/6,1/6,1/6,1/6,1/6], [1/3, 1/3, 1/3]) 59 | # p_reg, p_reg_rand=[0.1,0.2,0.2,0.2,0.2,0.1], [0.4,0.4,0.2] 60 | # p_reg, p_reg_rand = [0,0,0,0,1,0], [1.0,0.0,0.0] 61 | 62 | # Generate markers 63 | if not args.generate_aruco: 64 | # Generate unique binary messages 65 | binary_messages = np.unique( 66 | np.random.randint(0, 2, (num_markers, num_bits)), axis=0) 67 | while len(binary_messages) != num_markers: 68 | additional_messages = np.random.randint( 69 | 0, 2, (num_markers-len(binary_messages), num_bits)) 70 | binary_messages = np.concatenate([binary_messages, additional_messages], axis=0) 71 | binary_messages = np.unique(binary_messages, axis=0) 72 | 73 | # Load some random english words from web as a message 74 | word_site = "https://www.mit.edu/~ecprice/wordlist.10000" 75 | response = requests.get(word_site) 76 | words = [ 77 | word.decode("utf-8") for word in response.content.splitlines() 78 | if len(word) > 3 79 | ] if response.ok else None 80 | text_messages = np.random.choice(words, num_markers, replace=False) 81 | 82 | # Generate information and save it to config 83 | markers = [] 84 | for index in range(num_markers): 85 | markers.append({ 86 | "marker_id": index, 87 | "binary": "".join([str(i) for i in binary_messages[index]]), 88 | "text": text_messages[index] if text_messages is not None else "" 89 | }) 90 | cfg['markers'] = markers 91 | 92 | 93 | # Generate boards 94 | class_indexes = 
list(range(num_markers)) 95 | for board in cfg['boards']: 96 | if board['board_name'] == 'charuco': 97 | continue 98 | board_dims = calculate_board_dims(board) 99 | markers, marker_indexes = marker_placer( 100 | board_dims, 101 | random_trials=args.random_trials, 102 | marker_min=args.marker_min, marker_max=args.marker_max, 103 | class_array=class_indexes, safety_size=args.safety_size, 104 | p_reg=p_reg, p_reg_rand=p_reg_rand) 105 | 106 | descriptions = [] 107 | for marker, marker_id in zip(markers, marker_indexes): 108 | description = { 109 | "type": "aruco" if args.generate_aruco else "marker", 110 | "location": marker.tolist(), 111 | "marker_id": int(marker_id), 112 | } 113 | descriptions.append(description) 114 | 115 | board['descriptions'] = descriptions 116 | 117 | if len(class_indexes) == 0: 118 | break 119 | 120 | if args.output != '': 121 | if args.input == args.output and not if_continue_execution( 122 | "This will override input file, continue? (yes/no): "): 123 | exit(0) 124 | print("Saving confing!") 125 | with open(args.output, 'w') as cfg_file: 126 | json.dump(cfg, cfg_file, indent=4) 127 | -------------------------------------------------------------------------------- /tools/generate_board_pdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | """ 3 | This code creates pdfs for given configs. 4 | """ 5 | import os, json, argparse 6 | from pathlib import Path 7 | from collections import OrderedDict 8 | import cairo 9 | import numpy as np 10 | from cv2 import aruco 11 | import torch 12 | 13 | from deepformable.utils import ( 14 | get_aruco_dict, calculate_board_dims, get_cfg) 15 | from deepformable.modeling import build_marker_generator 16 | 17 | def setup_cairo(board, output_file): 18 | board_dims = calculate_board_dims(board) 19 | width, height = [i * 7.2 / 2.54 for i in board_dims] 20 | cairo_surface = cairo.PDFSurface(output_file, width, height) 21 | cairo_ctx = cairo.Context(cairo_surface) 22 | cairo_ctx.scale(7.2 / 2.54, 7.2 / 2.54) # Scale to mm back 23 | return cairo_surface, cairo_ctx 24 | # print("Cairo setup is done, paper scales are: {:.2f}mm-{:2f}mm".format(paper_size[0], paper_size[1])) 25 | # print("Please don't select scale to fit option during printing for accurate board scales") 26 | 27 | def draw_marker(cairo_ctx, marker, locations): 28 | marker = np.array(marker) 29 | if marker.shape[-1] != 3: 30 | marker = np.repeat(marker, 3, axis=-1).reshape(*marker.shape, 3) 31 | locations = np.array(locations)[:,:2] 32 | x_tick = (locations[1] - locations[0])/marker.shape[0] 33 | y_tick = (locations[2] - locations[0])/marker.shape[1] 34 | for i in range(marker.shape[1]): 35 | for j in range(marker.shape[0]): 36 | if (marker[j,i] == [1,1,1]).all(): 37 | continue 38 | pos = (locations[0] + i * x_tick + j * y_tick) 39 | cairo_ctx.move_to(*(locations[0] + i * x_tick + j * y_tick)) 40 | cairo_ctx.line_to(*(locations[0] + (i+1) * x_tick + j * y_tick)) 41 | cairo_ctx.line_to(*(locations[0] + (i+1) * x_tick + (j+1) * y_tick)) 42 | cairo_ctx.line_to(*(locations[0] + (i) * x_tick + (j+1) * y_tick)) 43 | cairo_ctx.close_path() 44 | cairo_ctx.set_source_rgb(*marker[j,i]) 45 | cairo_ctx.fill_preserve() 46 | cairo_ctx.set_line_width (0.001) 47 | cairo_ctx.set_source_rgb(*marker[j,i]) 48 | cairo_ctx.stroke() 49 | 50 | def draw_rectangle(cairo_ctx, locations, color=(0.0,0.0,0.0)): 51 | return draw_marker(cairo_ctx, np.array([[color]]), locations) 52 | 53 | def 
draw_cutlines(cairo_ctx, locations, margin, color=(0.0,0.0,0.0)): 54 | locations = np.array(locations)[:,:2] 55 | x_tick = (locations[1] - locations[0]) 56 | x_tick /= np.linalg.norm(x_tick) 57 | y_tick = (locations[2] - locations[0]) 58 | y_tick /= np.linalg.norm(y_tick) 59 | 60 | locations[0] -= (x_tick+y_tick)*margin 61 | locations[1] += margin*(x_tick-y_tick) 62 | locations[3] += margin*(x_tick+y_tick) 63 | locations[2] -= margin*(x_tick-y_tick) 64 | 65 | cairo_ctx.set_line_width(0.1) 66 | cairo_ctx.move_to(*locations[0]) 67 | cairo_ctx.line_to(*locations[1]) 68 | cairo_ctx.line_to(*locations[3]) 69 | cairo_ctx.line_to(*locations[2]) 70 | 71 | cairo_ctx.close_path() 72 | cairo_ctx.set_source_rgb(*color) 73 | cairo_ctx.stroke() 74 | 75 | def draw_marker_board(cairo_ctx, board, markers): 76 | aruco_markers, markers = markers 77 | for d in board["descriptions"]: 78 | # Get location for the object to draw 79 | loc = d['location'] 80 | if d['type'] == 'marker': 81 | draw_marker(cairo_ctx, markers[d['marker_id']], loc) 82 | elif d['type'] == 'aruco': 83 | draw_marker(cairo_ctx, aruco_markers[d['marker_id']], loc) 84 | elif d['type'] == 'rectangle': 85 | draw_rectangle(cairo_ctx, loc, d['color']) 86 | else: 87 | raise ValueError("Unknown type of element, possible ones: marker, aruco, rectangle") 88 | 89 | def draw_cutlines_board(cairo_ctx, board): 90 | for d in board["descriptions"]: 91 | loc = d['location'] 92 | if d['type'] == 'marker': 93 | draw_cutlines(cairo_ctx, loc, d.get('cutline_margins', 5)) 94 | elif d['type'] == 'aruco': 95 | draw_cutlines(cairo_ctx, loc, d.get('cutline_margins', 5)) 96 | elif d['type'] == 'rectangle': 97 | draw_cutlines(cairo_ctx, loc, d.get('cutline_margins', 0)) 98 | else: 99 | raise ValueError("Unknown type of element, possible ones: marker, aruco, rectangle") 100 | 101 | def setup_argparse(): 102 | parser = argparse.ArgumentParser() 103 | parser.add_argument( 104 | '-i', '--input', type=str, default='config.json', help='Input config file') 105 | parser.add_argument( 106 | '-o', '--output', type=str, default='boards', help='Output board directory') 107 | parser.add_argument( 108 | '-d', '--draw-cutlines', action='store_true', help='Creates cutline pdfs') 109 | parser.add_argument( 110 | '--marker-config-file', type=str, default='config.yaml', help='marker model config file') 111 | parser.add_argument( 112 | '--model-weights', type=str, default='weights.pth', help='marker model config file') 113 | return parser 114 | 115 | 116 | if __name__ == '__main__': 117 | args = setup_argparse().parse_args() 118 | 119 | # Read the config json into python format 120 | data_json_path = Path(args.input) 121 | with open(data_json_path) as cfg_file: 122 | cfg = json.load(cfg_file) 123 | print("Config loaded!") 124 | 125 | # Load the aruco markers 126 | aruco_dict = get_aruco_dict(cfg['aruco_dict']) 127 | tag_length = aruco_dict.markerSize + 2 * cfg['border_bits'] 128 | markers_aruco = [] 129 | for i in range(aruco_dict.bytesList.shape[0]): 130 | markers_aruco.append(aruco_dict.drawMarker(i, tag_length, borderBits=cfg['border_bits'])) 131 | 132 | # Load the model markers 133 | markers_model = None 134 | config_path = Path(args.marker_config_file) 135 | model_weights_path = Path(args.model_weights) 136 | if 'markers' in cfg and config_path.exists() and model_weights_path.exists(): 137 | # Load config 138 | model_cfg = get_cfg() 139 | model_cfg.merge_from_file(config_path) 140 | 141 | # Change default device if GPU is not available 142 | if not torch.cuda.is_available(): 143 | 
model_cfg.MODEL.DEVICE = "cpu" 144 | 145 | # Get markers into tensor 146 | markers = sorted(cfg['markers'], key=lambda x: x['marker_id']) 147 | binary_messages = torch.tensor([[float(i) for i in m['binary']] for m in markers]) 148 | # Construct generator and load weights 149 | model_cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(binary_messages) 150 | marker_generator = build_marker_generator(model_cfg) 151 | weights = torch.load(model_weights_path, map_location=model_cfg.MODEL.DEVICE) 152 | state_dict = marker_generator.state_dict() 153 | converted_weights = OrderedDict() 154 | for key, value in weights['model'].items(): 155 | if 'marker_generator' in key: 156 | items = key.split('.') 157 | param_name = ".".join(items[items.index('marker_generator')+1:]) 158 | if param_name in state_dict: 159 | converted_weights[param_name] = value 160 | else: 161 | print("- Ignoring:", param_name) 162 | marker_generator.load_state_dict(converted_weights) 163 | marker_generator.messages = binary_messages.to(marker_generator.device) 164 | print("Model loaded!") 165 | markers_model = marker_generator.get_markers_numpy([i for i in range(len(binary_messages))]) 166 | else: 167 | print("WARNING: Could not load the model!") 168 | 169 | markers = np.array(markers_aruco), markers_model 170 | 171 | os.makedirs(args.output, exist_ok=True) 172 | output_path = Path(args.output) 173 | 174 | charuco_board = None 175 | for board in cfg['boards']: 176 | if board['board_name'] == "charuco": 177 | charuco_board = board 178 | continue 179 | 180 | output_file = str(output_path / f"{board['board_name']}.pdf") 181 | cairo_surface, cairo_ctx = setup_cairo(board, output_file) 182 | draw_marker_board(cairo_ctx, board, markers) 183 | 184 | cairo_surface.flush() 185 | cairo_surface.finish() 186 | 187 | # Also create cutlines if requested 188 | if args.draw_cutlines: 189 | output_file = str(output_path / f"{board['board_name']}_cutline.pdf") 190 | cairo_surface, cairo_ctx = setup_cairo(board, output_file) 191 | draw_cutlines_board(cairo_ctx, board) 192 | cairo_surface.flush() 193 | cairo_surface.finish() 194 | 195 | # Draw charuco board if exists 196 | if charuco_board: 197 | # Calculate paper dimensions for the charuco 198 | charuco_board_info = charuco_board['descriptions'][0] 199 | dims = charuco_board_info['dims'] 200 | if dims == "max": 201 | board_dims = calculate_board_dims(charuco_board) 202 | charuco_scale = float(min([board_dims[i] / charuco_board_info['size'][i] for i in range(2)]) // 1) 203 | dims = (charuco_scale * charuco_board_info['size'][1], charuco_scale * charuco_board_info['size'][0]) 204 | elif dims == 2: 205 | charuco_scale = dims[0] / charuco_board_info['size'][1] 206 | else: 207 | print("Please provide 2 dimensional size for the charuco dimensions") 208 | raise 209 | print("Charuco unit size(length of two neighbouring corners) is calculated as {}mm".format(charuco_scale)) 210 | 211 | cairo_surface, cairo_ctx = setup_cairo( 212 | charuco_board, str(output_path / "charuco.pdf")) 213 | 214 | # Create the board 215 | square_length = 2 * charuco_board_info['tag_border'] + tag_length 216 | charuco_board = aruco.CharucoBoard_create(*charuco_board_info['size'], square_length, tag_length, aruco_dict) 217 | 218 | # Draw the board 219 | draw_size = tuple([int(i * charuco_board.getSquareLength()) for i in charuco_board.getChessboardSize()]) 220 | board_svg = charuco_board.draw(draw_size) 221 | loc = [[0,0], [dims[0], 0], [0, dims[1]], dims] 222 | draw_marker(cairo_ctx, board_svg, loc) 223 | cairo_surface.flush() 224 | 
cairo_surface.finish() 225 | -------------------------------------------------------------------------------- /tools/predictor_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import argparse 3 | import glob 4 | from pathlib import Path 5 | import multiprocessing as mp 6 | import os 7 | import tempfile 8 | import time 9 | import warnings 10 | import tqdm 11 | import json 12 | import cv2 13 | import numpy as np 14 | import torch 15 | 16 | from detectron2.data.detection_utils import read_image 17 | from detectron2.utils.logger import setup_logger 18 | 19 | import deepformable 20 | import deepformable.modeling 21 | from deepformable.utils import get_cfg, VisualizationDemo, marker_metadata_loader 22 | from detectron2.data import MetadataCatalog 23 | 24 | # constants 25 | WINDOW_NAME = "COCO detections" 26 | 27 | setup_logger(name="fvcore") 28 | logger = setup_logger() 29 | 30 | def setup_environment(args): 31 | # Setup Config 32 | # load config from file and command-line arguments 33 | cfg = get_cfg() 34 | # To use demo for Panoptic-DeepLab, please uncomment the following two lines. 35 | # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config # noqa 36 | # add_panoptic_deeplab_config(cfg) 37 | cfg.merge_from_file(args.config_file) 38 | cfg.merge_from_list(args.opts) 39 | # Set score_threshold for builtin models 40 | if args.confidence_threshold > 0.0: 41 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold 42 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold 43 | cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold 44 | if not torch.cuda.is_available(): 45 | cfg.MODEL.DEVICE = "cpu" 46 | logger.info("CUDA not found, Will run the inference on CPU") 47 | 48 | # Handle metadata 49 | if marker_metadata_loader(cfg, args.marker_config_file): 50 | logger.info("Config loaded!") 51 | else: 52 | logger.info("Config loading failed!") 53 | 54 | cfg.freeze() 55 | return cfg 56 | 57 | 58 | def get_parser(): 59 | parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs") 60 | parser.add_argument( 61 | "--config-file", 62 | default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", 63 | metavar="FILE", 64 | help="path to config file", 65 | ) 66 | parser.add_argument( 67 | "--marker-config-file", 68 | default="tools/config.json", 69 | metavar="FILE", 70 | help="path to marker config file for the metadata", 71 | ) 72 | 73 | parser.add_argument("--parallel", action="store_true", help="Runs prediction in parallel.") 74 | parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") 75 | parser.add_argument("--video-input", help="Path to video file.") 76 | parser.add_argument( 77 | "--input", 78 | nargs="+", 79 | help="A list of space separated input images; " 80 | "or a single glob pattern such as 'directory/*.jpg'", 81 | ) 82 | parser.add_argument( 83 | "--output", 84 | help="A file or directory to save output visualizations. 
" 85 | "If not given, will show output in an OpenCV window.", 86 | ) 87 | parser.add_argument( 88 | "--confidence-threshold", 89 | type=float, 90 | default=-1.0, 91 | help="Minimum score for instance predictions to be shown", 92 | ) 93 | parser.add_argument( 94 | "--opts", 95 | help="Modify config options using the command-line 'KEY VALUE' pairs", 96 | default=[], 97 | nargs=argparse.REMAINDER, 98 | ) 99 | return parser 100 | 101 | 102 | def test_opencv_video_format(codec, file_ext): 103 | with tempfile.TemporaryDirectory(prefix="video_format_test") as dir: 104 | filename = os.path.join(dir, "test_file" + file_ext) 105 | writer = cv2.VideoWriter( 106 | filename=filename, 107 | fourcc=cv2.VideoWriter_fourcc(*codec), 108 | fps=float(30), 109 | frameSize=(10, 10), 110 | isColor=True, 111 | ) 112 | [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)] 113 | writer.release() 114 | if os.path.isfile(filename): 115 | return True 116 | return False 117 | 118 | 119 | if __name__ == "__main__": 120 | mp.set_start_method("spawn", force=True) 121 | args = get_parser().parse_args() 122 | logger.info("Arguments: " + str(args)) 123 | 124 | cfg = setup_environment(args) 125 | 126 | demo = VisualizationDemo(cfg, parallel=args.parallel) 127 | 128 | if args.input: 129 | if len(args.input) == 1: 130 | args.input = glob.glob(os.path.expanduser(args.input[0])) 131 | assert args.input, "The input path(s) was not found" 132 | for path in tqdm.tqdm(args.input, disable=not args.output): 133 | # use PIL, to be consistent with evaluation 134 | img = read_image(path, format="BGR") 135 | start_time = time.time() 136 | predictions, visualized_output = demo.run_on_image(img) 137 | logger.info( 138 | "{}: {} in {:.2f}s".format( 139 | path, 140 | "detected {} instances".format(len(predictions["instances"])) 141 | if "instances" in predictions 142 | else "finished", 143 | time.time() - start_time, 144 | ) 145 | ) 146 | 147 | if args.output: 148 | if os.path.isdir(args.output): 149 | assert os.path.isdir(args.output), args.output 150 | out_filename = os.path.join(args.output, os.path.basename(path)) 151 | else: 152 | assert len(args.input) == 1, "Please specify a directory with args.output" 153 | out_filename = args.output 154 | visualized_output.save(out_filename) 155 | else: 156 | cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) 157 | cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) 158 | if cv2.waitKey(0) == 27: 159 | break # esc to quit 160 | elif args.webcam: 161 | assert args.input is None, "Cannot have both --input and --webcam!" 162 | assert args.output is None, "output not yet supported with --webcam!" 
163 | cam = cv2.VideoCapture(0) 164 | for vis in tqdm.tqdm(demo.run_on_video(cam)): 165 | cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) 166 | cv2.imshow(WINDOW_NAME, vis) 167 | if cv2.waitKey(1) == 27: 168 | break # esc to quit 169 | cam.release() 170 | cv2.destroyAllWindows() 171 | elif args.video_input: 172 | video = cv2.VideoCapture(args.video_input) 173 | width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) 174 | height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) 175 | frames_per_second = video.get(cv2.CAP_PROP_FPS) 176 | num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) 177 | basename = os.path.basename(args.video_input) 178 | codec, file_ext = ( 179 | ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4") 180 | ) 181 | if codec == ".mp4v": 182 | warnings.warn("x264 codec not available, switching to mp4v") 183 | if args.output: 184 | if os.path.isdir(args.output): 185 | output_fname = os.path.join(args.output, basename) 186 | output_fname = os.path.splitext(output_fname)[0] + file_ext 187 | else: 188 | output_fname = args.output 189 | assert not os.path.isfile(output_fname), output_fname 190 | output_file = cv2.VideoWriter( 191 | filename=output_fname, 192 | # some installation of opencv may not support x264 (due to its license), 193 | # you can try other format (e.g. MPEG) 194 | fourcc=cv2.VideoWriter_fourcc(*codec), 195 | fps=float(frames_per_second), 196 | frameSize=(width, height), 197 | isColor=True, 198 | ) 199 | assert os.path.isfile(args.video_input) 200 | for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): 201 | if args.output: 202 | output_file.write(vis_frame) 203 | else: 204 | cv2.namedWindow(basename, cv2.WINDOW_NORMAL) 205 | cv2.imshow(basename, vis_frame) 206 | if cv2.waitKey(1) == 27: 207 | break # esc to quit 208 | video.release() 209 | if args.output: 210 | output_file.release() 211 | else: 212 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 
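"""
Training / evaluation entry point for DeepFormableTag, built on detectron2's launch utilities.

setup() registers the training and test datasets from the command-line paths
(--dataset-train-dir, --dataset-test1/2/3-dir), loads the marker metadata from
--marker-config-file, and builds the config; main() then trains with
DeepformableTrainer, or only runs evaluation when --eval-only is passed.

A minimal single-GPU run might look like the following (the config path and data
paths are examples / the argparse defaults — adjust them to your setup):

    python tools/train.py --config-file configs/deepformable-main.yaml \
        --dataset-train-dir /Data/Datasets/train \
        --marker-config-file /Data/Datasets/marker_config.json
"""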
2 | from pathlib import Path 3 | import json 4 | 5 | import torch 6 | import detectron2 7 | import detectron2.utils.comm as comm 8 | from detectron2.engine import default_argument_parser, default_setup, launch 9 | 10 | import deepformable 11 | from deepformable.engine import DeepformableTrainer 12 | from deepformable.utils import get_cfg, marker_metadata_loader 13 | from deepformable.data import register_deepformable_dataset 14 | 15 | 16 | def setup(args): 17 | cfg = get_cfg() 18 | cfg.merge_from_file(args.config_file) 19 | cfg.merge_from_list(args.opts) 20 | 21 | # Register datasets 22 | dataset_dir = Path(args.dataset_train_dir) 23 | register_deepformable_dataset( 24 | cfg.DATASETS.TRAIN[0], {}, 25 | str(dataset_dir / "annotations.json"), 26 | str(dataset_dir), 27 | load_markers=False) 28 | 29 | for test_dataset in cfg.DATASETS.TEST: 30 | # Check if need to load markers 31 | load_markers = True if "load_markers" in test_dataset.lower() else False 32 | # Select proper dataset path 33 | dataset_suffix = test_dataset.split("-")[-1].lower() 34 | dataset_dir = args.dataset_test1_dir 35 | if dataset_suffix == "test2": 36 | dataset_dir = args.dataset_test2_dir 37 | elif dataset_suffix == "test3": 38 | dataset_dir = args.dataset_test3_dir 39 | # Reguster dataset 40 | dataset_dir = Path(dataset_dir) 41 | register_deepformable_dataset( 42 | test_dataset, {}, 43 | str(dataset_dir / "annotations.json"), 44 | str(dataset_dir), 45 | load_markers=load_markers # if this option is false, mapper should create marker locations 46 | ) # based on board location information. For more info check the mapper 47 | # Load metadata 48 | if not marker_metadata_loader(cfg, args.marker_config_file): 49 | print("Failed to load marker metadata") 50 | 51 | default_setup(cfg, args) 52 | return cfg 53 | 54 | 55 | def main(args): 56 | cfg = setup(args) 57 | trainer = DeepformableTrainer(cfg, verbose=True) 58 | trainer.resume_or_load(resume=args.resume) 59 | 60 | if args.eval_only: 61 | res = trainer.test(cfg) 62 | if comm.is_main_process(): 63 | result_path = Path(cfg.OUTPUT_DIR) / "results.json" 64 | with open(result_path, 'w') as result_file: 65 | json.dump(res, result_file, indent=4) 66 | return res 67 | return trainer.train() 68 | 69 | 70 | if __name__ == "__main__": 71 | # Use --eval-only to skip training and only run evaluation 72 | arg_parser = default_argument_parser() 73 | arg_parser.add_argument( 74 | '--dataset-train-dir', type=str, default='/Data/Datasets/train', help='Provide train dataset path') 75 | arg_parser.add_argument( 76 | '--dataset-test1-dir', type=str, default='/Data/Datasets/test-inpainted', help='Provide test1 dataset path') 77 | arg_parser.add_argument( 78 | '--dataset-test2-dir', type=str, default='/Data/Datasets/test-realworld/flat', help='Provide test2 dataset path') 79 | arg_parser.add_argument( 80 | '--dataset-test3-dir', type=str, default='/Data/Datasets/test-realworld/deformation', help='Provide test3 dataset path') 81 | arg_parser.add_argument( 82 | "--marker-config-file", default='/Data/Datasets/marker_config.json', metavar="FILE", 83 | help="path to marker config file for the metadata") 84 | args = arg_parser.parse_args() 85 | 86 | launch( 87 | main, 88 | args.num_gpus, 89 | num_machines=args.num_machines, 90 | machine_rank=args.machine_rank, 91 | dist_url=args.dist_url, 92 | args=(args,), 93 | ) 94 | -------------------------------------------------------------------------------- /tools/training_visualizer.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Mustafa B. Yaldiz (VCLAB, KAIST) All Rights Reserved. 2 | import cv2 3 | from cv2 import aruco 4 | import random 5 | import matplotlib.pyplot as plt 6 | import json 7 | import numpy as np 8 | import argparse 9 | import math 10 | from pathlib import Path 11 | from typing import Union 12 | from copy import deepcopy 13 | import datetime 14 | import os 15 | import random 16 | from tqdm.notebook import tqdm 17 | 18 | import torch 19 | import torch.nn.functional as F 20 | 21 | import detectron2 22 | from detectron2.data import MetadataCatalog, DatasetCatalog 23 | from detectron2.utils.events import EventStorage 24 | from detectron2.config import configurable 25 | 26 | from deepformable.modeling import MarkerRendererDiffrast 27 | from deepformable.modeling import MarkerRenderer 28 | 29 | import deepformable 30 | from deepformable.utils import DeepformableVisualizer 31 | from deepformable.data import ( 32 | register_deepformable_dataset, DeepformableMapper, 33 | DetectronMapperWAnn 34 | ) 35 | from deepformable.utils import ( 36 | get_cfg, convert_mapped_instances, 37 | DeepformableVisualizer, 38 | marker_metadata_loader 39 | ) 40 | from deepformable.modeling import MarkerRendererDiffrast, IntermediateAugmentor 41 | from deepformable.engine import DeepformableTrainer 42 | 43 | 44 | def make_config( 45 | config_path="/host/configs/deepformable-main.yaml", 46 | weights="/Data/Models/deepformable_model.pth", 47 | ): 48 | # Setup Config 49 | cfg = get_cfg() 50 | cfg.OUTPUT_DIR = "/root" 51 | cfg.merge_from_file(config_path) 52 | cfg.MODEL.WEIGHTS = weights 53 | cfg.SOLVER.IMS_PER_BATCH = 1 54 | cfg.INPUT.MIN_SIZE_TRAIN = (640, 672, 704, 720, 736, 768, 800, 960) 55 | 56 | # cfg.INTERMEDIATE_AUGMENTOR.AUG_LIST = [ 57 | # "PerspectiveAugmentor", "TpsAugmentor", "RadialDistortionAugmentor", 58 | # "DefocusBlurAugmentor", "MotionBlurAugmentor", "HueShiftAugmentor", 59 | # "BrightnessAugmentor", "NoiseAugmentor", "GammaAugmentor", 60 | # "GammaCorrector", "JPEGAugmentor"] 61 | # cfg.INTERMEDIATE_AUGMENTOR.EXEC_PROBA_LIST = [0.4, 0.5, 0.3, 0.4, 0.4, 0.4, 0.4, 0.45, 0.3, 1.0, 0.35] 62 | # cfg.INTERMEDIATE_AUGMENTOR.AUG_LIST = ["HueShiftAugmentor", "GammaCorrector"] 63 | # cfg.INTERMEDIATE_AUGMENTOR.EXEC_PROBA_LIST = [1.0] * len(cfg.INTERMEDIATE_AUGMENTOR.AUG_LIST) 64 | 65 | # cfg.RENDERER.SHADING_METHOD = "diffuse" 66 | return cfg 67 | 68 | 69 | def register_datasets(cfg, data_root=Path("/Data/Datasets")): 70 | # Register datasets 71 | cur_data_root = data_root / "train-reduced" 72 | # cur_data_root = data_root / "test-inpainted" 73 | register_deepformable_dataset( 74 | "deepformable-rendered-train", {}, 75 | str(cur_data_root / "annotations.json"), 76 | str(cur_data_root), 77 | load_markers=False) 78 | 79 | # Test1: rendered test 80 | cur_data_root = data_root / "test-inpainted" 81 | register_deepformable_dataset( 82 | "deepformable-rendered-test1", {}, 83 | str(cur_data_root / "annotations.json"), 84 | str(cur_data_root), 85 | load_markers=False) 86 | register_deepformable_dataset( 87 | "deepformable-rendered-aug-test1", {}, 88 | str(cur_data_root / "annotations.json"), 89 | str(cur_data_root), 90 | load_markers=False) 91 | 92 | # Test2: real-flat test 93 | cur_data_root = data_root / "test-realworld/flat" 94 | register_deepformable_dataset( 95 | "deepformable_flat-real-load_markers-test2", {}, 96 | str(cur_data_root / "annotations.json"), 97 | str(cur_data_root), 98 | load_markers=True) 99 | 
register_deepformable_dataset( 100 | "deepformable_flat-real-load_markers-aug-test2", {}, 101 | str(cur_data_root / "annotations.json"), 102 | str(cur_data_root), 103 | load_markers=True) 104 | 105 | # Test2: real-deformation test 106 | cur_data_root = data_root / "test-realworld/deformation" 107 | register_deepformable_dataset( 108 | "deepformable_deformation-real-load_markers-test3", {}, 109 | str(cur_data_root / "annotations.json"), 110 | str(cur_data_root), 111 | load_markers=True) 112 | register_deepformable_dataset( 113 | "deepformable_deformation-real-load_markers-aug-test3", {}, 114 | str(cur_data_root / "annotations.json"), 115 | str(cur_data_root), 116 | load_markers=True) 117 | 118 | # Load metadata 119 | if not marker_metadata_loader(cfg, data_root / "test-realworld/marker_config.json"): 120 | print("Failed to load marker metadata") 121 | 122 | if __name__ == "__main__": 123 | enable_aug = True 124 | show_labels = True 125 | config_path = "configs/aruco-learnable-mit.yaml" 126 | 127 | cfg = make_config(config_path, weights="") 128 | register_datasets(cfg) 129 | dataset_names = cfg.DATASETS.TRAIN + cfg.DATASETS.TEST 130 | 131 | # Load datasets 132 | datasets, datasets_metadata, datasets_mapper = [], [], [] 133 | for dataset_name in dataset_names: 134 | dataset = DatasetCatalog.get(dataset_name) 135 | is_train = "train" in dataset_name 136 | print(f"{dataset_name} length: {len(dataset)}") 137 | datasets.append(dataset) 138 | datasets_metadata.append(MetadataCatalog.get(dataset_name)) 139 | mapper = DeepformableMapper(cfg, is_train) \ 140 | if "rendered" in dataset_name else DetectronMapperWAnn(cfg, is_train) 141 | datasets_mapper.append(mapper) 142 | 143 | trainer = DeepformableTrainer(cfg, False) 144 | trainer.resume_or_load(resume=False) 145 | self = trainer.model 146 | show_with_renderers = {} 147 | # show_with_renderers = {cfg.RENDERER.NAME: self.renderer} 148 | # show_with_renderers = { 149 | # "nvdiffrast": MarkerRendererDiffrast(cfg).to(self.device), 150 | # # "homography": MarkerRenderer(cfg).to(self.device), 151 | # } 152 | 153 | print("Input dataset index:") 154 | for index, dataset_name in enumerate(dataset_names): 155 | print(f" - {index}): {dataset_name}") 156 | 157 | d_idx = 0 # int(input()) 158 | dataset, metadata = datasets[d_idx], datasets_metadata[d_idx] 159 | dataset_name, mapper = dataset_names[d_idx], datasets_mapper[d_idx] 160 | visualizer = DeepformableVisualizer(metadata) 161 | 162 | while(1): 163 | d = random.sample(dataset, 1)[0] 164 | dataset_dict = mapper(d) 165 | 166 | if dataset_dict is None: 167 | continue 168 | 169 | converted_dict = convert_mapped_instances(dataset_dict) 170 | img = dataset_dict['image'].permute(1,2,0).cpu().numpy() 171 | 172 | # %matplotlib inline 173 | fig = plt.figure(figsize=(12,8)) 174 | vis_out = visualizer.draw_instance_predictions(img, converted_dict) 175 | cv2.imshow("original", vis_out.get_image()) 176 | 177 | with EventStorage(), torch.no_grad(): 178 | data = self.carry_to_gpu([deepcopy(dataset_dict)]) 179 | 180 | for d in data: 181 | d["image"] = (d["image"] / 255.0) ** self.gamma 182 | 183 | markers_batch, marker_loss = self.marker_generator( 184 | [d["instances"] for d in data]) 185 | 186 | for renderer_name, renderer in show_with_renderers.items(): 187 | d, markers = data[0], markers_batch[0] 188 | image = (renderer(d, markers) ** (1/self.gamma)) * 255.0 189 | image = image.permute(1,2,0).cpu().numpy() 190 | 191 | if not show_labels: 192 | converted_dict = {} 193 | vis_out = 
visualizer.draw_instance_predictions(image, converted_dict) 194 | cv2.imshow(renderer_name, vis_out.get_image()) 195 | 196 | for d, markers in zip(data, markers_batch): 197 | d["image"] = self.renderer(d, markers) 198 | 199 | # data, marker_loss = self.render_data(data) 200 | 201 | # Apply Augmentations 202 | for d in data: 203 | if enable_aug: 204 | probabilities = torch.rand(self.aug_prob.shape, device=self.device) 205 | indexes = (probabilities < self.aug_prob).nonzero(as_tuple=True)[0].tolist() 206 | selected_augmentations = [self.intermediate_augmentations[i] for i in indexes] 207 | 208 | for aug in selected_augmentations: 209 | d["image"], d["instances"] = aug(d["image"], d["instances"]) 210 | d["instances"] = IntermediateAugmentor.fix_instances(d["instances"]) 211 | 212 | print(selected_augmentations) 213 | else: 214 | d["image"] = d["image"] ** (1/self.gamma) 215 | d["image"] = d["image"] * 255.0 216 | 217 | image_np = data[0]["image"].permute(1,2,0).cpu().numpy() 218 | instances = data[0]["instances"] 219 | 220 | fig = plt.figure(figsize=(12,8)) 221 | converted_dict = convert_mapped_instances(data[0]) 222 | # converted_dict = {} 223 | if not show_labels: 224 | converted_dict = {} 225 | vis_out = visualizer.draw_instance_predictions(image_np, converted_dict) 226 | cv2.imshow("final_image", vis_out.get_image()) 227 | 228 | k = cv2.waitKey(0) 229 | # Esc key to stop 230 | if k==27: 231 | break 232 | else: 233 | continue 234 | --------------------------------------------------------------------------------