├── lib
├── __init__.py
├── evaluation
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   └── sg_eval.cpython-36.pyc
│   ├── sg_eval_slow.py
│   └── test_sg_eval.py
├── fpn
│   ├── nms
│   │   ├── _ext
│   │   │   ├── __init__.py
│   │   │   ├── nms
│   │   │   │   ├── _nms.so
│   │   │   │   ├── __pycache__
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   └── __init__.py
│   │   │   └── __pycache__
│   │   │   │   └── __init__.cpython-36.pyc
│   │   ├── src
│   │   │   ├── nms_cuda.h
│   │   │   ├── cuda
│   │   │   │   ├── nms.cu.o
│   │   │   │   ├── nms_kernel.h
│   │   │   │   ├── Makefile
│   │   │   │   ├── .ipynb_checkpoints
│   │   │   │   │   └── Makefile-checkpoint
│   │   │   │   └── nms_kernel.cu
│   │   │   └── nms_cuda.c
│   │   ├── functions
│   │   │   ├── __pycache__
│   │   │   │   └── nms.cpython-36.pyc
│   │   │   ├── nms.py
│   │   │   └── .ipynb_checkpoints
│   │   │   │   └── nms-checkpoint.py
│   │   ├── Makefile
│   │   ├── .ipynb_checkpoints
│   │   │   ├── Makefile-checkpoint
│   │   │   └── build-checkpoint.py
│   │   └── build.py
│   ├── roi_align
│   │   ├── __init__.py
│   │   ├── _ext
│   │   │   ├── __init__.py
│   │   │   ├── roi_align
│   │   │   │   ├── _roi_align.so
│   │   │   │   ├── __pycache__
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   └── __init__.py
│   │   │   └── __pycache__
│   │   │   │   └── __init__.cpython-36.pyc
│   │   ├── modules
│   │   │   ├── __init__.py
│   │   │   └── roi_align.py
│   │   ├── functions
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   └── roi_align.cpython-36.pyc
│   │   │   └── roi_align.py
│   │   ├── src
│   │   │   ├── cuda
│   │   │   │   ├── roi_align.cu.o
│   │   │   │   ├── Makefile
│   │   │   │   ├── .ipynb_checkpoints
│   │   │   │   │   └── Makefile-checkpoint
│   │   │   │   ├── roi_align_kernel.h
│   │   │   │   └── roi_align_kernel.cu
│   │   │   ├── roi_align_cuda.h
│   │   │   └── roi_align_cuda.c
│   │   ├── __pycache__
│   │   │   └── __init__.cpython-36.pyc
│   │   ├── Makefile
│   │   ├── .ipynb_checkpoints
│   │   │   └── Makefile-checkpoint
│   │   └── build.py
│   ├── __pycache__
│   │   ├── box_utils.cpython-36.pyc
│   │   ├── anchor_targets.cpython-36.pyc
│   │   └── generate_anchors.cpython-36.pyc
│   ├── box_intersections_cpu
│   │   ├── bbox.cpython-36m-x86_64-linux-gnu.so
│   │   ├── build
│   │   │   └── temp.linux-x86_64-3.6
│   │   │   │   └── bbox.o
│   │   ├── setup.py
│   │   └── bbox.pyx
│   ├── proposal_assignments
│   │   ├── __pycache__
│   │   │   ├── rel_assignments.cpython-36.pyc
│   │   │   ├── proposal_assignments_det.cpython-36.pyc
│   │   │   └── proposal_assignments_gtbox.cpython-36.pyc
│   │   ├── proposal_assignments_gtbox.py
│   │   ├── .ipynb_checkpoints
│   │   │   ├── proposal_assignments_gtbox-checkpoint.py
│   │   │   └── rel_assignments-checkpoint.py
│   │   ├── proposal_assignments_det.py
│   │   ├── proposal_assignments_postnms.py
│   │   ├── rel_assignments.py
│   │   └── proposal_assignments_rel.py
│   ├── make.sh
│   ├── generate_anchors.py
│   ├── anchor_targets.py
│   └── box_utils.py
├── __pycache__
│   ├── ggnn.cpython-36.pyc
│   ├── my_util.cpython-36.pyc
│   ├── resnet.cpython-36.pyc
│   ├── surgery.cpython-36.pyc
│   ├── __init__.cpython-36.pyc
│   ├── kern_model.cpython-36.pyc
│   ├── my_ggnn_01.cpython-36.pyc
│   ├── my_ggnn_02.cpython-36.pyc
│   ├── my_ggnn_03.cpython-36.pyc
│   ├── my_ggnn_04.cpython-36.pyc
│   ├── my_ggnn_05.cpython-36.pyc
│   ├── my_ggnn_06.cpython-36.pyc
│   ├── my_ggnn_07.cpython-36.pyc
│   ├── my_ggnn_08.cpython-36.pyc
│   ├── my_ggnn_09.cpython-36.pyc
│   ├── my_ggnn_10.cpython-36.pyc
│   ├── my_ggnn_11.cpython-36.pyc
│   ├── my_ggnn_12.cpython-36.pyc
│   ├── my_ggnn_13.cpython-36.pyc
│   ├── my_ggnn_14.cpython-36.pyc
│   ├── my_ggnn_15.cpython-36.pyc
│   ├── my_ggnn_16.cpython-36.pyc
│   ├── my_model_01.cpython-36.pyc
│   ├── my_model_02.cpython-36.pyc
│   ├── my_model_03.cpython-36.pyc
│   ├── my_model_04.cpython-36.pyc
│   ├── my_model_05.cpython-36.pyc
│   ├── my_model_06.cpython-36.pyc
│   ├── my_model_07.cpython-36.pyc
│   ├── my_model_08.cpython-36.pyc
│   ├── my_model_10.cpython-36.pyc
│   ├── my_model_11.cpython-36.pyc
│   ├── my_model_12.cpython-36.pyc
│   ├── my_model_13.cpython-36.pyc
│   ├── my_model_14.cpython-36.pyc
│   ├── my_model_15.cpython-36.pyc
│   ├── my_model_16.cpython-36.pyc
│   ├── my_model_17.cpython-36.pyc
│   ├── my_model_18.cpython-36.pyc
│   ├── my_model_19.cpython-36.pyc
│   ├── my_model_20.cpython-36.pyc
│   ├── my_model_21.cpython-36.pyc
│   ├── my_model_22.cpython-36.pyc
│   ├── my_model_23.cpython-36.pyc
│   ├── my_model_24.cpython-36.pyc
│   ├── my_model_26.cpython-36.pyc
│   ├── my_model_27.cpython-36.pyc
│   ├── my_model_28.cpython-36.pyc
│   ├── my_model_29.cpython-36.pyc
│   ├── my_model_30.cpython-36.pyc
│   ├── my_model_31.cpython-36.pyc
│   ├── my_model_32.cpython-36.pyc
│   ├── my_model_33.cpython-36.pyc
│   ├── pytorch_misc.cpython-36.pyc
│   ├── get_union_boxes.cpython-36.pyc
│   └── object_detector.cpython-36.pyc
├── draw_rectangles
│   ├── build
│   │   └── temp.linux-x86_64-3.6
│   │   │   └── draw_rectangles.o
│   ├── draw_rectangles.cpython-36m-x86_64-linux-gnu.so
│   ├── setup.py
│   └── draw_rectangles.pyx
├── my_util.py
├── surgery.py
├── get_union_boxes.py
├── resnet.py
├── ggnn.py
├── my_ggnn_17.py
└── my_ggnn_10.py
├── dataloaders
├── __init__.py
├── __pycache__
│   ├── blob.cpython-36.pyc
│   ├── __init__.cpython-36.pyc
│   ├── visual_genome.cpython-36.pyc
│   └── image_transforms.cpython-36.pyc
├── image_transforms.py
└── blob.py
├── graphs
├── 001
│   ├── emb_mtx.pkl
│   └── pred_counts.pkl
└── 005
│   └── all_edges.pkl
├── Makefile
├── requirements.txt
├── README.md
└── config.py

/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/dataloaders/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/graphs/001/emb_mtx.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alirezazareian/gbnet/HEAD/graphs/001/emb_mtx.pkl
--------------------------------------------------------------------------------
/graphs/001/pred_counts.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alirezazareian/gbnet/HEAD/graphs/001/pred_counts.pkl
--------------------------------------------------------------------------------
/graphs/005/all_edges.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alirezazareian/gbnet/HEAD/graphs/005/all_edges.pkl
--------------------------------------------------------------------------------
/lib/fpn/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh);
--------------------------------------------------------------------------------
/lib/fpn/nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
1 | int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num,
2 |                 float nms_overlap_thresh, int device_id);
3 | 
4 | 
--------------------------------------------------------------------------------
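The two headers above declare the C/CUDA entry points that the FFI build script (lib/fpn/nms/build.py, further down in this listing) compiles into the importable _ext.nms module. A minimal sketch of how that module is driven from Python follows; it mirrors _nms_single_im in lib/fpn/nms/functions/nms.py below, assumes the extension has already been built and a GPU is available, and the tensor sizes and the 0.7 IoU threshold are illustrative values, not something fixed by the repository:

import torch
from lib.fpn.nms._ext import nms  # produced by `make` in lib/fpn/nms

# Illustrative inputs: N boxes as [x1, y1, x2, y2] rows plus one confidence score per box.
N = 1000
xy1 = torch.rand(N, 2) * 100
boxes = torch.cat([xy1, xy1 + torch.rand(N, 2) * 20], 1).cuda()
scores = torch.rand(N).cuda()

keep = torch.IntTensor(N)                          # output buffer filled by the C side
_, idx = torch.sort(scores, dim=0, descending=True)
boxes_sorted = boxes[idx].contiguous()             # nms_apply expects boxes sorted by score
num_out = nms.nms_apply(keep, boxes_sorted, 0.7)   # returns the number of surviving boxes
keep_inds = idx[keep[:num_out].long().cuda(scores.get_device())]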
/lib/fpn/nms/Makefile:
--------------------------------------------------------------------------------
1 | all: src/cuda/nms.cu.o
2 | 	python build.py
3 | 
4 | src/cuda/nms.cu.o: src/cuda/nms_kernel.cu
5 | 	$(MAKE) -C src/cuda
6 | 
7 | clean:
8 | 	$(MAKE) -C src/cuda clean
9 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/Makefile:
--------------------------------------------------------------------------------
1 | all: src/cuda/roi_align.cu.o
2 | 	python build.py
3 | 
4 | src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu
5 | 	$(MAKE) -C src/cuda
6 | 
7 | clean:
8 | 	$(MAKE) -C src/cuda clean
9 | 
-------------------------------------------------------------------------------- /lib/fpn/nms/.ipynb_checkpoints/Makefile-checkpoint: -------------------------------------------------------------------------------- 1 | all: src/cuda/nms.cu.o 2 | python build.py 3 | 4 | src/cuda/nms.cu.o: src/cuda/nms_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/__pycache__/proposal_assignments_gtbox.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alirezazareian/gbnet/HEAD/lib/fpn/proposal_assignments/__pycache__/proposal_assignments_gtbox.cpython-36.pyc -------------------------------------------------------------------------------- /lib/fpn/box_intersections_cpu/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup(name="bbox_cython", ext_modules=cythonize('bbox.pyx'), include_dirs=[numpy.get_include()]) -------------------------------------------------------------------------------- /lib/draw_rectangles/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup(name="draw_rectangles_cython", ext_modules=cythonize('draw_rectangles.pyx'), include_dirs=[numpy.get_include()]) -------------------------------------------------------------------------------- /lib/fpn/roi_align/.ipynb_checkpoints/Makefile-checkpoint: -------------------------------------------------------------------------------- 1 | all: src/cuda/roi_align.cu.o 2 | python build.py 3 | 4 | src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export PATH := /usr/local/cuda-9.0/bin:$(PATH) 2 | 3 | all: draw_rectangles box_intersections nms roi_align 4 | 5 | draw_rectangles: 6 | cd lib/draw_rectangles; python setup.py build_ext --inplace 7 | box_intersections: 8 | cd lib/fpn/box_intersections_cpu; python setup.py build_ext --inplace 9 | nms: 10 | cd lib/fpn/nms; make 11 | roi_align: 12 | cd lib/fpn/roi_align; make -------------------------------------------------------------------------------- /lib/fpn/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd anchors 4 | python setup.py build_ext --inplace 5 | cd .. 6 | 7 | cd box_intersections_cpu 8 | python setup.py build_ext --inplace 9 | cd .. 10 | 11 | cd cpu_nms 12 | python build.py 13 | cd .. 14 | 15 | cd roi_align 16 | python build.py -C src/cuda clean 17 | python build.py -C src/cuda clean 18 | cd .. 
19 | 20 | echo "Done compiling hopefully" 21 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: nms_kernel.cu nms_kernel.h 2 | /usr/local/cuda-9.0/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC \ 3 | -gencode arch=compute_37,code=sm_37 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 8 | clean: 9 | rm nms.cu.o 10 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, 6 | THCudaTensor * bottom_grad); 7 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/.ipynb_checkpoints/Makefile-checkpoint: -------------------------------------------------------------------------------- 1 | all: nms_kernel.cu nms_kernel.h 2 | /usr/local/cuda-9.0/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC \ 3 | -gencode arch=compute_37,code=sm_37 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 8 | clean: 9 | rm nms.cu.o 10 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: roi_align_kernel.cu roi_align_kernel.h 2 | /usr/local/cuda-9.0/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC \ 3 | -gencode arch=compute_37,code=sm_37 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 8 | clean: 9 | rm roi_align.cu.o 10 | -------------------------------------------------------------------------------- /lib/fpn/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/.ipynb_checkpoints/Makefile-checkpoint: -------------------------------------------------------------------------------- 1 | all: roi_align_kernel.cu roi_align_kernel.h 2 | /usr/local/cuda-9.0/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC \ 3 | -gencode arch=compute_37,code=sm_37 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 8 | clean: 9 | rm roi_align.cu.o 10 | 
-------------------------------------------------------------------------------- /lib/fpn/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/nms_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh) 8 | { 9 | int* keep_data = THIntTensor_data(keep); 10 | const float* boxes_sorted_data = THCudaTensor_data(state, boxes_sorted); 11 | 12 | const int boxes_num = THCudaTensor_size(state, boxes_sorted, 0); 13 | 14 | const int devId = THCudaTensor_getDevice(state, boxes_sorted); 15 | 16 | int numTotalKeep = ApplyNMSGPU(keep_data, boxes_sorted_data, boxes_num, nms_thresh, devId); 17 | return numTotalKeep; 18 | } 19 | 20 | 21 | -------------------------------------------------------------------------------- /lib/fpn/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | sources = [] 7 | headers = [] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | 36 | -------------------------------------------------------------------------------- /lib/fpn/nms/.ipynb_checkpoints/build-checkpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | sources = [] 7 | headers = [] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 
| extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | 36 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/cuda/roi_align.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* image_ptr, const float* boxes_ptr, int num_boxes, int batch, int image_height, int image_width, int crop_height, 9 | int crop_width, int depth, float extrapolation_value, float* crops_ptr); 10 | 11 | int ROIAlignForwardLaucher( 12 | const float* image_ptr, const float* boxes_ptr, 13 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 14 | int crop_width, int depth, float extrapolation_value, float* crops_ptr, cudaStream_t stream); 15 | 16 | __global__ void ROIAlignBackward(const int nthreads, const float* grads_ptr, 17 | const float* boxes_ptr, int num_boxes, int batch, int image_height, 18 | int image_width, int crop_height, int crop_width, int depth, 19 | float* grads_image_ptr); 20 | 21 | int ROIAlignBackwardLaucher(const float* grads_ptr, const float* boxes_ptr, int num_boxes, 22 | int batch, int image_height, int image_width, int crop_height, 23 | int crop_width, int depth, float* grads_image_ptr, cudaStream_t stream); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | 31 | -------------------------------------------------------------------------------- /lib/my_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | import numpy as np 5 | 6 | class XavierLinear(nn.Module): 7 | ''' 8 | Simple Linear layer with Xavier init 9 | 10 | Paper by Xavier Glorot and Yoshua Bengio (2010): 11 | Understanding the difficulty of training deep feedforward neural networks 12 | http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf 13 | ''' 14 | 15 | def __init__(self, in_features, out_features, bias=True): 16 | super(XavierLinear, self).__init__() 17 | self.linear = nn.Linear(in_features, out_features, bias=bias) 18 | init.xavier_normal(self.linear.weight) 19 | 20 | 
def forward(self, x): 21 | return self.linear(x) 22 | 23 | class MLP(nn.Module): 24 | def __init__(self, dim_in_hid_out, act_fn='ReLU', last_act=False): 25 | super(MLP, self).__init__() 26 | layers = [] 27 | for i in range(len(dim_in_hid_out) - 1): 28 | layers.append(XavierLinear(dim_in_hid_out[i], dim_in_hid_out[i + 1])) 29 | if i < len(dim_in_hid_out) - 2 or last_act: 30 | layers.append(getattr(torch.nn, act_fn)()) 31 | self.model = torch.nn.Sequential(*layers) 32 | 33 | def forward(self, x): 34 | return self.model(x) 35 | 36 | -------------------------------------------------------------------------------- /lib/fpn/nms/functions/nms.py: -------------------------------------------------------------------------------- 1 | # Le code for doing NMS 2 | import torch 3 | import numpy as np 4 | from .._ext import nms 5 | 6 | 7 | def apply_nms(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, boxes_per_im=None, 8 | nms_thresh=0.7): 9 | """ 10 | Note - this function is non-differentiable so everything is assumed to be a tensor, not 11 | a variable. 12 | """ 13 | just_inds = boxes_per_im is None 14 | if boxes_per_im is None: 15 | boxes_per_im = [boxes.size(0)] 16 | 17 | 18 | s = 0 19 | keep = [] 20 | im_per = [] 21 | for bpi in boxes_per_im: 22 | e = s + int(bpi) 23 | keep_im = _nms_single_im(scores[s:e], boxes[s:e], pre_nms_topn, post_nms_topn, nms_thresh) 24 | keep.append(keep_im + s) 25 | im_per.append(keep_im.size(0)) 26 | 27 | s = e 28 | 29 | inds = torch.cat(keep, 0) 30 | if just_inds: 31 | return inds 32 | return inds, im_per 33 | 34 | 35 | def _nms_single_im(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, nms_thresh=0.7): 36 | keep = torch.IntTensor(scores.size(0)) 37 | vs, idx = torch.sort(scores, dim=0, descending=True) 38 | if idx.size(0) > pre_nms_topn: 39 | idx = idx[:pre_nms_topn] 40 | boxes_sorted = boxes[idx].contiguous() 41 | num_out = nms.nms_apply(keep, boxes_sorted, nms_thresh) 42 | num_out = min(num_out, post_nms_topn) 43 | keep = keep[:num_out].long() 44 | keep = idx[keep.cuda(scores.get_device())] 45 | return keep 46 | -------------------------------------------------------------------------------- /lib/fpn/nms/functions/.ipynb_checkpoints/nms-checkpoint.py: -------------------------------------------------------------------------------- 1 | # Le code for doing NMS 2 | import torch 3 | import numpy as np 4 | from .._ext import nms 5 | 6 | 7 | def apply_nms(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, boxes_per_im=None, 8 | nms_thresh=0.7): 9 | """ 10 | Note - this function is non-differentiable so everything is assumed to be a tensor, not 11 | a variable. 
12 | """ 13 | just_inds = boxes_per_im is None 14 | if boxes_per_im is None: 15 | boxes_per_im = [boxes.size(0)] 16 | 17 | 18 | s = 0 19 | keep = [] 20 | im_per = [] 21 | for bpi in boxes_per_im: 22 | e = s + int(bpi) 23 | keep_im = _nms_single_im(scores[s:e], boxes[s:e], pre_nms_topn, post_nms_topn, nms_thresh) 24 | keep.append(keep_im + s) 25 | im_per.append(keep_im.size(0)) 26 | 27 | s = e 28 | 29 | inds = torch.cat(keep, 0) 30 | if just_inds: 31 | return inds 32 | return inds, im_per 33 | 34 | 35 | def _nms_single_im(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, nms_thresh=0.7): 36 | keep = torch.IntTensor(scores.size(0)) 37 | vs, idx = torch.sort(scores, dim=0, descending=True) 38 | if idx.size(0) > pre_nms_topn: 39 | idx = idx[:pre_nms_topn] 40 | boxes_sorted = boxes[idx].contiguous() 41 | num_out = nms.nms_apply(keep, boxes_sorted, nms_thresh) 42 | num_out = min(num_out, post_nms_topn) 43 | keep = keep[:num_out].long() 44 | keep = idx[keep.cuda(scores.get_device())] 45 | return keep 46 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.3.0 2 | awscli==1.15.84 3 | backcall==0.1.0 4 | bleach==3.1.0 5 | certifi==2019.9.11 6 | cffi==1.13.1 7 | cycler==0.10.0 8 | Cython==0.29.13 9 | decorator==4.4.1 10 | defusedxml==0.6.0 11 | dill==0.2.7.1 12 | entrypoints==0.3 13 | graphviz==0.13.2 14 | h5py==2.10.0 15 | importlib-metadata==0.23 16 | ipykernel==5.1.3 17 | ipython==7.9.0 18 | ipython-genutils==0.2.0 19 | ipywidgets==7.5.1 20 | jedi==0.15.1 21 | Jinja2==2.10.3 22 | jmespath==0.9.3 23 | jsonschema==3.1.1 24 | jupyter==1.0.0 25 | jupyter-client==5.3.4 26 | jupyter-console==6.0.0 27 | jupyter-core==4.6.1 28 | kiwisolver==1.1.0 29 | MarkupSafe==1.1.1 30 | matplotlib==3.1.1 31 | mistune==0.8.4 32 | more-itertools==7.2.0 33 | nbconvert==5.6.1 34 | nbformat==4.4.0 35 | notebook==6.0.1 36 | numpy==1.17.3 37 | pandas==0.25.2 38 | pandocfilters==1.4.2 39 | parso==0.5.1 40 | pexpect==4.7.0 41 | pickleshare==0.7.5 42 | Pillow==6.2.1 43 | prometheus-client==0.7.1 44 | prompt-toolkit==2.0.10 45 | protobuf==3.10.0 46 | ptyprocess==0.6.0 47 | pyyaml==3.13 48 | pyasn1==0.4.4 49 | pycocotools==2.0.0 50 | pycparser==2.19 51 | Pygments==2.4.2 52 | pyparsing==2.4.2 53 | pyrsistent==0.15.5 54 | python-dateutil==2.8.0 55 | pytz==2019.3 56 | pyzmq==18.1.0 57 | qtconsole==4.5.5 58 | rsa==3.4.2 59 | Send2Trash==1.5.0 60 | six==1.12.0 61 | tensorboardX==1.9 62 | terminado==0.8.2 63 | testpath==0.4.2 64 | torch==0.3.0.post4 65 | torchvision==0.2.0 66 | tornado==6.0.3 67 | tqdm==4.36.1 68 | traitlets==4.3.3 69 | wcwidth==0.1.7 70 | webencodings==0.5.1 71 | widgetsnbextension==3.5.1 72 | zipp==0.6.0 73 | 74 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | 
self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * image_ptr = THCudaTensor_data(state, features); 12 | float * boxes_ptr = THCudaTensor_data(state, rois); 13 | 14 | float * crops_ptr = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_boxes = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // batch size 25 | int batch = THCudaTensor_size(state, features, 0); 26 | // data height 27 | int image_height = THCudaTensor_size(state, features, 2); 28 | // data width 29 | int image_width = THCudaTensor_size(state, features, 3); 30 | // Number of channels 31 | int depth = THCudaTensor_size(state, features, 1); 32 | 33 | cudaStream_t stream = THCState_getCurrentStream(state); 34 | float extrapolation_value = 0.0; 35 | 36 | ROIAlignForwardLaucher( 37 | image_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 38 | crop_height, crop_width, depth, extrapolation_value, crops_ptr, 39 | stream); 40 | 41 | return 1; 42 | } 43 | 44 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale, 45 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 46 | { 47 | // Grab the input tensor 48 | float * grads_ptr = THCudaTensor_data(state, top_grad); 49 | float * boxes_ptr = THCudaTensor_data(state, rois); 50 | 51 | float * grads_image_ptr = THCudaTensor_data(state, bottom_grad); 52 | 53 | // Number of ROIs 54 | int num_boxes = THCudaTensor_size(state, rois, 0); 55 | int size_rois = THCudaTensor_size(state, rois, 1); 56 | if (size_rois != 5) 57 | { 58 | return 0; 59 | } 60 | 61 | // batch size 62 | int batch = THCudaTensor_size(state, bottom_grad, 0); 63 | // data height 64 | int image_height = THCudaTensor_size(state, bottom_grad, 2); 65 | // data width 66 | int image_width = THCudaTensor_size(state, bottom_grad, 3); 67 | // Number of channels 68 | int depth = THCudaTensor_size(state, bottom_grad, 1); 69 | 70 | cudaStream_t stream = 
THCState_getCurrentStream(state); 71 | 72 | ROIAlignBackwardLaucher( 73 | grads_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 74 | crop_height, crop_width, depth, grads_image_ptr, stream); 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | """ 2 | performs ROI aligning 3 | """ 4 | 5 | import torch 6 | from torch.autograd import Function 7 | from .._ext import roi_align 8 | 9 | class RoIAlignFunction(Function): 10 | def __init__(self, aligned_height, aligned_width, spatial_scale): 11 | self.aligned_width = int(aligned_width) 12 | self.aligned_height = int(aligned_height) 13 | self.spatial_scale = float(spatial_scale) 14 | 15 | self.feature_size = None 16 | 17 | def forward(self, features, rois): 18 | self.save_for_backward(rois) 19 | 20 | rois_normalized = rois.clone() 21 | 22 | self.feature_size = features.size() 23 | batch_size, num_channels, data_height, data_width = self.feature_size 24 | 25 | height = (data_height -1) / self.spatial_scale 26 | width = (data_width - 1) / self.spatial_scale 27 | 28 | rois_normalized[:,1] /= width 29 | rois_normalized[:,2] /= height 30 | rois_normalized[:,3] /= width 31 | rois_normalized[:,4] /= height 32 | 33 | 34 | num_rois = rois.size(0) 35 | 36 | output = features.new(num_rois, num_channels, self.aligned_height, 37 | self.aligned_width).zero_() 38 | 39 | if features.is_cuda: 40 | res = roi_align.roi_align_forward_cuda(self.aligned_height, 41 | self.aligned_width, 42 | self.spatial_scale, features, 43 | rois_normalized, output) 44 | assert res == 1 45 | else: 46 | raise ValueError 47 | 48 | return output 49 | 50 | def backward(self, grad_output): 51 | assert(self.feature_size is not None and grad_output.is_cuda) 52 | 53 | rois = self.saved_tensors[0] 54 | 55 | rois_normalized = rois.clone() 56 | 57 | batch_size, num_channels, data_height, data_width = self.feature_size 58 | 59 | height = (data_height -1) / self.spatial_scale 60 | width = (data_width - 1) / self.spatial_scale 61 | 62 | rois_normalized[:,1] /= width 63 | rois_normalized[:,2] /= height 64 | rois_normalized[:,3] /= width 65 | rois_normalized[:,4] /= height 66 | 67 | grad_input = rois_normalized.new(batch_size, num_channels, data_height, 68 | data_width).zero_() 69 | res = roi_align.roi_align_backward_cuda(self.aligned_height, 70 | self.aligned_width, 71 | self.spatial_scale, grad_output, 72 | rois_normalized, grad_input) 73 | assert res == 1 74 | return grad_input, None 75 | -------------------------------------------------------------------------------- /lib/draw_rectangles/draw_rectangles.pyx: -------------------------------------------------------------------------------- 1 | ###### 2 | # Draws rectangles 3 | ###### 4 | 5 | cimport cython 6 | import numpy as np 7 | cimport numpy as np 8 | 9 | DTYPE = np.float32 10 | ctypedef np.float32_t DTYPE_t 11 | 12 | def draw_union_boxes(bbox_pairs, pooling_size, padding=0): 13 | """ 14 | Draws union boxes for the image. 
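Each pair is rendered as two soft (edge-antialiased) binary masks on a pooling_size x pooling_size grid laid over the pair's union box, so the two output channels encode where each box of the pair sits within the union region.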
15 | :param box_pairs: [num_pairs, 8] 16 | :param fmap_size: Size of the original feature map 17 | :param stride: ratio between fmap size and original img (<1) 18 | :param pooling_size: resize everything to this size 19 | :return: [num_pairs, 2, pooling_size, pooling_size arr 20 | """ 21 | assert padding == 0, "Padding>0 not supported yet" 22 | return draw_union_boxes_c(bbox_pairs, pooling_size) 23 | 24 | cdef DTYPE_t minmax(DTYPE_t x): 25 | return min(max(x, 0), 1) 26 | 27 | cdef np.ndarray[DTYPE_t, ndim=4] draw_union_boxes_c( 28 | np.ndarray[DTYPE_t, ndim=2] box_pairs, unsigned int pooling_size): 29 | """ 30 | Parameters 31 | ---------- 32 | boxes: (N, 4) ndarray of float. everything has arbitrary ratios 33 | query_boxes: (K, 4) ndarray of float 34 | Returns 35 | ------- 36 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 37 | """ 38 | cdef unsigned int N = box_pairs.shape[0] 39 | 40 | cdef np.ndarray[DTYPE_t, ndim = 4] uboxes = np.zeros( 41 | (N, 2, pooling_size, pooling_size), dtype=DTYPE) 42 | cdef DTYPE_t x1_union, y1_union, x2_union, y2_union, w, h, x1_box, y1_box, x2_box, y2_box, y_contrib, x_contrib 43 | cdef unsigned int n, i, j, k 44 | 45 | for n in range(N): 46 | x1_union = min(box_pairs[n, 0], box_pairs[n, 4]) 47 | y1_union = min(box_pairs[n, 1], box_pairs[n, 5]) 48 | x2_union = max(box_pairs[n, 2], box_pairs[n, 6]) 49 | y2_union = max(box_pairs[n, 3], box_pairs[n, 7]) 50 | 51 | w = x2_union - x1_union 52 | h = y2_union - y1_union 53 | 54 | for i in range(2): 55 | # Now everything is in the range [0, pooling_size]. 56 | x1_box = (box_pairs[n, 0+4*i] - x1_union)*pooling_size / w 57 | y1_box = (box_pairs[n, 1+4*i] - y1_union)*pooling_size / h 58 | x2_box = (box_pairs[n, 2+4*i] - x1_union)*pooling_size / w 59 | y2_box = (box_pairs[n, 3+4*i] - y1_union)*pooling_size / h 60 | # print("{:.3f}, {:.3f}, {:.3f}, {:.3f}".format(x1_box, y1_box, x2_box, y2_box)) 61 | for j in range(pooling_size): 62 | y_contrib = minmax(j+1-y1_box)*minmax(y2_box-j) 63 | for k in range(pooling_size): 64 | x_contrib = minmax(k+1-x1_box)*minmax(x2_box-k) 65 | # print("j {} yc {} k {} xc {}".format(j, y_contrib, k, x_contrib)) 66 | uboxes[n,i,j,k] = x_contrib*y_contrib 67 | return uboxes 68 | -------------------------------------------------------------------------------- /lib/surgery.py: -------------------------------------------------------------------------------- 1 | # create predictions from the other stuff 2 | """ 3 | Go from proposals + scores to relationships. 4 | 5 | pred-cls: No bbox regression, obj dist is exactly known 6 | sg-cls : No bbox regression 7 | sg-det : Bbox regression 8 | 9 | in all cases we'll return: 10 | boxes, objs, rels, pred_scores 11 | 12 | """ 13 | 14 | import numpy as np 15 | import torch 16 | from lib.pytorch_misc import unravel_index 17 | from lib.fpn.box_utils import bbox_overlaps 18 | # from ad3 import factor_graph as fg 19 | from time import time 20 | 21 | def filter_dets(boxes, obj_scores, obj_classes, rel_inds, pred_scores): 22 | """ 23 | Filters detections.... 24 | :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4] 25 | :param obj_scores: [num_box] probabilities for the scores 26 | :param obj_classes: [num_box] class labels for the topk 27 | :param rel_inds: [num_rel, 2] TENSOR consisting of (im_ind0, im_ind1) 28 | :param pred_scores: [topk, topk, num_rel, num_predicates] 29 | :param use_nms: True if use NMS to filter dets. 
30 | :return: boxes, objs, rels, pred_scores 31 | 32 | """ 33 | if boxes.dim() != 2: 34 | raise ValueError("Boxes needs to be [num_box, 4] but its {}".format(boxes.size())) 35 | 36 | num_box = boxes.size(0) 37 | assert obj_scores.size(0) == num_box 38 | 39 | assert obj_classes.size() == obj_scores.size() 40 | num_rel = rel_inds.size(0) 41 | assert rel_inds.size(1) == 2 42 | assert pred_scores.size(0) == num_rel 43 | 44 | obj_scores0 = obj_scores.data[rel_inds[:,0]] 45 | obj_scores1 = obj_scores.data[rel_inds[:,1]] 46 | 47 | pred_scores_max, pred_classes_argmax = pred_scores.data[:,1:].max(1) 48 | pred_classes_argmax = pred_classes_argmax + 1 49 | 50 | rel_scores_argmaxed = pred_scores_max * obj_scores0 * obj_scores1 51 | rel_scores_vs, rel_scores_idx = torch.sort(rel_scores_argmaxed.view(-1), dim=0, descending=True) 52 | 53 | rels = rel_inds[rel_scores_idx].cpu().numpy() 54 | pred_scores_sorted = pred_scores[rel_scores_idx].data.cpu().numpy() 55 | obj_scores_np = obj_scores.data.cpu().numpy() 56 | objs_np = obj_classes.data.cpu().numpy() 57 | boxes_out = boxes.data.cpu().numpy() 58 | 59 | return boxes_out, objs_np, obj_scores_np, rels, pred_scores_sorted 60 | 61 | # def _get_similar_boxes(boxes, obj_classes_topk, nms_thresh=0.3): 62 | # """ 63 | # Assuming bg is NOT A LABEL. 64 | # :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4] 65 | # :param obj_classes: [num_box, topk] class labels 66 | # :return: num_box, topk, num_box, topk array containing similarities. 67 | # """ 68 | # topk = obj_classes_topk.size(1) 69 | # num_box = boxes.size(0) 70 | # 71 | # box_flat = boxes.view(-1, 4) if boxes.dim() == 3 else boxes[:, None].expand( 72 | # num_box, topk, 4).contiguous().view(-1, 4) 73 | # jax = bbox_overlaps(box_flat, box_flat).data > nms_thresh 74 | # # Filter out things that are not gonna compete. 75 | # classes_eq = obj_classes_topk.data.view(-1)[:, None] == obj_classes_topk.data.view(-1)[None, :] 76 | # jax &= classes_eq 77 | # boxes_are_similar = jax.view(num_box, topk, num_box, topk) 78 | # return boxes_are_similar.cpu().numpy().astype(np.bool) 79 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_gtbox.py: -------------------------------------------------------------------------------- 1 | from lib.pytorch_misc import enumerate_by_image, gather_nd, random_choose 2 | from lib.fpn.box_utils import bbox_preds, center_size, bbox_overlaps 3 | import torch 4 | from lib.pytorch_misc import diagonal_inds, to_variable 5 | from config import RELS_PER_IMG, REL_FG_FRACTION 6 | 7 | 8 | @to_variable 9 | def proposal_assignments_gtbox(rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5): 10 | """ 11 | Assign object detection proposals to ground-truth targets. Produces proposal 12 | classification labels and bounding-box regression targets. 13 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 14 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1]. Not needed it seems 15 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 16 | Note, the img_inds here start at image_offset 17 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]. 18 | Note, the img_inds here start at image_offset 19 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets [num_rois, 4] array of targets for the labels. 
24 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 25 | """ 26 | im_inds = rois[:,0].long() 27 | 28 | num_im = im_inds[-1] + 1 29 | 30 | # Offset the image indices in fg_rels to refer to absolute indices (not just within img i) 31 | fg_rels = gt_rels.clone() 32 | fg_rels[:,0] -= image_offset 33 | offset = {} 34 | for i, s, e in enumerate_by_image(im_inds): 35 | offset[i] = s 36 | for i, s, e in enumerate_by_image(fg_rels[:, 0]): 37 | fg_rels[s:e, 1:3] += offset[i] 38 | 39 | # Try ALL things, not just intersections. 40 | is_cand = (im_inds[:, None] == im_inds[None]) 41 | is_cand.view(-1)[diagonal_inds(is_cand)] = 0 42 | 43 | # # Compute salience 44 | # gt_inds = fg_rels[:, 1:3].contiguous().view(-1) 45 | # labels_arange = labels.data.new(labels.size(0)) 46 | # torch.arange(0, labels.size(0), out=labels_arange) 47 | # salience_labels = ((gt_inds[:, None] == labels_arange[None]).long().sum(0) > 0).long() 48 | # labels = torch.stack((labels, salience_labels), 1) 49 | 50 | # Add in some BG labels 51 | 52 | # NOW WE HAVE TO EXCLUDE THE FGs. 53 | # TODO: check if this causes an error if many duplicate GTs havent been filtered out 54 | 55 | is_cand.view(-1)[fg_rels[:,1]*im_inds.size(0) + fg_rels[:,2]] = 0 56 | is_bgcand = is_cand.nonzero() 57 | # TODO: make this sample on a per image case 58 | # If too many then sample 59 | num_fg = min(fg_rels.size(0), int(RELS_PER_IMG * REL_FG_FRACTION * num_im)) 60 | if num_fg < fg_rels.size(0): 61 | fg_rels = random_choose(fg_rels, num_fg) 62 | 63 | # If too many then sample 64 | num_bg = min(is_bgcand.size(0) if is_bgcand.dim() > 0 else 0, 65 | int(RELS_PER_IMG * num_im) - num_fg) 66 | if num_bg > 0: 67 | bg_rels = torch.cat(( 68 | im_inds[is_bgcand[:, 0]][:, None], 69 | is_bgcand, 70 | (is_bgcand[:, 0, None] < -10).long(), 71 | ), 1) 72 | 73 | if num_bg < is_bgcand.size(0): 74 | bg_rels = random_choose(bg_rels, num_bg) 75 | rel_labels = torch.cat((fg_rels, bg_rels), 0) 76 | else: 77 | rel_labels = fg_rels 78 | 79 | 80 | # last sort by rel. 81 | _, perm = torch.sort(rel_labels[:, 0]*(gt_boxes.size(0)**2) + 82 | rel_labels[:,1]*gt_boxes.size(0) + rel_labels[:,2]) 83 | 84 | rel_labels = rel_labels[perm].contiguous() 85 | 86 | labels = gt_classes[:,1].contiguous() 87 | return rois, labels, rel_labels 88 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/.ipynb_checkpoints/proposal_assignments_gtbox-checkpoint.py: -------------------------------------------------------------------------------- 1 | from lib.pytorch_misc import enumerate_by_image, gather_nd, random_choose 2 | from lib.fpn.box_utils import bbox_preds, center_size, bbox_overlaps 3 | import torch 4 | from lib.pytorch_misc import diagonal_inds, to_variable 5 | from config import RELS_PER_IMG, REL_FG_FRACTION 6 | 7 | 8 | @to_variable 9 | def proposal_assignments_gtbox(rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5): 10 | """ 11 | Assign object detection proposals to ground-truth targets. Produces proposal 12 | classification labels and bounding-box regression targets. 13 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 14 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1]. Not needed it seems 15 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 16 | Note, the img_inds here start at image_offset 17 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]. 
18 | Note, the img_inds here start at image_offset 19 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets [num_rois, 4] array of targets for the labels. 24 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 25 | """ 26 | im_inds = rois[:,0].long() 27 | 28 | num_im = im_inds[-1] + 1 29 | 30 | # Offset the image indices in fg_rels to refer to absolute indices (not just within img i) 31 | fg_rels = gt_rels.clone() 32 | fg_rels[:,0] -= image_offset 33 | offset = {} 34 | for i, s, e in enumerate_by_image(im_inds): 35 | offset[i] = s 36 | for i, s, e in enumerate_by_image(fg_rels[:, 0]): 37 | fg_rels[s:e, 1:3] += offset[i] 38 | 39 | # Try ALL things, not just intersections. 40 | is_cand = (im_inds[:, None] == im_inds[None]) 41 | is_cand.view(-1)[diagonal_inds(is_cand)] = 0 42 | 43 | # # Compute salience 44 | # gt_inds = fg_rels[:, 1:3].contiguous().view(-1) 45 | # labels_arange = labels.data.new(labels.size(0)) 46 | # torch.arange(0, labels.size(0), out=labels_arange) 47 | # salience_labels = ((gt_inds[:, None] == labels_arange[None]).long().sum(0) > 0).long() 48 | # labels = torch.stack((labels, salience_labels), 1) 49 | 50 | # Add in some BG labels 51 | 52 | # NOW WE HAVE TO EXCLUDE THE FGs. 53 | # TODO: check if this causes an error if many duplicate GTs havent been filtered out 54 | 55 | is_cand.view(-1)[fg_rels[:,1]*im_inds.size(0) + fg_rels[:,2]] = 0 56 | is_bgcand = is_cand.nonzero() 57 | # TODO: make this sample on a per image case 58 | # If too many then sample 59 | num_fg = min(fg_rels.size(0), int(RELS_PER_IMG * REL_FG_FRACTION * num_im)) 60 | if num_fg < fg_rels.size(0): 61 | fg_rels = random_choose(fg_rels, num_fg) 62 | 63 | # If too many then sample 64 | num_bg = min(is_bgcand.size(0) if is_bgcand.dim() > 0 else 0, 65 | int(RELS_PER_IMG * num_im) - num_fg) 66 | if num_bg > 0: 67 | bg_rels = torch.cat(( 68 | im_inds[is_bgcand[:, 0]][:, None], 69 | is_bgcand, 70 | (is_bgcand[:, 0, None] < -10).long(), 71 | ), 1) 72 | 73 | if num_bg < is_bgcand.size(0): 74 | bg_rels = random_choose(bg_rels, num_bg) 75 | rel_labels = torch.cat((fg_rels, bg_rels), 0) 76 | else: 77 | rel_labels = fg_rels 78 | 79 | 80 | # last sort by rel. 
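# The composite key below (img_ind * N^2 + subject_ind * N + object_ind, with N = gt_boxes.size(0))
# sorts the relation labels by image first, then by subject index, then by object index.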
81 | _, perm = torch.sort(rel_labels[:, 0]*(gt_boxes.size(0)**2) + 82 | rel_labels[:,1]*gt_boxes.size(0) + rel_labels[:,2]) 83 | 84 | rel_labels = rel_labels[perm].contiguous() 85 | 86 | labels = gt_classes[:,1].contiguous() 87 | return rois, labels, rel_labels 88 | -------------------------------------------------------------------------------- /lib/fpn/box_intersections_cpu/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(boxes, query_boxes): 16 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE) 17 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE) 18 | 19 | return bbox_overlaps_c(boxes_contig, query_contig) 20 | 21 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 22 | np.ndarray[DTYPE_t, ndim=2] boxes, 23 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 24 | """ 25 | Parameters 26 | ---------- 27 | boxes: (N, 4) ndarray of float 28 | query_boxes: (K, 4) ndarray of float 29 | Returns 30 | ------- 31 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 32 | """ 33 | cdef unsigned int N = boxes.shape[0] 34 | cdef unsigned int K = query_boxes.shape[0] 35 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 36 | cdef DTYPE_t iw, ih, box_area 37 | cdef DTYPE_t ua 38 | cdef unsigned int k, n 39 | for k in range(K): 40 | box_area = ( 41 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 42 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 43 | ) 44 | for n in range(N): 45 | iw = ( 46 | min(boxes[n, 2], query_boxes[k, 2]) - 47 | max(boxes[n, 0], query_boxes[k, 0]) + 1 48 | ) 49 | if iw > 0: 50 | ih = ( 51 | min(boxes[n, 3], query_boxes[k, 3]) - 52 | max(boxes[n, 1], query_boxes[k, 1]) + 1 53 | ) 54 | if ih > 0: 55 | ua = float( 56 | (boxes[n, 2] - boxes[n, 0] + 1) * 57 | (boxes[n, 3] - boxes[n, 1] + 1) + 58 | box_area - iw * ih 59 | ) 60 | overlaps[n, k] = iw * ih / ua 61 | return overlaps 62 | 63 | 64 | def bbox_intersections(boxes, query_boxes): 65 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE) 66 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE) 67 | 68 | return bbox_intersections_c(boxes_contig, query_contig) 69 | 70 | 71 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 72 | np.ndarray[DTYPE_t, ndim=2] boxes, 73 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 74 | """ 75 | For each query box compute the intersection ratio covered by boxes 76 | ---------- 77 | Parameters 78 | ---------- 79 | boxes: (N, 4) ndarray of float 80 | query_boxes: (K, 4) ndarray of float 81 | Returns 82 | ------- 83 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 84 | """ 85 | cdef unsigned int N = boxes.shape[0] 86 | cdef unsigned int K = query_boxes.shape[0] 87 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 88 | cdef DTYPE_t iw, ih, box_area 89 | cdef DTYPE_t ua 90 | cdef unsigned int k, n 91 | for k in range(K): 92 | box_area = ( 93 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 94 | 
(query_boxes[k, 3] - query_boxes[k, 1] + 1) 95 | ) 96 | for n in range(N): 97 | iw = ( 98 | min(boxes[n, 2], query_boxes[k, 2]) - 99 | max(boxes[n, 0], query_boxes[k, 0]) + 1 100 | ) 101 | if iw > 0: 102 | ih = ( 103 | min(boxes[n, 3], query_boxes[k, 3]) - 104 | max(boxes[n, 1], query_boxes[k, 1]) + 1 105 | ) 106 | if ih > 0: 107 | intersec[n, k] = iw * ih / box_area 108 | return intersec -------------------------------------------------------------------------------- /lib/get_union_boxes.py: -------------------------------------------------------------------------------- 1 | """ 2 | credits to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/network.py#L91 3 | """ 4 | 5 | import torch 6 | from torch.autograd import Variable 7 | from torch.nn import functional as F 8 | from lib.fpn.roi_align.functions.roi_align import RoIAlignFunction 9 | from lib.draw_rectangles.draw_rectangles import draw_union_boxes 10 | import numpy as np 11 | from torch.nn.modules.module import Module 12 | from torch import nn 13 | from config import BATCHNORM_MOMENTUM 14 | 15 | class UnionBoxesAndFeats(Module): 16 | def __init__(self, pooling_size=7, stride=16, dim=256, concat=False, use_feats=True): 17 | """ 18 | :param pooling_size: Pool the union boxes to this dimension 19 | :param stride: pixel spacing in the entire image 20 | :param dim: Dimension of the feats 21 | :param concat: Whether to concat (yes) or add (False) the representations 22 | """ 23 | super(UnionBoxesAndFeats, self).__init__() 24 | 25 | self.pooling_size = pooling_size 26 | self.stride = stride 27 | 28 | self.dim = dim 29 | self.use_feats = use_feats 30 | 31 | self.conv = nn.Sequential( 32 | nn.Conv2d(2, dim //2, kernel_size=7, stride=2, padding=3, bias=True), 33 | nn.ReLU(inplace=True), 34 | nn.BatchNorm2d(dim//2, momentum=BATCHNORM_MOMENTUM), 35 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 36 | nn.Conv2d(dim // 2, dim, kernel_size=3, stride=1, padding=1, bias=True), 37 | nn.ReLU(inplace=True), 38 | nn.BatchNorm2d(dim, momentum=BATCHNORM_MOMENTUM), 39 | ) 40 | self.concat = concat 41 | 42 | def forward(self, fmap, rois, union_inds): 43 | union_pools = union_boxes(fmap, rois, union_inds, pooling_size=self.pooling_size, stride=self.stride) 44 | if not self.use_feats: 45 | return union_pools.detach() 46 | 47 | pair_rois = torch.cat((rois[:, 1:][union_inds[:, 0]], rois[:, 1:][union_inds[:, 1]]),1).data.cpu().numpy() 48 | # rects_np = get_rect_features(pair_rois, self.pooling_size*2-1) - 0.5 49 | rects_np = draw_union_boxes(pair_rois, self.pooling_size*4-1) - 0.5 50 | rects = Variable(torch.FloatTensor(rects_np).cuda(fmap.get_device()), volatile=fmap.volatile) 51 | if self.concat: 52 | return torch.cat((union_pools, self.conv(rects)), 1) 53 | return union_pools + self.conv(rects) 54 | 55 | # def get_rect_features(roi_pairs, pooling_size): 56 | # rects_np = draw_union_boxes(roi_pairs, pooling_size) 57 | # # add union + intersection 58 | # stuff_to_cat = [ 59 | # rects_np.max(1), 60 | # rects_np.min(1), 61 | # np.minimum(1-rects_np[:,0], rects_np[:,1]), 62 | # np.maximum(1-rects_np[:,0], rects_np[:,1]), 63 | # np.minimum(rects_np[:,0], 1-rects_np[:,1]), 64 | # np.maximum(rects_np[:,0], 1-rects_np[:,1]), 65 | # np.minimum(1-rects_np[:,0], 1-rects_np[:,1]), 66 | # np.maximum(1-rects_np[:,0], 1-rects_np[:,1]), 67 | # ] 68 | # rects_np = np.concatenate([rects_np] + [x[:,None] for x in stuff_to_cat], 1) 69 | # return rects_np 70 | 71 | 72 | def union_boxes(fmap, rois, union_inds, pooling_size=14, stride=16): 73 | """ 74 | 
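Pools a feature map for the union of every RoI pair: for each (roi_ind1, roi_ind2) the smallest box enclosing both RoIs is formed, and RoIAlign is applied to that union box.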
:param fmap: (batch_size, d, IM_SIZE/stride, IM_SIZE/stride) 75 | :param rois: (num_rois, 5) with [im_ind, x1, y1, x2, y2] 76 | :param union_inds: (num_urois, 2) with [roi_ind1, roi_ind2] 77 | :param pooling_size: we'll resize to this 78 | :param stride: 79 | :return: 80 | """ 81 | assert union_inds.size(1) == 2 82 | im_inds = rois[:,0][union_inds[:,0]] 83 | assert (im_inds.data == rois.data[:,0][union_inds[:,1]]).sum() == union_inds.size(0) 84 | union_rois = torch.cat(( 85 | im_inds[:,None], 86 | torch.min(rois[:, 1:3][union_inds[:, 0]], rois[:, 1:3][union_inds[:, 1]]), 87 | torch.max(rois[:, 3:5][union_inds[:, 0]], rois[:, 3:5][union_inds[:, 1]]), 88 | ),1) 89 | 90 | # (num_rois, d, pooling_size, pooling_size) 91 | union_pools = RoIAlignFunction(pooling_size, pooling_size, 92 | spatial_scale=1/stride)(fmap, union_rois) 93 | return union_pools 94 | 95 | -------------------------------------------------------------------------------- /lib/fpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from config import IM_SCALE 8 | 9 | import numpy as np 10 | 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | # array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | # [-167., -343., 184., 360.]]) 38 | 39 | def generate_anchors(base_size=16, feat_stride=16, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)): 40 | """ A wrapper function to generate anchors given different scales 41 | Also return the number of anchors in variable 'length' 42 | """ 43 | anchors = generate_base_anchors(base_size=base_size, 44 | ratios=np.array(anchor_ratios), 45 | scales=np.array(anchor_scales)) 46 | A = anchors.shape[0] 47 | shift_x = np.arange(0, IM_SCALE // feat_stride) * feat_stride # Same as shift_x 48 | shift_x, shift_y = np.meshgrid(shift_x, shift_x) 49 | 50 | shifts = np.stack([shift_x, shift_y, shift_x, shift_y], -1) # h, w, 4 51 | all_anchors = shifts[:, :, None] + anchors[None, None] #h, w, A, 4 52 | return all_anchors 53 | 54 | # shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 55 | # K = shifts.shape[0] 56 | # # width changes faster, so here it is H, W, C 57 | # anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 58 | # anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 59 | # length = np.int32(anchors.shape[0]) 60 | 61 | 62 | def generate_base_anchors(base_size=16, ratios=[0.5, 1, 2], scales=2 ** np.arange(3, 6)): 63 | """ 64 | Generate anchor (reference) windows by enumerating aspect ratios X 65 | scales wrt a reference (0, 0, 15, 15) window. 
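With the default 3 ratios and 3 scales this yields the 9 base anchors tabulated in the comment block near the top of this file.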
66 | """ 67 | 68 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 69 | ratio_anchors = _ratio_enum(base_anchor, ratios) 70 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 71 | for i in range(ratio_anchors.shape[0])]) 72 | return anchors 73 | 74 | 75 | def _whctrs(anchor): 76 | """ 77 | Return width, height, x center, and y center for an anchor (window). 78 | """ 79 | 80 | w = anchor[2] - anchor[0] + 1 81 | h = anchor[3] - anchor[1] + 1 82 | x_ctr = anchor[0] + 0.5 * (w - 1) 83 | y_ctr = anchor[1] + 0.5 * (h - 1) 84 | return w, h, x_ctr, y_ctr 85 | 86 | 87 | def _mkanchors(ws, hs, x_ctr, y_ctr): 88 | """ 89 | Given a vector of widths (ws) and heights (hs) around a center 90 | (x_ctr, y_ctr), output a set of anchors (windows). 91 | """ 92 | 93 | ws = ws[:, np.newaxis] 94 | hs = hs[:, np.newaxis] 95 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 96 | y_ctr - 0.5 * (hs - 1), 97 | x_ctr + 0.5 * (ws - 1), 98 | y_ctr + 0.5 * (hs - 1))) 99 | return anchors 100 | 101 | 102 | def _ratio_enum(anchor, ratios): 103 | """ 104 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 105 | """ 106 | 107 | w, h, x_ctr, y_ctr = _whctrs(anchor) 108 | size = w * h 109 | size_ratios = size / ratios 110 | # NOTE: CHANGED TO NOT HAVE ROUNDING 111 | ws = np.sqrt(size_ratios) 112 | hs = ws * ratios 113 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 114 | return anchors 115 | 116 | 117 | def _scale_enum(anchor, scales): 118 | """ 119 | Enumerate a set of anchors for each scale wrt an anchor. 120 | """ 121 | 122 | w, h, x_ctr, y_ctr = _whctrs(anchor) 123 | ws = w * scales 124 | hs = h * scales 125 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 126 | return anchors 127 | -------------------------------------------------------------------------------- /dataloaders/image_transforms.py: -------------------------------------------------------------------------------- 1 | # Some image transforms 2 | 3 | from PIL import Image, ImageOps, ImageFilter, ImageEnhance 4 | import numpy as np 5 | from random import randint 6 | # All of these need to be called on PIL imagez 7 | 8 | class SquarePad(object): 9 | def __call__(self, img): 10 | w, h = img.size 11 | img_padded = ImageOps.expand(img, border=(0, 0, max(h - w, 0), max(w - h, 0)), 12 | fill=(int(0.485 * 256), int(0.456 * 256), int(0.406 * 256))) 13 | return img_padded 14 | 15 | 16 | class Grayscale(object): 17 | """ 18 | Converts to grayscale (not always, sometimes). 19 | """ 20 | def __call__(self, img): 21 | factor = np.sqrt(np.sqrt(np.random.rand(1))) 22 | # print("gray {}".format(factor)) 23 | enhancer = ImageEnhance.Color(img) 24 | return enhancer.enhance(factor) 25 | 26 | 27 | class Brightness(object): 28 | """ 29 | Converts to grayscale (not always, sometimes). 30 | """ 31 | def __call__(self, img): 32 | factor = np.random.randn(1)/6+1 33 | factor = min(max(factor, 0.5), 1.5) 34 | # print("brightness {}".format(factor)) 35 | 36 | enhancer = ImageEnhance.Brightness(img) 37 | return enhancer.enhance(factor) 38 | 39 | 40 | class Contrast(object): 41 | """ 42 | Converts to grayscale (not always, sometimes). 
43 | """ 44 | def __call__(self, img): 45 | factor = np.random.randn(1)/8+1.0 46 | factor = min(max(factor, 0.5), 1.5) 47 | # print("contrast {}".format(factor)) 48 | 49 | enhancer = ImageEnhance.Contrast(img) 50 | return enhancer.enhance(factor) 51 | 52 | 53 | class Hue(object): 54 | """ 55 | Converts to grayscale 56 | """ 57 | def __call__(self, img): 58 | # 30 seems good 59 | factor = int(np.random.randn(1)*8) 60 | factor = min(max(factor, -30), 30) 61 | factor = np.array(factor, dtype=np.uint8) 62 | 63 | hsv = np.array(img.convert('HSV')) 64 | hsv[:,:,0] += factor 65 | new_img = Image.fromarray(hsv, 'HSV').convert('RGB') 66 | 67 | return new_img 68 | 69 | 70 | class Sharpness(object): 71 | """ 72 | Converts to grayscale 73 | """ 74 | def __call__(self, img): 75 | factor = 1.0 + np.random.randn(1)/5 76 | # print("sharpness {}".format(factor)) 77 | enhancer = ImageEnhance.Sharpness(img) 78 | return enhancer.enhance(factor) 79 | 80 | 81 | def random_crop(img, boxes, box_scale, round_boxes=True, max_crop_fraction=0.1): 82 | """ 83 | Randomly crops the image 84 | :param img: PIL image 85 | :param boxes: Ground truth boxes 86 | :param box_scale: This is the scale that the boxes are at (e.g. 1024 wide). We'll preserve that ratio 87 | :param round_boxes: Set this to true if we're going to round the boxes to ints 88 | :return: Cropped image, new boxes 89 | """ 90 | 91 | w, h = img.size 92 | 93 | max_crop_w = int(w*max_crop_fraction) 94 | max_crop_h = int(h*max_crop_fraction) 95 | boxes_scaled = boxes * max(w,h) / box_scale 96 | max_to_crop_top = min(int(boxes_scaled[:, 1].min()), max_crop_h) 97 | max_to_crop_left = min(int(boxes_scaled[:, 0].min()), max_crop_w) 98 | max_to_crop_right = min(int(w - boxes_scaled[:, 2].max()), max_crop_w) 99 | max_to_crop_bottom = min(int(h - boxes_scaled[:, 3].max()), max_crop_h) 100 | 101 | crop_top = randint(0, max(max_to_crop_top, 0)) 102 | crop_left = randint(0, max(max_to_crop_left, 0)) 103 | crop_right = randint(0, max(max_to_crop_right, 0)) 104 | crop_bottom = randint(0, max(max_to_crop_bottom, 0)) 105 | img_cropped = img.crop((crop_left, crop_top, w - crop_right, h - crop_bottom)) 106 | 107 | new_boxes = box_scale / max(img_cropped.size) * np.column_stack( 108 | (boxes_scaled[:,0]-crop_left, boxes_scaled[:,1]-crop_top, boxes_scaled[:,2]-crop_left, boxes_scaled[:,3]-crop_top)) 109 | 110 | if round_boxes: 111 | new_boxes = np.round(new_boxes).astype(np.int32) 112 | return img_cropped, new_boxes 113 | 114 | 115 | class RandomOrder(object): 116 | """ Composes several transforms together in random order - or not at all! 117 | """ 118 | 119 | def __init__(self, transforms): 120 | self.transforms = transforms 121 | 122 | def __call__(self, img): 123 | if self.transforms is None: 124 | return img 125 | num_to_pick = np.random.choice(len(self.transforms)) 126 | if num_to_pick == 0: 127 | return img 128 | 129 | order = np.random.choice(len(self.transforms), size=num_to_pick, replace=False) 130 | for i in order: 131 | img = self.transforms[i](img) 132 | return img -------------------------------------------------------------------------------- /lib/fpn/anchor_targets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates anchor targets to train the detector. Does this during the collate step in training 3 | as it's much cheaper to do this on a separate thread. 4 | 5 | Heavily adapted from faster_rcnn/rpn_msr/anchor_target_layer.py. 
6 | """ 7 | import numpy as np 8 | import numpy.random as npr 9 | 10 | from config import IM_SCALE, RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, \ 11 | RPN_BATCHSIZE, RPN_FG_FRACTION, ANCHOR_SIZE, ANCHOR_SCALES, ANCHOR_RATIOS 12 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps 13 | from lib.fpn.generate_anchors import generate_anchors 14 | 15 | 16 | def anchor_target_layer(gt_boxes, im_size, 17 | allowed_border=0): 18 | """ 19 | Assign anchors to ground-truth targets. Produces anchor classification 20 | labels and bounding-box regression targets. 21 | 22 | for each (H, W) location i 23 | generate 3 anchor boxes centered on cell i 24 | filter out-of-image anchors 25 | measure GT overlap 26 | 27 | :param gt_boxes: [x1, y1, x2, y2] boxes. These are assumed to be at the same scale as 28 | the image (IM_SCALE) 29 | :param im_size: Size of the image (h, w). This is assumed to be scaled to IM_SCALE 30 | """ 31 | if max(im_size) != IM_SCALE: 32 | raise ValueError("im size is {}".format(im_size)) 33 | h, w = im_size 34 | 35 | # Get the indices of the anchors in the feature map. 36 | # h, w, A, 4 37 | ans_np = generate_anchors(base_size=ANCHOR_SIZE, 38 | feat_stride=16, 39 | anchor_scales=ANCHOR_SCALES, 40 | anchor_ratios=ANCHOR_RATIOS, 41 | ) 42 | ans_np_flat = ans_np.reshape((-1, 4)) 43 | inds_inside = np.where( 44 | (ans_np_flat[:, 0] >= -allowed_border) & 45 | (ans_np_flat[:, 1] >= -allowed_border) & 46 | (ans_np_flat[:, 2] < w + allowed_border) & # width 47 | (ans_np_flat[:, 3] < h + allowed_border) # height 48 | )[0] 49 | good_ans_flat = ans_np_flat[inds_inside] 50 | if good_ans_flat.size == 0: 51 | raise ValueError("There were no good anchors for an image of size {} with boxes {}".format(im_size, gt_boxes)) 52 | 53 | # overlaps between the anchors and the gt boxes [num_anchors, num_gtboxes] 54 | overlaps = bbox_overlaps(good_ans_flat, gt_boxes) 55 | anchor_to_gtbox = overlaps.argmax(axis=1) 56 | max_overlaps = overlaps[np.arange(anchor_to_gtbox.shape[0]), anchor_to_gtbox] 57 | gtbox_to_anchor = overlaps.argmax(axis=0) 58 | gt_max_overlaps = overlaps[gtbox_to_anchor, np.arange(overlaps.shape[1])] 59 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 60 | 61 | # Good anchors are those that match SOMEWHERE within a decent tolerance 62 | # label: 1 is positive, 0 is negative, -1 is dont care. 
63 | # assign bg labels first so that positive labels can clobber them 64 | labels = (-1) * np.ones(overlaps.shape[0], dtype=np.int64) 65 | labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0 66 | labels[gt_argmax_overlaps] = 1 67 | labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1 68 | 69 | # subsample positive labels if we have too many 70 | num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE) 71 | fg_inds = np.where(labels == 1)[0] 72 | if len(fg_inds) > num_fg: 73 | labels[npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)] = -1 74 | 75 | # subsample negative labels if we have too many 76 | num_bg = RPN_BATCHSIZE - np.sum(labels == 1) 77 | bg_inds = np.where(labels == 0)[0] 78 | if len(bg_inds) > num_bg: 79 | labels[npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)] = -1 80 | # print("{} fg {} bg ratio{:.3f} inds inside {}".format(RPN_BATCHSIZE-num_bg, num_bg, (RPN_BATCHSIZE-num_bg)/RPN_BATCHSIZE, inds_inside.shape[0])) 81 | 82 | 83 | # Get the labels at the original size 84 | labels_unmap = (-1) * np.ones(ans_np_flat.shape[0], dtype=np.int64) 85 | labels_unmap[inds_inside] = labels 86 | 87 | # h, w, A 88 | labels_unmap_res = labels_unmap.reshape(ans_np.shape[:-1]) 89 | anchor_inds = np.column_stack(np.where(labels_unmap_res >= 0)) 90 | 91 | # These ought to be in the same order 92 | anchor_inds_flat = np.where(labels >= 0)[0] 93 | anchors = good_ans_flat[anchor_inds_flat] 94 | bbox_targets = gt_boxes[anchor_to_gtbox[anchor_inds_flat]] 95 | labels = labels[anchor_inds_flat] 96 | 97 | assert np.all(labels >= 0) 98 | 99 | 100 | # Anchors: [num_used, 4] 101 | # Anchor_inds: [num_used, 3] (h, w, A) 102 | # bbox_targets: [num_used, 4] 103 | # labels: [num_used] 104 | 105 | return anchors, anchor_inds, bbox_targets, labels 106 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_det.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import numpy.random as npr 4 | from config import BG_THRESH_HI, BG_THRESH_LO, FG_FRACTION, ROIS_PER_IMG 5 | from lib.fpn.box_utils import bbox_overlaps 6 | from lib.pytorch_misc import to_variable 7 | import torch 8 | 9 | ############################################################# 10 | # The following is only for object detection 11 | @to_variable 12 | def proposal_assignments_det(rpn_rois, gt_boxes, gt_classes, image_offset, fg_thresh=0.5): 13 | """ 14 | Assign object detection proposals to ground-truth targets. Produces proposal 15 | classification labels and bounding-box regression targets. 16 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 17 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1 18 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 19 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets [num_rois, 4] array of targets for the labels. 
24 | """ 25 | fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION)) 26 | 27 | gt_img_inds = gt_classes[:, 0] - image_offset 28 | 29 | all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0) 30 | 31 | ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0) 32 | 33 | im_sorted, idx = torch.sort(ims_per_box, 0) 34 | all_boxes = all_boxes[idx] 35 | 36 | # Assume that the GT boxes are already sorted in terms of image id 37 | num_images = int(im_sorted[-1]) + 1 38 | 39 | labels = [] 40 | rois = [] 41 | bbox_targets = [] 42 | for im_ind in range(num_images): 43 | g_inds = (gt_img_inds == im_ind).nonzero() 44 | 45 | if g_inds.dim() == 0: 46 | continue 47 | g_inds = g_inds.squeeze(1) 48 | g_start = g_inds[0] 49 | g_end = g_inds[-1] + 1 50 | 51 | t_inds = (im_sorted == im_ind).nonzero().squeeze(1) 52 | t_start = t_inds[0] 53 | t_end = t_inds[-1] + 1 54 | 55 | # Max overlaps: for each predicted box, get the max ROI 56 | # Get the indices into the GT boxes too (must offset by the box start) 57 | ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end]) 58 | max_overlaps, gt_assignment = ious.max(1) 59 | max_overlaps = max_overlaps.cpu().numpy() 60 | # print("Best overlap is {}".format(max_overlaps.max())) 61 | # print("\ngt assignment is {} while g_start is {} \n ---".format(gt_assignment, g_start)) 62 | gt_assignment += g_start 63 | 64 | keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh, fg_rois_per_image, 65 | ROIS_PER_IMG) 66 | 67 | if keep_inds_np.size == 0: 68 | continue 69 | 70 | keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device()) 71 | 72 | labels_ = gt_classes[:, 1][gt_assignment[keep_inds]] 73 | bbox_target_ = gt_boxes[gt_assignment[keep_inds]] 74 | 75 | # Clamp labels_ for the background RoIs to 0 76 | if num_fg < labels_.size(0): 77 | labels_[num_fg:] = 0 78 | 79 | rois_ = torch.cat(( 80 | im_sorted[t_start:t_end, None][keep_inds].float(), 81 | all_boxes[t_start:t_end][keep_inds], 82 | ), 1) 83 | 84 | labels.append(labels_) 85 | rois.append(rois_) 86 | bbox_targets.append(bbox_target_) 87 | 88 | rois = torch.cat(rois, 0) 89 | labels = torch.cat(labels, 0) 90 | bbox_targets = torch.cat(bbox_targets, 0) 91 | return rois, labels, bbox_targets 92 | 93 | 94 | def _sel_inds(max_overlaps, fg_thresh=0.5, fg_rois_per_image=128, rois_per_image=256): 95 | # Select foreground RoIs as those with >= FG_THRESH overlap 96 | fg_inds = np.where(max_overlaps >= fg_thresh)[0] 97 | 98 | # Guard against the case when an image has fewer than fg_rois_per_image 99 | # foreground RoIs 100 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.shape[0]) 101 | # Sample foreground regions without replacement 102 | if fg_inds.size > 0: 103 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 104 | 105 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 106 | bg_inds = np.where((max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO))[0] 107 | 108 | # Compute number of background RoIs to take from this image (guarding 109 | # against there being fewer than desired) 110 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 111 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 112 | # Sample background regions without replacement 113 | if bg_inds.size > 0: 114 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 115 | 116 | return np.append(fg_inds, bg_inds), fg_rois_per_this_image 117 | 118 | 
-------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 4 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 0]; 49 | block_boxes[threadIdx.x * 4 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 1]; 51 | block_boxes[threadIdx.x * 4 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 2]; 53 | block_boxes[threadIdx.x * 4 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 3]; 55 | } 56 | __syncthreads(); 57 | 58 | if (threadIdx.x < row_size) { 59 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 60 | const float *cur_box = dev_boxes + cur_box_idx * 4; 61 | int i = 0; 62 | unsigned long long t = 0; 63 | int start = 0; 64 | if (row_start == col_start) { 65 | start = threadIdx.x + 1; 66 | } 67 | for (i = start; i < col_size; i++) { 68 | if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) { 69 | t |= 1ULL << i; 70 | } 71 | } 72 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 73 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 74 | } 75 | } 76 | 77 | void _set_device(int device_id) { 78 | int current_device; 79 | CUDA_CHECK(cudaGetDevice(¤t_device)); 80 | if (current_device == device_id) { 81 | return; 82 | } 83 | // The call to cudaSetDevice must come before any calls to Get, which 84 | // may perform initialization using the GPU. 
85 | CUDA_CHECK(cudaSetDevice(device_id)); 86 | } 87 | 88 | extern "C" int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num, 89 | float nms_overlap_thresh, int device_id) { 90 | _set_device(device_id); 91 | 92 | unsigned long long* mask_dev = NULL; 93 | 94 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 95 | 96 | CUDA_CHECK(cudaMalloc(&mask_dev, 97 | boxes_num * col_blocks * sizeof(unsigned long long))); 98 | 99 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 100 | DIVUP(boxes_num, threadsPerBlock)); 101 | dim3 threads(threadsPerBlock); 102 | nms_kernel<<>>(boxes_num, 103 | nms_overlap_thresh, 104 | boxes_dev, 105 | mask_dev); 106 | 107 | std::vector mask_host(boxes_num * col_blocks); 108 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 109 | mask_dev, 110 | sizeof(unsigned long long) * boxes_num * col_blocks, 111 | cudaMemcpyDeviceToHost)); 112 | 113 | std::vector remv(col_blocks); 114 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 115 | 116 | int num_to_keep = 0; 117 | for (int i = 0; i < boxes_num; i++) { 118 | int nblock = i / threadsPerBlock; 119 | int inblock = i % threadsPerBlock; 120 | 121 | if (!(remv[nblock] & (1ULL << inblock))) { 122 | keep_out[num_to_keep++] = i; 123 | unsigned long long *p = &mask_host[0] + i * col_blocks; 124 | for (int j = nblock; j < col_blocks; j++) { 125 | remv[j] |= p[j]; 126 | } 127 | } 128 | } 129 | 130 | CUDA_CHECK(cudaFree(mask_dev)); 131 | return num_to_keep; 132 | } 133 | -------------------------------------------------------------------------------- /lib/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | from torchvision.models.resnet import model_urls, conv3x3, BasicBlock 5 | from torchvision.models.vgg import vgg16 6 | from config import BATCHNORM_MOMENTUM 7 | 8 | class Bottleneck(nn.Module): 9 | expansion = 4 10 | 11 | def __init__(self, inplanes, planes, stride=1, downsample=None, relu_end=True): 12 | super(Bottleneck, self).__init__() 13 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM) 15 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM) 18 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 19 | self.bn3 = nn.BatchNorm2d(planes * 4, momentum=BATCHNORM_MOMENTUM) 20 | self.relu = nn.ReLU(inplace=True) 21 | self.downsample = downsample 22 | self.stride = stride 23 | self.relu_end = relu_end 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out = self.bn2(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv3(out) 37 | out = self.bn3(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | 44 | if self.relu_end: 45 | out = self.relu(out) 46 | return out 47 | 48 | 49 | class ResNet(nn.Module): 50 | 51 | def __init__(self, block, layers, num_classes=1000): 52 | self.inplanes = 64 53 | super(ResNet, self).__init__() 54 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 55 | bias=False) 56 | self.bn1 = nn.BatchNorm2d(64, momentum=BATCHNORM_MOMENTUM) 57 | self.relu = nn.ReLU(inplace=True) 58 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 59 | 
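# The four residual stages below follow the standard ResNet-101 layout, except that layer4 is
# built with stride=1 (see the HACK note) so the backbone output stays at 1/16 of the input
# resolution, which is the stride the RoIAlign-based layers elsewhere in lib/ assume.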
self.layer1 = self._make_layer(block, 64, layers[0]) 60 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 61 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 62 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1) # HACK 63 | self.avgpool = nn.AvgPool2d(7) 64 | self.fc = nn.Linear(512 * block.expansion, num_classes) 65 | 66 | for m in self.modules(): 67 | if isinstance(m, nn.Conv2d): 68 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 69 | m.weight.data.normal_(0, math.sqrt(2. / n)) 70 | elif isinstance(m, nn.BatchNorm2d): 71 | m.weight.data.fill_(1) 72 | m.bias.data.zero_() 73 | 74 | def _make_layer(self, block, planes, blocks, stride=1): 75 | downsample = None 76 | if stride != 1 or self.inplanes != planes * block.expansion: 77 | downsample = nn.Sequential( 78 | nn.Conv2d(self.inplanes, planes * block.expansion, 79 | kernel_size=1, stride=stride, bias=False), 80 | nn.BatchNorm2d(planes * block.expansion, momentum=BATCHNORM_MOMENTUM), 81 | ) 82 | 83 | layers = [] 84 | layers.append(block(self.inplanes, planes, stride, downsample)) 85 | self.inplanes = planes * block.expansion 86 | for i in range(1, blocks): 87 | layers.append(block(self.inplanes, planes)) 88 | 89 | return nn.Sequential(*layers) 90 | 91 | def forward(self, x): 92 | x = self.conv1(x) 93 | x = self.bn1(x) 94 | x = self.relu(x) 95 | x = self.maxpool(x) 96 | 97 | x = self.layer1(x) 98 | x = self.layer2(x) 99 | x = self.layer3(x) 100 | x = self.layer4(x) 101 | 102 | x = self.avgpool(x) 103 | x = x.view(x.size(0), -1) 104 | x = self.fc(x) 105 | 106 | return x 107 | 108 | def resnet101(pretrained=False, **kwargs): 109 | """Constructs a ResNet-101 model. 110 | 111 | Args: 112 | pretrained (bool): If True, returns a model pre-trained on ImageNet 113 | """ 114 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 115 | if pretrained: 116 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 117 | return model 118 | 119 | def resnet_l123(): 120 | model = resnet101(pretrained=True) 121 | del model.layer4 122 | del model.avgpool 123 | del model.fc 124 | return model 125 | 126 | def resnet_l4(relu_end=True): 127 | model = resnet101(pretrained=True) 128 | l4 = model.layer4 129 | if not relu_end: 130 | l4[-1].relu_end = False 131 | l4[0].conv2.stride = (1, 1) 132 | l4[0].downsample[0].stride = (1, 1) 133 | return l4 134 | 135 | def vgg_fc(relu_end=True, linear_end=True): 136 | model = vgg16(pretrained=True) 137 | vfc = model.classifier 138 | del vfc._modules['6'] # Get rid of linear layer 139 | del vfc._modules['5'] # Get rid of linear layer 140 | if not relu_end: 141 | del vfc._modules['4'] # Get rid of linear layer 142 | if not linear_end: 143 | del vfc._modules['3'] 144 | return vfc 145 | 146 | 147 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_postnms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from .proposal_assignments_rel import _sel_rels 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable 11 | import torch 12 | 13 | 14 | @to_variable 15 | def proposal_assignments_postnms( 16 | rois, gt_boxes, gt_classes, gt_rels, nms_inds, image_offset, fg_thresh=0.5, 17 | max_objs=100, 
max_rels=100, rand_val=0.01): 18 | """ 19 | Assign object detection proposals to ground-truth targets. Produces proposal 20 | classification labels and bounding-box regression targets. 21 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 22 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 23 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 24 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 25 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 26 | :return: 27 | rois: [num_rois, 5] 28 | labels: [num_rois] array of labels 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 30 | """ 31 | pred_inds_np = rois[:, 0].cpu().numpy().astype(np.int64) 32 | pred_boxes_np = rois[:, 1:].cpu().numpy() 33 | nms_inds_np = nms_inds.cpu().numpy() 34 | sup_inds_np = np.setdiff1d(np.arange(pred_boxes_np.shape[0]), nms_inds_np) 35 | 36 | # split into chosen and suppressed 37 | chosen_inds_np = pred_inds_np[nms_inds_np] 38 | chosen_boxes_np = pred_boxes_np[nms_inds_np] 39 | 40 | suppre_inds_np = pred_inds_np[sup_inds_np] 41 | suppre_boxes_np = pred_boxes_np[sup_inds_np] 42 | 43 | gt_boxes_np = gt_boxes.cpu().numpy() 44 | gt_classes_np = gt_classes.cpu().numpy() 45 | gt_rels_np = gt_rels.cpu().numpy() 46 | 47 | gt_classes_np[:, 0] -= image_offset 48 | gt_rels_np[:, 0] -= image_offset 49 | 50 | num_im = gt_classes_np[:, 0].max()+1 51 | 52 | rois = [] 53 | obj_labels = [] 54 | rel_labels = [] 55 | num_box_seen = 0 56 | 57 | for im_ind in range(num_im): 58 | chosen_ind = np.where(chosen_inds_np == im_ind)[0] 59 | suppre_ind = np.where(suppre_inds_np == im_ind)[0] 60 | 61 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 62 | gt_boxes_i = gt_boxes_np[gt_ind] 63 | gt_classes_i = gt_classes_np[gt_ind, 1] 64 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 65 | 66 | # Get IOUs between chosen and GT boxes and if needed we'll add more in 67 | 68 | chosen_boxes_i = chosen_boxes_np[chosen_ind] 69 | suppre_boxes_i = suppre_boxes_np[suppre_ind] 70 | 71 | n_chosen = chosen_boxes_i.shape[0] 72 | n_suppre = suppre_boxes_i.shape[0] 73 | n_gt_box = gt_boxes_i.shape[0] 74 | 75 | # add a teensy bit of random noise because some GT boxes might be duplicated, etc. 76 | pred_boxes_i = np.concatenate((chosen_boxes_i, suppre_boxes_i, gt_boxes_i), 0) 77 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) + rand_val*( 78 | np.random.rand(pred_boxes_i.shape[0], gt_boxes_i.shape[0])-0.5) 79 | 80 | # Let's say that a box can only be assigned ONCE for now because we've already done 81 | # the NMS and stuff. 
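# is_hit marks (prediction, GT) pairs whose jittered IoU clears fg_thresh; argmax keeps at most
# one GT per prediction, and the np.unique(..., return_index=True) step below keeps only the first
# prediction claiming each GT, so the resulting assignment is (at most) one-to-one.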
82 | is_hit = ious > fg_thresh 83 | 84 | obj_assignments_i = is_hit.argmax(1) 85 | obj_assignments_i[~is_hit.any(1)] = -1 86 | 87 | vals, first_occurance_ind = np.unique(obj_assignments_i, return_index=True) 88 | obj_assignments_i[np.setdiff1d( 89 | np.arange(obj_assignments_i.shape[0]), first_occurance_ind)] = -1 90 | 91 | extra_to_add = np.where(obj_assignments_i[n_chosen:] != -1)[0] + n_chosen 92 | 93 | # Add them in somewhere at random 94 | num_inds_to_have = min(max_objs, n_chosen + extra_to_add.shape[0]) 95 | boxes_i = np.zeros((num_inds_to_have, 4), dtype=np.float32) 96 | labels_i = np.zeros(num_inds_to_have, dtype=np.int64) 97 | 98 | inds_from_nms = np.sort(np.random.choice(num_inds_to_have, size=n_chosen, replace=False)) 99 | inds_from_elsewhere = np.setdiff1d(np.arange(num_inds_to_have), inds_from_nms) 100 | 101 | boxes_i[inds_from_nms] = chosen_boxes_i 102 | labels_i[inds_from_nms] = gt_classes_i[obj_assignments_i[:n_chosen]] 103 | 104 | boxes_i[inds_from_elsewhere] = pred_boxes_i[extra_to_add] 105 | labels_i[inds_from_elsewhere] = gt_classes_i[obj_assignments_i[extra_to_add]] 106 | 107 | # Now, we do the relationships. same as for rle 108 | all_rels_i = _sel_rels(bbox_overlaps(boxes_i, gt_boxes_i), 109 | boxes_i, 110 | labels_i, 111 | gt_classes_i, 112 | gt_rels_i, 113 | fg_thresh=fg_thresh, 114 | fg_rels_per_image=100) 115 | all_rels_i[:,0:2] += num_box_seen 116 | 117 | rois.append(np.column_stack(( 118 | im_ind * np.ones(boxes_i.shape[0], dtype=np.float32), 119 | boxes_i, 120 | ))) 121 | obj_labels.append(labels_i) 122 | rel_labels.append(np.column_stack(( 123 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 124 | all_rels_i, 125 | ))) 126 | num_box_seen += boxes_i.size 127 | 128 | rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(gt_boxes.get_device(), async=True) 129 | labels = torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(gt_boxes.get_device(), async=True) 130 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(gt_boxes.get_device(), 131 | async=True) 132 | 133 | return rois, labels, rel_labels 134 | -------------------------------------------------------------------------------- /lib/ggnn.py: -------------------------------------------------------------------------------- 1 | # Well, this file contains modules of GGNN_obj and GGNN_rel 2 | import os, sys 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import numpy as np 7 | 8 | class GGNNObj(nn.Module): 9 | def __init__(self, num_obj_cls=151, time_step_num=3, hidden_dim=512, output_dim=512, use_knowledge=True, prior_matrix=''): 10 | super(GGNNObj, self).__init__() 11 | self.num_obj_cls = num_obj_cls 12 | self.time_step_num = time_step_num 13 | self.output_dim = output_dim 14 | 15 | if use_knowledge: 16 | matrix_np = np.load(prior_matrix).astype(np.float32) 17 | else: 18 | matrix_np = np.ones((num_obj_cls, num_obj_cls)).astype(np.float32) / num_obj_cls 19 | 20 | self.matrix = Variable(torch.from_numpy(matrix_np), requires_grad=False).cuda() 21 | # if you want to use multi gpu to run this model, then you need to use the following line code to replace the last line code. 22 | # And if you use this line code, the model will save prior matrix as parameters in saved models. 23 | # self.matrix = nn.Parameter(torch.from_numpy(matrix_np), requires_grad=False) 24 | 25 | 26 | 27 | # here we follow the paper "Gated graph sequence neural networks" to implement GGNN, so eq3 means equation 3 in this paper. 
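# For reference, the per-node update implemented in forward() below is (eqs. 2-6 of that paper):
#   a_v  = A_v^T [h_1 ... h_N]                   (eq. 2: aggregate the other nodes' states)
#   z_v  = sigmoid(W_z a_v + U_z h_v)            (eq. 3: update gate)
#   r_v  = sigmoid(W_r a_v + U_r h_v)            (eq. 4: reset gate)
#   h~_v = tanh(W a_v + U (r_v * h_v))           (eq. 5: candidate state)
#   h_v <- (1 - z_v) * h_v + z_v * h~_v          (eq. 6: GRU-style interpolation)
# The fc_eq3_*/fc_eq4_*/fc_eq5_* layers declared next are the (W, U) pairs of eqs. 3-5.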
28 | self.fc_eq3_w = nn.Linear(2*hidden_dim, hidden_dim) 29 | self.fc_eq3_u = nn.Linear(hidden_dim, hidden_dim) 30 | self.fc_eq4_w = nn.Linear(2*hidden_dim, hidden_dim) 31 | self.fc_eq4_u = nn.Linear(hidden_dim, hidden_dim) 32 | self.fc_eq5_w = nn.Linear(2*hidden_dim, hidden_dim) 33 | self.fc_eq5_u = nn.Linear(hidden_dim, hidden_dim) 34 | 35 | self.fc_output = nn.Linear(2*hidden_dim, output_dim) 36 | self.ReLU = nn.ReLU(True) 37 | self.fc_obj_cls = nn.Linear(self.num_obj_cls * output_dim, self.num_obj_cls) 38 | 39 | 40 | def forward(self, input_ggnn): 41 | # propogation process 42 | num_object = input_ggnn.size()[0] 43 | hidden = input_ggnn.repeat(1, self.num_obj_cls).view(num_object, self.num_obj_cls, -1) 44 | for t in range(self.time_step_num): 45 | # eq(2) 46 | # here we use some matrix operation skills 47 | hidden_sum = torch.sum(hidden, 0) 48 | av = torch.cat([torch.cat([self.matrix.transpose(0, 1) @ (hidden_sum - hidden_i) for hidden_i in hidden], 0), 49 | torch.cat([self.matrix @ (hidden_sum - hidden_i) for hidden_i in hidden], 0)], 1) 50 | 51 | # eq(3) 52 | hidden = hidden.view(num_object*self.num_obj_cls, -1) 53 | zv = torch.sigmoid(self.fc_eq3_w(av) + self.fc_eq3_u(hidden)) 54 | 55 | # eq(4) 56 | rv = torch.sigmoid(self.fc_eq4_w(av) + self.fc_eq3_u(hidden)) 57 | 58 | #eq(5) 59 | hv = torch.tanh(self.fc_eq5_w(av) + self.fc_eq5_u(rv * hidden)) 60 | 61 | hidden = (1 - zv) * hidden + zv * hv 62 | hidden = hidden.view(num_object, self.num_obj_cls, -1) 63 | 64 | 65 | output = torch.cat((hidden.view(num_object*self.num_obj_cls, -1), 66 | input_ggnn.repeat(1, self.num_obj_cls).view(num_object*self.num_obj_cls, -1)), 1) 67 | output = self.fc_output(output) 68 | output = self.ReLU(output) 69 | obj_dists = self.fc_obj_cls(output.view(-1, self.num_obj_cls * self.output_dim)) 70 | return obj_dists 71 | 72 | 73 | 74 | class GGNNRel(nn.Module): 75 | def __init__(self, num_rel_cls=51, time_step_num=3, hidden_dim=512, output_dim=512, use_knowledge=True, prior_matrix=''): 76 | super(GGNNRel, self).__init__() 77 | self.num_rel_cls = num_rel_cls 78 | self.time_step_num = time_step_num 79 | self.matrix = np.load(prior_matrix).astype(np.float32) 80 | self.use_knowledge = use_knowledge 81 | 82 | self.fc_eq3_w = nn.Linear(2 * hidden_dim, hidden_dim) 83 | self.fc_eq3_u = nn.Linear(hidden_dim, hidden_dim) 84 | self.fc_eq4_w = nn.Linear(2 * hidden_dim, hidden_dim) 85 | self.fc_eq4_u = nn.Linear(hidden_dim, hidden_dim) 86 | self.fc_eq5_w = nn.Linear(2 * hidden_dim, hidden_dim) 87 | self.fc_eq5_u = nn.Linear(hidden_dim, hidden_dim) 88 | 89 | self.fc_output = nn.Linear(2 * hidden_dim, output_dim) 90 | self.ReLU = nn.ReLU(True) 91 | self.fc_rel_cls = nn.Linear((self.num_rel_cls + 2) * output_dim, self.num_rel_cls) 92 | 93 | def forward(self, rel_inds, sub_obj_preds, input_ggnn): 94 | (input_rel_num, node_num, _) = input_ggnn.size() 95 | assert input_rel_num == len(rel_inds) 96 | batch_in_matrix_sub = np.zeros((input_rel_num, 2, self.num_rel_cls), dtype=np.float32) 97 | 98 | if self.use_knowledge: # construct adjacency matrix depending on the predicted labels of subject and object. 
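# batch_in_matrix_sub has shape [input_rel_num, 2, num_rel_cls]; for each relation, both rows hold
# the same prior vector over predicate classes, looked up from self.matrix at the predicted
# (subject class, object class) pair, or set to the uniform 1/num_rel_cls in the branch below when
# no knowledge is used.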
99 | for index, rel in enumerate(rel_inds): 100 | batch_in_matrix_sub[index][0] = \ 101 | self.matrix[sub_obj_preds[index, 0].cpu().data, sub_obj_preds[index, 1].cpu().data] 102 | batch_in_matrix_sub[index][1] = batch_in_matrix_sub[index][0] 103 | else: 104 | for index, rel in enumerate(rel_inds): 105 | batch_in_matrix_sub[index][0] = 1.0 / float(self.num_rel_cls) 106 | batch_in_matrix_sub[index][1] = batch_in_matrix_sub[index][0] 107 | batch_in_matrix_sub_gpu = Variable(torch.from_numpy(batch_in_matrix_sub), requires_grad=False).cuda() 108 | del batch_in_matrix_sub 109 | 110 | hidden = input_ggnn 111 | for t in range(self.time_step_num): 112 | # eq(2) 113 | # because in this case, A^(out) == A^(in), so we use function "repeat" 114 | # What is A^(out) and A^(in)? Please refer to paper "Gated graph sequence neural networks" 115 | av = torch.cat((torch.bmm(batch_in_matrix_sub_gpu, hidden[:, 2:]), 116 | torch.bmm(batch_in_matrix_sub_gpu.transpose(1, 2), hidden[:, :2])), 1).repeat(1, 1, 2) 117 | av = av.view(input_rel_num * node_num, -1) 118 | flatten_hidden = hidden.view(input_rel_num * node_num, -1) 119 | # eq(3) 120 | zv = torch.sigmoid(self.fc_eq3_w(av) + self.fc_eq3_u(flatten_hidden)) 121 | # eq(4) 122 | rv = torch.sigmoid(self.fc_eq4_w(av) + self.fc_eq3_u(flatten_hidden)) 123 | #eq(5) 124 | hv = torch.tanh(self.fc_eq5_w(av) + self.fc_eq5_u(rv * flatten_hidden)) 125 | flatten_hidden = (1 - zv) * flatten_hidden + zv * hv 126 | hidden = flatten_hidden.view(input_rel_num, node_num, -1) 127 | 128 | output = torch.cat((flatten_hidden, input_ggnn.view(input_rel_num * node_num, -1)), 1) 129 | output = self.fc_output(output) 130 | output = self.ReLU(output) 131 | 132 | rel_dists = self.fc_rel_cls(output.view(input_rel_num, -1)) 133 | return rel_dists 134 | 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graph Bridging Network (GB-Net) 2 | Code for the ECCV 2020 paper: [Bridging Knowledge Graphs to Generate Scene Graphs](https://arxiv.org/pdf/2001.02314.pdf) 3 | ``` 4 | @InProceedings{Zareian_2020_ECCV, 5 | author = {Zareian, Alireza and Karaman, Svebor and Chang, Shih-Fu}, 6 | title = {Bridging Knowledge Graphs to Generate Scene Graphs}, 7 | booktitle = {Proceedings of the European conference on computer vision (ECCV)}, 8 | month = {August}, 9 | year = {2020} 10 | } 11 | ``` 12 | 13 | Instructions to reproduce all numbers in Table 1 and Table 2 of our paper: 14 | 15 | First, download and unpack Visual Genome images: [part 1](https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip) and [part 2](https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip) 16 | 17 | Extract these two zip files and put the images in the same folder. 18 | 19 | Then download VG metadata preprocessed by \[37\]: [annotations](http://svl.stanford.edu/projects/scene-graph/dataset/VG-SGG.h5), [class info](http://svl.stanford.edu/projects/scene-graph/dataset/VG-SGG-dicts.json), and [image metadata](http://svl.stanford.edu/projects/scene-graph/VG/image_data.json) 20 | 21 | Copy those three files into a single folder. 22 | 23 | Then update `config.py` with the path to the aforementioned data, as well as the absolute path to this directory. 24 | 25 | Now download the pretrained faster r-cnn checkpoint trained by [42] from https://www.dropbox.com/s/cfyqhskypu7tp0q/vg-24.tar?dl=0 and place it in `checkpoints/vgdet`. 26 | 27 | The next step is to configure a python environment and install pytorch.
To do that, first make sure CUDA 9 is installed, and then download https://download.pytorch.org/whl/cu90/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl and pip install the downloaded `whl` file. Then install the rest of required packages by running `pip install -r requirements.txt`. This includes jupyter, as you need it to run the notebooks. 28 | 29 | Finally, run the following to produce numbers for each table (In some cases order matters): 30 | ``` 31 | Table 1, Column 8, Rows 17-24: train: ipynb/train_predcls/0045.ipynb, evaluate: ipynb/eval_predcls/0011.ipynb 32 | Table 1, Column 8, Rows 9-16: train: ipynb/train_sgcls/0051.ipynb, evaluate: ipynb/eval_sgcls/0015.ipynb 33 | Table 1, Column 8, Rows 1-8: train: ipynb/train_predcls/0132.ipynb, evaluate: ipynb/eval_sgdet/0027.ipynb 34 | 35 | Table 1, Column 9, Rows 17-24: train: ipynb/train_predcls/0135.ipynb, evaluate: ipynb/eval_predcls/0025.ipynb 36 | Table 1, Column 9, Rows 9-16: train: ipynb/train_sgcls/0145.ipynb, evaluate: ipynb/eval_sgcls/0039.ipynb 37 | Table 1, Column 9, Rows 1-8: train: ipynb/train_predcls/0135.ipynb, evaluate: ipynb/eval_sgdet/0035.ipynb 38 | 39 | Table 2, Row 1, Columns 6-9: train: ipynb/train_predcls/0140.ipynb, evaluate: ipynb/eval_predcls/0030.ipynb 40 | Table 2, Row 1, Columns 2-5: train: ipynb/train_predcls/0140.ipynb, evaluate: ipynb/eval_sgdet/0028.ipynb 41 | 42 | Table 2, Row 2, Columns 6-9: train: ipynb/train_predcls/0134.ipynb, evaluate: ipynb/eval_predcls/0024.ipynb 43 | Table 2, Row 2, Columns 2-5: train: ipynb/train_predcls/0134.ipynb, evaluate: ipynb/eval_sgdet/0034.ipynb 44 | 45 | Table 2, Row 3, Columns 6-9: train: ipynb/train_predcls/0136.ipynb, evaluate: ipynb/eval_predcls/0026.ipynb 46 | Table 2, Row 3, Columns 2-5: train: ipynb/train_predcls/0136.ipynb, evaluate: ipynb/eval_sgdet/0036.ipynb 47 | 48 | Table 2, Row 4, Columns 6-9: train: ipynb/train_predcls/0132.ipynb, evaluate: ipynb/eval_predcls/0022.ipynb 49 | Table 2, Row 4, Columns 2-5: train: ipynb/train_predcls/0132.ipynb, evaluate: ipynb/eval_sgdet/0027.ipynb 50 | ``` 51 | 52 | Moreover, SGCls results for table 2, which is missing from the paper due to space constraint, can be produced by: 53 | ``` 54 | Row 1: train: ipynb/train_predcls/0150.ipynb, evaluate: ipynb/eval_predcls/0041.ipynb 55 | Row 2: train: ipynb/train_predcls/0144.ipynb, evaluate: ipynb/eval_predcls/0038.ipynb 56 | Row 3: train: ipynb/train_predcls/0146.ipynb, evaluate: ipynb/eval_predcls/0040.ipynb 57 | Row 4: train: ipynb/train_predcls/0142.ipynb, evaluate: ipynb/eval_predcls/0037.ipynb 58 | ``` 59 | 60 | To skip training, you may download all our pretrained checkpoints from [here](https://www.dropbox.com/sh/r62mzgsg1f81776/AAAQKzPD8qJrBYeYzNHJ0p5Xa?dl=0) and place in the `checkpoints/` folder. Then you only need to run notebooks in `ipynb/eval_...` 61 | 62 | If GPU is not available, to skip deploying the model altogether, you may download our pre-computed model outputs from [here](https://www.dropbox.com/sh/rbnkcnfh0bmw08m/AACVBegZ14YGG9XwcsmJFxFua?dl=0) and place in the `caches/` folder. Then if you run any notebook in `ipynb/eval_...`, it automatically uses the cached results and does not deploy the model. Note that there is no need to run the cell that creates the model (`detector = ...`) as well as the next one that transfers it to cuda (`detector.cuda()`) and the next one that loads the checkpoint (`ckpt = ...`). Only run the rest of the cells. 
63 | 64 | Finally, to avoid running the code, you may just open the notebooks in `ipynb/eval_...` and scroll down to see the evaluation results. 65 | 66 | Note if you get cuda-related errors, it might be due to the cuda compatibility options that were used to compile this library. In that case, you need to change the compatibility in `lib/fpn/nms/src/cuda/Makefile` and `lib/fpn/roi_align/src/cuda/Makefile` and rebuild both by running make clean and then make in both directories. 67 | Also note that pytorch 0.3.0 only has pre-built binaries for up to cuda 9. In order to run this with cuda 10 and newer GPUs, you need to build pytorch from source. 68 | 69 | Acknowledgement: This repository is based on our references [\[1\]](https://github.com/yuweihao/KERN) and [\[42\]](https://github.com/rowanz/neural-motifs) 70 | 71 | [1] Chen, Tianshui, et al. "Knowledge-Embedded Routing Network for Scene Graph Generation." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019. 72 | 73 | [37] Xu, Danfei, et al. "Scene graph generation by iterative message passing." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017. 74 | 75 | [42] Zellers, Rowan, et al. "Neural motifs: Scene graph parsing with global context." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018. 76 | 77 | Created and maintained by [Alireza Zareian](https://www.linkedin.com/in/az2407/) at [DVMM](http://www.ee.columbia.edu/ln/dvmm/) - Columbia University. 78 | -------------------------------------------------------------------------------- /lib/my_ggnn_17.py: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # From my_ggnn_16: no knowledge 3 | ################################################################## 4 | 5 | import os, sys 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | import numpy as np 11 | import pickle 12 | from lib.my_util import MLP 13 | 14 | def wrap(nparr): 15 | return Variable(torch.from_numpy(nparr).float().cuda(), requires_grad=False) 16 | 17 | def arange(num): 18 | return torch.arange(num).type(torch.LongTensor).cuda() 19 | 20 | def normalize(tensor, dim, eps=1e-4): 21 | return tensor / torch.sqrt(torch.max((tensor**2).sum(dim=dim, keepdim=True), wrap(np.asarray([eps])))) 22 | 23 | class GGNN(nn.Module): 24 | def __init__(self, emb_path, graph_path, time_step_num=3, hidden_dim=512, output_dim=512, 25 | use_embedding=True, use_knowledge=True, refine_obj_cls=False, top_k_to_keep=5, normalize_messages=True): 26 | super(GGNN, self).__init__() 27 | self.time_step_num = time_step_num 28 | 29 | self.fc_mp_send_img_ent = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 30 | self.fc_mp_send_img_pred = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 31 | 32 | self.fc_mp_receive_img_ent = MLP([2 * hidden_dim // 4, 3 * hidden_dim // 4, hidden_dim], act_fn='ReLU', last_act=True) 33 | self.fc_mp_receive_img_pred = MLP([2 * hidden_dim // 4, 3 * hidden_dim // 4, hidden_dim], act_fn='ReLU', last_act=True) 34 | 35 | self.fc_eq3_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 36 | self.fc_eq3_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 37 | self.fc_eq4_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 38 | self.fc_eq4_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 39 | self.fc_eq5_w_img_ent = 
nn.Linear(hidden_dim, hidden_dim) 40 | self.fc_eq5_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 41 | 42 | self.fc_eq3_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 43 | self.fc_eq3_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 44 | self.fc_eq4_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 45 | self.fc_eq4_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 46 | self.fc_eq5_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 47 | self.fc_eq5_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 48 | 49 | self.fc_output_proj_img_pred = MLP([hidden_dim, hidden_dim, 51], act_fn='ReLU', last_act=False) 50 | 51 | self.refine_obj_cls = refine_obj_cls 52 | if self.refine_obj_cls: 53 | self.fc_output_proj_img_ent = MLP([hidden_dim, hidden_dim, 151], act_fn='ReLU', last_act=False) 54 | 55 | self.debug_info = {} 56 | self.top_k_to_keep = top_k_to_keep 57 | self.normalize_messages = normalize_messages 58 | 59 | def forward(self, rel_inds, ent_cls_logits, obj_fmaps, vr): 60 | num_img_ent = ent_cls_logits.size(0) 61 | num_img_pred = rel_inds.size(0) 62 | num_ont_ent = 151 63 | num_ont_pred = 51 64 | 65 | nodes_img_ent = obj_fmaps 66 | nodes_img_pred = vr 67 | 68 | edges_img_pred2subj = wrap(np.zeros((num_img_pred, num_img_ent))) 69 | edges_img_pred2subj[arange(num_img_pred), rel_inds[:, 0]] = 1 70 | edges_img_pred2obj = wrap(np.zeros((num_img_pred, num_img_ent))) 71 | edges_img_pred2obj[arange(num_img_pred), rel_inds[:, 1]] = 1 72 | edges_img_subj2pred = edges_img_pred2subj.t() 73 | edges_img_obj2pred = edges_img_pred2obj.t() 74 | 75 | edges_img_pred2subj = edges_img_pred2subj / torch.max(edges_img_pred2subj.sum(dim=0, keepdim=True), wrap(np.asarray([1.0]))) 76 | edges_img_pred2obj = edges_img_pred2obj / torch.max(edges_img_pred2obj.sum(dim=0, keepdim=True), wrap(np.asarray([1.0]))) 77 | 78 | for t in range(self.time_step_num): 79 | message_send_img_ent = self.fc_mp_send_img_ent(nodes_img_ent) 80 | message_send_img_pred = self.fc_mp_send_img_pred(nodes_img_pred) 81 | 82 | message_incoming_img_ent = torch.stack([ 83 | torch.mm(edges_img_pred2subj.t(), message_send_img_pred), 84 | torch.mm(edges_img_pred2obj.t(), message_send_img_pred), 85 | ], 1) 86 | 87 | message_incoming_img_pred = torch.stack([ 88 | torch.mm(edges_img_subj2pred.t(), message_send_img_ent), 89 | torch.mm(edges_img_obj2pred.t(), message_send_img_ent), 90 | ], 1) 91 | 92 | if self.normalize_messages: 93 | message_incoming_img_ent = normalize(message_incoming_img_ent, 2) 94 | message_incoming_img_pred = normalize(message_incoming_img_pred, 2) 95 | 96 | message_received_img_ent = self.fc_mp_receive_img_ent(message_incoming_img_ent.view(num_img_ent, -1)) 97 | message_received_img_pred = self.fc_mp_receive_img_pred(message_incoming_img_pred.view(num_img_pred, -1)) 98 | 99 | z_img_ent = torch.sigmoid(self.fc_eq3_w_img_ent(message_received_img_ent) + self.fc_eq3_u_img_ent(nodes_img_ent)) 100 | r_img_ent = torch.sigmoid(self.fc_eq4_w_img_ent(message_received_img_ent) + self.fc_eq4_u_img_ent(nodes_img_ent)) 101 | h_img_ent = torch.tanh(self.fc_eq5_w_img_ent(message_received_img_ent) + self.fc_eq5_u_img_ent(r_img_ent * nodes_img_ent)) 102 | nodes_img_ent_new = (1 - z_img_ent) * nodes_img_ent + z_img_ent * h_img_ent 103 | 104 | z_img_pred = torch.sigmoid(self.fc_eq3_w_img_pred(message_received_img_pred) + self.fc_eq3_u_img_pred(nodes_img_pred)) 105 | r_img_pred = torch.sigmoid(self.fc_eq4_w_img_pred(message_received_img_pred) + self.fc_eq4_u_img_pred(nodes_img_pred)) 106 | h_img_pred = torch.tanh(self.fc_eq5_w_img_pred(message_received_img_pred) + 
self.fc_eq5_u_img_pred(r_img_pred * nodes_img_pred)) 107 | nodes_img_pred_new = (1 - z_img_pred) * nodes_img_pred + z_img_pred * h_img_pred 108 | 109 | nodes_img_ent = nodes_img_ent_new 110 | nodes_img_pred = nodes_img_pred_new 111 | 112 | 113 | pred_cls_logits = self.fc_output_proj_img_pred(nodes_img_pred) 114 | 115 | if self.refine_obj_cls: 116 | ent_cls_logits = self.fc_output_proj_img_ent(nodes_img_ent) 117 | 118 | return pred_cls_logits, ent_cls_logits 119 | 120 | -------------------------------------------------------------------------------- /lib/fpn/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.nn import functional as F 4 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps as bbox_overlaps_np 5 | from lib.fpn.box_intersections_cpu.bbox import bbox_intersections as bbox_intersections_np 6 | 7 | 8 | def bbox_loss(prior_boxes, deltas, gt_boxes, eps=1e-4, scale_before=1): 9 | """ 10 | Computes the loss for predicting the GT boxes from prior boxes 11 | :param prior_boxes: [num_boxes, 4] (x1, y1, x2, y2) 12 | :param deltas: [num_boxes, 4] (tx, ty, th, tw) 13 | :param gt_boxes: [num_boxes, 4] (x1, y1, x2, y2) 14 | :return: 15 | """ 16 | prior_centers = center_size(prior_boxes) #(cx, cy, w, h) 17 | gt_centers = center_size(gt_boxes) #(cx, cy, w, h) 18 | 19 | center_targets = (gt_centers[:, :2] - prior_centers[:, :2]) / prior_centers[:, 2:] 20 | size_targets = torch.log(gt_centers[:, 2:]) - torch.log(prior_centers[:, 2:]) 21 | all_targets = torch.cat((center_targets, size_targets), 1) 22 | 23 | loss = F.smooth_l1_loss(deltas, all_targets, size_average=False)/(eps + prior_centers.size(0)) 24 | 25 | return loss 26 | 27 | 28 | def bbox_preds(boxes, deltas): 29 | """ 30 | Converts "deltas" (predicted by the network) along with prior boxes 31 | into (x1, y1, x2, y2) representation. 32 | :param boxes: Prior boxes, represented as (x1, y1, x2, y2) 33 | :param deltas: Offsets (tx, ty, tw, th) 34 | :param box_strides [num_boxes,] distance apart between boxes. anchor box can't go more than 35 | \pm box_strides/2 from its current position. If None then we'll use the widths 36 | and heights 37 | :return: Transformed boxes 38 | """ 39 | 40 | if boxes.size(0) == 0: 41 | return boxes 42 | prior_centers = center_size(boxes) 43 | 44 | xys = prior_centers[:, :2] + prior_centers[:, 2:] * deltas[:, :2] 45 | 46 | whs = torch.exp(deltas[:, 2:]) * prior_centers[:, 2:] 47 | 48 | return point_form(torch.cat((xys, whs), 1)) 49 | 50 | 51 | def center_size(boxes): 52 | """ Convert prior_boxes to (cx, cy, w, h) 53 | representation for comparison to center-size form ground truth data. 54 | Args: 55 | boxes: (tensor) point_form boxes 56 | Return: 57 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 58 | """ 59 | wh = boxes[:, 2:] - boxes[:, :2] + 1.0 60 | 61 | if isinstance(boxes, np.ndarray): 62 | return np.column_stack((boxes[:, :2] + 0.5 * wh, wh)) 63 | return torch.cat((boxes[:, :2] + 0.5 * wh, wh), 1) 64 | 65 | 66 | def point_form(boxes): 67 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 68 | representation for comparison to point form ground truth data. 69 | Args: 70 | boxes: (tensor) center-size default boxes from priorbox layers. 71 | Return: 72 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
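Example (made-up box, using the +1 width/height convention of center_size above):
center_size maps (0, 0, 9, 9) to (5, 5, 10, 10), and point_form maps that back to (0, 0, 9, 9).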
73 | """ 74 | if isinstance(boxes, np.ndarray): 75 | return np.column_stack((boxes[:, :2] - 0.5 * boxes[:, 2:], 76 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0))) 77 | return torch.cat((boxes[:, :2] - 0.5 * boxes[:, 2:], 78 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0)), 1) # xmax, ymax 79 | 80 | 81 | ########################################################################### 82 | ### Torch Utils, creds to Max de Groot 83 | ########################################################################### 84 | 85 | def bbox_intersections(box_a, box_b): 86 | """ We resize both tensors to [A,B,2] without new malloc: 87 | [A,2] -> [A,1,2] -> [A,B,2] 88 | [B,2] -> [1,B,2] -> [A,B,2] 89 | Then we compute the area of intersect between box_a and box_b. 90 | Args: 91 | box_a: (tensor) bounding boxes, Shape: [A,4]. 92 | box_b: (tensor) bounding boxes, Shape: [B,4]. 93 | Return: 94 | (tensor) intersection area, Shape: [A,B]. 95 | """ 96 | if isinstance(box_a, np.ndarray): 97 | assert isinstance(box_b, np.ndarray) 98 | return bbox_intersections_np(box_a, box_b) 99 | A = box_a.size(0) 100 | B = box_b.size(0) 101 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 102 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 103 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 104 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 105 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0) 106 | return inter[:, :, 0] * inter[:, :, 1] 107 | 108 | 109 | def bbox_overlaps(box_a, box_b): 110 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 111 | is simply the intersection over union of two boxes. Here we operate on 112 | ground truth boxes and default boxes. 113 | E.g.: 114 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 115 | Args: 116 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 117 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 118 | Return: 119 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 120 | """ 121 | if isinstance(box_a, np.ndarray): 122 | assert isinstance(box_b, np.ndarray) 123 | return bbox_overlaps_np(box_a, box_b) 124 | 125 | inter = bbox_intersections(box_a, box_b) 126 | area_a = ((box_a[:, 2] - box_a[:, 0] + 1.0) * 127 | (box_a[:, 3] - box_a[:, 1] + 1.0)).unsqueeze(1).expand_as(inter) # [A,B] 128 | area_b = ((box_b[:, 2] - box_b[:, 0] + 1.0) * 129 | (box_b[:, 3] - box_b[:, 1] + 1.0)).unsqueeze(0).expand_as(inter) # [A,B] 130 | union = area_a + area_b - inter 131 | return inter / union # [A,B] 132 | 133 | 134 | def nms_overlaps(boxes): 135 | """ get overlaps for each channel""" 136 | assert boxes.dim() == 3 137 | N = boxes.size(0) 138 | nc = boxes.size(1) 139 | max_xy = torch.min(boxes[:, None, :, 2:].expand(N, N, nc, 2), 140 | boxes[None, :, :, 2:].expand(N, N, nc, 2)) 141 | 142 | min_xy = torch.max(boxes[:, None, :, :2].expand(N, N, nc, 2), 143 | boxes[None, :, :, :2].expand(N, N, nc, 2)) 144 | 145 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0) 146 | 147 | # n, n, 151 148 | inters = inter[:,:,:,0]*inter[:,:,:,1] 149 | boxes_flat = boxes.view(-1, 4) 150 | areas_flat = (boxes_flat[:,2]- boxes_flat[:,0]+1.0)*( 151 | boxes_flat[:,3]- boxes_flat[:,1]+1.0) 152 | areas = areas_flat.view(boxes.size(0), boxes.size(1)) 153 | union = -inters + areas[None] + areas[:, None] 154 | return inters / union 155 | 156 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/rel_assignments.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from config import BG_THRESH_HI, BG_THRESH_LO, REL_FG_FRACTION, RELS_PER_IMG_REFINE 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds 11 | from collections import defaultdict 12 | import torch 13 | 14 | @to_variable 15 | def rel_assignments(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels, image_offset, 16 | fg_thresh=0.5, num_sample_per_gt=4, filter_non_overlap=True): 17 | """ 18 | Assign object detection proposals to ground-truth targets. Produces proposal 19 | classification labels and bounding-box regression targets. 20 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 21 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 22 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 23 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 24 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 25 | :return: 26 | rois: [num_rois, 5] 27 | labels: [num_rois] array of labels 28 | bbox_targets [num_rois, 4] array of targets for the labels. 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 30 | """ 31 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64)) 32 | 33 | pred_inds_np = im_inds.cpu().numpy() 34 | pred_boxes_np = rpn_rois.cpu().numpy() 35 | pred_boxlabels_np = roi_gtlabels.cpu().numpy() 36 | gt_boxes_np = gt_boxes.cpu().numpy() 37 | gt_classes_np = gt_classes.cpu().numpy() 38 | gt_rels_np = gt_rels.cpu().numpy() 39 | 40 | gt_classes_np[:, 0] -= image_offset 41 | gt_rels_np[:, 0] -= image_offset 42 | 43 | num_im = gt_classes_np[:, 0].max()+1 44 | 45 | # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format( 46 | # pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np 47 | # )) 48 | 49 | rel_labels = [] 50 | num_box_seen = 0 51 | for im_ind in range(num_im): 52 | pred_ind = np.where(pred_inds_np == im_ind)[0] 53 | 54 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 55 | gt_boxes_i = gt_boxes_np[gt_ind] 56 | gt_classes_i = gt_classes_np[gt_ind, 1] 57 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 58 | 59 | # [num_pred, num_gt] 60 | pred_boxes_i = pred_boxes_np[pred_ind] 61 | pred_boxlabels_i = pred_boxlabels_np[pred_ind] 62 | 63 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) 64 | is_match = (pred_boxlabels_i[:,None] == gt_classes_i[None]) & (ious >= fg_thresh) 65 | 66 | # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box 67 | pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i) 68 | if filter_non_overlap: 69 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0) 70 | rels_intersect = rel_possibilities 71 | else: 72 | rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]), 73 | dtype=np.int64) - np.eye(pred_boxes_i.shape[0], 74 | dtype=np.int64) 75 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0) 76 | 77 | # ONLY select relations between ground truth because otherwise we get useless data 78 | rel_possibilities[pred_boxlabels_i == 0] = 0 79 | rel_possibilities[:, pred_boxlabels_i == 0] = 0 80 | 81 | # Sample the GT relationships. 
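# In more detail (a summary of the loop below): for every GT triple (from_gtind, to_gtind, rel_id),
# all proposal pairs whose boxes match the subject and object GT boxes are collected, and up to
# num_sample_per_gt of them are sampled with probability proportional to the product of their IoUs
# with the two GT boxes; the resulting fg_rels list is then capped at fg_rels_per_image
# (REL_FG_FRACTION * 64 = 16 with the default config).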
82 | fg_rels = [] 83 | p_size = [] 84 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i): 85 | fg_rels_i = [] 86 | fg_scores_i = [] 87 | 88 | for from_ind in np.where(is_match[:, from_gtind])[0]: 89 | for to_ind in np.where(is_match[:, to_gtind])[0]: 90 | if from_ind != to_ind: 91 | fg_rels_i.append((from_ind, to_ind, rel_id)) 92 | fg_scores_i.append((ious[from_ind, from_gtind] * ious[to_ind, to_gtind])) 93 | rel_possibilities[from_ind, to_ind] = 0 94 | if len(fg_rels_i) == 0: 95 | continue 96 | p = np.array(fg_scores_i) 97 | p = p / p.sum() 98 | p_size.append(p.shape[0]) 99 | num_to_add = min(p.shape[0], num_sample_per_gt) 100 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False): 101 | fg_rels.append(fg_rels_i[rel_to_add]) 102 | 103 | fg_rels = np.array(fg_rels, dtype=np.int64) 104 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image: 105 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)] 106 | elif fg_rels.size == 0: 107 | fg_rels = np.zeros((0, 3), dtype=np.int64) 108 | 109 | bg_rels = np.column_stack(np.where(rel_possibilities)) 110 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64))) 111 | 112 | num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0]) 113 | if bg_rels.size > 0: 114 | # Sample 4x as many intersecting relationships as non-intersecting. 115 | # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]] 116 | # p = bg_rels_intersect.astype(np.float32) 117 | # p[bg_rels_intersect == 0] = 0.2 118 | # p[bg_rels_intersect == 1] = 0.8 119 | # p /= p.sum() 120 | bg_rels = bg_rels[ 121 | np.random.choice(bg_rels.shape[0], 122 | #p=p, 123 | size=num_bg_rel, replace=False)] 124 | else: 125 | bg_rels = np.zeros((0, 3), dtype=np.int64) 126 | 127 | if fg_rels.size == 0 and bg_rels.size == 0: 128 | # Just put something here 129 | bg_rels = np.array([[0, 0, 0]], dtype=np.int64) 130 | 131 | # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape)) 132 | all_rels_i = np.concatenate((fg_rels, bg_rels), 0) 133 | all_rels_i[:,0:2] += num_box_seen 134 | 135 | all_rels_i = all_rels_i[np.lexsort((all_rels_i[:,1], all_rels_i[:,0]))] 136 | 137 | rel_labels.append(np.column_stack(( 138 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 139 | all_rels_i, 140 | ))) 141 | 142 | num_box_seen += pred_boxes_i.shape[0] 143 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), 144 | async=True) 145 | return rel_labels 146 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/.ipynb_checkpoints/rel_assignments-checkpoint.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from config import BG_THRESH_HI, BG_THRESH_LO, REL_FG_FRACTION, RELS_PER_IMG_REFINE 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds 11 | from collections import defaultdict 12 | import torch 13 | 14 | @to_variable 15 | def rel_assignments(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels, image_offset, 16 | fg_thresh=0.5, num_sample_per_gt=4, filter_non_overlap=True): 17 | """ 18 | Assign object detection proposals to ground-truth targets. 
Produces proposal 19 | classification labels and bounding-box regression targets. 20 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 21 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 22 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 23 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 24 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 25 | :return: 26 | rois: [num_rois, 5] 27 | labels: [num_rois] array of labels 28 | bbox_targets [num_rois, 4] array of targets for the labels. 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 30 | """ 31 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64)) 32 | 33 | pred_inds_np = im_inds.cpu().numpy() 34 | pred_boxes_np = rpn_rois.cpu().numpy() 35 | pred_boxlabels_np = roi_gtlabels.cpu().numpy() 36 | gt_boxes_np = gt_boxes.cpu().numpy() 37 | gt_classes_np = gt_classes.cpu().numpy() 38 | gt_rels_np = gt_rels.cpu().numpy() 39 | 40 | gt_classes_np[:, 0] -= image_offset 41 | gt_rels_np[:, 0] -= image_offset 42 | 43 | num_im = gt_classes_np[:, 0].max()+1 44 | 45 | # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format( 46 | # pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np 47 | # )) 48 | 49 | rel_labels = [] 50 | num_box_seen = 0 51 | for im_ind in range(num_im): 52 | pred_ind = np.where(pred_inds_np == im_ind)[0] 53 | 54 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 55 | gt_boxes_i = gt_boxes_np[gt_ind] 56 | gt_classes_i = gt_classes_np[gt_ind, 1] 57 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 58 | 59 | # [num_pred, num_gt] 60 | pred_boxes_i = pred_boxes_np[pred_ind] 61 | pred_boxlabels_i = pred_boxlabels_np[pred_ind] 62 | 63 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) 64 | is_match = (pred_boxlabels_i[:,None] == gt_classes_i[None]) & (ious >= fg_thresh) 65 | 66 | # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box 67 | pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i) 68 | if filter_non_overlap: 69 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0) 70 | rels_intersect = rel_possibilities 71 | else: 72 | rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]), 73 | dtype=np.int64) - np.eye(pred_boxes_i.shape[0], 74 | dtype=np.int64) 75 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0) 76 | 77 | # ONLY select relations between ground truth because otherwise we get useless data 78 | rel_possibilities[pred_boxlabels_i == 0] = 0 79 | rel_possibilities[:, pred_boxlabels_i == 0] = 0 80 | 81 | # Sample the GT relationships. 
82 | fg_rels = [] 83 | p_size = [] 84 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i): 85 | fg_rels_i = [] 86 | fg_scores_i = [] 87 | 88 | for from_ind in np.where(is_match[:, from_gtind])[0]: 89 | for to_ind in np.where(is_match[:, to_gtind])[0]: 90 | if from_ind != to_ind: 91 | fg_rels_i.append((from_ind, to_ind, rel_id)) 92 | fg_scores_i.append((ious[from_ind, from_gtind] * ious[to_ind, to_gtind])) 93 | rel_possibilities[from_ind, to_ind] = 0 94 | if len(fg_rels_i) == 0: 95 | continue 96 | p = np.array(fg_scores_i) 97 | p = p / p.sum() 98 | p_size.append(p.shape[0]) 99 | num_to_add = min(p.shape[0], num_sample_per_gt) 100 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False): 101 | fg_rels.append(fg_rels_i[rel_to_add]) 102 | 103 | fg_rels = np.array(fg_rels, dtype=np.int64) 104 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image: 105 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)] 106 | elif fg_rels.size == 0: 107 | fg_rels = np.zeros((0, 3), dtype=np.int64) 108 | 109 | bg_rels = np.column_stack(np.where(rel_possibilities)) 110 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64))) 111 | 112 | num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0]) 113 | if bg_rels.size > 0: 114 | # Sample 4x as many intersecting relationships as non-intersecting. 115 | # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]] 116 | # p = bg_rels_intersect.astype(np.float32) 117 | # p[bg_rels_intersect == 0] = 0.2 118 | # p[bg_rels_intersect == 1] = 0.8 119 | # p /= p.sum() 120 | bg_rels = bg_rels[ 121 | np.random.choice(bg_rels.shape[0], 122 | #p=p, 123 | size=num_bg_rel, replace=False)] 124 | else: 125 | bg_rels = np.zeros((0, 3), dtype=np.int64) 126 | 127 | if fg_rels.size == 0 and bg_rels.size == 0: 128 | # Just put something here 129 | bg_rels = np.array([[0, 0, 0]], dtype=np.int64) 130 | 131 | # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape)) 132 | all_rels_i = np.concatenate((fg_rels, bg_rels), 0) 133 | all_rels_i[:,0:2] += num_box_seen 134 | 135 | all_rels_i = all_rels_i[np.lexsort((all_rels_i[:,1], all_rels_i[:,0]))] 136 | 137 | rel_labels.append(np.column_stack(( 138 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 139 | all_rels_i, 140 | ))) 141 | 142 | num_box_seen += pred_boxes_i.shape[0] 143 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), 144 | async=True) 145 | return rel_labels 146 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/roi_align_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include "roi_align_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void ROIAlignForward(const int nthreads, const float* image_ptr, const float* boxes_ptr, 16 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 17 | int crop_width, int depth, float extrapolation_value, float* crops_ptr) { 18 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 19 | // (n, c, ph, pw) is an element in the aligned output 20 | int idx = out_idx; 21 | const int x = idx % crop_width; 22 | idx /= crop_width; 23 | const int y = idx % crop_height; 24 | idx /= 
crop_height; 25 | const int d = idx % depth; 26 | const int b = idx / depth; 27 | 28 | const int b_in = int(boxes_ptr[b*5]); 29 | const float x1 = boxes_ptr[b * 5 + 1]; 30 | const float y1 = boxes_ptr[b * 5 + 2]; 31 | const float x2 = boxes_ptr[b * 5 + 3]; 32 | const float y2 = boxes_ptr[b * 5 + 4]; 33 | if (b_in < 0 || b_in >= batch) { 34 | continue; 35 | } 36 | 37 | const float height_scale = 38 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 39 | : 0; 40 | const float width_scale = 41 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 42 | 43 | const float in_y = (crop_height > 1) 44 | ? y1 * (image_height - 1) + y * height_scale 45 | : 0.5 * (y1 + y2) * (image_height - 1); 46 | if (in_y < 0 || in_y > image_height - 1) { 47 | crops_ptr[out_idx] = extrapolation_value; 48 | continue; 49 | } 50 | 51 | const float in_x = (crop_width > 1) 52 | ? x1 * (image_width - 1) + x * width_scale 53 | : 0.5 * (x1 + x2) * (image_width - 1); 54 | if (in_x < 0 || in_x > image_width - 1) { 55 | crops_ptr[out_idx] = extrapolation_value; 56 | continue; 57 | } 58 | 59 | const int top_y_index = floorf(in_y); 60 | const int bottom_y_index = ceilf(in_y); 61 | const float y_lerp = in_y - top_y_index; 62 | 63 | const int left_x_index = floorf(in_x); 64 | const int right_x_index = ceilf(in_x); 65 | const float x_lerp = in_x - left_x_index; 66 | 67 | const float top_left = image_ptr[((b_in*depth + d) * image_height 68 | + top_y_index) * image_width + left_x_index]; 69 | const float top_right = image_ptr[((b_in*depth + d) * image_height 70 | + top_y_index) * image_width + right_x_index]; 71 | const float bottom_left = image_ptr[((b_in*depth + d) * image_height 72 | + bottom_y_index) * image_width + left_x_index]; 73 | const float bottom_right = image_ptr[((b_in*depth + d) * image_height 74 | + bottom_y_index) * image_width + right_x_index]; 75 | 76 | const float top = top_left + (top_right - top_left) * x_lerp; 77 | const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; 78 | crops_ptr[out_idx] = top + (bottom - top) * y_lerp; 79 | } 80 | } 81 | 82 | int ROIAlignForwardLaucher(const float* image_ptr, const float* boxes_ptr, 83 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 84 | int crop_width, int depth, float extrapolation_value, float* crops_ptr, cudaStream_t stream) { 85 | 86 | const int kThreadsPerBlock = 1024; 87 | const int output_size = num_boxes * crop_height * crop_width * depth; 88 | cudaError_t err; 89 | 90 | ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>> 91 | (output_size, image_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 92 | crop_height, crop_width, depth, extrapolation_value, crops_ptr); 93 | 94 | err = cudaGetLastError(); 95 | if(cudaSuccess != err) { 96 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 97 | exit( -1 ); 98 | } 99 | 100 | return 1; 101 | } 102 | 103 | __global__ void ROIAlignBackward( 104 | const int nthreads, const float* grads_ptr, const float* boxes_ptr, 105 | int num_boxes, int batch, int image_height, 106 | int image_width, int crop_height, int crop_width, int depth, 107 | float* grads_image_ptr) { 108 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 109 | 110 | // out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 111 | int idx = out_idx; 112 | const int x = idx % crop_width; 113 | idx /= crop_width; 114 | const int y = idx % crop_height; 115 | idx /= crop_height; 116 | 
const int d = idx % depth; 117 | const int b = idx / depth; 118 | 119 | const int b_in = boxes_ptr[b * 5]; 120 | const float x1 = boxes_ptr[b * 5 + 1]; 121 | const float y1 = boxes_ptr[b * 5 + 2]; 122 | const float x2 = boxes_ptr[b * 5 + 3]; 123 | const float y2 = boxes_ptr[b * 5 + 4]; 124 | if (b_in < 0 || b_in >= batch) { 125 | continue; 126 | } 127 | 128 | const float height_scale = 129 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 130 | : 0; 131 | const float width_scale = 132 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 133 | 134 | const float in_y = (crop_height > 1) 135 | ? y1 * (image_height - 1) + y * height_scale 136 | : 0.5 * (y1 + y2) * (image_height - 1); 137 | if (in_y < 0 || in_y > image_height - 1) { 138 | continue; 139 | } 140 | 141 | const float in_x = (crop_width > 1) 142 | ? x1 * (image_width - 1) + x * width_scale 143 | : 0.5 * (x1 + x2) * (image_width - 1); 144 | if (in_x < 0 || in_x > image_width - 1) { 145 | continue; 146 | } 147 | 148 | const int top_y_index = floorf(in_y); 149 | const int bottom_y_index = ceilf(in_y); 150 | const float y_lerp = in_y - top_y_index; 151 | 152 | const int left_x_index = floorf(in_x); 153 | const int right_x_index = ceilf(in_x); 154 | const float x_lerp = in_x - left_x_index; 155 | 156 | const float dtop = (1 - y_lerp) * grads_ptr[out_idx]; 157 | atomicAdd( 158 | grads_image_ptr + ((b_in*depth + d)*image_height + top_y_index) * image_width + left_x_index, 159 | (1 - x_lerp) * dtop); 160 | atomicAdd(grads_image_ptr + 161 | ((b_in * depth + d)*image_height+top_y_index)*image_width + right_x_index, 162 | x_lerp * dtop); 163 | 164 | const float dbottom = y_lerp * grads_ptr[out_idx]; 165 | atomicAdd(grads_image_ptr + ((b_in*depth+d)*image_height+bottom_y_index)*image_width+left_x_index, 166 | (1 - x_lerp) * dbottom); 167 | atomicAdd(grads_image_ptr + ((b_in*depth+d)*image_height+bottom_y_index)*image_width+right_x_index, 168 | x_lerp * dbottom); 169 | } 170 | } 171 | 172 | int ROIAlignBackwardLaucher(const float* grads_ptr, const float* boxes_ptr, int num_boxes, 173 | int batch, int image_height, int image_width, int crop_height, int crop_width, int depth, 174 | float* grads_image_ptr, cudaStream_t stream) { 175 | const int kThreadsPerBlock = 1024; 176 | const int output_size = num_boxes * crop_height * crop_width * depth; 177 | cudaError_t err; 178 | 179 | ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>> 180 | (output_size, grads_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, crop_height, 181 | crop_width, depth, grads_image_ptr); 182 | 183 | err = cudaGetLastError(); 184 | if(cudaSuccess != err) { 185 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 186 | exit( -1 ); 187 | } 188 | 189 | return 1; 190 | } 191 | 192 | 193 | #ifdef __cplusplus 194 | } 195 | #endif 196 | 197 | 198 | -------------------------------------------------------------------------------- /lib/evaluation/sg_eval_slow.py: -------------------------------------------------------------------------------- 1 | # JUST TO CHECK THAT IT IS EXACTLY THE SAME.................................. 
2 | import numpy as np 3 | from config import MODES 4 | 5 | class BasicSceneGraphEvaluator: 6 | 7 | def __init__(self, mode): 8 | self.result_dict = {} 9 | self.mode = {'sgdet':'sg_det', 'sgcls':'sg_cls', 'predcls':'pred_cls'}[mode] 10 | 11 | self.result_dict = {} 12 | self.result_dict[self.mode + '_recall'] = {20:[], 50:[], 100:[]} 13 | 14 | 15 | @classmethod 16 | def all_modes(cls): 17 | evaluators = {m: cls(mode=m) for m in MODES} 18 | return evaluators 19 | def evaluate_scene_graph_entry(self, gt_entry, pred_entry, iou_thresh=0.5): 20 | 21 | roidb_entry = { 22 | 'max_overlaps': np.ones(gt_entry['gt_classes'].shape[0], dtype=np.int64), 23 | 'boxes': gt_entry['gt_boxes'], 24 | 'gt_relations': gt_entry['gt_relations'], 25 | 'gt_classes': gt_entry['gt_classes'], 26 | } 27 | sg_entry = { 28 | 'boxes': pred_entry['pred_boxes'], 29 | 'relations': pred_entry['pred_rels'], 30 | 'obj_scores': pred_entry['obj_scores'], 31 | 'rel_scores': pred_entry['rel_scores'], 32 | 'pred_classes': pred_entry['pred_classes'], 33 | } 34 | 35 | pred_triplets, triplet_boxes = \ 36 | eval_relation_recall(sg_entry, roidb_entry, 37 | self.result_dict, 38 | self.mode, 39 | iou_thresh=iou_thresh) 40 | return pred_triplets, triplet_boxes 41 | 42 | 43 | def save(self, fn): 44 | np.save(fn, self.result_dict) 45 | 46 | 47 | def print_stats(self): 48 | print('======================' + self.mode + '============================') 49 | for k, v in self.result_dict[self.mode + '_recall'].items(): 50 | print('R@%i: %f' % (k, np.mean(v))) 51 | 52 | def save(self, fn): 53 | np.save(fn, self.result_dict) 54 | 55 | def print_stats(self): 56 | print('======================' + self.mode + '============================') 57 | for k, v in self.result_dict[self.mode + '_recall'].items(): 58 | print('R@%i: %f' % (k, np.mean(v))) 59 | 60 | 61 | def eval_relation_recall(sg_entry, 62 | roidb_entry, 63 | result_dict, 64 | mode, 65 | iou_thresh): 66 | 67 | # gt 68 | gt_inds = np.where(roidb_entry['max_overlaps'] == 1)[0] 69 | gt_boxes = roidb_entry['boxes'][gt_inds].copy().astype(float) 70 | num_gt_boxes = gt_boxes.shape[0] 71 | gt_relations = roidb_entry['gt_relations'].copy() 72 | gt_classes = roidb_entry['gt_classes'].copy() 73 | 74 | num_gt_relations = gt_relations.shape[0] 75 | if num_gt_relations == 0: 76 | return (None, None) 77 | gt_class_scores = np.ones(num_gt_boxes) 78 | gt_predicate_scores = np.ones(num_gt_relations) 79 | gt_triplets, gt_triplet_boxes, _ = _triplet(gt_relations[:,2], 80 | gt_relations[:,:2], 81 | gt_classes, 82 | gt_boxes, 83 | gt_predicate_scores, 84 | gt_class_scores) 85 | 86 | # pred 87 | box_preds = sg_entry['boxes'] 88 | num_boxes = box_preds.shape[0] 89 | relations = sg_entry['relations'] 90 | classes = sg_entry['pred_classes'].copy() 91 | class_scores = sg_entry['obj_scores'].copy() 92 | 93 | num_relations = relations.shape[0] 94 | 95 | if mode =='pred_cls': 96 | # if predicate classification task 97 | # use ground truth bounding boxes 98 | assert(num_boxes == num_gt_boxes) 99 | classes = gt_classes 100 | class_scores = gt_class_scores 101 | boxes = gt_boxes 102 | elif mode =='sg_cls': 103 | assert(num_boxes == num_gt_boxes) 104 | # if scene graph classification task 105 | # use gt boxes, but predicted classes 106 | # classes = np.argmax(class_preds, 1) 107 | # class_scores = class_preds.max(axis=1) 108 | boxes = gt_boxes 109 | elif mode =='sg_det': 110 | # if scene graph detection task 111 | # use preicted boxes and predicted classes 112 | # classes = np.argmax(class_preds, 1) 113 | # class_scores = 
class_preds.max(axis=1) 114 | boxes = box_preds 115 | else: 116 | raise NotImplementedError('Incorrect Mode! %s' % mode) 117 | 118 | pred_triplets = np.column_stack(( 119 | classes[relations[:, 0]], 120 | relations[:,2], 121 | classes[relations[:, 1]], 122 | )) 123 | pred_triplet_boxes = np.column_stack(( 124 | boxes[relations[:, 0]], 125 | boxes[relations[:, 1]], 126 | )) 127 | relation_scores = np.column_stack(( 128 | class_scores[relations[:, 0]], 129 | sg_entry['rel_scores'], 130 | class_scores[relations[:, 1]], 131 | )).prod(1) 132 | 133 | sorted_inds = np.argsort(relation_scores)[::-1] 134 | # compue recall 135 | for k in result_dict[mode + '_recall']: 136 | this_k = min(k, num_relations) 137 | keep_inds = sorted_inds[:this_k] 138 | recall = _relation_recall(gt_triplets, 139 | pred_triplets[keep_inds,:], 140 | gt_triplet_boxes, 141 | pred_triplet_boxes[keep_inds,:], 142 | iou_thresh) 143 | result_dict[mode + '_recall'][k].append(recall) 144 | 145 | # for visualization 146 | return pred_triplets[sorted_inds, :], pred_triplet_boxes[sorted_inds, :] 147 | 148 | 149 | def _triplet(predicates, relations, classes, boxes, 150 | predicate_scores, class_scores): 151 | 152 | # format predictions into triplets 153 | assert(predicates.shape[0] == relations.shape[0]) 154 | num_relations = relations.shape[0] 155 | triplets = np.zeros([num_relations, 3]).astype(np.int32) 156 | triplet_boxes = np.zeros([num_relations, 8]).astype(np.int32) 157 | triplet_scores = np.zeros([num_relations]).astype(np.float32) 158 | for i in range(num_relations): 159 | triplets[i, 1] = predicates[i] 160 | sub_i, obj_i = relations[i,:2] 161 | triplets[i, 0] = classes[sub_i] 162 | triplets[i, 2] = classes[obj_i] 163 | triplet_boxes[i, :4] = boxes[sub_i, :] 164 | triplet_boxes[i, 4:] = boxes[obj_i, :] 165 | # compute triplet score 166 | score = class_scores[sub_i] 167 | score *= class_scores[obj_i] 168 | score *= predicate_scores[i] 169 | triplet_scores[i] = score 170 | return triplets, triplet_boxes, triplet_scores 171 | 172 | 173 | def _relation_recall(gt_triplets, pred_triplets, 174 | gt_boxes, pred_boxes, iou_thresh): 175 | 176 | # compute the R@K metric for a set of predicted triplets 177 | 178 | num_gt = gt_triplets.shape[0] 179 | num_correct_pred_gt = 0 180 | 181 | for gt, gt_box in zip(gt_triplets, gt_boxes): 182 | keep = np.zeros(pred_triplets.shape[0]).astype(bool) 183 | for i, pred in enumerate(pred_triplets): 184 | if gt[0] == pred[0] and gt[1] == pred[1] and gt[2] == pred[2]: 185 | keep[i] = True 186 | if not np.any(keep): 187 | continue 188 | boxes = pred_boxes[keep,:] 189 | sub_iou = iou(gt_box[:4], boxes[:,:4]) 190 | obj_iou = iou(gt_box[4:], boxes[:,4:]) 191 | inds = np.intersect1d(np.where(sub_iou >= iou_thresh)[0], 192 | np.where(obj_iou >= iou_thresh)[0]) 193 | if inds.size > 0: 194 | num_correct_pred_gt += 1 195 | return float(num_correct_pred_gt) / float(num_gt) 196 | 197 | 198 | def iou(gt_box, pred_boxes): 199 | # computer Intersection-over-Union between two sets of boxes 200 | ixmin = np.maximum(gt_box[0], pred_boxes[:,0]) 201 | iymin = np.maximum(gt_box[1], pred_boxes[:,1]) 202 | ixmax = np.minimum(gt_box[2], pred_boxes[:,2]) 203 | iymax = np.minimum(gt_box[3], pred_boxes[:,3]) 204 | iw = np.maximum(ixmax - ixmin + 1., 0.) 205 | ih = np.maximum(iymax - iymin + 1., 0.) 206 | inters = iw * ih 207 | 208 | # union 209 | uni = ((gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) + 210 | (pred_boxes[:, 2] - pred_boxes[:, 0] + 1.) * 211 | (pred_boxes[:, 3] - pred_boxes[:, 1] + 1.) 
- inters) 212 | 213 | overlaps = inters / uni 214 | return overlaps 215 | -------------------------------------------------------------------------------- /dataloaders/blob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data blob, hopefully to make collating less painful and MGPU training possible 3 | """ 4 | from lib.fpn.anchor_targets import anchor_target_layer 5 | import numpy as np 6 | import torch 7 | from torch.autograd import Variable 8 | 9 | 10 | class Blob(object): 11 | def __init__(self, mode='det', is_train=False, num_gpus=1, primary_gpu=0, batch_size_per_gpu=3): 12 | """ 13 | Initializes an empty Blob object. 14 | :param mode: 'det' for detection and 'rel' for det+relationship 15 | :param is_train: True if it's training 16 | """ 17 | assert mode in ('det', 'rel') 18 | assert num_gpus >= 1 19 | self.mode = mode 20 | self.is_train = is_train 21 | self.num_gpus = num_gpus 22 | self.batch_size_per_gpu = batch_size_per_gpu 23 | self.primary_gpu = primary_gpu 24 | 25 | self.imgs = [] # [num_images, 3, IM_SCALE, IM_SCALE] array 26 | self.im_sizes = [] # [num_images, 4] array of (h, w, scale, num_valid_anchors) 27 | self.all_anchor_inds = [] # [all_anchors, 2] array of (img_ind, anchor_idx). Only has valid 28 | # boxes (meaning some are gonna get cut out) 29 | self.all_anchors = [] # [num_im, IM_SCALE/4, IM_SCALE/4, num_anchors, 4] shapes. Anchors outside get squashed 30 | # to 0 31 | self.gt_boxes = [] # [num_gt, 4] boxes 32 | self.gt_classes = [] # [num_gt,2] array of img_ind, class 33 | self.gt_rels = [] # [num_rels, 3]. Each row is (gtbox0, gtbox1, rel). 34 | 35 | self.gt_sents = [] 36 | self.gt_nodes = [] 37 | self.sent_lengths = [] 38 | 39 | self.train_anchor_labels = [] # [train_anchors, 5] array of (img_ind, h, w, A, labels) 40 | self.train_anchors = [] # [train_anchors, 8] shapes with anchor, target 41 | 42 | self.train_anchor_inds = None # This will be split into GPUs, just (img_ind, h, w, A). 
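# Rough life cycle of a Blob (a sketch based on the methods below): a dataloader collate function
# calls append() once per image dict, reduce() then concatenates everything into per-GPU chunks,
# scatter() moves the tensors to their GPUs, and blob[i] returns the input tuple for GPU i.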
43 | 44 | self.batch_size = None 45 | self.gt_box_chunks = None 46 | self.anchor_chunks = None 47 | self.train_chunks = None 48 | self.proposal_chunks = None 49 | self.proposals = [] 50 | 51 | @property 52 | def is_flickr(self): 53 | return self.mode == 'flickr' 54 | 55 | @property 56 | def is_rel(self): 57 | return self.mode == 'rel' 58 | 59 | @property 60 | def volatile(self): 61 | return not self.is_train 62 | 63 | def append(self, d): 64 | """ 65 | Adds a single image to the blob 66 | :param datom: 67 | :return: 68 | """ 69 | i = len(self.imgs) 70 | self.imgs.append(d['img']) 71 | 72 | h, w, scale = d['img_size'] 73 | 74 | # all anchors 75 | self.im_sizes.append((h, w, scale)) 76 | 77 | gt_boxes_ = d['gt_boxes'].astype(np.float32) * d['scale'] 78 | self.gt_boxes.append(gt_boxes_) 79 | 80 | self.gt_classes.append(np.column_stack(( 81 | i * np.ones(d['gt_classes'].shape[0], dtype=np.int64), 82 | d['gt_classes'], 83 | ))) 84 | 85 | # Add relationship info 86 | if self.is_rel: 87 | self.gt_rels.append(np.column_stack(( 88 | i * np.ones(d['gt_relations'].shape[0], dtype=np.int64), 89 | d['gt_relations']))) 90 | 91 | # Augment with anchor targets 92 | if self.is_train: 93 | train_anchors_, train_anchor_inds_, train_anchor_targets_, train_anchor_labels_ = \ 94 | anchor_target_layer(gt_boxes_, (h, w)) 95 | 96 | self.train_anchors.append(np.hstack((train_anchors_, train_anchor_targets_))) 97 | 98 | self.train_anchor_labels.append(np.column_stack(( 99 | i * np.ones(train_anchor_inds_.shape[0], dtype=np.int64), 100 | train_anchor_inds_, 101 | train_anchor_labels_, 102 | ))) 103 | 104 | if 'proposals' in d: 105 | self.proposals.append(np.column_stack((i * np.ones(d['proposals'].shape[0], dtype=np.float32), 106 | d['scale'] * d['proposals'].astype(np.float32)))) 107 | 108 | 109 | 110 | def _chunkize(self, datom, tensor=torch.LongTensor): 111 | """ 112 | Turn data list into chunks, one per GPU 113 | :param datom: List of lists of numpy arrays that will be concatenated. 114 | :return: 115 | """ 116 | chunk_sizes = [0] * self.num_gpus 117 | for i in range(self.num_gpus): 118 | for j in range(self.batch_size_per_gpu): 119 | chunk_sizes[i] += datom[i * self.batch_size_per_gpu + j].shape[0] 120 | return Variable(tensor(np.concatenate(datom, 0)), volatile=self.volatile), chunk_sizes 121 | 122 | def reduce(self): 123 | """ Merges all the detections into flat lists + numbers of how many are in each""" 124 | if len(self.imgs) != self.batch_size_per_gpu * self.num_gpus: 125 | raise ValueError("Wrong batch size? 
imgs len {} bsize/gpu {} numgpus {}".format( 126 | len(self.imgs), self.batch_size_per_gpu, self.num_gpus 127 | )) 128 | 129 | self.imgs = Variable(torch.stack(self.imgs, 0), volatile=self.volatile) 130 | self.im_sizes = np.stack(self.im_sizes).reshape( 131 | (self.num_gpus, self.batch_size_per_gpu, 3)) 132 | 133 | if self.is_rel: 134 | self.gt_rels, self.gt_rel_chunks = self._chunkize(self.gt_rels) 135 | 136 | self.gt_boxes, self.gt_box_chunks = self._chunkize(self.gt_boxes, tensor=torch.FloatTensor) 137 | self.gt_classes, _ = self._chunkize(self.gt_classes) 138 | if self.is_train: 139 | self.train_anchor_labels, self.train_chunks = self._chunkize(self.train_anchor_labels) 140 | self.train_anchors, _ = self._chunkize(self.train_anchors, tensor=torch.FloatTensor) 141 | self.train_anchor_inds = self.train_anchor_labels[:, :-1].contiguous() 142 | 143 | if len(self.proposals) != 0: 144 | self.proposals, self.proposal_chunks = self._chunkize(self.proposals, tensor=torch.FloatTensor) 145 | 146 | 147 | 148 | def _scatter(self, x, chunk_sizes, dim=0): 149 | """ Helper function""" 150 | if self.num_gpus == 1: 151 | return x.cuda(self.primary_gpu, async=True) 152 | return torch.nn.parallel.scatter_gather.Scatter.apply( 153 | list(range(self.num_gpus)), chunk_sizes, dim, x) 154 | 155 | def scatter(self): 156 | """ Assigns everything to the GPUs""" 157 | self.imgs = self._scatter(self.imgs, [self.batch_size_per_gpu] * self.num_gpus) 158 | 159 | self.gt_classes_primary = self.gt_classes.cuda(self.primary_gpu, async=True) 160 | self.gt_boxes_primary = self.gt_boxes.cuda(self.primary_gpu, async=True) 161 | 162 | # Predcls might need these 163 | self.gt_classes = self._scatter(self.gt_classes, self.gt_box_chunks) 164 | self.gt_boxes = self._scatter(self.gt_boxes, self.gt_box_chunks) 165 | 166 | if self.is_train: 167 | 168 | self.train_anchor_inds = self._scatter(self.train_anchor_inds, 169 | self.train_chunks) 170 | self.train_anchor_labels = self.train_anchor_labels.cuda(self.primary_gpu, async=True) 171 | self.train_anchors = self.train_anchors.cuda(self.primary_gpu, async=True) 172 | 173 | if self.is_rel: 174 | self.gt_rels = self._scatter(self.gt_rels, self.gt_rel_chunks) 175 | else: 176 | if self.is_rel: 177 | self.gt_rels = self.gt_rels.cuda(self.primary_gpu, async=True) 178 | 179 | if self.proposal_chunks is not None: 180 | self.proposals = self._scatter(self.proposals, self.proposal_chunks) 181 | 182 | def __getitem__(self, index): 183 | """ 184 | Returns a tuple containing data 185 | :param index: Which GPU we're on, or 0 if no GPUs 186 | :return: If training: 187 | (image, im_size, img_start_ind, anchor_inds, anchors, gt_boxes, gt_classes, 188 | train_anchor_inds) 189 | test: 190 | (image, im_size, img_start_ind, anchor_inds, anchors) 191 | """ 192 | if index not in list(range(self.num_gpus)): 193 | raise ValueError("Out of bounds with index {} and {} gpus".format(index, self.num_gpus)) 194 | 195 | if self.is_rel: 196 | rels = self.gt_rels 197 | if index > 0 or self.num_gpus != 1: 198 | rels_i = rels[index] if self.is_rel else None 199 | elif self.is_flickr: 200 | rels = (self.gt_sents, self.gt_nodes) 201 | if index > 0 or self.num_gpus != 1: 202 | rels_i = (self.gt_sents[index], self.gt_nodes[index]) 203 | else: 204 | rels = None 205 | rels_i = None 206 | 207 | if self.proposal_chunks is None: 208 | proposals = None 209 | else: 210 | proposals = self.proposals 211 | 212 | if index == 0 and self.num_gpus == 1: 213 | image_offset = 0 214 | if self.is_train: 215 | return (self.imgs, 
self.im_sizes[0], image_offset, 216 | self.gt_boxes, self.gt_classes, rels, proposals, self.train_anchor_inds) 217 | return self.imgs, self.im_sizes[0], image_offset, self.gt_boxes, self.gt_classes, rels, proposals 218 | 219 | # Otherwise proposals is None 220 | assert proposals is None 221 | 222 | image_offset = self.batch_size_per_gpu * index 223 | # TODO: Return a namedtuple 224 | if self.is_train: 225 | return ( 226 | self.imgs[index], self.im_sizes[index], image_offset, 227 | self.gt_boxes[index], self.gt_classes[index], rels_i, None, self.train_anchor_inds[index]) 228 | return (self.imgs[index], self.im_sizes[index], image_offset, 229 | self.gt_boxes[index], self.gt_classes[index], rels_i, None) 230 | 231 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration file! 3 | """ 4 | import os 5 | from argparse import ArgumentParser 6 | import numpy as np 7 | 8 | ROOT_PATH = '/path/to/GBNet-Supp' 9 | 10 | def path(fn): 11 | return os.path.join(ROOT_PATH, fn) 12 | 13 | def stanford_path(fn): 14 | return os.path.join('/path/to/stanford/preprocessed/metadata', fn) 15 | 16 | # ============================================================================= 17 | # Update these with where your data is stored ~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | VG_IMAGES = '/path/to/visual/genome/images' 20 | RCNN_CHECKPOINT_FN = path('checkpoints/vgdet/vg-24.tar') 21 | 22 | IM_DATA_FN = stanford_path('image_data.json') 23 | VG_SGG_FN = stanford_path('VG-SGG.h5') 24 | VG_SGG_DICT_FN = stanford_path('VG-SGG-dicts.json') 25 | PROPOSAL_FN = stanford_path('proposals.h5') 26 | 27 | # ============================================================================= 28 | # ============================================================================= 29 | 30 | 31 | MODES = ('sgdet', 'sgcls', 'predcls') 32 | 33 | BOX_SCALE = 1024 # Scale at which we have the boxes 34 | IM_SCALE = 592 # Our images will be resized to this res without padding 35 | 36 | # Proposal assignments 37 | BG_THRESH_HI = 0.5 38 | BG_THRESH_LO = 0.0 39 | 40 | RPN_POSITIVE_OVERLAP = 0.7 41 | # IOU < thresh: negative example 42 | RPN_NEGATIVE_OVERLAP = 0.3 43 | 44 | # Max number of foreground examples 45 | RPN_FG_FRACTION = 0.5 46 | FG_FRACTION = 0.25 47 | # Total number of examples 48 | RPN_BATCHSIZE = 256 49 | ROIS_PER_IMG = 256 50 | REL_FG_FRACTION = 0.25 51 | RELS_PER_IMG = 256 52 | 53 | RELS_PER_IMG_REFINE = 64 54 | 55 | BATCHNORM_MOMENTUM = 0.01 56 | ANCHOR_SIZE = 16 57 | 58 | ANCHOR_RATIOS = (0.23232838, 0.63365731, 1.28478321, 3.15089189) #(0.5, 1, 2) 59 | ANCHOR_SCALES = (2.22152954, 4.12315647, 7.21692515, 12.60263013, 22.7102731) #(4, 8, 16, 32) 60 | 61 | class ModelConfig(object): 62 | """Wrapper class for model hyperparameters.""" 63 | def __init__(self, args_str=None): 64 | """ 65 | Defaults 66 | """ 67 | self.ckpt = None 68 | self.save_dir = None 69 | self.lr = None 70 | self.batch_size = None 71 | self.val_size = None 72 | self.l2 = None 73 | self.adamwd = None 74 | self.clip = None 75 | self.num_gpus = None 76 | self.num_workers = None 77 | self.print_interval = None 78 | self.mode = None 79 | self.test = False 80 | self.adam = False 81 | self.cache = None 82 | self.use_proposals=False 83 | self.use_resnet=False 84 | self.num_epochs=None 85 | self.pooling_dim = None 86 | 87 | self.use_ggnn_obj = False 88 | self.ggnn_obj_time_step_num = None 89 | self.ggnn_obj_hidden_dim = None 90 | 
self.ggnn_obj_output_dim = None 91 | self.use_obj_knowledge = False 92 | self.obj_knowledge = None 93 | 94 | self.use_ggnn_rel = False 95 | self.ggnn_rel_time_step_num = None 96 | self.ggnn_rel_hidden_dim = None 97 | self.ggnn_rel_output_dim = None 98 | self.use_rel_knowledge = False 99 | self.rel_knowledge = None 100 | 101 | self.tb_log_dir = None 102 | self.save_rel_recall = None 103 | 104 | self.parser = self.setup_parser() 105 | if args_str is None: 106 | self.args = vars(self.parser.parse_args()) 107 | else: 108 | self.args = vars(self.parser.parse_args(args_str.split())) 109 | 110 | print("~~~~~~~~ Hyperparameters used: ~~~~~~~") 111 | for x, y in self.args.items(): 112 | print("{} : {}".format(x, y)) 113 | 114 | self.__dict__.update(self.args) 115 | 116 | if len(self.ckpt) != 0: 117 | self.ckpt = os.path.join(ROOT_PATH, self.ckpt) 118 | else: 119 | self.ckpt = None 120 | 121 | if len(self.cache) != 0: 122 | if len(self.cache.split('/')) > 1: 123 | file_len = len(self.cache.split('/')[-1]) 124 | cache_dir = self.cache[:-file_len] 125 | cache_dir = os.path.join(ROOT_PATH, cache_dir) 126 | if not os.path.exists(cache_dir): 127 | os.mkdir(cache_dir) 128 | self.cache = os.path.join(ROOT_PATH, self.cache) 129 | else: 130 | self.cache = None 131 | 132 | if len(self.save_dir) == 0: 133 | self.save_dir = None 134 | else: 135 | self.save_dir = os.path.join(ROOT_PATH, self.save_dir) 136 | if not os.path.exists(self.save_dir): 137 | os.makedirs(self.save_dir) 138 | 139 | if len(self.tb_log_dir) != 0: 140 | self.tb_log_dir = os.path.join(ROOT_PATH, self.tb_log_dir) 141 | if not os.path.exists(self.tb_log_dir): 142 | os.makedirs(self.tb_log_dir) # help make multi depth directories, such as summaries/kern_predcls 143 | else: 144 | self.tb_log_dir = None 145 | 146 | if len(self.save_rel_recall) != 0: 147 | if len(self.save_rel_recall.split('/')) > 1: 148 | file_len = len(self.save_rel_recall.split('/')[-1]) 149 | save_rel_recall_dir = self.save_rel_recall[:-file_len] 150 | save_rel_recall_dir = os.path.join(ROOT_PATH, save_rel_recall_dir) 151 | if not os.path.exists(save_rel_recall_dir): 152 | os.mkdir(save_rel_recall_dir) 153 | self.save_rel_recall = os.path.join(ROOT_PATH, self.save_rel_recall) 154 | else: 155 | self.save_rel_recall = None 156 | 157 | 158 | assert self.val_size >= 0 159 | 160 | if self.mode not in MODES: 161 | raise ValueError("Invalid mode: mode must be in {}".format(MODES)) 162 | 163 | 164 | if self.ckpt is not None and not os.path.exists(self.ckpt): 165 | raise ValueError("Ckpt file ({}) doesnt exist".format(self.ckpt)) 166 | 167 | def setup_parser(self): 168 | """ 169 | Sets up an argument parser 170 | :return: 171 | """ 172 | parser = ArgumentParser(description='training code') 173 | 174 | 175 | parser.add_argument('-ckpt', dest='ckpt', help='Filename to load from', type=str, default='') 176 | parser.add_argument('-save_dir', dest='save_dir', 177 | help='Directory to save things to, such as checkpoints/save', default='', type=str) 178 | 179 | parser.add_argument('-ngpu', dest='num_gpus', help='cuantos GPUs tienes', type=int, default=1) 180 | parser.add_argument('-nwork', dest='num_workers', help='num processes to use as workers', type=int, default=1) 181 | 182 | parser.add_argument('-lr', dest='lr', help='learning rate', type=float, default=1e-3) 183 | 184 | parser.add_argument('-b', dest='batch_size', help='batch size per GPU',type=int, default=2) 185 | parser.add_argument('-val_size', dest='val_size', help='val size to use (if 0 we wont use val)', type=int, default=5000) 
186 | 187 | parser.add_argument('-l2', dest='l2', help='weight decay of SGD', type=float, default=1e-4) 188 | parser.add_argument('-adamwd', dest='adamwd', help='weight decay of adam', type=float, default=0.0) 189 | 190 | parser.add_argument('-clip', dest='clip', help='gradients will be clipped to have norm less than this', type=float, default=5.0) 191 | parser.add_argument('-p', dest='print_interval', help='print during training', type=int, 192 | default=100) 193 | parser.add_argument('-m', dest='mode', help='mode in {sgdet, sgcls, predcls}', type=str, default='sgdet') 194 | 195 | 196 | parser.add_argument('-cache', dest='cache', help='where should we cache predictions', type=str, 197 | default='') 198 | 199 | parser.add_argument('-adam', dest='adam', help='use adam', action='store_true') 200 | parser.add_argument('-test', dest='test', help='test set', action='store_true') 201 | 202 | parser.add_argument('-nepoch', dest='num_epochs', help='Number of epochs to train the model for',type=int, default=50) 203 | parser.add_argument('-resnet', dest='use_resnet', help='use resnet instead of VGG', action='store_true') 204 | parser.add_argument('-proposals', dest='use_proposals', help='Use Xu et als proposals', action='store_true') 205 | parser.add_argument('-pooling_dim', dest='pooling_dim', help='Dimension of pooling', type=int, default=4096) 206 | 207 | 208 | parser.add_argument('-use_ggnn_obj', dest='use_ggnn_obj', help='use GGNN_obj module', action='store_true') 209 | parser.add_argument('-ggnn_obj_time_step_num', dest='ggnn_obj_time_step_num', help='time step number of GGNN_obj', type=int, default=3) 210 | parser.add_argument('-ggnn_obj_hidden_dim', dest='ggnn_obj_hidden_dim', help='node hidden state dimension of GGNN_obj', type=int, default=512) 211 | parser.add_argument('-ggnn_obj_output_dim', dest='ggnn_obj_output_dim', help='node output feature dimension of GGNN_obj', type=int, default=512) 212 | parser.add_argument('-use_obj_knowledge', dest='use_obj_knowledge', help='use object cooccurrence knowledge', action='store_true') 213 | parser.add_argument('-obj_knowledge', dest='obj_knowledge', help='Filename to load matrix of object cooccurrence knowledge', type=str, default='') 214 | 215 | 216 | parser.add_argument('-use_ggnn_rel', dest='use_ggnn_rel', help='use GGNN_rel module', action='store_true') 217 | parser.add_argument('-ggnn_rel_time_step_num', dest='ggnn_rel_time_step_num', help='time step number of GGNN_rel', type=int, default=3) 218 | parser.add_argument('-ggnn_rel_hidden_dim', dest='ggnn_rel_hidden_dim', help='node hidden state dimension of GGNN_rel', type=int, default=512) 219 | parser.add_argument('-ggnn_rel_output_dim', dest='ggnn_rel_output_dim', help='node output feature dimension of GGNN_rel', type=int, default=512) 220 | parser.add_argument('-use_rel_knowledge', dest='use_rel_knowledge', help='use cooccurrence knowledge of object pairs and relationships', action='store_true') 221 | parser.add_argument('-rel_knowledge', dest='rel_knowledge', help='Filename to load matrix of cooccurrence knowledge of object pairs and relationships', type=str, default='') 222 | 223 | 224 | parser.add_argument('-tb_log_dir', dest='tb_log_dir', help='dir to save tensorboard summaries', type=str, default='') 225 | parser.add_argument('-save_rel_recall', dest='save_rel_recall', help='dir to save relationship results', type=str, default='') 226 | 227 | return parser 228 | -------------------------------------------------------------------------------- 
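Note: ModelConfig above is a thin argparse wrapper; it parses the flags defined in setup_parser(), prints them, copies them onto the instance, and maps the empty-string path defaults (ckpt, cache, save_dir, tb_log_dir, save_rel_recall) either to None or to paths under ROOT_PATH. A minimal usage sketch, with hypothetical flag values, assuming this config.py is on the import path:

from config import ModelConfig

# Parse a hand-written flag string instead of sys.argv (args_str is split on whitespace).
conf = ModelConfig('-m predcls -b 4 -ngpu 1 -lr 1e-4 -use_ggnn_obj -use_ggnn_rel')
print(conf.mode, conf.batch_size, conf.num_gpus)   # predcls 4 1
print(conf.ckpt, conf.save_dir)                    # None None (empty-string defaults become None)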
/lib/fpn/proposal_assignments/proposal_assignments_rel.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from config import BG_THRESH_HI, BG_THRESH_LO, FG_FRACTION_REL, ROIS_PER_IMG_REL, REL_FG_FRACTION, \ 9 | RELS_PER_IMG 10 | from lib.fpn.box_utils import bbox_overlaps 11 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds 12 | from collections import defaultdict 13 | import torch 14 | 15 | 16 | @to_variable 17 | def proposal_assignments_rel(rpn_rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5): 18 | """ 19 | Assign object detection proposals to ground-truth targets. Produces proposal 20 | classification labels and bounding-box regression targets. 21 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 22 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 23 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 24 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 25 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 26 | :return: 27 | rois: [num_rois, 5] 28 | labels: [num_rois] array of labels 29 | bbox_targets [num_rois, 4] array of targets for the labels. 30 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 31 | """ 32 | fg_rois_per_image = int(np.round(ROIS_PER_IMG_REL * FG_FRACTION_REL)) 33 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * RELS_PER_IMG)) 34 | 35 | pred_inds_np = rpn_rois[:, 0].cpu().numpy().astype(np.int64) 36 | pred_boxes_np = rpn_rois[:, 1:].cpu().numpy() 37 | gt_boxes_np = gt_boxes.cpu().numpy() 38 | gt_classes_np = gt_classes.cpu().numpy() 39 | gt_rels_np = gt_rels.cpu().numpy() 40 | 41 | gt_classes_np[:, 0] -= image_offset 42 | gt_rels_np[:, 0] -= image_offset 43 | 44 | num_im = gt_classes_np[:, 0].max()+1 45 | 46 | rois = [] 47 | obj_labels = [] 48 | rel_labels = [] 49 | bbox_targets = [] 50 | 51 | num_box_seen = 0 52 | 53 | for im_ind in range(num_im): 54 | pred_ind = np.where(pred_inds_np == im_ind)[0] 55 | 56 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 57 | gt_boxes_i = gt_boxes_np[gt_ind] 58 | gt_classes_i = gt_classes_np[gt_ind, 1] 59 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 60 | 61 | pred_boxes_i = np.concatenate((pred_boxes_np[pred_ind], gt_boxes_i), 0) 62 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) 63 | 64 | obj_inds_i, obj_labels_i, obj_assignments_i = _sel_inds(ious, gt_classes_i, 65 | fg_thresh, fg_rois_per_image, ROIS_PER_IMG_REL) 66 | 67 | all_rels_i = _sel_rels(ious[obj_inds_i], pred_boxes_i[obj_inds_i], obj_labels_i, 68 | gt_classes_i, gt_rels_i, 69 | fg_thresh=fg_thresh, fg_rels_per_image=fg_rels_per_image) 70 | all_rels_i[:,0:2] += num_box_seen 71 | 72 | rois.append(np.column_stack(( 73 | im_ind * np.ones(obj_inds_i.shape[0], dtype=np.float32), 74 | pred_boxes_i[obj_inds_i], 75 | ))) 76 | obj_labels.append(obj_labels_i) 77 | rel_labels.append(np.column_stack(( 78 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 79 | all_rels_i, 80 | ))) 81 | 82 | # print("Gtboxes i {} obj assignments i {}".format(gt_boxes_i, obj_assignments_i)) 83 | bbox_targets.append(gt_boxes_i[obj_assignments_i]) 84 | 85 | num_box_seen += obj_inds_i.size 86 | 87 | rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(rpn_rois.get_device(), async=True) 88 | labels = 
torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(rpn_rois.get_device(), async=True) 89 | bbox_targets = torch.FloatTensor(np.concatenate(bbox_targets, 0)).cuda(rpn_rois.get_device(), 90 | async=True) 91 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), 92 | async=True) 93 | 94 | return rois, labels, bbox_targets, rel_labels 95 | 96 | 97 | def _sel_rels(ious, pred_boxes, pred_labels, gt_classes, gt_rels, fg_thresh=0.5, fg_rels_per_image=128, num_sample_per_gt=1, filter_non_overlap=True): 98 | """ 99 | Selects the relations needed 100 | :param ious: [num_pred', num_gt] 101 | :param pred_boxes: [num_pred', num_gt] 102 | :param pred_labels: [num_pred'] 103 | :param gt_classes: [num_gt] 104 | :param gt_rels: [num_gtrel, 3] 105 | :param fg_thresh: 106 | :param fg_rels_per_image: 107 | :return: new rels, [num_predrel, 3] where each is (pred_ind1, pred_ind2, predicate) 108 | """ 109 | is_match = (ious >= fg_thresh) & (pred_labels[:, None] == gt_classes[None, :]) 110 | 111 | pbi_iou = bbox_overlaps(pred_boxes, pred_boxes) 112 | 113 | # Limit ourselves to only IOUs that overlap, but are not the exact same box 114 | # since we duplicated stuff earlier. 115 | if filter_non_overlap: 116 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0) 117 | rels_intersect = rel_possibilities 118 | else: 119 | rel_possibilities = np.ones((pred_labels.shape[0], pred_labels.shape[0]), 120 | dtype=np.int64) - np.eye(pred_labels.shape[0], dtype=np.int64) 121 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0) 122 | 123 | # ONLY select relations between ground truth because otherwise we get useless data 124 | rel_possibilities[pred_labels == 0] = 0 125 | rel_possibilities[:,pred_labels == 0] = 0 126 | 127 | # For each GT relationship, sample exactly 1 relationship. 128 | fg_rels = [] 129 | p_size = [] 130 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels): 131 | fg_rels_i = [] 132 | fg_scores_i = [] 133 | 134 | for from_ind in np.where(is_match[:,from_gtind])[0]: 135 | for to_ind in np.where(is_match[:,to_gtind])[0]: 136 | if from_ind != to_ind: 137 | fg_rels_i.append((from_ind, to_ind, rel_id)) 138 | fg_scores_i.append((ious[from_ind, from_gtind]*ious[to_ind, to_gtind])) 139 | rel_possibilities[from_ind, to_ind] = 0 140 | if len(fg_rels_i) == 0: 141 | continue 142 | p = np.array(fg_scores_i) 143 | p = p/p.sum() 144 | p_size.append(p.shape[0]) 145 | num_to_add = min(p.shape[0], num_sample_per_gt) 146 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False): 147 | fg_rels.append(fg_rels_i[rel_to_add]) 148 | 149 | bg_rels = np.column_stack(np.where(rel_possibilities)) 150 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64))) 151 | 152 | fg_rels = np.array(fg_rels, dtype=np.int64) 153 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image: 154 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)] 155 | # print("{} scores for {} GT. max={} min={} BG rels {}".format( 156 | # fg_rels_scores.shape[0], gt_rels.shape[0], fg_rels_scores.max(), fg_rels_scores.min(), 157 | # bg_rels.shape)) 158 | elif fg_rels.size == 0: 159 | fg_rels = np.zeros((0,3), dtype=np.int64) 160 | 161 | num_bg_rel = min(RELS_PER_IMG - fg_rels.shape[0], bg_rels.shape[0]) 162 | if bg_rels.size > 0: 163 | 164 | # Sample 4x as many intersecting relationships as non-intersecting. 
165 | bg_rels_intersect = rels_intersect[bg_rels[:,0], bg_rels[:,1]] 166 | p = bg_rels_intersect.astype(np.float32) 167 | p[bg_rels_intersect == 0] = 0.2 168 | p[bg_rels_intersect == 1] = 0.8 169 | p /= p.sum() 170 | bg_rels = bg_rels[np.random.choice(bg_rels.shape[0], p=p, size=num_bg_rel, replace=False)] 171 | else: 172 | bg_rels = np.zeros((0,3), dtype=np.int64) 173 | 174 | #print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape)) 175 | 176 | all_rels = np.concatenate((fg_rels, bg_rels), 0) 177 | 178 | # Sort by 2nd ind and then 1st ind 179 | all_rels = all_rels[np.lexsort((all_rels[:, 1], all_rels[:, 0]))] 180 | return all_rels 181 | 182 | def _sel_inds(ious, gt_classes_i, fg_thresh=0.5, fg_rois_per_image=128, rois_per_image=256, n_sample_per=1): 183 | 184 | #gt_assignment = ious.argmax(1) 185 | #max_overlaps = ious[np.arange(ious.shape[0]), gt_assignment] 186 | #fg_inds = np.where(max_overlaps >= fg_thresh)[0] 187 | 188 | fg_ious = ious.T >= fg_thresh #[num_gt, num_pred] 189 | #is_bg = ~fg_ious.any(0) 190 | 191 | # Sample K inds per GT image. 192 | fg_inds = [] 193 | for i, (ious_i, cls_i) in enumerate(zip(fg_ious, gt_classes_i)): 194 | n_sample_this_roi = min(n_sample_per, ious_i.sum()) 195 | if n_sample_this_roi > 0: 196 | p = ious_i.astype(np.float64) / ious_i.sum() 197 | for ind in npr.choice(ious_i.shape[0], p=p, size=n_sample_this_roi, replace=False): 198 | fg_inds.append((ind, i)) 199 | 200 | fg_inds = np.array(fg_inds, dtype=np.int64) 201 | if fg_inds.size == 0: 202 | fg_inds = np.zeros((0, 2), dtype=np.int64) 203 | elif fg_inds.shape[0] > fg_rois_per_image: 204 | #print("sample FG") 205 | fg_inds = fg_inds[npr.choice(fg_inds.shape[0], size=fg_rois_per_image, replace=False)] 206 | 207 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 208 | max_overlaps = ious.max(1) 209 | bg_inds = np.where((max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO))[0] 210 | 211 | # Compute number of background RoIs to take from this image (guarding 212 | # against there being fewer than desired) 213 | bg_rois_per_this_image = min(rois_per_image-fg_inds.shape[0], bg_inds.size) 214 | # Sample background regions without replacement 215 | if bg_inds.size > 0: 216 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 217 | 218 | 219 | # FIx for format issues 220 | obj_inds = np.concatenate((fg_inds[:,0], bg_inds), 0) 221 | obj_assignments_i = np.concatenate((fg_inds[:,1], np.zeros(bg_inds.shape[0], dtype=np.int64))) 222 | obj_labels_i = gt_classes_i[obj_assignments_i] 223 | obj_labels_i[fg_inds.shape[0]:] = 0 224 | #print("{} FG and {} BG".format(fg_inds.shape[0], bg_inds.shape[0])) 225 | return obj_inds, obj_labels_i, obj_assignments_i 226 | 227 | 228 | -------------------------------------------------------------------------------- /lib/evaluation/test_sg_eval.py: -------------------------------------------------------------------------------- 1 | # Just some tests so you can be assured that sg_eval.py works the same as the (original) stanford evaluation 2 | 3 | import numpy as np 4 | from six.moves import xrange 5 | from dataloaders.visual_genome import VG 6 | from lib.evaluation.sg_eval import evaluate_from_dict 7 | from tqdm import trange 8 | from lib.fpn.box_utils import center_size, point_form 9 | def eval_relation_recall(sg_entry, 10 | roidb_entry, 11 | result_dict, 12 | mode, 13 | iou_thresh): 14 | 15 | # gt 16 | gt_inds = np.where(roidb_entry['max_overlaps'] == 1)[0] 17 | gt_boxes = 
roidb_entry['boxes'][gt_inds].copy().astype(float) 18 | num_gt_boxes = gt_boxes.shape[0] 19 | gt_relations = roidb_entry['gt_relations'].copy() 20 | gt_classes = roidb_entry['gt_classes'].copy() 21 | 22 | num_gt_relations = gt_relations.shape[0] 23 | if num_gt_relations == 0: 24 | return (None, None) 25 | gt_class_scores = np.ones(num_gt_boxes) 26 | gt_predicate_scores = np.ones(num_gt_relations) 27 | gt_triplets, gt_triplet_boxes, _ = _triplet(gt_relations[:,2], 28 | gt_relations[:,:2], 29 | gt_classes, 30 | gt_boxes, 31 | gt_predicate_scores, 32 | gt_class_scores) 33 | 34 | # pred 35 | box_preds = sg_entry['boxes'] 36 | num_boxes = box_preds.shape[0] 37 | predicate_preds = sg_entry['relations'] 38 | class_preds = sg_entry['scores'] 39 | predicate_preds = predicate_preds.reshape(num_boxes, num_boxes, -1) 40 | 41 | # no bg 42 | predicate_preds = predicate_preds[:, :, 1:] 43 | predicates = np.argmax(predicate_preds, 2).ravel() + 1 44 | predicate_scores = predicate_preds.max(axis=2).ravel() 45 | relations = [] 46 | keep = [] 47 | for i in xrange(num_boxes): 48 | for j in xrange(num_boxes): 49 | if i != j: 50 | keep.append(num_boxes*i + j) 51 | relations.append([i, j]) 52 | # take out self relations 53 | predicates = predicates[keep] 54 | predicate_scores = predicate_scores[keep] 55 | 56 | relations = np.array(relations) 57 | assert(relations.shape[0] == num_boxes * (num_boxes - 1)) 58 | assert(predicates.shape[0] == relations.shape[0]) 59 | num_relations = relations.shape[0] 60 | 61 | if mode =='predcls': 62 | # if predicate classification task 63 | # use ground truth bounding boxes 64 | assert(num_boxes == num_gt_boxes) 65 | classes = gt_classes 66 | class_scores = gt_class_scores 67 | boxes = gt_boxes 68 | elif mode =='sgcls': 69 | assert(num_boxes == num_gt_boxes) 70 | # if scene graph classification task 71 | # use gt boxes, but predicted classes 72 | classes = np.argmax(class_preds, 1) 73 | class_scores = class_preds.max(axis=1) 74 | boxes = gt_boxes 75 | elif mode =='sgdet': 76 | # if scene graph detection task 77 | # use preicted boxes and predicted classes 78 | classes = np.argmax(class_preds, 1) 79 | class_scores = class_preds.max(axis=1) 80 | boxes = [] 81 | for i, c in enumerate(classes): 82 | boxes.append(box_preds[i]) # no bbox regression, c*4:(c+1)*4]) 83 | boxes = np.vstack(boxes) 84 | else: 85 | raise NotImplementedError('Incorrect Mode! 
%s' % mode) 86 | 87 | pred_triplets, pred_triplet_boxes, relation_scores = \ 88 | _triplet(predicates, relations, classes, boxes, 89 | predicate_scores, class_scores) 90 | 91 | 92 | sorted_inds = np.argsort(relation_scores)[::-1] 93 | # compue recall 94 | for k in result_dict[mode + '_recall']: 95 | this_k = min(k, num_relations) 96 | keep_inds = sorted_inds[:this_k] 97 | recall = _relation_recall(gt_triplets, 98 | pred_triplets[keep_inds,:], 99 | gt_triplet_boxes, 100 | pred_triplet_boxes[keep_inds,:], 101 | iou_thresh) 102 | result_dict[mode + '_recall'][k].append(recall) 103 | 104 | # for visualization 105 | return pred_triplets[sorted_inds, :], pred_triplet_boxes[sorted_inds, :] 106 | 107 | 108 | def _triplet(predicates, relations, classes, boxes, 109 | predicate_scores, class_scores): 110 | 111 | # format predictions into triplets 112 | assert(predicates.shape[0] == relations.shape[0]) 113 | num_relations = relations.shape[0] 114 | triplets = np.zeros([num_relations, 3]).astype(np.int32) 115 | triplet_boxes = np.zeros([num_relations, 8]).astype(np.int32) 116 | triplet_scores = np.zeros([num_relations]).astype(np.float32) 117 | for i in xrange(num_relations): 118 | triplets[i, 1] = predicates[i] 119 | sub_i, obj_i = relations[i,:2] 120 | triplets[i, 0] = classes[sub_i] 121 | triplets[i, 2] = classes[obj_i] 122 | triplet_boxes[i, :4] = boxes[sub_i, :] 123 | triplet_boxes[i, 4:] = boxes[obj_i, :] 124 | # compute triplet score 125 | score = class_scores[sub_i] 126 | score *= class_scores[obj_i] 127 | score *= predicate_scores[i] 128 | triplet_scores[i] = score 129 | return triplets, triplet_boxes, triplet_scores 130 | 131 | 132 | def _relation_recall(gt_triplets, pred_triplets, 133 | gt_boxes, pred_boxes, iou_thresh): 134 | 135 | # compute the R@K metric for a set of predicted triplets 136 | 137 | num_gt = gt_triplets.shape[0] 138 | num_correct_pred_gt = 0 139 | 140 | for gt, gt_box in zip(gt_triplets, gt_boxes): 141 | keep = np.zeros(pred_triplets.shape[0]).astype(bool) 142 | for i, pred in enumerate(pred_triplets): 143 | if gt[0] == pred[0] and gt[1] == pred[1] and gt[2] == pred[2]: 144 | keep[i] = True 145 | if not np.any(keep): 146 | continue 147 | boxes = pred_boxes[keep,:] 148 | sub_iou = iou(gt_box[:4], boxes[:,:4]) 149 | obj_iou = iou(gt_box[4:], boxes[:,4:]) 150 | inds = np.intersect1d(np.where(sub_iou >= iou_thresh)[0], 151 | np.where(obj_iou >= iou_thresh)[0]) 152 | if inds.size > 0: 153 | num_correct_pred_gt += 1 154 | return float(num_correct_pred_gt) / float(num_gt) 155 | 156 | 157 | def iou(gt_box, pred_boxes): 158 | # computer Intersection-over-Union between two sets of boxes 159 | ixmin = np.maximum(gt_box[0], pred_boxes[:,0]) 160 | iymin = np.maximum(gt_box[1], pred_boxes[:,1]) 161 | ixmax = np.minimum(gt_box[2], pred_boxes[:,2]) 162 | iymax = np.minimum(gt_box[3], pred_boxes[:,3]) 163 | iw = np.maximum(ixmax - ixmin + 1., 0.) 164 | ih = np.maximum(iymax - iymin + 1., 0.) 165 | inters = iw * ih 166 | 167 | # union 168 | uni = ((gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) + 169 | (pred_boxes[:, 2] - pred_boxes[:, 0] + 1.) * 170 | (pred_boxes[:, 3] - pred_boxes[:, 1] + 1.) 
- inters) 171 | 172 | overlaps = inters / uni 173 | return overlaps 174 | 175 | train, val, test = VG.splits() 176 | 177 | result_dict_mine = {'sgdet_recall': {20: [], 50: [], 100: []}} 178 | result_dict_theirs = {'sgdet_recall': {20: [], 50: [], 100: []}} 179 | 180 | for img_i in trange(len(val)): 181 | gt_entry = { 182 | 'gt_classes': val.gt_classes[img_i].copy(), 183 | 'gt_relations': val.relationships[img_i].copy(), 184 | 'gt_boxes': val.gt_boxes[img_i].copy(), 185 | } 186 | 187 | # Use shuffled GT boxes 188 | gt_indices = np.arange(gt_entry['gt_boxes'].shape[0]) #np.random.choice(gt_entry['gt_boxes'].shape[0], 20) 189 | pred_boxes = gt_entry['gt_boxes'][gt_indices] 190 | 191 | # Jitter the boxes a bit 192 | pred_boxes = center_size(pred_boxes) 193 | pred_boxes[:,:2] += np.random.rand(pred_boxes.shape[0], 2)*128 194 | pred_boxes[:,2:] *= (1+np.random.randn(pred_boxes.shape[0], 2).clip(-0.1, 0.1)) 195 | pred_boxes = point_form(pred_boxes) 196 | 197 | obj_scores = np.random.rand(pred_boxes.shape[0]) 198 | 199 | rels_to_use = np.column_stack(np.where(1 - np.diag(np.ones(pred_boxes.shape[0], dtype=np.int32)))) 200 | rel_scores = np.random.rand(min(100, rels_to_use.shape[0]), 51) 201 | rel_scores = rel_scores / rel_scores.sum(1, keepdims=True) 202 | pred_rel_inds = rels_to_use[np.random.choice(rels_to_use.shape[0], rel_scores.shape[0], 203 | replace=False)] 204 | 205 | # We must sort by P(o, o, r) 206 | rel_order = np.argsort(-rel_scores[:,1:].max(1) * obj_scores[pred_rel_inds[:,0]] * obj_scores[pred_rel_inds[:,1]]) 207 | 208 | pred_entry = { 209 | 'pred_boxes': pred_boxes, 210 | 'pred_classes': gt_entry['gt_classes'][gt_indices], #1+np.random.choice(150, pred_boxes.shape[0], replace=True), 211 | 'obj_scores': obj_scores, 212 | 'pred_rel_inds': pred_rel_inds[rel_order], 213 | 'rel_scores': rel_scores[rel_order], 214 | } 215 | 216 | # def check_whether_they_are_the_same(gt_entry, pred_entry): 217 | evaluate_from_dict(gt_entry, pred_entry, 'sgdet', result_dict_mine, multiple_preds=False, 218 | viz_dict=None) 219 | 220 | ######################### 221 | predicate_scores_theirs = np.zeros((pred_boxes.shape[0], pred_boxes.shape[0], 51), dtype=np.float64) 222 | for (o1, o2), s in zip(pred_entry['pred_rel_inds'], pred_entry['rel_scores']): 223 | predicate_scores_theirs[o1, o2] = s 224 | 225 | obj_scores_theirs = np.zeros((obj_scores.shape[0], 151), dtype=np.float64) 226 | obj_scores_theirs[np.arange(obj_scores.shape[0]), pred_entry['pred_classes']] = obj_scores 227 | 228 | sg_entry_orig_format = { 229 | 'boxes': pred_entry['pred_boxes'], 230 | # 'gt_classes': gt_entry['gt_classes'], 231 | # 'gt_relations': gt_entry['gt_relations'], 232 | 'relations': predicate_scores_theirs, 233 | 'scores': obj_scores_theirs 234 | } 235 | roidb_entry = { 236 | 'max_overlaps': np.concatenate((np.ones(gt_entry['gt_boxes'].shape[0]), np.zeros(pred_entry['pred_boxes'].shape[0])), 0), 237 | 'boxes': np.concatenate((gt_entry['gt_boxes'], pred_entry['pred_boxes']), 0), 238 | 'gt_classes': gt_entry['gt_classes'], 239 | 'gt_relations': gt_entry['gt_relations'], 240 | } 241 | eval_relation_recall(sg_entry_orig_format, roidb_entry, result_dict_theirs, 'sgdet', iou_thresh=0.5) 242 | 243 | my_results = np.array(result_dict_mine['sgdet_recall'][20]) 244 | their_results = np.array(result_dict_theirs['sgdet_recall'][20]) 245 | 246 | assert np.all(my_results == their_results) -------------------------------------------------------------------------------- /lib/my_ggnn_10.py: 
-------------------------------------------------------------------------------- 1 | ################################################################## 2 | # From my_ggnn_09: Dynamically connecting entities to ontology too 3 | # Also a minor change: img2ont edges are now normalized over ont rather than img 4 | ################################################################## 5 | 6 | import os, sys 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torch.autograd import Variable 11 | import numpy as np 12 | import pickle 13 | from lib.my_util import MLP 14 | 15 | def wrap(nparr): 16 | return Variable(torch.from_numpy(nparr).float().cuda(), requires_grad=False) 17 | 18 | def arange(num): 19 | return torch.arange(num).type(torch.LongTensor).cuda() 20 | 21 | class GGNN(nn.Module): 22 | def __init__(self, emb_path, graph_path, time_step_num=3, hidden_dim=512, output_dim=512, 23 | use_embedding=True, use_knowledge=True, refine_obj_cls=False): 24 | super(GGNN, self).__init__() 25 | self.time_step_num = time_step_num 26 | 27 | if use_embedding: 28 | with open(emb_path, 'rb') as fin: 29 | self.emb_ent, self.emb_pred = pickle.load(fin) 30 | else: 31 | self.emb_ent = np.eye(151, dtype=np.float32) 32 | self.emb_pred = np.eye(51, dtype=np.float32) 33 | 34 | if use_knowledge: 35 | with open(graph_path, 'rb') as fin: 36 | edge_dict = pickle.load(fin) 37 | self.adjmtx_ent2ent = edge_dict['edges_ent2ent'] 38 | self.adjmtx_ent2pred = edge_dict['edges_ent2pred'] 39 | self.adjmtx_pred2ent = edge_dict['edges_pred2ent'] 40 | self.adjmtx_pred2pred = edge_dict['edges_pred2pred'] 41 | else: 42 | self.adjmtx_ent2ent = np.zeros((1, 151, 151), dtype=np.float32) 43 | self.adjmtx_ent2pred = np.zeros((1, 151, 51), dtype=np.float32) 44 | self.adjmtx_pred2ent = np.zeros((1, 51, 151), dtype=np.float32) 45 | self.adjmtx_pred2pred = np.zeros((1, 51, 51), dtype=np.float32) 46 | 47 | self.num_edge_types_ent2ent = self.adjmtx_ent2ent.shape[0] 48 | self.num_edge_types_ent2pred = self.adjmtx_ent2pred.shape[0] 49 | self.num_edge_types_pred2ent = self.adjmtx_pred2ent.shape[0] 50 | self.num_edge_types_pred2pred = self.adjmtx_pred2pred.shape[0] 51 | 52 | self.fc_init_ont_ent = nn.Linear(self.emb_ent.shape[1], hidden_dim) 53 | self.fc_init_ont_pred = nn.Linear(self.emb_pred.shape[1], hidden_dim) 54 | 55 | self.fc_mp_send_ont_ent = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 56 | self.fc_mp_send_ont_pred = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 57 | self.fc_mp_send_img_ent = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 58 | self.fc_mp_send_img_pred = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 59 | 60 | self.fc_mp_receive_ont_ent = MLP([(self.num_edge_types_ent2ent + self.num_edge_types_pred2ent + 1) * hidden_dim // 4, 61 | (self.num_edge_types_ent2ent + self.num_edge_types_pred2ent + 1) * hidden_dim // 4, 62 | hidden_dim], act_fn='ReLU', last_act=True) 63 | self.fc_mp_receive_ont_pred = MLP([(self.num_edge_types_ent2pred + self.num_edge_types_pred2pred + 1) * hidden_dim // 4, 64 | (self.num_edge_types_ent2pred + self.num_edge_types_pred2pred + 1) * hidden_dim // 4, 65 | hidden_dim], act_fn='ReLU', last_act=True) 66 | self.fc_mp_receive_img_ent = MLP([3 * hidden_dim // 4, 3 * hidden_dim // 4, hidden_dim], act_fn='ReLU', last_act=True) 67 | self.fc_mp_receive_img_pred = MLP([3 * hidden_dim // 4, 3 * hidden_dim // 4, hidden_dim], act_fn='ReLU', 
last_act=True) 68 | 69 | self.fc_eq3_w_ont_ent = nn.Linear(hidden_dim, hidden_dim) 70 | self.fc_eq3_u_ont_ent = nn.Linear(hidden_dim, hidden_dim) 71 | self.fc_eq4_w_ont_ent = nn.Linear(hidden_dim, hidden_dim) 72 | self.fc_eq4_u_ont_ent = nn.Linear(hidden_dim, hidden_dim) 73 | self.fc_eq5_w_ont_ent = nn.Linear(hidden_dim, hidden_dim) 74 | self.fc_eq5_u_ont_ent = nn.Linear(hidden_dim, hidden_dim) 75 | 76 | self.fc_eq3_w_ont_pred = nn.Linear(hidden_dim, hidden_dim) 77 | self.fc_eq3_u_ont_pred = nn.Linear(hidden_dim, hidden_dim) 78 | self.fc_eq4_w_ont_pred = nn.Linear(hidden_dim, hidden_dim) 79 | self.fc_eq4_u_ont_pred = nn.Linear(hidden_dim, hidden_dim) 80 | self.fc_eq5_w_ont_pred = nn.Linear(hidden_dim, hidden_dim) 81 | self.fc_eq5_u_ont_pred = nn.Linear(hidden_dim, hidden_dim) 82 | 83 | self.fc_eq3_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 84 | self.fc_eq3_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 85 | self.fc_eq4_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 86 | self.fc_eq4_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 87 | self.fc_eq5_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 88 | self.fc_eq5_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 89 | 90 | self.fc_eq3_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 91 | self.fc_eq3_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 92 | self.fc_eq4_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 93 | self.fc_eq4_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 94 | self.fc_eq5_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 95 | self.fc_eq5_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 96 | 97 | self.fc_output_proj_img_pred = MLP([hidden_dim, hidden_dim, hidden_dim], act_fn='ReLU', last_act=False) 98 | self.fc_output_proj_ont_pred = MLP([hidden_dim, hidden_dim, hidden_dim], act_fn='ReLU', last_act=False) 99 | 100 | self.refine_obj_cls = refine_obj_cls 101 | if self.refine_obj_cls: 102 | self.fc_output_proj_img_ent = MLP([hidden_dim, hidden_dim, hidden_dim], act_fn='ReLU', last_act=False) 103 | self.fc_output_proj_ont_ent = MLP([hidden_dim, hidden_dim, hidden_dim], act_fn='ReLU', last_act=False) 104 | 105 | self.debug_info = {} 106 | 107 | 108 | def forward(self, rel_inds, obj_probs, obj_fmaps, vr): 109 | num_img_ent = obj_probs.size(0) 110 | num_img_pred = rel_inds.size(0) 111 | num_ont_ent = self.emb_ent.shape[0] 112 | num_ont_pred = self.emb_pred.shape[0] 113 | 114 | self.debug_info['rel_inds'] = rel_inds 115 | self.debug_info['obj_probs'] = obj_probs 116 | 117 | nodes_ont_ent = self.fc_init_ont_ent(wrap(self.emb_ent)) 118 | nodes_ont_pred = self.fc_init_ont_pred(wrap(self.emb_pred)) 119 | nodes_img_ent = obj_fmaps 120 | nodes_img_pred = vr 121 | 122 | edges_ont_ent2ent = wrap(self.adjmtx_ent2ent) 123 | edges_ont_ent2pred = wrap(self.adjmtx_ent2pred) 124 | edges_ont_pred2ent = wrap(self.adjmtx_pred2ent) 125 | edges_ont_pred2pred = wrap(self.adjmtx_pred2pred) 126 | 127 | edges_img_pred2subj = wrap(np.zeros((num_img_pred, num_img_ent))) 128 | edges_img_pred2subj[arange(num_img_pred), rel_inds[:, 0]] = 1 129 | edges_img_pred2obj = wrap(np.zeros((num_img_pred, num_img_ent))) 130 | edges_img_pred2obj[arange(num_img_pred), rel_inds[:, 1]] = 1 131 | edges_img_subj2pred = edges_img_pred2subj.t() 132 | edges_img_obj2pred = edges_img_pred2obj.t() 133 | 134 | edges_img2ont_ent = wrap(obj_probs.data.cpu().numpy()) 135 | edges_ont2img_ent = edges_img2ont_ent.t() 136 | 137 | edges_img2ont_pred = wrap(np.zeros((num_img_pred, num_ont_pred))) 138 | edges_ont2img_pred = edges_img2ont_pred.t() 139 | 140 | ent_cls_logits = None 141 | 142 | for t in 
range(self.time_step_num): 143 | message_send_ont_ent = self.fc_mp_send_ont_ent(nodes_ont_ent) 144 | message_send_ont_pred = self.fc_mp_send_ont_pred(nodes_ont_pred) 145 | message_send_img_ent = self.fc_mp_send_img_ent(nodes_img_ent) 146 | message_send_img_pred = self.fc_mp_send_img_pred(nodes_img_pred) 147 | 148 | message_received_ont_ent = self.fc_mp_receive_ont_ent(torch.cat( 149 | [torch.mm(edges_ont_ent2ent[i].t(), message_send_ont_ent) for i in range(self.num_edge_types_ent2ent)] + 150 | [torch.mm(edges_ont_pred2ent[i].t(), message_send_ont_pred) for i in range(self.num_edge_types_pred2ent)] + 151 | [torch.mm(edges_img2ont_ent.t(), message_send_img_ent),] 152 | , 1)) 153 | 154 | message_received_ont_pred = self.fc_mp_receive_ont_pred(torch.cat( 155 | [torch.mm(edges_ont_ent2pred[i].t(), message_send_ont_ent) for i in range(self.num_edge_types_ent2pred)] + 156 | [torch.mm(edges_ont_pred2pred[i].t(), message_send_ont_pred) for i in range(self.num_edge_types_pred2pred)] + 157 | [torch.mm(edges_img2ont_pred.t(), message_send_img_pred),] 158 | , 1)) 159 | 160 | message_received_img_ent = self.fc_mp_receive_img_ent(torch.cat([ 161 | torch.mm(edges_img_pred2subj.t(), message_send_img_pred), 162 | torch.mm(edges_img_pred2obj.t(), message_send_img_pred), 163 | torch.mm(edges_ont2img_ent.t(), message_send_ont_ent), 164 | ], 1)) 165 | 166 | message_received_img_pred = self.fc_mp_receive_img_pred(torch.cat([ 167 | torch.mm(edges_img_subj2pred.t(), message_send_img_ent), 168 | torch.mm(edges_img_obj2pred.t(), message_send_img_ent), 169 | torch.mm(edges_ont2img_pred.t(), message_send_ont_pred), 170 | ], 1)) 171 | 172 | z_ont_ent = torch.sigmoid(self.fc_eq3_w_ont_ent(message_received_ont_ent) + self.fc_eq3_u_ont_ent(nodes_ont_ent)) 173 | r_ont_ent = torch.sigmoid(self.fc_eq4_w_ont_ent(message_received_ont_ent) + self.fc_eq4_u_ont_ent(nodes_ont_ent)) 174 | h_ont_ent = torch.tanh(self.fc_eq5_w_ont_ent(message_received_ont_ent) + self.fc_eq5_u_ont_ent(r_ont_ent * nodes_ont_ent)) 175 | nodes_ont_ent_new = (1 - z_ont_ent) * nodes_ont_ent + z_ont_ent * h_ont_ent 176 | 177 | z_ont_pred = torch.sigmoid(self.fc_eq3_w_ont_pred(message_received_ont_pred) + self.fc_eq3_u_ont_pred(nodes_ont_pred)) 178 | r_ont_pred = torch.sigmoid(self.fc_eq4_w_ont_pred(message_received_ont_pred) + self.fc_eq4_u_ont_pred(nodes_ont_pred)) 179 | h_ont_pred = torch.tanh(self.fc_eq5_w_ont_pred(message_received_ont_pred) + self.fc_eq5_u_ont_pred(r_ont_pred * nodes_ont_pred)) 180 | nodes_ont_pred_new = (1 - z_ont_pred) * nodes_ont_pred + z_ont_pred * h_ont_pred 181 | 182 | z_img_ent = torch.sigmoid(self.fc_eq3_w_img_ent(message_received_img_ent) + self.fc_eq3_u_img_ent(nodes_img_ent)) 183 | r_img_ent = torch.sigmoid(self.fc_eq4_w_img_ent(message_received_img_ent) + self.fc_eq4_u_img_ent(nodes_img_ent)) 184 | h_img_ent = torch.tanh(self.fc_eq5_w_img_ent(message_received_img_ent) + self.fc_eq5_u_img_ent(r_img_ent * nodes_img_ent)) 185 | nodes_img_ent_new = (1 - z_img_ent) * nodes_img_ent + z_img_ent * h_img_ent 186 | 187 | z_img_pred = torch.sigmoid(self.fc_eq3_w_img_pred(message_received_img_pred) + self.fc_eq3_u_img_pred(nodes_img_pred)) 188 | r_img_pred = torch.sigmoid(self.fc_eq4_w_img_pred(message_received_img_pred) + self.fc_eq4_u_img_pred(nodes_img_pred)) 189 | h_img_pred = torch.tanh(self.fc_eq5_w_img_pred(message_received_img_pred) + self.fc_eq5_u_img_pred(r_img_pred * nodes_img_pred)) 190 | nodes_img_pred_new = (1 - z_img_pred) * nodes_img_pred + z_img_pred * h_img_pred 191 | 192 | relative_state_change_ont_ent = 
torch.sum(torch.abs(nodes_ont_ent_new - nodes_ont_ent)) / torch.sum(torch.abs(nodes_ont_ent)) 193 | relative_state_change_ont_pred = torch.sum(torch.abs(nodes_ont_pred_new - nodes_ont_pred)) / torch.sum(torch.abs(nodes_ont_pred)) 194 | relative_state_change_img_ent = torch.sum(torch.abs(nodes_img_ent_new - nodes_img_ent)) / torch.sum(torch.abs(nodes_img_ent)) 195 | relative_state_change_img_pred = torch.sum(torch.abs(nodes_img_pred_new - nodes_img_pred)) / torch.sum(torch.abs(nodes_img_pred)) 196 | 197 | self.debug_info[f'relative_state_change_{t}'] = [relative_state_change_ont_ent, relative_state_change_ont_pred, relative_state_change_img_ent, relative_state_change_img_pred] 198 | 199 | nodes_ont_ent = nodes_ont_ent_new 200 | nodes_ont_pred = nodes_ont_pred_new 201 | nodes_img_ent = nodes_img_ent_new 202 | nodes_img_pred = nodes_img_pred_new 203 | 204 | pred_cls_logits = torch.mm(self.fc_output_proj_img_pred(nodes_img_pred), self.fc_output_proj_ont_pred(nodes_ont_pred).t()) 205 | edges_img2ont_pred = F.softmax(pred_cls_logits, dim=1) 206 | edges_ont2img_pred = edges_img2ont_pred.t() 207 | 208 | if self.refine_obj_cls: 209 | ent_cls_logits = torch.mm(self.fc_output_proj_img_ent(nodes_img_ent), self.fc_output_proj_ont_ent(nodes_ont_ent).t()) 210 | edges_img2ont_ent = F.softmax(ent_cls_logits, dim=1) 211 | edges_ont2img_ent = edges_img2ont_ent.t() 212 | 213 | return pred_cls_logits, ent_cls_logits 214 | 215 | --------------------------------------------------------------------------------
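For reference, a minimal smoke-test sketch of the GGNN module defined in lib/my_ggnn_10.py (shapes and values are hypothetical; assumes a CUDA device and the Variable-based PyTorch API used throughout this repo). With use_embedding=False and use_knowledge=False the constructor falls back to identity embeddings (151 entity / 51 predicate classes) and all-zero knowledge adjacencies, so no pickle files are needed:

import torch
from torch.autograd import Variable
from lib.my_ggnn_10 import GGNN

ggnn = GGNN(emb_path=None, graph_path=None, use_embedding=False, use_knowledge=False,
            refine_obj_cls=True).cuda()

num_ent, num_rel, hidden = 4, 6, 512
rel_inds = torch.LongTensor([[0, 1], [1, 0], [2, 3], [3, 2], [0, 2], [1, 3]]).cuda()  # (subject, object) index pairs
obj_probs = Variable(torch.rand(num_ent, 151).cuda())      # per-entity class scores; bridge image entities to ontology entities
obj_fmaps = Variable(torch.randn(num_ent, hidden).cuda())  # image entity node features
vr = Variable(torch.randn(num_rel, hidden).cuda())         # image predicate (union-box) node features

pred_logits, ent_logits = ggnn(rel_inds, obj_probs, obj_fmaps, vr)
print(pred_logits.size(), ent_logits.size())  # torch.Size([6, 51]) torch.Size([4, 151])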