├── lib
├── __init__.py
├── evaluation
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   └── sg_eval.cpython-36.pyc
│   ├── sg_eval_slow.py
│   └── test_sg_eval.py
├── fpn
│   ├── nms
│   │   ├── _ext
│   │   │   ├── __init__.py
│   │   │   ├── nms
│   │   │   │   ├── _nms.so
│   │   │   │   ├── __pycache__
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   └── __init__.py
│   │   │   └── __pycache__
│   │   │   │   └── __init__.cpython-36.pyc
│   │   ├── src
│   │   │   ├── nms_cuda.h
│   │   │   ├── cuda
│   │   │   │   ├── nms.cu.o
│   │   │   │   ├── nms_kernel.h
│   │   │   │   ├── Makefile
│   │   │   │   ├── .ipynb_checkpoints
│   │   │   │   │   └── Makefile-checkpoint
│   │   │   │   └── nms_kernel.cu
│   │   │   └── nms_cuda.c
│   │   ├── functions
│   │   │   ├── __pycache__
│   │   │   │   └── nms.cpython-36.pyc
│   │   │   ├── nms.py
│   │   │   └── .ipynb_checkpoints
│   │   │   │   └── nms-checkpoint.py
│   │   ├── Makefile
│   │   ├── .ipynb_checkpoints
│   │   │   ├── Makefile-checkpoint
│   │   │   └── build-checkpoint.py
│   │   └── build.py
│   ├── roi_align
│   │   ├── __init__.py
│   │   ├── _ext
│   │   │   ├── __init__.py
│   │   │   ├── roi_align
│   │   │   │   ├── _roi_align.so
│   │   │   │   ├── __pycache__
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   └── __init__.py
│   │   │   └── __pycache__
│   │   │   │   └── __init__.cpython-36.pyc
│   │   ├── modules
│   │   │   ├── __init__.py
│   │   │   └── roi_align.py
│   │   ├── functions
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   └── roi_align.cpython-36.pyc
│   │   │   └── roi_align.py
│   │   ├── src
│   │   │   ├── cuda
│   │   │   │   ├── roi_align.cu.o
│   │   │   │   ├── Makefile
│   │   │   │   ├── .ipynb_checkpoints
│   │   │   │   │   └── Makefile-checkpoint
│   │   │   │   ├── roi_align_kernel.h
│   │   │   │   └── roi_align_kernel.cu
│   │   │   ├── roi_align_cuda.h
│   │   │   └── roi_align_cuda.c
│   │   ├── __pycache__
│   │   │   └── __init__.cpython-36.pyc
│   │   ├── Makefile
│   │   ├── .ipynb_checkpoints
│   │   │   └── Makefile-checkpoint
│   │   └── build.py
│   ├── __pycache__
│   │   ├── box_utils.cpython-36.pyc
│   │   ├── anchor_targets.cpython-36.pyc
│   │   └── generate_anchors.cpython-36.pyc
│   ├── box_intersections_cpu
│   │   ├── bbox.cpython-36m-x86_64-linux-gnu.so
│   │   ├── build
│   │   │   └── temp.linux-x86_64-3.6
│   │   │   │   └── bbox.o
│   │   ├── setup.py
│   │   └── bbox.pyx
│   ├── proposal_assignments
│   │   ├── __pycache__
│   │   │   ├── rel_assignments.cpython-36.pyc
│   │   │   ├── proposal_assignments_det.cpython-36.pyc
│   │   │   └── proposal_assignments_gtbox.cpython-36.pyc
│   │   ├── proposal_assignments_gtbox.py
│   │   ├── .ipynb_checkpoints
│   │   │   ├── proposal_assignments_gtbox-checkpoint.py
│   │   │   └── rel_assignments-checkpoint.py
│   │   ├── proposal_assignments_det.py
│   │   ├── proposal_assignments_postnms.py
│   │   ├── rel_assignments.py
│   │   └── proposal_assignments_rel.py
│   ├── make.sh
│   ├── generate_anchors.py
│   ├── anchor_targets.py
│   └── box_utils.py
├── __pycache__
│   ├── ggnn.cpython-36.pyc
│   ├── my_util.cpython-36.pyc
│   ├── resnet.cpython-36.pyc
│   ├── surgery.cpython-36.pyc
│   ├── __init__.cpython-36.pyc
│   ├── kern_model.cpython-36.pyc
│   ├── my_ggnn_01.cpython-36.pyc
│   ├── my_ggnn_02.cpython-36.pyc
│   ├── my_ggnn_03.cpython-36.pyc
│   ├── my_ggnn_04.cpython-36.pyc
│   ├── my_ggnn_05.cpython-36.pyc
│   ├── my_ggnn_06.cpython-36.pyc
│   ├── my_ggnn_07.cpython-36.pyc
│   ├── my_ggnn_08.cpython-36.pyc
│   ├── my_ggnn_09.cpython-36.pyc
│   ├── my_ggnn_10.cpython-36.pyc
│   ├── my_ggnn_11.cpython-36.pyc
│   ├── my_ggnn_12.cpython-36.pyc
│   ├── my_ggnn_13.cpython-36.pyc
│   ├── my_ggnn_14.cpython-36.pyc
│   ├── my_ggnn_15.cpython-36.pyc
│   ├── my_ggnn_16.cpython-36.pyc
│   ├── my_model_01.cpython-36.pyc
│   ├── my_model_02.cpython-36.pyc
│   ├── my_model_03.cpython-36.pyc
│   ├── my_model_04.cpython-36.pyc
│   ├── my_model_05.cpython-36.pyc
│   ├── my_model_06.cpython-36.pyc
│   ├── my_model_07.cpython-36.pyc
│   ├── my_model_08.cpython-36.pyc
│   ├── my_model_10.cpython-36.pyc
│   ├── my_model_11.cpython-36.pyc
│   ├── my_model_12.cpython-36.pyc
│   ├── my_model_13.cpython-36.pyc
│   ├── my_model_14.cpython-36.pyc
│   ├── my_model_15.cpython-36.pyc
│   ├── my_model_16.cpython-36.pyc
│   ├── my_model_17.cpython-36.pyc
│   ├── my_model_18.cpython-36.pyc
│   ├── my_model_19.cpython-36.pyc
│   ├── my_model_20.cpython-36.pyc
│   ├── my_model_21.cpython-36.pyc
│   ├── my_model_22.cpython-36.pyc
│   ├── my_model_23.cpython-36.pyc
│   ├── my_model_24.cpython-36.pyc
│   ├── my_model_26.cpython-36.pyc
│   ├── my_model_27.cpython-36.pyc
│   ├── my_model_28.cpython-36.pyc
│   ├── my_model_29.cpython-36.pyc
│   ├── my_model_30.cpython-36.pyc
│   ├── my_model_31.cpython-36.pyc
│   ├── my_model_32.cpython-36.pyc
│   ├── my_model_33.cpython-36.pyc
│   ├── pytorch_misc.cpython-36.pyc
│   ├── get_union_boxes.cpython-36.pyc
│   └── object_detector.cpython-36.pyc
├── draw_rectangles
│   ├── build
│   │   └── temp.linux-x86_64-3.6
│   │   │   └── draw_rectangles.o
│   ├── draw_rectangles.cpython-36m-x86_64-linux-gnu.so
│   ├── setup.py
│   └── draw_rectangles.pyx
├── my_util.py
├── surgery.py
├── get_union_boxes.py
├── resnet.py
├── ggnn.py
├── my_ggnn_17.py
└── my_ggnn_10.py
├── dataloaders
├── __init__.py
├── __pycache__
│   ├── blob.cpython-36.pyc
│   ├── __init__.cpython-36.pyc
│   ├── visual_genome.cpython-36.pyc
│   └── image_transforms.cpython-36.pyc
├── image_transforms.py
└── blob.py
├── graphs
├── 001
│   ├── emb_mtx.pkl
│   └── pred_counts.pkl
└── 005
│   └── all_edges.pkl
├── Makefile
├── requirements.txt
├── README.md
└── config.py

/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/dataloaders/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/graphs/001/emb_mtx.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alirezazareian/gbnet/HEAD/graphs/001/emb_mtx.pkl
--------------------------------------------------------------------------------
/graphs/001/pred_counts.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alirezazareian/gbnet/HEAD/graphs/001/pred_counts.pkl
--------------------------------------------------------------------------------
/graphs/005/all_edges.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alirezazareian/gbnet/HEAD/graphs/005/all_edges.pkl
--------------------------------------------------------------------------------
/lib/fpn/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh);
--------------------------------------------------------------------------------
/lib/fpn/nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
1 | int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num,
2 |                 float nms_overlap_thresh, int device_id);
3 | 
4 | 
--------------------------------------------------------------------------------
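The two headers above declare the C/CUDA entry points that the FFI build script (lib/fpn/nms/build.py, further down in this listing) compiles into the importable _ext.nms module. A minimal sketch of how that module is driven from Python follows; it mirrors _nms_single_im in lib/fpn/nms/functions/nms.py below, assumes the extension has already been built and a GPU is available, and the tensor sizes and the 0.7 IoU threshold are illustrative values, not something fixed by the repository:

import torch
from lib.fpn.nms._ext import nms  # produced by `make` in lib/fpn/nms

# Illustrative inputs: N boxes as [x1, y1, x2, y2] rows plus one confidence score per box.
N = 1000
xy1 = torch.rand(N, 2) * 100
boxes = torch.cat([xy1, xy1 + torch.rand(N, 2) * 20], 1).cuda()
scores = torch.rand(N).cuda()

keep = torch.IntTensor(N)                          # output buffer filled by the C side
_, idx = torch.sort(scores, dim=0, descending=True)
boxes_sorted = boxes[idx].contiguous()             # nms_apply expects boxes sorted by score
num_out = nms.nms_apply(keep, boxes_sorted, 0.7)   # returns the number of surviving boxes
keep_inds = idx[keep[:num_out].long().cuda(scores.get_device())]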
/lib/fpn/nms/Makefile:
--------------------------------------------------------------------------------
1 | all: src/cuda/nms.cu.o
2 | 	python build.py
3 | 
4 | src/cuda/nms.cu.o: src/cuda/nms_kernel.cu
5 | 	$(MAKE) -C src/cuda
6 | 
7 | clean:
8 | 	$(MAKE) -C src/cuda clean
9 | 
--------------------------------------------------------------------------------
/lib/fpn/roi_align/Makefile:
--------------------------------------------------------------------------------
1 | all: src/cuda/roi_align.cu.o
2 | 	python build.py
3 | 
4 | src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu
5 | 	$(MAKE) -C src/cuda
6 | 
7 | clean:
8 | 	$(MAKE) -C src/cuda clean
9 | 
-------------------------------------------------------------------------------- /lib/fpn/nms/.ipynb_checkpoints/Makefile-checkpoint: -------------------------------------------------------------------------------- 1 | all: src/cuda/nms.cu.o 2 | python build.py 3 | 4 | src/cuda/nms.cu.o: src/cuda/nms_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/__pycache__/proposal_assignments_gtbox.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alirezazareian/gbnet/HEAD/lib/fpn/proposal_assignments/__pycache__/proposal_assignments_gtbox.cpython-36.pyc -------------------------------------------------------------------------------- /lib/fpn/box_intersections_cpu/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup(name="bbox_cython", ext_modules=cythonize('bbox.pyx'), include_dirs=[numpy.get_include()]) -------------------------------------------------------------------------------- /lib/draw_rectangles/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup(name="draw_rectangles_cython", ext_modules=cythonize('draw_rectangles.pyx'), include_dirs=[numpy.get_include()]) -------------------------------------------------------------------------------- /lib/fpn/roi_align/.ipynb_checkpoints/Makefile-checkpoint: -------------------------------------------------------------------------------- 1 | all: src/cuda/roi_align.cu.o 2 | python build.py 3 | 4 | src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export PATH := /usr/local/cuda-9.0/bin:$(PATH) 2 | 3 | all: draw_rectangles box_intersections nms roi_align 4 | 5 | draw_rectangles: 6 | cd lib/draw_rectangles; python setup.py build_ext --inplace 7 | box_intersections: 8 | cd lib/fpn/box_intersections_cpu; python setup.py build_ext --inplace 9 | nms: 10 | cd lib/fpn/nms; make 11 | roi_align: 12 | cd lib/fpn/roi_align; make -------------------------------------------------------------------------------- /lib/fpn/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd anchors 4 | python setup.py build_ext --inplace 5 | cd .. 6 | 7 | cd box_intersections_cpu 8 | python setup.py build_ext --inplace 9 | cd .. 10 | 11 | cd cpu_nms 12 | python build.py 13 | cd .. 14 | 15 | cd roi_align 16 | python build.py -C src/cuda clean 17 | python build.py -C src/cuda clean 18 | cd .. 
19 | 20 | echo "Done compiling hopefully" 21 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: nms_kernel.cu nms_kernel.h 2 | /usr/local/cuda-9.0/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC \ 3 | -gencode arch=compute_37,code=sm_37 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 8 | clean: 9 | rm nms.cu.o 10 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, 6 | THCudaTensor * bottom_grad); 7 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/.ipynb_checkpoints/Makefile-checkpoint: -------------------------------------------------------------------------------- 1 | all: nms_kernel.cu nms_kernel.h 2 | /usr/local/cuda-9.0/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC \ 3 | -gencode arch=compute_37,code=sm_37 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 8 | clean: 9 | rm nms.cu.o 10 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: roi_align_kernel.cu roi_align_kernel.h 2 | /usr/local/cuda-9.0/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC \ 3 | -gencode arch=compute_37,code=sm_37 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 8 | clean: 9 | rm roi_align.cu.o 10 | -------------------------------------------------------------------------------- /lib/fpn/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/.ipynb_checkpoints/Makefile-checkpoint: -------------------------------------------------------------------------------- 1 | all: roi_align_kernel.cu roi_align_kernel.h 2 | /usr/local/cuda-9.0/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC \ 3 | -gencode arch=compute_37,code=sm_37 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61 \ 7 | -gencode arch=compute_70,code=sm_70 8 | clean: 9 | rm roi_align.cu.o 10 | 
-------------------------------------------------------------------------------- /lib/fpn/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/nms_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh) 8 | { 9 | int* keep_data = THIntTensor_data(keep); 10 | const float* boxes_sorted_data = THCudaTensor_data(state, boxes_sorted); 11 | 12 | const int boxes_num = THCudaTensor_size(state, boxes_sorted, 0); 13 | 14 | const int devId = THCudaTensor_getDevice(state, boxes_sorted); 15 | 16 | int numTotalKeep = ApplyNMSGPU(keep_data, boxes_sorted_data, boxes_num, nms_thresh, devId); 17 | return numTotalKeep; 18 | } 19 | 20 | 21 | -------------------------------------------------------------------------------- /lib/fpn/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | sources = [] 7 | headers = [] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | 36 | -------------------------------------------------------------------------------- /lib/fpn/nms/.ipynb_checkpoints/build-checkpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | sources = [] 7 | headers = [] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 
| extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | 36 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/cuda/roi_align.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* image_ptr, const float* boxes_ptr, int num_boxes, int batch, int image_height, int image_width, int crop_height, 9 | int crop_width, int depth, float extrapolation_value, float* crops_ptr); 10 | 11 | int ROIAlignForwardLaucher( 12 | const float* image_ptr, const float* boxes_ptr, 13 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 14 | int crop_width, int depth, float extrapolation_value, float* crops_ptr, cudaStream_t stream); 15 | 16 | __global__ void ROIAlignBackward(const int nthreads, const float* grads_ptr, 17 | const float* boxes_ptr, int num_boxes, int batch, int image_height, 18 | int image_width, int crop_height, int crop_width, int depth, 19 | float* grads_image_ptr); 20 | 21 | int ROIAlignBackwardLaucher(const float* grads_ptr, const float* boxes_ptr, int num_boxes, 22 | int batch, int image_height, int image_width, int crop_height, 23 | int crop_width, int depth, float* grads_image_ptr, cudaStream_t stream); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | 31 | -------------------------------------------------------------------------------- /lib/my_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | import numpy as np 5 | 6 | class XavierLinear(nn.Module): 7 | ''' 8 | Simple Linear layer with Xavier init 9 | 10 | Paper by Xavier Glorot and Yoshua Bengio (2010): 11 | Understanding the difficulty of training deep feedforward neural networks 12 | http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf 13 | ''' 14 | 15 | def __init__(self, in_features, out_features, bias=True): 16 | super(XavierLinear, self).__init__() 17 | self.linear = nn.Linear(in_features, out_features, bias=bias) 18 | init.xavier_normal(self.linear.weight) 19 | 20 | 
def forward(self, x): 21 | return self.linear(x) 22 | 23 | class MLP(nn.Module): 24 | def __init__(self, dim_in_hid_out, act_fn='ReLU', last_act=False): 25 | super(MLP, self).__init__() 26 | layers = [] 27 | for i in range(len(dim_in_hid_out) - 1): 28 | layers.append(XavierLinear(dim_in_hid_out[i], dim_in_hid_out[i + 1])) 29 | if i < len(dim_in_hid_out) - 2 or last_act: 30 | layers.append(getattr(torch.nn, act_fn)()) 31 | self.model = torch.nn.Sequential(*layers) 32 | 33 | def forward(self, x): 34 | return self.model(x) 35 | 36 | -------------------------------------------------------------------------------- /lib/fpn/nms/functions/nms.py: -------------------------------------------------------------------------------- 1 | # Le code for doing NMS 2 | import torch 3 | import numpy as np 4 | from .._ext import nms 5 | 6 | 7 | def apply_nms(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, boxes_per_im=None, 8 | nms_thresh=0.7): 9 | """ 10 | Note - this function is non-differentiable so everything is assumed to be a tensor, not 11 | a variable. 12 | """ 13 | just_inds = boxes_per_im is None 14 | if boxes_per_im is None: 15 | boxes_per_im = [boxes.size(0)] 16 | 17 | 18 | s = 0 19 | keep = [] 20 | im_per = [] 21 | for bpi in boxes_per_im: 22 | e = s + int(bpi) 23 | keep_im = _nms_single_im(scores[s:e], boxes[s:e], pre_nms_topn, post_nms_topn, nms_thresh) 24 | keep.append(keep_im + s) 25 | im_per.append(keep_im.size(0)) 26 | 27 | s = e 28 | 29 | inds = torch.cat(keep, 0) 30 | if just_inds: 31 | return inds 32 | return inds, im_per 33 | 34 | 35 | def _nms_single_im(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, nms_thresh=0.7): 36 | keep = torch.IntTensor(scores.size(0)) 37 | vs, idx = torch.sort(scores, dim=0, descending=True) 38 | if idx.size(0) > pre_nms_topn: 39 | idx = idx[:pre_nms_topn] 40 | boxes_sorted = boxes[idx].contiguous() 41 | num_out = nms.nms_apply(keep, boxes_sorted, nms_thresh) 42 | num_out = min(num_out, post_nms_topn) 43 | keep = keep[:num_out].long() 44 | keep = idx[keep.cuda(scores.get_device())] 45 | return keep 46 | -------------------------------------------------------------------------------- /lib/fpn/nms/functions/.ipynb_checkpoints/nms-checkpoint.py: -------------------------------------------------------------------------------- 1 | # Le code for doing NMS 2 | import torch 3 | import numpy as np 4 | from .._ext import nms 5 | 6 | 7 | def apply_nms(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, boxes_per_im=None, 8 | nms_thresh=0.7): 9 | """ 10 | Note - this function is non-differentiable so everything is assumed to be a tensor, not 11 | a variable. 
12 | """ 13 | just_inds = boxes_per_im is None 14 | if boxes_per_im is None: 15 | boxes_per_im = [boxes.size(0)] 16 | 17 | 18 | s = 0 19 | keep = [] 20 | im_per = [] 21 | for bpi in boxes_per_im: 22 | e = s + int(bpi) 23 | keep_im = _nms_single_im(scores[s:e], boxes[s:e], pre_nms_topn, post_nms_topn, nms_thresh) 24 | keep.append(keep_im + s) 25 | im_per.append(keep_im.size(0)) 26 | 27 | s = e 28 | 29 | inds = torch.cat(keep, 0) 30 | if just_inds: 31 | return inds 32 | return inds, im_per 33 | 34 | 35 | def _nms_single_im(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, nms_thresh=0.7): 36 | keep = torch.IntTensor(scores.size(0)) 37 | vs, idx = torch.sort(scores, dim=0, descending=True) 38 | if idx.size(0) > pre_nms_topn: 39 | idx = idx[:pre_nms_topn] 40 | boxes_sorted = boxes[idx].contiguous() 41 | num_out = nms.nms_apply(keep, boxes_sorted, nms_thresh) 42 | num_out = min(num_out, post_nms_topn) 43 | keep = keep[:num_out].long() 44 | keep = idx[keep.cuda(scores.get_device())] 45 | return keep 46 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.3.0 2 | awscli==1.15.84 3 | backcall==0.1.0 4 | bleach==3.1.0 5 | certifi==2019.9.11 6 | cffi==1.13.1 7 | cycler==0.10.0 8 | Cython==0.29.13 9 | decorator==4.4.1 10 | defusedxml==0.6.0 11 | dill==0.2.7.1 12 | entrypoints==0.3 13 | graphviz==0.13.2 14 | h5py==2.10.0 15 | importlib-metadata==0.23 16 | ipykernel==5.1.3 17 | ipython==7.9.0 18 | ipython-genutils==0.2.0 19 | ipywidgets==7.5.1 20 | jedi==0.15.1 21 | Jinja2==2.10.3 22 | jmespath==0.9.3 23 | jsonschema==3.1.1 24 | jupyter==1.0.0 25 | jupyter-client==5.3.4 26 | jupyter-console==6.0.0 27 | jupyter-core==4.6.1 28 | kiwisolver==1.1.0 29 | MarkupSafe==1.1.1 30 | matplotlib==3.1.1 31 | mistune==0.8.4 32 | more-itertools==7.2.0 33 | nbconvert==5.6.1 34 | nbformat==4.4.0 35 | notebook==6.0.1 36 | numpy==1.17.3 37 | pandas==0.25.2 38 | pandocfilters==1.4.2 39 | parso==0.5.1 40 | pexpect==4.7.0 41 | pickleshare==0.7.5 42 | Pillow==6.2.1 43 | prometheus-client==0.7.1 44 | prompt-toolkit==2.0.10 45 | protobuf==3.10.0 46 | ptyprocess==0.6.0 47 | pyyaml==3.13 48 | pyasn1==0.4.4 49 | pycocotools==2.0.0 50 | pycparser==2.19 51 | Pygments==2.4.2 52 | pyparsing==2.4.2 53 | pyrsistent==0.15.5 54 | python-dateutil==2.8.0 55 | pytz==2019.3 56 | pyzmq==18.1.0 57 | qtconsole==4.5.5 58 | rsa==3.4.2 59 | Send2Trash==1.5.0 60 | six==1.12.0 61 | tensorboardX==1.9 62 | terminado==0.8.2 63 | testpath==0.4.2 64 | torch==0.3.0.post4 65 | torchvision==0.2.0 66 | tornado==6.0.3 67 | tqdm==4.36.1 68 | traitlets==4.3.3 69 | wcwidth==0.1.7 70 | webencodings==0.5.1 71 | widgetsnbextension==3.5.1 72 | zipp==0.6.0 73 | 74 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | 
self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * image_ptr = THCudaTensor_data(state, features); 12 | float * boxes_ptr = THCudaTensor_data(state, rois); 13 | 14 | float * crops_ptr = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_boxes = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // batch size 25 | int batch = THCudaTensor_size(state, features, 0); 26 | // data height 27 | int image_height = THCudaTensor_size(state, features, 2); 28 | // data width 29 | int image_width = THCudaTensor_size(state, features, 3); 30 | // Number of channels 31 | int depth = THCudaTensor_size(state, features, 1); 32 | 33 | cudaStream_t stream = THCState_getCurrentStream(state); 34 | float extrapolation_value = 0.0; 35 | 36 | ROIAlignForwardLaucher( 37 | image_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 38 | crop_height, crop_width, depth, extrapolation_value, crops_ptr, 39 | stream); 40 | 41 | return 1; 42 | } 43 | 44 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale, 45 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 46 | { 47 | // Grab the input tensor 48 | float * grads_ptr = THCudaTensor_data(state, top_grad); 49 | float * boxes_ptr = THCudaTensor_data(state, rois); 50 | 51 | float * grads_image_ptr = THCudaTensor_data(state, bottom_grad); 52 | 53 | // Number of ROIs 54 | int num_boxes = THCudaTensor_size(state, rois, 0); 55 | int size_rois = THCudaTensor_size(state, rois, 1); 56 | if (size_rois != 5) 57 | { 58 | return 0; 59 | } 60 | 61 | // batch size 62 | int batch = THCudaTensor_size(state, bottom_grad, 0); 63 | // data height 64 | int image_height = THCudaTensor_size(state, bottom_grad, 2); 65 | // data width 66 | int image_width = THCudaTensor_size(state, bottom_grad, 3); 67 | // Number of channels 68 | int depth = THCudaTensor_size(state, bottom_grad, 1); 69 | 70 | cudaStream_t stream = 
THCState_getCurrentStream(state); 71 | 72 | ROIAlignBackwardLaucher( 73 | grads_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 74 | crop_height, crop_width, depth, grads_image_ptr, stream); 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | """ 2 | performs ROI aligning 3 | """ 4 | 5 | import torch 6 | from torch.autograd import Function 7 | from .._ext import roi_align 8 | 9 | class RoIAlignFunction(Function): 10 | def __init__(self, aligned_height, aligned_width, spatial_scale): 11 | self.aligned_width = int(aligned_width) 12 | self.aligned_height = int(aligned_height) 13 | self.spatial_scale = float(spatial_scale) 14 | 15 | self.feature_size = None 16 | 17 | def forward(self, features, rois): 18 | self.save_for_backward(rois) 19 | 20 | rois_normalized = rois.clone() 21 | 22 | self.feature_size = features.size() 23 | batch_size, num_channels, data_height, data_width = self.feature_size 24 | 25 | height = (data_height -1) / self.spatial_scale 26 | width = (data_width - 1) / self.spatial_scale 27 | 28 | rois_normalized[:,1] /= width 29 | rois_normalized[:,2] /= height 30 | rois_normalized[:,3] /= width 31 | rois_normalized[:,4] /= height 32 | 33 | 34 | num_rois = rois.size(0) 35 | 36 | output = features.new(num_rois, num_channels, self.aligned_height, 37 | self.aligned_width).zero_() 38 | 39 | if features.is_cuda: 40 | res = roi_align.roi_align_forward_cuda(self.aligned_height, 41 | self.aligned_width, 42 | self.spatial_scale, features, 43 | rois_normalized, output) 44 | assert res == 1 45 | else: 46 | raise ValueError 47 | 48 | return output 49 | 50 | def backward(self, grad_output): 51 | assert(self.feature_size is not None and grad_output.is_cuda) 52 | 53 | rois = self.saved_tensors[0] 54 | 55 | rois_normalized = rois.clone() 56 | 57 | batch_size, num_channels, data_height, data_width = self.feature_size 58 | 59 | height = (data_height -1) / self.spatial_scale 60 | width = (data_width - 1) / self.spatial_scale 61 | 62 | rois_normalized[:,1] /= width 63 | rois_normalized[:,2] /= height 64 | rois_normalized[:,3] /= width 65 | rois_normalized[:,4] /= height 66 | 67 | grad_input = rois_normalized.new(batch_size, num_channels, data_height, 68 | data_width).zero_() 69 | res = roi_align.roi_align_backward_cuda(self.aligned_height, 70 | self.aligned_width, 71 | self.spatial_scale, grad_output, 72 | rois_normalized, grad_input) 73 | assert res == 1 74 | return grad_input, None 75 | -------------------------------------------------------------------------------- /lib/draw_rectangles/draw_rectangles.pyx: -------------------------------------------------------------------------------- 1 | ###### 2 | # Draws rectangles 3 | ###### 4 | 5 | cimport cython 6 | import numpy as np 7 | cimport numpy as np 8 | 9 | DTYPE = np.float32 10 | ctypedef np.float32_t DTYPE_t 11 | 12 | def draw_union_boxes(bbox_pairs, pooling_size, padding=0): 13 | """ 14 | Draws union boxes for the image. 
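Each pair is rendered as two soft (edge-antialiased) binary masks on a pooling_size x pooling_size grid laid over the pair's union box, so the two output channels encode where each box of the pair sits within the union region.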
15 | :param box_pairs: [num_pairs, 8] 16 | :param fmap_size: Size of the original feature map 17 | :param stride: ratio between fmap size and original img (<1) 18 | :param pooling_size: resize everything to this size 19 | :return: [num_pairs, 2, pooling_size, pooling_size arr 20 | """ 21 | assert padding == 0, "Padding>0 not supported yet" 22 | return draw_union_boxes_c(bbox_pairs, pooling_size) 23 | 24 | cdef DTYPE_t minmax(DTYPE_t x): 25 | return min(max(x, 0), 1) 26 | 27 | cdef np.ndarray[DTYPE_t, ndim=4] draw_union_boxes_c( 28 | np.ndarray[DTYPE_t, ndim=2] box_pairs, unsigned int pooling_size): 29 | """ 30 | Parameters 31 | ---------- 32 | boxes: (N, 4) ndarray of float. everything has arbitrary ratios 33 | query_boxes: (K, 4) ndarray of float 34 | Returns 35 | ------- 36 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 37 | """ 38 | cdef unsigned int N = box_pairs.shape[0] 39 | 40 | cdef np.ndarray[DTYPE_t, ndim = 4] uboxes = np.zeros( 41 | (N, 2, pooling_size, pooling_size), dtype=DTYPE) 42 | cdef DTYPE_t x1_union, y1_union, x2_union, y2_union, w, h, x1_box, y1_box, x2_box, y2_box, y_contrib, x_contrib 43 | cdef unsigned int n, i, j, k 44 | 45 | for n in range(N): 46 | x1_union = min(box_pairs[n, 0], box_pairs[n, 4]) 47 | y1_union = min(box_pairs[n, 1], box_pairs[n, 5]) 48 | x2_union = max(box_pairs[n, 2], box_pairs[n, 6]) 49 | y2_union = max(box_pairs[n, 3], box_pairs[n, 7]) 50 | 51 | w = x2_union - x1_union 52 | h = y2_union - y1_union 53 | 54 | for i in range(2): 55 | # Now everything is in the range [0, pooling_size]. 56 | x1_box = (box_pairs[n, 0+4*i] - x1_union)*pooling_size / w 57 | y1_box = (box_pairs[n, 1+4*i] - y1_union)*pooling_size / h 58 | x2_box = (box_pairs[n, 2+4*i] - x1_union)*pooling_size / w 59 | y2_box = (box_pairs[n, 3+4*i] - y1_union)*pooling_size / h 60 | # print("{:.3f}, {:.3f}, {:.3f}, {:.3f}".format(x1_box, y1_box, x2_box, y2_box)) 61 | for j in range(pooling_size): 62 | y_contrib = minmax(j+1-y1_box)*minmax(y2_box-j) 63 | for k in range(pooling_size): 64 | x_contrib = minmax(k+1-x1_box)*minmax(x2_box-k) 65 | # print("j {} yc {} k {} xc {}".format(j, y_contrib, k, x_contrib)) 66 | uboxes[n,i,j,k] = x_contrib*y_contrib 67 | return uboxes 68 | -------------------------------------------------------------------------------- /lib/surgery.py: -------------------------------------------------------------------------------- 1 | # create predictions from the other stuff 2 | """ 3 | Go from proposals + scores to relationships. 4 | 5 | pred-cls: No bbox regression, obj dist is exactly known 6 | sg-cls : No bbox regression 7 | sg-det : Bbox regression 8 | 9 | in all cases we'll return: 10 | boxes, objs, rels, pred_scores 11 | 12 | """ 13 | 14 | import numpy as np 15 | import torch 16 | from lib.pytorch_misc import unravel_index 17 | from lib.fpn.box_utils import bbox_overlaps 18 | # from ad3 import factor_graph as fg 19 | from time import time 20 | 21 | def filter_dets(boxes, obj_scores, obj_classes, rel_inds, pred_scores): 22 | """ 23 | Filters detections.... 24 | :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4] 25 | :param obj_scores: [num_box] probabilities for the scores 26 | :param obj_classes: [num_box] class labels for the topk 27 | :param rel_inds: [num_rel, 2] TENSOR consisting of (im_ind0, im_ind1) 28 | :param pred_scores: [topk, topk, num_rel, num_predicates] 29 | :param use_nms: True if use NMS to filter dets. 
30 | :return: boxes, objs, rels, pred_scores 31 | 32 | """ 33 | if boxes.dim() != 2: 34 | raise ValueError("Boxes needs to be [num_box, 4] but its {}".format(boxes.size())) 35 | 36 | num_box = boxes.size(0) 37 | assert obj_scores.size(0) == num_box 38 | 39 | assert obj_classes.size() == obj_scores.size() 40 | num_rel = rel_inds.size(0) 41 | assert rel_inds.size(1) == 2 42 | assert pred_scores.size(0) == num_rel 43 | 44 | obj_scores0 = obj_scores.data[rel_inds[:,0]] 45 | obj_scores1 = obj_scores.data[rel_inds[:,1]] 46 | 47 | pred_scores_max, pred_classes_argmax = pred_scores.data[:,1:].max(1) 48 | pred_classes_argmax = pred_classes_argmax + 1 49 | 50 | rel_scores_argmaxed = pred_scores_max * obj_scores0 * obj_scores1 51 | rel_scores_vs, rel_scores_idx = torch.sort(rel_scores_argmaxed.view(-1), dim=0, descending=True) 52 | 53 | rels = rel_inds[rel_scores_idx].cpu().numpy() 54 | pred_scores_sorted = pred_scores[rel_scores_idx].data.cpu().numpy() 55 | obj_scores_np = obj_scores.data.cpu().numpy() 56 | objs_np = obj_classes.data.cpu().numpy() 57 | boxes_out = boxes.data.cpu().numpy() 58 | 59 | return boxes_out, objs_np, obj_scores_np, rels, pred_scores_sorted 60 | 61 | # def _get_similar_boxes(boxes, obj_classes_topk, nms_thresh=0.3): 62 | # """ 63 | # Assuming bg is NOT A LABEL. 64 | # :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4] 65 | # :param obj_classes: [num_box, topk] class labels 66 | # :return: num_box, topk, num_box, topk array containing similarities. 67 | # """ 68 | # topk = obj_classes_topk.size(1) 69 | # num_box = boxes.size(0) 70 | # 71 | # box_flat = boxes.view(-1, 4) if boxes.dim() == 3 else boxes[:, None].expand( 72 | # num_box, topk, 4).contiguous().view(-1, 4) 73 | # jax = bbox_overlaps(box_flat, box_flat).data > nms_thresh 74 | # # Filter out things that are not gonna compete. 75 | # classes_eq = obj_classes_topk.data.view(-1)[:, None] == obj_classes_topk.data.view(-1)[None, :] 76 | # jax &= classes_eq 77 | # boxes_are_similar = jax.view(num_box, topk, num_box, topk) 78 | # return boxes_are_similar.cpu().numpy().astype(np.bool) 79 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_gtbox.py: -------------------------------------------------------------------------------- 1 | from lib.pytorch_misc import enumerate_by_image, gather_nd, random_choose 2 | from lib.fpn.box_utils import bbox_preds, center_size, bbox_overlaps 3 | import torch 4 | from lib.pytorch_misc import diagonal_inds, to_variable 5 | from config import RELS_PER_IMG, REL_FG_FRACTION 6 | 7 | 8 | @to_variable 9 | def proposal_assignments_gtbox(rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5): 10 | """ 11 | Assign object detection proposals to ground-truth targets. Produces proposal 12 | classification labels and bounding-box regression targets. 13 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 14 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1]. Not needed it seems 15 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 16 | Note, the img_inds here start at image_offset 17 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]. 18 | Note, the img_inds here start at image_offset 19 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets [num_rois, 4] array of targets for the labels. 
24 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 25 | """ 26 | im_inds = rois[:,0].long() 27 | 28 | num_im = im_inds[-1] + 1 29 | 30 | # Offset the image indices in fg_rels to refer to absolute indices (not just within img i) 31 | fg_rels = gt_rels.clone() 32 | fg_rels[:,0] -= image_offset 33 | offset = {} 34 | for i, s, e in enumerate_by_image(im_inds): 35 | offset[i] = s 36 | for i, s, e in enumerate_by_image(fg_rels[:, 0]): 37 | fg_rels[s:e, 1:3] += offset[i] 38 | 39 | # Try ALL things, not just intersections. 40 | is_cand = (im_inds[:, None] == im_inds[None]) 41 | is_cand.view(-1)[diagonal_inds(is_cand)] = 0 42 | 43 | # # Compute salience 44 | # gt_inds = fg_rels[:, 1:3].contiguous().view(-1) 45 | # labels_arange = labels.data.new(labels.size(0)) 46 | # torch.arange(0, labels.size(0), out=labels_arange) 47 | # salience_labels = ((gt_inds[:, None] == labels_arange[None]).long().sum(0) > 0).long() 48 | # labels = torch.stack((labels, salience_labels), 1) 49 | 50 | # Add in some BG labels 51 | 52 | # NOW WE HAVE TO EXCLUDE THE FGs. 53 | # TODO: check if this causes an error if many duplicate GTs havent been filtered out 54 | 55 | is_cand.view(-1)[fg_rels[:,1]*im_inds.size(0) + fg_rels[:,2]] = 0 56 | is_bgcand = is_cand.nonzero() 57 | # TODO: make this sample on a per image case 58 | # If too many then sample 59 | num_fg = min(fg_rels.size(0), int(RELS_PER_IMG * REL_FG_FRACTION * num_im)) 60 | if num_fg < fg_rels.size(0): 61 | fg_rels = random_choose(fg_rels, num_fg) 62 | 63 | # If too many then sample 64 | num_bg = min(is_bgcand.size(0) if is_bgcand.dim() > 0 else 0, 65 | int(RELS_PER_IMG * num_im) - num_fg) 66 | if num_bg > 0: 67 | bg_rels = torch.cat(( 68 | im_inds[is_bgcand[:, 0]][:, None], 69 | is_bgcand, 70 | (is_bgcand[:, 0, None] < -10).long(), 71 | ), 1) 72 | 73 | if num_bg < is_bgcand.size(0): 74 | bg_rels = random_choose(bg_rels, num_bg) 75 | rel_labels = torch.cat((fg_rels, bg_rels), 0) 76 | else: 77 | rel_labels = fg_rels 78 | 79 | 80 | # last sort by rel. 81 | _, perm = torch.sort(rel_labels[:, 0]*(gt_boxes.size(0)**2) + 82 | rel_labels[:,1]*gt_boxes.size(0) + rel_labels[:,2]) 83 | 84 | rel_labels = rel_labels[perm].contiguous() 85 | 86 | labels = gt_classes[:,1].contiguous() 87 | return rois, labels, rel_labels 88 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/.ipynb_checkpoints/proposal_assignments_gtbox-checkpoint.py: -------------------------------------------------------------------------------- 1 | from lib.pytorch_misc import enumerate_by_image, gather_nd, random_choose 2 | from lib.fpn.box_utils import bbox_preds, center_size, bbox_overlaps 3 | import torch 4 | from lib.pytorch_misc import diagonal_inds, to_variable 5 | from config import RELS_PER_IMG, REL_FG_FRACTION 6 | 7 | 8 | @to_variable 9 | def proposal_assignments_gtbox(rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5): 10 | """ 11 | Assign object detection proposals to ground-truth targets. Produces proposal 12 | classification labels and bounding-box regression targets. 13 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 14 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1]. Not needed it seems 15 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 16 | Note, the img_inds here start at image_offset 17 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]. 
18 | Note, the img_inds here start at image_offset 19 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets [num_rois, 4] array of targets for the labels. 24 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 25 | """ 26 | im_inds = rois[:,0].long() 27 | 28 | num_im = im_inds[-1] + 1 29 | 30 | # Offset the image indices in fg_rels to refer to absolute indices (not just within img i) 31 | fg_rels = gt_rels.clone() 32 | fg_rels[:,0] -= image_offset 33 | offset = {} 34 | for i, s, e in enumerate_by_image(im_inds): 35 | offset[i] = s 36 | for i, s, e in enumerate_by_image(fg_rels[:, 0]): 37 | fg_rels[s:e, 1:3] += offset[i] 38 | 39 | # Try ALL things, not just intersections. 40 | is_cand = (im_inds[:, None] == im_inds[None]) 41 | is_cand.view(-1)[diagonal_inds(is_cand)] = 0 42 | 43 | # # Compute salience 44 | # gt_inds = fg_rels[:, 1:3].contiguous().view(-1) 45 | # labels_arange = labels.data.new(labels.size(0)) 46 | # torch.arange(0, labels.size(0), out=labels_arange) 47 | # salience_labels = ((gt_inds[:, None] == labels_arange[None]).long().sum(0) > 0).long() 48 | # labels = torch.stack((labels, salience_labels), 1) 49 | 50 | # Add in some BG labels 51 | 52 | # NOW WE HAVE TO EXCLUDE THE FGs. 53 | # TODO: check if this causes an error if many duplicate GTs havent been filtered out 54 | 55 | is_cand.view(-1)[fg_rels[:,1]*im_inds.size(0) + fg_rels[:,2]] = 0 56 | is_bgcand = is_cand.nonzero() 57 | # TODO: make this sample on a per image case 58 | # If too many then sample 59 | num_fg = min(fg_rels.size(0), int(RELS_PER_IMG * REL_FG_FRACTION * num_im)) 60 | if num_fg < fg_rels.size(0): 61 | fg_rels = random_choose(fg_rels, num_fg) 62 | 63 | # If too many then sample 64 | num_bg = min(is_bgcand.size(0) if is_bgcand.dim() > 0 else 0, 65 | int(RELS_PER_IMG * num_im) - num_fg) 66 | if num_bg > 0: 67 | bg_rels = torch.cat(( 68 | im_inds[is_bgcand[:, 0]][:, None], 69 | is_bgcand, 70 | (is_bgcand[:, 0, None] < -10).long(), 71 | ), 1) 72 | 73 | if num_bg < is_bgcand.size(0): 74 | bg_rels = random_choose(bg_rels, num_bg) 75 | rel_labels = torch.cat((fg_rels, bg_rels), 0) 76 | else: 77 | rel_labels = fg_rels 78 | 79 | 80 | # last sort by rel. 
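# The composite key below (img_ind * N^2 + subject_ind * N + object_ind, with N = gt_boxes.size(0))
# sorts the relation labels by image first, then by subject index, then by object index.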
81 | _, perm = torch.sort(rel_labels[:, 0]*(gt_boxes.size(0)**2) + 82 | rel_labels[:,1]*gt_boxes.size(0) + rel_labels[:,2]) 83 | 84 | rel_labels = rel_labels[perm].contiguous() 85 | 86 | labels = gt_classes[:,1].contiguous() 87 | return rois, labels, rel_labels 88 | -------------------------------------------------------------------------------- /lib/fpn/box_intersections_cpu/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(boxes, query_boxes): 16 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE) 17 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE) 18 | 19 | return bbox_overlaps_c(boxes_contig, query_contig) 20 | 21 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 22 | np.ndarray[DTYPE_t, ndim=2] boxes, 23 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 24 | """ 25 | Parameters 26 | ---------- 27 | boxes: (N, 4) ndarray of float 28 | query_boxes: (K, 4) ndarray of float 29 | Returns 30 | ------- 31 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 32 | """ 33 | cdef unsigned int N = boxes.shape[0] 34 | cdef unsigned int K = query_boxes.shape[0] 35 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 36 | cdef DTYPE_t iw, ih, box_area 37 | cdef DTYPE_t ua 38 | cdef unsigned int k, n 39 | for k in range(K): 40 | box_area = ( 41 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 42 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 43 | ) 44 | for n in range(N): 45 | iw = ( 46 | min(boxes[n, 2], query_boxes[k, 2]) - 47 | max(boxes[n, 0], query_boxes[k, 0]) + 1 48 | ) 49 | if iw > 0: 50 | ih = ( 51 | min(boxes[n, 3], query_boxes[k, 3]) - 52 | max(boxes[n, 1], query_boxes[k, 1]) + 1 53 | ) 54 | if ih > 0: 55 | ua = float( 56 | (boxes[n, 2] - boxes[n, 0] + 1) * 57 | (boxes[n, 3] - boxes[n, 1] + 1) + 58 | box_area - iw * ih 59 | ) 60 | overlaps[n, k] = iw * ih / ua 61 | return overlaps 62 | 63 | 64 | def bbox_intersections(boxes, query_boxes): 65 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE) 66 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE) 67 | 68 | return bbox_intersections_c(boxes_contig, query_contig) 69 | 70 | 71 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 72 | np.ndarray[DTYPE_t, ndim=2] boxes, 73 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 74 | """ 75 | For each query box compute the intersection ratio covered by boxes 76 | ---------- 77 | Parameters 78 | ---------- 79 | boxes: (N, 4) ndarray of float 80 | query_boxes: (K, 4) ndarray of float 81 | Returns 82 | ------- 83 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 84 | """ 85 | cdef unsigned int N = boxes.shape[0] 86 | cdef unsigned int K = query_boxes.shape[0] 87 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 88 | cdef DTYPE_t iw, ih, box_area 89 | cdef DTYPE_t ua 90 | cdef unsigned int k, n 91 | for k in range(K): 92 | box_area = ( 93 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 94 | 
(query_boxes[k, 3] - query_boxes[k, 1] + 1) 95 | ) 96 | for n in range(N): 97 | iw = ( 98 | min(boxes[n, 2], query_boxes[k, 2]) - 99 | max(boxes[n, 0], query_boxes[k, 0]) + 1 100 | ) 101 | if iw > 0: 102 | ih = ( 103 | min(boxes[n, 3], query_boxes[k, 3]) - 104 | max(boxes[n, 1], query_boxes[k, 1]) + 1 105 | ) 106 | if ih > 0: 107 | intersec[n, k] = iw * ih / box_area 108 | return intersec -------------------------------------------------------------------------------- /lib/get_union_boxes.py: -------------------------------------------------------------------------------- 1 | """ 2 | credits to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/network.py#L91 3 | """ 4 | 5 | import torch 6 | from torch.autograd import Variable 7 | from torch.nn import functional as F 8 | from lib.fpn.roi_align.functions.roi_align import RoIAlignFunction 9 | from lib.draw_rectangles.draw_rectangles import draw_union_boxes 10 | import numpy as np 11 | from torch.nn.modules.module import Module 12 | from torch import nn 13 | from config import BATCHNORM_MOMENTUM 14 | 15 | class UnionBoxesAndFeats(Module): 16 | def __init__(self, pooling_size=7, stride=16, dim=256, concat=False, use_feats=True): 17 | """ 18 | :param pooling_size: Pool the union boxes to this dimension 19 | :param stride: pixel spacing in the entire image 20 | :param dim: Dimension of the feats 21 | :param concat: Whether to concat (yes) or add (False) the representations 22 | """ 23 | super(UnionBoxesAndFeats, self).__init__() 24 | 25 | self.pooling_size = pooling_size 26 | self.stride = stride 27 | 28 | self.dim = dim 29 | self.use_feats = use_feats 30 | 31 | self.conv = nn.Sequential( 32 | nn.Conv2d(2, dim //2, kernel_size=7, stride=2, padding=3, bias=True), 33 | nn.ReLU(inplace=True), 34 | nn.BatchNorm2d(dim//2, momentum=BATCHNORM_MOMENTUM), 35 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 36 | nn.Conv2d(dim // 2, dim, kernel_size=3, stride=1, padding=1, bias=True), 37 | nn.ReLU(inplace=True), 38 | nn.BatchNorm2d(dim, momentum=BATCHNORM_MOMENTUM), 39 | ) 40 | self.concat = concat 41 | 42 | def forward(self, fmap, rois, union_inds): 43 | union_pools = union_boxes(fmap, rois, union_inds, pooling_size=self.pooling_size, stride=self.stride) 44 | if not self.use_feats: 45 | return union_pools.detach() 46 | 47 | pair_rois = torch.cat((rois[:, 1:][union_inds[:, 0]], rois[:, 1:][union_inds[:, 1]]),1).data.cpu().numpy() 48 | # rects_np = get_rect_features(pair_rois, self.pooling_size*2-1) - 0.5 49 | rects_np = draw_union_boxes(pair_rois, self.pooling_size*4-1) - 0.5 50 | rects = Variable(torch.FloatTensor(rects_np).cuda(fmap.get_device()), volatile=fmap.volatile) 51 | if self.concat: 52 | return torch.cat((union_pools, self.conv(rects)), 1) 53 | return union_pools + self.conv(rects) 54 | 55 | # def get_rect_features(roi_pairs, pooling_size): 56 | # rects_np = draw_union_boxes(roi_pairs, pooling_size) 57 | # # add union + intersection 58 | # stuff_to_cat = [ 59 | # rects_np.max(1), 60 | # rects_np.min(1), 61 | # np.minimum(1-rects_np[:,0], rects_np[:,1]), 62 | # np.maximum(1-rects_np[:,0], rects_np[:,1]), 63 | # np.minimum(rects_np[:,0], 1-rects_np[:,1]), 64 | # np.maximum(rects_np[:,0], 1-rects_np[:,1]), 65 | # np.minimum(1-rects_np[:,0], 1-rects_np[:,1]), 66 | # np.maximum(1-rects_np[:,0], 1-rects_np[:,1]), 67 | # ] 68 | # rects_np = np.concatenate([rects_np] + [x[:,None] for x in stuff_to_cat], 1) 69 | # return rects_np 70 | 71 | 72 | def union_boxes(fmap, rois, union_inds, pooling_size=14, stride=16): 73 | """ 74 | 
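Pools a feature map for the union of every RoI pair: for each (roi_ind1, roi_ind2) the smallest box enclosing both RoIs is formed, and RoIAlign is applied to that union box.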
:param fmap: (batch_size, d, IM_SIZE/stride, IM_SIZE/stride) 75 | :param rois: (num_rois, 5) with [im_ind, x1, y1, x2, y2] 76 | :param union_inds: (num_urois, 2) with [roi_ind1, roi_ind2] 77 | :param pooling_size: we'll resize to this 78 | :param stride: 79 | :return: 80 | """ 81 | assert union_inds.size(1) == 2 82 | im_inds = rois[:,0][union_inds[:,0]] 83 | assert (im_inds.data == rois.data[:,0][union_inds[:,1]]).sum() == union_inds.size(0) 84 | union_rois = torch.cat(( 85 | im_inds[:,None], 86 | torch.min(rois[:, 1:3][union_inds[:, 0]], rois[:, 1:3][union_inds[:, 1]]), 87 | torch.max(rois[:, 3:5][union_inds[:, 0]], rois[:, 3:5][union_inds[:, 1]]), 88 | ),1) 89 | 90 | # (num_rois, d, pooling_size, pooling_size) 91 | union_pools = RoIAlignFunction(pooling_size, pooling_size, 92 | spatial_scale=1/stride)(fmap, union_rois) 93 | return union_pools 94 | 95 | -------------------------------------------------------------------------------- /lib/fpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from config import IM_SCALE 8 | 9 | import numpy as np 10 | 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | # array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | # [-167., -343., 184., 360.]]) 38 | 39 | def generate_anchors(base_size=16, feat_stride=16, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)): 40 | """ A wrapper function to generate anchors given different scales 41 | Also return the number of anchors in variable 'length' 42 | """ 43 | anchors = generate_base_anchors(base_size=base_size, 44 | ratios=np.array(anchor_ratios), 45 | scales=np.array(anchor_scales)) 46 | A = anchors.shape[0] 47 | shift_x = np.arange(0, IM_SCALE // feat_stride) * feat_stride # Same as shift_x 48 | shift_x, shift_y = np.meshgrid(shift_x, shift_x) 49 | 50 | shifts = np.stack([shift_x, shift_y, shift_x, shift_y], -1) # h, w, 4 51 | all_anchors = shifts[:, :, None] + anchors[None, None] #h, w, A, 4 52 | return all_anchors 53 | 54 | # shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 55 | # K = shifts.shape[0] 56 | # # width changes faster, so here it is H, W, C 57 | # anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 58 | # anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 59 | # length = np.int32(anchors.shape[0]) 60 | 61 | 62 | def generate_base_anchors(base_size=16, ratios=[0.5, 1, 2], scales=2 ** np.arange(3, 6)): 63 | """ 64 | Generate anchor (reference) windows by enumerating aspect ratios X 65 | scales wrt a reference (0, 0, 15, 15) window. 
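With the default 3 ratios and 3 scales this yields the 9 base anchors tabulated in the comment block near the top of this file.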
66 | """ 67 | 68 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 69 | ratio_anchors = _ratio_enum(base_anchor, ratios) 70 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 71 | for i in range(ratio_anchors.shape[0])]) 72 | return anchors 73 | 74 | 75 | def _whctrs(anchor): 76 | """ 77 | Return width, height, x center, and y center for an anchor (window). 78 | """ 79 | 80 | w = anchor[2] - anchor[0] + 1 81 | h = anchor[3] - anchor[1] + 1 82 | x_ctr = anchor[0] + 0.5 * (w - 1) 83 | y_ctr = anchor[1] + 0.5 * (h - 1) 84 | return w, h, x_ctr, y_ctr 85 | 86 | 87 | def _mkanchors(ws, hs, x_ctr, y_ctr): 88 | """ 89 | Given a vector of widths (ws) and heights (hs) around a center 90 | (x_ctr, y_ctr), output a set of anchors (windows). 91 | """ 92 | 93 | ws = ws[:, np.newaxis] 94 | hs = hs[:, np.newaxis] 95 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 96 | y_ctr - 0.5 * (hs - 1), 97 | x_ctr + 0.5 * (ws - 1), 98 | y_ctr + 0.5 * (hs - 1))) 99 | return anchors 100 | 101 | 102 | def _ratio_enum(anchor, ratios): 103 | """ 104 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 105 | """ 106 | 107 | w, h, x_ctr, y_ctr = _whctrs(anchor) 108 | size = w * h 109 | size_ratios = size / ratios 110 | # NOTE: CHANGED TO NOT HAVE ROUNDING 111 | ws = np.sqrt(size_ratios) 112 | hs = ws * ratios 113 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 114 | return anchors 115 | 116 | 117 | def _scale_enum(anchor, scales): 118 | """ 119 | Enumerate a set of anchors for each scale wrt an anchor. 120 | """ 121 | 122 | w, h, x_ctr, y_ctr = _whctrs(anchor) 123 | ws = w * scales 124 | hs = h * scales 125 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 126 | return anchors 127 | -------------------------------------------------------------------------------- /dataloaders/image_transforms.py: -------------------------------------------------------------------------------- 1 | # Some image transforms 2 | 3 | from PIL import Image, ImageOps, ImageFilter, ImageEnhance 4 | import numpy as np 5 | from random import randint 6 | # All of these need to be called on PIL imagez 7 | 8 | class SquarePad(object): 9 | def __call__(self, img): 10 | w, h = img.size 11 | img_padded = ImageOps.expand(img, border=(0, 0, max(h - w, 0), max(w - h, 0)), 12 | fill=(int(0.485 * 256), int(0.456 * 256), int(0.406 * 256))) 13 | return img_padded 14 | 15 | 16 | class Grayscale(object): 17 | """ 18 | Converts to grayscale (not always, sometimes). 19 | """ 20 | def __call__(self, img): 21 | factor = np.sqrt(np.sqrt(np.random.rand(1))) 22 | # print("gray {}".format(factor)) 23 | enhancer = ImageEnhance.Color(img) 24 | return enhancer.enhance(factor) 25 | 26 | 27 | class Brightness(object): 28 | """ 29 | Converts to grayscale (not always, sometimes). 30 | """ 31 | def __call__(self, img): 32 | factor = np.random.randn(1)/6+1 33 | factor = min(max(factor, 0.5), 1.5) 34 | # print("brightness {}".format(factor)) 35 | 36 | enhancer = ImageEnhance.Brightness(img) 37 | return enhancer.enhance(factor) 38 | 39 | 40 | class Contrast(object): 41 | """ 42 | Converts to grayscale (not always, sometimes). 
43 | """ 44 | def __call__(self, img): 45 | factor = np.random.randn(1)/8+1.0 46 | factor = min(max(factor, 0.5), 1.5) 47 | # print("contrast {}".format(factor)) 48 | 49 | enhancer = ImageEnhance.Contrast(img) 50 | return enhancer.enhance(factor) 51 | 52 | 53 | class Hue(object): 54 | """ 55 | Converts to grayscale 56 | """ 57 | def __call__(self, img): 58 | # 30 seems good 59 | factor = int(np.random.randn(1)*8) 60 | factor = min(max(factor, -30), 30) 61 | factor = np.array(factor, dtype=np.uint8) 62 | 63 | hsv = np.array(img.convert('HSV')) 64 | hsv[:,:,0] += factor 65 | new_img = Image.fromarray(hsv, 'HSV').convert('RGB') 66 | 67 | return new_img 68 | 69 | 70 | class Sharpness(object): 71 | """ 72 | Converts to grayscale 73 | """ 74 | def __call__(self, img): 75 | factor = 1.0 + np.random.randn(1)/5 76 | # print("sharpness {}".format(factor)) 77 | enhancer = ImageEnhance.Sharpness(img) 78 | return enhancer.enhance(factor) 79 | 80 | 81 | def random_crop(img, boxes, box_scale, round_boxes=True, max_crop_fraction=0.1): 82 | """ 83 | Randomly crops the image 84 | :param img: PIL image 85 | :param boxes: Ground truth boxes 86 | :param box_scale: This is the scale that the boxes are at (e.g. 1024 wide). We'll preserve that ratio 87 | :param round_boxes: Set this to true if we're going to round the boxes to ints 88 | :return: Cropped image, new boxes 89 | """ 90 | 91 | w, h = img.size 92 | 93 | max_crop_w = int(w*max_crop_fraction) 94 | max_crop_h = int(h*max_crop_fraction) 95 | boxes_scaled = boxes * max(w,h) / box_scale 96 | max_to_crop_top = min(int(boxes_scaled[:, 1].min()), max_crop_h) 97 | max_to_crop_left = min(int(boxes_scaled[:, 0].min()), max_crop_w) 98 | max_to_crop_right = min(int(w - boxes_scaled[:, 2].max()), max_crop_w) 99 | max_to_crop_bottom = min(int(h - boxes_scaled[:, 3].max()), max_crop_h) 100 | 101 | crop_top = randint(0, max(max_to_crop_top, 0)) 102 | crop_left = randint(0, max(max_to_crop_left, 0)) 103 | crop_right = randint(0, max(max_to_crop_right, 0)) 104 | crop_bottom = randint(0, max(max_to_crop_bottom, 0)) 105 | img_cropped = img.crop((crop_left, crop_top, w - crop_right, h - crop_bottom)) 106 | 107 | new_boxes = box_scale / max(img_cropped.size) * np.column_stack( 108 | (boxes_scaled[:,0]-crop_left, boxes_scaled[:,1]-crop_top, boxes_scaled[:,2]-crop_left, boxes_scaled[:,3]-crop_top)) 109 | 110 | if round_boxes: 111 | new_boxes = np.round(new_boxes).astype(np.int32) 112 | return img_cropped, new_boxes 113 | 114 | 115 | class RandomOrder(object): 116 | """ Composes several transforms together in random order - or not at all! 117 | """ 118 | 119 | def __init__(self, transforms): 120 | self.transforms = transforms 121 | 122 | def __call__(self, img): 123 | if self.transforms is None: 124 | return img 125 | num_to_pick = np.random.choice(len(self.transforms)) 126 | if num_to_pick == 0: 127 | return img 128 | 129 | order = np.random.choice(len(self.transforms), size=num_to_pick, replace=False) 130 | for i in order: 131 | img = self.transforms[i](img) 132 | return img -------------------------------------------------------------------------------- /lib/fpn/anchor_targets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates anchor targets to train the detector. Does this during the collate step in training 3 | as it's much cheaper to do this on a separate thread. 4 | 5 | Heavily adapted from faster_rcnn/rpn_msr/anchor_target_layer.py. 
6 | """ 7 | import numpy as np 8 | import numpy.random as npr 9 | 10 | from config import IM_SCALE, RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, \ 11 | RPN_BATCHSIZE, RPN_FG_FRACTION, ANCHOR_SIZE, ANCHOR_SCALES, ANCHOR_RATIOS 12 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps 13 | from lib.fpn.generate_anchors import generate_anchors 14 | 15 | 16 | def anchor_target_layer(gt_boxes, im_size, 17 | allowed_border=0): 18 | """ 19 | Assign anchors to ground-truth targets. Produces anchor classification 20 | labels and bounding-box regression targets. 21 | 22 | for each (H, W) location i 23 | generate 3 anchor boxes centered on cell i 24 | filter out-of-image anchors 25 | measure GT overlap 26 | 27 | :param gt_boxes: [x1, y1, x2, y2] boxes. These are assumed to be at the same scale as 28 | the image (IM_SCALE) 29 | :param im_size: Size of the image (h, w). This is assumed to be scaled to IM_SCALE 30 | """ 31 | if max(im_size) != IM_SCALE: 32 | raise ValueError("im size is {}".format(im_size)) 33 | h, w = im_size 34 | 35 | # Get the indices of the anchors in the feature map. 36 | # h, w, A, 4 37 | ans_np = generate_anchors(base_size=ANCHOR_SIZE, 38 | feat_stride=16, 39 | anchor_scales=ANCHOR_SCALES, 40 | anchor_ratios=ANCHOR_RATIOS, 41 | ) 42 | ans_np_flat = ans_np.reshape((-1, 4)) 43 | inds_inside = np.where( 44 | (ans_np_flat[:, 0] >= -allowed_border) & 45 | (ans_np_flat[:, 1] >= -allowed_border) & 46 | (ans_np_flat[:, 2] < w + allowed_border) & # width 47 | (ans_np_flat[:, 3] < h + allowed_border) # height 48 | )[0] 49 | good_ans_flat = ans_np_flat[inds_inside] 50 | if good_ans_flat.size == 0: 51 | raise ValueError("There were no good anchors for an image of size {} with boxes {}".format(im_size, gt_boxes)) 52 | 53 | # overlaps between the anchors and the gt boxes [num_anchors, num_gtboxes] 54 | overlaps = bbox_overlaps(good_ans_flat, gt_boxes) 55 | anchor_to_gtbox = overlaps.argmax(axis=1) 56 | max_overlaps = overlaps[np.arange(anchor_to_gtbox.shape[0]), anchor_to_gtbox] 57 | gtbox_to_anchor = overlaps.argmax(axis=0) 58 | gt_max_overlaps = overlaps[gtbox_to_anchor, np.arange(overlaps.shape[1])] 59 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 60 | 61 | # Good anchors are those that match SOMEWHERE within a decent tolerance 62 | # label: 1 is positive, 0 is negative, -1 is dont care. 
63 | # assign bg labels first so that positive labels can clobber them 64 | labels = (-1) * np.ones(overlaps.shape[0], dtype=np.int64) 65 | labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0 66 | labels[gt_argmax_overlaps] = 1 67 | labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1 68 | 69 | # subsample positive labels if we have too many 70 | num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE) 71 | fg_inds = np.where(labels == 1)[0] 72 | if len(fg_inds) > num_fg: 73 | labels[npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)] = -1 74 | 75 | # subsample negative labels if we have too many 76 | num_bg = RPN_BATCHSIZE - np.sum(labels == 1) 77 | bg_inds = np.where(labels == 0)[0] 78 | if len(bg_inds) > num_bg: 79 | labels[npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)] = -1 80 | # print("{} fg {} bg ratio{:.3f} inds inside {}".format(RPN_BATCHSIZE-num_bg, num_bg, (RPN_BATCHSIZE-num_bg)/RPN_BATCHSIZE, inds_inside.shape[0])) 81 | 82 | 83 | # Get the labels at the original size 84 | labels_unmap = (-1) * np.ones(ans_np_flat.shape[0], dtype=np.int64) 85 | labels_unmap[inds_inside] = labels 86 | 87 | # h, w, A 88 | labels_unmap_res = labels_unmap.reshape(ans_np.shape[:-1]) 89 | anchor_inds = np.column_stack(np.where(labels_unmap_res >= 0)) 90 | 91 | # These ought to be in the same order 92 | anchor_inds_flat = np.where(labels >= 0)[0] 93 | anchors = good_ans_flat[anchor_inds_flat] 94 | bbox_targets = gt_boxes[anchor_to_gtbox[anchor_inds_flat]] 95 | labels = labels[anchor_inds_flat] 96 | 97 | assert np.all(labels >= 0) 98 | 99 | 100 | # Anchors: [num_used, 4] 101 | # Anchor_inds: [num_used, 3] (h, w, A) 102 | # bbox_targets: [num_used, 4] 103 | # labels: [num_used] 104 | 105 | return anchors, anchor_inds, bbox_targets, labels 106 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_det.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import numpy.random as npr 4 | from config import BG_THRESH_HI, BG_THRESH_LO, FG_FRACTION, ROIS_PER_IMG 5 | from lib.fpn.box_utils import bbox_overlaps 6 | from lib.pytorch_misc import to_variable 7 | import torch 8 | 9 | ############################################################# 10 | # The following is only for object detection 11 | @to_variable 12 | def proposal_assignments_det(rpn_rois, gt_boxes, gt_classes, image_offset, fg_thresh=0.5): 13 | """ 14 | Assign object detection proposals to ground-truth targets. Produces proposal 15 | classification labels and bounding-box regression targets. 16 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 17 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1 18 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 19 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets [num_rois, 4] array of targets for the labels. 
24 | """ 25 | fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION)) 26 | 27 | gt_img_inds = gt_classes[:, 0] - image_offset 28 | 29 | all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0) 30 | 31 | ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0) 32 | 33 | im_sorted, idx = torch.sort(ims_per_box, 0) 34 | all_boxes = all_boxes[idx] 35 | 36 | # Assume that the GT boxes are already sorted in terms of image id 37 | num_images = int(im_sorted[-1]) + 1 38 | 39 | labels = [] 40 | rois = [] 41 | bbox_targets = [] 42 | for im_ind in range(num_images): 43 | g_inds = (gt_img_inds == im_ind).nonzero() 44 | 45 | if g_inds.dim() == 0: 46 | continue 47 | g_inds = g_inds.squeeze(1) 48 | g_start = g_inds[0] 49 | g_end = g_inds[-1] + 1 50 | 51 | t_inds = (im_sorted == im_ind).nonzero().squeeze(1) 52 | t_start = t_inds[0] 53 | t_end = t_inds[-1] + 1 54 | 55 | # Max overlaps: for each predicted box, get the max ROI 56 | # Get the indices into the GT boxes too (must offset by the box start) 57 | ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end]) 58 | max_overlaps, gt_assignment = ious.max(1) 59 | max_overlaps = max_overlaps.cpu().numpy() 60 | # print("Best overlap is {}".format(max_overlaps.max())) 61 | # print("\ngt assignment is {} while g_start is {} \n ---".format(gt_assignment, g_start)) 62 | gt_assignment += g_start 63 | 64 | keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh, fg_rois_per_image, 65 | ROIS_PER_IMG) 66 | 67 | if keep_inds_np.size == 0: 68 | continue 69 | 70 | keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device()) 71 | 72 | labels_ = gt_classes[:, 1][gt_assignment[keep_inds]] 73 | bbox_target_ = gt_boxes[gt_assignment[keep_inds]] 74 | 75 | # Clamp labels_ for the background RoIs to 0 76 | if num_fg < labels_.size(0): 77 | labels_[num_fg:] = 0 78 | 79 | rois_ = torch.cat(( 80 | im_sorted[t_start:t_end, None][keep_inds].float(), 81 | all_boxes[t_start:t_end][keep_inds], 82 | ), 1) 83 | 84 | labels.append(labels_) 85 | rois.append(rois_) 86 | bbox_targets.append(bbox_target_) 87 | 88 | rois = torch.cat(rois, 0) 89 | labels = torch.cat(labels, 0) 90 | bbox_targets = torch.cat(bbox_targets, 0) 91 | return rois, labels, bbox_targets 92 | 93 | 94 | def _sel_inds(max_overlaps, fg_thresh=0.5, fg_rois_per_image=128, rois_per_image=256): 95 | # Select foreground RoIs as those with >= FG_THRESH overlap 96 | fg_inds = np.where(max_overlaps >= fg_thresh)[0] 97 | 98 | # Guard against the case when an image has fewer than fg_rois_per_image 99 | # foreground RoIs 100 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.shape[0]) 101 | # Sample foreground regions without replacement 102 | if fg_inds.size > 0: 103 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 104 | 105 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 106 | bg_inds = np.where((max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO))[0] 107 | 108 | # Compute number of background RoIs to take from this image (guarding 109 | # against there being fewer than desired) 110 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 111 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 112 | # Sample background regions without replacement 113 | if bg_inds.size > 0: 114 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 115 | 116 | return np.append(fg_inds, bg_inds), fg_rois_per_this_image 117 | 118 | 
-------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 4 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 0]; 49 | block_boxes[threadIdx.x * 4 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 1]; 51 | block_boxes[threadIdx.x * 4 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 2]; 53 | block_boxes[threadIdx.x * 4 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 3]; 55 | } 56 | __syncthreads(); 57 | 58 | if (threadIdx.x < row_size) { 59 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 60 | const float *cur_box = dev_boxes + cur_box_idx * 4; 61 | int i = 0; 62 | unsigned long long t = 0; 63 | int start = 0; 64 | if (row_start == col_start) { 65 | start = threadIdx.x + 1; 66 | } 67 | for (i = start; i < col_size; i++) { 68 | if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) { 69 | t |= 1ULL << i; 70 | } 71 | } 72 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 73 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 74 | } 75 | } 76 | 77 | void _set_device(int device_id) { 78 | int current_device; 79 | CUDA_CHECK(cudaGetDevice(¤t_device)); 80 | if (current_device == device_id) { 81 | return; 82 | } 83 | // The call to cudaSetDevice must come before any calls to Get, which 84 | // may perform initialization using the GPU. 
85 | CUDA_CHECK(cudaSetDevice(device_id)); 86 | } 87 | 88 | extern "C" int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num, 89 | float nms_overlap_thresh, int device_id) { 90 | _set_device(device_id); 91 | 92 | unsigned long long* mask_dev = NULL; 93 | 94 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 95 | 96 | CUDA_CHECK(cudaMalloc(&mask_dev, 97 | boxes_num * col_blocks * sizeof(unsigned long long))); 98 | 99 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 100 | DIVUP(boxes_num, threadsPerBlock)); 101 | dim3 threads(threadsPerBlock); 102 | nms_kernel<<>>(boxes_num, 103 | nms_overlap_thresh, 104 | boxes_dev, 105 | mask_dev); 106 | 107 | std::vector mask_host(boxes_num * col_blocks); 108 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 109 | mask_dev, 110 | sizeof(unsigned long long) * boxes_num * col_blocks, 111 | cudaMemcpyDeviceToHost)); 112 | 113 | std::vector remv(col_blocks); 114 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 115 | 116 | int num_to_keep = 0; 117 | for (int i = 0; i < boxes_num; i++) { 118 | int nblock = i / threadsPerBlock; 119 | int inblock = i % threadsPerBlock; 120 | 121 | if (!(remv[nblock] & (1ULL << inblock))) { 122 | keep_out[num_to_keep++] = i; 123 | unsigned long long *p = &mask_host[0] + i * col_blocks; 124 | for (int j = nblock; j < col_blocks; j++) { 125 | remv[j] |= p[j]; 126 | } 127 | } 128 | } 129 | 130 | CUDA_CHECK(cudaFree(mask_dev)); 131 | return num_to_keep; 132 | } 133 | -------------------------------------------------------------------------------- /lib/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | from torchvision.models.resnet import model_urls, conv3x3, BasicBlock 5 | from torchvision.models.vgg import vgg16 6 | from config import BATCHNORM_MOMENTUM 7 | 8 | class Bottleneck(nn.Module): 9 | expansion = 4 10 | 11 | def __init__(self, inplanes, planes, stride=1, downsample=None, relu_end=True): 12 | super(Bottleneck, self).__init__() 13 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM) 15 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM) 18 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 19 | self.bn3 = nn.BatchNorm2d(planes * 4, momentum=BATCHNORM_MOMENTUM) 20 | self.relu = nn.ReLU(inplace=True) 21 | self.downsample = downsample 22 | self.stride = stride 23 | self.relu_end = relu_end 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out = self.bn2(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv3(out) 37 | out = self.bn3(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | 44 | if self.relu_end: 45 | out = self.relu(out) 46 | return out 47 | 48 | 49 | class ResNet(nn.Module): 50 | 51 | def __init__(self, block, layers, num_classes=1000): 52 | self.inplanes = 64 53 | super(ResNet, self).__init__() 54 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 55 | bias=False) 56 | self.bn1 = nn.BatchNorm2d(64, momentum=BATCHNORM_MOMENTUM) 57 | self.relu = nn.ReLU(inplace=True) 58 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 59 | 
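# The four residual stages below follow the standard ResNet-101 layout, except that layer4 is
# built with stride=1 (see the HACK note) so the backbone output stays at 1/16 of the input
# resolution, which is the stride the RoIAlign-based layers elsewhere in lib/ assume.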
self.layer1 = self._make_layer(block, 64, layers[0]) 60 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 61 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 62 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1) # HACK 63 | self.avgpool = nn.AvgPool2d(7) 64 | self.fc = nn.Linear(512 * block.expansion, num_classes) 65 | 66 | for m in self.modules(): 67 | if isinstance(m, nn.Conv2d): 68 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 69 | m.weight.data.normal_(0, math.sqrt(2. / n)) 70 | elif isinstance(m, nn.BatchNorm2d): 71 | m.weight.data.fill_(1) 72 | m.bias.data.zero_() 73 | 74 | def _make_layer(self, block, planes, blocks, stride=1): 75 | downsample = None 76 | if stride != 1 or self.inplanes != planes * block.expansion: 77 | downsample = nn.Sequential( 78 | nn.Conv2d(self.inplanes, planes * block.expansion, 79 | kernel_size=1, stride=stride, bias=False), 80 | nn.BatchNorm2d(planes * block.expansion, momentum=BATCHNORM_MOMENTUM), 81 | ) 82 | 83 | layers = [] 84 | layers.append(block(self.inplanes, planes, stride, downsample)) 85 | self.inplanes = planes * block.expansion 86 | for i in range(1, blocks): 87 | layers.append(block(self.inplanes, planes)) 88 | 89 | return nn.Sequential(*layers) 90 | 91 | def forward(self, x): 92 | x = self.conv1(x) 93 | x = self.bn1(x) 94 | x = self.relu(x) 95 | x = self.maxpool(x) 96 | 97 | x = self.layer1(x) 98 | x = self.layer2(x) 99 | x = self.layer3(x) 100 | x = self.layer4(x) 101 | 102 | x = self.avgpool(x) 103 | x = x.view(x.size(0), -1) 104 | x = self.fc(x) 105 | 106 | return x 107 | 108 | def resnet101(pretrained=False, **kwargs): 109 | """Constructs a ResNet-101 model. 110 | 111 | Args: 112 | pretrained (bool): If True, returns a model pre-trained on ImageNet 113 | """ 114 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 115 | if pretrained: 116 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 117 | return model 118 | 119 | def resnet_l123(): 120 | model = resnet101(pretrained=True) 121 | del model.layer4 122 | del model.avgpool 123 | del model.fc 124 | return model 125 | 126 | def resnet_l4(relu_end=True): 127 | model = resnet101(pretrained=True) 128 | l4 = model.layer4 129 | if not relu_end: 130 | l4[-1].relu_end = False 131 | l4[0].conv2.stride = (1, 1) 132 | l4[0].downsample[0].stride = (1, 1) 133 | return l4 134 | 135 | def vgg_fc(relu_end=True, linear_end=True): 136 | model = vgg16(pretrained=True) 137 | vfc = model.classifier 138 | del vfc._modules['6'] # Get rid of linear layer 139 | del vfc._modules['5'] # Get rid of linear layer 140 | if not relu_end: 141 | del vfc._modules['4'] # Get rid of linear layer 142 | if not linear_end: 143 | del vfc._modules['3'] 144 | return vfc 145 | 146 | 147 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_postnms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from .proposal_assignments_rel import _sel_rels 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable 11 | import torch 12 | 13 | 14 | @to_variable 15 | def proposal_assignments_postnms( 16 | rois, gt_boxes, gt_classes, gt_rels, nms_inds, image_offset, fg_thresh=0.5, 17 | max_objs=100, 
max_rels=100, rand_val=0.01): 18 | """ 19 | Assign object detection proposals to ground-truth targets. Produces proposal 20 | classification labels and bounding-box regression targets. 21 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 22 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 23 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 24 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 25 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 26 | :return: 27 | rois: [num_rois, 5] 28 | labels: [num_rois] array of labels 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 30 | """ 31 | pred_inds_np = rois[:, 0].cpu().numpy().astype(np.int64) 32 | pred_boxes_np = rois[:, 1:].cpu().numpy() 33 | nms_inds_np = nms_inds.cpu().numpy() 34 | sup_inds_np = np.setdiff1d(np.arange(pred_boxes_np.shape[0]), nms_inds_np) 35 | 36 | # split into chosen and suppressed 37 | chosen_inds_np = pred_inds_np[nms_inds_np] 38 | chosen_boxes_np = pred_boxes_np[nms_inds_np] 39 | 40 | suppre_inds_np = pred_inds_np[sup_inds_np] 41 | suppre_boxes_np = pred_boxes_np[sup_inds_np] 42 | 43 | gt_boxes_np = gt_boxes.cpu().numpy() 44 | gt_classes_np = gt_classes.cpu().numpy() 45 | gt_rels_np = gt_rels.cpu().numpy() 46 | 47 | gt_classes_np[:, 0] -= image_offset 48 | gt_rels_np[:, 0] -= image_offset 49 | 50 | num_im = gt_classes_np[:, 0].max()+1 51 | 52 | rois = [] 53 | obj_labels = [] 54 | rel_labels = [] 55 | num_box_seen = 0 56 | 57 | for im_ind in range(num_im): 58 | chosen_ind = np.where(chosen_inds_np == im_ind)[0] 59 | suppre_ind = np.where(suppre_inds_np == im_ind)[0] 60 | 61 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 62 | gt_boxes_i = gt_boxes_np[gt_ind] 63 | gt_classes_i = gt_classes_np[gt_ind, 1] 64 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 65 | 66 | # Get IOUs between chosen and GT boxes and if needed we'll add more in 67 | 68 | chosen_boxes_i = chosen_boxes_np[chosen_ind] 69 | suppre_boxes_i = suppre_boxes_np[suppre_ind] 70 | 71 | n_chosen = chosen_boxes_i.shape[0] 72 | n_suppre = suppre_boxes_i.shape[0] 73 | n_gt_box = gt_boxes_i.shape[0] 74 | 75 | # add a teensy bit of random noise because some GT boxes might be duplicated, etc. 76 | pred_boxes_i = np.concatenate((chosen_boxes_i, suppre_boxes_i, gt_boxes_i), 0) 77 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) + rand_val*( 78 | np.random.rand(pred_boxes_i.shape[0], gt_boxes_i.shape[0])-0.5) 79 | 80 | # Let's say that a box can only be assigned ONCE for now because we've already done 81 | # the NMS and stuff. 
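# is_hit marks (prediction, GT) pairs whose jittered IoU clears fg_thresh; argmax keeps at most
# one GT per prediction, and the np.unique(..., return_index=True) step below keeps only the first
# prediction claiming each GT, so the resulting assignment is (at most) one-to-one.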
82 | is_hit = ious > fg_thresh 83 | 84 | obj_assignments_i = is_hit.argmax(1) 85 | obj_assignments_i[~is_hit.any(1)] = -1 86 | 87 | vals, first_occurance_ind = np.unique(obj_assignments_i, return_index=True) 88 | obj_assignments_i[np.setdiff1d( 89 | np.arange(obj_assignments_i.shape[0]), first_occurance_ind)] = -1 90 | 91 | extra_to_add = np.where(obj_assignments_i[n_chosen:] != -1)[0] + n_chosen 92 | 93 | # Add them in somewhere at random 94 | num_inds_to_have = min(max_objs, n_chosen + extra_to_add.shape[0]) 95 | boxes_i = np.zeros((num_inds_to_have, 4), dtype=np.float32) 96 | labels_i = np.zeros(num_inds_to_have, dtype=np.int64) 97 | 98 | inds_from_nms = np.sort(np.random.choice(num_inds_to_have, size=n_chosen, replace=False)) 99 | inds_from_elsewhere = np.setdiff1d(np.arange(num_inds_to_have), inds_from_nms) 100 | 101 | boxes_i[inds_from_nms] = chosen_boxes_i 102 | labels_i[inds_from_nms] = gt_classes_i[obj_assignments_i[:n_chosen]] 103 | 104 | boxes_i[inds_from_elsewhere] = pred_boxes_i[extra_to_add] 105 | labels_i[inds_from_elsewhere] = gt_classes_i[obj_assignments_i[extra_to_add]] 106 | 107 | # Now, we do the relationships. same as for rle 108 | all_rels_i = _sel_rels(bbox_overlaps(boxes_i, gt_boxes_i), 109 | boxes_i, 110 | labels_i, 111 | gt_classes_i, 112 | gt_rels_i, 113 | fg_thresh=fg_thresh, 114 | fg_rels_per_image=100) 115 | all_rels_i[:,0:2] += num_box_seen 116 | 117 | rois.append(np.column_stack(( 118 | im_ind * np.ones(boxes_i.shape[0], dtype=np.float32), 119 | boxes_i, 120 | ))) 121 | obj_labels.append(labels_i) 122 | rel_labels.append(np.column_stack(( 123 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 124 | all_rels_i, 125 | ))) 126 | num_box_seen += boxes_i.size 127 | 128 | rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(gt_boxes.get_device(), async=True) 129 | labels = torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(gt_boxes.get_device(), async=True) 130 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(gt_boxes.get_device(), 131 | async=True) 132 | 133 | return rois, labels, rel_labels 134 | -------------------------------------------------------------------------------- /lib/ggnn.py: -------------------------------------------------------------------------------- 1 | # Well, this file contains modules of GGNN_obj and GGNN_rel 2 | import os, sys 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import numpy as np 7 | 8 | class GGNNObj(nn.Module): 9 | def __init__(self, num_obj_cls=151, time_step_num=3, hidden_dim=512, output_dim=512, use_knowledge=True, prior_matrix=''): 10 | super(GGNNObj, self).__init__() 11 | self.num_obj_cls = num_obj_cls 12 | self.time_step_num = time_step_num 13 | self.output_dim = output_dim 14 | 15 | if use_knowledge: 16 | matrix_np = np.load(prior_matrix).astype(np.float32) 17 | else: 18 | matrix_np = np.ones((num_obj_cls, num_obj_cls)).astype(np.float32) / num_obj_cls 19 | 20 | self.matrix = Variable(torch.from_numpy(matrix_np), requires_grad=False).cuda() 21 | # if you want to use multi gpu to run this model, then you need to use the following line code to replace the last line code. 22 | # And if you use this line code, the model will save prior matrix as parameters in saved models. 23 | # self.matrix = nn.Parameter(torch.from_numpy(matrix_np), requires_grad=False) 24 | 25 | 26 | 27 | # here we follow the paper "Gated graph sequence neural networks" to implement GGNN, so eq3 means equation 3 in this paper. 
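# For reference, the per-node update implemented in forward() below is (eqs. 2-6 of that paper):
#   a_v  = A_v^T [h_1 ... h_N]                   (eq. 2: aggregate the other nodes' states)
#   z_v  = sigmoid(W_z a_v + U_z h_v)            (eq. 3: update gate)
#   r_v  = sigmoid(W_r a_v + U_r h_v)            (eq. 4: reset gate)
#   h~_v = tanh(W a_v + U (r_v * h_v))           (eq. 5: candidate state)
#   h_v <- (1 - z_v) * h_v + z_v * h~_v          (eq. 6: GRU-style interpolation)
# The fc_eq3_*/fc_eq4_*/fc_eq5_* layers declared next are the (W, U) pairs of eqs. 3-5.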
28 | self.fc_eq3_w = nn.Linear(2*hidden_dim, hidden_dim) 29 | self.fc_eq3_u = nn.Linear(hidden_dim, hidden_dim) 30 | self.fc_eq4_w = nn.Linear(2*hidden_dim, hidden_dim) 31 | self.fc_eq4_u = nn.Linear(hidden_dim, hidden_dim) 32 | self.fc_eq5_w = nn.Linear(2*hidden_dim, hidden_dim) 33 | self.fc_eq5_u = nn.Linear(hidden_dim, hidden_dim) 34 | 35 | self.fc_output = nn.Linear(2*hidden_dim, output_dim) 36 | self.ReLU = nn.ReLU(True) 37 | self.fc_obj_cls = nn.Linear(self.num_obj_cls * output_dim, self.num_obj_cls) 38 | 39 | 40 | def forward(self, input_ggnn): 41 | # propogation process 42 | num_object = input_ggnn.size()[0] 43 | hidden = input_ggnn.repeat(1, self.num_obj_cls).view(num_object, self.num_obj_cls, -1) 44 | for t in range(self.time_step_num): 45 | # eq(2) 46 | # here we use some matrix operation skills 47 | hidden_sum = torch.sum(hidden, 0) 48 | av = torch.cat([torch.cat([self.matrix.transpose(0, 1) @ (hidden_sum - hidden_i) for hidden_i in hidden], 0), 49 | torch.cat([self.matrix @ (hidden_sum - hidden_i) for hidden_i in hidden], 0)], 1) 50 | 51 | # eq(3) 52 | hidden = hidden.view(num_object*self.num_obj_cls, -1) 53 | zv = torch.sigmoid(self.fc_eq3_w(av) + self.fc_eq3_u(hidden)) 54 | 55 | # eq(4) 56 | rv = torch.sigmoid(self.fc_eq4_w(av) + self.fc_eq3_u(hidden)) 57 | 58 | #eq(5) 59 | hv = torch.tanh(self.fc_eq5_w(av) + self.fc_eq5_u(rv * hidden)) 60 | 61 | hidden = (1 - zv) * hidden + zv * hv 62 | hidden = hidden.view(num_object, self.num_obj_cls, -1) 63 | 64 | 65 | output = torch.cat((hidden.view(num_object*self.num_obj_cls, -1), 66 | input_ggnn.repeat(1, self.num_obj_cls).view(num_object*self.num_obj_cls, -1)), 1) 67 | output = self.fc_output(output) 68 | output = self.ReLU(output) 69 | obj_dists = self.fc_obj_cls(output.view(-1, self.num_obj_cls * self.output_dim)) 70 | return obj_dists 71 | 72 | 73 | 74 | class GGNNRel(nn.Module): 75 | def __init__(self, num_rel_cls=51, time_step_num=3, hidden_dim=512, output_dim=512, use_knowledge=True, prior_matrix=''): 76 | super(GGNNRel, self).__init__() 77 | self.num_rel_cls = num_rel_cls 78 | self.time_step_num = time_step_num 79 | self.matrix = np.load(prior_matrix).astype(np.float32) 80 | self.use_knowledge = use_knowledge 81 | 82 | self.fc_eq3_w = nn.Linear(2 * hidden_dim, hidden_dim) 83 | self.fc_eq3_u = nn.Linear(hidden_dim, hidden_dim) 84 | self.fc_eq4_w = nn.Linear(2 * hidden_dim, hidden_dim) 85 | self.fc_eq4_u = nn.Linear(hidden_dim, hidden_dim) 86 | self.fc_eq5_w = nn.Linear(2 * hidden_dim, hidden_dim) 87 | self.fc_eq5_u = nn.Linear(hidden_dim, hidden_dim) 88 | 89 | self.fc_output = nn.Linear(2 * hidden_dim, output_dim) 90 | self.ReLU = nn.ReLU(True) 91 | self.fc_rel_cls = nn.Linear((self.num_rel_cls + 2) * output_dim, self.num_rel_cls) 92 | 93 | def forward(self, rel_inds, sub_obj_preds, input_ggnn): 94 | (input_rel_num, node_num, _) = input_ggnn.size() 95 | assert input_rel_num == len(rel_inds) 96 | batch_in_matrix_sub = np.zeros((input_rel_num, 2, self.num_rel_cls), dtype=np.float32) 97 | 98 | if self.use_knowledge: # construct adjacency matrix depending on the predicted labels of subject and object. 
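# batch_in_matrix_sub has shape [input_rel_num, 2, num_rel_cls]; for each relation, both rows hold
# the same prior vector over predicate classes, looked up from self.matrix at the predicted
# (subject class, object class) pair, or set to the uniform 1/num_rel_cls in the branch below when
# no knowledge is used.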
99 | for index, rel in enumerate(rel_inds): 100 | batch_in_matrix_sub[index][0] = \ 101 | self.matrix[sub_obj_preds[index, 0].cpu().data, sub_obj_preds[index, 1].cpu().data] 102 | batch_in_matrix_sub[index][1] = batch_in_matrix_sub[index][0] 103 | else: 104 | for index, rel in enumerate(rel_inds): 105 | batch_in_matrix_sub[index][0] = 1.0 / float(self.num_rel_cls) 106 | batch_in_matrix_sub[index][1] = batch_in_matrix_sub[index][0] 107 | batch_in_matrix_sub_gpu = Variable(torch.from_numpy(batch_in_matrix_sub), requires_grad=False).cuda() 108 | del batch_in_matrix_sub 109 | 110 | hidden = input_ggnn 111 | for t in range(self.time_step_num): 112 | # eq(2) 113 | # because in this case, A^(out) == A^(in), so we use function "repeat" 114 | # What is A^(out) and A^(in)? Please refer to paper "Gated graph sequence neural networks" 115 | av = torch.cat((torch.bmm(batch_in_matrix_sub_gpu, hidden[:, 2:]), 116 | torch.bmm(batch_in_matrix_sub_gpu.transpose(1, 2), hidden[:, :2])), 1).repeat(1, 1, 2) 117 | av = av.view(input_rel_num * node_num, -1) 118 | flatten_hidden = hidden.view(input_rel_num * node_num, -1) 119 | # eq(3) 120 | zv = torch.sigmoid(self.fc_eq3_w(av) + self.fc_eq3_u(flatten_hidden)) 121 | # eq(4) 122 | rv = torch.sigmoid(self.fc_eq4_w(av) + self.fc_eq3_u(flatten_hidden)) 123 | #eq(5) 124 | hv = torch.tanh(self.fc_eq5_w(av) + self.fc_eq5_u(rv * flatten_hidden)) 125 | flatten_hidden = (1 - zv) * flatten_hidden + zv * hv 126 | hidden = flatten_hidden.view(input_rel_num, node_num, -1) 127 | 128 | output = torch.cat((flatten_hidden, input_ggnn.view(input_rel_num * node_num, -1)), 1) 129 | output = self.fc_output(output) 130 | output = self.ReLU(output) 131 | 132 | rel_dists = self.fc_rel_cls(output.view(input_rel_num, -1)) 133 | return rel_dists 134 | 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graph Bridging Network (GB-Net) 2 | Code for the ECCV 2020 paper: [Bridging Knowledge Graphs to Generate Scene Graphs](https://arxiv.org/pdf/2001.02314.pdf) 3 | ``` 4 | @InProceedings{Zareian_2020_ECCV, 5 | author = {Zareian, Alireza and Karaman, Svebor and Chang, Shih-Fu}, 6 | title = {Bridging Knowledge Graphs to Generate Scene Graphs}, 7 | booktitle = {Proceedings of the European conference on computer vision (ECCV)}, 8 | month = {August}, 9 | year = {2020} 10 | } 11 | ``` 12 | 13 | Instructions to reproduce all numbers in Table 1 and Table 2 of our paper: 14 | 15 | First, download and unpack Visual Genome images: [part 1](https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip) and [part 2](https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip) 16 | 17 | Extract these two zip files and put the images in the same folder. 18 | 19 | Then download VG metadata preprocessed by \[37\]: [annotations](http://svl.stanford.edu/projects/scene-graph/dataset/VG-SGG.h5), [class info](http://svl.stanford.edu/projects/scene-graph/dataset/VG-SGG-dicts.json), and [image metadata](http://svl.stanford.edu/projects/scene-graph/VG/image_data.json) 20 | 21 | Copy those three files into a single folder. 22 | 23 | Then update `config.py` with the path to the aforementioned data, as well as the absolute path to this directory. 24 | 25 | Now download the pretrained faster r-cnn checkpoint trained by [42] from https://www.dropbox.com/s/cfyqhskypu7tp0q/vg-24.tar?dl=0 and place it in `checkpoints/vgdet`. 26 | 27 | The next step is to configure a python environment and install pytorch.
To do that, first make sure CUDA 9 is installed, and then download https://download.pytorch.org/whl/cu90/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl and pip install the downloaded `whl` file. Then install the rest of required packages by running `pip install -r requirements.txt`. This includes jupyter, as you need it to run the notebooks. 28 | 29 | Finally, run the following to produce numbers for each table (In some cases order matters): 30 | ``` 31 | Table 1, Column 8, Rows 17-24: train: ipynb/train_predcls/0045.ipynb, evaluate: ipynb/eval_predcls/0011.ipynb 32 | Table 1, Column 8, Rows 9-16: train: ipynb/train_sgcls/0051.ipynb, evaluate: ipynb/eval_sgcls/0015.ipynb 33 | Table 1, Column 8, Rows 1-8: train: ipynb/train_predcls/0132.ipynb, evaluate: ipynb/eval_sgdet/0027.ipynb 34 | 35 | Table 1, Column 9, Rows 17-24: train: ipynb/train_predcls/0135.ipynb, evaluate: ipynb/eval_predcls/0025.ipynb 36 | Table 1, Column 9, Rows 9-16: train: ipynb/train_sgcls/0145.ipynb, evaluate: ipynb/eval_sgcls/0039.ipynb 37 | Table 1, Column 9, Rows 1-8: train: ipynb/train_predcls/0135.ipynb, evaluate: ipynb/eval_sgdet/0035.ipynb 38 | 39 | Table 2, Row 1, Columns 6-9: train: ipynb/train_predcls/0140.ipynb, evaluate: ipynb/eval_predcls/0030.ipynb 40 | Table 2, Row 1, Columns 2-5: train: ipynb/train_predcls/0140.ipynb, evaluate: ipynb/eval_sgdet/0028.ipynb 41 | 42 | Table 2, Row 2, Columns 6-9: train: ipynb/train_predcls/0134.ipynb, evaluate: ipynb/eval_predcls/0024.ipynb 43 | Table 2, Row 2, Columns 2-5: train: ipynb/train_predcls/0134.ipynb, evaluate: ipynb/eval_sgdet/0034.ipynb 44 | 45 | Table 2, Row 3, Columns 6-9: train: ipynb/train_predcls/0136.ipynb, evaluate: ipynb/eval_predcls/0026.ipynb 46 | Table 2, Row 3, Columns 2-5: train: ipynb/train_predcls/0136.ipynb, evaluate: ipynb/eval_sgdet/0036.ipynb 47 | 48 | Table 2, Row 4, Columns 6-9: train: ipynb/train_predcls/0132.ipynb, evaluate: ipynb/eval_predcls/0022.ipynb 49 | Table 2, Row 4, Columns 2-5: train: ipynb/train_predcls/0132.ipynb, evaluate: ipynb/eval_sgdet/0027.ipynb 50 | ``` 51 | 52 | Moreover, SGCls results for table 2, which is missing from the paper due to space constraint, can be produced by: 53 | ``` 54 | Row 1: train: ipynb/train_predcls/0150.ipynb, evaluate: ipynb/eval_predcls/0041.ipynb 55 | Row 2: train: ipynb/train_predcls/0144.ipynb, evaluate: ipynb/eval_predcls/0038.ipynb 56 | Row 3: train: ipynb/train_predcls/0146.ipynb, evaluate: ipynb/eval_predcls/0040.ipynb 57 | Row 4: train: ipynb/train_predcls/0142.ipynb, evaluate: ipynb/eval_predcls/0037.ipynb 58 | ``` 59 | 60 | To skip training, you may download all our pretrained checkpoints from [here](https://www.dropbox.com/sh/r62mzgsg1f81776/AAAQKzPD8qJrBYeYzNHJ0p5Xa?dl=0) and place in the `checkpoints/` folder. Then you only need to run notebooks in `ipynb/eval_...` 61 | 62 | If GPU is not available, to skip deploying the model altogether, you may download our pre-computed model outputs from [here](https://www.dropbox.com/sh/rbnkcnfh0bmw08m/AACVBegZ14YGG9XwcsmJFxFua?dl=0) and place in the `caches/` folder. Then if you run any notebook in `ipynb/eval_...`, it automatically uses the cached results and does not deploy the model. Note that there is no need to run the cell that creates the model (`detector = ...`) as well as the next one that transfers it to cuda (`detector.cuda()`) and the next one that loads the checkpoint (`ckpt = ...`). Only run the rest of the cells. 
63 | 64 | Finally, to avoid running the code, you may just open the notebooks in `ipynb/eval_...` and scroll down to see the evaluation results. 65 | 66 | Note if you get cuda-related errors, it might be due to the cuda compatibility options that were used to compile this library. In that case, you need to change the compatibility in `lib/fpn/nms/src/cuda/Makefile` and `lib/fpn/roi_align/src/cuda/Makefile` and rebuild both by running make clean and then make in both directories. 67 | Also note that pytorch 0.3.0 only has pre-built binaries for up to cuda 9. In order to run this with cuda 10 and newer GPUs, you need to build pytorch from source. 68 | 69 | Acknowledgement: This repository is based on our references [\[1\]](https://github.com/yuweihao/KERN) and [\[42\]](https://github.com/rowanz/neural-motifs) 70 | 71 | [1] Chen, Tianshui, et al. "Knowledge-Embedded Routing Network for Scene Graph Generation." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019. 72 | 73 | [37] Xu, Danfei, et al. "Scene graph generation by iterative message passing." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017. 74 | 75 | [42] Zellers, Rowan, et al. "Neural motifs: Scene graph parsing with global context." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018. 76 | 77 | Created and maintained by [Alireza Zareian](https://www.linkedin.com/in/az2407/) at [DVMM](http://www.ee.columbia.edu/ln/dvmm/) - Columbia University. 78 | -------------------------------------------------------------------------------- /lib/my_ggnn_17.py: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # From my_ggnn_16: no knowledge 3 | ################################################################## 4 | 5 | import os, sys 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | import numpy as np 11 | import pickle 12 | from lib.my_util import MLP 13 | 14 | def wrap(nparr): 15 | return Variable(torch.from_numpy(nparr).float().cuda(), requires_grad=False) 16 | 17 | def arange(num): 18 | return torch.arange(num).type(torch.LongTensor).cuda() 19 | 20 | def normalize(tensor, dim, eps=1e-4): 21 | return tensor / torch.sqrt(torch.max((tensor**2).sum(dim=dim, keepdim=True), wrap(np.asarray([eps])))) 22 | 23 | class GGNN(nn.Module): 24 | def __init__(self, emb_path, graph_path, time_step_num=3, hidden_dim=512, output_dim=512, 25 | use_embedding=True, use_knowledge=True, refine_obj_cls=False, top_k_to_keep=5, normalize_messages=True): 26 | super(GGNN, self).__init__() 27 | self.time_step_num = time_step_num 28 | 29 | self.fc_mp_send_img_ent = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 30 | self.fc_mp_send_img_pred = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 31 | 32 | self.fc_mp_receive_img_ent = MLP([2 * hidden_dim // 4, 3 * hidden_dim // 4, hidden_dim], act_fn='ReLU', last_act=True) 33 | self.fc_mp_receive_img_pred = MLP([2 * hidden_dim // 4, 3 * hidden_dim // 4, hidden_dim], act_fn='ReLU', last_act=True) 34 | 35 | self.fc_eq3_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 36 | self.fc_eq3_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 37 | self.fc_eq4_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 38 | self.fc_eq4_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 39 | self.fc_eq5_w_img_ent = 
nn.Linear(hidden_dim, hidden_dim) 40 | self.fc_eq5_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 41 | 42 | self.fc_eq3_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 43 | self.fc_eq3_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 44 | self.fc_eq4_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 45 | self.fc_eq4_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 46 | self.fc_eq5_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 47 | self.fc_eq5_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 48 | 49 | self.fc_output_proj_img_pred = MLP([hidden_dim, hidden_dim, 51], act_fn='ReLU', last_act=False) 50 | 51 | self.refine_obj_cls = refine_obj_cls 52 | if self.refine_obj_cls: 53 | self.fc_output_proj_img_ent = MLP([hidden_dim, hidden_dim, 151], act_fn='ReLU', last_act=False) 54 | 55 | self.debug_info = {} 56 | self.top_k_to_keep = top_k_to_keep 57 | self.normalize_messages = normalize_messages 58 | 59 | def forward(self, rel_inds, ent_cls_logits, obj_fmaps, vr): 60 | num_img_ent = ent_cls_logits.size(0) 61 | num_img_pred = rel_inds.size(0) 62 | num_ont_ent = 151 63 | num_ont_pred = 51 64 | 65 | nodes_img_ent = obj_fmaps 66 | nodes_img_pred = vr 67 | 68 | edges_img_pred2subj = wrap(np.zeros((num_img_pred, num_img_ent))) 69 | edges_img_pred2subj[arange(num_img_pred), rel_inds[:, 0]] = 1 70 | edges_img_pred2obj = wrap(np.zeros((num_img_pred, num_img_ent))) 71 | edges_img_pred2obj[arange(num_img_pred), rel_inds[:, 1]] = 1 72 | edges_img_subj2pred = edges_img_pred2subj.t() 73 | edges_img_obj2pred = edges_img_pred2obj.t() 74 | 75 | edges_img_pred2subj = edges_img_pred2subj / torch.max(edges_img_pred2subj.sum(dim=0, keepdim=True), wrap(np.asarray([1.0]))) 76 | edges_img_pred2obj = edges_img_pred2obj / torch.max(edges_img_pred2obj.sum(dim=0, keepdim=True), wrap(np.asarray([1.0]))) 77 | 78 | for t in range(self.time_step_num): 79 | message_send_img_ent = self.fc_mp_send_img_ent(nodes_img_ent) 80 | message_send_img_pred = self.fc_mp_send_img_pred(nodes_img_pred) 81 | 82 | message_incoming_img_ent = torch.stack([ 83 | torch.mm(edges_img_pred2subj.t(), message_send_img_pred), 84 | torch.mm(edges_img_pred2obj.t(), message_send_img_pred), 85 | ], 1) 86 | 87 | message_incoming_img_pred = torch.stack([ 88 | torch.mm(edges_img_subj2pred.t(), message_send_img_ent), 89 | torch.mm(edges_img_obj2pred.t(), message_send_img_ent), 90 | ], 1) 91 | 92 | if self.normalize_messages: 93 | message_incoming_img_ent = normalize(message_incoming_img_ent, 2) 94 | message_incoming_img_pred = normalize(message_incoming_img_pred, 2) 95 | 96 | message_received_img_ent = self.fc_mp_receive_img_ent(message_incoming_img_ent.view(num_img_ent, -1)) 97 | message_received_img_pred = self.fc_mp_receive_img_pred(message_incoming_img_pred.view(num_img_pred, -1)) 98 | 99 | z_img_ent = torch.sigmoid(self.fc_eq3_w_img_ent(message_received_img_ent) + self.fc_eq3_u_img_ent(nodes_img_ent)) 100 | r_img_ent = torch.sigmoid(self.fc_eq4_w_img_ent(message_received_img_ent) + self.fc_eq4_u_img_ent(nodes_img_ent)) 101 | h_img_ent = torch.tanh(self.fc_eq5_w_img_ent(message_received_img_ent) + self.fc_eq5_u_img_ent(r_img_ent * nodes_img_ent)) 102 | nodes_img_ent_new = (1 - z_img_ent) * nodes_img_ent + z_img_ent * h_img_ent 103 | 104 | z_img_pred = torch.sigmoid(self.fc_eq3_w_img_pred(message_received_img_pred) + self.fc_eq3_u_img_pred(nodes_img_pred)) 105 | r_img_pred = torch.sigmoid(self.fc_eq4_w_img_pred(message_received_img_pred) + self.fc_eq4_u_img_pred(nodes_img_pred)) 106 | h_img_pred = torch.tanh(self.fc_eq5_w_img_pred(message_received_img_pred) + 
self.fc_eq5_u_img_pred(r_img_pred * nodes_img_pred)) 107 | nodes_img_pred_new = (1 - z_img_pred) * nodes_img_pred + z_img_pred * h_img_pred 108 | 109 | nodes_img_ent = nodes_img_ent_new 110 | nodes_img_pred = nodes_img_pred_new 111 | 112 | 113 | pred_cls_logits = self.fc_output_proj_img_pred(nodes_img_pred) 114 | 115 | if self.refine_obj_cls: 116 | ent_cls_logits = self.fc_output_proj_img_ent(nodes_img_ent) 117 | 118 | return pred_cls_logits, ent_cls_logits 119 | 120 | -------------------------------------------------------------------------------- /lib/fpn/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.nn import functional as F 4 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps as bbox_overlaps_np 5 | from lib.fpn.box_intersections_cpu.bbox import bbox_intersections as bbox_intersections_np 6 | 7 | 8 | def bbox_loss(prior_boxes, deltas, gt_boxes, eps=1e-4, scale_before=1): 9 | """ 10 | Computes the loss for predicting the GT boxes from prior boxes 11 | :param prior_boxes: [num_boxes, 4] (x1, y1, x2, y2) 12 | :param deltas: [num_boxes, 4] (tx, ty, th, tw) 13 | :param gt_boxes: [num_boxes, 4] (x1, y1, x2, y2) 14 | :return: 15 | """ 16 | prior_centers = center_size(prior_boxes) #(cx, cy, w, h) 17 | gt_centers = center_size(gt_boxes) #(cx, cy, w, h) 18 | 19 | center_targets = (gt_centers[:, :2] - prior_centers[:, :2]) / prior_centers[:, 2:] 20 | size_targets = torch.log(gt_centers[:, 2:]) - torch.log(prior_centers[:, 2:]) 21 | all_targets = torch.cat((center_targets, size_targets), 1) 22 | 23 | loss = F.smooth_l1_loss(deltas, all_targets, size_average=False)/(eps + prior_centers.size(0)) 24 | 25 | return loss 26 | 27 | 28 | def bbox_preds(boxes, deltas): 29 | """ 30 | Converts "deltas" (predicted by the network) along with prior boxes 31 | into (x1, y1, x2, y2) representation. 32 | :param boxes: Prior boxes, represented as (x1, y1, x2, y2) 33 | :param deltas: Offsets (tx, ty, tw, th) 34 | :param box_strides [num_boxes,] distance apart between boxes. anchor box can't go more than 35 | \pm box_strides/2 from its current position. If None then we'll use the widths 36 | and heights 37 | :return: Transformed boxes 38 | """ 39 | 40 | if boxes.size(0) == 0: 41 | return boxes 42 | prior_centers = center_size(boxes) 43 | 44 | xys = prior_centers[:, :2] + prior_centers[:, 2:] * deltas[:, :2] 45 | 46 | whs = torch.exp(deltas[:, 2:]) * prior_centers[:, 2:] 47 | 48 | return point_form(torch.cat((xys, whs), 1)) 49 | 50 | 51 | def center_size(boxes): 52 | """ Convert prior_boxes to (cx, cy, w, h) 53 | representation for comparison to center-size form ground truth data. 54 | Args: 55 | boxes: (tensor) point_form boxes 56 | Return: 57 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 58 | """ 59 | wh = boxes[:, 2:] - boxes[:, :2] + 1.0 60 | 61 | if isinstance(boxes, np.ndarray): 62 | return np.column_stack((boxes[:, :2] + 0.5 * wh, wh)) 63 | return torch.cat((boxes[:, :2] + 0.5 * wh, wh), 1) 64 | 65 | 66 | def point_form(boxes): 67 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 68 | representation for comparison to point form ground truth data. 69 | Args: 70 | boxes: (tensor) center-size default boxes from priorbox layers. 71 | Return: 72 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
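Example (made-up box, using the +1 width/height convention of center_size above):
center_size maps (0, 0, 9, 9) to (5, 5, 10, 10), and point_form maps that back to (0, 0, 9, 9).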
73 | """ 74 | if isinstance(boxes, np.ndarray): 75 | return np.column_stack((boxes[:, :2] - 0.5 * boxes[:, 2:], 76 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0))) 77 | return torch.cat((boxes[:, :2] - 0.5 * boxes[:, 2:], 78 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0)), 1) # xmax, ymax 79 | 80 | 81 | ########################################################################### 82 | ### Torch Utils, creds to Max de Groot 83 | ########################################################################### 84 | 85 | def bbox_intersections(box_a, box_b): 86 | """ We resize both tensors to [A,B,2] without new malloc: 87 | [A,2] -> [A,1,2] -> [A,B,2] 88 | [B,2] -> [1,B,2] -> [A,B,2] 89 | Then we compute the area of intersect between box_a and box_b. 90 | Args: 91 | box_a: (tensor) bounding boxes, Shape: [A,4]. 92 | box_b: (tensor) bounding boxes, Shape: [B,4]. 93 | Return: 94 | (tensor) intersection area, Shape: [A,B]. 95 | """ 96 | if isinstance(box_a, np.ndarray): 97 | assert isinstance(box_b, np.ndarray) 98 | return bbox_intersections_np(box_a, box_b) 99 | A = box_a.size(0) 100 | B = box_b.size(0) 101 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 102 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 103 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 104 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 105 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0) 106 | return inter[:, :, 0] * inter[:, :, 1] 107 | 108 | 109 | def bbox_overlaps(box_a, box_b): 110 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 111 | is simply the intersection over union of two boxes. Here we operate on 112 | ground truth boxes and default boxes. 113 | E.g.: 114 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 115 | Args: 116 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 117 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 118 | Return: 119 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 120 | """ 121 | if isinstance(box_a, np.ndarray): 122 | assert isinstance(box_b, np.ndarray) 123 | return bbox_overlaps_np(box_a, box_b) 124 | 125 | inter = bbox_intersections(box_a, box_b) 126 | area_a = ((box_a[:, 2] - box_a[:, 0] + 1.0) * 127 | (box_a[:, 3] - box_a[:, 1] + 1.0)).unsqueeze(1).expand_as(inter) # [A,B] 128 | area_b = ((box_b[:, 2] - box_b[:, 0] + 1.0) * 129 | (box_b[:, 3] - box_b[:, 1] + 1.0)).unsqueeze(0).expand_as(inter) # [A,B] 130 | union = area_a + area_b - inter 131 | return inter / union # [A,B] 132 | 133 | 134 | def nms_overlaps(boxes): 135 | """ get overlaps for each channel""" 136 | assert boxes.dim() == 3 137 | N = boxes.size(0) 138 | nc = boxes.size(1) 139 | max_xy = torch.min(boxes[:, None, :, 2:].expand(N, N, nc, 2), 140 | boxes[None, :, :, 2:].expand(N, N, nc, 2)) 141 | 142 | min_xy = torch.max(boxes[:, None, :, :2].expand(N, N, nc, 2), 143 | boxes[None, :, :, :2].expand(N, N, nc, 2)) 144 | 145 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0) 146 | 147 | # n, n, 151 148 | inters = inter[:,:,:,0]*inter[:,:,:,1] 149 | boxes_flat = boxes.view(-1, 4) 150 | areas_flat = (boxes_flat[:,2]- boxes_flat[:,0]+1.0)*( 151 | boxes_flat[:,3]- boxes_flat[:,1]+1.0) 152 | areas = areas_flat.view(boxes.size(0), boxes.size(1)) 153 | union = -inters + areas[None] + areas[:, None] 154 | return inters / union 155 | 156 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/rel_assignments.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from config import BG_THRESH_HI, BG_THRESH_LO, REL_FG_FRACTION, RELS_PER_IMG_REFINE 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds 11 | from collections import defaultdict 12 | import torch 13 | 14 | @to_variable 15 | def rel_assignments(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels, image_offset, 16 | fg_thresh=0.5, num_sample_per_gt=4, filter_non_overlap=True): 17 | """ 18 | Assign object detection proposals to ground-truth targets. Produces proposal 19 | classification labels and bounding-box regression targets. 20 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 21 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 22 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 23 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 24 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 25 | :return: 26 | rois: [num_rois, 5] 27 | labels: [num_rois] array of labels 28 | bbox_targets [num_rois, 4] array of targets for the labels. 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 30 | """ 31 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64)) 32 | 33 | pred_inds_np = im_inds.cpu().numpy() 34 | pred_boxes_np = rpn_rois.cpu().numpy() 35 | pred_boxlabels_np = roi_gtlabels.cpu().numpy() 36 | gt_boxes_np = gt_boxes.cpu().numpy() 37 | gt_classes_np = gt_classes.cpu().numpy() 38 | gt_rels_np = gt_rels.cpu().numpy() 39 | 40 | gt_classes_np[:, 0] -= image_offset 41 | gt_rels_np[:, 0] -= image_offset 42 | 43 | num_im = gt_classes_np[:, 0].max()+1 44 | 45 | # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format( 46 | # pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np 47 | # )) 48 | 49 | rel_labels = [] 50 | num_box_seen = 0 51 | for im_ind in range(num_im): 52 | pred_ind = np.where(pred_inds_np == im_ind)[0] 53 | 54 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 55 | gt_boxes_i = gt_boxes_np[gt_ind] 56 | gt_classes_i = gt_classes_np[gt_ind, 1] 57 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 58 | 59 | # [num_pred, num_gt] 60 | pred_boxes_i = pred_boxes_np[pred_ind] 61 | pred_boxlabels_i = pred_boxlabels_np[pred_ind] 62 | 63 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) 64 | is_match = (pred_boxlabels_i[:,None] == gt_classes_i[None]) & (ious >= fg_thresh) 65 | 66 | # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box 67 | pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i) 68 | if filter_non_overlap: 69 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0) 70 | rels_intersect = rel_possibilities 71 | else: 72 | rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]), 73 | dtype=np.int64) - np.eye(pred_boxes_i.shape[0], 74 | dtype=np.int64) 75 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0) 76 | 77 | # ONLY select relations between ground truth because otherwise we get useless data 78 | rel_possibilities[pred_boxlabels_i == 0] = 0 79 | rel_possibilities[:, pred_boxlabels_i == 0] = 0 80 | 81 | # Sample the GT relationships. 
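# In more detail (a summary of the loop below): for every GT triple (from_gtind, to_gtind, rel_id),
# all proposal pairs whose boxes match the subject and object GT boxes are collected, and up to
# num_sample_per_gt of them are sampled with probability proportional to the product of their IoUs
# with the two GT boxes; the resulting fg_rels list is then capped at fg_rels_per_image
# (REL_FG_FRACTION * 64 = 16 with the default config).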
82 | fg_rels = [] 83 | p_size = [] 84 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i): 85 | fg_rels_i = [] 86 | fg_scores_i = [] 87 | 88 | for from_ind in np.where(is_match[:, from_gtind])[0]: 89 | for to_ind in np.where(is_match[:, to_gtind])[0]: 90 | if from_ind != to_ind: 91 | fg_rels_i.append((from_ind, to_ind, rel_id)) 92 | fg_scores_i.append((ious[from_ind, from_gtind] * ious[to_ind, to_gtind])) 93 | rel_possibilities[from_ind, to_ind] = 0 94 | if len(fg_rels_i) == 0: 95 | continue 96 | p = np.array(fg_scores_i) 97 | p = p / p.sum() 98 | p_size.append(p.shape[0]) 99 | num_to_add = min(p.shape[0], num_sample_per_gt) 100 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False): 101 | fg_rels.append(fg_rels_i[rel_to_add]) 102 | 103 | fg_rels = np.array(fg_rels, dtype=np.int64) 104 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image: 105 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)] 106 | elif fg_rels.size == 0: 107 | fg_rels = np.zeros((0, 3), dtype=np.int64) 108 | 109 | bg_rels = np.column_stack(np.where(rel_possibilities)) 110 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64))) 111 | 112 | num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0]) 113 | if bg_rels.size > 0: 114 | # Sample 4x as many intersecting relationships as non-intersecting. 115 | # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]] 116 | # p = bg_rels_intersect.astype(np.float32) 117 | # p[bg_rels_intersect == 0] = 0.2 118 | # p[bg_rels_intersect == 1] = 0.8 119 | # p /= p.sum() 120 | bg_rels = bg_rels[ 121 | np.random.choice(bg_rels.shape[0], 122 | #p=p, 123 | size=num_bg_rel, replace=False)] 124 | else: 125 | bg_rels = np.zeros((0, 3), dtype=np.int64) 126 | 127 | if fg_rels.size == 0 and bg_rels.size == 0: 128 | # Just put something here 129 | bg_rels = np.array([[0, 0, 0]], dtype=np.int64) 130 | 131 | # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape)) 132 | all_rels_i = np.concatenate((fg_rels, bg_rels), 0) 133 | all_rels_i[:,0:2] += num_box_seen 134 | 135 | all_rels_i = all_rels_i[np.lexsort((all_rels_i[:,1], all_rels_i[:,0]))] 136 | 137 | rel_labels.append(np.column_stack(( 138 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 139 | all_rels_i, 140 | ))) 141 | 142 | num_box_seen += pred_boxes_i.shape[0] 143 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), 144 | async=True) 145 | return rel_labels 146 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/.ipynb_checkpoints/rel_assignments-checkpoint.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from config import BG_THRESH_HI, BG_THRESH_LO, REL_FG_FRACTION, RELS_PER_IMG_REFINE 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds 11 | from collections import defaultdict 12 | import torch 13 | 14 | @to_variable 15 | def rel_assignments(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels, image_offset, 16 | fg_thresh=0.5, num_sample_per_gt=4, filter_non_overlap=True): 17 | """ 18 | Assign object detection proposals to ground-truth targets. 
Produces proposal 19 | classification labels and bounding-box regression targets. 20 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 21 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 22 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 23 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 24 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 25 | :return: 26 | rois: [num_rois, 5] 27 | labels: [num_rois] array of labels 28 | bbox_targets [num_rois, 4] array of targets for the labels. 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 30 | """ 31 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64)) 32 | 33 | pred_inds_np = im_inds.cpu().numpy() 34 | pred_boxes_np = rpn_rois.cpu().numpy() 35 | pred_boxlabels_np = roi_gtlabels.cpu().numpy() 36 | gt_boxes_np = gt_boxes.cpu().numpy() 37 | gt_classes_np = gt_classes.cpu().numpy() 38 | gt_rels_np = gt_rels.cpu().numpy() 39 | 40 | gt_classes_np[:, 0] -= image_offset 41 | gt_rels_np[:, 0] -= image_offset 42 | 43 | num_im = gt_classes_np[:, 0].max()+1 44 | 45 | # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format( 46 | # pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np 47 | # )) 48 | 49 | rel_labels = [] 50 | num_box_seen = 0 51 | for im_ind in range(num_im): 52 | pred_ind = np.where(pred_inds_np == im_ind)[0] 53 | 54 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 55 | gt_boxes_i = gt_boxes_np[gt_ind] 56 | gt_classes_i = gt_classes_np[gt_ind, 1] 57 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 58 | 59 | # [num_pred, num_gt] 60 | pred_boxes_i = pred_boxes_np[pred_ind] 61 | pred_boxlabels_i = pred_boxlabels_np[pred_ind] 62 | 63 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) 64 | is_match = (pred_boxlabels_i[:,None] == gt_classes_i[None]) & (ious >= fg_thresh) 65 | 66 | # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box 67 | pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i) 68 | if filter_non_overlap: 69 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0) 70 | rels_intersect = rel_possibilities 71 | else: 72 | rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]), 73 | dtype=np.int64) - np.eye(pred_boxes_i.shape[0], 74 | dtype=np.int64) 75 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0) 76 | 77 | # ONLY select relations between ground truth because otherwise we get useless data 78 | rel_possibilities[pred_boxlabels_i == 0] = 0 79 | rel_possibilities[:, pred_boxlabels_i == 0] = 0 80 | 81 | # Sample the GT relationships. 
82 | fg_rels = [] 83 | p_size = [] 84 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i): 85 | fg_rels_i = [] 86 | fg_scores_i = [] 87 | 88 | for from_ind in np.where(is_match[:, from_gtind])[0]: 89 | for to_ind in np.where(is_match[:, to_gtind])[0]: 90 | if from_ind != to_ind: 91 | fg_rels_i.append((from_ind, to_ind, rel_id)) 92 | fg_scores_i.append((ious[from_ind, from_gtind] * ious[to_ind, to_gtind])) 93 | rel_possibilities[from_ind, to_ind] = 0 94 | if len(fg_rels_i) == 0: 95 | continue 96 | p = np.array(fg_scores_i) 97 | p = p / p.sum() 98 | p_size.append(p.shape[0]) 99 | num_to_add = min(p.shape[0], num_sample_per_gt) 100 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False): 101 | fg_rels.append(fg_rels_i[rel_to_add]) 102 | 103 | fg_rels = np.array(fg_rels, dtype=np.int64) 104 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image: 105 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)] 106 | elif fg_rels.size == 0: 107 | fg_rels = np.zeros((0, 3), dtype=np.int64) 108 | 109 | bg_rels = np.column_stack(np.where(rel_possibilities)) 110 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64))) 111 | 112 | num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0]) 113 | if bg_rels.size > 0: 114 | # Sample 4x as many intersecting relationships as non-intersecting. 115 | # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]] 116 | # p = bg_rels_intersect.astype(np.float32) 117 | # p[bg_rels_intersect == 0] = 0.2 118 | # p[bg_rels_intersect == 1] = 0.8 119 | # p /= p.sum() 120 | bg_rels = bg_rels[ 121 | np.random.choice(bg_rels.shape[0], 122 | #p=p, 123 | size=num_bg_rel, replace=False)] 124 | else: 125 | bg_rels = np.zeros((0, 3), dtype=np.int64) 126 | 127 | if fg_rels.size == 0 and bg_rels.size == 0: 128 | # Just put something here 129 | bg_rels = np.array([[0, 0, 0]], dtype=np.int64) 130 | 131 | # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape)) 132 | all_rels_i = np.concatenate((fg_rels, bg_rels), 0) 133 | all_rels_i[:,0:2] += num_box_seen 134 | 135 | all_rels_i = all_rels_i[np.lexsort((all_rels_i[:,1], all_rels_i[:,0]))] 136 | 137 | rel_labels.append(np.column_stack(( 138 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 139 | all_rels_i, 140 | ))) 141 | 142 | num_box_seen += pred_boxes_i.shape[0] 143 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), 144 | async=True) 145 | return rel_labels 146 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/roi_align_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include "roi_align_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void ROIAlignForward(const int nthreads, const float* image_ptr, const float* boxes_ptr, 16 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 17 | int crop_width, int depth, float extrapolation_value, float* crops_ptr) { 18 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 19 | // (n, c, ph, pw) is an element in the aligned output 20 | int idx = out_idx; 21 | const int x = idx % crop_width; 22 | idx /= crop_width; 23 | const int y = idx % crop_height; 24 | idx /= 
crop_height; 25 | const int d = idx % depth; 26 | const int b = idx / depth; 27 | 28 | const int b_in = int(boxes_ptr[b*5]); 29 | const float x1 = boxes_ptr[b * 5 + 1]; 30 | const float y1 = boxes_ptr[b * 5 + 2]; 31 | const float x2 = boxes_ptr[b * 5 + 3]; 32 | const float y2 = boxes_ptr[b * 5 + 4]; 33 | if (b_in < 0 || b_in >= batch) { 34 | continue; 35 | } 36 | 37 | const float height_scale = 38 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 39 | : 0; 40 | const float width_scale = 41 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 42 | 43 | const float in_y = (crop_height > 1) 44 | ? y1 * (image_height - 1) + y * height_scale 45 | : 0.5 * (y1 + y2) * (image_height - 1); 46 | if (in_y < 0 || in_y > image_height - 1) { 47 | crops_ptr[out_idx] = extrapolation_value; 48 | continue; 49 | } 50 | 51 | const float in_x = (crop_width > 1) 52 | ? x1 * (image_width - 1) + x * width_scale 53 | : 0.5 * (x1 + x2) * (image_width - 1); 54 | if (in_x < 0 || in_x > image_width - 1) { 55 | crops_ptr[out_idx] = extrapolation_value; 56 | continue; 57 | } 58 | 59 | const int top_y_index = floorf(in_y); 60 | const int bottom_y_index = ceilf(in_y); 61 | const float y_lerp = in_y - top_y_index; 62 | 63 | const int left_x_index = floorf(in_x); 64 | const int right_x_index = ceilf(in_x); 65 | const float x_lerp = in_x - left_x_index; 66 | 67 | const float top_left = image_ptr[((b_in*depth + d) * image_height 68 | + top_y_index) * image_width + left_x_index]; 69 | const float top_right = image_ptr[((b_in*depth + d) * image_height 70 | + top_y_index) * image_width + right_x_index]; 71 | const float bottom_left = image_ptr[((b_in*depth + d) * image_height 72 | + bottom_y_index) * image_width + left_x_index]; 73 | const float bottom_right = image_ptr[((b_in*depth + d) * image_height 74 | + bottom_y_index) * image_width + right_x_index]; 75 | 76 | const float top = top_left + (top_right - top_left) * x_lerp; 77 | const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; 78 | crops_ptr[out_idx] = top + (bottom - top) * y_lerp; 79 | } 80 | } 81 | 82 | int ROIAlignForwardLaucher(const float* image_ptr, const float* boxes_ptr, 83 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 84 | int crop_width, int depth, float extrapolation_value, float* crops_ptr, cudaStream_t stream) { 85 | 86 | const int kThreadsPerBlock = 1024; 87 | const int output_size = num_boxes * crop_height * crop_width * depth; 88 | cudaError_t err; 89 | 90 | ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>> 91 | (output_size, image_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 92 | crop_height, crop_width, depth, extrapolation_value, crops_ptr); 93 | 94 | err = cudaGetLastError(); 95 | if(cudaSuccess != err) { 96 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 97 | exit( -1 ); 98 | } 99 | 100 | return 1; 101 | } 102 | 103 | __global__ void ROIAlignBackward( 104 | const int nthreads, const float* grads_ptr, const float* boxes_ptr, 105 | int num_boxes, int batch, int image_height, 106 | int image_width, int crop_height, int crop_width, int depth, 107 | float* grads_image_ptr) { 108 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 109 | 110 | // out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 111 | int idx = out_idx; 112 | const int x = idx % crop_width; 113 | idx /= crop_width; 114 | const int y = idx % crop_height; 115 | idx /= crop_height; 116 | 
const int d = idx % depth; 117 | const int b = idx / depth; 118 | 119 | const int b_in = boxes_ptr[b * 5]; 120 | const float x1 = boxes_ptr[b * 5 + 1]; 121 | const float y1 = boxes_ptr[b * 5 + 2]; 122 | const float x2 = boxes_ptr[b * 5 + 3]; 123 | const float y2 = boxes_ptr[b * 5 + 4]; 124 | if (b_in < 0 || b_in >= batch) { 125 | continue; 126 | } 127 | 128 | const float height_scale = 129 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 130 | : 0; 131 | const float width_scale = 132 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 133 | 134 | const float in_y = (crop_height > 1) 135 | ? y1 * (image_height - 1) + y * height_scale 136 | : 0.5 * (y1 + y2) * (image_height - 1); 137 | if (in_y < 0 || in_y > image_height - 1) { 138 | continue; 139 | } 140 | 141 | const float in_x = (crop_width > 1) 142 | ? x1 * (image_width - 1) + x * width_scale 143 | : 0.5 * (x1 + x2) * (image_width - 1); 144 | if (in_x < 0 || in_x > image_width - 1) { 145 | continue; 146 | } 147 | 148 | const int top_y_index = floorf(in_y); 149 | const int bottom_y_index = ceilf(in_y); 150 | const float y_lerp = in_y - top_y_index; 151 | 152 | const int left_x_index = floorf(in_x); 153 | const int right_x_index = ceilf(in_x); 154 | const float x_lerp = in_x - left_x_index; 155 | 156 | const float dtop = (1 - y_lerp) * grads_ptr[out_idx]; 157 | atomicAdd( 158 | grads_image_ptr + ((b_in*depth + d)*image_height + top_y_index) * image_width + left_x_index, 159 | (1 - x_lerp) * dtop); 160 | atomicAdd(grads_image_ptr + 161 | ((b_in * depth + d)*image_height+top_y_index)*image_width + right_x_index, 162 | x_lerp * dtop); 163 | 164 | const float dbottom = y_lerp * grads_ptr[out_idx]; 165 | atomicAdd(grads_image_ptr + ((b_in*depth+d)*image_height+bottom_y_index)*image_width+left_x_index, 166 | (1 - x_lerp) * dbottom); 167 | atomicAdd(grads_image_ptr + ((b_in*depth+d)*image_height+bottom_y_index)*image_width+right_x_index, 168 | x_lerp * dbottom); 169 | } 170 | } 171 | 172 | int ROIAlignBackwardLaucher(const float* grads_ptr, const float* boxes_ptr, int num_boxes, 173 | int batch, int image_height, int image_width, int crop_height, int crop_width, int depth, 174 | float* grads_image_ptr, cudaStream_t stream) { 175 | const int kThreadsPerBlock = 1024; 176 | const int output_size = num_boxes * crop_height * crop_width * depth; 177 | cudaError_t err; 178 | 179 | ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>> 180 | (output_size, grads_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, crop_height, 181 | crop_width, depth, grads_image_ptr); 182 | 183 | err = cudaGetLastError(); 184 | if(cudaSuccess != err) { 185 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 186 | exit( -1 ); 187 | } 188 | 189 | return 1; 190 | } 191 | 192 | 193 | #ifdef __cplusplus 194 | } 195 | #endif 196 | 197 | 198 | -------------------------------------------------------------------------------- /lib/evaluation/sg_eval_slow.py: -------------------------------------------------------------------------------- 1 | # JUST TO CHECK THAT IT IS EXACTLY THE SAME.................................. 
2 | import numpy as np 3 | from config import MODES 4 | 5 | class BasicSceneGraphEvaluator: 6 | 7 | def __init__(self, mode): 8 | self.result_dict = {} 9 | self.mode = {'sgdet':'sg_det', 'sgcls':'sg_cls', 'predcls':'pred_cls'}[mode] 10 | 11 | self.result_dict = {} 12 | self.result_dict[self.mode + '_recall'] = {20:[], 50:[], 100:[]} 13 | 14 | 15 | @classmethod 16 | def all_modes(cls): 17 | evaluators = {m: cls(mode=m) for m in MODES} 18 | return evaluators 19 | def evaluate_scene_graph_entry(self, gt_entry, pred_entry, iou_thresh=0.5): 20 | 21 | roidb_entry = { 22 | 'max_overlaps': np.ones(gt_entry['gt_classes'].shape[0], dtype=np.int64), 23 | 'boxes': gt_entry['gt_boxes'], 24 | 'gt_relations': gt_entry['gt_relations'], 25 | 'gt_classes': gt_entry['gt_classes'], 26 | } 27 | sg_entry = { 28 | 'boxes': pred_entry['pred_boxes'], 29 | 'relations': pred_entry['pred_rels'], 30 | 'obj_scores': pred_entry['obj_scores'], 31 | 'rel_scores': pred_entry['rel_scores'], 32 | 'pred_classes': pred_entry['pred_classes'], 33 | } 34 | 35 | pred_triplets, triplet_boxes = \ 36 | eval_relation_recall(sg_entry, roidb_entry, 37 | self.result_dict, 38 | self.mode, 39 | iou_thresh=iou_thresh) 40 | return pred_triplets, triplet_boxes 41 | 42 | 43 | def save(self, fn): 44 | np.save(fn, self.result_dict) 45 | 46 | 47 | def print_stats(self): 48 | print('======================' + self.mode + '============================') 49 | for k, v in self.result_dict[self.mode + '_recall'].items(): 50 | print('R@%i: %f' % (k, np.mean(v))) 51 | 52 | def save(self, fn): 53 | np.save(fn, self.result_dict) 54 | 55 | def print_stats(self): 56 | print('======================' + self.mode + '============================') 57 | for k, v in self.result_dict[self.mode + '_recall'].items(): 58 | print('R@%i: %f' % (k, np.mean(v))) 59 | 60 | 61 | def eval_relation_recall(sg_entry, 62 | roidb_entry, 63 | result_dict, 64 | mode, 65 | iou_thresh): 66 | 67 | # gt 68 | gt_inds = np.where(roidb_entry['max_overlaps'] == 1)[0] 69 | gt_boxes = roidb_entry['boxes'][gt_inds].copy().astype(float) 70 | num_gt_boxes = gt_boxes.shape[0] 71 | gt_relations = roidb_entry['gt_relations'].copy() 72 | gt_classes = roidb_entry['gt_classes'].copy() 73 | 74 | num_gt_relations = gt_relations.shape[0] 75 | if num_gt_relations == 0: 76 | return (None, None) 77 | gt_class_scores = np.ones(num_gt_boxes) 78 | gt_predicate_scores = np.ones(num_gt_relations) 79 | gt_triplets, gt_triplet_boxes, _ = _triplet(gt_relations[:,2], 80 | gt_relations[:,:2], 81 | gt_classes, 82 | gt_boxes, 83 | gt_predicate_scores, 84 | gt_class_scores) 85 | 86 | # pred 87 | box_preds = sg_entry['boxes'] 88 | num_boxes = box_preds.shape[0] 89 | relations = sg_entry['relations'] 90 | classes = sg_entry['pred_classes'].copy() 91 | class_scores = sg_entry['obj_scores'].copy() 92 | 93 | num_relations = relations.shape[0] 94 | 95 | if mode =='pred_cls': 96 | # if predicate classification task 97 | # use ground truth bounding boxes 98 | assert(num_boxes == num_gt_boxes) 99 | classes = gt_classes 100 | class_scores = gt_class_scores 101 | boxes = gt_boxes 102 | elif mode =='sg_cls': 103 | assert(num_boxes == num_gt_boxes) 104 | # if scene graph classification task 105 | # use gt boxes, but predicted classes 106 | # classes = np.argmax(class_preds, 1) 107 | # class_scores = class_preds.max(axis=1) 108 | boxes = gt_boxes 109 | elif mode =='sg_det': 110 | # if scene graph detection task 111 | # use preicted boxes and predicted classes 112 | # classes = np.argmax(class_preds, 1) 113 | # class_scores = 
class_preds.max(axis=1) 114 | boxes = box_preds 115 | else: 116 | raise NotImplementedError('Incorrect Mode! %s' % mode) 117 | 118 | pred_triplets = np.column_stack(( 119 | classes[relations[:, 0]], 120 | relations[:,2], 121 | classes[relations[:, 1]], 122 | )) 123 | pred_triplet_boxes = np.column_stack(( 124 | boxes[relations[:, 0]], 125 | boxes[relations[:, 1]], 126 | )) 127 | relation_scores = np.column_stack(( 128 | class_scores[relations[:, 0]], 129 | sg_entry['rel_scores'], 130 | class_scores[relations[:, 1]], 131 | )).prod(1) 132 | 133 | sorted_inds = np.argsort(relation_scores)[::-1] 134 | # compue recall 135 | for k in result_dict[mode + '_recall']: 136 | this_k = min(k, num_relations) 137 | keep_inds = sorted_inds[:this_k] 138 | recall = _relation_recall(gt_triplets, 139 | pred_triplets[keep_inds,:], 140 | gt_triplet_boxes, 141 | pred_triplet_boxes[keep_inds,:], 142 | iou_thresh) 143 | result_dict[mode + '_recall'][k].append(recall) 144 | 145 | # for visualization 146 | return pred_triplets[sorted_inds, :], pred_triplet_boxes[sorted_inds, :] 147 | 148 | 149 | def _triplet(predicates, relations, classes, boxes, 150 | predicate_scores, class_scores): 151 | 152 | # format predictions into triplets 153 | assert(predicates.shape[0] == relations.shape[0]) 154 | num_relations = relations.shape[0] 155 | triplets = np.zeros([num_relations, 3]).astype(np.int32) 156 | triplet_boxes = np.zeros([num_relations, 8]).astype(np.int32) 157 | triplet_scores = np.zeros([num_relations]).astype(np.float32) 158 | for i in range(num_relations): 159 | triplets[i, 1] = predicates[i] 160 | sub_i, obj_i = relations[i,:2] 161 | triplets[i, 0] = classes[sub_i] 162 | triplets[i, 2] = classes[obj_i] 163 | triplet_boxes[i, :4] = boxes[sub_i, :] 164 | triplet_boxes[i, 4:] = boxes[obj_i, :] 165 | # compute triplet score 166 | score = class_scores[sub_i] 167 | score *= class_scores[obj_i] 168 | score *= predicate_scores[i] 169 | triplet_scores[i] = score 170 | return triplets, triplet_boxes, triplet_scores 171 | 172 | 173 | def _relation_recall(gt_triplets, pred_triplets, 174 | gt_boxes, pred_boxes, iou_thresh): 175 | 176 | # compute the R@K metric for a set of predicted triplets 177 | 178 | num_gt = gt_triplets.shape[0] 179 | num_correct_pred_gt = 0 180 | 181 | for gt, gt_box in zip(gt_triplets, gt_boxes): 182 | keep = np.zeros(pred_triplets.shape[0]).astype(bool) 183 | for i, pred in enumerate(pred_triplets): 184 | if gt[0] == pred[0] and gt[1] == pred[1] and gt[2] == pred[2]: 185 | keep[i] = True 186 | if not np.any(keep): 187 | continue 188 | boxes = pred_boxes[keep,:] 189 | sub_iou = iou(gt_box[:4], boxes[:,:4]) 190 | obj_iou = iou(gt_box[4:], boxes[:,4:]) 191 | inds = np.intersect1d(np.where(sub_iou >= iou_thresh)[0], 192 | np.where(obj_iou >= iou_thresh)[0]) 193 | if inds.size > 0: 194 | num_correct_pred_gt += 1 195 | return float(num_correct_pred_gt) / float(num_gt) 196 | 197 | 198 | def iou(gt_box, pred_boxes): 199 | # computer Intersection-over-Union between two sets of boxes 200 | ixmin = np.maximum(gt_box[0], pred_boxes[:,0]) 201 | iymin = np.maximum(gt_box[1], pred_boxes[:,1]) 202 | ixmax = np.minimum(gt_box[2], pred_boxes[:,2]) 203 | iymax = np.minimum(gt_box[3], pred_boxes[:,3]) 204 | iw = np.maximum(ixmax - ixmin + 1., 0.) 205 | ih = np.maximum(iymax - iymin + 1., 0.) 206 | inters = iw * ih 207 | 208 | # union 209 | uni = ((gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) + 210 | (pred_boxes[:, 2] - pred_boxes[:, 0] + 1.) * 211 | (pred_boxes[:, 3] - pred_boxes[:, 1] + 1.) 
- inters) 212 | 213 | overlaps = inters / uni 214 | return overlaps 215 | -------------------------------------------------------------------------------- /dataloaders/blob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data blob, hopefully to make collating less painful and MGPU training possible 3 | """ 4 | from lib.fpn.anchor_targets import anchor_target_layer 5 | import numpy as np 6 | import torch 7 | from torch.autograd import Variable 8 | 9 | 10 | class Blob(object): 11 | def __init__(self, mode='det', is_train=False, num_gpus=1, primary_gpu=0, batch_size_per_gpu=3): 12 | """ 13 | Initializes an empty Blob object. 14 | :param mode: 'det' for detection and 'rel' for det+relationship 15 | :param is_train: True if it's training 16 | """ 17 | assert mode in ('det', 'rel') 18 | assert num_gpus >= 1 19 | self.mode = mode 20 | self.is_train = is_train 21 | self.num_gpus = num_gpus 22 | self.batch_size_per_gpu = batch_size_per_gpu 23 | self.primary_gpu = primary_gpu 24 | 25 | self.imgs = [] # [num_images, 3, IM_SCALE, IM_SCALE] array 26 | self.im_sizes = [] # [num_images, 4] array of (h, w, scale, num_valid_anchors) 27 | self.all_anchor_inds = [] # [all_anchors, 2] array of (img_ind, anchor_idx). Only has valid 28 | # boxes (meaning some are gonna get cut out) 29 | self.all_anchors = [] # [num_im, IM_SCALE/4, IM_SCALE/4, num_anchors, 4] shapes. Anchors outside get squashed 30 | # to 0 31 | self.gt_boxes = [] # [num_gt, 4] boxes 32 | self.gt_classes = [] # [num_gt,2] array of img_ind, class 33 | self.gt_rels = [] # [num_rels, 3]. Each row is (gtbox0, gtbox1, rel). 34 | 35 | self.gt_sents = [] 36 | self.gt_nodes = [] 37 | self.sent_lengths = [] 38 | 39 | self.train_anchor_labels = [] # [train_anchors, 5] array of (img_ind, h, w, A, labels) 40 | self.train_anchors = [] # [train_anchors, 8] shapes with anchor, target 41 | 42 | self.train_anchor_inds = None # This will be split into GPUs, just (img_ind, h, w, A). 
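# Rough life cycle of a Blob (a sketch based on the methods below): a dataloader collate function
# calls append() once per image dict, reduce() then concatenates everything into per-GPU chunks,
# scatter() moves the tensors to their GPUs, and blob[i] returns the input tuple for GPU i.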
43 | 44 | self.batch_size = None 45 | self.gt_box_chunks = None 46 | self.anchor_chunks = None 47 | self.train_chunks = None 48 | self.proposal_chunks = None 49 | self.proposals = [] 50 | 51 | @property 52 | def is_flickr(self): 53 | return self.mode == 'flickr' 54 | 55 | @property 56 | def is_rel(self): 57 | return self.mode == 'rel' 58 | 59 | @property 60 | def volatile(self): 61 | return not self.is_train 62 | 63 | def append(self, d): 64 | """ 65 | Adds a single image to the blob 66 | :param datom: 67 | :return: 68 | """ 69 | i = len(self.imgs) 70 | self.imgs.append(d['img']) 71 | 72 | h, w, scale = d['img_size'] 73 | 74 | # all anchors 75 | self.im_sizes.append((h, w, scale)) 76 | 77 | gt_boxes_ = d['gt_boxes'].astype(np.float32) * d['scale'] 78 | self.gt_boxes.append(gt_boxes_) 79 | 80 | self.gt_classes.append(np.column_stack(( 81 | i * np.ones(d['gt_classes'].shape[0], dtype=np.int64), 82 | d['gt_classes'], 83 | ))) 84 | 85 | # Add relationship info 86 | if self.is_rel: 87 | self.gt_rels.append(np.column_stack(( 88 | i * np.ones(d['gt_relations'].shape[0], dtype=np.int64), 89 | d['gt_relations']))) 90 | 91 | # Augment with anchor targets 92 | if self.is_train: 93 | train_anchors_, train_anchor_inds_, train_anchor_targets_, train_anchor_labels_ = \ 94 | anchor_target_layer(gt_boxes_, (h, w)) 95 | 96 | self.train_anchors.append(np.hstack((train_anchors_, train_anchor_targets_))) 97 | 98 | self.train_anchor_labels.append(np.column_stack(( 99 | i * np.ones(train_anchor_inds_.shape[0], dtype=np.int64), 100 | train_anchor_inds_, 101 | train_anchor_labels_, 102 | ))) 103 | 104 | if 'proposals' in d: 105 | self.proposals.append(np.column_stack((i * np.ones(d['proposals'].shape[0], dtype=np.float32), 106 | d['scale'] * d['proposals'].astype(np.float32)))) 107 | 108 | 109 | 110 | def _chunkize(self, datom, tensor=torch.LongTensor): 111 | """ 112 | Turn data list into chunks, one per GPU 113 | :param datom: List of lists of numpy arrays that will be concatenated. 114 | :return: 115 | """ 116 | chunk_sizes = [0] * self.num_gpus 117 | for i in range(self.num_gpus): 118 | for j in range(self.batch_size_per_gpu): 119 | chunk_sizes[i] += datom[i * self.batch_size_per_gpu + j].shape[0] 120 | return Variable(tensor(np.concatenate(datom, 0)), volatile=self.volatile), chunk_sizes 121 | 122 | def reduce(self): 123 | """ Merges all the detections into flat lists + numbers of how many are in each""" 124 | if len(self.imgs) != self.batch_size_per_gpu * self.num_gpus: 125 | raise ValueError("Wrong batch size? 
imgs len {} bsize/gpu {} numgpus {}".format( 126 | len(self.imgs), self.batch_size_per_gpu, self.num_gpus 127 | )) 128 | 129 | self.imgs = Variable(torch.stack(self.imgs, 0), volatile=self.volatile) 130 | self.im_sizes = np.stack(self.im_sizes).reshape( 131 | (self.num_gpus, self.batch_size_per_gpu, 3)) 132 | 133 | if self.is_rel: 134 | self.gt_rels, self.gt_rel_chunks = self._chunkize(self.gt_rels) 135 | 136 | self.gt_boxes, self.gt_box_chunks = self._chunkize(self.gt_boxes, tensor=torch.FloatTensor) 137 | self.gt_classes, _ = self._chunkize(self.gt_classes) 138 | if self.is_train: 139 | self.train_anchor_labels, self.train_chunks = self._chunkize(self.train_anchor_labels) 140 | self.train_anchors, _ = self._chunkize(self.train_anchors, tensor=torch.FloatTensor) 141 | self.train_anchor_inds = self.train_anchor_labels[:, :-1].contiguous() 142 | 143 | if len(self.proposals) != 0: 144 | self.proposals, self.proposal_chunks = self._chunkize(self.proposals, tensor=torch.FloatTensor) 145 | 146 | 147 | 148 | def _scatter(self, x, chunk_sizes, dim=0): 149 | """ Helper function""" 150 | if self.num_gpus == 1: 151 | return x.cuda(self.primary_gpu, async=True) 152 | return torch.nn.parallel.scatter_gather.Scatter.apply( 153 | list(range(self.num_gpus)), chunk_sizes, dim, x) 154 | 155 | def scatter(self): 156 | """ Assigns everything to the GPUs""" 157 | self.imgs = self._scatter(self.imgs, [self.batch_size_per_gpu] * self.num_gpus) 158 | 159 | self.gt_classes_primary = self.gt_classes.cuda(self.primary_gpu, async=True) 160 | self.gt_boxes_primary = self.gt_boxes.cuda(self.primary_gpu, async=True) 161 | 162 | # Predcls might need these 163 | self.gt_classes = self._scatter(self.gt_classes, self.gt_box_chunks) 164 | self.gt_boxes = self._scatter(self.gt_boxes, self.gt_box_chunks) 165 | 166 | if self.is_train: 167 | 168 | self.train_anchor_inds = self._scatter(self.train_anchor_inds, 169 | self.train_chunks) 170 | self.train_anchor_labels = self.train_anchor_labels.cuda(self.primary_gpu, async=True) 171 | self.train_anchors = self.train_anchors.cuda(self.primary_gpu, async=True) 172 | 173 | if self.is_rel: 174 | self.gt_rels = self._scatter(self.gt_rels, self.gt_rel_chunks) 175 | else: 176 | if self.is_rel: 177 | self.gt_rels = self.gt_rels.cuda(self.primary_gpu, async=True) 178 | 179 | if self.proposal_chunks is not None: 180 | self.proposals = self._scatter(self.proposals, self.proposal_chunks) 181 | 182 | def __getitem__(self, index): 183 | """ 184 | Returns a tuple containing data 185 | :param index: Which GPU we're on, or 0 if no GPUs 186 | :return: If training: 187 | (image, im_size, img_start_ind, anchor_inds, anchors, gt_boxes, gt_classes, 188 | train_anchor_inds) 189 | test: 190 | (image, im_size, img_start_ind, anchor_inds, anchors) 191 | """ 192 | if index not in list(range(self.num_gpus)): 193 | raise ValueError("Out of bounds with index {} and {} gpus".format(index, self.num_gpus)) 194 | 195 | if self.is_rel: 196 | rels = self.gt_rels 197 | if index > 0 or self.num_gpus != 1: 198 | rels_i = rels[index] if self.is_rel else None 199 | elif self.is_flickr: 200 | rels = (self.gt_sents, self.gt_nodes) 201 | if index > 0 or self.num_gpus != 1: 202 | rels_i = (self.gt_sents[index], self.gt_nodes[index]) 203 | else: 204 | rels = None 205 | rels_i = None 206 | 207 | if self.proposal_chunks is None: 208 | proposals = None 209 | else: 210 | proposals = self.proposals 211 | 212 | if index == 0 and self.num_gpus == 1: 213 | image_offset = 0 214 | if self.is_train: 215 | return (self.imgs, 
self.im_sizes[0], image_offset, 216 | self.gt_boxes, self.gt_classes, rels, proposals, self.train_anchor_inds) 217 | return self.imgs, self.im_sizes[0], image_offset, self.gt_boxes, self.gt_classes, rels, proposals 218 | 219 | # Otherwise proposals is None 220 | assert proposals is None 221 | 222 | image_offset = self.batch_size_per_gpu * index 223 | # TODO: Return a namedtuple 224 | if self.is_train: 225 | return ( 226 | self.imgs[index], self.im_sizes[index], image_offset, 227 | self.gt_boxes[index], self.gt_classes[index], rels_i, None, self.train_anchor_inds[index]) 228 | return (self.imgs[index], self.im_sizes[index], image_offset, 229 | self.gt_boxes[index], self.gt_classes[index], rels_i, None) 230 | 231 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration file! 3 | """ 4 | import os 5 | from argparse import ArgumentParser 6 | import numpy as np 7 | 8 | ROOT_PATH = '/path/to/GBNet-Supp' 9 | 10 | def path(fn): 11 | return os.path.join(ROOT_PATH, fn) 12 | 13 | def stanford_path(fn): 14 | return os.path.join('/path/to/stanford/preprocessed/metadata', fn) 15 | 16 | # ============================================================================= 17 | # Update these with where your data is stored ~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | VG_IMAGES = '/path/to/visual/genome/images' 20 | RCNN_CHECKPOINT_FN = path('checkpoints/vgdet/vg-24.tar') 21 | 22 | IM_DATA_FN = stanford_path('image_data.json') 23 | VG_SGG_FN = stanford_path('VG-SGG.h5') 24 | VG_SGG_DICT_FN = stanford_path('VG-SGG-dicts.json') 25 | PROPOSAL_FN = stanford_path('proposals.h5') 26 | 27 | # ============================================================================= 28 | # ============================================================================= 29 | 30 | 31 | MODES = ('sgdet', 'sgcls', 'predcls') 32 | 33 | BOX_SCALE = 1024 # Scale at which we have the boxes 34 | IM_SCALE = 592 # Our images will be resized to this res without padding 35 | 36 | # Proposal assignments 37 | BG_THRESH_HI = 0.5 38 | BG_THRESH_LO = 0.0 39 | 40 | RPN_POSITIVE_OVERLAP = 0.7 41 | # IOU < thresh: negative example 42 | RPN_NEGATIVE_OVERLAP = 0.3 43 | 44 | # Max number of foreground examples 45 | RPN_FG_FRACTION = 0.5 46 | FG_FRACTION = 0.25 47 | # Total number of examples 48 | RPN_BATCHSIZE = 256 49 | ROIS_PER_IMG = 256 50 | REL_FG_FRACTION = 0.25 51 | RELS_PER_IMG = 256 52 | 53 | RELS_PER_IMG_REFINE = 64 54 | 55 | BATCHNORM_MOMENTUM = 0.01 56 | ANCHOR_SIZE = 16 57 | 58 | ANCHOR_RATIOS = (0.23232838, 0.63365731, 1.28478321, 3.15089189) #(0.5, 1, 2) 59 | ANCHOR_SCALES = (2.22152954, 4.12315647, 7.21692515, 12.60263013, 22.7102731) #(4, 8, 16, 32) 60 | 61 | class ModelConfig(object): 62 | """Wrapper class for model hyperparameters.""" 63 | def __init__(self, args_str=None): 64 | """ 65 | Defaults 66 | """ 67 | self.ckpt = None 68 | self.save_dir = None 69 | self.lr = None 70 | self.batch_size = None 71 | self.val_size = None 72 | self.l2 = None 73 | self.adamwd = None 74 | self.clip = None 75 | self.num_gpus = None 76 | self.num_workers = None 77 | self.print_interval = None 78 | self.mode = None 79 | self.test = False 80 | self.adam = False 81 | self.cache = None 82 | self.use_proposals=False 83 | self.use_resnet=False 84 | self.num_epochs=None 85 | self.pooling_dim = None 86 | 87 | self.use_ggnn_obj = False 88 | self.ggnn_obj_time_step_num = None 89 | self.ggnn_obj_hidden_dim = None 90 | 
self.ggnn_obj_output_dim = None 91 | self.use_obj_knowledge = False 92 | self.obj_knowledge = None 93 | 94 | self.use_ggnn_rel = False 95 | self.ggnn_rel_time_step_num = None 96 | self.ggnn_rel_hidden_dim = None 97 | self.ggnn_rel_output_dim = None 98 | self.use_rel_knowledge = False 99 | self.rel_knowledge = None 100 | 101 | self.tb_log_dir = None 102 | self.save_rel_recall = None 103 | 104 | self.parser = self.setup_parser() 105 | if args_str is None: 106 | self.args = vars(self.parser.parse_args()) 107 | else: 108 | self.args = vars(self.parser.parse_args(args_str.split())) 109 | 110 | print("~~~~~~~~ Hyperparameters used: ~~~~~~~") 111 | for x, y in self.args.items(): 112 | print("{} : {}".format(x, y)) 113 | 114 | self.__dict__.update(self.args) 115 | 116 | if len(self.ckpt) != 0: 117 | self.ckpt = os.path.join(ROOT_PATH, self.ckpt) 118 | else: 119 | self.ckpt = None 120 | 121 | if len(self.cache) != 0: 122 | if len(self.cache.split('/')) > 1: 123 | file_len = len(self.cache.split('/')[-1]) 124 | cache_dir = self.cache[:-file_len] 125 | cache_dir = os.path.join(ROOT_PATH, cache_dir) 126 | if not os.path.exists(cache_dir): 127 | os.mkdir(cache_dir) 128 | self.cache = os.path.join(ROOT_PATH, self.cache) 129 | else: 130 | self.cache = None 131 | 132 | if len(self.save_dir) == 0: 133 | self.save_dir = None 134 | else: 135 | self.save_dir = os.path.join(ROOT_PATH, self.save_dir) 136 | if not os.path.exists(self.save_dir): 137 | os.makedirs(self.save_dir) 138 | 139 | if len(self.tb_log_dir) != 0: 140 | self.tb_log_dir = os.path.join(ROOT_PATH, self.tb_log_dir) 141 | if not os.path.exists(self.tb_log_dir): 142 | os.makedirs(self.tb_log_dir) # help make multi depth directories, such as summaries/kern_predcls 143 | else: 144 | self.tb_log_dir = None 145 | 146 | if len(self.save_rel_recall) != 0: 147 | if len(self.save_rel_recall.split('/')) > 1: 148 | file_len = len(self.save_rel_recall.split('/')[-1]) 149 | save_rel_recall_dir = self.save_rel_recall[:-file_len] 150 | save_rel_recall_dir = os.path.join(ROOT_PATH, save_rel_recall_dir) 151 | if not os.path.exists(save_rel_recall_dir): 152 | os.mkdir(save_rel_recall_dir) 153 | self.save_rel_recall = os.path.join(ROOT_PATH, self.save_rel_recall) 154 | else: 155 | self.save_rel_recall = None 156 | 157 | 158 | assert self.val_size >= 0 159 | 160 | if self.mode not in MODES: 161 | raise ValueError("Invalid mode: mode must be in {}".format(MODES)) 162 | 163 | 164 | if self.ckpt is not None and not os.path.exists(self.ckpt): 165 | raise ValueError("Ckpt file ({}) doesnt exist".format(self.ckpt)) 166 | 167 | def setup_parser(self): 168 | """ 169 | Sets up an argument parser 170 | :return: 171 | """ 172 | parser = ArgumentParser(description='training code') 173 | 174 | 175 | parser.add_argument('-ckpt', dest='ckpt', help='Filename to load from', type=str, default='') 176 | parser.add_argument('-save_dir', dest='save_dir', 177 | help='Directory to save things to, such as checkpoints/save', default='', type=str) 178 | 179 | parser.add_argument('-ngpu', dest='num_gpus', help='cuantos GPUs tienes', type=int, default=1) 180 | parser.add_argument('-nwork', dest='num_workers', help='num processes to use as workers', type=int, default=1) 181 | 182 | parser.add_argument('-lr', dest='lr', help='learning rate', type=float, default=1e-3) 183 | 184 | parser.add_argument('-b', dest='batch_size', help='batch size per GPU',type=int, default=2) 185 | parser.add_argument('-val_size', dest='val_size', help='val size to use (if 0 we wont use val)', type=int, default=5000) 
186 | 187 | parser.add_argument('-l2', dest='l2', help='weight decay of SGD', type=float, default=1e-4) 188 | parser.add_argument('-adamwd', dest='adamwd', help='weight decay of adam', type=float, default=0.0) 189 | 190 | parser.add_argument('-clip', dest='clip', help='gradients will be clipped to have norm less than this', type=float, default=5.0) 191 | parser.add_argument('-p', dest='print_interval', help='print during training', type=int, 192 | default=100) 193 | parser.add_argument('-m', dest='mode', help='mode in {sgdet, sgcls, predcls}', type=str, default='sgdet') 194 | 195 | 196 | parser.add_argument('-cache', dest='cache', help='where should we cache predictions', type=str, 197 | default='') 198 | 199 | parser.add_argument('-adam', dest='adam', help='use adam', action='store_true') 200 | parser.add_argument('-test', dest='test', help='test set', action='store_true') 201 | 202 | parser.add_argument('-nepoch', dest='num_epochs', help='Number of epochs to train the model for',type=int, default=50) 203 | parser.add_argument('-resnet', dest='use_resnet', help='use resnet instead of VGG', action='store_true') 204 | parser.add_argument('-proposals', dest='use_proposals', help='Use Xu et als proposals', action='store_true') 205 | parser.add_argument('-pooling_dim', dest='pooling_dim', help='Dimension of pooling', type=int, default=4096) 206 | 207 | 208 | parser.add_argument('-use_ggnn_obj', dest='use_ggnn_obj', help='use GGNN_obj module', action='store_true') 209 | parser.add_argument('-ggnn_obj_time_step_num', dest='ggnn_obj_time_step_num', help='time step number of GGNN_obj', type=int, default=3) 210 | parser.add_argument('-ggnn_obj_hidden_dim', dest='ggnn_obj_hidden_dim', help='node hidden state dimension of GGNN_obj', type=int, default=512) 211 | parser.add_argument('-ggnn_obj_output_dim', dest='ggnn_obj_output_dim', help='node output feature dimension of GGNN_obj', type=int, default=512) 212 | parser.add_argument('-use_obj_knowledge', dest='use_obj_knowledge', help='use object cooccurrence knowledge', action='store_true') 213 | parser.add_argument('-obj_knowledge', dest='obj_knowledge', help='Filename to load matrix of object cooccurrence knowledge', type=str, default='') 214 | 215 | 216 | parser.add_argument('-use_ggnn_rel', dest='use_ggnn_rel', help='use GGNN_rel module', action='store_true') 217 | parser.add_argument('-ggnn_rel_time_step_num', dest='ggnn_rel_time_step_num', help='time step number of GGNN_rel', type=int, default=3) 218 | parser.add_argument('-ggnn_rel_hidden_dim', dest='ggnn_rel_hidden_dim', help='node hidden state dimension of GGNN_rel', type=int, default=512) 219 | parser.add_argument('-ggnn_rel_output_dim', dest='ggnn_rel_output_dim', help='node output feature dimension of GGNN_rel', type=int, default=512) 220 | parser.add_argument('-use_rel_knowledge', dest='use_rel_knowledge', help='use cooccurrence knowledge of object pairs and relationships', action='store_true') 221 | parser.add_argument('-rel_knowledge', dest='rel_knowledge', help='Filename to load matrix of cooccurrence knowledge of object pairs and relationships', type=str, default='') 222 | 223 | 224 | parser.add_argument('-tb_log_dir', dest='tb_log_dir', help='dir to save tensorboard summaries', type=str, default='') 225 | parser.add_argument('-save_rel_recall', dest='save_rel_recall', help='dir to save relationship results', type=str, default='') 226 | 227 | return parser 228 | -------------------------------------------------------------------------------- 
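Note: ModelConfig above is a thin argparse wrapper; it parses the flags defined in setup_parser(), prints them, copies them onto the instance, and maps the empty-string path defaults (ckpt, cache, save_dir, tb_log_dir, save_rel_recall) either to None or to paths under ROOT_PATH. A minimal usage sketch, with hypothetical flag values, assuming this config.py is on the import path:

from config import ModelConfig

# Parse a hand-written flag string instead of sys.argv (args_str is split on whitespace).
conf = ModelConfig('-m predcls -b 4 -ngpu 1 -lr 1e-4 -use_ggnn_obj -use_ggnn_rel')
print(conf.mode, conf.batch_size, conf.num_gpus)   # predcls 4 1
print(conf.ckpt, conf.save_dir)                    # None None (empty-string defaults become None)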
/lib/fpn/proposal_assignments/proposal_assignments_rel.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from config import BG_THRESH_HI, BG_THRESH_LO, FG_FRACTION_REL, ROIS_PER_IMG_REL, REL_FG_FRACTION, \ 9 | RELS_PER_IMG 10 | from lib.fpn.box_utils import bbox_overlaps 11 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds 12 | from collections import defaultdict 13 | import torch 14 | 15 | 16 | @to_variable 17 | def proposal_assignments_rel(rpn_rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5): 18 | """ 19 | Assign object detection proposals to ground-truth targets. Produces proposal 20 | classification labels and bounding-box regression targets. 21 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 22 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 23 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 24 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 25 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 26 | :return: 27 | rois: [num_rois, 5] 28 | labels: [num_rois] array of labels 29 | bbox_targets [num_rois, 4] array of targets for the labels. 30 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 31 | """ 32 | fg_rois_per_image = int(np.round(ROIS_PER_IMG_REL * FG_FRACTION_REL)) 33 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * RELS_PER_IMG)) 34 | 35 | pred_inds_np = rpn_rois[:, 0].cpu().numpy().astype(np.int64) 36 | pred_boxes_np = rpn_rois[:, 1:].cpu().numpy() 37 | gt_boxes_np = gt_boxes.cpu().numpy() 38 | gt_classes_np = gt_classes.cpu().numpy() 39 | gt_rels_np = gt_rels.cpu().numpy() 40 | 41 | gt_classes_np[:, 0] -= image_offset 42 | gt_rels_np[:, 0] -= image_offset 43 | 44 | num_im = gt_classes_np[:, 0].max()+1 45 | 46 | rois = [] 47 | obj_labels = [] 48 | rel_labels = [] 49 | bbox_targets = [] 50 | 51 | num_box_seen = 0 52 | 53 | for im_ind in range(num_im): 54 | pred_ind = np.where(pred_inds_np == im_ind)[0] 55 | 56 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 57 | gt_boxes_i = gt_boxes_np[gt_ind] 58 | gt_classes_i = gt_classes_np[gt_ind, 1] 59 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 60 | 61 | pred_boxes_i = np.concatenate((pred_boxes_np[pred_ind], gt_boxes_i), 0) 62 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) 63 | 64 | obj_inds_i, obj_labels_i, obj_assignments_i = _sel_inds(ious, gt_classes_i, 65 | fg_thresh, fg_rois_per_image, ROIS_PER_IMG_REL) 66 | 67 | all_rels_i = _sel_rels(ious[obj_inds_i], pred_boxes_i[obj_inds_i], obj_labels_i, 68 | gt_classes_i, gt_rels_i, 69 | fg_thresh=fg_thresh, fg_rels_per_image=fg_rels_per_image) 70 | all_rels_i[:,0:2] += num_box_seen 71 | 72 | rois.append(np.column_stack(( 73 | im_ind * np.ones(obj_inds_i.shape[0], dtype=np.float32), 74 | pred_boxes_i[obj_inds_i], 75 | ))) 76 | obj_labels.append(obj_labels_i) 77 | rel_labels.append(np.column_stack(( 78 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 79 | all_rels_i, 80 | ))) 81 | 82 | # print("Gtboxes i {} obj assignments i {}".format(gt_boxes_i, obj_assignments_i)) 83 | bbox_targets.append(gt_boxes_i[obj_assignments_i]) 84 | 85 | num_box_seen += obj_inds_i.size 86 | 87 | rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(rpn_rois.get_device(), async=True) 88 | labels = 
torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(rpn_rois.get_device(), async=True) 89 | bbox_targets = torch.FloatTensor(np.concatenate(bbox_targets, 0)).cuda(rpn_rois.get_device(), 90 | async=True) 91 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), 92 | async=True) 93 | 94 | return rois, labels, bbox_targets, rel_labels 95 | 96 | 97 | def _sel_rels(ious, pred_boxes, pred_labels, gt_classes, gt_rels, fg_thresh=0.5, fg_rels_per_image=128, num_sample_per_gt=1, filter_non_overlap=True): 98 | """ 99 | Selects the relations needed 100 | :param ious: [num_pred', num_gt] 101 | :param pred_boxes: [num_pred', num_gt] 102 | :param pred_labels: [num_pred'] 103 | :param gt_classes: [num_gt] 104 | :param gt_rels: [num_gtrel, 3] 105 | :param fg_thresh: 106 | :param fg_rels_per_image: 107 | :return: new rels, [num_predrel, 3] where each is (pred_ind1, pred_ind2, predicate) 108 | """ 109 | is_match = (ious >= fg_thresh) & (pred_labels[:, None] == gt_classes[None, :]) 110 | 111 | pbi_iou = bbox_overlaps(pred_boxes, pred_boxes) 112 | 113 | # Limit ourselves to only IOUs that overlap, but are not the exact same box 114 | # since we duplicated stuff earlier. 115 | if filter_non_overlap: 116 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0) 117 | rels_intersect = rel_possibilities 118 | else: 119 | rel_possibilities = np.ones((pred_labels.shape[0], pred_labels.shape[0]), 120 | dtype=np.int64) - np.eye(pred_labels.shape[0], dtype=np.int64) 121 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0) 122 | 123 | # ONLY select relations between ground truth because otherwise we get useless data 124 | rel_possibilities[pred_labels == 0] = 0 125 | rel_possibilities[:,pred_labels == 0] = 0 126 | 127 | # For each GT relationship, sample exactly 1 relationship. 128 | fg_rels = [] 129 | p_size = [] 130 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels): 131 | fg_rels_i = [] 132 | fg_scores_i = [] 133 | 134 | for from_ind in np.where(is_match[:,from_gtind])[0]: 135 | for to_ind in np.where(is_match[:,to_gtind])[0]: 136 | if from_ind != to_ind: 137 | fg_rels_i.append((from_ind, to_ind, rel_id)) 138 | fg_scores_i.append((ious[from_ind, from_gtind]*ious[to_ind, to_gtind])) 139 | rel_possibilities[from_ind, to_ind] = 0 140 | if len(fg_rels_i) == 0: 141 | continue 142 | p = np.array(fg_scores_i) 143 | p = p/p.sum() 144 | p_size.append(p.shape[0]) 145 | num_to_add = min(p.shape[0], num_sample_per_gt) 146 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False): 147 | fg_rels.append(fg_rels_i[rel_to_add]) 148 | 149 | bg_rels = np.column_stack(np.where(rel_possibilities)) 150 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64))) 151 | 152 | fg_rels = np.array(fg_rels, dtype=np.int64) 153 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image: 154 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)] 155 | # print("{} scores for {} GT. max={} min={} BG rels {}".format( 156 | # fg_rels_scores.shape[0], gt_rels.shape[0], fg_rels_scores.max(), fg_rels_scores.min(), 157 | # bg_rels.shape)) 158 | elif fg_rels.size == 0: 159 | fg_rels = np.zeros((0,3), dtype=np.int64) 160 | 161 | num_bg_rel = min(RELS_PER_IMG - fg_rels.shape[0], bg_rels.shape[0]) 162 | if bg_rels.size > 0: 163 | 164 | # Sample 4x as many intersecting relationships as non-intersecting. 
165 | bg_rels_intersect = rels_intersect[bg_rels[:,0], bg_rels[:,1]] 166 | p = bg_rels_intersect.astype(np.float32) 167 | p[bg_rels_intersect == 0] = 0.2 168 | p[bg_rels_intersect == 1] = 0.8 169 | p /= p.sum() 170 | bg_rels = bg_rels[np.random.choice(bg_rels.shape[0], p=p, size=num_bg_rel, replace=False)] 171 | else: 172 | bg_rels = np.zeros((0,3), dtype=np.int64) 173 | 174 | #print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape)) 175 | 176 | all_rels = np.concatenate((fg_rels, bg_rels), 0) 177 | 178 | # Sort by 2nd ind and then 1st ind 179 | all_rels = all_rels[np.lexsort((all_rels[:, 1], all_rels[:, 0]))] 180 | return all_rels 181 | 182 | def _sel_inds(ious, gt_classes_i, fg_thresh=0.5, fg_rois_per_image=128, rois_per_image=256, n_sample_per=1): 183 | 184 | #gt_assignment = ious.argmax(1) 185 | #max_overlaps = ious[np.arange(ious.shape[0]), gt_assignment] 186 | #fg_inds = np.where(max_overlaps >= fg_thresh)[0] 187 | 188 | fg_ious = ious.T >= fg_thresh #[num_gt, num_pred] 189 | #is_bg = ~fg_ious.any(0) 190 | 191 | # Sample K inds per GT image. 192 | fg_inds = [] 193 | for i, (ious_i, cls_i) in enumerate(zip(fg_ious, gt_classes_i)): 194 | n_sample_this_roi = min(n_sample_per, ious_i.sum()) 195 | if n_sample_this_roi > 0: 196 | p = ious_i.astype(np.float64) / ious_i.sum() 197 | for ind in npr.choice(ious_i.shape[0], p=p, size=n_sample_this_roi, replace=False): 198 | fg_inds.append((ind, i)) 199 | 200 | fg_inds = np.array(fg_inds, dtype=np.int64) 201 | if fg_inds.size == 0: 202 | fg_inds = np.zeros((0, 2), dtype=np.int64) 203 | elif fg_inds.shape[0] > fg_rois_per_image: 204 | #print("sample FG") 205 | fg_inds = fg_inds[npr.choice(fg_inds.shape[0], size=fg_rois_per_image, replace=False)] 206 | 207 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 208 | max_overlaps = ious.max(1) 209 | bg_inds = np.where((max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO))[0] 210 | 211 | # Compute number of background RoIs to take from this image (guarding 212 | # against there being fewer than desired) 213 | bg_rois_per_this_image = min(rois_per_image-fg_inds.shape[0], bg_inds.size) 214 | # Sample background regions without replacement 215 | if bg_inds.size > 0: 216 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 217 | 218 | 219 | # FIx for format issues 220 | obj_inds = np.concatenate((fg_inds[:,0], bg_inds), 0) 221 | obj_assignments_i = np.concatenate((fg_inds[:,1], np.zeros(bg_inds.shape[0], dtype=np.int64))) 222 | obj_labels_i = gt_classes_i[obj_assignments_i] 223 | obj_labels_i[fg_inds.shape[0]:] = 0 224 | #print("{} FG and {} BG".format(fg_inds.shape[0], bg_inds.shape[0])) 225 | return obj_inds, obj_labels_i, obj_assignments_i 226 | 227 | 228 | -------------------------------------------------------------------------------- /lib/evaluation/test_sg_eval.py: -------------------------------------------------------------------------------- 1 | # Just some tests so you can be assured that sg_eval.py works the same as the (original) stanford evaluation 2 | 3 | import numpy as np 4 | from six.moves import xrange 5 | from dataloaders.visual_genome import VG 6 | from lib.evaluation.sg_eval import evaluate_from_dict 7 | from tqdm import trange 8 | from lib.fpn.box_utils import center_size, point_form 9 | def eval_relation_recall(sg_entry, 10 | roidb_entry, 11 | result_dict, 12 | mode, 13 | iou_thresh): 14 | 15 | # gt 16 | gt_inds = np.where(roidb_entry['max_overlaps'] == 1)[0] 17 | gt_boxes = 
roidb_entry['boxes'][gt_inds].copy().astype(float) 18 | num_gt_boxes = gt_boxes.shape[0] 19 | gt_relations = roidb_entry['gt_relations'].copy() 20 | gt_classes = roidb_entry['gt_classes'].copy() 21 | 22 | num_gt_relations = gt_relations.shape[0] 23 | if num_gt_relations == 0: 24 | return (None, None) 25 | gt_class_scores = np.ones(num_gt_boxes) 26 | gt_predicate_scores = np.ones(num_gt_relations) 27 | gt_triplets, gt_triplet_boxes, _ = _triplet(gt_relations[:,2], 28 | gt_relations[:,:2], 29 | gt_classes, 30 | gt_boxes, 31 | gt_predicate_scores, 32 | gt_class_scores) 33 | 34 | # pred 35 | box_preds = sg_entry['boxes'] 36 | num_boxes = box_preds.shape[0] 37 | predicate_preds = sg_entry['relations'] 38 | class_preds = sg_entry['scores'] 39 | predicate_preds = predicate_preds.reshape(num_boxes, num_boxes, -1) 40 | 41 | # no bg 42 | predicate_preds = predicate_preds[:, :, 1:] 43 | predicates = np.argmax(predicate_preds, 2).ravel() + 1 44 | predicate_scores = predicate_preds.max(axis=2).ravel() 45 | relations = [] 46 | keep = [] 47 | for i in xrange(num_boxes): 48 | for j in xrange(num_boxes): 49 | if i != j: 50 | keep.append(num_boxes*i + j) 51 | relations.append([i, j]) 52 | # take out self relations 53 | predicates = predicates[keep] 54 | predicate_scores = predicate_scores[keep] 55 | 56 | relations = np.array(relations) 57 | assert(relations.shape[0] == num_boxes * (num_boxes - 1)) 58 | assert(predicates.shape[0] == relations.shape[0]) 59 | num_relations = relations.shape[0] 60 | 61 | if mode =='predcls': 62 | # if predicate classification task 63 | # use ground truth bounding boxes 64 | assert(num_boxes == num_gt_boxes) 65 | classes = gt_classes 66 | class_scores = gt_class_scores 67 | boxes = gt_boxes 68 | elif mode =='sgcls': 69 | assert(num_boxes == num_gt_boxes) 70 | # if scene graph classification task 71 | # use gt boxes, but predicted classes 72 | classes = np.argmax(class_preds, 1) 73 | class_scores = class_preds.max(axis=1) 74 | boxes = gt_boxes 75 | elif mode =='sgdet': 76 | # if scene graph detection task 77 | # use preicted boxes and predicted classes 78 | classes = np.argmax(class_preds, 1) 79 | class_scores = class_preds.max(axis=1) 80 | boxes = [] 81 | for i, c in enumerate(classes): 82 | boxes.append(box_preds[i]) # no bbox regression, c*4:(c+1)*4]) 83 | boxes = np.vstack(boxes) 84 | else: 85 | raise NotImplementedError('Incorrect Mode! 
%s' % mode) 86 | 87 | pred_triplets, pred_triplet_boxes, relation_scores = \ 88 | _triplet(predicates, relations, classes, boxes, 89 | predicate_scores, class_scores) 90 | 91 | 92 | sorted_inds = np.argsort(relation_scores)[::-1] 93 | # compue recall 94 | for k in result_dict[mode + '_recall']: 95 | this_k = min(k, num_relations) 96 | keep_inds = sorted_inds[:this_k] 97 | recall = _relation_recall(gt_triplets, 98 | pred_triplets[keep_inds,:], 99 | gt_triplet_boxes, 100 | pred_triplet_boxes[keep_inds,:], 101 | iou_thresh) 102 | result_dict[mode + '_recall'][k].append(recall) 103 | 104 | # for visualization 105 | return pred_triplets[sorted_inds, :], pred_triplet_boxes[sorted_inds, :] 106 | 107 | 108 | def _triplet(predicates, relations, classes, boxes, 109 | predicate_scores, class_scores): 110 | 111 | # format predictions into triplets 112 | assert(predicates.shape[0] == relations.shape[0]) 113 | num_relations = relations.shape[0] 114 | triplets = np.zeros([num_relations, 3]).astype(np.int32) 115 | triplet_boxes = np.zeros([num_relations, 8]).astype(np.int32) 116 | triplet_scores = np.zeros([num_relations]).astype(np.float32) 117 | for i in xrange(num_relations): 118 | triplets[i, 1] = predicates[i] 119 | sub_i, obj_i = relations[i,:2] 120 | triplets[i, 0] = classes[sub_i] 121 | triplets[i, 2] = classes[obj_i] 122 | triplet_boxes[i, :4] = boxes[sub_i, :] 123 | triplet_boxes[i, 4:] = boxes[obj_i, :] 124 | # compute triplet score 125 | score = class_scores[sub_i] 126 | score *= class_scores[obj_i] 127 | score *= predicate_scores[i] 128 | triplet_scores[i] = score 129 | return triplets, triplet_boxes, triplet_scores 130 | 131 | 132 | def _relation_recall(gt_triplets, pred_triplets, 133 | gt_boxes, pred_boxes, iou_thresh): 134 | 135 | # compute the R@K metric for a set of predicted triplets 136 | 137 | num_gt = gt_triplets.shape[0] 138 | num_correct_pred_gt = 0 139 | 140 | for gt, gt_box in zip(gt_triplets, gt_boxes): 141 | keep = np.zeros(pred_triplets.shape[0]).astype(bool) 142 | for i, pred in enumerate(pred_triplets): 143 | if gt[0] == pred[0] and gt[1] == pred[1] and gt[2] == pred[2]: 144 | keep[i] = True 145 | if not np.any(keep): 146 | continue 147 | boxes = pred_boxes[keep,:] 148 | sub_iou = iou(gt_box[:4], boxes[:,:4]) 149 | obj_iou = iou(gt_box[4:], boxes[:,4:]) 150 | inds = np.intersect1d(np.where(sub_iou >= iou_thresh)[0], 151 | np.where(obj_iou >= iou_thresh)[0]) 152 | if inds.size > 0: 153 | num_correct_pred_gt += 1 154 | return float(num_correct_pred_gt) / float(num_gt) 155 | 156 | 157 | def iou(gt_box, pred_boxes): 158 | # computer Intersection-over-Union between two sets of boxes 159 | ixmin = np.maximum(gt_box[0], pred_boxes[:,0]) 160 | iymin = np.maximum(gt_box[1], pred_boxes[:,1]) 161 | ixmax = np.minimum(gt_box[2], pred_boxes[:,2]) 162 | iymax = np.minimum(gt_box[3], pred_boxes[:,3]) 163 | iw = np.maximum(ixmax - ixmin + 1., 0.) 164 | ih = np.maximum(iymax - iymin + 1., 0.) 165 | inters = iw * ih 166 | 167 | # union 168 | uni = ((gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) + 169 | (pred_boxes[:, 2] - pred_boxes[:, 0] + 1.) * 170 | (pred_boxes[:, 3] - pred_boxes[:, 1] + 1.) 
- inters) 171 | 172 | overlaps = inters / uni 173 | return overlaps 174 | 175 | train, val, test = VG.splits() 176 | 177 | result_dict_mine = {'sgdet_recall': {20: [], 50: [], 100: []}} 178 | result_dict_theirs = {'sgdet_recall': {20: [], 50: [], 100: []}} 179 | 180 | for img_i in trange(len(val)): 181 | gt_entry = { 182 | 'gt_classes': val.gt_classes[img_i].copy(), 183 | 'gt_relations': val.relationships[img_i].copy(), 184 | 'gt_boxes': val.gt_boxes[img_i].copy(), 185 | } 186 | 187 | # Use shuffled GT boxes 188 | gt_indices = np.arange(gt_entry['gt_boxes'].shape[0]) #np.random.choice(gt_entry['gt_boxes'].shape[0], 20) 189 | pred_boxes = gt_entry['gt_boxes'][gt_indices] 190 | 191 | # Jitter the boxes a bit 192 | pred_boxes = center_size(pred_boxes) 193 | pred_boxes[:,:2] += np.random.rand(pred_boxes.shape[0], 2)*128 194 | pred_boxes[:,2:] *= (1+np.random.randn(pred_boxes.shape[0], 2).clip(-0.1, 0.1)) 195 | pred_boxes = point_form(pred_boxes) 196 | 197 | obj_scores = np.random.rand(pred_boxes.shape[0]) 198 | 199 | rels_to_use = np.column_stack(np.where(1 - np.diag(np.ones(pred_boxes.shape[0], dtype=np.int32)))) 200 | rel_scores = np.random.rand(min(100, rels_to_use.shape[0]), 51) 201 | rel_scores = rel_scores / rel_scores.sum(1, keepdims=True) 202 | pred_rel_inds = rels_to_use[np.random.choice(rels_to_use.shape[0], rel_scores.shape[0], 203 | replace=False)] 204 | 205 | # We must sort by P(o, o, r) 206 | rel_order = np.argsort(-rel_scores[:,1:].max(1) * obj_scores[pred_rel_inds[:,0]] * obj_scores[pred_rel_inds[:,1]]) 207 | 208 | pred_entry = { 209 | 'pred_boxes': pred_boxes, 210 | 'pred_classes': gt_entry['gt_classes'][gt_indices], #1+np.random.choice(150, pred_boxes.shape[0], replace=True), 211 | 'obj_scores': obj_scores, 212 | 'pred_rel_inds': pred_rel_inds[rel_order], 213 | 'rel_scores': rel_scores[rel_order], 214 | } 215 | 216 | # def check_whether_they_are_the_same(gt_entry, pred_entry): 217 | evaluate_from_dict(gt_entry, pred_entry, 'sgdet', result_dict_mine, multiple_preds=False, 218 | viz_dict=None) 219 | 220 | ######################### 221 | predicate_scores_theirs = np.zeros((pred_boxes.shape[0], pred_boxes.shape[0], 51), dtype=np.float64) 222 | for (o1, o2), s in zip(pred_entry['pred_rel_inds'], pred_entry['rel_scores']): 223 | predicate_scores_theirs[o1, o2] = s 224 | 225 | obj_scores_theirs = np.zeros((obj_scores.shape[0], 151), dtype=np.float64) 226 | obj_scores_theirs[np.arange(obj_scores.shape[0]), pred_entry['pred_classes']] = obj_scores 227 | 228 | sg_entry_orig_format = { 229 | 'boxes': pred_entry['pred_boxes'], 230 | # 'gt_classes': gt_entry['gt_classes'], 231 | # 'gt_relations': gt_entry['gt_relations'], 232 | 'relations': predicate_scores_theirs, 233 | 'scores': obj_scores_theirs 234 | } 235 | roidb_entry = { 236 | 'max_overlaps': np.concatenate((np.ones(gt_entry['gt_boxes'].shape[0]), np.zeros(pred_entry['pred_boxes'].shape[0])), 0), 237 | 'boxes': np.concatenate((gt_entry['gt_boxes'], pred_entry['pred_boxes']), 0), 238 | 'gt_classes': gt_entry['gt_classes'], 239 | 'gt_relations': gt_entry['gt_relations'], 240 | } 241 | eval_relation_recall(sg_entry_orig_format, roidb_entry, result_dict_theirs, 'sgdet', iou_thresh=0.5) 242 | 243 | my_results = np.array(result_dict_mine['sgdet_recall'][20]) 244 | their_results = np.array(result_dict_theirs['sgdet_recall'][20]) 245 | 246 | assert np.all(my_results == their_results) -------------------------------------------------------------------------------- /lib/my_ggnn_10.py: 
-------------------------------------------------------------------------------- 1 | ################################################################## 2 | # From my_ggnn_09: Dynamically connecting entities to ontology too 3 | # Also a minor change: img2ont edges are now normalized over ont rather than img 4 | ################################################################## 5 | 6 | import os, sys 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torch.autograd import Variable 11 | import numpy as np 12 | import pickle 13 | from lib.my_util import MLP 14 | 15 | def wrap(nparr): 16 | return Variable(torch.from_numpy(nparr).float().cuda(), requires_grad=False) 17 | 18 | def arange(num): 19 | return torch.arange(num).type(torch.LongTensor).cuda() 20 | 21 | class GGNN(nn.Module): 22 | def __init__(self, emb_path, graph_path, time_step_num=3, hidden_dim=512, output_dim=512, 23 | use_embedding=True, use_knowledge=True, refine_obj_cls=False): 24 | super(GGNN, self).__init__() 25 | self.time_step_num = time_step_num 26 | 27 | if use_embedding: 28 | with open(emb_path, 'rb') as fin: 29 | self.emb_ent, self.emb_pred = pickle.load(fin) 30 | else: 31 | self.emb_ent = np.eye(151, dtype=np.float32) 32 | self.emb_pred = np.eye(51, dtype=np.float32) 33 | 34 | if use_knowledge: 35 | with open(graph_path, 'rb') as fin: 36 | edge_dict = pickle.load(fin) 37 | self.adjmtx_ent2ent = edge_dict['edges_ent2ent'] 38 | self.adjmtx_ent2pred = edge_dict['edges_ent2pred'] 39 | self.adjmtx_pred2ent = edge_dict['edges_pred2ent'] 40 | self.adjmtx_pred2pred = edge_dict['edges_pred2pred'] 41 | else: 42 | self.adjmtx_ent2ent = np.zeros((1, 151, 151), dtype=np.float32) 43 | self.adjmtx_ent2pred = np.zeros((1, 151, 51), dtype=np.float32) 44 | self.adjmtx_pred2ent = np.zeros((1, 51, 151), dtype=np.float32) 45 | self.adjmtx_pred2pred = np.zeros((1, 51, 51), dtype=np.float32) 46 | 47 | self.num_edge_types_ent2ent = self.adjmtx_ent2ent.shape[0] 48 | self.num_edge_types_ent2pred = self.adjmtx_ent2pred.shape[0] 49 | self.num_edge_types_pred2ent = self.adjmtx_pred2ent.shape[0] 50 | self.num_edge_types_pred2pred = self.adjmtx_pred2pred.shape[0] 51 | 52 | self.fc_init_ont_ent = nn.Linear(self.emb_ent.shape[1], hidden_dim) 53 | self.fc_init_ont_pred = nn.Linear(self.emb_pred.shape[1], hidden_dim) 54 | 55 | self.fc_mp_send_ont_ent = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 56 | self.fc_mp_send_ont_pred = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 57 | self.fc_mp_send_img_ent = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 58 | self.fc_mp_send_img_pred = MLP([hidden_dim, hidden_dim // 2, hidden_dim // 4], act_fn='ReLU', last_act=True) 59 | 60 | self.fc_mp_receive_ont_ent = MLP([(self.num_edge_types_ent2ent + self.num_edge_types_pred2ent + 1) * hidden_dim // 4, 61 | (self.num_edge_types_ent2ent + self.num_edge_types_pred2ent + 1) * hidden_dim // 4, 62 | hidden_dim], act_fn='ReLU', last_act=True) 63 | self.fc_mp_receive_ont_pred = MLP([(self.num_edge_types_ent2pred + self.num_edge_types_pred2pred + 1) * hidden_dim // 4, 64 | (self.num_edge_types_ent2pred + self.num_edge_types_pred2pred + 1) * hidden_dim // 4, 65 | hidden_dim], act_fn='ReLU', last_act=True) 66 | self.fc_mp_receive_img_ent = MLP([3 * hidden_dim // 4, 3 * hidden_dim // 4, hidden_dim], act_fn='ReLU', last_act=True) 67 | self.fc_mp_receive_img_pred = MLP([3 * hidden_dim // 4, 3 * hidden_dim // 4, hidden_dim], act_fn='ReLU', 
last_act=True) 68 | 69 | self.fc_eq3_w_ont_ent = nn.Linear(hidden_dim, hidden_dim) 70 | self.fc_eq3_u_ont_ent = nn.Linear(hidden_dim, hidden_dim) 71 | self.fc_eq4_w_ont_ent = nn.Linear(hidden_dim, hidden_dim) 72 | self.fc_eq4_u_ont_ent = nn.Linear(hidden_dim, hidden_dim) 73 | self.fc_eq5_w_ont_ent = nn.Linear(hidden_dim, hidden_dim) 74 | self.fc_eq5_u_ont_ent = nn.Linear(hidden_dim, hidden_dim) 75 | 76 | self.fc_eq3_w_ont_pred = nn.Linear(hidden_dim, hidden_dim) 77 | self.fc_eq3_u_ont_pred = nn.Linear(hidden_dim, hidden_dim) 78 | self.fc_eq4_w_ont_pred = nn.Linear(hidden_dim, hidden_dim) 79 | self.fc_eq4_u_ont_pred = nn.Linear(hidden_dim, hidden_dim) 80 | self.fc_eq5_w_ont_pred = nn.Linear(hidden_dim, hidden_dim) 81 | self.fc_eq5_u_ont_pred = nn.Linear(hidden_dim, hidden_dim) 82 | 83 | self.fc_eq3_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 84 | self.fc_eq3_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 85 | self.fc_eq4_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 86 | self.fc_eq4_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 87 | self.fc_eq5_w_img_ent = nn.Linear(hidden_dim, hidden_dim) 88 | self.fc_eq5_u_img_ent = nn.Linear(hidden_dim, hidden_dim) 89 | 90 | self.fc_eq3_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 91 | self.fc_eq3_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 92 | self.fc_eq4_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 93 | self.fc_eq4_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 94 | self.fc_eq5_w_img_pred = nn.Linear(hidden_dim, hidden_dim) 95 | self.fc_eq5_u_img_pred = nn.Linear(hidden_dim, hidden_dim) 96 | 97 | self.fc_output_proj_img_pred = MLP([hidden_dim, hidden_dim, hidden_dim], act_fn='ReLU', last_act=False) 98 | self.fc_output_proj_ont_pred = MLP([hidden_dim, hidden_dim, hidden_dim], act_fn='ReLU', last_act=False) 99 | 100 | self.refine_obj_cls = refine_obj_cls 101 | if self.refine_obj_cls: 102 | self.fc_output_proj_img_ent = MLP([hidden_dim, hidden_dim, hidden_dim], act_fn='ReLU', last_act=False) 103 | self.fc_output_proj_ont_ent = MLP([hidden_dim, hidden_dim, hidden_dim], act_fn='ReLU', last_act=False) 104 | 105 | self.debug_info = {} 106 | 107 | 108 | def forward(self, rel_inds, obj_probs, obj_fmaps, vr): 109 | num_img_ent = obj_probs.size(0) 110 | num_img_pred = rel_inds.size(0) 111 | num_ont_ent = self.emb_ent.shape[0] 112 | num_ont_pred = self.emb_pred.shape[0] 113 | 114 | self.debug_info['rel_inds'] = rel_inds 115 | self.debug_info['obj_probs'] = obj_probs 116 | 117 | nodes_ont_ent = self.fc_init_ont_ent(wrap(self.emb_ent)) 118 | nodes_ont_pred = self.fc_init_ont_pred(wrap(self.emb_pred)) 119 | nodes_img_ent = obj_fmaps 120 | nodes_img_pred = vr 121 | 122 | edges_ont_ent2ent = wrap(self.adjmtx_ent2ent) 123 | edges_ont_ent2pred = wrap(self.adjmtx_ent2pred) 124 | edges_ont_pred2ent = wrap(self.adjmtx_pred2ent) 125 | edges_ont_pred2pred = wrap(self.adjmtx_pred2pred) 126 | 127 | edges_img_pred2subj = wrap(np.zeros((num_img_pred, num_img_ent))) 128 | edges_img_pred2subj[arange(num_img_pred), rel_inds[:, 0]] = 1 129 | edges_img_pred2obj = wrap(np.zeros((num_img_pred, num_img_ent))) 130 | edges_img_pred2obj[arange(num_img_pred), rel_inds[:, 1]] = 1 131 | edges_img_subj2pred = edges_img_pred2subj.t() 132 | edges_img_obj2pred = edges_img_pred2obj.t() 133 | 134 | edges_img2ont_ent = wrap(obj_probs.data.cpu().numpy()) 135 | edges_ont2img_ent = edges_img2ont_ent.t() 136 | 137 | edges_img2ont_pred = wrap(np.zeros((num_img_pred, num_ont_pred))) 138 | edges_ont2img_pred = edges_img2ont_pred.t() 139 | 140 | ent_cls_logits = None 141 | 142 | for t in 
range(self.time_step_num): 143 | message_send_ont_ent = self.fc_mp_send_ont_ent(nodes_ont_ent) 144 | message_send_ont_pred = self.fc_mp_send_ont_pred(nodes_ont_pred) 145 | message_send_img_ent = self.fc_mp_send_img_ent(nodes_img_ent) 146 | message_send_img_pred = self.fc_mp_send_img_pred(nodes_img_pred) 147 | 148 | message_received_ont_ent = self.fc_mp_receive_ont_ent(torch.cat( 149 | [torch.mm(edges_ont_ent2ent[i].t(), message_send_ont_ent) for i in range(self.num_edge_types_ent2ent)] + 150 | [torch.mm(edges_ont_pred2ent[i].t(), message_send_ont_pred) for i in range(self.num_edge_types_pred2ent)] + 151 | [torch.mm(edges_img2ont_ent.t(), message_send_img_ent),] 152 | , 1)) 153 | 154 | message_received_ont_pred = self.fc_mp_receive_ont_pred(torch.cat( 155 | [torch.mm(edges_ont_ent2pred[i].t(), message_send_ont_ent) for i in range(self.num_edge_types_ent2pred)] + 156 | [torch.mm(edges_ont_pred2pred[i].t(), message_send_ont_pred) for i in range(self.num_edge_types_pred2pred)] + 157 | [torch.mm(edges_img2ont_pred.t(), message_send_img_pred),] 158 | , 1)) 159 | 160 | message_received_img_ent = self.fc_mp_receive_img_ent(torch.cat([ 161 | torch.mm(edges_img_pred2subj.t(), message_send_img_pred), 162 | torch.mm(edges_img_pred2obj.t(), message_send_img_pred), 163 | torch.mm(edges_ont2img_ent.t(), message_send_ont_ent), 164 | ], 1)) 165 | 166 | message_received_img_pred = self.fc_mp_receive_img_pred(torch.cat([ 167 | torch.mm(edges_img_subj2pred.t(), message_send_img_ent), 168 | torch.mm(edges_img_obj2pred.t(), message_send_img_ent), 169 | torch.mm(edges_ont2img_pred.t(), message_send_ont_pred), 170 | ], 1)) 171 | 172 | z_ont_ent = torch.sigmoid(self.fc_eq3_w_ont_ent(message_received_ont_ent) + self.fc_eq3_u_ont_ent(nodes_ont_ent)) 173 | r_ont_ent = torch.sigmoid(self.fc_eq4_w_ont_ent(message_received_ont_ent) + self.fc_eq4_u_ont_ent(nodes_ont_ent)) 174 | h_ont_ent = torch.tanh(self.fc_eq5_w_ont_ent(message_received_ont_ent) + self.fc_eq5_u_ont_ent(r_ont_ent * nodes_ont_ent)) 175 | nodes_ont_ent_new = (1 - z_ont_ent) * nodes_ont_ent + z_ont_ent * h_ont_ent 176 | 177 | z_ont_pred = torch.sigmoid(self.fc_eq3_w_ont_pred(message_received_ont_pred) + self.fc_eq3_u_ont_pred(nodes_ont_pred)) 178 | r_ont_pred = torch.sigmoid(self.fc_eq4_w_ont_pred(message_received_ont_pred) + self.fc_eq4_u_ont_pred(nodes_ont_pred)) 179 | h_ont_pred = torch.tanh(self.fc_eq5_w_ont_pred(message_received_ont_pred) + self.fc_eq5_u_ont_pred(r_ont_pred * nodes_ont_pred)) 180 | nodes_ont_pred_new = (1 - z_ont_pred) * nodes_ont_pred + z_ont_pred * h_ont_pred 181 | 182 | z_img_ent = torch.sigmoid(self.fc_eq3_w_img_ent(message_received_img_ent) + self.fc_eq3_u_img_ent(nodes_img_ent)) 183 | r_img_ent = torch.sigmoid(self.fc_eq4_w_img_ent(message_received_img_ent) + self.fc_eq4_u_img_ent(nodes_img_ent)) 184 | h_img_ent = torch.tanh(self.fc_eq5_w_img_ent(message_received_img_ent) + self.fc_eq5_u_img_ent(r_img_ent * nodes_img_ent)) 185 | nodes_img_ent_new = (1 - z_img_ent) * nodes_img_ent + z_img_ent * h_img_ent 186 | 187 | z_img_pred = torch.sigmoid(self.fc_eq3_w_img_pred(message_received_img_pred) + self.fc_eq3_u_img_pred(nodes_img_pred)) 188 | r_img_pred = torch.sigmoid(self.fc_eq4_w_img_pred(message_received_img_pred) + self.fc_eq4_u_img_pred(nodes_img_pred)) 189 | h_img_pred = torch.tanh(self.fc_eq5_w_img_pred(message_received_img_pred) + self.fc_eq5_u_img_pred(r_img_pred * nodes_img_pred)) 190 | nodes_img_pred_new = (1 - z_img_pred) * nodes_img_pred + z_img_pred * h_img_pred 191 | 192 | relative_state_change_ont_ent = 
torch.sum(torch.abs(nodes_ont_ent_new - nodes_ont_ent)) / torch.sum(torch.abs(nodes_ont_ent)) 193 | relative_state_change_ont_pred = torch.sum(torch.abs(nodes_ont_pred_new - nodes_ont_pred)) / torch.sum(torch.abs(nodes_ont_pred)) 194 | relative_state_change_img_ent = torch.sum(torch.abs(nodes_img_ent_new - nodes_img_ent)) / torch.sum(torch.abs(nodes_img_ent)) 195 | relative_state_change_img_pred = torch.sum(torch.abs(nodes_img_pred_new - nodes_img_pred)) / torch.sum(torch.abs(nodes_img_pred)) 196 | 197 | self.debug_info[f'relative_state_change_{t}'] = [relative_state_change_ont_ent, relative_state_change_ont_pred, relative_state_change_img_ent, relative_state_change_img_pred] 198 | 199 | nodes_ont_ent = nodes_ont_ent_new 200 | nodes_ont_pred = nodes_ont_pred_new 201 | nodes_img_ent = nodes_img_ent_new 202 | nodes_img_pred = nodes_img_pred_new 203 | 204 | pred_cls_logits = torch.mm(self.fc_output_proj_img_pred(nodes_img_pred), self.fc_output_proj_ont_pred(nodes_ont_pred).t()) 205 | edges_img2ont_pred = F.softmax(pred_cls_logits, dim=1) 206 | edges_ont2img_pred = edges_img2ont_pred.t() 207 | 208 | if self.refine_obj_cls: 209 | ent_cls_logits = torch.mm(self.fc_output_proj_img_ent(nodes_img_ent), self.fc_output_proj_ont_ent(nodes_ont_ent).t()) 210 | edges_img2ont_ent = F.softmax(ent_cls_logits, dim=1) 211 | edges_ont2img_ent = edges_img2ont_ent.t() 212 | 213 | return pred_cls_logits, ent_cls_logits 214 | 215 | --------------------------------------------------------------------------------
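For reference, a minimal smoke-test sketch of the GGNN module defined in lib/my_ggnn_10.py (shapes and values are hypothetical; assumes a CUDA device and the Variable-based PyTorch API used throughout this repo). With use_embedding=False and use_knowledge=False the constructor falls back to identity embeddings (151 entity / 51 predicate classes) and all-zero knowledge adjacencies, so no pickle files are needed:

import torch
from torch.autograd import Variable
from lib.my_ggnn_10 import GGNN

ggnn = GGNN(emb_path=None, graph_path=None, use_embedding=False, use_knowledge=False,
            refine_obj_cls=True).cuda()

num_ent, num_rel, hidden = 4, 6, 512
rel_inds = torch.LongTensor([[0, 1], [1, 0], [2, 3], [3, 2], [0, 2], [1, 3]]).cuda()  # (subject, object) index pairs
obj_probs = Variable(torch.rand(num_ent, 151).cuda())      # per-entity class scores; bridge image entities to ontology entities
obj_fmaps = Variable(torch.randn(num_ent, hidden).cuda())  # image entity node features
vr = Variable(torch.randn(num_rel, hidden).cuda())         # image predicate (union-box) node features

pred_logits, ent_logits = ggnn(rel_inds, obj_probs, obj_fmaps, vr)
print(pred_logits.size(), ent_logits.size())  # torch.Size([6, 51]) torch.Size([4, 151])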