├── .idea
│   ├── encodings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vRGV.iml
│   ├── vcs.xml
│   └── workspace.xml
├── README.md
├── cfgs
│   ├── res101.yml
│   ├── res101_ls.yml
│   ├── res50.yml
│   └── vgg16.yml
├── dataloader
│   ├── __init__.py
│   ├── build_vocab.py
│   ├── data_preparation.py
│   ├── detect_frame_loader.py
│   ├── ground_loader.py
│   └── util.py
├── dataset
│   └── vidvrd
│       ├── .gitignore
│       ├── gt_relation_frame.json
│       ├── static_relations.txt
│       ├── test.txt
│       ├── trainval.txt
│       ├── val_list.txt
│       ├── videos.txt
│       ├── vocab.pkl
│       ├── vrelation_train.json
│       ├── vrelation_val.json
│       └── vrelation_zero.json
├── detect_frame.py
├── detection.py
├── detection.sh
├── eval_ground.py
├── evaluations
│   ├── common.py
│   └── util.py
├── generate_track_link.py
├── ground.py
├── ground.sh
├── ground_relation.py
├── introduction.png
├── lib
│   ├── datasets
│   │   ├── VOCdevkit-matlab-wrapper
│   │   │   ├── get_voc_opts.m
│   │   │   ├── voc_eval.m
│   │   │   └── xVOCap.m
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── coco.cpython-36.pyc
│   │   │   ├── ds_utils.cpython-36.pyc
│   │   │   ├── factory.cpython-36.pyc
│   │   │   ├── imagenet.cpython-36.pyc
│   │   │   ├── imdb.cpython-36.pyc
│   │   │   ├── pascal_voc.cpython-36.pyc
│   │   │   ├── vg.cpython-36.pyc
│   │   │   ├── vg_eval.cpython-36.pyc
│   │   │   └── voc_eval.cpython-36.pyc
│   │   ├── coco.py
│   │   ├── ds_utils.py
│   │   ├── factory.py
│   │   ├── imagenet.py
│   │   ├── imdb.py
│   │   ├── pascal_voc.py
│   │   ├── pascal_voc_rbg.py
│   │   ├── tools
│   │   │   └── mcg_munge.py
│   │   ├── vg.py
│   │   ├── vg_eval.py
│   │   ├── voc_eval.py
│   │   └── voc_eval_bak.py
│   ├── make.sh
│   ├── model
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   └── __init__.cpython-36.pyc
│   │   ├── faster_rcnn
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   ├── faster_rcnn.cpython-36.pyc
│   │   │   │   ├── resnet.cpython-36.pyc
│   │   │   │   └── vgg16.cpython-36.pyc
│   │   │   ├── faster_rcnn.py
│   │   │   ├── resnet.py
│   │   │   └── vgg16.py
│   │   ├── nms
│   │   │   ├── .gitignore
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   ├── nms_cpu.cpython-36.pyc
│   │   │   │   ├── nms_gpu.cpython-36.pyc
│   │   │   │   └── nms_wrapper.cpython-36.pyc
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   └── nms
│   │   │   │       ├── __init__.py
│   │   │   │       └── __pycache__
│   │   │   │           └── __init__.cpython-36.pyc
│   │   │   ├── build.py
│   │   │   ├── make.sh
│   │   │   ├── nms_cpu.py
│   │   │   ├── nms_gpu.py
│   │   │   ├── nms_kernel.cu
│   │   │   ├── nms_wrapper.py
│   │   │   └── src
│   │   │       ├── nms_cuda.c
│   │   │       ├── nms_cuda.h
│   │   │       ├── nms_cuda_kernel.cu
│   │   │       ├── nms_cuda_kernel.cu.o
│   │   │       └── nms_cuda_kernel.h
│   │   ├── roi_align
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   └── __init__.cpython-36.pyc
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   └── roi_align
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __pycache__
│   │   │   │       │   └── __init__.cpython-36.pyc
│   │   │   │       └── _roi_align.so
│   │   │   ├── build.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   │   └── roi_align.cpython-36.pyc
│   │   │   │   └── roi_align.py
│   │   │   ├── make.sh
│   │   │   ├── modules
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   │   └── roi_align.cpython-36.pyc
│   │   │   │   └── roi_align.py
│   │   │   └── src
│   │   │       ├── roi_align.c
│   │   │       ├── roi_align.h
│   │   │       ├── roi_align_cuda.c
│   │   │       ├── roi_align_cuda.h
│   │   │       ├── roi_align_kernel.cu
│   │   │       ├── roi_align_kernel.cu.o
│   │   │       └── roi_align_kernel.h
│   │   ├── roi_crop
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   └── __init__.cpython-36.pyc
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   ├── crop_resize
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── _crop_resize.so
│   │   │   │   └── roi_crop
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __pycache__
│   │   │   │       │   └── __init__.cpython-36.pyc
│   │   │   │       └── _roi_crop.so
│   │   │   ├── build.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   │   └── roi_crop.cpython-36.pyc
│   │   │   │   ├── crop_resize.py
│   │   │   │   ├── gridgen.py
│   │   │   │   └── roi_crop.py
│   │   │   ├── make.sh
│   │   │   ├── modules
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   │   └── roi_crop.cpython-36.pyc
│   │   │   │   ├── gridgen.py
│   │   │   │   └── roi_crop.py
│   │   │   └── src
│   │   │       ├── roi_crop.c
│   │   │       ├── roi_crop.h
│   │   │       ├── roi_crop_cuda.c
│   │   │       ├── roi_crop_cuda.h
│   │   │       ├── roi_crop_cuda_kernel.cu
│   │   │       ├── roi_crop_cuda_kernel.cu.o
│   │   │       └── roi_crop_cuda_kernel.h
│   │   ├── roi_pooling
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   └── __init__.cpython-36.pyc
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   └── roi_pooling
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __pycache__
│   │   │   │       │   └── __init__.cpython-36.pyc
│   │   │   │       └── _roi_pooling.so
│   │   │   ├── build.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   │   └── roi_pool.cpython-36.pyc
│   │   │   │   └── roi_pool.py
│   │   │   ├── modules
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   │   └── roi_pool.cpython-36.pyc
│   │   │   │   └── roi_pool.py
│   │   │   └── src
│   │   │       ├── roi_pooling.c
│   │   │       ├── roi_pooling.cu.o
│   │   │       ├── roi_pooling.h
│   │   │       ├── roi_pooling_cuda.c
│   │   │       ├── roi_pooling_cuda.h
│   │   │       ├── roi_pooling_kernel.cu
│   │   │       └── roi_pooling_kernel.h
│   │   ├── rpn
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   ├── anchor_target_layer.cpython-36.pyc
│   │   │   │   ├── bbox_transform.cpython-36.pyc
│   │   │   │   ├── generate_anchors.cpython-36.pyc
│   │   │   │   ├── proposal_layer.cpython-36.pyc
│   │   │   │   ├── proposal_target_layer_cascade.cpython-36.pyc
│   │   │   │   └── rpn.cpython-36.pyc
│   │   │   ├── anchor_target_layer.py
│   │   │   ├── bbox_transform.py
│   │   │   ├── generate_anchors.py
│   │   │   ├── proposal_layer.py
│   │   │   ├── proposal_target_layer_cascade.py
│   │   │   └── rpn.py
│   │   └── utils
│   │       ├── .gitignore
│   │       ├── __init__.py
│   │       ├── __pycache__
│   │       │   ├── __init__.cpython-36.pyc
│   │       │   ├── blob.cpython-36.pyc
│   │       │   ├── config.cpython-36.pyc
│   │       │   └── net_utils.cpython-36.pyc
│   │       ├── bbox.pyx
│   │       ├── blob.py
│   │       ├── config.py
│   │       ├── logger.py
│   │       └── net_utils.py
│   ├── pycocotools
│   │   ├── UPSTREAM_REV
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── coco.cpython-36.pyc
│   │   │   ├── cocoeval.cpython-36.pyc
│   │   │   └── mask.cpython-36.pyc
│   │   ├── _mask.c
│   │   ├── _mask.cpython-36m-x86_64-linux-gnu.so
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── license.txt
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── roi_data_layer
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── minibatch.cpython-36.pyc
│   │   │   ├── roibatchLoader.cpython-36.pyc
│   │   │   └── roidb.cpython-36.pyc
│   │   ├── minibatch.py
│   │   ├── roibatchLoader.py
│   │   └── roidb.py
│   └── setup.py
├── model.png
├── models
│   ├── .gitignore
│   ├── pretrained_models
│   └── vidvrd
│       ├── visual_bbox_trans_temp2-ground-6.ckpt
│       └── visual_bbox_trans_temp2-reconstruct-6.ckpt
├── networks
│   ├── basic.py
│   └── relation2relation.py
├── requirements.txt
├── tools
│   ├── sample_video_feature.py
│   ├── util.py
│   └── word_feature.py
├── tube.py
└── utils.py
/README.md:
--------------------------------------------------------------------------------
1 | # [Visual Relation Grounding in Videos](https://arxiv.org/pdf/2007.08814.pdf)
2 |
3 | This is the PyTorch implementation of our work at ECCV 2020 (Spotlight).
4 | ![introduction](introduction.png)
5 | The repository mainly includes three parts: (1) RoI feature extraction; (2) training and inference; and (3) relation-aware trajectory generation.
6 | ![model](model.png)
7 | ## Notes
8 | Fixed an issue with unstable results [2021/10/07].
9 |
10 | ## Environment
11 |
12 | Anaconda 3, Python 3.6.5, PyTorch 0.4.1 (a higher version is fine once the features are ready) and CUDA >= 9.0. For other libs, please refer to requirements.txt.
13 |
14 | ## Install
15 | Please create an env for this project using Anaconda 3 (install [anaconda](https://docs.anaconda.com/anaconda/install/linux/) first):
16 | ```
17 | >conda create -n envname python=3.6.5 # Create
18 | >conda activate envname # Enter
19 | >pip install -r requirements.txt # Install the provided libs
20 | >sh vRGV/lib/make.sh # Set the environment for detection, make sure you have nvcc
21 | ```
22 | ## Data Preparation
23 | Please download the data [here](https://drive.google.com/file/d/1qNJ3jBPPoi0BPkvLqooS66czvCxsib1M/view?usp=sharing). The folder ```ground_data``` should be in the same directory as ```vRGV```. Please merge the downloaded vRGV folder with this repo.
24 |
25 | Please download the videos [here](https://xdshang.github.io/docs/imagenet-vidvrd.html) and extract the frames into ground_data.
26 | The directory structure should look like: ground_data/vidvrd/JPEGImages/ILSVRC2015_train_xxx/000000.JPEG.
27 |
28 | ## Usage
29 | Feature Extraction. (This needs about 100 GB of storage, because all detected bboxes are dumped along with their features. The footprint can be greatly reduced by changing detect_frame.py to return only the top-40 bboxes and saving them as .npz files; see the sketch after this README.)
30 | ```
31 | ./detection.sh 0 val #(or train)
32 | ```
33 | Sample video features:
34 | ```
35 | cd tools
36 | python sample_video_feature.py
37 | ```
38 | Test. You can use our provided model to verify the features and environment:
39 | ```
40 | ./ground.sh 0 val # Output the relation-aware spatio-temporal attention
41 | python generate_track_link.py # Generate relation-aware trajectories with Viterbi algorithm.
42 | python eval_ground.py # Evaluate the performance
43 | ```
44 | You should get the accuracy Acc_R: 24.58%.
45 |
46 | Train. If you want to train the model from scratch, please apply a two-stage training scheme: 1) train a basic model without relation attendance, and 2) load the reconstruction part of the pre-trained model to learn the whole model (with the same lr_rate). For implementation, please turn off/on ```[pretrain] in line 52 of ground.py```, and switch between ```line 6 & 7 in ground_relation.py``` for 1st- and 2nd-stage training respectively. Also, for 2nd-stage training, you need to change the model files in ```line 69 & 70 of ground_relation.py``` to the best model obtained at the first stage.
47 | ```
48 | ./ground.sh 0 train # Train the model with GPU id 0
49 | ```
50 | The results may differ slightly (+/-0.5%). For comparison, please follow the results reported in our paper.
51 | ## Result Visualization
52 | |Query| bicycle-jump_beneath-person | person-feed-elephant | person-stand_above-bicycle | dog-watch-turtle|
53 | |:---| --------------------------------- | ----------------------------- | ---------------------------------------- | ---------------------------------------- |
54 | |Result| (demo GIF) | (demo GIF) | (demo GIF) | (demo GIF) |
55 | |Query| person-ride-horse | person-ride-bicycle | person-drive-car | bicycle-move_toward-car|
56 | |Result| (demo GIF) | (demo GIF) | (demo GIF) | (demo GIF) |
57 |
58 | ## Citation
59 |
60 | ```
61 | @inproceedings{xiao2020visual,
62 | title={Visual Relation Grounding in Videos},
63 | author={Xiao, Junbin and Shang, Xindi and Yang, Xun and Tang, Sheng and Chua, Tat-Seng},
64 | booktitle={European Conference on Computer Vision},
65 | pages={447--464},
66 | year={2020},
67 | organization={Springer}
68 | }
69 | ```
70 |
71 | ## License
72 |
73 | NUS © [NExT++](https://nextcenter.org/)
74 |
--------------------------------------------------------------------------------
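
A minimal sketch (not part of the repo) of the storage-saving idea mentioned under Feature Extraction above: keep only the top-40 boxes per frame and store them compressed with np.savez_compressed instead of pickling the full detector output. The helper name, array names, and shapes here are assumptions for illustration only.
```
# Hypothetical helper, not part of vRGV: keep the top-k detections per frame
# and save them as a compressed .npz, as the README suggests.
import numpy as np

def save_topk_npz(bbox, cls_prob, roi_feat, out_file, k=40):
    """bbox: (N, 4), cls_prob: (N, C), roi_feat: (N, D) -- assumed layouts."""
    scores = cls_prob[:, 1:].max(axis=1)   # best non-background score per box
    keep = np.argsort(scores)[-k:]         # indices of the top-k boxes
    np.savez_compressed(out_file, bbox=bbox[keep],
                        cls_prob=cls_prob[keep], roi_feat=roi_feat[keep])

# Toy call with random data:
save_topk_npz(np.random.rand(1000, 4), np.random.rand(1000, 81),
              np.random.rand(1000, 2048), 'frame_000000.npz')
```
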
/cfgs/res101.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res101
2 | TRAIN:
3 |   HAS_RPN: True
4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 |   RPN_POSITIVE_OVERLAP: 0.7
6 |   RPN_BATCHSIZE: 256
7 |   PROPOSAL_METHOD: gt
8 |   BG_THRESH_LO: 0.0
9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   DOUBLE_BIAS: False
13 |   LEARNING_RATE: 0.001
14 | TEST:
15 |   HAS_RPN: True
16 | POOLING_SIZE: 7
17 | POOLING_MODE: align
18 | CROP_RESIZE_WITH_MAX_POOL: False
19 |
--------------------------------------------------------------------------------
/cfgs/res101_ls.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res101
2 | TRAIN:
3 |   HAS_RPN: True
4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 |   RPN_POSITIVE_OVERLAP: 0.7
6 |   RPN_BATCHSIZE: 256
7 |   PROPOSAL_METHOD: gt
8 |   BG_THRESH_LO: 0.0
9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   SCALES: [800]
13 |   DOUBLE_BIAS: False
14 |   LEARNING_RATE: 0.001
15 | TEST:
16 |   HAS_RPN: True
17 |   SCALES: [800]
18 |   MAX_SIZE: 1200
19 |   RPN_POST_NMS_TOP_N: 40
20 |   NMS: 0.4
21 | POOLING_SIZE: 7
22 | POOLING_MODE: align
23 | CROP_RESIZE_WITH_MAX_POOL: False
24 |
--------------------------------------------------------------------------------
/cfgs/res50.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res50
2 | TRAIN:
3 |   HAS_RPN: True
4 |   # IMS_PER_BATCH: 1
5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
6 |   RPN_POSITIVE_OVERLAP: 0.7
7 |   RPN_BATCHSIZE: 256
8 |   PROPOSAL_METHOD: gt
9 |   BG_THRESH_LO: 0.0
10 |   DISPLAY: 20
11 |   BATCH_SIZE: 256
12 |   WEIGHT_DECAY: 0.0001
13 |   DOUBLE_BIAS: False
14 |   SNAPSHOT_PREFIX: res50_faster_rcnn
15 | TEST:
16 |   HAS_RPN: True
17 | POOLING_MODE: crop
18 |
--------------------------------------------------------------------------------
/cfgs/vgg16.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: vgg16
2 | TRAIN:
3 |   HAS_RPN: True
4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 |   RPN_POSITIVE_OVERLAP: 0.7
6 |   RPN_BATCHSIZE: 256
7 |   PROPOSAL_METHOD: gt
8 |   BG_THRESH_LO: 0.0
9 |   BATCH_SIZE: 256
10 |   LEARNING_RATE: 0.01
11 | TEST:
12 |   HAS_RPN: True
13 | POOLING_MODE: align
14 | CROP_RESIZE_WITH_MAX_POOL: False
15 |
--------------------------------------------------------------------------------
/dataloader/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/dataloader/__init__.py
--------------------------------------------------------------------------------
/dataloader/build_vocab.py:
--------------------------------------------------------------------------------
1 | import nltk
2 | # nltk.download('punkt')
3 | import pickle
4 | import argparse
5 | from .util import load_file
6 | from collections import Counter
7 | import string
8 |
9 |
10 |
11 | class Vocabulary(object):
12 |     """Simple vocabulary wrapper."""
13 |     def __init__(self):
14 |         self.word2idx = {}
15 |         self.idx2word = {}
16 |         self.idx = 0
17 |
18 |     def add_word(self, word):
19 |         if word not in self.word2idx:
20 |             self.word2idx[word] = self.idx
21 |             self.idx2word[self.idx] = word
22 |             self.idx += 1
23 |
24 |     def __call__(self, word):
25 |         if word not in self.word2idx:
26 |             return self.word2idx['<unk>']
27 |         return self.word2idx[word]
28 |
29 |     def __len__(self):
30 |         return len(self.word2idx)
31 |
32 |
33 | def build_vocab(anno_file, threshold):
34 |     """Build a simple vocabulary wrapper."""
35 |
36 |     annos = load_file(anno_file)
37 |     counter = Counter()
38 |     table = str.maketrans('-_', '  ')
39 |     for vrelation in annos:
40 |         relation = vrelation[4].translate(table)
41 |         tokens = nltk.tokenize.word_tokenize(relation.lower())
42 |         counter.update(tokens)
43 |
44 |
45 |     # for word, cnt in counter.items():
46 |     #     print(word, cnt)
47 |     # If the word frequency is less than 'threshold', then the word is discarded.
48 |     words = [word for word, cnt in counter.items() if cnt >= threshold]
49 |
50 |
51 |     # Create a vocab wrapper and add some special tokens.
52 |     vocab = Vocabulary()
53 |     vocab.add_word('<pad>')
54 |     vocab.add_word('<start>')
55 |     vocab.add_word('<end>')
56 |     vocab.add_word('<unk>')
57 |
58 |     # Add the words to the vocabulary.
59 |     for i, word in enumerate(words):
60 |         vocab.add_word(word)
61 |     return vocab
62 |
63 |
64 | def main(args):
65 |     vocab = build_vocab(args.caption_path, args.threshold)
66 |     vocab_path = args.vocab_path
67 |     with open(vocab_path, 'wb') as f:
68 |         pickle.dump(vocab, f)
69 |     print("Total vocabulary size: {}".format(len(vocab)))
70 |     print("Saved the vocabulary wrapper to '{}'".format(vocab_path))
71 |
72 |
73 | if __name__ == '__main__':
74 |     parser = argparse.ArgumentParser()
75 |     parser.add_argument('--caption_path', type=str,
76 |                         default='../dataset/vidvrd/vrelation_train.json',
77 |                         help='path for train annotation file')
78 |     parser.add_argument('--vocab_path', type=str, default='../dataset/vidvrd/vocab.pkl',
79 |                         help='path for saving vocabulary wrapper')
80 |     parser.add_argument('--threshold', type=int, default=1,
81 |                         help='minimum word count threshold')
82 |     args = parser.parse_args()
83 |     main(args)
--------------------------------------------------------------------------------
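
A short usage sketch of the Vocabulary class above: it mirrors how build_vocab() tokenizes a relation string and how unknown words fall back to '<unk>'. The import is assumed to be run from the repo root; the example relation is hypothetical.
```
# Usage sketch for Vocabulary; the special-token list mirrors build_vocab().
import nltk
from dataloader.build_vocab import Vocabulary  # assumes repo-root working dir
# nltk.download('punkt')  # one-time download for word_tokenize

vocab = Vocabulary()
for tok in ['<pad>', '<start>', '<end>', '<unk>']:
    vocab.add_word(tok)

relation = 'dog-chase_away-person'        # hypothetical query
table = str.maketrans('-_', '  ')         # same mapping as in build_vocab()
tokens = nltk.tokenize.word_tokenize(relation.translate(table).lower())
for tok in tokens:
    vocab.add_word(tok)

print([vocab(tok) for tok in tokens])     # known words -> their indices
print(vocab('zebra'))                     # unseen word -> index of '<unk>'
```
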
/dataloader/data_preparation.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : data_preparation.py
5 | # ====================================================
6 |
7 | from util import *
8 | import os.path as osp
9 | import json
10 | import pickle as pkl
11 | import torch
12 | import numpy as np
13 | import os
14 | import sys
15 | sys.path.insert(0, '/storage/jbxiao/workspace/ground_code/lib')
16 | from model.nms.nms_wrapper import nms
17 |
18 |
19 |
20 | def load_predict(predict_file, topn):
21 |     """
22 |     NMS within each class, and then select the top-n bboxes with the highest scores across classes
23 |     :param predict_file:
24 |     :return:
25 |     """
26 |     with open(predict_file, 'rb') as fp:
27 |         predict = pkl.load(fp)
28 |
29 |
30 |     pred_boxes = predict['bbox'].squeeze()
31 |     scores = predict['cls_prob'].squeeze()
32 |     roi_feat = predict['roi_feat'].squeeze()
33 |
34 |     pthresh = 0.00001
35 |     bbox = []
36 |     keep_inds = []
37 |     first = True
38 |     for j in range(81):
39 |         if j == 0: continue  # skip the background
40 |         inds = torch.nonzero(scores[:, j] > pthresh).view(-1)
41 |         if len(inds) == 0: continue
42 |
43 |         cls_scores = scores[:, j][inds]
44 |         _, order = torch.sort(cls_scores, 0, True)
45 |
46 |         cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
47 |
48 |         cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
49 |
50 |         cls_dets = cls_dets[order]
51 |         keep = nms(cls_dets, 0.4, force_cpu=0)
52 |         inds = keep.view(-1).long()
53 |         if len(inds) > 0:
54 |             tmp_bbox = cls_dets[inds].cpu().data.numpy()
55 |             keep = keep.cpu().data.numpy()
56 |             if first:
57 |                 bbox = tmp_bbox
58 |                 keep_inds = keep
59 |             else:
60 |                 bbox = np.vstack((bbox, tmp_bbox))
61 |                 keep_inds = np.vstack((keep_inds, keep))
62 |             first = False
63 |
64 |
65 |     rank_ind = bbox[:, -1].argsort()
66 |     select_inds = keep_inds[rank_ind][-topn:]
67 |     select_classme = scores[select_inds, :].squeeze()
68 |     select_feat = roi_feat[select_inds, :].squeeze()
69 |     select_bbox = bbox[rank_ind][-topn:, 0:4]
70 |     return select_bbox, select_classme.cpu().data.numpy(), select_feat.cpu().data.numpy()
71 |
72 |
73 | def select_feature(predict_dir, video_list, save_dir):
74 |     """
75 |     select bboxes from the 1000 region proposals
76 |     :param predict_dir:
77 |     :param video_list:
78 |     :return:
79 |     """
80 |     videos = load_file(video_list)
81 |     for vid, vname in enumerate(videos):
82 |         if vid <= 600: continue
83 |         if vid > 800: break
84 |
85 |         body_name = osp.splitext(vname)[0]
86 |         predict_file = osp.join(predict_dir, body_name)
87 |         files = os.listdir(predict_file)
88 |         save_folder = osp.join(save_dir, body_name)
89 |
90 |         if not osp.exists(save_folder):
91 |             os.makedirs(save_folder)
92 |         for file in files:
93 |             path = osp.join(predict_file, file)
94 |             bbox, classme, feat = load_predict(path, 40)
95 |             # print(bbox.shape, classme.shape, feat.shape)
96 |             feature = {'bbox': bbox, 'classme': classme, 'feat': feat}
97 |             save_file = osp.join(save_folder, file)
98 |             with open(save_file, 'wb') as fp:
99 |                 pkl.dump(feature, fp)
100 |
101 |         print(vid, save_folder)
102 |
103 |
104 | def get_video_relation(anno_dir, video_list, mode):
105 |     """
106 |     obtain video relation samples
107 |     :param anno_dir:
108 |     :param video_list:
109 |     :return:
110 |     """
111 |     videos = load_file(video_list)
112 |     vrelations = []
113 |     for video in videos:
114 |         basename = osp.splitext(video)[0]
115 |         path = osp.join(anno_dir, video)
116 |         anno = load_file(path)
117 |         id2cls = {}
118 |         subobj = anno['subject/objects']
119 |         for item in subobj:
120 |             id2cls[item['tid']] = item['category']
121 |
122 |         frame_count = anno['frame_count']
123 |         frame_width, frame_height = anno['width'], anno['height']
124 |         relations = anno['relation_instances']
125 |         for rel in relations:
126 |             subject = id2cls[rel['subject_tid']]
127 |             object = id2cls[rel['object_tid']]
128 |             predicate = rel['predicate']
129 |             relation = '-'.join([subject, predicate, object])
130 |             vrelations.append((basename, frame_count, frame_width, frame_height, relation))
131 |
132 |     save_file = '../dataset/vidvrd/vrelation_{}.json'.format(mode)
133 |     print('save to {}'.format(save_file))
134 |     with open(save_file, 'w') as fp:
135 |         json.dump(vrelations, fp)
136 |
137 |
138 |
139 | def main():
140 |     root_dir = '/storage/jbxiao/workspace/'
141 |     predict_dir = root_dir + 'ground_data/new_dets/'
142 |     ground_dir = root_dir + 'vdata/'
143 |     anno_dir = osp.join(ground_dir, 'vidvrd')
144 |     train_list = osp.join(ground_dir, 'train_list.txt')
145 |     val_list = osp.join(ground_dir, 'val_list.txt')
146 |     save_dir = osp.join(root_dir, 'ground_data/video_feature')
147 |
148 |     select_feature(predict_dir, train_list, save_dir)
149 |
150 |     # get_video_relation(anno_dir, train_list, 'train')
151 |
152 |
153 | if __name__ == "__main__":
154 |     main()
155 |
--------------------------------------------------------------------------------
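
load_predict() above relies on the compiled model.nms extension. For a quick sanity check without building lib, an assumed-equivalent pure-NumPy NMS (same [x1, y1, x2, y2, score] layout and the same +1 area convention as evaluations/common.py) could look like the sketch below; this is a reference implementation, not the repo's CUDA kernel.
```
import numpy as np

def nms_numpy(dets, thresh=0.4):
    """dets: (N, 5) array of [x1, y1, x2, y2, score]; returns kept indices."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]         # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # overlap of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]   # drop boxes overlapping too much
    return keep

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],    # suppressed: high IoU with the first
                 [100, 100, 140, 140, 0.7]])
print(nms_numpy(dets))                     # -> [0, 2]
```
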
/dataloader/detect_frame_loader.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : detect_frame_loader.py
5 | # ====================================================
6 | import sys
7 | sys.path.insert(0, 'lib')
8 | from .util import *
9 | from PIL import Image
10 | from torch.utils.data import Dataset, DataLoader
11 | import os.path as osp
12 | import numpy as np
13 | import cv2
14 | from model.utils.config import cfg, cfg_from_file, cfg_from_list
15 |
16 |
17 |
18 | class ExpDataset(Dataset):
19 |     """load the dataset in dataloader"""
20 |     def __init__(self, dic, anno_path, spatial_path, mode):
21 |         self.frames = dic
22 |         self.anno_path = anno_path
23 |         self.spatial_path = spatial_path
24 |         self.mode = mode
25 |
26 |
27 |     def __len__(self):
28 |         return len(self.frames)
29 |
30 |
31 |     def transform_image(self, im):
32 |         """resize and save scale"""
33 |         im_orig = im.astype(np.float32, copy=True)
34 |         im_orig -= cfg.PIXEL_MEANS
35 |
36 |         self.im_shape = im_orig.shape
37 |         im_size_min = np.min(self.im_shape[0:2])
38 |         im_size_max = np.max(self.im_shape[0:2])
39 |
40 |         processed_ims = []
41 |         im_scale_factors = []
42 |
43 |         for target_size in cfg.TEST.SCALES:
44 |             im_scale = float(target_size) / float(im_size_min)
45 |
46 |             if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
47 |                 im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
48 |             im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
49 |                             interpolation=cv2.INTER_LINEAR)
50 |             im_scale_factors.append(im_scale)
51 |             processed_ims.append(im)
52 |
53 |         return np.array(processed_ims).squeeze(), np.array(im_scale_factors, dtype=np.float32)
54 |
55 |
56 |     def load_rgb_image(self, frame_name):
57 |         """loading image from path and frame number"""
58 |         full_name = osp.join(self.spatial_path, frame_name+'.JPEG')
59 |
60 |         if not osp.exists(full_name):
61 |             print('File {} not found'.format(full_name))
62 |             return None
63 |         img = Image.open(full_name)
64 |         im_in = np.array(img)
65 |         if len(im_in.shape) == 2:
66 |             im_in = im_in[:, :, np.newaxis]
67 |             im_in = np.concatenate((im_in, im_in, im_in), axis=2)
68 |         # rgb -> bgr
69 |         im = im_in[:, :, ::-1]
70 |
71 |         blob, im_scale = self.transform_image(im)
72 |
73 |         return blob, im_scale
74 |
75 |
76 |     def __getitem__(self, idx):
77 |         # if idx <= 200000: return -1, -1
78 |         # if idx > 200000: return -1, -1
79 |
80 |         frame_name = self.frames[idx]
81 |         blob, scale = self.load_rgb_image(frame_name)
82 |         spatial_data = {}
83 |         spatial_data['im_blob'] = [blob]
84 |         spatial_data['im_scale'] = [scale]
85 |
86 |         return spatial_data, frame_name
87 |
88 |
89 | class DetectFrameLoader():
90 |     def __init__(self, batch_size, num_worker, spatial_path,
91 |                  dataset, train_list_path, val_list_path,
92 |                  train_shuffle=True, val_shuffle=False):
93 |
94 |         self.batch_size = batch_size
95 |         self.num_workers = num_worker
96 |         self.spatial_path = spatial_path
97 |
98 |
99 |         self.train_shuffle = train_shuffle
100 |         self.val_shuffle = val_shuffle
101 |
102 |         self.dataset = dataset
103 |
104 |         self.get_frames(train_list_path, val_list_path)
105 |
106 |
107 |     def get_frames(self, train_list_path, val_list_path):
108 |
109 |         train_list = get_video_frames(train_list_path)
110 |         test_list = get_video_frames(val_list_path)
111 |
112 |         self.train_frames = train_list
113 |         self.val_frames = test_list
114 |
115 |
116 |     def run(self, mode):
117 |         if mode == 'val':
118 |             train_loader = ''  # self.train()
119 |             val_loader = self.validate()
120 |         elif mode == 'train':
121 |             train_loader = self.train()
122 |             val_loader = ''  # self.validate()
123 |         return train_loader, val_loader
124 |
125 |
126 |     def train(self):
127 |         # print("Now in train")
128 |         # apply transformation on training videos
129 |         training_set = ExpDataset(dic=self.train_frames, anno_path=self.dataset, spatial_path=self.spatial_path,
130 |                                   mode='train')
131 |
132 |         print('Eligible frames for training :', len(training_set), 'video frames')
133 |         train_loader = DataLoader(
134 |             dataset=training_set,
135 |             batch_size=self.batch_size,
136 |             shuffle=self.train_shuffle,
137 |             num_workers=self.num_workers)
138 |         return train_loader
139 |
140 |
141 |     def validate(self):
142 |         # print("Now in Validate")
143 |         # apply transformation on validation videos
144 |         validation_set = ExpDataset(dic=self.val_frames, anno_path=self.dataset, spatial_path=self.spatial_path,
145 |                                     mode='val')
146 |
147 |         print('Eligible frames for validation:', len(validation_set), 'video frames')
148 |         val_loader = DataLoader(
149 |             dataset=validation_set,
150 |             batch_size=self.batch_size,
151 |             shuffle=self.val_shuffle,
152 |             num_workers=self.num_workers)
153 |
154 |         return val_loader
155 |
156 |
157 |
--------------------------------------------------------------------------------
/dataloader/util.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : util.py
5 | # ====================================================
6 | import json
7 | import os
8 | import os.path as osp
9 | import numpy as np
10 | import pickle as pkl
11 |
12 | def load_file(file_name):
13 |
14 |     with open(file_name, 'r') as fp:
15 |         if osp.splitext(file_name)[1] == '.txt':
16 |             annos = fp.readlines()
17 |             annos = [line.rstrip() for line in annos]
18 |         if osp.splitext(file_name)[1] == '.json':
19 |             annos = json.load(fp)
20 |
21 |     return annos
22 |
23 | def pkload(file):
24 |     with open(file, 'rb') as fp:
25 |         data = pkl.load(fp)
26 |     return data
27 |
28 | def pkdump(data, file):
29 |     dirname = osp.dirname(file)
30 |     if not osp.exists(dirname):
31 |         os.makedirs(dirname)
32 |     with open(file, 'wb') as fp:
33 |         pkl.dump(data, fp)
34 |
35 | def get_video_frames(video_relation_file):
36 |
37 |     folders = load_file(video_relation_file)
38 |     vframes = {}
39 |     for recode in folders:
40 |         video, nframe = recode[0], recode[1]
41 |         if video not in vframes:
42 |             vframes[video] = nframe
43 |         else:
44 |             continue
45 |
46 |     all_frames = []
47 |     sample_num = 120
48 |
49 |     for video, nframe in vframes.items():
50 |
51 |         samples = np.round(np.linspace(
52 |             0, nframe-1, sample_num))
53 |
54 |         samples = set([int(s) for s in samples])
55 |         samples = list(samples)
56 |         fnames = [osp.join(video, str(fid).zfill(6)) for fid in samples]
57 |         if all_frames == []:
58 |             all_frames = fnames
59 |         else:
60 |             all_frames.extend(fnames)
61 |
62 |     return all_frames
63 |
64 | def select_bbox(roi_bbox, roi_classme, width, height):
65 |     """
66 |     select the bboxes with maximum confidence
67 |     :param roi_bbox:
68 |     :param roi_classme:
69 |     :return:
70 |     """
71 |     bbox, classme = roi_bbox.squeeze(), roi_classme.squeeze()
72 |     classme = classme[:, 1:]  # skip background
73 |     index = np.argmax(classme, 1)
74 |     bbox = np.asarray([bbox[i][4 * (index[i] + 1):4 * (index[i] + 1) + 4] for i in range(len(bbox))])
75 |     relative_bbox = bbox / np.asarray([width, height, width, height])
76 |     area = (bbox[:, 2]-bbox[:, 0]+1)*(bbox[:, 3]-bbox[:, 1]+1)
77 |     relative_area = area/(width*height)
78 |     relative_area = relative_area.reshape(-1, 1)
79 |     relative_bbox = np.hstack((relative_bbox, relative_area))
80 |
81 |     return relative_bbox
82 |
83 |
84 | def get_video_feature(video_feature_path, cache_file, frame_count,
85 |                       width, height, nbbox, frame_steps, feat_dim):
86 |     """
87 |     :param video_name:
88 |     :param frame_count:
89 |     :param width:
90 |     :param height:
91 |     :return:
92 |     """
93 |     # video_feature_folder = osp.join(video_feature_path, video_name)
94 |     # cache_file = osp.join(video_feature_cache, '{}.npy'.format(video_name))
95 |     # if osp.exists(cache_file) and osp.getsize(cache_file) > 0:
96 |     #     video_feature = pkload(cache_file)
97 |     #     return video_feature
98 |     sample_frames = np.round(np.linspace(0, frame_count - 1, frame_steps))
99 |     video_feature = np.zeros((len(sample_frames), nbbox, feat_dim), dtype=np.float32)
100 |     for i, fid in enumerate(sample_frames):
101 |         frame_name = osp.join(video_feature_path, str(int(fid)).zfill(6)+'.pkl')
102 |         with open(frame_name, 'rb') as fp:
103 |             feat = pkl.load(fp)
104 |         roi_feat = feat['roi_feat']  # 40x2048
105 |         roi_bbox = feat['bbox']
106 |         roi_classme = feat['cls_prob']  # 40 x 81
107 |         bbox = select_bbox(roi_bbox, roi_classme, width, height)  # 40 x 5
108 |         cb_feat = np.hstack((roi_feat, bbox))
109 |
110 |         video_feature[i] = cb_feat
111 |
112 |     np.savez(cache_file, x=video_feature)
113 |
114 |     return video_feature
115 |
--------------------------------------------------------------------------------
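
A quick numeric check of the sampling scheme in get_video_frames() above: 120 frame indices are drawn evenly over [0, nframe-1] and de-duplicated, so short videos keep every frame exactly once while long videos are sub-sampled to 120 frames.
```
import numpy as np

for nframe in (90, 120, 600):
    samples = np.round(np.linspace(0, nframe - 1, 120))
    unique = sorted(set(int(s) for s in samples))
    print(nframe, len(unique))   # 90 -> 90, 120 -> 120, 600 -> 120
```
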
/dataset/vidvrd/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore all except .gitignore file
2 | *
3 | !.gitignore
4 |
--------------------------------------------------------------------------------
/dataset/vidvrd/static_relations.txt:
--------------------------------------------------------------------------------
1 | above
2 | beneath
3 | left
4 | right
5 | front
6 | behind
7 | taller
8 | larger
9 | next
10 | lie
11 | sit
12 | stand
13 | stop
14 | hold
15 | bite
--------------------------------------------------------------------------------
/dataset/vidvrd/vocab.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/dataset/vidvrd/vocab.pkl
--------------------------------------------------------------------------------
/detection.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : detection.py
5 | # ====================================================
6 | from detect_frame import *
7 | from dataloader.detect_frame_loader import *
8 | from tools.util import set_gpu_devices
9 | from argparse import ArgumentParser
10 |
11 | BATCH_SIZE = 1
12 | num_workers = 1
13 | dataset = 'vidvrd/'
14 | spatial_path = '../ground_data/'+dataset+'/JPEGImages/'
15 | train_list_path= 'dataset/'+dataset+'/vrelation_train.json'
16 | val_list_path = 'dataset/'+dataset+'/vrelation_val.json'
17 |
18 |
19 | def main(args):
20 |
21 |     data_loader = DetectFrameLoader(BATCH_SIZE, num_workers, spatial_path,
22 |                                     dataset, train_list_path, val_list_path,
23 |                                     train_shuffle=False, val_shuffle=False)
24 |
25 |     train_loader, val_loader = data_loader.run(args.mode)
26 |
27 |     checkpoint_path = 'models/pretrained_models/res101/coco/faster_rcnn_1_10_14657.pth'
28 |     save_dir = '../ground_data/vidvrd/frame_feature1/'
29 |
30 |     cfg_file = 'cfgs/res101_ls.yml'
31 |     classes = ['coco'] * 81
32 |
33 |     cuda = True
34 |     class_agnostic = False
35 |
36 |     detect_frame = FeatureExtractor(train_loader, val_loader, cfg_file, classes,
37 |                                     class_agnostic, cuda, checkpoint_path, save_dir)
38 |
39 |     detect_frame.run(args.mode)
40 |
41 | if __name__ == "__main__":
42 |     parser = ArgumentParser()
43 |     parser.add_argument('--mode', dest='mode', type=str, default='val', help='train or val')
44 |     args = parser.parse_args()
45 |     main(args)
46 |
47 |
--------------------------------------------------------------------------------
/detection.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #########################################################################
3 | # File Name: detection.sh
4 | # Author: Xiao Junbin
5 | # mail: xiaojunbin@u.nus.edu
6 | # Created Time: Wed 20 Nov 2019 11:10:44 AM +08
7 | #########################################################################
8 | GPU=$1
9 | MODE=$2
10 | CUDA_VISIBLE_DEVICES=$GPU python detection.py --mode $MODE
11 |
--------------------------------------------------------------------------------
/eval_ground.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | from evaluations.common import tiou
3 | from evaluations.util import load_file
4 | import generate_track_link
5 |
6 | def eval_ground_scores(gt_relations, pred_relations, tiou_threshold):
7 |     """
8 |
9 |     :param gt_relations:
10 |     :param pred_relations:
11 |     :param tiou_threshold:
12 |     :return:
13 |     """
14 |     # pred_relations = sorted(pred_relations, key=lambda x: x['score'], reverse=True)
15 |
16 |     relation_num = len(gt_relations)
17 |     predict, predict_sub, predict_obj = 0, 0, 0
18 |
19 |     for relation, pred_trajs in pred_relations.items():
20 |         pred_sub = pred_trajs['sub']
21 |         pred_obj = pred_trajs['obj']
22 |         flag, flag_s, flag_o = False, False, False
23 |
24 |         gt_trajs = gt_relations[relation]
25 |
26 |         # print(relation)
27 |
28 |         for gt_traj in gt_trajs:
29 |             gt_sub = gt_traj['sub']
30 |             gt_obj = gt_traj['obj']
31 |             s_tiou = tiou(pred_sub, gt_sub)
32 |             o_tiou = tiou(pred_obj, gt_obj)
33 |             r_iou = min(s_tiou, o_tiou)
34 |
35 |             if r_iou >= tiou_threshold:
36 |                 flag = True
37 |             if s_tiou >= tiou_threshold:
38 |                 flag_s = True
39 |             if o_tiou >= tiou_threshold:
40 |                 flag_o = True
41 |         if flag:
42 |             predict += 1
43 |         if flag_s:
44 |             predict_sub += 1
45 |         if flag_o:
46 |             predict_obj += 1
47 |
48 |     predict = predict / relation_num
49 |     predict_sub = predict_sub / relation_num
50 |     predict_obj = predict_obj / relation_num
51 |
52 |     return predict, predict_sub, predict_obj, relation_num
53 |
54 |
55 | def evaluate(groundtruth, prediction, tiou_threshold=0.5):
56 |     """ evaluate visual relation detection and visual
57 |     relation tagging.
58 |     """
59 |
60 |     video_num = len(groundtruth)
61 |     print('Computing grounding accuracy over {} videos...'.format(video_num))
62 |     acc, acc_sub, acc_obj = 0.0, 0.0, 0.0
63 |
64 |     gt_rnum = 0
65 |     for qid, relation_gt in groundtruth.items():
66 |
67 |         if qid not in prediction:
68 |             continue
69 |         relation_pred = prediction[qid]
70 |         if len(relation_pred) == 0:
71 |             continue
72 |
73 |         video_acc, video_acc_sub, video_acc_obj, relation_num = eval_ground_scores(relation_gt, relation_pred, tiou_threshold)
74 |
75 |         acc += video_acc
76 |         acc_sub += video_acc_sub
77 |         acc_obj += video_acc_obj
78 |         gt_rnum += relation_num
79 |
80 |
81 |     acc /= video_num
82 |     acc_sub /= video_num
83 |     acc_obj /= video_num
84 |
85 |     print("Acc_S\t Acc_O\t Acc_R")
86 |
87 |     print('{:.2f}\t {:.2f}\t {:.2f}'.format(acc_sub*100, acc_obj*100, acc*100))
88 |
89 |
90 | def main():
91 |
92 |     groundtruth_dir = 'dataset/vidvrd/'
93 |     gt_file = osp.join(groundtruth_dir, 'gt_relation_frame.json')
94 |
95 |     result_dir = 'results/'
96 |     res_file = osp.join(result_dir, 'test_viterbi_1gap_04_batch.json')
97 |     if not osp.exists(res_file):
98 |         print('Generating ...')
99 |         generate_track_link.main(res_file)
100 |
101 |     groundtruth = load_file(gt_file)
102 |     prediction = load_file(res_file)
103 |
104 |     evaluate(groundtruth, prediction)
105 |
106 |
107 | if __name__ == "__main__":
108 |     main()
109 |
110 |
--------------------------------------------------------------------------------
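
The nesting that evaluate()/eval_ground_scores() above expect, inferred from the code (the exact key format inside gt_relation_frame.json may differ): both files map video id -> relation -> trajectories, where each trajectory maps a frame id to one [x1, y1, x2, y2] box; the ground truth holds a list of instances per relation, while the prediction holds a single one. The video id and boxes below are hypothetical.
```
# Hypothetical toy inputs shaped like the files eval_ground.py loads.
groundtruth = {
    'ILSVRC2015_train_00010001': {                  # assumed video id
        'dog-chase-person': [                       # gt: LIST of instances
            {'sub': {0: [10, 10, 50, 50]}, 'obj': {0: [60, 10, 90, 50]}},
        ],
    },
}
prediction = {
    'ILSVRC2015_train_00010001': {
        'dog-chase-person':                         # prediction: ONE instance
            {'sub': {0: [12, 10, 52, 50]}, 'obj': {0: [58, 10, 88, 50]}},
    },
}
# evaluate(groundtruth, prediction) would score this single relation.
```
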
/evaluations/common.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def voc_ap(rec, prec, use_07_metric=False):
5 |     """ ap = voc_ap(rec, prec, [use_07_metric])
6 |     Compute VOC AP given precision and recall.
7 |     If use_07_metric is true, uses the
8 |     VOC 07 11-point method (default: False).
9 |
10 |     Borrowed from https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py
11 |     """
12 |     if use_07_metric:
13 |         # 11 point metric
14 |         ap = 0.
15 |         for t in np.arange(0., 1.1, 0.1):
16 |             if np.sum(rec >= t) == 0:
17 |                 p = 0
18 |             else:
19 |                 p = np.max(prec[rec >= t])
20 |             ap = ap + p / 11.
21 |     else:
22 |         # correct AP calculation
23 |         # first append sentinel values at the end
24 |         mrec = np.concatenate(([0.], rec, [1.]))
25 |         mpre = np.concatenate(([0.], prec, [0.]))
26 |
27 |         # compute the precision envelope
28 |         for i in range(mpre.size - 1, 0, -1):
29 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
30 |
31 |         # to calculate area under PR curve, look for points
32 |         # where X axis (recall) changes value
33 |         i = np.where(mrec[1:] != mrec[:-1])[0]
34 |
35 |         # and sum (\Delta recall) * prec
36 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
37 |     return ap
38 |
39 |
40 | def iou(bbox_1, bbox_2):
41 |     """
42 |     Get IoU value of two bboxes
43 |     :param bbox_1:
44 |     :param bbox_2:
45 |     :return: IoU
46 |     """
47 |     w_1 = bbox_1[2] - bbox_1[0] + 1
48 |     h_1 = bbox_1[3] - bbox_1[1] + 1
49 |     w_2 = bbox_2[2] - bbox_2[0] + 1
50 |     h_2 = bbox_2[3] - bbox_2[1] + 1
51 |     area_1 = w_1 * h_1
52 |     area_2 = w_2 * h_2
53 |
54 |     overlap_bbox = (max(bbox_1[0], bbox_2[0]), max(bbox_1[1], bbox_2[1]),
55 |                     min(bbox_1[2], bbox_2[2]), min(bbox_1[3], bbox_2[3]))
56 |     overlap_w = max(0, (overlap_bbox[2] - overlap_bbox[0] + 1))
57 |     overlap_h = max(0, (overlap_bbox[3] - overlap_bbox[1] + 1))
58 |
59 |     overlap_area = overlap_w * overlap_h
60 |     union_area = area_1 + area_2 - overlap_area
61 |     IoU = overlap_area * 1.0 / union_area
62 |     return IoU
63 |
64 |
65 | def viou(traj_1, duration_1, traj_2, duration_2):
66 |     """ compute the voluminal Intersection over Union
67 |     for two trajectories, each of which is represented
68 |     by a duration [fstart, fend) and a list of bounding
69 |     boxes (i.e. traj) within the duration.
70 |     """
71 |
72 |     if duration_1[0] >= duration_2[1] or duration_1[1] <= duration_2[0]:
73 |         return 0.
74 |     elif duration_1[0] <= duration_2[0]:
75 |         head_1 = duration_2[0] - duration_1[0]
76 |         head_2 = 0
77 |         if duration_1[1] < duration_2[1]:
78 |             tail_1 = duration_1[1] - duration_1[0]
79 |             tail_2 = duration_1[1] - duration_2[0]
80 |         else:
81 |             tail_1 = duration_2[1] - duration_1[0]
82 |             tail_2 = duration_2[1] - duration_2[0]
83 |     else:
84 |         head_1 = 0
85 |         head_2 = duration_1[0] - duration_2[0]
86 |         if duration_1[1] < duration_2[1]:
87 |             tail_1 = duration_1[1] - duration_1[0]
88 |             tail_2 = duration_1[1] - duration_2[0]
89 |         else:
90 |             tail_1 = duration_2[1] - duration_1[0]
91 |             tail_2 = duration_2[1] - duration_2[0]
92 |     v_overlap = 0
93 |
94 |     for i in range(tail_1 - head_1):
95 |         roi_1 = traj_1[head_1 + i]
96 |         roi_2 = traj_2[head_2 + i]
97 |         left = max(roi_1[0], roi_2[0])
98 |         top = max(roi_1[1], roi_2[1])
99 |         right = min(roi_1[2], roi_2[2])
100 |         bottom = min(roi_1[3], roi_2[3])
101 |         ov_w = max(0, (right - left + 1))
102 |         ov_h = max(0, (bottom - top + 1))
103 |         v_overlap += ov_w * ov_h
104 |     v1 = 0
105 |     for i in range(len(traj_1)):
106 |         v1 += (traj_1[i][2] - traj_1[i][0] + 1) * (traj_1[i][3] - traj_1[i][1] + 1)
107 |     v2 = 0
108 |     for i in range(len(traj_2)):
109 |         v2 += (traj_2[i][2] - traj_2[i][0] + 1) * (traj_2[i][3] - traj_2[i][1] + 1)
110 |     return float(v_overlap) / (v1 + v2 - v_overlap)
111 |
112 | def tiou(pred_traj, gt_traj):
113 |     """
114 |     compute overlap, no need to be consecutive
115 |     :param pred_traj:
116 |     :param gt_traj:
117 |     :param gt_duration:
118 |     :return:
119 |     """
120 |
121 |     thresh_s = [0.3, 0.5, 0.7]
122 |
123 |     t_op1, t_op2, t_op3 = 0, 0, 0
124 |
125 |     total = len(set(gt_traj.keys()) | set(pred_traj.keys()))
126 |     for i, fid in enumerate(gt_traj):
127 |         if fid not in pred_traj:
128 |             continue
129 |         sIoU = iou(gt_traj[fid], pred_traj[fid])
130 |         if sIoU >= thresh_s[0]:
131 |             t_op1 += 1
132 |         if sIoU >= thresh_s[1]:
133 |             t_op2 += 1
134 |         if sIoU >= thresh_s[2]:
135 |             t_op3 += 1
136 |
137 |     tov = (t_op1 + t_op2 + t_op3) * 1.0 / (3 * total)
138 |
139 |     # print('pred_traj: {}, gt_traj: {}, tiou: {:.6f}'.format(len(pred_traj), len(gt_traj), tov))
140 |
141 |     return tov
142 |
143 |
144 |
145 |
--------------------------------------------------------------------------------
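
A toy check of the overlap metrics above (assuming the repo root is the working directory so evaluations.common is importable), using frame-keyed trajectories in the shape eval_ground.py passes in.
```
from evaluations.common import iou, tiou

gt = {0: [10, 10, 50, 50], 1: [12, 10, 52, 50], 2: [14, 10, 54, 50]}
pred = {0: [12, 10, 52, 50], 1: [12, 10, 52, 50]}   # misses frame 2

print(round(iou(gt[0], pred[0]), 3))   # 0.907: per-frame box IoU
print(round(tiou(pred, gt), 3))        # 0.667: hit rate averaged over the
                                       # 0.3/0.5/0.7 thresholds and all frames
```
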
/evaluations/util.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : util.py
5 | # ====================================================
6 | import os.path as osp
7 | import json
8 |
9 |
10 | def load_file(file_name):
11 |
12 |     with open(file_name, 'r') as fp:
13 |         if osp.splitext(file_name)[1] == '.txt':
14 |             annos = fp.readlines()
15 |             annos = [line.rstrip() for line in annos]
16 |         if osp.splitext(file_name)[1] == '.json':
17 |             annos = json.load(fp)
18 |
19 |     return annos
20 |
21 |
22 | def iou(bbox_1, bbox_2):
23 |     """
24 |     Get IoU value of two bboxes
25 |     :param bbox_1:
26 |     :param bbox_2:
27 |     :return: IoU
28 |     """
29 |     w_1 = bbox_1[2] - bbox_1[0] + 1
30 |     h_1 = bbox_1[3] - bbox_1[1] + 1
31 |     w_2 = bbox_2[2] - bbox_2[0] + 1
32 |     h_2 = bbox_2[3] - bbox_2[1] + 1
33 |     area_1 = w_1 * h_1
34 |     area_2 = w_2 * h_2
35 |
36 |     overlap_bbox = (max(bbox_1[0], bbox_2[0]), max(bbox_1[1], bbox_2[1]),
37 |                     min(bbox_1[2], bbox_2[2]), min(bbox_1[3], bbox_2[3]))
38 |     overlap_w = max(0, (overlap_bbox[2] - overlap_bbox[0] + 1))
39 |     overlap_h = max(0, (overlap_bbox[3] - overlap_bbox[1] + 1))
40 |
41 |     overlap_area = overlap_w * overlap_h
42 |     union_area = area_1 + area_2 - overlap_area
43 |     IoU = overlap_area * 1.0 / union_area
44 |     return IoU
45 |
--------------------------------------------------------------------------------
/generate_track_link.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : generate_track.py
5 | # ====================================================
6 | from tools.util import load_file
7 | import os.path as osp
8 | import numpy as np
9 | import pickle as pkl
10 | from utils import save_results, sort_bbox, pkload, pkdump
11 | from tube import *
12 | import time
13 |
14 | sample_fnum = 120
15 | beta_thresh = 0.04
16 |
17 | def load_video_bbox(vname, feat_dir, nframe):
18 |     """
19 |     load bboxes for a video
20 |     :param vname:
21 |     :param feat_dir:
22 |     :param nframe:
23 |     :return:
24 |     """
25 |
26 |     video_feature_folder = osp.join(feat_dir, vname)
27 |     sample_frames = np.round(np.linspace(0, nframe - 1, sample_fnum))
28 |     sample_frames = [int(num) for num in sample_frames]
29 |     videos = []
30 |     for i, fid in enumerate(sample_frames):
31 |         frame_name = osp.join(video_feature_folder, str(fid).zfill(6) + '.pkl')
32 |         with open(frame_name, 'rb') as fp:
33 |             feat = pkl.load(fp)
34 |         bbox, classme = feat['bbox'].squeeze(), feat['cls_prob'].squeeze()
35 |         classme = classme[:, 1:]  # skip background
36 |         index = np.argmax(classme, 1)
37 |         bbox = np.asarray([bbox[i][4*(index[i]+1):4*(index[i]+1) + 4] for i in range(len(bbox))])
38 |         videos.append(bbox)
39 |     return videos, sample_frames
40 |
41 |
42 | def interpolate(sub_bboxes, obj_bboxes, valid_frame_idx, sample_frames, nframe):
43 |     """
44 |     linearly interpolate the missing bboxes
45 |     :param sub_bboxes:
46 |     :param obj_bboxes:
47 |     :param valid_frames:
48 |     :param nframe:
49 |     :return:
50 |     """
51 |     sub_bboxes = np.asarray(sub_bboxes)
52 |     obj_bboxes = np.asarray(obj_bboxes)
53 |
54 |     full_sub_bboxes = []
55 |     full_obj_bboxes = []
56 |
57 |     for i, id in enumerate(valid_frame_idx):
58 |
59 |         full_sub_bboxes.append(sub_bboxes[i])
60 |         full_obj_bboxes.append(obj_bboxes[i])
61 |         if i == len(valid_frame_idx)-1: break
62 |
63 |         pre_frame = sample_frames[id]
64 |         next_frame = sample_frames[id+1]
65 |         gap = next_frame - pre_frame
66 |         if gap == 1: continue
67 |         for mid in range(pre_frame+1, next_frame):
68 |             sub_bbox = (next_frame - mid) / gap * sub_bboxes[i] + (mid - pre_frame) / gap * sub_bboxes[i + 1]
69 |             obj_bbox = (next_frame - mid) / gap * obj_bboxes[i] + (mid - pre_frame) / gap * obj_bboxes[i + 1]
70 |             full_sub_bboxes.append(sub_bbox)
71 |             full_obj_bboxes.append(obj_bbox)
72 |
73 |     fnum = sample_frames[valid_frame_idx[-1]]-sample_frames[valid_frame_idx[0]]+1
74 |     assert len(full_sub_bboxes) == fnum, 'interpolate error'
75 |     full_sub_bboxes = [bbox.tolist() for bbox in full_sub_bboxes]
76 |     full_obj_bboxes = [bbox.tolist() for bbox in full_obj_bboxes]
77 |
78 |     return full_sub_bboxes, full_obj_bboxes
79 |
80 |
81 | def generate_track(val_list_file, results_file, feat_dir, bbox_dir, res_file):
82 |     """
83 |     generate tracklets from attention values
84 |     :param val_list_file:
85 |     :param results_dir:
86 |     :return:
87 |     """
88 |     val_list = load_file(val_list_file)
89 |     total_n = len(val_list)
90 |     pre_vname = ''
91 |     results, video_bboxes = None, None
92 |     sample_frames = None
93 |     results_all = load_file(results_file)
94 |
95 |     final_res = {}
96 |     video_res = {}
97 |
98 |     for i, sample in enumerate(val_list):
99 |
100 |         vname, nframe, width, height, relation = sample
101 |
102 |         # if vname != 'ILSVRC2015_train_00267002': continue
103 |         # if relation.split('-')[0] == relation.split('-')[-1]: continue
104 |         # if nframe <= 120: continue
105 |         if vname != pre_vname:
106 |             cache_file = osp.join(bbox_dir, vname + '.pkl')
107 |             data = pkload(cache_file)
108 |             if not (data is None):
109 |                 video_bboxes, sample_frames = data
110 |             else:
111 |                 video_bboxes, sample_frames = load_video_bbox(vname, feat_dir, nframe)
112 |                 pkdump((video_bboxes, sample_frames), cache_file)
113 |             results = results_all[vname]
114 |             if i > 0:
115 |                 final_res[pre_vname] = video_res
116 |                 video_res = {}
117 |             print('{}/{} {}'.format(i, total_n, vname))
118 |
119 |         alpha_s = np.array(results[relation]['sub'])
120 |         alpha_o = np.array(results[relation]['obj'])
121 |
122 |         beta1 = results[relation]['beta1']
123 |         beta2 = results[relation]['beta2']
124 |
125 |         # print(alpha_o.shape, beta1.shape)
126 |
127 |         nsample, nclip = len(beta1), len(beta2)
128 |         beta1 = np.asarray(beta1)
129 |         beta2 = np.asarray(beta2)
130 |         step = nsample//nclip
131 |         temp = np.zeros(nsample)
132 |         for cp in range(nclip):
133 |             temp[cp*step:(cp+1)*step] = beta2[cp] + beta1[cp*step:step*(cp+1)]
134 |
135 |         t1 = time.time()
136 |         sub_bboxes, obj_bboxes, sid, valid_frame_idx = link_bbox(video_bboxes, alpha_s, alpha_o,
137 |                                                                  temp, beta_thresh, sample_frames, nframe)
138 |         t2 = time.time()
139 |         if valid_frame_idx is None:
140 |             sub_bboxes = {}
141 |             obj_bboxes = {}
142 |         else:
143 |             if nframe > sample_fnum:
144 |                 sub_bboxes, obj_bboxes = interpolate(sub_bboxes, obj_bboxes, valid_frame_idx, sample_frames, nframe)
145 |
146 |             sid = sample_frames[sid]
147 |             sub_bboxes = {fid+sid: bbox for fid, bbox in enumerate(sub_bboxes)}
148 |             obj_bboxes = {fid+sid: bbox for fid, bbox in enumerate(obj_bboxes)}
149 |
150 |         ins = {"sub": sub_bboxes, "obj": obj_bboxes}
151 |         video_res[relation] = ins
152 |         # vis_prediction_online(ins, vname, relation)
153 |         pre_vname = vname
154 |
155 |         if i == total_n - 1:
156 |             final_res[vname] = video_res
157 |
158 |     save_results(res_file, final_res)
159 |
160 |
161 | def main(res_file):
162 |     data_dir = '../ground_data/'
163 |     dataset = 'vidvrd'
164 |     val_list_file = 'dataset/{}/vrelation_val.json'.format(dataset)
165 |     result_file = '{}/results/{}_batch.json'.format(data_dir, dataset)
166 |
167 |     feat_dir = osp.join(data_dir, dataset, 'frame_feature')
168 |     bbox_dir = osp.join(data_dir, dataset, 'bbox')
169 |     generate_track(val_list_file, result_file, feat_dir, bbox_dir, res_file)
170 |
171 |
172 | if __name__ == "__main__":
173 |     res_file = 'results/test_viterbi_1gap_04_batch.json'
174 |     main(res_file)
175 |
--------------------------------------------------------------------------------
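
A minimal numeric illustration of the linear interpolation used in interpolate() above: between two sampled frames that are gap apart, a box is blended with weights proportional to the distance from each endpoint. The frame numbers and boxes are made up for the demo.
```
import numpy as np

pre_frame, next_frame = 10, 14            # two consecutive sampled frames
gap = next_frame - pre_frame
b0 = np.array([0., 0., 10., 10.])         # subject box at frame 10
b1 = np.array([4., 0., 14., 10.])         # subject box at frame 14
for mid in range(pre_frame + 1, next_frame):
    box = (next_frame - mid) / gap * b0 + (mid - pre_frame) / gap * b1
    print(mid, box)                       # x-coords move 1 px per frame
```
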
/ground.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : ground.py
5 | # ====================================================
6 | from ground_relation import *
7 | from dataloader.ground_loader import *
8 | from dataloader.build_vocab import Vocabulary
9 | import os.path as osp
10 | import pickle as pkl
11 | from argparse import ArgumentParser
12 |
13 |
14 | batch_size = 32
15 | lr = 1e-4
16 | num_workers = 8
17 | epoch_num = 10
18 | cuda = True
19 | nframes, nbbox = 120, 40
20 |
21 | vis_step = 200
22 | save_step = 10000
23 | visual_dim = 2048+5 #visual appearance+bbox
24 |
25 | dataset = 'vidvrd/'
26 | root_dir = '/path/to/workspace/' #this directory includes two folders: ground_data and vRGV
27 |
28 | video_feature_path = osp.join(root_dir, 'ground_data/{}/frame_feature/'.format(dataset))
29 | video_feature_cache = osp.join(root_dir, 'ground_data/{}/video_feature/'.format(dataset))
30 |
31 | sample_list_path = osp.join('dataset/', dataset)
32 | vocab_file = osp.join(sample_list_path, 'vocab.pkl')
33 |
34 | checkpoint_path = osp.join('models', dataset)
35 | model_prefix = 'visual_bbox_trans_temp2'
36 |
37 | def main(args):
38 |
39 |     with open(vocab_file, 'rb') as fp:
40 |         vocab = pkl.load(fp)
41 |
42 |     data_loader = RelationLoader(batch_size, num_workers, video_feature_path, video_feature_cache,
43 |                                  sample_list_path, vocab, nframes, nbbox, visual_dim, True, False)
44 |
45 |     train_loader, val_loader = data_loader.run(mode=args.mode)
46 |
47 |     ground_relation = GroundRelation(vocab, train_loader, val_loader, checkpoint_path, model_prefix, vis_step, save_step, visual_dim,
48 |                                      lr, batch_size, epoch_num, cuda)
49 |
50 |     mode = args.mode
51 |     if mode == 'train':
52 |         ground_relation.run(pretrain=False)
53 |     elif mode == 'val':
54 |         # return relation-aware spatio-temporal attention for dynamically linking object proposals into trajectories
55 |         save_name = '../ground_data/results/vidvrd_batch.json'
56 |         ground_relation.ground_attention(7, save_name)
57 |
58 |
59 | if __name__ == "__main__":
60 |     torch.backends.cudnn.enabled = False
61 |     torch.manual_seed(666)
62 |     torch.cuda.manual_seed(666)
63 |     torch.backends.cudnn.benchmark = True
64 |
65 |     parser = ArgumentParser()
66 |     parser.add_argument('--mode', dest='mode', type=str, default='train', help='train or val')
67 |     args = parser.parse_args()
68 |     main(args)
69 |
--------------------------------------------------------------------------------
/ground.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #########################################################################
3 | # File Name: ground.sh
4 | # Author: Xiao Junbin
5 | # mail: xiaojunbin@u.nus.edu
6 | # Created Time: Mon 18 Nov 2019 03:37:25 PM +08
7 | #########################################################################
8 | GPU=$1
9 | MODE=$2
10 | CUDA_VISIBLE_DEVICES=$GPU python ground.py --mode $MODE
11 |
--------------------------------------------------------------------------------
/introduction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/introduction.png
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m:
--------------------------------------------------------------------------------
1 | function VOCopts = get_voc_opts(path)
2 |
3 | tmp = pwd;
4 | cd(path);
5 | try
6 |   addpath('VOCcode');
7 |   VOCinit;
8 | catch
9 |   rmpath('VOCcode');
10 |   cd(tmp);
11 |   error(sprintf('VOCcode directory not found under %s', path));
12 | end
13 | rmpath('VOCcode');
14 | cd(tmp);
15 |
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m:
--------------------------------------------------------------------------------
1 | function res = voc_eval(path, comp_id, test_set, output_dir)
2 |
3 | VOCopts = get_voc_opts(path);
4 | VOCopts.testset = test_set;
5 |
6 | for i = 1:length(VOCopts.classes)
7 |   cls = VOCopts.classes{i};
8 |   res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
9 | end
10 |
11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
12 | fprintf('Results:\n');
13 | aps = [res(:).ap]';
14 | fprintf('%.1f\n', aps * 100);
15 | fprintf('%.1f\n', mean(aps) * 100);
16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n');
17 |
18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
19 |
20 | test_set = VOCopts.testset;
21 | year = VOCopts.dataset(4:end);
22 |
23 | addpath(fullfile(VOCopts.datadir, 'VOCcode'));
24 |
25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls);
26 |
27 | recall = [];
28 | prec = [];
29 | ap = 0;
30 | ap_auc = 0;
31 |
32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
33 | if do_eval
34 |   % Bug in VOCevaldet requires that tic has been called first
35 |   tic;
36 |   [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
37 |   ap_auc = xVOCap(recall, prec);
38 |
39 |   % force plot limits
40 |   ylim([0 1]);
41 |   xlim([0 1]);
42 |
43 |   print(gcf, '-djpeg', '-r0', ...
44 |         [output_dir '/' cls '_pr.jpg']);
45 | end
46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);
47 |
48 | res.recall = recall;
49 | res.prec = prec;
50 | res.ap = ap;
51 | res.ap_auc = ap_auc;
52 |
53 | save([output_dir '/' cls '_pr.mat'], ...
54 |      'res', 'recall', 'prec', 'ap', 'ap_auc');
55 |
56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
57 |
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
1 | function ap = xVOCap(rec,prec)
2 | % From the PASCAL VOC 2011 devkit
3 |
4 | mrec=[0 ; rec ; 1];
5 | mpre=[0 ; prec ; 0];
6 | for i=numel(mpre)-1:-1:1
7 |   mpre(i)=max(mpre(i),mpre(i+1));
8 | end
9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
11 |
--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/coco.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/coco.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/ds_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/ds_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/factory.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/factory.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/imagenet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/imagenet.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/imdb.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/imdb.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/pascal_voc.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/pascal_voc.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/vg.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/vg.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/vg_eval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/vg_eval.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/__pycache__/voc_eval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/datasets/__pycache__/voc_eval.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Ross Girshick
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import numpy as np
11 |
12 |
13 | def unique_boxes(boxes, scale=1.0):
14 | """Return indices of unique boxes."""
15 | v = np.array([1, 1e3, 1e6, 1e9])
16 | hashes = np.round(boxes * scale).dot(v)
17 | _, index = np.unique(hashes, return_index=True)
18 | return np.sort(index)
19 |
20 |
21 | def xywh_to_xyxy(boxes):
22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))
24 |
25 |
26 | def xyxy_to_xywh(boxes):
27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))
29 |
30 |
31 | def validate_boxes(boxes, width=0, height=0):
32 | """Check that a set of boxes are valid."""
33 | x1 = boxes[:, 0]
34 | y1 = boxes[:, 1]
35 | x2 = boxes[:, 2]
36 | y2 = boxes[:, 3]
37 | assert (x1 >= 0).all()
38 | assert (y1 >= 0).all()
39 | assert (x2 >= x1).all()
40 | assert (y2 >= y1).all()
41 | assert (x2 < width).all()
42 | assert (y2 < height).all()
43 |
44 |
45 | def filter_small_boxes(boxes, min_size):
46 | w = boxes[:, 2] - boxes[:, 0]
47 | h = boxes[:, 3] - boxes[:, 1]
48 |     keep = np.where((w >= min_size) & (h >= min_size))[0]
49 | return keep
50 |
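A quick, self-contained sanity check of these helpers (values are made up; run inside this module's namespace or after `from datasets.ds_utils import *`):

import numpy as np

boxes_xywh = np.array([[10., 20., 30., 40.],
                       [10., 20., 30., 40.],   # exact duplicate
                       [15., 25., 30., 40.]])
xyxy = xywh_to_xyxy(boxes_xywh)                          # -> [[10, 20, 39, 59], ...]
assert np.array_equal(xyxy_to_xywh(xyxy), boxes_xywh)    # the two conversions round-trip
keep = unique_boxes(xyxy)                                # duplicate row hashed out
assert list(keep) == [0, 2]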
--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Factory method for easily getting imdbs by name."""
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | __sets = {}
14 | from datasets.pascal_voc import pascal_voc
15 | from datasets.coco import coco
16 | from datasets.imagenet import imagenet
17 | from datasets.vg import vg
18 |
19 | import numpy as np
20 |
21 | # Set up voc_<year>_<split>
22 | for year in ['2007', '2012']:
23 | for split in ['train', 'val', 'trainval', 'test']:
24 | name = 'voc_{}_{}'.format(year, split)
25 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))
26 |
27 | # Set up coco_2014_<split>
28 | for year in ['2014']:
29 | for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']:
30 | name = 'coco_{}_{}'.format(year, split)
31 | __sets[name] = (lambda split=split, year=year: coco(split, year))
32 |
33 | # Set up coco_2014_cap_<split>
34 | for year in ['2014']:
35 | for split in ['train', 'val', 'capval', 'valminuscapval', 'trainval']:
36 | name = 'coco_{}_{}'.format(year, split)
37 | __sets[name] = (lambda split=split, year=year: coco(split, year))
38 |
39 | # Set up coco_2015_<split>
40 | for year in ['2015']:
41 | for split in ['test', 'test-dev']:
42 | name = 'coco_{}_{}'.format(year, split)
43 | __sets[name] = (lambda split=split, year=year: coco(split, year))
44 |
45 | # Set up vg_<split>
46 | # for version in ['1600-400-20']:
47 | # for split in ['minitrain', 'train', 'minival', 'val', 'test']:
48 | # name = 'vg_{}_{}'.format(version,split)
49 | # __sets[name] = (lambda split=split, version=version: vg(version, split))
50 | for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']:
51 | for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']:
52 | name = 'vg_{}_{}'.format(version,split)
53 | __sets[name] = (lambda split=split, version=version: vg(version, split))
54 |
55 | # set up image net.
56 | for split in ['train', 'val', 'val1', 'val2', 'test']:
57 | name = 'imagenet_{}'.format(split)
58 | devkit_path = 'data/imagenet/ILSVRC/devkit'
59 | data_path = 'data/imagenet/ILSVRC'
60 | __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path))
61 |
62 | def get_imdb(name):
63 | """Get an imdb (image database) by name."""
64 | if name not in __sets:
65 | raise KeyError('Unknown dataset: {}'.format(name))
66 | return __sets[name]()
67 |
68 |
69 | def list_imdbs():
70 | """List all registered imdbs."""
71 | return list(__sets.keys())
72 |
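The `lambda split=split, year=year:` default arguments are what pin each loop iteration's values at definition time; a bare `lambda: pascal_voc(split, year)` would late-bind and every registered entry would construct the last dataset in the loop. Hypothetical usage, assuming `lib` is on PYTHONPATH and the corresponding data is in place:

from datasets.factory import get_imdb, list_imdbs

print(sorted(list_imdbs())[:3])          # peek at a few registered names
imdb = get_imdb('voc_2007_trainval')     # constructs pascal_voc('trainval', '2007')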
--------------------------------------------------------------------------------
/lib/datasets/tools/mcg_munge.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import sys
4 |
5 | """Hacky tool to convert file system layout of MCG boxes downloaded from
6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
7 | so that it's consistent with those computed by Jan Hosang (see:
8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
9 | computing/research/object-recognition-and-scene-understanding/how-
10 | good-are-detection-proposals-really/)
11 |
12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
14 | """
15 |
16 | def munge(src_dir):
17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat
19 |
20 | files = os.listdir(src_dir)
21 | for fn in files:
22 | base, ext = os.path.splitext(fn)
23 | # first 14 chars / first 22 chars / all chars + .mat
24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
25 | first = base[:14]
26 | second = base[:22]
27 | dst_dir = os.path.join('MCG', 'mat', first, second)
28 | if not os.path.exists(dst_dir):
29 | os.makedirs(dst_dir)
30 | src = os.path.join(src_dir, fn)
31 | dst = os.path.join(dst_dir, fn)
32 | print('MV: {} -> {}'.format(src, dst))
33 | os.rename(src, dst)
34 |
35 | if __name__ == '__main__':
36 | # src_dir should look something like:
37 | # src_dir = 'MCG-COCO-val2014-boxes'
38 | src_dir = sys.argv[1]
39 | munge(src_dir)
40 |
--------------------------------------------------------------------------------
/lib/datasets/vg_eval.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | # --------------------------------------------------------
3 | # Fast/er R-CNN
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Bharath Hariharan
6 | # --------------------------------------------------------
7 |
8 | import xml.etree.ElementTree as ET
9 | import os
10 | import numpy as np
11 | from .voc_eval import voc_ap
12 |
13 | def vg_eval( detpath,
14 | gt_roidb,
15 | image_index,
16 | classindex,
17 | ovthresh=0.5,
18 | use_07_metric=False,
19 | eval_attributes=False):
20 |     """rec, prec, ap, sorted_scores, npos = vg_eval(
21 | detpath,
22 | gt_roidb,
23 | image_index,
24 | classindex,
25 | [ovthresh],
26 | [use_07_metric])
27 |
28 | Top level function that does the Visual Genome evaluation.
29 |
30 | detpath: Path to detections
31 | gt_roidb: List of ground truth structs.
32 | image_index: List of image ids.
33 | classindex: Category index
34 | [ovthresh]: Overlap threshold (default = 0.5)
35 | [use_07_metric]: Whether to use VOC07's 11 point AP computation
36 | (default False)
37 | """
38 | # extract gt objects for this class
39 | class_recs = {}
40 | npos = 0
41 | for item,imagename in zip(gt_roidb,image_index):
42 | if eval_attributes:
43 | bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :]
44 | else:
45 | bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :]
46 |         difficult = np.zeros((bbox.shape[0],)).astype(bool)
47 | det = [False] * bbox.shape[0]
48 | npos = npos + sum(~difficult)
49 | class_recs[str(imagename)] = {'bbox': bbox,
50 | 'difficult': difficult,
51 | 'det': det}
52 | if npos == 0:
53 | # No ground truth examples
54 | return 0,0,0,0,npos
55 |
56 | # read dets
57 | with open(detpath, 'r') as f:
58 | lines = f.readlines()
59 | if len(lines) == 0:
60 | # No detection examples
61 | return 0,0,0,0,npos
62 |
63 | splitlines = [x.strip().split(' ') for x in lines]
64 | image_ids = [x[0] for x in splitlines]
65 | confidence = np.array([float(x[1]) for x in splitlines])
66 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
67 |
68 | # sort by confidence
69 | sorted_ind = np.argsort(-confidence)
70 | sorted_scores = -np.sort(-confidence)
71 | BB = BB[sorted_ind, :]
72 | image_ids = [image_ids[x] for x in sorted_ind]
73 |
74 | # go down dets and mark TPs and FPs
75 | nd = len(image_ids)
76 | tp = np.zeros(nd)
77 | fp = np.zeros(nd)
78 | for d in range(nd):
79 | R = class_recs[image_ids[d]]
80 | bb = BB[d, :].astype(float)
81 | ovmax = -np.inf
82 | BBGT = R['bbox'].astype(float)
83 |
84 | if BBGT.size > 0:
85 | # compute overlaps
86 | # intersection
87 | ixmin = np.maximum(BBGT[:, 0], bb[0])
88 | iymin = np.maximum(BBGT[:, 1], bb[1])
89 | ixmax = np.minimum(BBGT[:, 2], bb[2])
90 | iymax = np.minimum(BBGT[:, 3], bb[3])
91 | iw = np.maximum(ixmax - ixmin + 1., 0.)
92 | ih = np.maximum(iymax - iymin + 1., 0.)
93 | inters = iw * ih
94 |
95 | # union
96 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
97 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
98 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
99 |
100 | overlaps = inters / uni
101 | ovmax = np.max(overlaps)
102 | jmax = np.argmax(overlaps)
103 |
104 | if ovmax > ovthresh:
105 | if not R['difficult'][jmax]:
106 | if not R['det'][jmax]:
107 | tp[d] = 1.
108 | R['det'][jmax] = 1
109 | else:
110 | fp[d] = 1.
111 | else:
112 | fp[d] = 1.
113 |
114 | # compute precision recall
115 | fp = np.cumsum(fp)
116 | tp = np.cumsum(tp)
117 | rec = tp / float(npos)
118 | # avoid divide by zero in case the first detection matches a difficult
119 | # ground truth
120 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
121 | ap = voc_ap(rec, prec, use_07_metric)
122 |
123 | return rec, prec, ap, sorted_scores, npos
124 |
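The overlap computation above treats coordinates as inclusive pixel indices, hence the +1 terms. A tiny standalone check of that IoU convention, with made-up boxes:

import numpy as np

bb = np.array([0., 0., 9., 9.])                    # a 10x10 box
BBGT = np.array([[0., 0., 9., 9.],
                 [5., 5., 14., 14.]])
ixmin = np.maximum(BBGT[:, 0], bb[0]); iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2]); iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.); ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.)
       + (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
print(inters / uni)    # -> [1.0, 0.1428...]: identical box, then 25/175 overlap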
--------------------------------------------------------------------------------
/lib/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # CUDA_PATH=/usr/local/cuda/
4 |
5 | export CUDA_PATH=/usr/local/cuda/
6 | # You may also want to add the following
7 | #export C_INCLUDE_PATH=/opt/cuda/include
8 |
9 | export CXXFLAGS="-std=c++11"
10 | export CFLAGS="-std=c99"
11 |
12 | python setup.py build_ext --inplace
13 | rm -rf build
14 |
15 | CUDA_ARCH="-gencode arch=compute_70,code=sm_70 \
16 | -gencode arch=compute_60,code=sm_60 \
17 | -gencode arch=compute_52,code=sm_52 "
18 |
19 | # compile NMS
20 | cd model/nms/src
21 | echo "Compiling nms kernels by nvcc..."
22 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
23 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
24 |
25 | cd ../
26 | python build.py
27 |
28 | # compile roi_pooling
29 | cd ../../
30 | cd model/roi_pooling/src
31 | echo "Compiling roi pooling kernels by nvcc..."
32 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
33 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
34 | cd ../
35 | python build.py
36 |
37 | # compile roi_align
38 | cd ../../
39 | cd model/roi_align/src
40 | echo "Compiling roi align kernels by nvcc..."
41 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
42 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
43 | cd ../
44 | python build.py
45 |
46 | # compile roi_crop
47 | cd ../../
48 | cd model/roi_crop/src
49 | echo "Compiling roi crop kernels by nvcc..."
50 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
51 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
52 | cd ../
53 | python build.py
54 |
--------------------------------------------------------------------------------
/lib/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/__init__.py
--------------------------------------------------------------------------------
/lib/model/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/faster_rcnn/__init__.py
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/faster_rcnn/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/__pycache__/faster_rcnn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/faster_rcnn/__pycache__/faster_rcnn.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/__pycache__/resnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/faster_rcnn/__pycache__/resnet.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/__pycache__/vgg16.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/faster_rcnn/__pycache__/vgg16.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/faster_rcnn.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | import torchvision.models as models
7 | from torch.autograd import Variable
8 | import numpy as np
9 | from model.utils.config import cfg
10 | from model.rpn.rpn import _RPN
11 | from model.roi_pooling.modules.roi_pool import _RoIPooling
12 | from model.roi_crop.modules.roi_crop import _RoICrop
13 | from model.roi_align.modules.roi_align import RoIAlignAvg
14 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
15 | import time
16 | import pdb
17 | from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta
18 |
19 | class _fasterRCNN(nn.Module):
20 | """ faster RCNN """
21 | def __init__(self, classes, class_agnostic):
22 | super(_fasterRCNN, self).__init__()
23 | self.classes = classes
24 | self.n_classes = len(classes)
25 | self.class_agnostic = class_agnostic
26 | # loss
27 | self.RCNN_loss_cls = 0
28 | self.RCNN_loss_bbox = 0
29 |
30 | # define rpn
31 | self.RCNN_rpn = _RPN(self.dout_base_model)
32 | self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
33 | self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
34 | self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
35 |
36 | self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
37 | self.RCNN_roi_crop = _RoICrop()
38 |
39 |
40 | def forward(self, im_data, im_info, gt_boxes, num_boxes):
41 | batch_size = im_data.size(0)
42 |
43 | im_info = im_info.data
44 | gt_boxes = gt_boxes.data
45 | num_boxes = num_boxes.data
46 |
47 | # feed image data to base model to obtain base feature map
48 | base_feat = self.RCNN_base(im_data)
49 |
50 |         # feed base feature map to RPN to obtain rois
51 |
52 | rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
53 |
54 |         # if it is the training phase, then use ground-truth bboxes for refining
55 | if self.training:
56 | roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
57 | rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
58 |
59 | rois_label = Variable(rois_label.view(-1).long())
60 | rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
61 | rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
62 | rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
63 | else:
64 | rois_label = None
65 | rois_target = None
66 | rois_inside_ws = None
67 | rois_outside_ws = None
68 | rpn_loss_cls = 0
69 | rpn_loss_bbox = 0
70 |
71 | rois = Variable(rois)
72 | # do roi pooling based on predicted rois
73 |
74 | if cfg.POOLING_MODE == 'crop':
75 | # pdb.set_trace()
76 | # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
77 | grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
78 | grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
79 | pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
80 | if cfg.CROP_RESIZE_WITH_MAX_POOL:
81 | pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
82 | elif cfg.POOLING_MODE == 'align':
83 | pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
84 | elif cfg.POOLING_MODE == 'pool':
85 | pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
86 |
87 | # feed pooled features to top model
88 | pooled_feat = self._head_to_tail(pooled_feat)
89 |
90 | # compute bbox offset
91 | bbox_pred = self.RCNN_bbox_pred(pooled_feat)
92 | if self.training and not self.class_agnostic:
93 | # select the corresponding columns according to roi labels
94 | bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
95 | bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
96 | bbox_pred = bbox_pred_select.squeeze(1)
97 |
98 | # compute object classification probability
99 | cls_score = self.RCNN_cls_score(pooled_feat)
100 | cls_prob = F.softmax(cls_score, 1)
101 |
102 | RCNN_loss_cls = 0
103 | RCNN_loss_bbox = 0
104 |
105 | if self.training:
106 | # classification loss
107 | RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
108 |
109 | # bounding box regression L1 loss
110 | RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
111 |
112 |
113 | cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
114 | bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
115 |
116 | return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, pooled_feat, base_feat
117 |
118 | def _init_weights(self):
119 | def normal_init(m, mean, stddev, truncated=False):
120 | """
121 |         weight initializer: truncated normal and random normal.
122 | """
123 | # x is a parameter
124 | if truncated:
125 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
126 | else:
127 | m.weight.data.normal_(mean, stddev)
128 | m.bias.data.zero_()
129 |
130 | normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
131 | normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
132 | normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
133 | normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
134 | normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)
135 |
136 | def create_architecture(self):
137 | self._init_modules()
138 | self._init_weights()
139 |
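The class-specific branch in forward (the `torch.gather` over `bbox_pred_view`) picks, for each RoI, only the 4 regression deltas belonging to its labeled class. A minimal standalone illustration of that indexing, with toy shapes:

import torch

n_rois, n_classes = 3, 5
bbox_pred = torch.randn(n_rois, 4 * n_classes)          # per-class deltas, flattened
rois_label = torch.tensor([2, 0, 4])                    # one class id per RoI
view = bbox_pred.view(n_rois, n_classes, 4)
idx = rois_label.view(n_rois, 1, 1).expand(n_rois, 1, 4)
selected = torch.gather(view, 1, idx).squeeze(1)        # shape (n_rois, 4)
assert torch.equal(selected[1], view[1, 0])             # RoI 1 keeps its class-0 deltas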
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/vgg16.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Tensorflow Faster R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Xinlei Chen
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | from torch.autograd import Variable
14 | import math
15 | import torchvision.models as models
16 | from model.faster_rcnn.faster_rcnn import _fasterRCNN
17 | import pdb
18 |
19 | class vgg16(_fasterRCNN):
20 | def __init__(self, classes, pretrained=False, class_agnostic=False):
21 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth'
22 | self.dout_base_model = 512
23 | self.pretrained = pretrained
24 | self.class_agnostic = class_agnostic
25 |
26 | _fasterRCNN.__init__(self, classes, class_agnostic)
27 |
28 | def _init_modules(self):
29 | vgg = models.vgg16()
30 | if self.pretrained:
31 | print("Loading pretrained weights from %s" %(self.model_path))
32 | state_dict = torch.load(self.model_path)
33 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()})
34 |
35 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])
36 |
37 | # not using the last maxpool layer
38 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1])
39 |
40 | # Fix the layers before conv3:
41 | for layer in range(10):
42 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False
43 |
44 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model)
45 |
46 | self.RCNN_top = vgg.classifier
47 |
48 | # not using the last maxpool layer
49 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes)
50 |
51 | if self.class_agnostic:
52 | self.RCNN_bbox_pred = nn.Linear(4096, 4)
53 | else:
54 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes)
55 |
56 | def _head_to_tail(self, pool5):
57 |
58 | pool5_flat = pool5.view(pool5.size(0), -1)
59 | fc7 = self.RCNN_top(pool5_flat)
60 |
61 | return fc7
62 |
63 |
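The filtered-dict pattern in _init_modules loads only the checkpoint keys that exist in the target network and silently drops the rest. A minimal sketch of the same idiom on a toy module (all names here are illustrative, not from the repo):

import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
ckpt = {'0.weight': torch.zeros(8, 4), '0.bias': torch.zeros(8),
        'classifier.weight': torch.ones(1)}          # stale key, absent from net
net.load_state_dict({k: v for k, v in ckpt.items() if k in net.state_dict()},
                    strict=False)                    # strict=False tolerates keys the ckpt lacks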
--------------------------------------------------------------------------------
/lib/model/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/lib/model/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/__init__.py
--------------------------------------------------------------------------------
/lib/model/nms/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/nms/__pycache__/nms_cpu.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/__pycache__/nms_cpu.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/nms/__pycache__/nms_gpu.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/__pycache__/nms_gpu.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/nms/__pycache__/nms_wrapper.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/__pycache__/nms_wrapper.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/nms/_ext/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/_ext/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._nms import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/nms/_ext/nms/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/_ext/nms/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/nms/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 | #this_file = os.path.dirname(__file__)
7 |
8 | sources = []
9 | headers = []
10 | defines = []
11 | with_cuda = False
12 |
13 | if torch.cuda.is_available():
14 | print('Including CUDA code.')
15 | sources += ['src/nms_cuda.c']
16 | headers += ['src/nms_cuda.h']
17 | defines += [('WITH_CUDA', None)]
18 | with_cuda = True
19 |
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/nms_cuda_kernel.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 | print(extra_objects)
25 |
26 | ffi = create_extension(
27 | '_ext.nms',
28 | headers=headers,
29 | sources=sources,
30 | define_macros=defines,
31 | relative_to=__file__,
32 | with_cuda=with_cuda,
33 | extra_objects=extra_objects
34 | )
35 |
36 | if __name__ == '__main__':
37 | ffi.build()
38 |
--------------------------------------------------------------------------------
/lib/model/nms/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # CUDA_PATH=/usr/local/cuda/
4 |
5 | cd src
6 | echo "Compiling stnm kernels by nvcc..."
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
8 |
9 | cd ../
10 | python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_cpu.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import numpy as np
4 | import torch
5 |
6 | def nms_cpu(dets, thresh):
7 | dets = dets.numpy()
8 | x1 = dets[:, 0]
9 | y1 = dets[:, 1]
10 | x2 = dets[:, 2]
11 | y2 = dets[:, 3]
12 | scores = dets[:, 4]
13 |
14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
15 | order = scores.argsort()[::-1]
16 |
17 | keep = []
18 | while order.size > 0:
19 | i = order.item(0)
20 | keep.append(i)
21 | xx1 = np.maximum(x1[i], x1[order[1:]])
22 | yy1 = np.maximum(y1[i], y1[order[1:]])
23 | xx2 = np.minimum(x2[i], x2[order[1:]])
24 | yy2 = np.minimum(y2[i], y2[order[1:]])
25 |
26 | w = np.maximum(0.0, xx2 - xx1 + 1)
27 | h = np.maximum(0.0, yy2 - yy1 + 1)
28 | inter = w * h
29 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
30 |
31 | inds = np.where(ovr <= thresh)[0]
32 | order = order[inds + 1]
33 |
34 | return torch.IntTensor(keep)
35 |
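A smoke test for nms_cpu with hand-picked boxes (rows are [x1, y1, x2, y2, score]); the two heavily overlapping boxes collapse to the higher-scoring one. Run inside this module's namespace or after importing nms_cpu:

import torch

dets = torch.tensor([[10., 10., 50., 50., 0.90],
                     [12., 12., 52., 52., 0.80],       # IoU ~0.83 with box 0
                     [100., 100., 140., 140., 0.70]])
keep = nms_cpu(dets, 0.5)
print(keep)    # -> indices [0, 2]; box 1 is suppressed by box 0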
--------------------------------------------------------------------------------
/lib/model/nms/nms_gpu.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import torch
3 | import numpy as np
4 | from ._ext import nms
5 | import pdb
6 |
7 | def nms_gpu(dets, thresh):
8 | keep = dets.new(dets.size(0), 1).zero_().int()
9 | num_out = dets.new(1).zero_().int()
10 | nms.nms_cuda(keep, dets, num_out, thresh)
11 | keep = keep[:num_out[0]]
12 | return keep
13 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
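The host-side reduction at the end of _nms is the subtle part: each box owns col_blocks 64-bit words whose bits mark the boxes it suppresses, and each surviving box ORs its words into a running removal mask. A NumPy sketch of just that reduction (reduce_mask is a hypothetical helper; the mask matrix stands in for dev_mask copied back to the host):

import numpy as np

def reduce_mask(mask, n_boxes, threads=64):
    # mask[i, b] is a 64-bit word; bit t set means box i suppresses box b*threads + t.
    col_blocks = (n_boxes + threads - 1) // threads
    remv = np.zeros(col_blocks, dtype=np.uint64)
    keep = []
    for i in range(n_boxes):                      # boxes arrive sorted by score
        block, bit = divmod(i, threads)
        if not (int(remv[block]) >> bit) & 1:     # box i not suppressed so far
            keep.append(i)
            remv[block:] |= mask[i, block:]       # propagate everything i suppresses
    return keep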
--------------------------------------------------------------------------------
/lib/model/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | import torch
8 | from model.utils.config import cfg
9 | if torch.cuda.is_available():
10 | from model.nms.nms_gpu import nms_gpu
11 | from model.nms.nms_cpu import nms_cpu
12 |
13 | def nms(dets, thresh, force_cpu=False):
14 | """Dispatch to either CPU or GPU NMS implementations."""
15 | if dets.shape[0] == 0:
16 | return []
17 | # ---numpy version---
18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 | # ---pytorch version---
20 |
21 |     return nms_cpu(dets, thresh) if force_cpu or not torch.cuda.is_available() else nms_gpu(dets, thresh)
22 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <stdio.h>
3 | #include "nms_cuda_kernel.h"
4 |
5 | // this symbol will be resolved automatically from PyTorch libs
6 | extern THCState *state;
7 |
8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
9 | THCudaIntTensor *num_out, float nms_overlap_thresh) {
10 |
11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out),
12 | THCudaIntTensor_data(state, num_out),
13 | THCudaTensor_data(state, boxes_host),
14 | THCudaTensor_size(state, boxes_host, 0),
15 | THCudaTensor_size(state, boxes_host, 1),
16 | nms_overlap_thresh);
17 |
18 | return 1;
19 | }
20 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);
3 |
4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
5 | THCudaIntTensor *num_out, float nms_overlap_thresh);
6 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include <stdbool.h>
9 | #include <stdio.h>
10 | #include <vector>
11 | #include <iostream>
12 | #include "nms_cuda_kernel.h"
13 |
14 | #define CUDA_WARN(XXX) \
15 | do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \
16 | cudaGetErrorString(XXX) << ", at line " << __LINE__ \
17 | << std::endl; cudaDeviceSynchronize(); } while (0)
18 |
19 | #define CUDA_CHECK(condition) \
20 | /* Code block avoids redefinition of cudaError_t error */ \
21 | do { \
22 | cudaError_t error = condition; \
23 | if (error != cudaSuccess) { \
24 | std::cout << cudaGetErrorString(error) << std::endl; \
25 | } \
26 | } while (0)
27 |
28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
29 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
30 |
31 | __device__ inline float devIoU(float const * const a, float const * const b) {
32 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
33 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
34 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
35 | float interS = width * height;
36 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
37 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
38 | return interS / (Sa + Sb - interS);
39 | }
40 |
41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
42 | float *dev_boxes, unsigned long long *dev_mask) {
43 | const int row_start = blockIdx.y;
44 | const int col_start = blockIdx.x;
45 |
46 | // if (row_start > col_start) return;
47 |
48 | const int row_size =
49 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
50 | const int col_size =
51 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
52 |
53 | __shared__ float block_boxes[threadsPerBlock * 5];
54 | if (threadIdx.x < col_size) {
55 | block_boxes[threadIdx.x * 5 + 0] =
56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
57 | block_boxes[threadIdx.x * 5 + 1] =
58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
59 | block_boxes[threadIdx.x * 5 + 2] =
60 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
61 | block_boxes[threadIdx.x * 5 + 3] =
62 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
63 | block_boxes[threadIdx.x * 5 + 4] =
64 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
65 | }
66 | __syncthreads();
67 |
68 | if (threadIdx.x < row_size) {
69 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
70 | const float *cur_box = dev_boxes + cur_box_idx * 5;
71 | int i = 0;
72 | unsigned long long t = 0;
73 | int start = 0;
74 | if (row_start == col_start) {
75 | start = threadIdx.x + 1;
76 | }
77 | for (i = start; i < col_size; i++) {
78 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
79 | t |= 1ULL << i;
80 | }
81 | }
82 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
83 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
84 | }
85 | }
86 |
87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
88 | int boxes_dim, float nms_overlap_thresh) {
89 |
90 | float* boxes_dev = NULL;
91 | unsigned long long* mask_dev = NULL;
92 |
93 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
94 |
95 | CUDA_CHECK(cudaMalloc(&boxes_dev,
96 | boxes_num * boxes_dim * sizeof(float)));
97 | CUDA_CHECK(cudaMemcpy(boxes_dev,
98 | boxes_host,
99 | boxes_num * boxes_dim * sizeof(float),
100 | cudaMemcpyHostToDevice));
101 |
102 | CUDA_CHECK(cudaMalloc(&mask_dev,
103 | boxes_num * col_blocks * sizeof(unsigned long long)));
104 |
105 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
106 | DIVUP(boxes_num, threadsPerBlock));
107 | dim3 threads(threadsPerBlock);
108 |
109 | // printf("i am at line %d\n", boxes_num);
110 | // printf("i am at line %d\n", boxes_dim);
111 |
112 |   nms_kernel<<<blocks, threads>>>(boxes_num,
113 | nms_overlap_thresh,
114 | boxes_dev,
115 | mask_dev);
116 |
117 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
118 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
119 | mask_dev,
120 | sizeof(unsigned long long) * boxes_num * col_blocks,
121 | cudaMemcpyDeviceToHost));
122 |
123 |   std::vector<unsigned long long> remv(col_blocks);
124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
125 |
126 | // we need to create a memory for keep_out on cpu
127 | // otherwise, the following code cannot run
128 |
129 | int* keep_out_cpu = new int[boxes_num];
130 |
131 | int num_to_keep = 0;
132 | for (int i = 0; i < boxes_num; i++) {
133 | int nblock = i / threadsPerBlock;
134 | int inblock = i % threadsPerBlock;
135 |
136 | if (!(remv[nblock] & (1ULL << inblock))) {
137 |       // original: keep_out[num_to_keep++] = i;
138 | keep_out_cpu[num_to_keep++] = i;
139 | unsigned long long *p = &mask_host[0] + i * col_blocks;
140 | for (int j = nblock; j < col_blocks; j++) {
141 | remv[j] |= p[j];
142 | }
143 | }
144 | }
145 |
146 | // copy keep_out_cpu to keep_out on gpu
147 | CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice));
148 |
149 | // *num_out = num_to_keep;
150 |
151 | // original: *num_out = num_to_keep;
152 | // copy num_to_keep to num_out on gpu
153 |
154 | CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice));
155 |
156 | // release cuda memory
157 | CUDA_CHECK(cudaFree(boxes_dev));
158 | CUDA_CHECK(cudaFree(mask_dev));
159 | // release cpu memory
160 | delete []keep_out_cpu;
161 | }
162 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/nms/src/nms_cuda_kernel.cu.o
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
6 | int boxes_dim, float nms_overlap_thresh);
7 |
8 | #ifdef __cplusplus
9 | }
10 | #endif
11 |
--------------------------------------------------------------------------------
/lib/model/roi_align/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/_ext/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_align import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/roi_align/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/_ext/roi_align/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/roi_align/_roi_align.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/_ext/roi_align/_roi_align.so
--------------------------------------------------------------------------------
/lib/model/roi_align/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 | sources = ['src/roi_align.c']
7 | headers = ['src/roi_align.h']
8 | extra_objects = []
9 | #sources = []
10 | #headers = []
11 | defines = []
12 | with_cuda = False
13 |
14 | this_file = os.path.dirname(os.path.realpath(__file__))
15 | print(this_file)
16 |
17 | if torch.cuda.is_available():
18 | print('Including CUDA code.')
19 | sources += ['src/roi_align_cuda.c']
20 | headers += ['src/roi_align_cuda.h']
21 | defines += [('WITH_CUDA', None)]
22 | with_cuda = True
23 |
24 | extra_objects = ['src/roi_align_kernel.cu.o']
25 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
26 |
27 | ffi = create_extension(
28 | '_ext.roi_align',
29 | headers=headers,
30 | sources=sources,
31 | define_macros=defines,
32 | relative_to=__file__,
33 | with_cuda=with_cuda,
34 | extra_objects=extra_objects
35 | )
36 |
37 | if __name__ == '__main__':
38 | ffi.build()
39 |
--------------------------------------------------------------------------------
/lib/model/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/functions/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/functions/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_align/functions/__pycache__/roi_align.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/functions/__pycache__/roi_align.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import roi_align
4 |
5 |
6 | # TODO use save_for_backward instead
7 | class RoIAlignFunction(Function):
8 | def __init__(self, aligned_height, aligned_width, spatial_scale):
9 | self.aligned_width = int(aligned_width)
10 | self.aligned_height = int(aligned_height)
11 | self.spatial_scale = float(spatial_scale)
12 | self.rois = None
13 | self.feature_size = None
14 |
15 | def forward(self, features, rois):
16 | self.rois = rois
17 | self.feature_size = features.size()
18 |
19 | batch_size, num_channels, data_height, data_width = features.size()
20 | num_rois = rois.size(0)
21 |
22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
23 | if features.is_cuda:
24 | roi_align.roi_align_forward_cuda(self.aligned_height,
25 | self.aligned_width,
26 | self.spatial_scale, features,
27 | rois, output)
28 | else:
29 | roi_align.roi_align_forward(self.aligned_height,
30 | self.aligned_width,
31 | self.spatial_scale, features,
32 | rois, output)
33 | # raise NotImplementedError
34 |
35 | return output
36 |
37 | def backward(self, grad_output):
38 | assert(self.feature_size is not None and grad_output.is_cuda)
39 |
40 | batch_size, num_channels, data_height, data_width = self.feature_size
41 |
42 | grad_input = self.rois.new(batch_size, num_channels, data_height,
43 | data_width).zero_()
44 | roi_align.roi_align_backward_cuda(self.aligned_height,
45 | self.aligned_width,
46 | self.spatial_scale, grad_output,
47 | self.rois, grad_input)
48 |
49 | # print grad_input
50 |
51 | return grad_input, None
52 |
--------------------------------------------------------------------------------
/lib/model/roi_align/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CUDA_PATH=/usr/local/cuda/
4 |
5 | cd src
6 | echo "Compiling my_lib kernels by nvcc..."
7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
8 |
9 | cd ../
10 | python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/modules/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/modules/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_align/modules/__pycache__/roi_align.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/modules/__pycache__/roi_align.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.nn.functional import avg_pool2d, max_pool2d
3 | from ..functions.roi_align import RoIAlignFunction
4 |
5 |
6 | class RoIAlign(Module):
7 | def __init__(self, aligned_height, aligned_width, spatial_scale):
8 | super(RoIAlign, self).__init__()
9 |
10 | self.aligned_width = int(aligned_width)
11 | self.aligned_height = int(aligned_height)
12 | self.spatial_scale = float(spatial_scale)
13 |
14 | def forward(self, features, rois):
15 | return RoIAlignFunction(self.aligned_height, self.aligned_width,
16 | self.spatial_scale)(features, rois)
17 |
18 | class RoIAlignAvg(Module):
19 | def __init__(self, aligned_height, aligned_width, spatial_scale):
20 | super(RoIAlignAvg, self).__init__()
21 |
22 | self.aligned_width = int(aligned_width)
23 | self.aligned_height = int(aligned_height)
24 | self.spatial_scale = float(spatial_scale)
25 |
26 | def forward(self, features, rois):
27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1,
28 | self.spatial_scale)(features, rois)
29 | return avg_pool2d(x, kernel_size=2, stride=1)
30 |
31 | class RoIAlignMax(Module):
32 | def __init__(self, aligned_height, aligned_width, spatial_scale):
33 | super(RoIAlignMax, self).__init__()
34 |
35 | self.aligned_width = int(aligned_width)
36 | self.aligned_height = int(aligned_height)
37 | self.spatial_scale = float(spatial_scale)
38 |
39 | def forward(self, features, rois):
40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1,
41 | self.spatial_scale)(features, rois)
42 | return max_pool2d(x, kernel_size=2, stride=1)
43 |
--------------------------------------------------------------------------------
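A minimal usage sketch for the modules above (not a file in this repo; it assumes the legacy PyTorch (<= 0.4) FFI extension has been built via make.sh, a CUDA device is available, and the lib directory is importable; per the CUDA wrapper a few files below, rois must be an (N, 5) tensor of [batch_index, x1, y1, x2, y2]):

    import torch
    from lib.model.roi_align.modules.roi_align import RoIAlignAvg

    # 1/16 spatial scale matches a stride-16 backbone feature map (VGG16/ResNet conv)
    roi_align = RoIAlignAvg(7, 7, 1.0 / 16.0)

    features = torch.randn(1, 512, 38, 50).cuda()                 # (B, C, H, W) feature map
    rois = torch.tensor([[0, 16.0, 16.0, 320.0, 240.0]]).cuda()   # [batch_idx, x1, y1, x2, y2]

    pooled = roi_align(features, rois)                            # -> (1, 512, 7, 7)

--------------------------------------------------------------------------------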
/lib/model/roi_align/src/roi_align.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
3 |
4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
5 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad);
6 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <math.h>
3 | #include "roi_align_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 |
16 | // Number of ROIs
17 | int num_rois = THCudaTensor_size(state, rois, 0);
18 | int size_rois = THCudaTensor_size(state, rois, 1);
19 | if (size_rois != 5)
20 | {
21 | return 0;
22 | }
23 |
24 | // data height
25 | int data_height = THCudaTensor_size(state, features, 2);
26 | // data width
27 | int data_width = THCudaTensor_size(state, features, 3);
28 | // Number of channels
29 | int num_channels = THCudaTensor_size(state, features, 1);
30 |
31 | cudaStream_t stream = THCState_getCurrentStream(state);
32 |
33 | ROIAlignForwardLaucher(
34 | data_flat, spatial_scale, num_rois, data_height,
35 | data_width, num_channels, aligned_height,
36 | aligned_width, rois_flat,
37 | output_flat, stream);
38 |
39 | return 1;
40 | }
41 |
42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
44 | {
45 | // Grab the input tensor
46 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
47 | float * rois_flat = THCudaTensor_data(state, rois);
48 |
49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
50 |
51 | // Number of ROIs
52 | int num_rois = THCudaTensor_size(state, rois, 0);
53 | int size_rois = THCudaTensor_size(state, rois, 1);
54 | if (size_rois != 5)
55 | {
56 | return 0;
57 | }
58 |
59 | // batch size
60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
61 | // data height
62 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
63 | // data width
64 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
65 | // Number of channels
66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
67 |
68 | cudaStream_t stream = THCState_getCurrentStream(state);
69 | ROIAlignBackwardLaucher(
70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
71 | data_width, num_channels, aligned_height,
72 | aligned_width, rois_flat,
73 | bottom_grad_flat, stream);
74 |
75 | return 1;
76 | }
77 |
--------------------------------------------------------------------------------
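One behavior worth flagging in roi_align_cuda.c above: when rois does not have exactly 5 columns, both wrappers return 0 instead of raising, and since the Python side ignores the return value, a mis-shaped rois tensor silently yields the pre-zeroed output. A defensive check before the call (a suggestion, not in the repo) avoids the silent failure:

    assert rois.dim() == 2 and rois.size(1) == 5, "rois must be (N, 5): [batch_idx, x1, y1, x2, y2]"

--------------------------------------------------------------------------------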
/lib/model/roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
3 |
4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
6 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_align/src/roi_align_kernel.cu.o
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _ROI_ALIGN_KERNEL
2 | #define _ROI_ALIGN_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
9 | const float spatial_scale, const int height, const int width,
10 | const int channels, const int aligned_height, const int aligned_width,
11 | const float* bottom_rois, float* top_data);
12 |
13 | int ROIAlignForwardLaucher(
14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
15 | const int width, const int channels, const int aligned_height,
16 | const int aligned_width, const float* bottom_rois,
17 | float* top_data, cudaStream_t stream);
18 |
19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
20 | const float spatial_scale, const int height, const int width,
21 | const int channels, const int aligned_height, const int aligned_width,
22 | float* bottom_diff, const float* bottom_rois);
23 |
24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
25 | const int height, const int width, const int channels, const int aligned_height,
26 | const int aligned_width, const float* bottom_rois,
27 | float* bottom_diff, cudaStream_t stream);
28 |
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 |
33 | #endif
34 |
35 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/_ext/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/crop_resize/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._crop_resize import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | locals[symbol] = _wrap_function(fn, _ffi)
10 | __all__.append(symbol)
11 |
12 | _import_symbols(locals())
13 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/crop_resize/_crop_resize.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/_ext/crop_resize/_crop_resize.so
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/roi_crop/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_crop import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
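Both _ext packages use the same torch.utils.ffi convention: the compiled .so exposes C symbols, and _import_symbols copies each one into the package namespace so callers can invoke, e.g., roi_crop.BilinearSamplerBHWD_updateOutput_cuda(...) as a plain function. A plain-Python sketch of what the loop amounts to (FakeLib is illustrative; the real code additionally wraps callables with _wrap_function(fn, _ffi)):

    class FakeLib:                        # stands in for the cffi-bound library
        @staticmethod
        def some_c_symbol():
            return 'called into C'

    _lib = FakeLib()
    namespace, exported = {}, []
    for symbol in dir(_lib):
        if symbol.startswith('_'):        # a real cffi lib only lists exported symbols
            continue
        fn = getattr(_lib, symbol)
        if callable(fn):
            namespace[symbol] = fn
            exported.append(symbol)

    print(exported)                       # ['some_c_symbol']
    print(namespace['some_c_symbol']())   # called into C

--------------------------------------------------------------------------------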
/lib/model/roi_crop/_ext/roi_crop/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/_ext/roi_crop/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/roi_crop/_roi_crop.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/_ext/roi_crop/_roi_crop.so
--------------------------------------------------------------------------------
/lib/model/roi_crop/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 | #this_file = os.path.dirname(__file__)
7 |
8 | sources = ['src/roi_crop.c']
9 | headers = ['src/roi_crop.h']
10 | defines = []
11 | with_cuda = False
12 |
13 | if torch.cuda.is_available():
14 | print('Including CUDA code.')
15 | sources += ['src/roi_crop_cuda.c']
16 | headers += ['src/roi_crop_cuda.h']
17 | defines += [('WITH_CUDA', None)]
18 | with_cuda = True
19 |
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] if with_cuda else []  # avoid linking the CUDA object in CPU-only builds
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 |
25 | ffi = create_extension(
26 | '_ext.roi_crop',
27 | headers=headers,
28 | sources=sources,
29 | define_macros=defines,
30 | relative_to=__file__,
31 | with_cuda=with_cuda,
32 | extra_objects=extra_objects
33 | )
34 |
35 | if __name__ == '__main__':
36 | ffi.build()
37 |
--------------------------------------------------------------------------------
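build.py depends on torch.utils.ffi.create_extension, which exists only in pre-1.0 PyTorch (later versions replaced it with torch.utils.cpp_extension), so the whole _ext pipeline assumes a legacy environment. The build order is: make.sh compiles the .cu kernel into src/roi_crop_cuda_kernel.cu.o, then build.py links it together with the C sources into _ext/roi_crop/_roi_crop.so. A quick smoke test after building (a sketch, assuming the lib directory is on sys.path):

    from lib.model.roi_crop._ext import roi_crop

    # the CUDA entry points should now be module-level callables
    assert hasattr(roi_crop, 'BilinearSamplerBHWD_updateOutput_cuda')
    print('roi_crop extension loaded OK')

--------------------------------------------------------------------------------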
/lib/model/roi_crop/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/functions/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/__pycache__/roi_crop.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/functions/__pycache__/roi_crop.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/crop_resize.py:
--------------------------------------------------------------------------------
1 | # functions/crop_resize.py
2 | import torch
3 | from torch.autograd import Function
4 | from .._ext import roi_crop
5 | from cffi import FFI
6 | ffi = FFI()
7 |
8 | class RoICropFunction(Function):
9 | def forward(self, input1, input2):
10 | self.input1 = input1
11 | self.input2 = input2
12 | self.device_c = ffi.new("int *")
13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2])
14 |         #print('device %d' % torch.cuda.current_device())
15 | if input1.is_cuda:
16 | self.device = torch.cuda.current_device()
17 | else:
18 | self.device = -1
19 | self.device_c[0] = self.device
20 | if not input1.is_cuda:
21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output)
22 | else:
23 | output = output.cuda(self.device)
24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
25 | return output
26 |
27 | def backward(self, grad_output):
28 | grad_input1 = torch.zeros(self.input1.size())
29 | grad_input2 = torch.zeros(self.input2.size())
30 |         #print('backward device %d' % self.device)
31 | if not grad_output.is_cuda:
32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output)
33 | else:
34 | grad_input1 = grad_input1.cuda(self.device)
35 | grad_input2 = grad_input2.cuda(self.device)
36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
37 | return grad_input1, grad_input2
38 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/gridgen.py:
--------------------------------------------------------------------------------
1 | # functions/gridgen.py
2 | import torch
3 | from torch.autograd import Function
4 | import numpy as np
5 |
6 |
7 | class AffineGridGenFunction(Function):
8 | def __init__(self, height, width,lr=1):
9 | super(AffineGridGenFunction, self).__init__()
10 | self.lr = lr
11 | self.height, self.width = height, width
12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32)
13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0)
14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0)
15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0)
16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0)
17 |         self.grid[:,:,2] = np.ones([self.height, self.width])
18 | self.grid = torch.from_numpy(self.grid.astype(np.float32))
19 | #print(self.grid)
20 |
21 | def forward(self, input1):
22 | self.input1 = input1
23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
25 | for i in range(input1.size(0)):
26 |             self.batchgrid[i] = self.grid.type_as(self.batchgrid[i])  # fix: torch tensors use type_as, not numpy's astype
27 |
28 | # if input1.is_cuda:
29 | # self.batchgrid = self.batchgrid.cuda()
30 | # output = output.cuda()
31 |
32 |         # torch.bmm already operates on the whole batch, so the original per-sample loop was redundant
33 |         output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)
34 |
35 | return output
36 |
37 | def backward(self, grad_output):
38 |
39 | grad_input1 = self.input1.new(self.input1.size()).zero_()
40 |
41 | # if grad_output.is_cuda:
42 | # self.batchgrid = self.batchgrid.cuda()
43 | # grad_input1 = grad_input1.cuda()
44 |
45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3))
46 | return grad_input1
47 |
--------------------------------------------------------------------------------
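AffineGridGenFunction precomputes a (height, width, 3) grid of homogeneous normalized coordinates (y, x, 1) and batch-multiplies it with the 2x3 affine matrices in input1, so every output location ends up holding the (y, x) source coordinate to sample from. Modern PyTorch ships the same operation as F.affine_grid, up to the channel order (affine_grid emits (x, y) while this code emits (y, x)) and the exact endpoint convention of the coordinate range. A sketch of the correspondence (theta values are arbitrary):

    import torch
    import torch.nn.functional as F

    theta = torch.tensor([[[1.0, 0.0, 0.2],
                           [0.0, 1.0, -0.1]]])      # (B, 2, 3) affine matrices
    grid = F.affine_grid(theta, size=(1, 1, 4, 5), align_corners=False)
    print(grid.shape)                               # torch.Size([1, 4, 5, 2]), last dim is (x, y)

--------------------------------------------------------------------------------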
/lib/model/roi_crop/functions/roi_crop.py:
--------------------------------------------------------------------------------
1 | # functions/roi_crop.py
2 | import torch
3 | from torch.autograd import Function
4 | from .._ext import roi_crop
5 | import pdb
6 |
7 | class RoICropFunction(Function):
8 | def forward(self, input1, input2):
9 | self.input1 = input1.clone()
10 | self.input2 = input2.clone()
11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_()
12 |         assert output.get_device() == input1.get_device(), "output and input1 must be on the same device"
13 |         assert output.get_device() == input2.get_device(), "output and input2 must be on the same device"
14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
15 | return output
16 |
17 | def backward(self, grad_output):
18 | grad_input1 = self.input1.new(self.input1.size()).zero_()
19 | grad_input2 = self.input2.new(self.input2.size()).zero_()
20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
21 | return grad_input1, grad_input2
22 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CUDA_PATH=/usr/local/cuda/
4 |
5 | cd src
6 | echo "Compiling roi_crop kernels with nvcc..."
7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
8 |
9 | cd ../
10 | python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/modules/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/__pycache__/roi_crop.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/modules/__pycache__/roi_crop.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/roi_crop.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_crop import RoICropFunction
3 |
4 | class _RoICrop(Module):
5 | def __init__(self, layout = 'BHWD'):
6 | super(_RoICrop, self).__init__()
7 | def forward(self, input1, input2):
8 | return RoICropFunction()(input1, input2)
9 |
--------------------------------------------------------------------------------
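As the comments in roi_crop_cuda.c below note, the sampler expects grids in (y, x) order, while affine grid generators conventionally emit (x, y); faster-rcnn-style callers therefore swap the last channel before invoking _RoICrop. A small sketch of that swap (tensor names and shapes are illustrative):

    import torch

    grid_xy = torch.rand(8, 7, 7, 2) * 2 - 1                      # (B, H, W, 2) grid in [-1, 1], (x, y) order
    grid_yx = torch.stack([grid_xy[..., 1], grid_xy[..., 0]], 3)  # reorder to the (y, x) the sampler assumes

--------------------------------------------------------------------------------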
/lib/model/roi_crop/src/roi_crop.h:
--------------------------------------------------------------------------------
1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);
2 |
3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput);
5 |
6 |
7 |
8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);
9 |
10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput);
12 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <stdbool.h>
3 | #include <stdio.h>
4 | #include "roi_crop_cuda_kernel.h"
5 |
6 | #define real float
7 |
8 | // this symbol will be resolved automatically from PyTorch libs
9 | extern THCState *state;
10 |
11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
12 | // we assume BHWD format in inputImages
13 | // we assume BHW(YX) format on grids
14 |
15 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){
16 | // THCState *state = getCutorchState(L);
17 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
18 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
19 | // THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
20 |
21 | int success = 0;
22 | success = BilinearSamplerBHWD_updateOutput_cuda_kernel(THCudaTensor_size(state, output, 1),
23 | THCudaTensor_size(state, output, 3),
24 | THCudaTensor_size(state, output, 2),
25 | THCudaTensor_size(state, output, 0),
26 | THCudaTensor_size(state, inputImages, 1),
27 | THCudaTensor_size(state, inputImages, 2),
28 | THCudaTensor_size(state, inputImages, 3),
29 | THCudaTensor_size(state, inputImages, 0),
30 | THCudaTensor_data(state, inputImages),
31 | THCudaTensor_stride(state, inputImages, 0),
32 | THCudaTensor_stride(state, inputImages, 1),
33 | THCudaTensor_stride(state, inputImages, 2),
34 | THCudaTensor_stride(state, inputImages, 3),
35 | THCudaTensor_data(state, grids),
36 | THCudaTensor_stride(state, grids, 0),
37 | THCudaTensor_stride(state, grids, 3),
38 | THCudaTensor_stride(state, grids, 1),
39 | THCudaTensor_stride(state, grids, 2),
40 | THCudaTensor_data(state, output),
41 | THCudaTensor_stride(state, output, 0),
42 | THCudaTensor_stride(state, output, 1),
43 | THCudaTensor_stride(state, output, 2),
44 | THCudaTensor_stride(state, output, 3),
45 | THCState_getCurrentStream(state));
46 |
47 | //check for errors
48 | if (!success) {
49 | THError("aborting");
50 | }
51 | return 1;
52 | }
53 |
54 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
55 | THCudaTensor *gradGrids, THCudaTensor *gradOutput)
56 | {
57 | // THCState *state = getCutorchState(L);
58 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
59 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
60 | // THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
61 | // THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor");
62 | // THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor");
63 |
64 | int success = 0;
65 | success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(THCudaTensor_size(state, gradOutput, 1),
66 | THCudaTensor_size(state, gradOutput, 3),
67 | THCudaTensor_size(state, gradOutput, 2),
68 | THCudaTensor_size(state, gradOutput, 0),
69 | THCudaTensor_size(state, inputImages, 1),
70 | THCudaTensor_size(state, inputImages, 2),
71 | THCudaTensor_size(state, inputImages, 3),
72 | THCudaTensor_size(state, inputImages, 0),
73 | THCudaTensor_data(state, inputImages),
74 | THCudaTensor_stride(state, inputImages, 0),
75 | THCudaTensor_stride(state, inputImages, 1),
76 | THCudaTensor_stride(state, inputImages, 2),
77 | THCudaTensor_stride(state, inputImages, 3),
78 | THCudaTensor_data(state, grids),
79 | THCudaTensor_stride(state, grids, 0),
80 | THCudaTensor_stride(state, grids, 3),
81 | THCudaTensor_stride(state, grids, 1),
82 | THCudaTensor_stride(state, grids, 2),
83 | THCudaTensor_data(state, gradInputImages),
84 | THCudaTensor_stride(state, gradInputImages, 0),
85 | THCudaTensor_stride(state, gradInputImages, 1),
86 | THCudaTensor_stride(state, gradInputImages, 2),
87 | THCudaTensor_stride(state, gradInputImages, 3),
88 | THCudaTensor_data(state, gradGrids),
89 | THCudaTensor_stride(state, gradGrids, 0),
90 | THCudaTensor_stride(state, gradGrids, 3),
91 | THCudaTensor_stride(state, gradGrids, 1),
92 | THCudaTensor_stride(state, gradGrids, 2),
93 | THCudaTensor_data(state, gradOutput),
94 | THCudaTensor_stride(state, gradOutput, 0),
95 | THCudaTensor_stride(state, gradOutput, 1),
96 | THCudaTensor_stride(state, gradOutput, 2),
97 | THCudaTensor_stride(state, gradOutput, 3),
98 | THCState_getCurrentStream(state));
99 |
100 | //check for errors
101 | if (!success) {
102 | THError("aborting");
103 | }
104 | return 1;
105 | }
106 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.h:
--------------------------------------------------------------------------------
1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
2 | // we assume BHWD format in inputImages
3 | // we assume BHW(YX) format on grids
4 |
5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output);
6 |
7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput);
9 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_crop/src/roi_crop_cuda_kernel.cu.o
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 |
6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc,
7 | /*output->size[2]*/int ow,
8 | /*output->size[1]*/int oh,
9 | /*output->size[0]*/int ob,
10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic,
11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih,
12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw,
13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib,
14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw,
17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream);
18 |
19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc,
20 | /*gradOutput->size[2]*/int gow,
21 | /*gradOutput->size[1]*/int goh,
22 | /*gradOutput->size[0]*/int gob,
23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic,
24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih,
25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw,
26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib,
27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw,
30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw,
31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw,
32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream);
33 |
34 |
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/_ext/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_pooling import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/roi_pooling/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/_ext/roi_pooling/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/roi_pooling/_roi_pooling.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/_ext/roi_pooling/_roi_pooling.so
--------------------------------------------------------------------------------
/lib/model/roi_pooling/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 |
7 | sources = ['src/roi_pooling.c']
8 | headers = ['src/roi_pooling.h']
9 | extra_objects = []
10 | defines = []
11 | with_cuda = False
12 |
13 | this_file = os.path.dirname(os.path.realpath(__file__))
14 | print(this_file)
15 |
16 | if torch.cuda.is_available():
17 | print('Including CUDA code.')
18 | sources += ['src/roi_pooling_cuda.c']
19 | headers += ['src/roi_pooling_cuda.h']
20 | defines += [('WITH_CUDA', None)]
21 | with_cuda = True
22 | extra_objects = ['src/roi_pooling.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 |
25 | ffi = create_extension(
26 | '_ext.roi_pooling',
27 | headers=headers,
28 | sources=sources,
29 | define_macros=defines,
30 | relative_to=__file__,
31 | with_cuda=with_cuda,
32 | extra_objects=extra_objects
33 | )
34 |
35 | if __name__ == '__main__':
36 | ffi.build()
37 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/functions/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/__pycache__/roi_pool.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/functions/__pycache__/roi_pool.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import roi_pooling
4 | import pdb
5 |
6 | class RoIPoolFunction(Function):
7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale):
8 | ctx.pooled_width = pooled_width
9 | ctx.pooled_height = pooled_height
10 | ctx.spatial_scale = spatial_scale
11 | ctx.feature_size = None
12 |
13 | def forward(ctx, features, rois):
14 | ctx.feature_size = features.size()
15 | batch_size, num_channels, data_height, data_width = ctx.feature_size
16 | num_rois = rois.size(0)
17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
19 | ctx.rois = rois
20 | if not features.is_cuda:
21 | _features = features.permute(0, 2, 3, 1)
22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
23 | _features, rois, output)
24 | else:
25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
26 | features, rois, output, ctx.argmax)
27 |
28 | return output
29 |
30 | def backward(ctx, grad_output):
31 | assert(ctx.feature_size is not None and grad_output.is_cuda)
32 | batch_size, num_channels, data_height, data_width = ctx.feature_size
33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()
34 |
35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
36 | grad_output, ctx.rois, grad_input, ctx.argmax)
37 |
38 | return grad_input, None
39 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/modules/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/__pycache__/roi_pool.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/modules/__pycache__/roi_pool.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_pool import RoIPoolFunction
3 |
4 |
5 | class _RoIPooling(Module):
6 | def __init__(self, pooled_height, pooled_width, spatial_scale):
7 | super(_RoIPooling, self).__init__()
8 |
9 | self.pooled_width = int(pooled_width)
10 | self.pooled_height = int(pooled_height)
11 | self.spatial_scale = float(spatial_scale)
12 |
13 | def forward(self, features, rois):
14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
15 |
--------------------------------------------------------------------------------
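For contrast with lib/model/roi_align: _RoIPooling snaps each RoI to integer bin boundaries and max-pools whatever cells fall inside (the floor/ceil arithmetic in roi_pooling.c below), whereas RoIAlign samples at fractional positions with bilinear interpolation. The quantization makes RoIPool slightly cheaper but less accurate for small boxes, which is the misalignment RoIAlign was introduced (in Mask R-CNN) to remove.

--------------------------------------------------------------------------------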
/lib/model/roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <math.h>
3 |
4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
6 | {
7 | // Grab the input tensor
8 | float * data_flat = THFloatTensor_data(features);
9 | float * rois_flat = THFloatTensor_data(rois);
10 |
11 | float * output_flat = THFloatTensor_data(output);
12 |
13 | // Number of ROIs
14 | int num_rois = THFloatTensor_size(rois, 0);
15 | int size_rois = THFloatTensor_size(rois, 1);
16 | // batch size
17 | int batch_size = THFloatTensor_size(features, 0);
18 | if(batch_size != 1)
19 | {
20 | return 0;
21 | }
22 | // data height
23 | int data_height = THFloatTensor_size(features, 1);
24 | // data width
25 | int data_width = THFloatTensor_size(features, 2);
26 | // Number of channels
27 | int num_channels = THFloatTensor_size(features, 3);
28 |
29 |     // Initialize every element of the output tensor to -1 (used as the running max).
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1);
31 |
32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
33 | int index_roi = 0;
34 | int index_output = 0;
35 | int n;
36 | for (n = 0; n < num_rois; ++n)
37 | {
38 | int roi_batch_ind = rois_flat[index_roi + 0];
39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
43 | // CHECK_GE(roi_batch_ind, 0);
44 | // CHECK_LT(roi_batch_ind, batch_size);
45 |
46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height);
49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width);
50 |
51 | int index_data = roi_batch_ind * data_height * data_width * num_channels;
52 | const int output_area = pooled_width * pooled_height;
53 |
54 | int c, ph, pw;
55 | for (ph = 0; ph < pooled_height; ++ph)
56 | {
57 | for (pw = 0; pw < pooled_width; ++pw)
58 | {
59 | int hstart = (floor((float)(ph) * bin_size_h));
60 | int wstart = (floor((float)(pw) * bin_size_w));
61 | int hend = (ceil((float)(ph + 1) * bin_size_h));
62 | int wend = (ceil((float)(pw + 1) * bin_size_w));
63 |
64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
68 |
69 | const int pool_index = index_output + (ph * pooled_width + pw);
70 | int is_empty = (hend <= hstart) || (wend <= wstart);
71 | if (is_empty)
72 | {
73 | for (c = 0; c < num_channels * output_area; c += output_area)
74 | {
75 | output_flat[pool_index + c] = 0;
76 | }
77 | }
78 | else
79 | {
80 | int h, w, c;
81 | for (h = hstart; h < hend; ++h)
82 | {
83 | for (w = wstart; w < wend; ++w)
84 | {
85 | for (c = 0; c < num_channels; ++c)
86 | {
87 | const int index = (h * data_width + w) * num_channels + c;
88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
89 | {
90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index];
91 | }
92 | }
93 | }
94 | }
95 | }
96 | }
97 | }
98 |
99 | // Increment ROI index
100 | index_roi += size_rois;
101 | index_output += pooled_height * pooled_width * num_channels;
102 | }
103 | return 1;
104 | }
--------------------------------------------------------------------------------
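The nested C loops above are easier to follow in array form. The following NumPy re-rendering of the per-bin arithmetic is a sketch for intuition only (not part of the repo); it handles a single RoI on one image, keeps the (H, W, C) layout the CPU path sees after the Python wrapper's permute, and uses the same floor/ceil bin edges and clamping:

    import numpy as np

    def roi_pool_forward_np(features, roi, pooled_h, pooled_w, spatial_scale):
        """features: (H, W, C) for one image; roi: [x1, y1, x2, y2] in image coords."""
        H, W, C = features.shape
        x1, y1, x2, y2 = [int(round(v * spatial_scale)) for v in roi]
        roi_h = max(y2 - y1 + 1, 1)
        roi_w = max(x2 - x1 + 1, 1)
        bin_h = roi_h / float(pooled_h)
        bin_w = roi_w / float(pooled_w)
        out = np.zeros((pooled_h, pooled_w, C), dtype=features.dtype)
        for ph in range(pooled_h):
            for pw in range(pooled_w):
                hs = min(max(int(np.floor(ph * bin_h)) + y1, 0), H)
                he = min(max(int(np.ceil((ph + 1) * bin_h)) + y1, 0), H)
                ws = min(max(int(np.floor(pw * bin_w)) + x1, 0), W)
                we = min(max(int(np.ceil((pw + 1) * bin_w)) + x1, 0), W)
                if he > hs and we > ws:               # empty bins stay 0, as in the C code
                    out[ph, pw] = features[hs:he, ws:we].max(axis=(0, 1))
        return out

    pooled = roi_pool_forward_np(np.random.rand(38, 50, 512), [16, 16, 320, 240], 7, 7, 1 / 16.0)
    print(pooled.shape)                               # (7, 7, 512)

--------------------------------------------------------------------------------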
/lib/model/roi_pooling/src/roi_pooling.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/roi_pooling/src/roi_pooling.cu.o
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <math.h>
3 | #include "roi_pooling_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 |
17 | // Number of ROIs
18 | int num_rois = THCudaTensor_size(state, rois, 0);
19 | int size_rois = THCudaTensor_size(state, rois, 1);
20 | if (size_rois != 5)
21 | {
22 | return 0;
23 | }
24 |
25 | // batch size
26 | // int batch_size = THCudaTensor_size(state, features, 0);
27 | // if (batch_size != 1)
28 | // {
29 | // return 0;
30 | // }
31 | // data height
32 | int data_height = THCudaTensor_size(state, features, 2);
33 | // data width
34 | int data_width = THCudaTensor_size(state, features, 3);
35 | // Number of channels
36 | int num_channels = THCudaTensor_size(state, features, 1);
37 |
38 | cudaStream_t stream = THCState_getCurrentStream(state);
39 |
40 | ROIPoolForwardLaucher(
41 | data_flat, spatial_scale, num_rois, data_height,
42 | data_width, num_channels, pooled_height,
43 | pooled_width, rois_flat,
44 | output_flat, argmax_flat, stream);
45 |
46 | return 1;
47 | }
48 |
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 | // Grab the input tensor
53 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 | float * rois_flat = THCudaTensor_data(state, rois);
55 |
56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 |
59 | // Number of ROIs
60 | int num_rois = THCudaTensor_size(state, rois, 0);
61 | int size_rois = THCudaTensor_size(state, rois, 1);
62 | if (size_rois != 5)
63 | {
64 | return 0;
65 | }
66 |
67 | // batch size
68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 | // if (batch_size != 1)
70 | // {
71 | // return 0;
72 | // }
73 | // data height
74 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 | // data width
76 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 | // Number of channels
78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 |
80 | cudaStream_t stream = THCState_getCurrentStream(state);
81 | ROIPoolBackwardLaucher(
82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 | data_width, num_channels, pooled_height,
84 | pooled_width, rois_flat,
85 | bottom_grad_flat, argmax_flat, stream);
86 |
87 | return 1;
88 | }
89 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);
3 |
4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _ROI_POOLING_KERNEL
2 | #define _ROI_POOLING_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int ROIPoolForwardLaucher(
9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
10 | const int width, const int channels, const int pooled_height,
11 | const int pooled_width, const float* bottom_rois,
12 | float* top_data, int* argmax_data, cudaStream_t stream);
13 |
14 |
15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
16 | const int height, const int width, const int channels, const int pooled_height,
17 | const int pooled_width, const float* bottom_rois,
18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
24 | #endif
25 |
26 |
--------------------------------------------------------------------------------
/lib/model/rpn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/rpn/__init__.py
--------------------------------------------------------------------------------
/lib/model/rpn/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/rpn/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/rpn/__pycache__/anchor_target_layer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/rpn/__pycache__/anchor_target_layer.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/rpn/__pycache__/bbox_transform.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/rpn/__pycache__/bbox_transform.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/rpn/__pycache__/generate_anchors.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/rpn/__pycache__/generate_anchors.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/rpn/__pycache__/proposal_layer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/rpn/__pycache__/proposal_layer.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/rpn/__pycache__/proposal_target_layer_cascade.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/rpn/__pycache__/proposal_target_layer_cascade.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/rpn/__pycache__/rpn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/rpn/__pycache__/rpn.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/rpn/generate_anchors.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | # --------------------------------------------------------
3 | # Faster R-CNN
4 | # Copyright (c) 2015 Microsoft
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # Written by Ross Girshick and Sean Bell
7 | # --------------------------------------------------------
8 |
9 | import numpy as np
10 | import pdb
11 |
12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
13 | #
14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
15 | # >> anchors
16 | #
17 | # anchors =
18 | #
19 | # -83 -39 100 56
20 | # -175 -87 192 104
21 | # -359 -183 376 200
22 | # -55 -55 72 72
23 | # -119 -119 136 136
24 | # -247 -247 264 264
25 | # -35 -79 52 96
26 | # -79 -167 96 184
27 | # -167 -343 184 360
28 |
29 | #array([[ -83., -39., 100., 56.],
30 | # [-175., -87., 192., 104.],
31 | # [-359., -183., 376., 200.],
32 | # [ -55., -55., 72., 72.],
33 | # [-119., -119., 136., 136.],
34 | # [-247., -247., 264., 264.],
35 | # [ -35., -79., 52., 96.],
36 | # [ -79., -167., 96., 184.],
37 | # [-167., -343., 184., 360.]])
38 |
39 | try:
40 | xrange # Python 2
41 | except NameError:
42 | xrange = range # Python 3
43 |
44 |
45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
46 | scales=2**np.arange(3, 6)):
47 | """
48 | Generate anchor (reference) windows by enumerating aspect ratios X
49 | scales wrt a reference (0, 0, 15, 15) window.
50 | """
51 |
52 | base_anchor = np.array([1, 1, base_size, base_size]) - 1
53 | ratio_anchors = _ratio_enum(base_anchor, ratios)
54 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
55 | for i in xrange(ratio_anchors.shape[0])])
56 | return anchors
57 |
58 | def _whctrs(anchor):
59 | """
60 | Return width, height, x center, and y center for an anchor (window).
61 | """
62 |
63 | w = anchor[2] - anchor[0] + 1
64 | h = anchor[3] - anchor[1] + 1
65 | x_ctr = anchor[0] + 0.5 * (w - 1)
66 | y_ctr = anchor[1] + 0.5 * (h - 1)
67 | return w, h, x_ctr, y_ctr
68 |
69 | def _mkanchors(ws, hs, x_ctr, y_ctr):
70 | """
71 | Given a vector of widths (ws) and heights (hs) around a center
72 | (x_ctr, y_ctr), output a set of anchors (windows).
73 | """
74 |
75 | ws = ws[:, np.newaxis]
76 | hs = hs[:, np.newaxis]
77 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
78 | y_ctr - 0.5 * (hs - 1),
79 | x_ctr + 0.5 * (ws - 1),
80 | y_ctr + 0.5 * (hs - 1)))
81 | return anchors
82 |
83 | def _ratio_enum(anchor, ratios):
84 | """
85 | Enumerate a set of anchors for each aspect ratio wrt an anchor.
86 | """
87 |
88 | w, h, x_ctr, y_ctr = _whctrs(anchor)
89 | size = w * h
90 | size_ratios = size / ratios
91 | ws = np.round(np.sqrt(size_ratios))
92 | hs = np.round(ws * ratios)
93 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
94 | return anchors
95 |
96 | def _scale_enum(anchor, scales):
97 | """
98 | Enumerate a set of anchors for each scale wrt an anchor.
99 | """
100 |
101 | w, h, x_ctr, y_ctr = _whctrs(anchor)
102 | ws = w * scales
103 | hs = h * scales
104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
105 | return anchors
106 |
107 | if __name__ == '__main__':
108 | import time
109 | t = time.time()
110 | a = generate_anchors()
111 | print(time.time() - t)
112 | print(a)
113 | from IPython import embed; embed()
114 |
--------------------------------------------------------------------------------
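A worked pass through _ratio_enum (not from the file) makes the defaults concrete: the base anchor (0, 0, 15, 15) has w = h = 16 and center (7.5, 7.5), so size = 256. For ratio 0.5, size_ratios = 256 / 0.5 = 512, ws = round(sqrt(512)) = 23 and hs = round(23 * 0.5) = 12, which _mkanchors turns into the ratio anchor (-3.5, 2.0, 18.5, 13.0); _scale_enum then multiplies that window's width and height by the scales 8, 16, and 32 around the same center. Note the Python output differs from the MATLAB values in the header comment by one pixel (MATLAB's 1-based indexing): the first anchor comes out as (-84, -40, 99, 55) rather than (-83, -39, 100, 56).

--------------------------------------------------------------------------------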
/lib/model/rpn/rpn.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 |
7 | from model.utils.config import cfg
8 | from .proposal_layer import _ProposalLayer
9 | from .anchor_target_layer import _AnchorTargetLayer
10 | from model.utils.net_utils import _smooth_l1_loss
11 |
12 | import numpy as np
13 | import math
14 | import pdb
15 | import time
16 |
17 | class _RPN(nn.Module):
18 | """ region proposal network """
19 | def __init__(self, din):
20 | super(_RPN, self).__init__()
21 |
22 | self.din = din # get depth of input feature map, e.g., 512
23 | self.anchor_scales = cfg.ANCHOR_SCALES
24 | self.anchor_ratios = cfg.ANCHOR_RATIOS
25 | self.feat_stride = cfg.FEAT_STRIDE[0]
26 |
27 | # define the convrelu layers processing input feature map
28 | self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)
29 |
30 |         # define bg/fg classification score layer
31 | self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors)
32 | self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)
33 |
34 | # define anchor box offset prediction layer
35 | self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors)
36 | self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0)
37 |
38 | # define proposal layer
39 | self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
40 |
41 | # define anchor target layer
42 | self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
43 |
44 | self.rpn_loss_cls = 0
45 | self.rpn_loss_box = 0
46 |
47 | @staticmethod
48 | def reshape(x, d):
49 | input_shape = x.size()
50 | x = x.view(
51 | input_shape[0],
52 | int(d),
53 | int(float(input_shape[1] * input_shape[2]) / float(d)),
54 | input_shape[3]
55 | )
56 | return x
57 |
58 | def forward(self, base_feat, im_info, gt_boxes, num_boxes):
59 |
60 | batch_size = base_feat.size(0)
61 |
62 | # return feature map after convrelu layer
63 | rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
64 | # get rpn classification score
65 | rpn_cls_score = self.RPN_cls_score(rpn_conv1)
66 |
67 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
68 | rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
69 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)
70 |
71 | # get rpn offsets to the anchor boxes
72 | rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)
73 |
74 | # proposal layer
75 | cfg_key = 'TRAIN' if self.training else 'TEST'
76 |
77 | rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
78 | im_info, cfg_key))
79 |
80 | self.rpn_loss_cls = 0
81 | self.rpn_loss_box = 0
82 |
83 |         # generate training labels and build the RPN loss
84 | if self.training:
85 | assert gt_boxes is not None
86 |
87 | rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))
88 |
89 | # compute classification loss
90 | rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
91 | rpn_label = rpn_data[0].view(batch_size, -1)
92 |
93 | rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
94 | rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)
95 | rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
96 | rpn_label = Variable(rpn_label.long())
97 | self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
98 | fg_cnt = torch.sum(rpn_label.data.ne(0))
99 |
100 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
101 |
102 | # compute bbox regression loss
103 | rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
104 | rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
105 | rpn_bbox_targets = Variable(rpn_bbox_targets)
106 |
107 | self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
108 | rpn_bbox_outside_weights, sigma=3, dim=[1,2,3])
109 |
110 | return rois, self.rpn_loss_cls, self.rpn_loss_box
111 |
--------------------------------------------------------------------------------
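A note on the `reshape` helper above: the 2A-channel score map is viewed as 2 channels by folding the anchor dimension into the height, so a single softmax over dim 1 acts as a per-anchor bg/fg softmax; it is then viewed back. A minimal sketch with assumed shapes (9 anchors on a 38x50 feature map; the shapes and values here are illustrative, not repo defaults):

```python
import torch
import torch.nn.functional as F

b, a, h, w = 1, 9, 38, 50                 # assumed: 9 anchors, 38x50 feature map
scores = torch.randn(b, 2 * a, h, w)      # what RPN_cls_score would output
scores_2 = scores.view(b, 2, a * h, w)    # reshape(x, 2): fold anchors into H
probs_2 = F.softmax(scores_2, dim=1)      # softmax is now bg vs. fg per anchor
probs = probs_2.view(b, 2 * a, h, w)      # reshape(x, 2*a): back to 18 channels
assert torch.allclose(probs_2.sum(1), torch.ones(b, a * h, w))
```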
/lib/model/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/lib/model/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/utils/__init__.py
--------------------------------------------------------------------------------
/lib/model/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/utils/__pycache__/blob.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/utils/__pycache__/blob.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/utils/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/utils/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/utils/__pycache__/net_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/model/utils/__pycache__/net_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/model/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 |
15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes,
16 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
17 | return bbox_overlaps_c(boxes, query_boxes)
18 |
19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c(
20 | np.ndarray[DTYPE_t, ndim=2] boxes,
21 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
22 | """
23 | Parameters
24 | ----------
25 | boxes: (N, 4) ndarray of float
26 | query_boxes: (K, 4) ndarray of float
27 | Returns
28 | -------
29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
30 | """
31 | cdef unsigned int N = boxes.shape[0]
32 | cdef unsigned int K = query_boxes.shape[0]
33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
34 | cdef DTYPE_t iw, ih, box_area
35 | cdef DTYPE_t ua
36 | cdef unsigned int k, n
37 | for k in range(K):
38 | box_area = (
39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
41 | )
42 | for n in range(N):
43 | iw = (
44 | min(boxes[n, 2], query_boxes[k, 2]) -
45 | max(boxes[n, 0], query_boxes[k, 0]) + 1
46 | )
47 | if iw > 0:
48 | ih = (
49 | min(boxes[n, 3], query_boxes[k, 3]) -
50 | max(boxes[n, 1], query_boxes[k, 1]) + 1
51 | )
52 | if ih > 0:
53 | ua = float(
54 | (boxes[n, 2] - boxes[n, 0] + 1) *
55 | (boxes[n, 3] - boxes[n, 1] + 1) +
56 | box_area - iw * ih
57 | )
58 | overlaps[n, k] = iw * ih / ua
59 | return overlaps
60 |
61 |
62 | def bbox_intersections(
63 | np.ndarray[DTYPE_t, ndim=2] boxes,
64 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
65 | return bbox_intersections_c(boxes, query_boxes)
66 |
67 |
68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c(
69 | np.ndarray[DTYPE_t, ndim=2] boxes,
70 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
71 | """
72 |     For each query box, compute the fraction of its area covered by each box
73 |     Parameters
74 |     ----------
75 |     boxes: (N, 4) ndarray of float
76 |     query_boxes: (K, 4) ndarray of float
77 |     Returns
78 |     -------
79 |     intersec: (N, K) ndarray of intersection ratios between boxes and
80 |         query_boxes
81 |     """
82 | cdef unsigned int N = boxes.shape[0]
83 | cdef unsigned int K = query_boxes.shape[0]
84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
85 | cdef DTYPE_t iw, ih, box_area
86 | cdef DTYPE_t ua
87 | cdef unsigned int k, n
88 | for k in range(K):
89 | box_area = (
90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
92 | )
93 | for n in range(N):
94 | iw = (
95 | min(boxes[n, 2], query_boxes[k, 2]) -
96 | max(boxes[n, 0], query_boxes[k, 0]) + 1
97 | )
98 | if iw > 0:
99 | ih = (
100 | min(boxes[n, 3], query_boxes[k, 3]) -
101 | max(boxes[n, 1], query_boxes[k, 1]) + 1
102 | )
103 | if ih > 0:
104 | intersec[n, k] = iw * ih / box_area
105 | return intersec
--------------------------------------------------------------------------------
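For reference, a vectorized pure-NumPy sketch of `bbox_overlaps` above; the repo itself compiles and uses the Cython version, so this is only a readable equivalent (with the same +1 pixel convention) that can serve as a sanity check:

```python
import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """boxes: (N, 4), query_boxes: (K, 4); returns an (N, K) IoU matrix."""
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    qareas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
             (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    # pairwise intersection widths/heights via broadcasting
    iw = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) - \
         np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1
    ih = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) - \
         np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    union = areas[:, None] + qareas[None, :] - inter
    return inter / union

boxes = np.array([[0., 0., 9., 9.]])
query = np.array([[5., 5., 14., 14.]])
print(bbox_overlaps_np(boxes, query))  # ~0.1429 (25 / 175)
```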
/lib/model/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | # from scipy.misc import imread, imresize
12 | import cv2
13 |
14 | try:
15 | xrange # Python 2
16 | except NameError:
17 | xrange = range # Python 3
18 |
19 |
20 | def im_list_to_blob(ims):
21 | """Convert a list of images into a network input.
22 |
23 | Assumes images are already prepared (means subtracted, BGR order, ...).
24 | """
25 | max_shape = np.array([im.shape for im in ims]).max(axis=0)
26 | num_images = len(ims)
27 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
28 | dtype=np.float32)
29 | for i in xrange(num_images):
30 | im = ims[i]
31 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
32 |
33 | return blob
34 |
35 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
36 | """Mean subtract and scale an image for use in a blob."""
37 |
38 | im = im.astype(np.float32, copy=False)
39 | im -= pixel_means
40 | # im = im[:, :, ::-1]
41 | im_shape = im.shape
42 | im_size_min = np.min(im_shape[0:2])
43 | im_size_max = np.max(im_shape[0:2])
44 | im_scale = float(target_size) / float(im_size_min)
45 | # Prevent the biggest axis from being more than MAX_SIZE
46 | # if np.round(im_scale * im_size_max) > max_size:
47 | # im_scale = float(max_size) / float(im_size_max)
48 | # im = imresize(im, im_scale)
49 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
50 | interpolation=cv2.INTER_LINEAR)
51 |
52 | return im, im_scale
53 |
--------------------------------------------------------------------------------
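A hedged usage sketch for the two helpers above. The pixel means here mirror the common py-faster-rcnn BGR default and are an assumption; in this repo they come from `model.utils.config.cfg.PIXEL_MEANS`. Note the `max_size` cap is commented out in this version, so scaling is driven only by `target_size`:

```python
import numpy as np

pixel_means = np.array([[[102.9801, 115.9465, 122.7717]]])  # assumed BGR means
im = np.random.randint(0, 255, (480, 640, 3)).astype(np.uint8)
im, im_scale = prep_im_for_blob(im, pixel_means, target_size=600, max_size=1000)
blob = im_list_to_blob([im])
print(blob.shape, im_scale)  # (1, 600, 800, 3) 1.25
```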
/lib/model/utils/logger.py:
--------------------------------------------------------------------------------
1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
2 | import tensorflow as tf
3 | import numpy as np
4 | import scipy.misc
5 | try:
6 | from StringIO import StringIO # Python 2.7
7 | except ImportError:
8 | from io import BytesIO # Python 3.x
9 |
10 |
11 | class Logger(object):
12 |
13 | def __init__(self, log_dir):
14 | """Create a summary writer logging to log_dir."""
15 | self.writer = tf.summary.FileWriter(log_dir)
16 |
17 | def scalar_summary(self, tag, value, step):
18 | """Log a scalar variable."""
19 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
20 | self.writer.add_summary(summary, step)
21 |
22 | def image_summary(self, tag, images, step):
23 | """Log a list of images."""
24 |
25 | img_summaries = []
26 | for i, img in enumerate(images):
27 | # Write the image to a string
28 | try:
29 | s = StringIO()
30 |             except NameError:  # Python 3: StringIO was not imported
31 | s = BytesIO()
32 | scipy.misc.toimage(img).save(s, format="png")
33 |
34 | # Create an Image object
35 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
36 | height=img.shape[0],
37 | width=img.shape[1])
38 | # Create a Summary value
39 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
40 |
41 | # Create and write Summary
42 | summary = tf.Summary(value=img_summaries)
43 | self.writer.add_summary(summary, step)
44 |
45 | def histo_summary(self, tag, values, step, bins=1000):
46 | """Log a histogram of the tensor of values."""
47 |
48 | # Create a histogram using numpy
49 | counts, bin_edges = np.histogram(values, bins=bins)
50 |
51 | # Fill the fields of the histogram proto
52 | hist = tf.HistogramProto()
53 | hist.min = float(np.min(values))
54 | hist.max = float(np.max(values))
55 | hist.num = int(np.prod(values.shape))
56 | hist.sum = float(np.sum(values))
57 | hist.sum_squares = float(np.sum(values**2))
58 |
59 | # Drop the start of the first bin
60 | bin_edges = bin_edges[1:]
61 |
62 | # Add bin edges and counts
63 | for edge in bin_edges:
64 | hist.bucket_limit.append(edge)
65 | for c in counts:
66 | hist.bucket.append(c)
67 |
68 | # Create and write Summary
69 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
70 | self.writer.add_summary(summary, step)
71 | self.writer.flush()
72 |
--------------------------------------------------------------------------------
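A minimal usage sketch for the Logger above. It assumes TensorFlow 1.x (which provides `tf.summary.FileWriter`) and `scipy.misc.toimage` (removed in SciPy >= 1.2), so it only runs in that era's environment; the log directory here is a placeholder:

```python
logger = Logger('./logs')  # writes TF1 event files under ./logs
for step in range(100):
    loss = 1.0 / (step + 1)
    logger.scalar_summary('train/loss', loss, step)
# inspect with: tensorboard --logdir ./logs
```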
/lib/pycocotools/UPSTREAM_REV:
--------------------------------------------------------------------------------
1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/pycocotools/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/pycocotools/__pycache__/coco.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/pycocotools/__pycache__/coco.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/pycocotools/__pycache__/cocoeval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/pycocotools/__pycache__/cocoeval.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/pycocotools/__pycache__/mask.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/pycocotools/__pycache__/mask.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/lib/pycocotools/license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies,
26 | either expressed or implied, of the FreeBSD Project.
27 |
--------------------------------------------------------------------------------
/lib/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | from . import _mask
4 |
5 | # Interface for manipulating masks stored in RLE format.
6 | #
7 | # RLE is a simple yet efficient format for storing binary masks. RLE
8 | # first divides a vector (or vectorized image) into a series of piecewise
9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | # encode - Encode binary masks using RLE.
32 | # decode - Decode binary masks encoded via RLE.
33 | # merge - Compute union or intersection of encoded masks.
34 | # iou - Compute intersection over union between masks.
35 | # area - Compute area of encoded masks.
36 | # toBbox - Get bounding boxes surrounding encoded masks.
37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | # Rs = encode( masks )
41 | # masks = decode( Rs )
42 | # R = merge( Rs, intersect=false )
43 | # o = iou( dt, gt, iscrowd )
44 | # a = area( Rs )
45 | # bbs = toBbox( Rs )
46 | # Rs = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | # Rs - [dict] Run-length encoding of binary masks
50 | # R - dict Run-length encoding of binary mask
51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | # dt,gt - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criteria. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criteria above for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox. version 2.0
72 | # Data, paper, and tutorials available at: http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 |
76 | encode = _mask.encode
77 | decode = _mask.decode
78 | iou = _mask.iou
79 | merge = _mask.merge
80 | area = _mask.area
81 | toBbox = _mask.toBbox
82 | frPyObjects = _mask.frPyObjects
--------------------------------------------------------------------------------
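A quick sketch of the RLE round trip described in the header comment above, assuming the compiled `_mask` extension is importable (e.g. via the stock pycocotools package); note `encode` requires a column-major uint8 HxWxN array:

```python
import numpy as np
from pycocotools import mask as maskUtils  # or this repo's lib/pycocotools

m = np.zeros((10, 10, 1), dtype=np.uint8, order='F')  # column-major, HxWxN
m[2:5, 3:8, 0] = 1                                    # a 3x5 rectangle
rles = maskUtils.encode(m)
print(maskUtils.area(rles))    # [15]
print(maskUtils.toBbox(rles))  # [[3. 2. 5. 3.]] as [x y w h]
assert (maskUtils.decode(rles) == m).all()
```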
/lib/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 | #include <stdbool.h>
9 |
10 | typedef unsigned int uint;
11 | typedef unsigned long siz;
12 | typedef unsigned char byte;
13 | typedef double* BB;
14 | typedef struct { siz h, w, m; uint *cnts; } RLE;
15 |
16 | // Initialize/destroy RLE.
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
18 | void rleFree( RLE *R );
19 |
20 | // Initialize/destroy RLE array.
21 | void rlesInit( RLE **R, siz n );
22 | void rlesFree( RLE **R, siz n );
23 |
24 | // Encode binary masks using RLE.
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
26 |
27 | // Decode binary masks encoded via RLE.
28 | void rleDecode( const RLE *R, byte *mask, siz n );
29 |
30 | // Compute union or intersection of encoded masks.
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );
32 |
33 | // Compute area of encoded masks.
34 | void rleArea( const RLE *R, siz n, uint *a );
35 |
36 | // Compute intersection over union between masks.
37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
38 |
39 | // Compute intersection over union between bounding boxes.
40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
41 |
42 | // Get bounding boxes surrounding encoded masks.
43 | void rleToBbox( const RLE *R, BB bb, siz n );
44 |
45 | // Convert bounding boxes to encoded masks.
46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
47 |
48 | // Convert polygon to encoded mask.
49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
50 |
51 | // Get compressed string representation of encoded mask.
52 | char* rleToString( const RLE *R );
53 |
54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w );
56 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/roibatchLoader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/roi_data_layer/__pycache__/roibatchLoader.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Xinlei Chen
6 | # --------------------------------------------------------
7 |
8 | """Compute minibatch blobs for training a Fast R-CNN network."""
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | import numpy as np
14 | import numpy.random as npr
15 | from scipy.misc import imread
16 | from model.utils.config import cfg
17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
18 | import pdb
19 | def get_minibatch(roidb, num_classes):
20 | """Given a roidb, construct a minibatch sampled from it."""
21 | num_images = len(roidb)
22 | # Sample random scales to use for each image in this batch
23 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
24 | size=num_images)
25 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
26 | 'num_images ({}) must divide BATCH_SIZE ({})'. \
27 | format(num_images, cfg.TRAIN.BATCH_SIZE)
28 |
29 | # Get the input image blob, formatted for caffe
30 | im_blob, im_scales, im_names = _get_image_blob(roidb, random_scale_inds)
31 |
32 | blobs = {'data': im_blob}
33 |
34 | assert len(im_scales) == 1, "Single batch only"
35 | assert len(roidb) == 1, "Single batch only"
36 |
37 | # gt boxes: (x1, y1, x2, y2, cls)
38 | if cfg.TRAIN.USE_ALL_GT:
39 | # Include all ground truth boxes
40 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
41 | else:
42 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd''
43 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0]
44 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
45 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
46 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
47 | blobs['gt_boxes'] = gt_boxes
48 | blobs['im_info'] = np.array(
49 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
50 | dtype=np.float32)
51 |
52 | blobs['img_id'] = roidb[0]['img_id']
53 | blobs['img_name'] = im_names
54 |
55 | return blobs
56 |
57 | def _get_image_blob(roidb, scale_inds):
58 | """Builds an input blob from the images in the roidb at the specified
59 | scales.
60 | """
61 | num_images = len(roidb)
62 |
63 | processed_ims = []
64 | im_scales = []
65 | im_names = []
66 | for i in range(num_images):
67 | #im = cv2.imread(roidb[i]['image'])
68 | im_name = roidb[i]['image']
69 | im = imread(im_name)
70 |
71 | if len(im.shape) == 2:
72 | im = im[:,:,np.newaxis]
73 | im = np.concatenate((im,im,im), axis=2)
74 |         # flip the channels, since the original pipeline read images with cv2
75 |         # rgb -> bgr
76 | im = im[:,:,::-1]
77 |
78 | if roidb[i]['flipped']:
79 | im = im[:, ::-1, :]
80 | target_size = cfg.TRAIN.SCALES[scale_inds[i]]
81 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
82 | cfg.TRAIN.MAX_SIZE)
83 | im_scales.append(im_scale)
84 | processed_ims.append(im)
85 | im_names.append(im_name)
86 |
87 | # Create a blob to hold the input images
88 | blob = im_list_to_blob(processed_ims)
89 |
90 | return blob, im_scales, im_names
91 |
--------------------------------------------------------------------------------
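For orientation, a shape sketch of the `blobs` dict that `get_minibatch` returns (single-image batches only, as the asserts above enforce); all values here are illustrative placeholders:

```python
import numpy as np

blobs = {
    'data':     np.zeros((1, 600, 800, 3), np.float32),              # NHWC image blob
    'gt_boxes': np.array([[48., 24., 320., 240., 5.]], np.float32),  # x1,y1,x2,y2,cls (scaled)
    'im_info':  np.array([[600., 800., 1.2]], np.float32),           # height, width, scale
    'img_id':   0,
    'img_name': ['/path/to/image.jpg'],
}
```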
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 |
6 | import datasets
7 | import numpy as np
8 | from model.utils.config import cfg
9 | from datasets.factory import get_imdb
10 | import PIL
11 | import pdb
12 |
13 | def prepare_roidb(imdb):
14 | """Enrich the imdb's roidb by adding some derived quantities that
15 | are useful for training. This function precomputes the maximum
16 | overlap, taken over ground-truth boxes, between each ROI and
17 | each ground-truth box. The class with maximum overlap is also
18 | recorded.
19 | """
20 |
21 | roidb = imdb.roidb
22 | if not (imdb.name.startswith('coco')):
23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size
24 | for i in range(imdb.num_images)]
25 |
26 | for i in range(len(imdb.image_index)):
27 | roidb[i]['img_id'] = imdb.image_id_at(i)
28 | roidb[i]['image'] = imdb.image_path_at(i)
29 | if not (imdb.name.startswith('coco')):
30 | roidb[i]['width'] = sizes[i][0]
31 | roidb[i]['height'] = sizes[i][1]
32 | # need gt_overlaps as a dense array for argmax
33 | gt_overlaps = roidb[i]['gt_overlaps'].toarray()
34 | # max overlap with gt over classes (columns)
35 | max_overlaps = gt_overlaps.max(axis=1)
36 | # gt class that had the max overlap
37 | max_classes = gt_overlaps.argmax(axis=1)
38 | roidb[i]['max_classes'] = max_classes
39 | roidb[i]['max_overlaps'] = max_overlaps
40 | # sanity checks
41 | # max overlap of 0 => class should be zero (background)
42 | zero_inds = np.where(max_overlaps == 0)[0]
43 | assert all(max_classes[zero_inds] == 0)
44 | # max overlap > 0 => class should not be zero (must be a fg class)
45 | nonzero_inds = np.where(max_overlaps > 0)[0]
46 | assert all(max_classes[nonzero_inds] != 0)
47 |
48 |
49 | def rank_roidb_ratio(roidb):
50 | # rank roidb based on the ratio between width and height.
51 | ratio_large = 2 # largest ratio to preserve.
52 | ratio_small = 0.5 # smallest ratio to preserve.
53 |
54 | ratio_list = []
55 | for i in range(len(roidb)):
56 | width = roidb[i]['width']
57 | height = roidb[i]['height']
58 | ratio = width / float(height)
59 |
60 | if ratio > ratio_large:
61 | roidb[i]['need_crop'] = 1
62 | ratio = ratio_large
63 | elif ratio < ratio_small:
64 | roidb[i]['need_crop'] = 1
65 | ratio = ratio_small
66 | else:
67 | roidb[i]['need_crop'] = 0
68 |
69 | ratio_list.append(ratio)
70 |
71 | ratio_list = np.array(ratio_list)
72 | ratio_index = np.argsort(ratio_list)
73 | return ratio_list[ratio_index], ratio_index
74 |
75 | def filter_roidb(roidb):
76 |     # filter out images that have no bounding boxes.
77 | print('before filtering, there are %d images...' % (len(roidb)))
78 | i = 0
79 | while i < len(roidb):
80 | if len(roidb[i]['boxes']) == 0:
81 | del roidb[i]
82 | i -= 1
83 | i += 1
84 |
85 | print('after filtering, there are %d images...' % (len(roidb)))
86 | return roidb
87 |
88 | def combined_roidb(imdb_names, training=True):
89 | """
90 | Combine multiple roidbs
91 | """
92 |
93 | def get_training_roidb(imdb):
94 | """Returns a roidb (Region of Interest database) for use in training."""
95 | if cfg.TRAIN.USE_FLIPPED:
96 | print('Appending horizontally-flipped training examples...')
97 | imdb.append_flipped_images()
98 | print('done')
99 |
100 | print('Preparing data...')
101 |
102 | prepare_roidb(imdb)
103 | #ratio_index = rank_roidb_ratio(imdb)
104 | print('done')
105 |
106 | return imdb.roidb
107 |
108 | def get_roidb(imdb_name):
109 | imdb = get_imdb(imdb_name)
110 | print('Loaded dataset `{:s}` for training'.format(imdb.name))
111 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
112 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
113 | roidb = get_training_roidb(imdb)
114 | return roidb
115 |
116 | roidbs = [get_roidb(s) for s in imdb_names.split('+')]
117 | roidb = roidbs[0]
118 |
119 | if len(roidbs) > 1:
120 | for r in roidbs[1:]:
121 | roidb.extend(r)
122 | tmp = get_imdb(imdb_names.split('+')[1])
123 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
124 | else:
125 | imdb = get_imdb(imdb_names)
126 |
127 | if training:
128 | roidb = filter_roidb(roidb)
129 |
130 | ratio_list, ratio_index = rank_roidb_ratio(roidb)
131 |
132 | return imdb, roidb, ratio_list, ratio_index
133 |
--------------------------------------------------------------------------------
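A hedged usage sketch for `combined_roidb`; `'voc_2007_trainval'` is just an example imdb name of the kind registered in `datasets.factory`, not necessarily the dataset this project trains on:

```python
imdb, roidb, ratio_list, ratio_index = combined_roidb('voc_2007_trainval')
print('{:d} roidb entries'.format(len(roidb)))
# multiple datasets are joined with '+':
# combined_roidb('voc_2007_trainval+voc_2012_trainval')
```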
/lib/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | # --------------------------------------------------------
3 | # Fast R-CNN
4 | # Copyright (c) 2015 Microsoft
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # Written by Ross Girshick
7 | # --------------------------------------------------------
8 |
9 | import os
10 | from os.path import join as pjoin
11 | import numpy as np
12 | from distutils.core import setup
13 | from distutils.extension import Extension
14 | from Cython.Distutils import build_ext
15 |
16 |
17 | def find_in_path(name, path):
18 | "Find a file in a search path"
19 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
20 | for dir in path.split(os.pathsep):
21 | binpath = pjoin(dir, name)
22 | if os.path.exists(binpath):
23 | return os.path.abspath(binpath)
24 | return None
25 |
26 |
27 | # def locate_cuda():
28 | # """Locate the CUDA environment on the system
29 | #
30 | # Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
31 | # and values giving the absolute path to each directory.
32 | #
33 | # Starts by looking for the CUDAHOME env variable. If not found, everything
34 | # is based on finding 'nvcc' in the PATH.
35 | # """
36 | #
37 | # # first check if the CUDAHOME env variable is in use
38 | # if 'CUDAHOME' in os.environ:
39 | # home = os.environ['CUDAHOME']
40 | # nvcc = pjoin(home, 'bin', 'nvcc')
41 | # else:
42 | # # otherwise, search the PATH for NVCC
43 | # default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
44 | # nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
45 | # if nvcc is None:
46 | # raise EnvironmentError('The nvcc binary could not be '
47 | # 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
48 | # home = os.path.dirname(os.path.dirname(nvcc))
49 | #
50 | # cudaconfig = {'home': home, 'nvcc': nvcc,
51 | # 'include': pjoin(home, 'include'),
52 | # 'lib64': pjoin(home, 'lib64')}
53 | # for k, v in cudaconfig.iteritems():
54 | # if not os.path.exists(v):
55 | # raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
56 | #
57 | # return cudaconfig
58 |
59 |
60 | # CUDA = locate_cuda()
61 |
62 | # Obtain the numpy include directory. This logic works across numpy versions.
63 | try:
64 | numpy_include = np.get_include()
65 | except AttributeError:
66 | numpy_include = np.get_numpy_include()
67 |
68 |
69 | def customize_compiler_for_nvcc(self):
70 | """inject deep into distutils to customize how the dispatch
71 | to gcc/nvcc works.
72 |
73 | If you subclass UnixCCompiler, it's not trivial to get your subclass
74 | injected in, and still have the right customizations (i.e.
75 | distutils.sysconfig.customize_compiler) run on it. So instead of going
76 |     the OO route, I have this. Note, it's kind of like a weird functional
77 |     subclassing going on."""
78 |
79 |     # tell the compiler it can process .cu source files
80 | self.src_extensions.append('.cu')
81 |
82 |     # save references to the default compiler_so and _compile methods
83 | default_compiler_so = self.compiler_so
84 | super = self._compile
85 |
86 | # now redefine the _compile method. This gets executed for each
87 | # object but distutils doesn't have the ability to change compilers
88 | # based on source extension: we add it.
89 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
90 | print(extra_postargs)
91 | if os.path.splitext(src)[1] == '.cu':
92 | # use the cuda for .cu files
93 |             self.set_executable('compiler_so', CUDA['nvcc'])  # needs the locate_cuda() block above re-enabled
94 | # use only a subset of the extra_postargs, which are 1-1 translated
95 | # from the extra_compile_args in the Extension class
96 | postargs = extra_postargs['nvcc']
97 | else:
98 | postargs = extra_postargs['gcc']
99 |
100 | super(obj, src, ext, cc_args, postargs, pp_opts)
101 | # reset the default compiler_so, which we might have changed for cuda
102 | self.compiler_so = default_compiler_so
103 |
104 | # inject our redefined _compile method into the class
105 | self._compile = _compile
106 |
107 |
108 | # run the customize_compiler
109 | class custom_build_ext(build_ext):
110 | def build_extensions(self):
111 | customize_compiler_for_nvcc(self.compiler)
112 | build_ext.build_extensions(self)
113 |
114 |
115 | ext_modules = [
116 | Extension(
117 | "model.utils.cython_bbox",
118 | ["model/utils/bbox.pyx"],
119 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
120 | include_dirs=[numpy_include]
121 | ),
122 | Extension(
123 | 'pycocotools._mask',
124 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
125 | include_dirs=[numpy_include, 'pycocotools'],
126 | extra_compile_args={
127 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
128 | ),
129 | ]
130 |
131 | setup(
132 | name='faster_rcnn',
133 | ext_modules=ext_modules,
134 | # inject our custom trigger
135 | cmdclass={'build_ext': custom_build_ext},
136 | )
137 |
--------------------------------------------------------------------------------
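A minimal sketch of the in-place build this setup.py expects, mirroring the compile note in pycocotools/mask.py earlier in the dump ("python setup.py build_ext --inplace"); the `cwd='lib'` assumes the command is issued from the repository root:

```python
import subprocess

# build the cython_bbox and pycocotools._mask extensions in place
subprocess.check_call(['python', 'setup.py', 'build_ext', '--inplace'], cwd='lib')
```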
/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/model.png
--------------------------------------------------------------------------------
/models/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore all except .gitignore file
2 | *
3 | !.gitignore
4 |
--------------------------------------------------------------------------------
/models/pretrained_models:
--------------------------------------------------------------------------------
1 | /storage/jbxiao/workspace/vrelation/models/pretrained_models
--------------------------------------------------------------------------------
/models/vidvrd/visual_bbox_trans_temp2-ground-6.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/models/vidvrd/visual_bbox_trans_temp2-ground-6.ckpt
--------------------------------------------------------------------------------
/models/vidvrd/visual_bbox_trans_temp2-reconstruct-6.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doc-doc/vRGV/805cf996054e5bb295ca3833bcfe34442556382f/models/vidvrd/visual_bbox_trans_temp2-reconstruct-6.ckpt
--------------------------------------------------------------------------------
/networks/basic.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : basic.py
5 | # ====================================================
6 | import torch
7 | import torch.nn as nn
8 | from torch.autograd import Variable
9 | import torch.nn.functional as F
10 | from torch.nn.utils.rnn import pack_padded_sequence
11 | import numpy as np
12 | import pickle as pkl
13 |
14 |
15 | class AttHierarchicalGround(nn.Module):
16 |
17 | def __init__(self, input_size, hidden_size, visual_dim, word_dim, num_layers=1):
18 | super(AttHierarchicalGround, self).__init__()
19 |
20 | self.input_size = input_size
21 | self.hidden_size = hidden_size
22 | self.embed_dim = hidden_size // 2
23 |
24 |
25 | self.num_layers = num_layers
26 | self.word_dim = word_dim
27 |
28 | self.max_seg_len = 12
29 | dropout = 0.2
30 |
31 | self.embedding_word = nn.Sequential(nn.Linear(word_dim, self.embed_dim),
32 | nn.ReLU(),
33 | nn.Dropout(dropout))
34 |
35 | self.embedding_visual = nn.Sequential(nn.Linear(visual_dim, hidden_size),
36 | nn.ReLU(),
37 | nn.Dropout(dropout))
38 |
39 | # affine transformation for lstm hidden state
40 | self.linear1 = nn.Linear(hidden_size*2, hidden_size)
41 |
42 | # affine transformation for context
43 | self.linear2 = nn.Linear(hidden_size, 1, bias=False)
44 |
45 | self.transform_visual = nn.Sequential(nn.Linear(hidden_size, hidden_size),
46 | nn.ReLU(),
47 | nn.Dropout(dropout))
48 |
49 | self.relu = nn.ReLU()
50 | self.softmax = nn.Softmax(dim=1)
51 |
52 | self.within_seg_rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
53 | self.seg_rnn = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
54 |
55 |
56 | def soft_attention(self, input, context):
57 | """
58 | compute temporal self-attention
59 | :param input: (batch_size, seq_len, feat_dim)
60 | :param context: (batch_size, feat_dim)
61 | :return: vfeat: (batch_size, feat_dim), beta
62 | """
63 | batch_size, seq_len, feat_dim = input.size()
64 | context = context.unsqueeze(1).repeat(1, seq_len, 1)
65 | inputs = torch.cat((input, context), 2).view(-1, feat_dim*2)
66 |
67 | o = self.linear2(torch.tanh(self.linear1(inputs)))
68 | e = o.view(batch_size, seq_len)
69 | beta = self.softmax(e)
70 | vfeat = torch.bmm(beta.unsqueeze(1), input).squeeze(1)
71 |
72 | return vfeat, beta
73 |
74 |
75 | def forward(self, videos, relation_text, mode='train'):
76 | """
77 |         Warm-up mode: run without the textual relation, to train the decoder only.
78 | """
79 |
80 | frame_count = videos.shape[1]
81 |
82 | max_seg_num = int(frame_count / self.max_seg_len)
83 |
84 | ori_x = self.embedding_visual(videos).sum(dim=2).squeeze()
85 |
86 | x_trans = self.transform_visual(ori_x)
87 |
88 | within_seg_rnn_out, _ = self.within_seg_rnn(x_trans)
89 | self.within_seg_rnn.flatten_parameters()
90 |
91 | idx = np.round(np.linspace(self.max_seg_len-1, frame_count-1, max_seg_num)).astype('int')
92 |
93 | seg_rnn_input = within_seg_rnn_out[:,idx,:]
94 |
95 | seg_out, hidden = self.seg_rnn(seg_rnn_input)
96 | self.seg_rnn.flatten_parameters()
97 |
98 | output, _ = self.soft_attention(within_seg_rnn_out, hidden[0].squeeze(0))
99 |
100 | return output, hidden
101 |
102 |
103 | class DecoderRNN(nn.Module):
104 | def __init__(self, embed_size, hidden_size, vocab_size, num_layers, max_seq_length=10):
105 | """Set the hyper-parameters and build the layers."""
106 | super(DecoderRNN, self).__init__()
107 | self.embed = nn.Embedding(vocab_size, embed_size)
108 | self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
109 | self.linear = nn.Linear(hidden_size, vocab_size)
110 | self.max_seq_length = max_seq_length
111 |
112 | def forward(self, video_out, video_hidden, relations, lengths):
113 | """
114 | Decode relation attended video feature and reconstruct the relation.
115 | :param video_out: (batch, seq_len, dim_hidden * num_directions)
116 | :param video_hidden: (num_layers * num_directions, batch_size, dim_hidden)
117 | :param relations:
118 | :param lengths:
119 | :return:
120 | """
121 | embeddings = self.embed(relations)
122 | batch_size, seq_len, _ = embeddings.size()
123 |
124 | embeddings = torch.cat((video_out.unsqueeze(1), embeddings), 1)
125 |
126 | packed = pack_padded_sequence(embeddings, lengths, batch_first=True)
127 |
128 | hiddens, _ = self.lstm(packed, video_hidden)
129 | outputs = self.linear(hiddens[0])
130 |
131 | # print(outputs.shape)
132 | return outputs
133 |
134 | def sample(self, video_out, states=None):
135 | """reconstruct the relation using greedy search"""
136 | sampled_ids = []
137 | inputs = video_out.unsqueeze(1)
138 | for i in range(self.max_seq_length):
139 | hiddens, states = self.lstm(inputs, states)
140 | outputs = self.linear(hiddens.squeeze(1))
141 | _, predicted = outputs.max(1)
142 | sampled_ids.append(predicted)
143 | inputs = self.embed(predicted)
144 | inputs = inputs.unsqueeze(1)
145 |
146 | sampled_ids = torch.stack(sampled_ids, 1)
147 | return sampled_ids
148 |
--------------------------------------------------------------------------------
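A tiny sketch of the `soft_attention` step above with random tensors (shapes only, not trained weights). The `visual_dim=2048+5` and `word_dim=300` match the feature sizes used elsewhere in this repo; `input_size=hidden_size=512` is an assumption for illustration:

```python
import torch

model = AttHierarchicalGround(input_size=512, hidden_size=512,
                              visual_dim=2048 + 5, word_dim=300)
inputs = torch.randn(2, 120, 512)   # (batch, seq_len, feat_dim)
context = torch.randn(2, 512)       # e.g. the last segment-LSTM hidden state
vfeat, beta = model.soft_attention(inputs, context)
print(vfeat.shape, beta.shape)      # torch.Size([2, 512]) torch.Size([2, 120])
assert torch.allclose(beta.sum(1), torch.ones(2), atol=1e-5)
```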
/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | cffi
3 | certifi==2020.11.8
4 | click==7.1.2
5 | easydict==1.9
6 | joblib==0.17.0
7 | nltk==3.5
8 | numpy==1.19.4
9 | opencv-python==4.4.0.46
10 | Pillow==8.0.1
11 | regex==2020.11.13
12 | torch==0.4.1
13 | tqdm==4.54.0
14 |
--------------------------------------------------------------------------------
/tools/sample_video_feature.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.insert(0, '../')
3 | from dataloader.util import *
4 |
5 | def sample_video_feature(src, dst, sample_list_file):
6 | nframes, nbbox, feat_dim = 120, 40, 2048+5
7 | samples = load_file(sample_list_file)
8 | sp_num = len(samples)
9 | for it, item in enumerate(samples):
10 | # if it < 24000: continue
11 | # if it >= 24000: break
12 | video_name, frame_count, width, height, relation = item
13 | dst_file = osp.join(dst, video_name)
14 | src_dir = osp.join(src, video_name)
15 | if osp.exists(dst_file+'.npy'):
16 | print('exist {}.npy'.format(dst_file))
17 | continue
18 | get_video_feature(src_dir, dst_file, frame_count, width, height, nbbox, nframes, feat_dim)
19 | if it % 200 == 0:
20 | print(it, sp_num)
21 |
22 |
23 | def main():
24 | dataset = 'vidvrd/'
25 |     root_dir = '/path/to/workspace/'  # this directory contains two folders: ground_data and vRGV
26 | video_feature_path = osp.join(root_dir, 'ground_data/{}/frame_feature/'.format(dataset))
27 | video_feature_cache = osp.join(root_dir, 'ground_data/{}/video_feature/'.format(dataset))
28 | dset_dir = '../dataset/'+dataset+'/'
29 | train_list = osp.join(dset_dir, 'vrelation_train.json')
30 | val_list = osp.join(dset_dir, 'vrelation_val.json')
31 | sample_video_feature(video_feature_path, video_feature_cache, val_list)
32 | # sample_video_feature(video_feature_path, video_feature_cache, train_list)
33 |
34 |
35 | if __name__ == "__main__":
36 | main()
37 |
--------------------------------------------------------------------------------
/tools/util.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : util.py
5 | # ====================================================
6 | import json
7 | import os.path as osp
8 | import os
9 | import shutil
10 | import subprocess
11 |
12 |
13 | def load_file(file_name):
14 |
15 | with open(file_name, 'r') as fp:
16 |         if osp.splitext(file_name)[1] == '.txt':
17 |             annos = fp.readlines()
18 |             annos = [line.rstrip() for line in annos]
19 |         elif osp.splitext(file_name)[1] == '.json':
20 | annos = json.load(fp)
21 |
22 | return annos
23 |
24 | def set_gpu_devices(gpu_id):
25 | gpu = ''
26 | if gpu_id != -1:
27 | gpu = str(gpu_id)
28 | os.environ['CUDA_VISIBLE_DEVICES'] = gpu
29 |
30 |
31 | def extract_frames(video, dst):
32 | with open(os.devnull, "w") as ffmpeg_log:
33 | if osp.exists(dst):
34 | # print(" cleanup: " + dst + "/")
35 | shutil.rmtree(dst)
36 | os.makedirs(dst)
37 | video_to_frames_command = ["ffmpeg",
38 | # (optional) overwrite output file if it exists
39 | '-y',
40 | '-i', video, # input file
41 | '-qscale:v', "2", # quality for JPEG
42 | '{0}/%06d.jpg'.format(dst)]
43 | subprocess.call(video_to_frames_command,
44 | stdout=ffmpeg_log, stderr=ffmpeg_log)
45 |
--------------------------------------------------------------------------------
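A hedged usage sketch for the helpers above; the paths are placeholders, and `extract_frames` requires ffmpeg on the PATH:

```python
extract_frames('/path/to/video.mp4', '/path/to/frames/video_name')
# dst is recreated and filled with 000001.jpg, 000002.jpg, ... (JPEG quality 2)

annos = load_file('../dataset/vidvrd/vrelation_val.json')  # parsed json list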
/tools/word_feature.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : word_feature.py
5 | # ====================================================
6 | import numpy as np
7 | import json
8 | import os.path as osp
9 | import pickle as pkl
10 | from util import load_file
11 |
12 |
13 |
14 |
15 | def word_selection(word_file, feature_file, out_file):
16 |     """
17 |     Select GloVe vectors for the words that appear in the relation vocabulary.
18 |     :param word_file: json file mapping relation phrases (joined with '_') to indices
19 |     :param feature_file: GloVe text file, one word and its vector per line
20 |     :param out_file: output pickle of a {word: feature} dict
21 |     :return:
22 |     """
23 | with open(word_file, 'r') as fp:
24 | word_idx = json.load(fp)
25 |
26 | keys = []
27 | for key, item in word_idx.items():
28 | key = key.split('_')
29 | for k in key:
30 | keys.append(k)
31 | keys = set(keys)
32 | keys = sorted(list(keys))
33 | print(len(keys))
34 | print(keys)
35 |
36 | with open(feature_file, 'r') as fp:
37 | word_glove = fp.readlines()
38 |
39 | word_feature = {}
40 | for wg in word_glove:
41 | wg = wg.rstrip('\n').split(' ')
42 | if wg[0] in keys:
43 | word = wg[0]
44 | feature = np.asarray(wg[1:])
45 | print(feature.size)
46 | word_feature[word] = feature
47 |
48 | with open(out_file, 'wb') as fp:
49 | pkl.dump(word_feature, fp)
50 |
51 | print('finished')
52 |
53 | def main():
54 |
55 | root_dir = '/storage/jbxiao/workspace/'
56 | word_file = osp.join(root_dir, 'ground_code/dataset/vidvrd/word_idx.json')
57 |
58 | feature_file = osp.join(root_dir, 'ground_data/glove/glove.6B.300d.txt')
59 |
60 | out_file = osp.join(root_dir, 'ground_data/glove/vidvrd_word_glove.pkl')
61 |
62 | word_selection(word_file, feature_file, out_file)
63 |
64 |
65 | if __name__ == "__main__":
66 | main()
67 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # ====================================================
2 | # @Author : Xiao Junbin
3 | # @Email : junbin@comp.nus.edu.sg
4 | # @File : utils.py
5 | # ====================================================
6 | import json
7 | import os
8 | import os.path as osp
9 | import shutil
10 | import numpy as np
11 | import pickle as pkl
12 |
13 |
14 | def clip_gradient(optimizer, grad_clip):
15 | """
16 | Clips gradients computed during backpropagation to avoid explosion of gradients.
17 | :param optimizer: optimizer with the gradients to be clipped
18 | :param grad_clip: clip value
19 | """
20 | for group in optimizer.param_groups:
21 | for param in group['params']:
22 | if param.grad is not None:
23 | param.grad.data.clamp_(-grad_clip, grad_clip)
24 |
25 |
26 | def save_results(save_name, data):
27 |
28 | print('Save to {}'.format(save_name))
29 |
30 | path = osp.dirname(save_name)
31 | if not osp.exists(path):
32 | os.makedirs(path)
33 |
34 | with open(save_name, 'w') as fp:
35 | json.dump(data, fp)
36 |
37 |
38 | def delete(vname):
39 | if vname != '':
40 | frame_dir = '../ground_data/vidor/frames/'
41 | print('Clean up {}'.format(vname))
42 | shutil.rmtree(osp.join(frame_dir, vname))
43 |
44 | def sort_bbox(bboxes, width, height):
45 |     """
46 |     Sort bboxes from top-left to bottom-right by their centre points.
47 |     :param bboxes: (N, 4) array of (x1, y1, x2, y2); width/height give the image size
48 |     :return: index array that sorts the boxes
49 |     """
50 |     x_c = (bboxes[:, 0] + bboxes[:, 2]) / 2  # box centre, x
51 |     y_c = (bboxes[:, 1] + bboxes[:, 3]) / 2  # box centre, y
52 |
53 | points = []
54 | for x, y in zip(x_c, y_c):
55 | points.append((y-1)*width+x)
56 |
57 | index = np.argsort(points)
58 |
59 | return index
60 |
61 |
62 | def pkload(file):
63 | data = None
64 | if osp.exists(file) and osp.getsize(file) > 0:
65 | with open(file, 'rb') as fp:
66 | data = pkl.load(fp)
67 | # print('{} does not exist'.format(file))
68 | return data
69 |
70 |
71 | def pkdump(data, file):
72 | dirname = osp.dirname(file)
73 | if not osp.exists(dirname):
74 | os.makedirs(dirname)
75 | with open(file, 'wb') as fp:
76 | pkl.dump(data, fp)
77 |
78 |
79 |
80 |
81 |
--------------------------------------------------------------------------------
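A quick check of `sort_bbox` above on two toy boxes (values are illustrative); the index it returns orders boxes by their centre's top-left to bottom-right position:

```python
import numpy as np

boxes = np.array([[100., 100., 200., 200.],   # centre (150, 150)
                  [ 10.,  10.,  50.,  50.]])  # centre ( 30,  30)
idx = sort_bbox(boxes, width=320, height=240)
print(idx)  # [1 0]: the top-left box comes first
```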