├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── data
    ├── coco_semantics.pickle
    ├── imagenet_data
    │   ├── imagenet_count.txt
    │   ├── imagenet_tree.txt
    │   └── imagenet_words.txt
    ├── info.json
    └── pascal_voc_semantics.pickle
├── lib
    ├── __init__.py
    ├── boxTools.py
    ├── dppTools.py
    └── dpp_loss_layer.py
└── tools
    └── Semantic_Similarity.ipynb


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .ipynb_checkpoints
3 | lib/build
4 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "py-faster-rcnn"]
2 | 	path = py-faster-rcnn
3 | 	url = https://github.com/azadis/py-faster-rcnn
4 | 	branch = lddp_modification
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | LDDP
  2 | 
  3 | UC Berkeley's Standard Copyright and Disclaimer Notice:
  4 | 
  5 | Copyright ©2017. The Regents of the University of California (Regents). All
  6 | Rights Reserved. Permission to use, copy, modify, and distribute this software
  7 | and its documentation for educational, research, and not-for-profit purposes,
  8 | without fee and without a signed licensing agreement, is hereby granted,
  9 | provided that the above copyright notice, this paragraph and the following two
 10 | paragraphs appear in all copies, modifications, and distributions. Contact The
 11 | Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510,
 12 | Berkeley, CA 94720-1620, (510) 643-7201, for commercial licensing
 13 | opportunities.
 14 | 
 15 | Samaneh Azadi, Jiashi Feng, Trevor Darrell, University of
 16 | California, Berkeley.
 17 | 
 18 | IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
 19 | INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
 20 | THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
 21 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 22 | 
 23 | REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 24 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 25 | THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS
 26 | PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
 27 | UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 28 | 
 29 | 
 30 | ************************************************************************
 31 | 
 32 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
 33 | 
 34 | This project, LDDP, incorporates material from the project(s)
 35 | listed below (collectively, "Third Party Code").  Microsoft is not the
 36 | original author of the Third Party Code.  The original copyright notice
 37 | and license under which Microsoft received such Third Party Code are set
 38 | out below. This Third Party Code is licensed to you under their original
 39 | license terms set forth below.  Microsoft reserves all other rights not
 40 | expressly granted, whether by implication, estoppel or otherwise.
 41 | 
 42 | 1. Faster R-CNN, (https://github.com/rbgirshick/py-faster-rcnn/)
 43 | 
 44 | The MIT License (MIT)
 45 | 
 46 | Copyright (c) 2015 Microsoft Corporation
 47 | 
 48 | Permission is hereby granted, free of charge, to any person obtaining a copy
 49 | of this software and associated documentation files (the "Software"), to deal
 50 | in the Software without restriction, including without limitation the rights
 51 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 52 | copies of the Software, and to permit persons to whom the Software is
 53 | furnished to do so, subject to the following conditions:
 54 | 
 55 | The above copyright notice and this permission notice shall be included in
 56 | all copies or substantial portions of the Software.
 57 | 
 58 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 59 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 60 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 61 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 62 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 63 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 64 | THE SOFTWARE.
 65 | 
 66 | 
 67 | 
 68 | 2.	Caffe, (https://github.com/BVLC/caffe/)
 69 | 
 70 | COPYRIGHT
 71 | 
 72 | All contributions by the University of California:
 73 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
 74 | All rights reserved.
 75 | 
 76 | All other contributions:
 77 | Copyright (c) 2014, 2015, the respective contributors
 78 | All rights reserved.
 79 | 
 80 | Caffe uses a shared copyright model: each contributor holds copyright
 81 | over their contributions to Caffe. The project versioning records all
 82 | such contribution and copyright details. If a contributor wants to
 83 | further mark their specific copyright on a particular contribution,
 84 | they should indicate their copyright solely in the commit message of
 85 | the change when it is committed.
 86 | 
 87 | The BSD 2-Clause License
 88 | 
 89 | Redistribution and use in source and binary forms, with or without
 90 | modification, are permitted provided that the following conditions
 91 | are met:
 92 | 
 93 | 1. Redistributions of source code must retain the above copyright notice,
 94 | this list of conditions and the following disclaimer.
 95 | 
 96 | 2. Redistributions in binary form must reproduce the above copyright
 97 | notice, this list of conditions and the following disclaimer in the
 98 | documentation and/or other materials provided with the distribution.
 99 | 
100 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
101 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
102 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
103 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
104 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
105 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
106 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
107 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
108 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
109 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
110 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
111 | 
112 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
113 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # LDDP: Learning Detection with Diverse Proposals
  2 | 
  3 | By Samaneh Azadi, Jiashi Feng, Trevor Darrell at UC Berkeley.
  4 | 
  5 | ### Introduction: 
  6 | LDDP is proposed to predict a set of diverse and informative proposals with enriched representations which is able to augment object detection architectures. 
  7 | LDDP considers both label-level contextual information and spatial layout relationships between object proposals without increasing the number of parameters of the network, and thus improves location and category specifications of final detected bounding boxes substantially during both training and inference schemes.
  8 | This implementation is built based on [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn) framework but can be modified for other detection architectures.
  9 | For more information on LDDP, please refer to the [arxiv preprint](https://arxiv.org/pdf/1704.03533.pdf) which will be published at CVPR 2017. 
 10 | 
 11 | ### License
 12 | LDDP is licensed for open non-commercial distribution under the UC Regents license; see LICENSE. Its dependencies, such as Caffe and Faster R-CNN, are subject to their own respective licenses.
 13 | 
 14 | ### Citing LDDP 
 15 | If you find LDDP useful in your research, please cite:
 16 | 
 17 | 	@article{azadi2017learning,
 18 | 	  title={Learning Detection with Diverse Proposals},
 19 | 	  author={Azadi, Samaneh and Feng, Jiashi and Darrell, Trevor},
 20 | 	  journal={arXiv preprint arXiv:1704.03533},
 21 | 	  year={2017}
 22 | 	} 
 23 | 
 24 | Requirements and installation instructions are similar to [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn), but we mention them again for your convenience.
 25 | 	
 26 | ### Requirements: software
 27 | 
 28 | 1. Requirements for `Caffe` and `pycaffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html))
 29 | 
 30 |   **Note:** Caffe *must* be built with support for Python layers!
 31 | 
 32 |   ```make
 33 |   # In your Makefile.config, make sure to have this line uncommented
 34 |   WITH_PYTHON_LAYER := 1
 35 |   # Unrelatedly, it's also recommended that you use CUDNN
 36 |   USE_CUDNN := 1
 37 |   ```
 38 |   You can download my [Makefile.config](https://people.eecs.berkeley.edu/~sazadi/LDDP/Makefile.config) for reference.
 39 | 
 40 | 2. Python packages you might not have: `cython`, `python-opencv`, `easydict`
 41 | 
 42 | ### Requirements: hardware
 43 | Hardware requirements are similar to the those for running [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn/blob/96dc9f1dea3087474d6da5a98879072901ee9bf9/README.md#requirements-hardware).
 44 | 
 45 | ### Installation
 46 | 
 47 | 1. Clone the LDDP repository
 48 |   ```Shell
 49 |   # Make sure to clone with --recursive
 50 |   git clone --recursive https://github.com/azadis/LDDP.git
 51 |   ```
 52 | 
 53 | 2. We'll call the directory that you cloned LDDP into `LDDP_ROOT`
 54 | 
 55 | 
 56 | 3. Build the Cython modules
 57 |     ```Shell
 58 |     cd $LDDP_ROOT/py-faster-rcnn/lib
 59 |     make
 60 |     ```
 61 | 
 62 | 4. Build Caffe and pycaffe
 63 |     ```Shell
 64 |     cd $LDDP_ROOT/py-faster-rcnn/caffe-fast-rcnn
 65 |     # Now follow the Caffe installation instructions here:
 66 |     #   http://caffe.berkeleyvision.org/installation.html
 67 | 
 68 |     # If you're experienced with Caffe and have all of the requirements installed
 69 |     # and your Makefile.config in place, then simply do:
 70 |     make -j8 && make pycaffe
 71 |     ```
 72 | ### Installation for training and testing models
 73 | 1. Download the training, validation, test data and VOCdevkit
 74 | 
 75 | 	```Shell
 76 | 	wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
 77 | 	wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
 78 | 	wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
 79 | 	```
 80 | 
 81 | 2. Extract all of these tars into one directory named `VOCdevkit`
 82 | 
 83 | 	```Shell
 84 | 	tar xvf VOCtrainval_06-Nov-2007.tar
 85 | 	tar xvf VOCtest_06-Nov-2007.tar
 86 | 	tar xvf VOCdevkit_08-Jun-2007.tar
 87 | 	```
 88 | 
 89 | 3. It should have this basic structure
 90 | 
 91 | 	```Shell
 92 |   	$VOCdevkit/                           # development kit
 93 |   	$VOCdevkit/VOCcode/                   # VOC utility code
 94 |   	$VOCdevkit/VOC2007                    # image sets, annotations, etc.
 95 |   	# ... and several other directories ...
 96 |   	```
 97 | 
 98 | 4. Create symlinks for the PASCAL VOC dataset
 99 | 
100 | 	```Shell
101 |     cd $LDDP_ROOT/py-faster-rcnn/data
102 |     ln -s $VOCdevkit VOCdevkit2007
103 |     ```
104 |     Using symlinks is a good idea because you will likely want to share the same PASCAL dataset installation between multiple projects.
105 | 5. [Optional] follow similar steps to get PASCAL VOC 2010 and 2012.
106 | 6. [Optional] If you want to use COCO, please see the notes [here](https://github.com/rbgirshick/py-faster-rcnn/blob/96dc9f1dea3087474d6da5a98879072901ee9bf9/data/README.md).
107 | 7. Follow the next sections to download pre-trained ImageNet models.
108 | 
109 | ### Download pre-trained ImageNet models
110 | 
111 | Pre-trained ImageNet models can be downloaded for the three networks described in the paper: ZF and VGG16.
112 | 
113 | ```Shell
114 | cd $LDDP_ROOT/py-faster-rcnn
115 | ./data/scripts/fetch_imagenet_models.sh
116 | ```
117 | 
118 | ### Usage
119 | To train and test the LDDP end-to-end detection framework:
120 | ```Shell
121 | cd $LDDP_ROOT/py-faster-rcnn
122 | ./experiments/scripts/LDDP_end2end.sh [GPU_ID] [NET] [--set ...]
123 | # GPU_ID is the GPU you want to train on
124 | # NET in {ZF, VGG_CNN_M_1024, VGG16} is the network arch to use
125 | # --set ... allows you to specify fast_rcnn.config options, e.g.
126 | #   --set EXP_DIR seed_rng1701 RNG_SEED 1701 TRAIN.SCALES [400,500,600,700]
127 | ```
128 | 
129 | Trained LDDP networks are saved under:
130 | 
131 | ```
132 | output/<experiment directory>/<dataset name>/
133 | ```
134 | 
135 | Test outputs are saved under:
136 | 
137 | ```
138 | output/<experiment directory>/<dataset name>/<network snapshot name>/
139 | ```
140 | 
141 | Semantic Similarity matrices used in the [paper](https://arxiv.org/pdf/1704.03533.pdf) are stored as pickle files at:
142 | ```Shell
143 | $LDDP_ROOT/data
144 | ```
145 | An example ipython script to generate semantic similarity matrices for PASCAL VOC and COCO data sets is located at:
146 | 
147 | ```Shell
148 | $LDDP_ROOT/tools/Semantic_Similarity.ipynb
149 | ```
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 
161 |  
162 | 


--------------------------------------------------------------------------------
/data/info.json:
--------------------------------------------------------------------------------
  1 | {
  2 | 	"pascal_cats":[
  3 | 		"__background__",
  4 | 		"aeroplane",
  5 | 		"bicycle",
  6 | 		"bird",
  7 | 		"boat",
  8 | 		"bottle", 
  9 | 		"bus", 
 10 | 		"car", 
 11 | 		"cat", 
 12 | 		"chair",
 13 | 		"cow",
 14 | 		"diningtable",
 15 | 		"dog",
 16 | 		"horse",
 17 | 		"motorbike",
 18 | 		"person",
 19 | 		"pottedplant",
 20 | 		"sheep",
 21 | 		"sofa",
 22 | 		"train", 
 23 | 		"tvmonitor"
 24 | 	],
 25 | 	"coco_cats":[
 26 | 		"__background__",
 27 | 		"person",
 28 | 		"bicycle",
 29 | 		"car",
 30 | 		"motorcycle",
 31 | 		"airplane",
 32 | 		"bus",
 33 | 		"train",
 34 | 		"truck", 
 35 | 		"boat",
 36 | 		"traffic light",
 37 | 		"fire hydrant",
 38 | 		"stop sign",
 39 | 		"parking meter",
 40 | 		"bench", 
 41 | 		"bird",
 42 | 		"cat",
 43 | 		"dog",
 44 | 		"horse",
 45 | 		"sheep",
 46 | 		"cow",
 47 | 		"elephant",
 48 | 		"bear",
 49 | 		"zebra",
 50 | 		"giraffe",
 51 | 		"backpack", 
 52 | 		"umbrella", 
 53 | 		"handbag", 
 54 | 		"tie",
 55 | 		"suitcase",
 56 | 		"frisbee", 
 57 | 		"skis", 
 58 | 		"snowboard", 
 59 | 		"sports ball",
 60 | 		"kite",
 61 | 		"baseball bat",
 62 | 		"baseball glove",
 63 | 		"skateboard",
 64 | 		"surfboard",
 65 | 		"tennis racket",
 66 | 		"bottle",
 67 | 		"wine glass",
 68 | 		"cup", 
 69 | 		"fork",
 70 | 		"knife", 
 71 | 		"spoon",
 72 | 		"bowl",
 73 | 		"banana",
 74 | 		"apple",
 75 | 		"sandwich",
 76 | 		"orange",
 77 | 		"broccoli",
 78 | 		"carrot",
 79 | 		"hot dog", 
 80 | 		"pizza", 
 81 | 		"donut",
 82 | 		"cake",
 83 | 		"chair",
 84 | 		"couch",
 85 | 		"potted plant",
 86 | 		"bed",
 87 | 		"dining table",
 88 | 		"toilet",
 89 | 		"tv",
 90 | 		"laptop", 
 91 | 		"mouse",
 92 | 		"remote", 
 93 | 		"keyboard", 
 94 | 		"cell phone",
 95 | 		"microwave",
 96 | 		"oven", 
 97 | 		"toaster", 
 98 | 		"sink",
 99 | 		"refrigerator",
100 | 		"book",
101 | 		"clock",
102 | 		"vase",
103 | 		"scissors",
104 | 		"teddy bear",
105 | 		"hair drier", 
106 | 		"toothbrush"]
107 | 
108 | }


--------------------------------------------------------------------------------
/data/pascal_voc_semantics.pickle:
--------------------------------------------------------------------------------
 1 | cnumpy.core.multiarray
 2 | _reconstruct
 3 | p0
 4 | (cnumpy
 5 | ndarray
 6 | p1
 7 | (I0
 8 | tp2
 9 | S'b'
10 | p3
11 | tp4
12 | Rp5
13 | (I1
14 | (I20
15 | I20
16 | tp6
17 | cnumpy
18 | dtype
19 | p7
20 | (S'f8'
21 | p8
22 | I0
23 | I1
24 | tp9
25 | Rp10
26 | (I3
27 | S'<'
28 | p11
29 | NNNI-1
30 | I-1
31 | I0
32 | tp12
33 | bI00
34 | S"\x00\x00\x00\x00\x00\x00\xf0?im\xcc\x06L\xec\xe0?\x00\x00\x00\x00\x00\x00\x00\x00>\x88\xf8r\xeb\xa3\xe7?\x00\x00\x00\x00\x00\x00\x00\x00P}C\xb6\xaa\xdf\xdf?{Q\xe5lr \xe3?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00#q\r\x8d\xb9\xc1\xdd?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa5\xc2\xcdO\xfd\xce\xe0?\x00\x00\x00\x00\x00\x00\x00\x00im\xcc\x06L\xec\xe0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00L\xf2\xf7\\\xff\xcf\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x9f9b\xae\xb1\x8e\xdd?P\xb3@q\\\xf7\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00[I\nIR\x9e\xe0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbfvu\xc2\xf9\x0c\xdf?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00>\x88\xf8r\xeb\xa3\xe7?L\xf2\xf7\\\xff\xcf\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x8b\xc1\xc3\xf9D\xbf\xe0?\x06fZ\xb4PE\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00{'\xa6\x10\x82\x1f\xdf?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00b\xfa\xfa\xcca\xb6\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P}C\xb6\xaa\xdf\xdf?\x9f9b\xae\xb1\x8e\xdd?\x00\x00\x00\x00\x00\x00\x00\x00\x8b\xc1\xc3\xf9D\xbf\xe0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xef\xa9\xf9CA|\xe0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1e\xcb\x9d?\tO\xda?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00^\xd5\x95\xf96|\xeb?\x00\x00\x00\x00\x00\x00\x00\x00{Q\xe5lr \xe3?P\xb3@q\\\xf7\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x06fZ\xb4PE\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\xef\xa9\xf9CA|\xe0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<\xea\x1b\xcad\x05\xe6?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x98\xa6K\xb4|k\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xffDrLJ3\xe7?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\xfc\x80\xc6\x02o\x00\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00_:\xe6N\xe9\x1f\xea?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfc\x80\xc6\x02o\x00\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xafh\x13\x85\xe4\x82\xe2?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xffDrLJ3\xe7?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00&$u\xa4\xfd\xb3\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00#q\r\x8d\xb9\xc1\xdd?[I\nIR\x9e\xe0?\x00\x00\x00\x00\x00\x00\x00\x00{'\xa6\x10\x82\x1f\xdf?\x00\x00\x00\x00\x00\x00\x00\x00\x1e\xcb\x9d?\tO\xda?<\xea\x1b\xcad\x05\xe6?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4j'\xd9:|\xdb?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00&$u\xa4\xfd\xb3\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00_:\xe6N\xe9\x1f\xea?\x00\x00\x00\x00\x00\x00\x00\x00\xafh\x13\x85\xe4\x82\xe2?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa5\xc2\xcdO\xfd\xce\xe0?\xbfvu\xc2\xf9\x0c\xdf?\x00\x00\x00\x00\x00\x00\x00\x00b\xfa\xfa\xcca\xb6\xe1?\x00\x00\x00\x00\x00\x00\x00\x00^\xd5\x95\xf96|\xeb?\x98\xa6K\xb4|k\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4j'\xd9:|\xdb?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?"
35 | p13
36 | tp14
37 | b.


--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azadis/LDDP/2cf990b47b4de9d6f835850a4affddefb0be6ab6/lib/__init__.py


--------------------------------------------------------------------------------
/lib/boxTools.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # LDDP
  5 | # Licensed under UC Berkeley's Standard Copyright [see LICENSE for details]
  6 | # Written by Samaneh Azadi
  7 | # --------------------------------------------------------
  8 | 
  9 | import numpy as np
 10 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes,bbox_transform
 11 | 
 12 | 
 13 | def IoU_target(bbox,gt):
 14 |     """compute IoU between bbox and gt
 15 |         where both of them are vectors
 16 |     """ 
 17 |     M = bbox.shape[0]
 18 |     x_1i = bbox[:,0]
 19 |     x_1j = gt[:,0]
 20 |     x_2i = bbox[:,2]
 21 |     x_2j = gt[:,2]
 22 |     y_1i = bbox[:,1]
 23 |     y_1j = gt[:,1]
 24 |     y_2i = bbox[:,3]
 25 |     y_2j = gt[:,3]
 26 | 
 27 |     w = (np.minimum(x_2i, x_2j) - np.maximum(x_1i, x_1j) + 1)
 28 |     h = (np.minimum(y_2i, y_2j) - np.maximum(y_1i, y_1j) + 1) 
 29 |     w = (w>0) * w
 30 |     h = (h>0) * h
 31 |     Intersection = w * h
 32 |     Area_i = (bbox[:,2] - bbox[:,0] + 1 ) * (bbox[:,3] - bbox[:,1] + 1)
 33 |     Area_j = (gt[:,2] - gt[:,0] + 1 ) * (gt[:,3] - gt[:,1] + 1)
 34 |     Union = Area_i + Area_j - Intersection
 35 |     if np.nonzero(Union ==0)[0].size:
 36 |         raise Exception("Union of boxes should not be zero")
 37 |     IoU = Intersection/Union 
 38 | 
 39 |     return IoU
 40 | 
 41 | def pair_Intersection(locations):
 42 |     """ compute intersection between each pair of boxes in 
 43 |      the locations matrix
 44 |      [x_1i,y_1i,x_2i,y_2i]=locations[i,0:4]
 45 |     """
 46 |     M = locations.shape[0]
 47 |     x_1i = np.reshape(np.repeat(locations[:,0],M),(M,M))
 48 |     x_1j = np.reshape(np.tile(locations[:,0],M),(M,M))
 49 |     x_2i = np.reshape(np.repeat(locations[:,2],M),(M,M))
 50 |     x_2j = np.reshape(np.tile(locations[:,2],M),(M,M))
 51 |     y_1i = np.reshape(np.repeat(locations[:,1],M),(M,M))
 52 |     y_1j = np.reshape(np.tile(locations[:,1],M),(M,M))
 53 |     y_2i = np.reshape(np.repeat(locations[:,3],M),(M,M))
 54 |     y_2j = np.reshape(np.tile(locations[:,3],M),(M,M))
 55 |     w = (np.minimum(x_2i, x_2j) - np.maximum(x_1i, x_1j) + 1)
 56 |     h = (np.minimum(y_2i, y_2j) - np.maximum(y_1i, y_1j) + 1) 
 57 |     w = (w>0) * w
 58 |     h = (h>0) * h
 59 |     Intersection = w * h
 60 |     return Intersection
 61 | 
 62 | def pair_IoU(locations):
 63 |     """ compute IoU between each pair of boxes in 
 64 |      the locations matrix
 65 |      [x_1i,y_1i,x_2i,y_2i]=locations[i,0:4] 
 66 |     """
 67 |     M = locations.shape[0]
 68 |     Intersection = pair_Intersection(locations)
 69 |     Area = (locations[:,2] - locations[:,0] + 1 ) * (locations[:,3] - locations[:,1] + 1)
 70 |     Area_i = np.reshape(np.repeat(Area,M),(M,M))
 71 |     Area_j = np.reshape(np.tile(Area,M),(M,M))
 72 |     Union = Area_i + Area_j - Intersection
 73 |     if np.nonzero(Union ==0)[0].size:
 74 |         raise Exception("Union of boxes should not be zero")
 75 |     IoU = Intersection/Union 
 76 |     return IoU
 77 |     
 78 | 
 79 | def unnormalize_box(Phi_labels, bbox_targets, boxes, stds, means,M, im_shape_w, im_shape_h):
 80 |     """
 81 |     un-normalize boxes by using stds and means
 82 |     """
 83 |     Phi_argmax = 4 * Phi_labels
 84 |     bbox_target = bbox_targets[np.tile(range(M),4),np.hstack((4*Phi_labels,4*Phi_labels+1,4*Phi_labels+2, 4*Phi_labels+3))]
 85 |     bbox_target = np.reshape(bbox_target,(M,4),order='F')
 86 |     bbox_target = bbox_target * stds[Phi_argmax/4,:] + means[Phi_argmax/4,:]
 87 |     unnormalized_bbox_targets = bbox_transform_inv(boxes, bbox_target)
 88 |     unnormalized_bbox_targets = clip_boxes(unnormalized_bbox_targets, (im_shape_w,im_shape_h))
 89 |     return unnormalized_bbox_targets
 90 | 
 91 | def find_local_argmax(Phi_labels, contributing_images, bbox_pred):
 92 |     """
 93 |     Find the index of the box with maximum score: [x1,y1,x2,y2]
 94 |     """
 95 |     M_cont = len(contributing_images)
 96 |     Phi_argmax = 4 * Phi_labels
 97 |     loc_argmax = bbox_pred[np.tile(contributing_images,4),np.hstack((Phi_argmax,Phi_argmax+1,Phi_argmax+2, Phi_argmax+3))]
 98 |     loc_argmax = np.reshape(loc_argmax,(M_cont,4),order='F')
 99 |     return loc_argmax
100 | 
101 | 


--------------------------------------------------------------------------------
/lib/dppTools.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # LDDP
  5 | # Licensed under UC Berkeley's Standard Copyright [see LICENSE for details]
  6 | # Written by Samaneh Azadi
  7 | # --------------------------------------------------------
  8 | 
  9 | import numpy as np
 10 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes,bbox_transform
 11 | from boxTools import *
 12 | from fast_rcnn.config import cfg
 13 | 
 14 | 
 15 | class DPP():
 16 |     def __init__(self,stds=[],means=[],sim_classes=[],epsilon=0.01,loss_weight=0.001):
 17 |         
 18 |         self.stds =stds
 19 |         self.means = means
 20 |         self.sim_classes = sim_classes
 21 |         self.epsilon = epsilon
 22 |         self._loss_weight = loss_weight
 23 |         
 24 |     
 25 |     def select_bg(self,Phi_labels,boxes,labels,bbox_pred,keeps_Y,good_gt_overlap,M,im_shape_w,im_shape_h):
 26 |         """
 27 |         Find B in p(B|Xb)
 28 |         """
 29 |         selected_item = range(M)
 30 |         prob_dpp = np.ones((M,))
 31 |         ignores=[]
 32 |         dict_keeps_Y = {}
 33 |         for i,j in keeps_Y.iteritems():
 34 |             if j not in dict_keeps_Y:
 35 |                 dict_keeps_Y[j]=[]
 36 |             dict_keeps_Y[j].append(i)
 37 |     
 38 |         for k in range(M):
 39 |             if (k in keeps_Y and keeps_Y[k]==Phi_labels[k]) \
 40 |             or (k in good_gt_overlap and Phi_labels[k]==labels[k] and labels[k]>0):
 41 |                 ignores.append(k)
 42 |             else:
 43 |                 label_k = labels[k]
 44 |                 if label_k in dict_keeps_Y:
 45 |                     loc_lbl = bbox_pred[[k],4*label_k:4*(label_k+1)]
 46 |                     loc_lbl = loc_lbl * self.stds[label_k,:] + self.means[label_k,:]
 47 |                     pbox = bbox_transform_inv(boxes[[k],:], loc_lbl)
 48 |                     pbox = clip_boxes(pbox, (im_shape_w,im_shape_h))
 49 |                     pbox = np.reshape(np.tile(pbox,len(dict_keeps_Y[label_k])),(len(dict_keeps_Y[label_k]),4))
 50 |             
 51 |                     Y_selected_ll = bbox_pred[dict_keeps_Y[label_k],4*label_k:4*(label_k+1)]
 52 |                     Y_selected_ll = Y_selected_ll*self.stds[label_k,:] + self.means[label_k,:]
 53 |                     Y_selected_pbox = bbox_transform_inv(boxes[dict_keeps_Y[label_k],:], Y_selected_ll)
 54 |                     Y_selected_pbox = clip_boxes(Y_selected_pbox, (im_shape_w,im_shape_h))
 55 |                     if np.max(IoU_target(pbox,Y_selected_pbox)) > cfg.TRAIN.IGNORANCE:
 56 |                         ignores.append(k)
 57 | 
 58 |         selected_item = np.array([x for ii,x in enumerate(selected_item) if ii not in ignores])
 59 |         prob_dpp = [x for ii,x in enumerate(prob_dpp) if ii not in ignores]
 60 |         return selected_item,prob_dpp
 61 | 
 62 |     def dpp_greedy(self,S, scores_s, score_power, max_per_image, among_ims, num_gt_per_img=1000, close_thr=0.0001): 
 63 |         """ 
 64 |         Greedy optimization to select boxes
 65 |         S: similarity matrix
 66 |         scores_s : predicted scores over different categories
 67 | 
 68 |         """
 69 |         prob_thresh = cfg.TEST.PROB_THRESH
 70 |         S = S[among_ims,:][:,among_ims]
 71 |         scores_s = scores_s[among_ims]    
 72 | 
 73 |         M = S.shape[0]
 74 |     
 75 |         #keep: selected_boxes
 76 |         keep = []
 77 |     
 78 |         #left : boxes not selected yet
 79 |         left = np.zeros((M,3))
 80 |         left[:,0] = np.arange(M) #box number
 81 |         left[:,1] = 1 # 0/1? Is the box left?
 82 |         selected_prob = []
 83 |         while (len(keep) < max_per_image) and sum(left[:,1])>0:
 84 |             z = np.zeros((M,1))
 85 |             z[keep] = 1
 86 |             sum_scores = (score_power*np.log(scores_s).T).dot(z)
 87 |             prob_rest = np.zeros((M,))
 88 |             left_indices = np.where(left[:,1]==1)[0]
 89 |             done_indices = np.where(left[:,1]==0)[0]
 90 |             if len(keep)>0:
 91 |                 S_prev = S[keep,:][:,keep]
 92 |                 det_D = np.linalg.det(S_prev)
 93 |                 d_1 = np.linalg.inv(S_prev)
 94 |             else:
 95 |                 det_D = 1
 96 |                 d_1 = 0
 97 |             # ====================================================================
 98 |             #     |D  a^T|
 99 |             # det(|a    b|)= (b - a D^{-1} a^T)det(D)
100 |             #
101 |             # Here "D" = S_prev and "a","b" are the similarity values added by each single item
102 |             # in left_indices.
103 |             # To avoid using a for loop, we compute the above det for all items in left_indices
104 |             # all at once through appropriate inner vector multiplications as the next line:  
105 |             
106 |             # ====================================================================
107 |             if len(keep)>0:
108 |                 prob_rest[left_indices] =- np.sum(np.multiply(np.dot(S[left_indices,:][:,keep],d_1),S[left_indices,:][:,keep]),1)
109 | 
110 |             prob_rest[left_indices] = np.log((prob_rest[left_indices] + S[left_indices,left_indices]) * det_D)+\
111 |                            (sum_scores + score_power * np.log(scores_s[(left[left_indices,0]).astype(int)]))                
112 |             
113 |             prob_rest[done_indices] = np.min(prob_rest)-100
114 |             max_ind = np.argmax(prob_rest)
115 |             ind = left[max_ind,0]
116 |             close_inds = np.where(prob_rest >= (prob_rest[max_ind] + np.log(close_thr)))[0]
117 |             far_inds = np.where(prob_rest < (prob_rest[max_ind] + np.log(close_thr)))[0]
118 |             tops_prob_rest = np.argsort(-prob_rest[close_inds]).astype(int)
119 |             if len(keep) >= num_gt_per_img:
120 |                 break
121 |             elif len(keep)> 0:
122 |                 cost = np.max(S[np.array(range(M))[close_inds][tops_prob_rest],:][:,keep],1)
123 |                 good_cost = list(np.where(cost <= prob_thresh)[0])
124 |                 bad_cost = list(np.where(cost > prob_thresh)[0])
125 |                 if len(good_cost)>0:
126 |                     ind = np.array(range(M))[close_inds][tops_prob_rest[good_cost[0]]]
127 |                     keep.append(ind)
128 |                     left[ind,1] = 0
129 |                     #left[far_inds,1]=0
130 |                     selected_prob.append(prob_rest[max_ind])
131 |                 else:
132 |                     left[:,1]=0
133 | 
134 | 
135 |             else:
136 |                 keep.append(max_ind)
137 |                 left[max_ind,1] = 0
138 |                 selected_prob.append(prob_rest[max_ind])
139 | 
140 |             
141 |         return keep,selected_prob
142 |     
143 |     
144 |     def dpp_MAP(self,im_dets_pair, scores, boxes,sim_classes,score_thresh,epsilon,max_per_image,close_thr=0.00001):
145 |        """
146 |        DPP MAP inference
147 |        """
148 |        M0 = boxes.shape[0]
149 |        num_classes = scores.shape[1]
150 |        scores = scores[:,1:] #ignore background
151 |        
152 |        # consider only top 5 class scores per box
153 |        num_ignored = scores.shape[1]-5
154 |        sorted_scores = np.argsort(-scores,1)
155 |        ignored_cols = np.reshape(sorted_scores[:,-num_ignored:],(M0*num_ignored))
156 |        ignored_rows = np.repeat(range(0,sorted_scores.shape[0]),num_ignored)
157 |        scores[ignored_rows,ignored_cols] = 0
158 |        high_scores = np.nonzero(scores >= score_thresh)
159 |        lbl_high_scores = high_scores[1]
160 |        box_high_scores = high_scores[0]
161 |        scores_s = np.reshape(scores[box_high_scores, lbl_high_scores],(lbl_high_scores.shape[0],))
162 | 
163 |    
164 |        boxes = boxes[:,4:]
165 |        boxes_s = np.reshape(boxes[np.tile(box_high_scores,4), np.hstack((4*lbl_high_scores,4*lbl_high_scores+1,\
166 |        4*lbl_high_scores+2,4*lbl_high_scores+3))] ,(lbl_high_scores.shape[0],4),order='F')
167 |        M = boxes_s.shape[0]
168 |        sim_power = cfg.TEST.SIM_POWER
169 |        sim_boxes = sim_classes[(lbl_high_scores),:][:,(lbl_high_scores)]
170 |        sim_boxes = sim_boxes**sim_power
171 |        keep_ = {} 
172 | 
173 |        if M>0:
174 |            IoU = pair_IoU(boxes_s)
175 |            IoU[np.where(IoU<cfg.TEST.DPP_NMS)] = 0
176 |            # S = IoU * sim + \epsilon *I_M
177 |            S = np.multiply(IoU,sim_boxes) + epsilon * np.eye(M,M)
178 |            keep = self.dpp_greedy(S, scores_s, 1.0, max_per_image, np.array(range(M)), close_thr=close_thr)[0]
179 |            keep_['box_id'] = box_high_scores[keep]
180 |            keep_['box_cls'] = lbl_high_scores[keep]+1
181 |        else:
182 |           keep_['box_id'] = []
183 |           keep_['box_cls'] = []       
184 |    
185 |        return keep_ 
186 | 
187 | 
188 |     def compute_kernel(self, labels, boxes, Phi, loc_argmax, unnormalized_bbox_targets, im_shape_w, im_shape_h):
189 |         """
190 |         Compute DPP Kernel Matrix
191 |         """
192 |         M = boxes.shape[0] # number of rois of 1 image in the minibatch
193 | 
194 |         pred_boxes = bbox_transform_inv(boxes, loc_argmax)
195 |         pred_boxes = clip_boxes(pred_boxes, (im_shape_w,im_shape_h))
196 |         
197 |         IoU_with_gt_all = IoU_target(pred_boxes,unnormalized_bbox_targets)
198 |         # nonzero argmax labels for background images will have wrong target boxes
199 |         IoU_with_gt_all[np.where(labels == 0)[0]] = 0.5  
200 |         IoU_with_gt_all = IoU_with_gt_all
201 |         sim_images = self.sim_classes[(labels-1),:][:,(labels-1)]
202 | 
203 |         # Compute IoU, S, Phi, L
204 |         IoU = pair_IoU(pred_boxes)
205 |         S = np.multiply(IoU,sim_images) + self.epsilon * np.eye(M,M)
206 |         Phi = np.multiply(IoU_with_gt_all,Phi) 
207 |         L = np.reshape(np.repeat(Phi,M),(M,M))*S*np.reshape(np.tile(Phi,M),(M,M))
208 |         det_L_I = np.linalg.det(L + np.eye(M))
209 |         return IoU, S, L, IoU_with_gt_all, pred_boxes, det_L_I  
210 | 
211 |     def compute_log_p(self, Y, S_y, y, Phi, det_L_I, M, Phi_power):
212 |         """
213 |         log p(Y|Xy) = log det(L_Y) -log det( L+I)
214 |         """
215 |         
216 |         if len(Y)==0:
217 |             log_p=0
218 |         else:
219 |             log_p = 2*Phi_power * np.sum(np.multiply(y,np.reshape(np.log(Phi),(M,1)))) +np.log(np.linalg.det(S_y))-np.log(det_L_I)
220 |         return log_p
221 |     
222 |     
223 |     def Compute_Xy(self, Y, keeps_Y, labels, pred_boxes):
224 |         """
225 |         Find the set of proposals as Xy in p(Y|Xy)
226 |         """
227 |         dict_keeps_Y={}
228 |         for i,j in keeps_Y.iteritems():
229 |             if j not in dict_keeps_Y:
230 |                 dict_keeps_Y[j]=[]
231 |             dict_keeps_Y[j].append(i)
232 |         potential_bgs=[]
233 |         for label_k in dict_keeps_Y.keys():
234 |             idxs_k = np.where((labels)==label_k)[0]
235 |             pbox = pred_boxes[idxs_k,:]
236 |             pbox = np.reshape(np.tile(pbox,len(dict_keeps_Y[label_k])),(len(dict_keeps_Y[label_k])*len(idxs_k),4))
237 |             Y_selected_pbox = pred_boxes[dict_keeps_Y[label_k],:]
238 |             Y_selected_pbox = np.tile(Y_selected_pbox,(len(idxs_k),1))
239 |             max_ol_ll = np.max(np.reshape(IoU_target(pbox,Y_selected_pbox),(len(idxs_k),len(dict_keeps_Y[label_k]))),1)
240 |             potential_bgs.extend(list(idxs_k[np.where(max_ol_ll<cfg.TRAIN.IGNORANCE)[0]]))
241 |         potential_bgs = list(set(potential_bgs))
242 |         bgs = list(np.where((labels)==0)[0])
243 | 
244 |         Xy = np.array(list(Y)+bgs+potential_bgs)
245 |         labels_Xy = np.array(list(labels[Xy[0:len(Y)]])+list(np.zeros((len(bgs)+len(potential_bgs))))).astype(int)
246 |         return Xy, labels_Xy, potential_bgs
247 |     
248 |     def compute_diff_logp(self,labels_Xy, Xy, y, exp_cls_score, bbox_pred, boxes, Phi, unnormalized_bbox_targets, 
249 |         im_shape_w, im_shape_h,Phi_power, normalizer, is_y):
250 |         """
251 |         Compute gradient of log p(Y|Xy)
252 |         """
253 |         M = bbox_pred.shape[0] # number of rois of 1 image in the minibatch
254 |         K = exp_cls_score.shape[1] # number of categories
255 |         loc_argmax = find_local_argmax(labels_Xy, Xy, bbox_pred)
256 |         IoU, S, L, IoU_with_gt_all, pred_boxes, det_L_I  = self.compute_kernel(labels_Xy, boxes[Xy,:], Phi,
257 |                                                      loc_argmax, unnormalized_bbox_targets[Xy,:], 
258 |                                                      im_shape_w, im_shape_h)                        
259 |         M_y = len(Xy) 
260 |         Adj_L = np.linalg.inv(L + np.eye(M_y)) * det_L_I 
261 |         diag_L = np.diag(L)
262 |         Kii = np.reshape(np.repeat(diag_L,K),(M_y,K))/det_L_I
263 |         Phi_frac = np.divide(exp_cls_score[Xy,:], np.reshape(np.repeat(np.sum(exp_cls_score[Xy,1:],1),K),(M_y,K))+0.0001)
264 | 
265 |         y_repeated = np.reshape(np.repeat(y,K),(M_y,K))
266 |         if is_y:
267 |             dLoss_dbi_cj = np.multiply(1-y_repeated, -2*Phi_power * np.multiply(Phi_frac, Kii))
268 |             dLoss_dbi_cj[:,0] = 0
269 |             dLoss_dbi_c = 2*Phi_power * np.multiply(y, (1-diag_L/det_L_I))
270 |         else:
271 |             dLoss_dbi_cj = np.multiply(y_repeated, 2*Phi_power* np.multiply(Phi_frac, 1-Kii))
272 |             dLoss_dbi_cj[:,0] = 0
273 |             dLoss_dbi_c = np.multiply(1-y, -2*Phi_power*diag_L/det_L_I)
274 |         dLoss_db1 = np.zeros((M,K))        
275 |         dLoss_db1[Xy,:] = dLoss_dbi_cj
276 |         dLoss_db1[Xy,labels_Xy] = dLoss_dbi_c
277 |         dLoss_db1 *= normalizer        
278 |         return dLoss_db1
279 | 
280 |     def clip_grad(self,dLoss_db1, cls_score):
281 |         """
282 |         clip gradient above an specific threshold
283 |         """
284 |         bottom_diff_1_y =  0.1 * self._loss_weight * dLoss_db1
285 |         max_relative_diff_1_y=np.max(np.max(np.abs(np.divide(bottom_diff_1_y,cls_score+0.000001))))
286 |         MAX_RD_1 = 10
287 |         if  max_relative_diff_1_y > MAX_RD_1:
288 |             bottom_diff_1_y *= 1/max_relative_diff_1_y * MAX_RD_1
289 |         return bottom_diff_1_y
290 |         
291 |     def vis_detections(self,imdb, im, labels, dets,Phi_argmax,scores,thresh=0.6,scores2=[]):
292 |     
293 |         """Visualize Detections"""
294 |         with open(cfg.TRAIN.info, 'r') as fp:
295 |             info = json.load(fp)
296 |     
297 |         if imdb == 'pascal_voc':
298 |             classes = info['pascal_cats']
299 | 
300 |         elif imdb == 'coco':
301 |             classes = info['coco_cats']
302 |         im_=np.zeros((im.shape[1],im.shape[2],3))
303 |         im_[:,:,0]=im[2,:,:]
304 |         im_[:,:,1]=im[1,:,:]
305 |         im_[:,:,2]=im[0,:,:]
306 |         im=np.uint8((im_-np.min(im_))/np.max(im_-np.min(im_))*255)
307 |         class_names = [classes[ll] for ll in labels]
308 |         for i in xrange(np.minimum(10, dets.shape[0])):
309 |             bbox = dets[i, :4]
310 |             score = scores[i]
311 |             if len(scores2)>0:
312 |                 score2 = scores2[i]
313 |             else:
314 |                 score2 = 0
315 |             class_name = class_names[i]
316 |             class_phi = Phi_argmax[i]/4.0
317 |             if score > thresh:            
318 |                 plt.cla()
319 |                 plt.imshow(im)
320 |                 plt.gca().add_patch(
321 |                     plt.Rectangle((bbox[0], bbox[1]),
322 |                                   bbox[2] - bbox[0],
323 |                                   bbox[3] - bbox[1], fill=False,
324 |                                   edgecolor='g', linewidth=3)
325 |                     )
326 |                 plt.title('{}  {:.3f} {:.3f}'.format(class_name, score, score2))
327 |                 plt.show()
328 |                 
329 |     def extract_im_per_batch(self,N_im_per_batch,i_image, data):
330 |         """This functions is especially useful if N_im_per_batch >1"""
331 |         if N_im_per_batch==1:
332 |             im_shape_w=(data[i_image,:,:,:]).shape[1]
333 |             im_shape_h=(data[i_image,:,:,:]).shape[2]
334 |       
335 |         else:
336 |             zeros_data=np.nonzero(np.sum(np.sum((data[i_image,:,:,:]),0),1)==0)[0]
337 |             if zeros_data.size: 
338 |                 diff_B4 = max(np.nonzero(abs(np.diff(zeros_data)-1)))
339 |                 diff_B4 = -1 if not diff_B4 else diff_B4[0]
340 |                 im_shape_w = zeros_data[diff_B4+1]                        
341 |             else:
342 |                 im_shape_w = (data[i_image,:,:,:]).shape[1]                        
343 | 
344 |             zeros_data=np.nonzero(np.sum(np.sum((data[i_image,:,:,:]),0),0)==0)[0]
345 |             if zeros_data.size: 
346 |                 diff_B4 = max(np.nonzero(abs(np.diff(zeros_data)-1)))
347 |                 diff_B4 = -1 if not diff_B4 else diff_B4[0]
348 |                 im_shape_h = zeros_data[diff_B4+1]                        
349 |             else:
350 |                 im_shape_h = (data[i_image,:,:,:]).shape[2] 
351 |         return im_shape_w,im_shape_h  
352 | 
353 | 


--------------------------------------------------------------------------------
/lib/dpp_loss_layer.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # LDDP
  3 | # Licensed under UC Berkeley's Standard Copyright [see LICENSE for details]
  4 | # Written by Samaneh Azadi
  5 | # --------------------------------------------------------
  6 | 
  7 | import caffe
  8 | import numpy as np
  9 | import math
 10 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes,bbox_transform
 11 | import pickle
 12 | from fast_rcnn.config import cfg
 13 | from boxTools import *
 14 | from dppTools import DPP
 15 |             
 16 |         
 17 | 
 18 | class DPPLossLayer(caffe.Layer):
 19 |     """
 20 |     Compute the DPP Loss to apply diversity among detected boxes
 21 |     """
 22 |     
 23 |     def setup(self, bottom, top):
 24 |         # check input pair
 25 |         if len(bottom) != 9:
 26 |              raise Exception("Need nine inputs to apply diversity by DPP.")
 27 |         self.sim_classes=[]
 28 |         self.imdb_name = cfg.TRAIN.IMDB
 29 | 
 30 |     def reshape(self, bottom, top):
 31 |         # loss output is scalar
 32 |         top[0].reshape(1)
 33 |         
 34 |         
 35 |     def forward(self, bottom, top):
 36 |         """
 37 |         Forward Pass
 38 |         """
 39 |         self._loss_weight = top[0].diff[0]
 40 |         # to make S as a PSD matrix:
 41 |         self.epsilon = 0.02 
 42 |         sim_power = cfg.TRAIN.SIM_POWER
 43 |         Phi_power = 0.5
 44 |         self.max_per_image = 100
 45 |         self.min_Phi=0.0001        
 46 |         
 47 |         gt_boxes= bottom[2].data
 48 |         num_gt_per_img = gt_boxes.shape[0]
 49 |         rois = bottom[3].data
 50 |         data = bottom[4].data
 51 |         N_im_per_batch = data.shape[0]
 52 | 
 53 |         self._sample_Y = ["" for i in range(N_im_per_batch)]
 54 |         self._keeps_Y = ["" for i in range(N_im_per_batch)]
 55 |         self._sample_B = ["" for i in range(N_im_per_batch)]
 56 |         self._keeps_B = ["" for i in range(N_im_per_batch)]
 57 |         self._Xy = ["" for i in range(N_im_per_batch)]
 58 |         self._labels_Xy = ["" for i in range(N_im_per_batch)]
 59 |         self._Xb = ["" for i in range(N_im_per_batch)]
 60 |         self._labels_Xb = ["" for i in range(N_im_per_batch)]
 61 | 
 62 |         
 63 |         self.sim_classes = pickle.load(open(cfg.TRAIN.similarity_path,"r"))
 64 |         K = bottom[1].data.shape[1] # number of categories
 65 |         self.means = np.reshape(bottom[7].data,(K,4))
 66 |         self.stds = np.reshape(bottom[8].data,(K,4))
 67 |         self.stds[0,:] = np.ones(((self.stds).shape[1],))
 68 | 
 69 |         
 70 |         self.sim_classes = self.sim_classes**sim_power
 71 |         DPP_ = DPP(stds=self.stds,means=self.means,sim_classes=self.sim_classes,epsilon=self.epsilon,loss_weight=self._loss_weight)
 72 |         for i_image in range(N_im_per_batch):
 73 |             batch = range(min(np.nonzero(rois[:,0] ==i_image)[0]), max(np.nonzero(rois[:,0] ==i_image)[0]))
 74 |             
 75 |             im_shape_w,im_shape_h = DPP_.extract_im_per_batch( N_im_per_batch, i_image, data)      
 76 | 
 77 |             bbox_pred = np.array((bottom[0].data[batch,:]))
 78 |             cls_score = bottom[1].data[batch,:]
 79 |             bbox_targets = bottom[6].data[batch,:]
 80 |             labels = bottom[5].data[batch].astype(int)
 81 |             M = bbox_pred.shape[0] # number of rois of 1 image in the minibatch
 82 |             max_cls_score = np.reshape(np.repeat(np.max((cls_score),1),K),(M,K))
 83 |             exp_cls_score = np.exp(cls_score - max_cls_score)
 84 |             
 85 |             boxes = (rois[batch,:])[:,1:]
 86 |             
 87 |             # ========================================================= 
 88 |             # Y: maximize prob of selecting gt boxes
 89 |             # ========================================================= 
 90 |             
 91 |             unnormalized_bbox_targets = unnormalize_box(labels, bbox_targets, boxes, self.stds, self.means, M, im_shape_w, im_shape_h)
 92 |             loc_argmax = find_local_argmax(labels, range(M), bbox_pred)
 93 |             Phi = exp_cls_score[range(M),labels] #gt label to be considered as phi_i
 94 |             Phi = np.maximum(Phi , self.min_Phi) 
 95 |             Phi = Phi ** Phi_power
 96 |             IoU, S, L, IoU_with_gt_all, pred_boxes, det_L_I = DPP_.compute_kernel(labels, boxes, Phi, 
 97 |                                                         loc_argmax, unnormalized_bbox_targets, 
 98 |                                                         im_shape_w, im_shape_h)
 99 | 
100 |             # ========================================================= 
101 |             # find Y with MAP :
102 |             # non background images considered only
103 |             # ignore prediction scores; only IoU for measuring quality
104 |             # find non background images based on their labels: label=0 => bg
105 |             # only consider boxes with high overlap with a non-bg ground-truth box
106 |             # ========================================================= 
107 | 
108 |             MAP_images = np.nonzero(labels)[0]
109 |             MAP_labels = labels[MAP_images]
110 |             M_MAP = len(MAP_images)
111 |             log_p_Y=[]
112 |             keeps_Y=[]
113 |             
114 |             IoU_with_gt_all_MAP = IoU_with_gt_all[MAP_images]
115 |             good_gt_overlap = np.where(IoU_with_gt_all_MAP > (cfg.TRAIN.IoU_gt_thresh))[0]
116 |             among_ims = MAP_images[good_gt_overlap]
117 | 
118 |             y = np.zeros((M,1))
119 |             if among_ims.shape[0] == 0:                
120 |                 log_p_Y.append(0)
121 |                 keeps_Y.append({})
122 |                 Xy=[]
123 |                 Y=np.array([])
124 |                 labels_Xy=[]
125 |             else:
126 |                 S_MAP = S[MAP_images,:][:,MAP_images]
127 |                 Phi_MAP = np.multiply(IoU_with_gt_all_MAP,np.ones((M_MAP,)))
128 |                 # =======================================================
129 |                 # select representative boxes by MAP inference
130 |                 # =======================================================
131 |                 
132 |                 selected_and_probs = DPP_.dpp_greedy(S_MAP, Phi_MAP, 1, self.max_per_image, among_ims, 
133 |                                             num_gt_per_img=num_gt_per_img)
134 |                 Y = np.array(selected_and_probs[0])
135 |                 prob_dpp = selected_and_probs[1]
136 |                 
137 |                 Y = among_ims[np.reshape(Y,(Y.shape[0],)).tolist()]
138 |                 keeps_Y.append(dict(zip(Y,MAP_labels[Y])))
139 |                 y[Y] = 1
140 |                 y = np.reshape(y, (M,))
141 | 
142 |                 # =======================================================
143 |                 # Find X in P(Y|X)
144 |                 # =======================================================
145 |                 
146 |                 Xy, labels_Xy, potential_bgs = DPP_.Compute_Xy(list(Y), keeps_Y[i_image], labels,
147 |                                                                pred_boxes)
148 |                 L = L[Xy,:][:,Xy]
149 |                 det_L_I = np.linalg.det(L + np.eye(len(Xy)))
150 |                 S_y = S[Y,:][:,Y]
151 |                 log_p = DPP_.compute_log_p(Y, S_y, y, Phi, det_L_I, M, Phi_power)
152 |                 log_p_Y.append(log_p)
153 | 
154 |             self._sample_Y[i_image] = y
155 |             self._keeps_Y[i_image] = keeps_Y
156 |             self._Xy[i_image] = Xy
157 |             self._labels_Xy[i_image] = labels_Xy
158 | 
159 |  
160 |             # ========================================================= 
161 |             # B: minimize prob of selecting background boxes
162 |             # ========================================================= 
163 |             log_p_B=[]
164 |             keeps_B=[]
165 | 
166 |             Phi_labels = np.argmax(exp_cls_score,axis=1)
167 |             Phi = exp_cls_score[range(M),Phi_labels] #gt label to be considered as phi_i
168 |             Phi = np.maximum(Phi , self.min_Phi) 
169 |             Phi = Phi ** Phi_power
170 |             loc_argmax = find_local_argmax(Phi_labels, range(M), bbox_pred)
171 |             IoU, S, L, IoU_with_gt_all, pred_boxes, det_L_I = DPP_.compute_kernel(Phi_labels, boxes, Phi, 
172 |                                                 loc_argmax, unnormalized_bbox_targets, im_shape_w, im_shape_h)
173 | 
174 |             good_gt_overlap = np.where(IoU_with_gt_all > (cfg.TRAIN.IoU_gt_thresh))[0] 
175 |             B, prob_dpp = DPP_.select_bg(Phi_labels,boxes,labels,bbox_pred,keeps_Y[i_image],good_gt_overlap,
176 |                                          M,im_shape_w,im_shape_h)
177 |             b = np.zeros((M,1))
178 |             b[np.reshape(B,(B.shape[0],)).tolist()] = 1
179 |             b = np.reshape(b, (M,))
180 | 
181 |             bgs = list(np.where((labels)==0)[0])
182 |             bgs_1=sorted(set(bgs)-set(list(Y)+list(B)))
183 |             Xb = np.array(list(Y)+list(B)+bgs_1)
184 |             labels_Xb = np.array(list(labels[list(Y)])+list(np.zeros((len(B)+len(bgs_1),1)))).astype(int)
185 |             keeps_B.append(dict(zip(B,Phi_labels[B])))
186 |             
187 |             L = L[Xb,:][:,Xb]
188 |             det_L_I = np.linalg.det(L + np.eye(len(Xb)))
189 |             S_b = S[B,:][:,B]
190 |             log_p = DPP_.compute_log_p(B, S_b, b, Phi, det_L_I, M, Phi_power)
191 |             log_p_B.append(log_p)
192 | 
193 | 
194 |             self._sample_B[i_image] = b
195 |             self._keeps_B[i_image] = keeps_B
196 |             self._Xb[i_image] = Xb
197 |             self._labels_Xb[i_image] = labels_Xb
198 |             
199 |             normalizer_Y = (len(B)+1)/np.float(len(list(Y)+list(B))+1)
200 |             normalizer_B = (len(Y)+1)/np.float(len(list(Y)+list(B))+1)
201 |             
202 |             
203 |         top[0].data[...] = -normalizer_Y*sum(log_p_Y)+normalizer_B*sum(log_p_B)
204 | 
205 | 
206 | 
207 |     def backward(self, top, propagate_down, bottom):
208 |         """
209 |         Backward Pass
210 |         """
211 | 
212 |         Phi_power = 0.5
213 |     
214 |         cls_score_diff = np.zeros(bottom[1].data.shape)
215 |         rois = bottom[3].data
216 |         data = bottom[4].data
217 |         gt_boxes= bottom[2].data
218 |         num_gt_per_img = gt_boxes.shape[0]
219 |         N_im_per_batch = data.shape[0]
220 |         
221 |         for i_image in range(N_im_per_batch):
222 |             batch = range(min(np.nonzero(rois[:,0] ==i_image)[0]), max(np.nonzero(rois[:,0] ==i_image)[0]))
223 |             im_shape_w,im_shape_h = DPP().extract_im_per_batch( N_im_per_batch, i_image, data)      
224 |                      
225 |             bbox_pred = (bottom[0].data[batch,:])        
226 |             cls_score = (bottom[1].data[batch,:])
227 |             bbox_targets = (bottom[6].data[batch,:])
228 |             labels = bottom[5].data[batch].astype(int)
229 |             M = bbox_pred.shape[0] # number of rois of 1 image in the minibatch
230 |             K = cls_score.shape[1] # number of categories
231 |             
232 |             boxes = (rois[batch,:])[:,1:]
233 |             dLoss_db1 = np.zeros((M,K))
234 | 
235 |             max_cls_score = np.reshape(np.repeat(np.max((cls_score),1),K),(M,K))
236 |             exp_cls_score = np.exp(cls_score - max_cls_score)
237 | 
238 |             Phi_labels = labels
239 |             unnormalized_bbox_targets = unnormalize_box(Phi_labels, bbox_targets, boxes,self.stds,self.means,
240 |                                                                                  M, im_shape_w, im_shape_h)
241 | 
242 |             sim_classes_0 = np.zeros((K,K))
243 |             sim_classes_0[1:K,1:K] = self.sim_classes #include sims for class 0
244 |             sim_classes_0[0,0]=1
245 |             self.sim_classes = sim_classes_0
246 |             DPP_ = DPP(stds=self.stds,means=self.means,sim_classes=self.sim_classes,epsilon=self.epsilon,loss_weight=self._loss_weight)
247 |             
248 |             # ========================================================= 
249 |             # d logp(Y|Xy)/db_i^c
250 |             # ========================================================= 
251 |             
252 |             dLoss_db1 = np.zeros((M,K))
253 |             B=sorted(self._keeps_B[i_image][0].keys())
254 |             Y = sorted(self._keeps_Y[i_image][0].keys())
255 | 
256 |             if len(self._keeps_Y[i_image][0].keys()) > 0:
257 | 
258 |                 Xy = self._Xy[i_image]
259 |                 labels_Xy = self._labels_Xy[i_image]
260 |                 y = self._sample_Y[i_image][Xy]
261 | 
262 |                 Phi = np.multiply(y, exp_cls_score[Xy,labels_Xy]) + np.multiply(1-y, np.sum(exp_cls_score[Xy,1:],1))
263 |                 Phi = Phi**Phi_power
264 |                 normalizer = (len(B)+1)/np.float(len(list(Y)+list(B))+1) 
265 | 
266 |                 dLoss_db1 = DPP_.compute_diff_logp(labels_Xy, Xy, y, exp_cls_score, bbox_pred, boxes, Phi, unnormalized_bbox_targets, 
267 |                         im_shape_w, im_shape_h, Phi_power, normalizer, True)
268 |             bottom_diff_1_y = DPP_.clip_grad(dLoss_db1, cls_score)
269 | 
270 |             # ========================================================= 
271 |             # d logp(B|Xb)/db_i^c
272 |             # =========================================================                 
273 | 
274 |             dLoss_db1 = np.zeros((M,K))
275 |             if len(self._keeps_B[i_image][0].keys()) > 0:
276 |                 Xb = self._Xb[i_image]
277 |                 labels_Xb = self._labels_Xb[i_image]
278 |                 labels_Xb = np.reshape(labels_Xb,(labels_Xb.shape[0],))
279 |                 b = self._sample_B[i_image][Xb]
280 | 
281 |                 Phi = (np.multiply(b, np.sum(exp_cls_score[Xb,1:],1)) +np.multiply(1-b,exp_cls_score[Xb,labels_Xb] ))
282 |                 Phi = Phi**Phi_power
283 |                 normalizer = (len(Y)+1)/np.float(len(list(Y)+list(B))+1)
284 |                 dLoss_db1 = DPP_.compute_diff_logp(labels_Xb, Xb, b, exp_cls_score, bbox_pred, boxes, Phi, unnormalized_bbox_targets, 
285 |                         im_shape_w, im_shape_h, Phi_power, normalizer, False)
286 | 
287 |             bottom_diff_1_b = DPP_.clip_grad(dLoss_db1, cls_score)
288 | 
289 |             cls_score_diff[batch,:] = bottom_diff_1_y - bottom_diff_1_b
290 |             
291 |         bottom[1].diff[...] = -cls_score_diff 
292 | 


--------------------------------------------------------------------------------
/tools/Semantic_Similarity.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [
 10 |     {
 11 |      "name": "stdout",
 12 |      "output_type": "stream",
 13 |      "text": [
 14 |       "Populating the interactive namespace from numpy and matplotlib\n"
 15 |      ]
 16 |     }
 17 |    ],
 18 |    "source": [
 19 |     "%pylab inline"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 44,
 25 |    "metadata": {
 26 |     "collapsed": true
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "import csv\n",
 31 |     "import scipy\n",
 32 |     "from scipy import sparse as sps\n",
 33 |     "from scipy import io\n",
 34 |     "import numpy as np\n",
 35 |     "import pickle\n",
 36 |     "import json"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 5,
 42 |    "metadata": {
 43 |     "collapsed": false
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "tree_p2c={}\n",
 48 |     "tree_c2p={}\n",
 49 |     "with open(\"../data/imagenet_data/imagenet_tree.txt\") as csv_file:\n",
 50 |     "    spamreader = csv.reader(csv_file, delimiter=\" \", quotechar='|')\n",
 51 |     "    for row in spamreader:\n",
 52 |     "        if row[1] not in tree_c2p:\n",
 53 |     "            tree_c2p[row[1]]=[row[0]]\n",
 54 |     "        else:\n",
 55 |     "            tree_c2p[row[1]].append(row[0])\n",
 56 |     "        if row[0] not in tree_p2c:\n",
 57 |     "            tree_p2c[row[0]]=[row[1]]\n",
 58 |     "        else:\n",
 59 |     "            tree_p2c[row[0]].append(row[1])\n",
 60 |     "    "
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 6,
 66 |    "metadata": {
 67 |     "collapsed": false
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "words={}\n",
 72 |     "words_r={}\n",
 73 |     "duplicates=[]\n",
 74 |     "with open(\"../data/imagenet_data/imagenet_words.txt\") as csv_file:\n",
 75 |     "    spamreader = csv.reader(csv_file, delimiter=\"\\t\", quotechar='|')\n",
 76 |     "    for row in spamreader:\n",
 77 |     "        words[row[0]]=row[1]\n",
 78 |     "        if row[1] in words_r:\n",
 79 |     "            duplicates.append(row[1])\n",
 80 |     "        words_r[row[1]]=row[0]"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 7,
 86 |    "metadata": {
 87 |     "collapsed": true
 88 |    },
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "words_r['elephant']='n02503517'\n",
 92 |     "words_r['person']='n00007846'\n",
 93 |     "words_r['ball']='n02778669'\n",
 94 |     "words_r['bear']='n02131653'\n",
 95 |     "words_r['bed']='n02818832'\n",
 96 |     "words_r['bench']='n02828884'\n",
 97 |     "words_r['book']='n02870526'\n",
 98 |     "words_r['bowl']='n02880940'\n",
 99 |     "words_r['carrot']='n07730207'\n",
100 |     "words_r['chair']='n03001627'\n",
101 |     "words_r['couch']='n04256520'\n",
102 |     "words_r['cow']='n01887787'\n",
103 |     "words_r['cup']='n03147509'\n",
104 |     "words_r['fork']='n03383948'\n",
105 |     "words_r['spoon']='n04284002'\n",
106 |     "words_r['knife']='n04380346'\n",
107 |     "words_r['kite']='n03621473'\n",
108 |     "words_r['mouse']='n03793489'\n",
109 |     "words_r['orange']='n07747607'\n",
110 |     "words_r['sheep']='n02411705'\n",
111 |     "words_r['sign']='n06793231'\n",
112 |     "words_r['sink']='n03620052'\n",
113 |     "words_r['tie']='n03815615'\n",
114 |     "words_r['dog']='n02084071'\n",
115 |     "words_r['cat']='n02121620'\n",
116 |     "words_r['tv']='n06277280'\n",
117 |     "words_r['toilet']='n04447028'\n",
118 |     "words_r['backpack']='n02773037'\n",
119 |     "words_r['handbag']='n02774152'\n",
120 |     "words_r['suitcase']='n02774630'\n",
121 |     "\n",
122 |     "\n"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 8,
128 |    "metadata": {
129 |     "collapsed": false
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "raw_counts={}\n",
134 |     "with open(\"../data/imagenet_data/imagenet_count.txt\") as csv_file:\n",
135 |     "    spamreader = csv.reader(csv_file, delimiter=\" \", quotechar='|')\n",
136 |     "    for row in spamreader:\n",
137 |     "        if \"./n\" in row[0]:\n",
138 |     "            raw_counts[row[0][2:]]=int(row[1])"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 41,
144 |    "metadata": {
145 |     "collapsed": false
146 |    },
147 |    "outputs": [],
148 |    "source": [
149 |     "leaves=(set(tree_c2p.keys())-set(tree_p2c.keys()))\n",
150 |     "nodes=list(set(tree_c2p.keys())|set(tree_p2c.keys()))\n"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 11,
156 |    "metadata": {
157 |     "collapsed": true
158 |    },
159 |    "outputs": [],
160 |    "source": [
161 |     "node_2_idx={k:i for i,k in enumerate(nodes)}\n",
162 |     "idx_2_node={i:k for i,k in enumerate(nodes)}"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 12,
168 |    "metadata": {
169 |     "collapsed": true
170 |    },
171 |    "outputs": [],
172 |    "source": [
173 |     "def ged_descendents(node,Descendents):\n",
174 |     "    if node not in Descendents:\n",
175 |     "        descendents=set([node])\n",
176 |     "        if node in tree_p2c:\n",
177 |     "            for child in tree_p2c[node]:\n",
178 |     "                if child not in Descendents:\n",
179 |     "                    ged_descendents(child,Descendents)\n",
180 |     "                descendents|=Descendents[child]\n",
181 |     "        Descendents[node]=descendents"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": 13,
187 |    "metadata": {
188 |     "collapsed": false
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "Descendents={}\n",
193 |     "for node in nodes:\n",
194 |     "    ged_descendents(node,Descendents)\n"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 14,
200 |    "metadata": {
201 |     "collapsed": true
202 |    },
203 |    "outputs": [],
204 |    "source": [
205 |     "Ancestors={}\n",
206 |     "for node in nodes:\n",
207 |     "    for dec in Descendents[node]:\n",
208 |     "        if dec not in Ancestors:\n",
209 |     "            Ancestors[dec]=set([])\n",
210 |     "        Ancestors[dec].add(node)\n"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": 15,
216 |    "metadata": {
217 |     "collapsed": false
218 |    },
219 |    "outputs": [],
220 |    "source": [
221 |     "counts={}\n",
222 |     "for node in nodes:\n",
223 |     "    c=0\n",
224 |     "    for d in Descendents[node]:\n",
225 |     "        if d in raw_counts:\n",
226 |     "            c+=raw_counts[d]\n",
227 |     "    counts[node]=c"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": 17,
233 |    "metadata": {
234 |     "collapsed": false
235 |    },
236 |    "outputs": [],
237 |    "source": [
238 |     "total_count=max(counts.values())\n",
239 |     "pscnt=1\n",
240 |     "IC={}\n",
241 |     "for node in nodes:\n",
242 |     "    if counts[node]>500000:\n",
243 |     "        cn=total_count\n",
244 |     "    else:\n",
245 |     "        cn=counts[node]\n",
246 |     "    IC[node]=np.log((cn+pscnt)/float(total_count+pscnt))"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 18,
252 |    "metadata": {
253 |     "collapsed": false
254 |    },
255 |    "outputs": [],
256 |    "source": [
257 |     "def find_common_ancestor(node1,node2):\n",
258 |     "    a1=Ancestors[node1]\n",
259 |     "    a2=Ancestors[node2]\n",
260 |     "    return sorted([(k,counts[k],IC[k]) for k in set(a1)&set(a2)],key=lambda x:x[1])[0]"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": 39,
266 |    "metadata": {
267 |     "collapsed": true
268 |    },
269 |    "outputs": [],
270 |    "source": [
271 |     "def compute_sim(classes,class_map):\n",
272 |     "    \"\"\"Compute similarity matrix\"\"\"\n",
273 |     "    num_classes = len(classes)\n",
274 |     "    Sim=np.zeros((num_classes,num_classes))\n",
275 |     "    for i,clsi in enumerate(classes):\n",
276 |     "        ci=words_r[class_map[clsi]]\n",
277 |     "        for j,clsj in enumerate(classes):\n",
278 |     "            if i==j:\n",
279 |     "                Sim[i,j]=1\n",
280 |     "                continue        \n",
281 |     "            cj=words_r[class_map[clsj]]\n",
282 |     "            cc=find_common_ancestor(ci,cj)\n",
283 |     "            if cc[2]==0:\n",
284 |     "                Sim[i][j]=0\n",
285 |     "            else:\n",
286 |     "                Sim[i][j]=(2*cc[2])/(IC[ci]+IC[cj])\n",
287 |     "            Sim[j][i]=Sim[i][j]\n",
288 |     "    return Sim\n",
289 |     "\n",
290 |     "    "
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": 49,
296 |    "metadata": {
297 |     "collapsed": false
298 |    },
299 |    "outputs": [],
300 |    "source": [
301 |     "with open('../data/info.json', 'r') as fp:\n",
302 |     "    info = json.load(fp)\n",
303 |     "pascal_20_classes = info['pascal_cats'][1:]"
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "code",
308 |    "execution_count": 20,
309 |    "metadata": {
310 |     "collapsed": true
311 |    },
312 |    "outputs": [],
313 |    "source": [
314 |     "pascal_class_map={cls:cls for cls in pascal_20_classes}\n",
315 |     "pascal_class_map[\"aeroplane\"]=\"airplane, aeroplane, plane\"\n",
316 |     "pascal_class_map[\"bicycle\"]=\"bicycle, bike, wheel, cycle\"\n",
317 |     "pascal_class_map[\"bus\"]=\"bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle\"\n",
318 |     "pascal_class_map[\"train\"]=\"train, railroad train\"\n",
319 |     "pascal_class_map[\"car\"]=\"car, auto, automobile, machine, motorcar\"\n",
320 |     "pascal_class_map[\"diningtable\"]=\"dining table, board\"\n",
321 |     "pascal_class_map[\"horse\"]=\"horse, Equus caballus\"\n",
322 |     "pascal_class_map[\"motorbike\"]=\"minibike, motorbike\"\n",
323 |     "pascal_class_map[\"pottedplant\"]=\"pot plant\"\n",
324 |     "pascal_class_map[\"sofa\"]=\"sofa, couch, lounge\"\n",
325 |     "pascal_class_map[\"tvmonitor\"]=\"television monitor, tv monitor\"\n"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "code",
330 |    "execution_count": 22,
331 |    "metadata": {
332 |     "collapsed": false
333 |    },
334 |    "outputs": [],
335 |    "source": [
336 |     "Sim_pascal = compute_sim(pascal_20_classes,pascal_class_map)\n",
337 |     "# pickle.dump(Sim_pascal,open(\"../data/semantic_similarity/pascal_voc_2007_semantics.pickle\",\"w\"))"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": 50,
343 |    "metadata": {
344 |     "collapsed": true
345 |    },
346 |    "outputs": [],
347 |    "source": [
348 |     "coco_80_classes = info['coco_cats'][1:]"
349 |    ]
350 |   },
351 |   {
352 |    "cell_type": "code",
353 |    "execution_count": 24,
354 |    "metadata": {
355 |     "collapsed": false
356 |    },
357 |    "outputs": [],
358 |    "source": [
359 |     "coco_class_map={cls:cls for cls in coco_80_classes}\n",
360 |     "coco_class_map[\"bicycle\"]=\"bicycle, bike, wheel, cycle\"\n",
361 |     "coco_class_map[\"car\"]=\"car, auto, automobile, machine, motorcar\"\n",
362 |     "coco_class_map[u'motorcycle']=\"motorcycle, bike\"\n",
363 |     "coco_class_map[u'airplane']=\"airplane, aeroplane, plane\"\n",
364 |     "coco_class_map[\"bus\"]=\"bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle\"\n",
365 |     "coco_class_map[\"train\"]=\"train, railroad train\"\n",
366 |     "coco_class_map['truck']=\"truck, motortruck\"\n",
367 |     "coco_class_map['traffic light']=\"traffic light, traffic signal, stoplight\"\n",
368 |     "coco_class_map['fire hydrant']=\"fireplug, fire hydrant, plug\"\n",
369 |     "coco_class_map['stop sign']=\"sign\"\n",
370 |     "coco_class_map[\"horse\"]=\"horse, Equus caballus\"\n",
371 |     "coco_class_map['giraffe']=\"giraffe, camelopard, Giraffa camelopardalis\"\n",
372 |     "coco_class_map['backpack']=\"backpack, back pack, knapsack, packsack, rucksack, haversack\"\n",
373 |     "coco_class_map['handbag']=\"bag, handbag, pocketbook, purse\"\n",
374 |     "coco_class_map['suitcase']=\"bag, traveling bag, travelling bag, grip, suitcase\"\n",
375 |     "coco_class_map['frisbee']=\"ultimate frisbee\"\n",
376 |     "coco_class_map['skis']=\"ski\"\n",
377 |     "coco_class_map['sports ball']=\"ball\"\n",
378 |     "coco_class_map['baseball bat']='baseball bat, lumber'\n",
379 |     "coco_class_map['baseball glove']='baseball glove, glove, baseball mitt, mitt'\n",
380 |     "coco_class_map['tennis racket']='tennis racket, tennis racquet'\n",
381 |     "coco_class_map['wine glass']='wineglass'\n",
382 |     "coco_class_map['hot dog']='hotdog, hot dog'\n",
383 |     "coco_class_map['pizza']='pizza, pizza pie'\n",
384 |     "coco_class_map['donut']='doughnut, donut, sinker'\n",
385 |     "coco_class_map[\"potted plant\"]=\"pot plant\"\n",
386 |     "coco_class_map['dining table']='dining table, board'\n",
387 |     "coco_class_map['toilet']='toilet, toilette'\n",
388 |     "coco_class_map['tv']='television receiver, television, television set, tv, tv set, idiot box, boob tube, telly, goggle box'\n",
389 |     "coco_class_map['remote']='remote control, remote'\n",
390 |     "coco_class_map['laptop']='laptop, laptop computer'\n",
391 |     "coco_class_map['cell phone']='cellular telephone, cellular phone, cellphone, cell, mobile phone'\n",
392 |     "coco_class_map['refrigerator']='electric refrigerator, fridge'\n",
393 |     "coco_class_map['teddy bear']='teddy, teddy bear'\n",
394 |     "coco_class_map['hair drier']='hand blower, blow dryer, blow drier, hair dryer, hair drier'\n",
395 |     " \n",
396 |     "\n",
397 |     "\n"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "code",
402 |    "execution_count": 32,
403 |    "metadata": {
404 |     "collapsed": true
405 |    },
406 |    "outputs": [],
407 |    "source": [
408 |     "Sim_coco = compute_sim(coco_80_classes,coco_class_map)\n",
409 |     "#pickle.dump(Sim_coco, open(\"../data/semantic_similarity/coco_semantics.pickle\",\"w\"))"
410 |    ]
411 |   }
412 |  ],
413 |  "metadata": {
414 |   "anaconda-cloud": {},
415 |   "kernelspec": {
416 |    "display_name": "Python [default]",
417 |    "language": "python",
418 |    "name": "python2"
419 |   },
420 |   "language_info": {
421 |    "codemirror_mode": {
422 |     "name": "ipython",
423 |     "version": 2
424 |    },
425 |    "file_extension": ".py",
426 |    "mimetype": "text/x-python",
427 |    "name": "python",
428 |    "nbconvert_exporter": "python",
429 |    "pygments_lexer": "ipython2",
430 |    "version": "2.7.12"
431 |   }
432 |  },
433 |  "nbformat": 4,
434 |  "nbformat_minor": 0
435 | }
436 | 


--------------------------------------------------------------------------------