├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── data ├── coco_semantics.pickle ├── imagenet_data │ ├── imagenet_count.txt │ ├── imagenet_tree.txt │ └── imagenet_words.txt ├── info.json └── pascal_voc_semantics.pickle ├── lib ├── __init__.py ├── boxTools.py ├── dppTools.py └── dpp_loss_layer.py └── tools └── Semantic_Similarity.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .ipynb_checkpoints 3 | lib/build 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "py-faster-rcnn"] 2 | path = py-faster-rcnn 3 | url = https://github.com/azadis/py-faster-rcnn 4 | branch = lddp_modification 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | LDDP 2 | 3 | UC Berkeley's Standard Copyright and Disclaimer Notice: 4 | 5 | Copyright ©2017. The Regents of the University of California (Regents). All 6 | Rights Reserved. Permission to use, copy, modify, and distribute this software 7 | and its documentation for educational, research, and not-for-profit purposes, 8 | without fee and without a signed licensing agreement, is hereby granted, 9 | provided that the above copyright notice, this paragraph and the following two 10 | paragraphs appear in all copies, modifications, and distributions. Contact The 11 | Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, 12 | Berkeley, CA 94720-1620, (510) 643-7201, for commercial licensing 13 | opportunities. 14 | 15 | Samaneh Azadi, Jiashi Feng, Trevor Darrell, University of 16 | California, Berkeley. 
17 | 18 | IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, 19 | INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF 20 | THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN 21 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | 23 | REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 24 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 25 | THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS 26 | PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, 27 | UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 28 | 29 | 30 | ************************************************************************ 31 | 32 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 33 | 34 | This project, LDDP, incorporates material from the project(s) 35 | listed below (collectively, "Third Party Code"). Microsoft is not the 36 | original author of the Third Party Code. The original copyright notice 37 | and license under which Microsoft received such Third Party Code are set 38 | out below. This Third Party Code is licensed to you under their original 39 | license terms set forth below. Microsoft reserves all other rights not 40 | expressly granted, whether by implication, estoppel or otherwise. 41 | 42 | 1. 
Faster R-CNN, (https://github.com/rbgirshick/py-faster-rcnn/) 43 | 44 | The MIT License (MIT) 45 | 46 | Copyright (c) 2015 Microsoft Corporation 47 | 48 | Permission is hereby granted, free of charge, to any person obtaining a copy 49 | of this software and associated documentation files (the "Software"), to deal 50 | in the Software without restriction, including without limitation the rights 51 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 52 | copies of the Software, and to permit persons to whom the Software is 53 | furnished to do so, subject to the following conditions: 54 | 55 | The above copyright notice and this permission notice shall be included in 56 | all copies or substantial portions of the Software. 57 | 58 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 59 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 60 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 61 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 62 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 63 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 64 | THE SOFTWARE. 65 | 66 | 67 | 68 | 2. Caffe, (https://github.com/BVLC/caffe/) 69 | 70 | COPYRIGHT 71 | 72 | All contributions by the University of California: 73 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents) 74 | All rights reserved. 75 | 76 | All other contributions: 77 | Copyright (c) 2014, 2015, the respective contributors 78 | All rights reserved. 79 | 80 | Caffe uses a shared copyright model: each contributor holds copyright 81 | over their contributions to Caffe. The project versioning records all 82 | such contribution and copyright details. 
If a contributor wants to 83 | further mark their specific copyright on a particular contribution, 84 | they should indicate their copyright solely in the commit message of 85 | the change when it is committed. 86 | 87 | The BSD 2-Clause License 88 | 89 | Redistribution and use in source and binary forms, with or without 90 | modification, are permitted provided that the following conditions 91 | are met: 92 | 93 | 1. Redistributions of source code must retain the above copyright notice, 94 | this list of conditions and the following disclaimer. 95 | 96 | 2. Redistributions in binary form must reproduce the above copyright 97 | notice, this list of conditions and the following disclaimer in the 98 | documentation and/or other materials provided with the distribution. 99 | 100 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 101 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 102 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 103 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 104 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 105 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 106 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 107 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 108 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 109 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 110 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 111 | 112 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LDDP: Learning Detection with Diverse Proposals 2 | 3 | By Samaneh Azadi, Jiashi Feng, Trevor Darrell at UC Berkeley. 
4 | 5 | ### Introduction: 6 | LDDP is proposed to predict a set of diverse and informative proposals with enriched representations which is able to augment object detection architectures. 7 | LDDP considers both label-level contextual information and spatial layout relationships between object proposals without increasing the number of parameters of the network, and thus improves location and category specifications of final detected bounding boxes substantially during both training and inference schemes. 8 | This implementation is built based on [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn) framework but can be modified for other detection architectures. 9 | For more information on LDDP, please refer to the [arxiv preprint](https://arxiv.org/pdf/1704.03533.pdf) which will be published at CVPR 2017. 10 | 11 | ### License 12 | LDDP is licensed for open non-commercial distribution under the UC Regents license; see LICENSE. Its dependencies, such as Caffe and Faster R-CNN, are subject to their own respective licenses. 13 | 14 | ### Citing LDDP 15 | If you find LDDP useful in your research, please cite: 16 | 17 | @article{azadi2017learning, 18 | title={Learning Detection with Diverse Proposals}, 19 | author={Azadi, Samaneh and Feng, Jiashi and Darrell, Trevor}, 20 | journal={arXiv preprint arXiv:1704.03533}, 21 | year={2017} 22 | } 23 | 24 | Requirements and installation instructions are similar to [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn), but we mention them again for your convenience. 25 | 26 | ### Requirements: software 27 | 28 | 1. Requirements for `Caffe` and `pycaffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html)) 29 | 30 | **Note:** Caffe *must* be built with support for Python layers! 
31 | 32 | ```make 33 | # In your Makefile.config, make sure to have this line uncommented 34 | WITH_PYTHON_LAYER := 1 35 | # Unrelatedly, it's also recommended that you use CUDNN 36 | USE_CUDNN := 1 37 | ``` 38 | You can download my [Makefile.config](https://people.eecs.berkeley.edu/~sazadi/LDDP/Makefile.config) for reference. 39 | 40 | 2. Python packages you might not have: `cython`, `python-opencv`, `easydict` 41 | 42 | ### Requirements: hardware 43 | Hardware requirements are similar to the those for running [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn/blob/96dc9f1dea3087474d6da5a98879072901ee9bf9/README.md#requirements-hardware). 44 | 45 | ### Installation 46 | 47 | 1. Clone the LDDP repository 48 | ```Shell 49 | # Make sure to clone with --recursive 50 | git clone --recursive https://github.com/azadis/LDDP.git 51 | ``` 52 | 53 | 2. We'll call the directory that you cloned LDDP into `LDDP_ROOT` 54 | 55 | 56 | 3. Build the Cython modules 57 | ```Shell 58 | cd $LDDP_ROOT/py-faster-rcnn/lib 59 | make 60 | ``` 61 | 62 | 4. Build Caffe and pycaffe 63 | ```Shell 64 | cd $LDDP_ROOT/py-faster-rcnn/caffe-fast-rcnn 65 | # Now follow the Caffe installation instructions here: 66 | # http://caffe.berkeleyvision.org/installation.html 67 | 68 | # If you're experienced with Caffe and have all of the requirements installed 69 | # and your Makefile.config in place, then simply do: 70 | make -j8 && make pycaffe 71 | ``` 72 | ### Installation for training and testing models 73 | 1. Download the training, validation, test data and VOCdevkit 74 | 75 | ```Shell 76 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 77 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 78 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar 79 | ``` 80 | 81 | 2. 
Extract all of these tars into one directory named `VOCdevkit` 82 | 83 | ```Shell 84 | tar xvf VOCtrainval_06-Nov-2007.tar 85 | tar xvf VOCtest_06-Nov-2007.tar 86 | tar xvf VOCdevkit_08-Jun-2007.tar 87 | ``` 88 | 89 | 3. It should have this basic structure 90 | 91 | ```Shell 92 | $VOCdevkit/ # development kit 93 | $VOCdevkit/VOCcode/ # VOC utility code 94 | $VOCdevkit/VOC2007 # image sets, annotations, etc. 95 | # ... and several other directories ... 96 | ``` 97 | 98 | 4. Create symlinks for the PASCAL VOC dataset 99 | 100 | ```Shell 101 | cd $LDDP_ROOT/py-faster-rcnn/data 102 | ln -s $VOCdevkit VOCdevkit2007 103 | ``` 104 | Using symlinks is a good idea because you will likely want to share the same PASCAL dataset installation between multiple projects. 105 | 5. [Optional] follow similar steps to get PASCAL VOC 2010 and 2012. 106 | 6. [Optional] If you want to use COCO, please see the notes [here](https://github.com/rbgirshick/py-faster-rcnn/blob/96dc9f1dea3087474d6da5a98879072901ee9bf9/data/README.md). 107 | 7. Follow the next sections to download pre-trained ImageNet models. 108 | 109 | ### Download pre-trained ImageNet models 110 | 111 | Pre-trained ImageNet models can be downloaded for the three networks described in the paper: ZF and VGG16. 112 | 113 | ```Shell 114 | cd $LDDP_ROOT/py-faster-rcnn 115 | ./data/scripts/fetch_imagenet_models.sh 116 | ``` 117 | 118 | ### Usage 119 | To train and test the LDDP end-to-end detection framework: 120 | ```Shell 121 | cd $LDDP_ROOT/py-faster-rcnn 122 | ./experiments/scripts/LDDP_end2end.sh [GPU_ID] [NET] [--set ...] 123 | # GPU_ID is the GPU you want to train on 124 | # NET in {ZF, VGG_CNN_M_1024, VGG16} is the network arch to use 125 | # --set ... allows you to specify fast_rcnn.config options, e.g. 
126 | # --set EXP_DIR seed_rng1701 RNG_SEED 1701 TRAIN.SCALES [400,500,600,700] 127 | ``` 128 | 129 | Trained LDDP networks are saved under: 130 | 131 | ``` 132 | output/// 133 | ``` 134 | 135 | Test outputs are saved under: 136 | 137 | ``` 138 | output//// 139 | ``` 140 | 141 | Semantic Similarity matrices used in the [paper](https://arxiv.org/pdf/1704.03533.pdf) are stored as pickle files at: 142 | ```Shell 143 | $LDDP_ROOT/data 144 | ``` 145 | An example ipython script to generate semantic similarity matrices for PASCAL VOC and COCO data sets is located at: 146 | 147 | ```Shell 148 | $LDDP_ROOT/tools/Semantic_Similarity.ipynb 149 | ``` 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /data/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "pascal_cats":[ 3 | "__background__", 4 | "aeroplane", 5 | "bicycle", 6 | "bird", 7 | "boat", 8 | "bottle", 9 | "bus", 10 | "car", 11 | "cat", 12 | "chair", 13 | "cow", 14 | "diningtable", 15 | "dog", 16 | "horse", 17 | "motorbike", 18 | "person", 19 | "pottedplant", 20 | "sheep", 21 | "sofa", 22 | "train", 23 | "tvmonitor" 24 | ], 25 | "coco_cats":[ 26 | "__background__", 27 | "person", 28 | "bicycle", 29 | "car", 30 | "motorcycle", 31 | "airplane", 32 | "bus", 33 | "train", 34 | "truck", 35 | "boat", 36 | "traffic light", 37 | "fire hydrant", 38 | "stop sign", 39 | "parking meter", 40 | "bench", 41 | "bird", 42 | "cat", 43 | "dog", 44 | "horse", 45 | "sheep", 46 | "cow", 47 | "elephant", 48 | "bear", 49 | "zebra", 50 | "giraffe", 51 | "backpack", 52 | "umbrella", 53 | "handbag", 54 | "tie", 55 | "suitcase", 56 | "frisbee", 57 | "skis", 58 | "snowboard", 59 | "sports ball", 60 | "kite", 61 | "baseball bat", 62 | "baseball glove", 63 | "skateboard", 64 | "surfboard", 65 | "tennis racket", 66 | "bottle", 67 | "wine glass", 68 | "cup", 69 | "fork", 70 | "knife", 71 | 
"spoon", 72 | "bowl", 73 | "banana", 74 | "apple", 75 | "sandwich", 76 | "orange", 77 | "broccoli", 78 | "carrot", 79 | "hot dog", 80 | "pizza", 81 | "donut", 82 | "cake", 83 | "chair", 84 | "couch", 85 | "potted plant", 86 | "bed", 87 | "dining table", 88 | "toilet", 89 | "tv", 90 | "laptop", 91 | "mouse", 92 | "remote", 93 | "keyboard", 94 | "cell phone", 95 | "microwave", 96 | "oven", 97 | "toaster", 98 | "sink", 99 | "refrigerator", 100 | "book", 101 | "clock", 102 | "vase", 103 | "scissors", 104 | "teddy bear", 105 | "hair drier", 106 | "toothbrush"] 107 | 108 | } -------------------------------------------------------------------------------- /data/pascal_voc_semantics.pickle: -------------------------------------------------------------------------------- 1 | cnumpy.core.multiarray 2 | _reconstruct 3 | p0 4 | (cnumpy 5 | ndarray 6 | p1 7 | (I0 8 | tp2 9 | S'b' 10 | p3 11 | tp4 12 | Rp5 13 | (I1 14 | (I20 15 | I20 16 | tp6 17 | cnumpy 18 | dtype 19 | p7 20 | (S'f8' 21 | p8 22 | I0 23 | I1 24 | tp9 25 | Rp10 26 | (I3 27 | S'<' 28 | p11 29 | NNNI-1 30 | I-1 31 | I0 32 | tp12 33 | bI00 34 | S"\x00\x00\x00\x00\x00\x00\xf0?im\xcc\x06L\xec\xe0?\x00\x00\x00\x00\x00\x00\x00\x00>\x88\xf8r\xeb\xa3\xe7?\x00\x00\x00\x00\x00\x00\x00\x00P}C\xb6\xaa\xdf\xdf?{Q\xe5lr 
\xe3?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00#q\r\x8d\xb9\xc1\xdd?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa5\xc2\xcdO\xfd\xce\xe0?\x00\x00\x00\x00\x00\x00\x00\x00im\xcc\x06L\xec\xe0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00L\xf2\xf7\\\xff\xcf\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x9f9b\xae\xb1\x8e\xdd?P\xb3@q\\\xf7\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00[I\nIR\x9e\xe0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbfvu\xc2\xf9\x0c\xdf?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00>\x88\xf8r\xeb\xa3\xe7?L\xf2\xf7\\\xff\xcf\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x8b\xc1\xc3\xf9D\xbf\xe0?\x06fZ\xb4PE\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00{'\xa6\x10\x82\x
1f\xdf?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00b\xfa\xfa\xcca\xb6\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P}C\xb6\xaa\xdf\xdf?\x9f9b\xae\xb1\x8e\xdd?\x00\x00\x00\x00\x00\x00\x00\x00\x8b\xc1\xc3\xf9D\xbf\xe0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xef\xa9\xf9CA|\xe0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1e\xcb\x9d?\tO\xda?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00^\xd5\x95\xf96|\xeb?\x00\x00\x00\x00\x00\x00\x00\x00{Q\xe5lr 
\xe3?P\xb3@q\\\xf7\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x06fZ\xb4PE\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\xef\xa9\xf9CA|\xe0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<\xea\x1b\xcad\x05\xe6?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x98\xa6K\xb4|k\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xffDrLJ3\xe7?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\xfc\x80\xc6\x02o\x00\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00_:\xe6N\xe9\x1f\xea?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x
00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfc\x80\xc6\x02o\x00\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xafh\x13\x85\xe4\x82\xe2?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xffDrLJ3\xe7?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00&$u\xa4\xfd\xb3\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00#q\r\x8d\xb9\xc1\xdd?[I\nIR\x9e\xe0?\x00\x00\x00\x00\x00\x00\x00\x00{'\xa6\x10\x82\x1f\xdf?\x00\x00\x00\x00\x00\x00\x00\x00\x1e\xcb\x9d?\tO\xda?<\xea\x1b\xcad\x05\xe6?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4j'\xd9:|\xdb?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x
f0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00&$u\xa4\xfd\xb3\xe4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00_:\xe6N\xe9\x1f\xea?\x00\x00\x00\x00\x00\x00\x00\x00\xafh\x13\x85\xe4\x82\xe2?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa5\xc2\xcdO\xfd\xce\xe0?\xbfvu\xc2\xf9\x0c\xdf?\x00\x00\x00\x00\x00\x00\x00\x00b\xfa\xfa\xcca\xb6\xe1?\x00\x00\x00\x00\x00\x00\x00\x00^\xd5\x95\xf96|\xeb?\x98\xa6K\xb4|k\xe1?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4j'\xd9:|\xdb?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0
#!/usr/bin/env python

# --------------------------------------------------------
# LDDP
# Licensed under UC Berkeley's Standard Copyright [see LICENSE for details]
# Written by Samaneh Azadi
# --------------------------------------------------------

import numpy as np


def IoU_target(bbox, gt):
    """Compute the element-wise IoU between two box arrays of equal length.

    Parameters
    ----------
    bbox, gt : (M, 4) arrays of [x1, y1, x2, y2] box coordinates.

    Returns
    -------
    (M,) float array where entry i is IoU(bbox[i], gt[i]).

    Raises
    ------
    Exception if any box pair has a zero union (degenerate boxes).
    """
    # Intersection extents; the "+ 1" treats coordinates as inclusive
    # pixel indices (Pascal VOC convention).
    w = np.minimum(bbox[:, 2], gt[:, 2]) - np.maximum(bbox[:, 0], gt[:, 0]) + 1
    h = np.minimum(bbox[:, 3], gt[:, 3]) - np.maximum(bbox[:, 1], gt[:, 1]) + 1
    # Clamp negative extents (disjoint boxes) to zero.
    w = np.maximum(w, 0)
    h = np.maximum(h, 0)
    intersection = w * h
    area_bbox = (bbox[:, 2] - bbox[:, 0] + 1) * (bbox[:, 3] - bbox[:, 1] + 1)
    area_gt = (gt[:, 2] - gt[:, 0] + 1) * (gt[:, 3] - gt[:, 1] + 1)
    union = area_bbox + area_gt - intersection
    if np.nonzero(union == 0)[0].size:
        raise Exception("Union of boxes should not be zero")
    # true_divide: plain "/" silently truncates to 0 when the boxes
    # arrive as integer arrays (numpy integer division).
    return np.true_divide(intersection, union)


def pair_Intersection(locations):
    """Compute the intersection area between every pair of boxes.

    Parameters
    ----------
    locations : (M, 4) array; [x1, y1, x2, y2] = locations[i, 0:4].

    Returns
    -------
    (M, M) array whose (i, j) entry is the intersection area of
    boxes i and j.
    """
    x1, y1, x2, y2 = (locations[:, c] for c in range(4))
    # Broadcasting replaces the original repeat/tile matrix build:
    # column vector vs row vector yields the same (M, M) grids.
    w = np.minimum(x2[:, None], x2[None, :]) - np.maximum(x1[:, None], x1[None, :]) + 1
    h = np.minimum(y2[:, None], y2[None, :]) - np.maximum(y1[:, None], y1[None, :]) + 1
    w = np.maximum(w, 0)
    h = np.maximum(h, 0)
    return w * h


def pair_IoU(locations):
    """Compute the IoU between every pair of boxes.

    Parameters
    ----------
    locations : (M, 4) array; [x1, y1, x2, y2] = locations[i, 0:4].

    Returns
    -------
    (M, M) float array whose (i, j) entry is IoU(box i, box j).

    Raises
    ------
    Exception if any box pair has a zero union.
    """
    intersection = pair_Intersection(locations)
    area = (locations[:, 2] - locations[:, 0] + 1) * (locations[:, 3] - locations[:, 1] + 1)
    # area_i varies along rows, area_j along columns (broadcast outer sum).
    union = area[:, None] + area[None, :] - intersection
    if np.nonzero(union == 0)[0].size:
        raise Exception("Union of boxes should not be zero")
    # true_divide guards against integer truncation for int inputs.
    return np.true_divide(intersection, union)
Phi_argmax = 4 * Phi_labels 84 | bbox_target = bbox_targets[np.tile(range(M),4),np.hstack((4*Phi_labels,4*Phi_labels+1,4*Phi_labels+2, 4*Phi_labels+3))] 85 | bbox_target = np.reshape(bbox_target,(M,4),order='F') 86 | bbox_target = bbox_target * stds[Phi_argmax/4,:] + means[Phi_argmax/4,:] 87 | unnormalized_bbox_targets = bbox_transform_inv(boxes, bbox_target) 88 | unnormalized_bbox_targets = clip_boxes(unnormalized_bbox_targets, (im_shape_w,im_shape_h)) 89 | return unnormalized_bbox_targets 90 | 91 | def find_local_argmax(Phi_labels, contributing_images, bbox_pred): 92 | """ 93 | Find the index of the box with maximum score: [x1,y1,x2,y2] 94 | """ 95 | M_cont = len(contributing_images) 96 | Phi_argmax = 4 * Phi_labels 97 | loc_argmax = bbox_pred[np.tile(contributing_images,4),np.hstack((Phi_argmax,Phi_argmax+1,Phi_argmax+2, Phi_argmax+3))] 98 | loc_argmax = np.reshape(loc_argmax,(M_cont,4),order='F') 99 | return loc_argmax 100 | 101 | -------------------------------------------------------------------------------- /lib/dppTools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # LDDP 5 | # Licensed under UC Berkeley's Standard Copyright [see LICENSE for details] 6 | # Written by Samaneh Azadi 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes,bbox_transform 11 | from boxTools import * 12 | from fast_rcnn.config import cfg 13 | 14 | 15 | class DPP(): 16 | def __init__(self,stds=[],means=[],sim_classes=[],epsilon=0.01,loss_weight=0.001): 17 | 18 | self.stds =stds 19 | self.means = means 20 | self.sim_classes = sim_classes 21 | self.epsilon = epsilon 22 | self._loss_weight = loss_weight 23 | 24 | 25 | def select_bg(self,Phi_labels,boxes,labels,bbox_pred,keeps_Y,good_gt_overlap,M,im_shape_w,im_shape_h): 26 | """ 27 | Find B in 
# NOTE(review): in dppTools.py this is a method of class DPP; the class
# header lies outside this span.
def dpp_greedy(self, S, scores_s, score_power, max_per_image, among_ims,
               num_gt_per_img=1000, close_thr=0.0001):
    """
    Greedy MAP-style optimization to select a diverse, high-scoring
    subset of boxes under a DPP.

    Args:
        S: (N, N) similarity matrix.
        scores_s: predicted per-box quality scores.
        score_power: exponent applied to the log-scores in the objective.
        max_per_image: hard cap on the number of selected boxes.
        among_ims: candidate indices; S and scores_s are restricted to
            this subset before optimization.
        num_gt_per_img: stop once this many boxes have been selected.
        close_thr: candidates whose objective is within log(close_thr)
            of the best are treated as ties.

    Returns:
        (keep, selected_prob): indices (into the restricted candidate
        set) of the selected boxes, and the objective value recorded at
        each selection step.
    """
    prob_thresh = cfg.TEST.PROB_THRESH
    # Restrict the kernel and the scores to the candidate subset.
    S = S[among_ims, :][:, among_ims]
    scores_s = scores_s[among_ims]
    M = S.shape[0]

    keep = []                  # indices of boxes selected so far
    # Bookkeeping: column 0 = box id, column 1 = 0/1 "still available?".
    left = np.zeros((M, 3))
    left[:, 0] = np.arange(M)
    left[:, 1] = 1
    selected_prob = []

    while (len(keep) < max_per_image) and sum(left[:, 1]) > 0:
        z = np.zeros((M, 1))
        z[keep] = 1
        sum_scores = (score_power * np.log(scores_s).T).dot(z)
        prob_rest = np.zeros((M,))
        left_indices = np.where(left[:, 1] == 1)[0]
        done_indices = np.where(left[:, 1] == 0)[0]
        if len(keep) > 0:
            S_prev = S[keep, :][:, keep]
            det_D = np.linalg.det(S_prev)
            d_1 = np.linalg.inv(S_prev)
        else:
            det_D = 1
            d_1 = 0
        # ====================================================================
        #      |D   a^T|
        # det( |a   b  | ) = (b - a D^{-1} a^T) det(D)
        #
        # "D" = S_prev; "a", "b" are the similarity entries contributed by
        # each candidate in left_indices. The determinant is evaluated for
        # all candidates at once via the vectorized inner products below.
        # ====================================================================
        if len(keep) > 0:
            # (Was written `=-` in the original; this is a plain assignment
            # of the negated quadratic form a D^{-1} a^T.)
            prob_rest[left_indices] = -np.sum(
                np.multiply(np.dot(S[left_indices, :][:, keep], d_1),
                            S[left_indices, :][:, keep]), 1)

        prob_rest[left_indices] = np.log(
            (prob_rest[left_indices] + S[left_indices, left_indices]) * det_D) + \
            (sum_scores + score_power * np.log(scores_s[(left[left_indices, 0]).astype(int)]))

        # Push already-consumed candidates far below the minimum so that
        # argmax never picks them again.
        prob_rest[done_indices] = np.min(prob_rest) - 100
        max_ind = np.argmax(prob_rest)
        # Candidates within log(close_thr) of the best objective are ties,
        # ordered best-first.
        close_inds = np.where(prob_rest >= (prob_rest[max_ind] + np.log(close_thr)))[0]
        tops_prob_rest = np.argsort(-prob_rest[close_inds]).astype(int)

        if len(keep) >= num_gt_per_img:
            break
        elif len(keep) > 0:
            # Among the tied candidates, take the first whose maximum
            # similarity to the already-selected set stays below prob_thresh.
            cost = np.max(S[np.array(range(M))[close_inds][tops_prob_rest], :][:, keep], 1)
            good_cost = list(np.where(cost <= prob_thresh)[0])
            if len(good_cost) > 0:
                ind = np.array(range(M))[close_inds][tops_prob_rest[good_cost[0]]]
                keep.append(ind)
                left[ind, 1] = 0
                selected_prob.append(prob_rest[max_ind])
            else:
                # Every remaining candidate is too similar to the current
                # selection; mark all as done to terminate the loop.
                left[:, 1] = 0
        else:
            # First pick: no diversity constraint applies yet.
            keep.append(max_ind)
            left[max_ind, 1] = 0
            selected_prob.append(prob_rest[max_ind])

    return keep, selected_prob
# --- lib/dppTools.py (continued) ---------------------------------------------
# NOTE(review): in dppTools.py this is a method of class DPP; the class
# header lies outside this span.
def extract_im_per_batch(self, N_im_per_batch, i_image, data):
    """Return (width, height) of image `i_image` inside a padded batch blob.

    `data` is the (N, C, W, H) input blob. For a single-image batch the
    blob is exactly the image, so its spatial dims are returned directly.
    For larger batches images are zero-padded to a common size, so the
    true extent is recovered by locating the trailing all-zero rows/cols.
    This function is especially useful if N_im_per_batch > 1.
    """
    if N_im_per_batch == 1:
        im_shape_w = (data[i_image, :, :, :]).shape[1]
        im_shape_h = (data[i_image, :, :, :]).shape[2]
    else:
        # Rows whose sum over channels and columns is zero are padding.
        zeros_data = np.nonzero(np.sum(np.sum((data[i_image, :, :, :]), 0), 1) == 0)[0]
        if zeros_data.size:
            # Last break in the run of consecutive zero indices marks where
            # real content ends and the padding begins.
            # NOTE(review): `not diff_B4` on an ndarray raises for size > 1;
            # this appears to rely on at most one break existing -- confirm
            # against the callers.
            diff_B4 = max(np.nonzero(abs(np.diff(zeros_data) - 1)))
            diff_B4 = -1 if not diff_B4 else diff_B4[0]
            im_shape_w = zeros_data[diff_B4 + 1]
        else:
            im_shape_w = (data[i_image, :, :, :]).shape[1]

        # Same scan along the other spatial axis for the height.
        zeros_data = np.nonzero(np.sum(np.sum((data[i_image, :, :, :]), 0), 0) == 0)[0]
        if zeros_data.size:
            diff_B4 = max(np.nonzero(abs(np.diff(zeros_data) - 1)))
            diff_B4 = -1 if not diff_B4 else diff_B4[0]
            im_shape_h = zeros_data[diff_B4 + 1]
        else:
            im_shape_h = (data[i_image, :, :, :]).shape[2]
    return im_shape_w, im_shape_h


# --- lib/dpp_loss_layer.py ---------------------------------------------------
# --------------------------------------------------------
# LDDP
# Licensed under UC Berkeley's Standard Copyright [see LICENSE for details]
# Written by Samaneh Azadi
# --------------------------------------------------------

import caffe
import numpy as np
import math
from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes, bbox_transform
import pickle
from fast_rcnn.config import cfg
from boxTools import *
from dppTools import DPP


class DPPLossLayer(caffe.Layer):
    """
    Compute the DPP loss that encourages diversity among detected boxes.

    Loss = -normalizer_Y * log p(Y|Xy) + normalizer_B * log p(B|Xb),
    where Y is a diverse set of boxes that should cover the ground truth
    and B is a set of background boxes whose selection probability is
    pushed down.
    """

    def setup(self, bottom, top):
        """Validate the bottom blob count and cache the dataset name."""
        if len(bottom) != 9:
            raise Exception("Need nine inputs to apply diversity by DPP.")
        self.sim_classes = []
        self.imdb_name = cfg.TRAIN.IMDB

    def reshape(self, bottom, top):
        """The loss output is a scalar."""
        top[0].reshape(1)

    def forward(self, bottom, top):
        """
        Forward pass: build the DPP kernels, run greedy MAP inference to pick
        the representative set Y and the background set B, and combine their
        log-probabilities into the scalar loss.
        """
        self._loss_weight = top[0].diff[0]
        # Ridge added to keep the similarity matrix S PSD.
        self.epsilon = 0.02
        sim_power = cfg.TRAIN.SIM_POWER
        Phi_power = 0.5
        self.max_per_image = 100
        self.min_Phi = 0.0001

        gt_boxes = bottom[2].data
        num_gt_per_img = gt_boxes.shape[0]
        rois = bottom[3].data
        data = bottom[4].data
        N_im_per_batch = data.shape[0]

        # Per-image state consumed by backward().
        self._sample_Y = ["" for i in range(N_im_per_batch)]
        self._keeps_Y = ["" for i in range(N_im_per_batch)]
        self._sample_B = ["" for i in range(N_im_per_batch)]
        self._keeps_B = ["" for i in range(N_im_per_batch)]
        self._Xy = ["" for i in range(N_im_per_batch)]
        self._labels_Xy = ["" for i in range(N_im_per_batch)]
        self._Xb = ["" for i in range(N_im_per_batch)]
        self._labels_Xb = ["" for i in range(N_im_per_batch)]

        # Fix: close the similarity-matrix file (the original left the
        # handle from open(...) dangling).
        with open(cfg.TRAIN.similarity_path, "r") as sim_file:
            self.sim_classes = pickle.load(sim_file)
        K = bottom[1].data.shape[1]   # number of categories
        self.means = np.reshape(bottom[7].data, (K, 4))
        self.stds = np.reshape(bottom[8].data, (K, 4))
        self.stds[0, :] = np.ones(((self.stds).shape[1],))

        self.sim_classes = self.sim_classes ** sim_power
        DPP_ = DPP(stds=self.stds, means=self.means, sim_classes=self.sim_classes,
                   epsilon=self.epsilon, loss_weight=self._loss_weight)
        for i_image in range(N_im_per_batch):
            # NOTE(review): range(min, max) excludes the ROI at index `max`;
            # looks like an off-by-one (max+1 expected) -- confirm before
            # changing, since training was performed with this slicing.
            batch = range(min(np.nonzero(rois[:, 0] == i_image)[0]),
                          max(np.nonzero(rois[:, 0] == i_image)[0]))

            im_shape_w, im_shape_h = DPP_.extract_im_per_batch(N_im_per_batch, i_image, data)

            bbox_pred = np.array((bottom[0].data[batch, :]))
            cls_score = bottom[1].data[batch, :]
            bbox_targets = bottom[6].data[batch, :]
            labels = bottom[5].data[batch].astype(int)
            M = bbox_pred.shape[0]  # number of rois of 1 image in the minibatch
            # Stabilized softmax numerator: subtract the row max before exp.
            max_cls_score = np.reshape(np.repeat(np.max((cls_score), 1), K), (M, K))
            exp_cls_score = np.exp(cls_score - max_cls_score)

            boxes = (rois[batch, :])[:, 1:]

            # =========================================================
            # Y: maximize prob of selecting gt boxes
            # =========================================================
            unnormalized_bbox_targets = unnormalize_box(labels, bbox_targets, boxes,
                                                        self.stds, self.means, M,
                                                        im_shape_w, im_shape_h)
            loc_argmax = find_local_argmax(labels, range(M), bbox_pred)
            Phi = exp_cls_score[range(M), labels]   # gt label score as phi_i
            Phi = np.maximum(Phi, self.min_Phi)
            Phi = Phi ** Phi_power
            IoU, S, L, IoU_with_gt_all, pred_boxes, det_L_I = DPP_.compute_kernel(
                labels, boxes, Phi, loc_argmax, unnormalized_bbox_targets,
                im_shape_w, im_shape_h)

            # =========================================================
            # find Y with MAP: only non-background boxes with high overlap
            # with a non-bg ground-truth box take part (label == 0 => bg);
            # quality is measured by IoU only, not prediction scores.
            # =========================================================
            MAP_images = np.nonzero(labels)[0]
            MAP_labels = labels[MAP_images]
            M_MAP = len(MAP_images)
            log_p_Y = []
            keeps_Y = []

            IoU_with_gt_all_MAP = IoU_with_gt_all[MAP_images]
            good_gt_overlap = np.where(IoU_with_gt_all_MAP > (cfg.TRAIN.IoU_gt_thresh))[0]
            among_ims = MAP_images[good_gt_overlap]

            y = np.zeros((M, 1))
            if among_ims.shape[0] == 0:
                log_p_Y.append(0)
                keeps_Y.append({})
                Xy = []
                Y = np.array([])
                labels_Xy = []
            else:
                S_MAP = S[MAP_images, :][:, MAP_images]
                Phi_MAP = np.multiply(IoU_with_gt_all_MAP, np.ones((M_MAP,)))
                # Select representative boxes by greedy MAP inference.
                selected_and_probs = DPP_.dpp_greedy(S_MAP, Phi_MAP, 1, self.max_per_image,
                                                     among_ims, num_gt_per_img=num_gt_per_img)
                Y = np.array(selected_and_probs[0])
                prob_dpp = selected_and_probs[1]

                Y = among_ims[np.reshape(Y, (Y.shape[0],)).tolist()]
                keeps_Y.append(dict(zip(Y, MAP_labels[Y])))
                y[Y] = 1
                y = np.reshape(y, (M,))

                # Find X in P(Y|X).
                # NOTE(review): keeps_Y is re-created per image, so
                # keeps_Y[i_image] only works for N_im_per_batch == 1 --
                # confirm the intended batch size.
                Xy, labels_Xy, potential_bgs = DPP_.Compute_Xy(list(Y), keeps_Y[i_image],
                                                               labels, pred_boxes)
                L = L[Xy, :][:, Xy]
                det_L_I = np.linalg.det(L + np.eye(len(Xy)))
                S_y = S[Y, :][:, Y]
                log_p = DPP_.compute_log_p(Y, S_y, y, Phi, det_L_I, M, Phi_power)
                log_p_Y.append(log_p)

            self._sample_Y[i_image] = y
            self._keeps_Y[i_image] = keeps_Y
            self._Xy[i_image] = Xy
            self._labels_Xy[i_image] = labels_Xy

            # =========================================================
            # B: minimize prob of selecting background boxes
            # =========================================================
            log_p_B = []
            keeps_B = []

            Phi_labels = np.argmax(exp_cls_score, axis=1)
            Phi = exp_cls_score[range(M), Phi_labels]   # predicted-label score as phi_i
            Phi = np.maximum(Phi, self.min_Phi)
            Phi = Phi ** Phi_power
            loc_argmax = find_local_argmax(Phi_labels, range(M), bbox_pred)
            IoU, S, L, IoU_with_gt_all, pred_boxes, det_L_I = DPP_.compute_kernel(
                Phi_labels, boxes, Phi, loc_argmax, unnormalized_bbox_targets,
                im_shape_w, im_shape_h)

            good_gt_overlap = np.where(IoU_with_gt_all > (cfg.TRAIN.IoU_gt_thresh))[0]
            B, prob_dpp = DPP_.select_bg(Phi_labels, boxes, labels, bbox_pred,
                                         keeps_Y[i_image], good_gt_overlap,
                                         M, im_shape_w, im_shape_h)
            b = np.zeros((M, 1))
            b[np.reshape(B, (B.shape[0],)).tolist()] = 1
            b = np.reshape(b, (M,))

            bgs = list(np.where((labels) == 0)[0])
            bgs_1 = sorted(set(bgs) - set(list(Y) + list(B)))
            Xb = np.array(list(Y) + list(B) + bgs_1)
            labels_Xb = np.array(list(labels[list(Y)]) +
                                 list(np.zeros((len(B) + len(bgs_1), 1)))).astype(int)
            keeps_B.append(dict(zip(B, Phi_labels[B])))

            L = L[Xb, :][:, Xb]
            det_L_I = np.linalg.det(L + np.eye(len(Xb)))
            S_b = S[B, :][:, B]
            log_p = DPP_.compute_log_p(B, S_b, b, Phi, det_L_I, M, Phi_power)
            log_p_B.append(log_p)

            self._sample_B[i_image] = b
            self._keeps_B[i_image] = keeps_B
            self._Xb[i_image] = Xb
            self._labels_Xb[i_image] = labels_Xb

            # Fix: np.float alias was removed in NumPy >= 1.24; float() is
            # numerically identical.
            normalizer_Y = (len(B) + 1) / float(len(list(Y)) + len(list(B)) + 1)
            normalizer_B = (len(Y) + 1) / float(len(list(Y)) + len(list(B)) + 1)

        top[0].data[...] = -normalizer_Y * sum(log_p_Y) + normalizer_B * sum(log_p_B)

    def backward(self, top, propagate_down, bottom):
        """
        Backward pass: propagate d(loss)/d(cls_score) for both the Y
        (coverage) and B (background suppression) terms.
        """
        Phi_power = 0.5

        cls_score_diff = np.zeros(bottom[1].data.shape)
        rois = bottom[3].data
        data = bottom[4].data
        gt_boxes = bottom[2].data
        num_gt_per_img = gt_boxes.shape[0]
        N_im_per_batch = data.shape[0]

        for i_image in range(N_im_per_batch):
            # NOTE(review): same suspected off-by-one as in forward() --
            # range(min, max) drops the last ROI of the image.
            batch = range(min(np.nonzero(rois[:, 0] == i_image)[0]),
                          max(np.nonzero(rois[:, 0] == i_image)[0]))
            im_shape_w, im_shape_h = DPP().extract_im_per_batch(N_im_per_batch, i_image, data)

            bbox_pred = (bottom[0].data[batch, :])
            cls_score = (bottom[1].data[batch, :])
            bbox_targets = (bottom[6].data[batch, :])
            labels = bottom[5].data[batch].astype(int)
            M = bbox_pred.shape[0]  # number of rois of 1 image in the minibatch
            K = cls_score.shape[1]  # number of categories

            boxes = (rois[batch, :])[:, 1:]

            max_cls_score = np.reshape(np.repeat(np.max((cls_score), 1), K), (M, K))
            exp_cls_score = np.exp(cls_score - max_cls_score)

            Phi_labels = labels
            unnormalized_bbox_targets = unnormalize_box(Phi_labels, bbox_targets, boxes,
                                                        self.stds, self.means,
                                                        M, im_shape_w, im_shape_h)

            # Embed the class-similarity matrix so that background (class 0)
            # is only similar to itself.
            sim_classes_0 = np.zeros((K, K))
            sim_classes_0[1:K, 1:K] = self.sim_classes
            sim_classes_0[0, 0] = 1
            self.sim_classes = sim_classes_0
            DPP_ = DPP(stds=self.stds, means=self.means, sim_classes=self.sim_classes,
                       epsilon=self.epsilon, loss_weight=self._loss_weight)

            # =========================================================
            # d logp(Y|Xy)/db_i^c
            # (the original initialized dLoss_db1 twice back-to-back;
            # the duplicate dead store was removed)
            # =========================================================
            dLoss_db1 = np.zeros((M, K))
            B = sorted(self._keeps_B[i_image][0].keys())
            Y = sorted(self._keeps_Y[i_image][0].keys())

            if len(self._keeps_Y[i_image][0].keys()) > 0:
                Xy = self._Xy[i_image]
                labels_Xy = self._labels_Xy[i_image]
                y = self._sample_Y[i_image][Xy]

                # Selected items contribute their own-label score; the rest
                # contribute the total foreground mass.
                Phi = np.multiply(y, exp_cls_score[Xy, labels_Xy]) + \
                    np.multiply(1 - y, np.sum(exp_cls_score[Xy, 1:], 1))
                Phi = Phi ** Phi_power
                normalizer = (len(B) + 1) / float(len(list(Y)) + len(list(B)) + 1)

                dLoss_db1 = DPP_.compute_diff_logp(labels_Xy, Xy, y, exp_cls_score,
                                                   bbox_pred, boxes, Phi,
                                                   unnormalized_bbox_targets,
                                                   im_shape_w, im_shape_h,
                                                   Phi_power, normalizer, True)
            bottom_diff_1_y = DPP_.clip_grad(dLoss_db1, cls_score)

            # =========================================================
            # d logp(B|Xb)/db_i^c
            # =========================================================
            dLoss_db1 = np.zeros((M, K))
            if len(self._keeps_B[i_image][0].keys()) > 0:
                Xb = self._Xb[i_image]
                labels_Xb = self._labels_Xb[i_image]
                labels_Xb = np.reshape(labels_Xb, (labels_Xb.shape[0],))
                b = self._sample_B[i_image][Xb]

                Phi = (np.multiply(b, np.sum(exp_cls_score[Xb, 1:], 1)) +
                       np.multiply(1 - b, exp_cls_score[Xb, labels_Xb]))
                Phi = Phi ** Phi_power
                normalizer = (len(Y) + 1) / float(len(list(Y)) + len(list(B)) + 1)
                dLoss_db1 = DPP_.compute_diff_logp(labels_Xb, Xb, b, exp_cls_score,
                                                   bbox_pred, boxes, Phi,
                                                   unnormalized_bbox_targets,
                                                   im_shape_w, im_shape_h,
                                                   Phi_power, normalizer, False)

            bottom_diff_1_b = DPP_.clip_grad(dLoss_db1, cls_score)

            cls_score_diff[batch, :] = bottom_diff_1_y - bottom_diff_1_b

        bottom[1].diff[...] = -cls_score_diff
spamreader:\n", 77 | " words[row[0]]=row[1]\n", 78 | " if row[1] in words_r:\n", 79 | " duplicates.append(row[1])\n", 80 | " words_r[row[1]]=row[0]" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 7, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "words_r['elephant']='n02503517'\n", 92 | "words_r['person']='n00007846'\n", 93 | "words_r['ball']='n02778669'\n", 94 | "words_r['bear']='n02131653'\n", 95 | "words_r['bed']='n02818832'\n", 96 | "words_r['bench']='n02828884'\n", 97 | "words_r['book']='n02870526'\n", 98 | "words_r['bowl']='n02880940'\n", 99 | "words_r['carrot']='n07730207'\n", 100 | "words_r['chair']='n03001627'\n", 101 | "words_r['couch']='n04256520'\n", 102 | "words_r['cow']='n01887787'\n", 103 | "words_r['cup']='n03147509'\n", 104 | "words_r['fork']='n03383948'\n", 105 | "words_r['spoon']='n04284002'\n", 106 | "words_r['knife']='n04380346'\n", 107 | "words_r['kite']='n03621473'\n", 108 | "words_r['mouse']='n03793489'\n", 109 | "words_r['orange']='n07747607'\n", 110 | "words_r['sheep']='n02411705'\n", 111 | "words_r['sign']='n06793231'\n", 112 | "words_r['sink']='n03620052'\n", 113 | "words_r['tie']='n03815615'\n", 114 | "words_r['dog']='n02084071'\n", 115 | "words_r['cat']='n02121620'\n", 116 | "words_r['tv']='n06277280'\n", 117 | "words_r['toilet']='n04447028'\n", 118 | "words_r['backpack']='n02773037'\n", 119 | "words_r['handbag']='n02774152'\n", 120 | "words_r['suitcase']='n02774630'\n", 121 | "\n", 122 | "\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 8, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "raw_counts={}\n", 134 | "with open(\"../data/imagenet_data/imagenet_count.txt\") as csv_file:\n", 135 | " spamreader = csv.reader(csv_file, delimiter=\" \", quotechar='|')\n", 136 | " for row in spamreader:\n", 137 | " if \"./n\" in row[0]:\n", 138 | " raw_counts[row[0][2:]]=int(row[1])" 139 | ] 140 | 
}, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 41, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "leaves=(set(tree_c2p.keys())-set(tree_p2c.keys()))\n", 150 | "nodes=list(set(tree_c2p.keys())|set(tree_p2c.keys()))\n" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 11, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "node_2_idx={k:i for i,k in enumerate(nodes)}\n", 162 | "idx_2_node={i:k for i,k in enumerate(nodes)}" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 12, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "def ged_descendents(node,Descendents):\n", 174 | " if node not in Descendents:\n", 175 | " descendents=set([node])\n", 176 | " if node in tree_p2c:\n", 177 | " for child in tree_p2c[node]:\n", 178 | " if child not in Descendents:\n", 179 | " ged_descendents(child,Descendents)\n", 180 | " descendents|=Descendents[child]\n", 181 | " Descendents[node]=descendents" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 13, 187 | "metadata": { 188 | "collapsed": false 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "Descendents={}\n", 193 | "for node in nodes:\n", 194 | " ged_descendents(node,Descendents)\n" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 14, 200 | "metadata": { 201 | "collapsed": true 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "Ancestors={}\n", 206 | "for node in nodes:\n", 207 | " for dec in Descendents[node]:\n", 208 | " if dec not in Ancestors:\n", 209 | " Ancestors[dec]=set([])\n", 210 | " Ancestors[dec].add(node)\n" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 15, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "counts={}\n", 222 | "for node in nodes:\n", 223 | " 
c=0\n", 224 | " for d in Descendents[node]:\n", 225 | " if d in raw_counts:\n", 226 | " c+=raw_counts[d]\n", 227 | " counts[node]=c" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 17, 233 | "metadata": { 234 | "collapsed": false 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "total_count=max(counts.values())\n", 239 | "pscnt=1\n", 240 | "IC={}\n", 241 | "for node in nodes:\n", 242 | " if counts[node]>500000:\n", 243 | " cn=total_count\n", 244 | " else:\n", 245 | " cn=counts[node]\n", 246 | " IC[node]=np.log((cn+pscnt)/float(total_count+pscnt))" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 18, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "def find_common_ancestor(node1,node2):\n", 258 | " a1=Ancestors[node1]\n", 259 | " a2=Ancestors[node2]\n", 260 | " return sorted([(k,counts[k],IC[k]) for k in set(a1)&set(a2)],key=lambda x:x[1])[0]" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 39, 266 | "metadata": { 267 | "collapsed": true 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "def compute_sim(classes,class_map):\n", 272 | " \"\"\"Compute similarity matrix\"\"\"\n", 273 | " num_classes = len(classes)\n", 274 | " Sim=np.zeros((num_classes,num_classes))\n", 275 | " for i,clsi in enumerate(classes):\n", 276 | " ci=words_r[class_map[clsi]]\n", 277 | " for j,clsj in enumerate(classes):\n", 278 | " if i==j:\n", 279 | " Sim[i,j]=1\n", 280 | " continue \n", 281 | " cj=words_r[class_map[clsj]]\n", 282 | " cc=find_common_ancestor(ci,cj)\n", 283 | " if cc[2]==0:\n", 284 | " Sim[i][j]=0\n", 285 | " else:\n", 286 | " Sim[i][j]=(2*cc[2])/(IC[ci]+IC[cj])\n", 287 | " Sim[j][i]=Sim[i][j]\n", 288 | " return Sim\n", 289 | "\n", 290 | " " 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 49, 296 | "metadata": { 297 | "collapsed": false 298 | }, 299 | "outputs": [], 300 | "source": [ 301 | "with 
open('../data/info.json', 'r') as fp:\n", 302 | " info = json.load(fp)\n", 303 | "pascal_20_classes = info['pascal_cats'][1:]" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 20, 309 | "metadata": { 310 | "collapsed": true 311 | }, 312 | "outputs": [], 313 | "source": [ 314 | "pascal_class_map={cls:cls for cls in pascal_20_classes}\n", 315 | "pascal_class_map[\"aeroplane\"]=\"airplane, aeroplane, plane\"\n", 316 | "pascal_class_map[\"bicycle\"]=\"bicycle, bike, wheel, cycle\"\n", 317 | "pascal_class_map[\"bus\"]=\"bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle\"\n", 318 | "pascal_class_map[\"train\"]=\"train, railroad train\"\n", 319 | "pascal_class_map[\"car\"]=\"car, auto, automobile, machine, motorcar\"\n", 320 | "pascal_class_map[\"diningtable\"]=\"dining table, board\"\n", 321 | "pascal_class_map[\"horse\"]=\"horse, Equus caballus\"\n", 322 | "pascal_class_map[\"motorbike\"]=\"minibike, motorbike\"\n", 323 | "pascal_class_map[\"pottedplant\"]=\"pot plant\"\n", 324 | "pascal_class_map[\"sofa\"]=\"sofa, couch, lounge\"\n", 325 | "pascal_class_map[\"tvmonitor\"]=\"television monitor, tv monitor\"\n" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 22, 331 | "metadata": { 332 | "collapsed": false 333 | }, 334 | "outputs": [], 335 | "source": [ 336 | "Sim_pascal = compute_sim(pascal_20_classes,pascal_class_map)\n", 337 | "# pickle.dump(Sim_pascal,open(\"../data/semantic_similarity/pascal_voc_2007_semantics.pickle\",\"w\"))" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 50, 343 | "metadata": { 344 | "collapsed": true 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "coco_80_classes = info['coco_cats'][1:]" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 24, 354 | "metadata": { 355 | "collapsed": false 356 | }, 357 | "outputs": [], 358 | "source": [ 359 | "coco_class_map={cls:cls for cls in 
coco_80_classes}\n", 360 | "coco_class_map[\"bicycle\"]=\"bicycle, bike, wheel, cycle\"\n", 361 | "coco_class_map[\"car\"]=\"car, auto, automobile, machine, motorcar\"\n", 362 | "coco_class_map[u'motorcycle']=\"motorcycle, bike\"\n", 363 | "coco_class_map[u'airplane']=\"airplane, aeroplane, plane\"\n", 364 | "coco_class_map[\"bus\"]=\"bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle\"\n", 365 | "coco_class_map[\"train\"]=\"train, railroad train\"\n", 366 | "coco_class_map['truck']=\"truck, motortruck\"\n", 367 | "coco_class_map['traffic light']=\"traffic light, traffic signal, stoplight\"\n", 368 | "coco_class_map['fire hydrant']=\"fireplug, fire hydrant, plug\"\n", 369 | "coco_class_map['stop sign']=\"sign\"\n", 370 | "coco_class_map[\"horse\"]=\"horse, Equus caballus\"\n", 371 | "coco_class_map['giraffe']=\"giraffe, camelopard, Giraffa camelopardalis\"\n", 372 | "coco_class_map['backpack']=\"backpack, back pack, knapsack, packsack, rucksack, haversack\"\n", 373 | "coco_class_map['handbag']=\"bag, handbag, pocketbook, purse\"\n", 374 | "coco_class_map['suitcase']=\"bag, traveling bag, travelling bag, grip, suitcase\"\n", 375 | "coco_class_map['frisbee']=\"ultimate frisbee\"\n", 376 | "coco_class_map['skis']=\"ski\"\n", 377 | "coco_class_map['sports ball']=\"ball\"\n", 378 | "coco_class_map['baseball bat']='baseball bat, lumber'\n", 379 | "coco_class_map['baseball glove']='baseball glove, glove, baseball mitt, mitt'\n", 380 | "coco_class_map['tennis racket']='tennis racket, tennis racquet'\n", 381 | "coco_class_map['wine glass']='wineglass'\n", 382 | "coco_class_map['hot dog']='hotdog, hot dog'\n", 383 | "coco_class_map['pizza']='pizza, pizza pie'\n", 384 | "coco_class_map['donut']='doughnut, donut, sinker'\n", 385 | "coco_class_map[\"potted plant\"]=\"pot plant\"\n", 386 | "coco_class_map['dining table']='dining table, board'\n", 387 | "coco_class_map['toilet']='toilet, toilette'\n", 388 | 
"coco_class_map['tv']='television receiver, television, television set, tv, tv set, idiot box, boob tube, telly, goggle box'\n", 389 | "coco_class_map['remote']='remote control, remote'\n", 390 | "coco_class_map['laptop']='laptop, laptop computer'\n", 391 | "coco_class_map['cell phone']='cellular telephone, cellular phone, cellphone, cell, mobile phone'\n", 392 | "coco_class_map['refrigerator']='electric refrigerator, fridge'\n", 393 | "coco_class_map['teddy bear']='teddy, teddy bear'\n", 394 | "coco_class_map['hair drier']='hand blower, blow dryer, blow drier, hair dryer, hair drier'\n", 395 | " \n", 396 | "\n", 397 | "\n" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 32, 403 | "metadata": { 404 | "collapsed": true 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "Sim_coco = compute_sim(coco_80_classes,coco_class_map)\n", 409 | "#pickle.dump(Sim_coco, open(\"../data/semantic_similarity/coco_semantics.pickle\",\"w\"))" 410 | ] 411 | } 412 | ], 413 | "metadata": { 414 | "anaconda-cloud": {}, 415 | "kernelspec": { 416 | "display_name": "Python [default]", 417 | "language": "python", 418 | "name": "python2" 419 | }, 420 | "language_info": { 421 | "codemirror_mode": { 422 | "name": "ipython", 423 | "version": 2 424 | }, 425 | "file_extension": ".py", 426 | "mimetype": "text/x-python", 427 | "name": "python", 428 | "nbconvert_exporter": "python", 429 | "pygments_lexer": "ipython2", 430 | "version": "2.7.12" 431 | } 432 | }, 433 | "nbformat": 4, 434 | "nbformat_minor": 0 435 | } 436 | --------------------------------------------------------------------------------