├── .gitignore ├── LICENSE ├── README.md ├── extract ├── .gitignore ├── .ipynb_checkpoints │ └── visualize-checkpoint.ipynb ├── README.md ├── extractor.py ├── folder.py └── pca.py ├── helper ├── __init__.py ├── delf_helper.py ├── feeder.py └── matcher.py ├── notebook └── visualize.ipynb ├── static ├── attn.png ├── glr1k.png ├── glr2k.png ├── ldmk.png └── vis.png ├── train ├── .gitignore ├── config.py ├── dataloader.py ├── delf.py ├── layers.py ├── main.py └── solver.py └── utils ├── __init__.py ├── logger.py └── misc.py /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore this files when commit. 2 | 3 | # data files. 4 | *.h5 5 | *.hdf5 6 | *.json 7 | *.t7 8 | *.npy 9 | *.csv 10 | 11 | 12 | # zip files. 13 | *.dmg 14 | *.7z 15 | *.gz 16 | *.iso 17 | *.jar 18 | *.rar 19 | *.tar 20 | *.zip 21 | *.tar.gz 22 | *.tgz 23 | 24 | # image files. 25 | *.png 26 | *.jpeg 27 | *.jpg 28 | 29 | # log files. 30 | *.log 31 | *.out 32 | 33 | # OS generated files. 34 | .DS_Store* 35 | Thumbs.db 36 | *.swp 37 | *.pyc 38 | *.swo 39 | screenlog.* 40 | 41 | # pytorch. 42 | *.pth.tar 43 | *.log 44 | *.sock 45 | *.pid 46 | *.delf 47 | *.h5 48 | 49 | # user generaged files. 50 | repo/ 51 | venv/ 52 | data/ 53 | c3/ 54 | ref/ 55 | nvlib/ 56 | 57 | # pycharm IDE files 58 | .idea/ 59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Minchul Shin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Pytorch Implementation of Deep Local Feature (DeLF) 3 | PyTorch Implementation of "Large-Scale Image Retrieval with Attentive Deep Local Features" 4 | reference: https://arxiv.org/pdf/1612.06321.pdf 5 | 6 | 7 | ## Prerequisites 8 | + PyTorch 9 | + python3 10 | + CUDA 11 | 12 | ## Training DeLF 13 | There are 2 steps for DeLF training: (1) finetune stage, and (2) keypoint stage. 14 | Finetune stage loads resnet50 model pretrained on ImageNet, and finetune. 15 | Keypoint stage freezes the "base" network, and only update "attention" network for keypoint selection. 
16 | After the train process is done, model will be saved at `repo//keypoint/ckpt` 17 | 18 | ### (1) training finetune stage: 19 | ~~~shell 20 | $ cd train/ 21 | $ python main.py \ 22 | --stage 'finetune' \ 23 | --optim 'sgd' \ 24 | --gpu_id 6 \ 25 | --expr 'landmark' \ 26 | --ncls 586 \ 27 | --finetune_train_path \ 28 | --finetune_val_path \ 29 | ~~~ 30 | 31 | ### (2) training keypoint stage: 32 | + load_from: absolute path to pytorch model you wish to load. (.pth.tar) 33 | + expr: name of experiment you wish to save as. 34 | ~~~shell 35 | $ cd train/ 36 | $ python main.py \ 37 | --stage 'keypoint' \ 38 | --gpu_id 6 \ 39 | --ncls 586 \ 40 | --optim 'sgd' \ 41 | --use_random_gamma_scaling true \ 42 | --expr 'landmark' \ 43 | --load_from \ 44 | --keypoint_train_path \ 45 | --keypoint_val_path \ 46 | ~~~ 47 | 48 | 49 | ## Feature Extraction of DeLF 50 | There are also two steps to extract DeLF: (1) train PCA, (2) extract dimension reduced DeLF. 51 | __IMPORTANT: YOU MUST CHANGE OR COPY THE NAME OF MODEL from `repo//keypoint/ckpt/bestshot.pth.tar` to `repo//keypoint/ckpt/fix.pth.tar`.__ 52 | __I intentionally added this to prevent the model from being updated after the PCA matrix is already calculated.__ 53 | 54 | ### (1) train PCA 55 | ~~~shell 56 | $ cd extract/ 57 | $ python extractor.py 58 | --gpu_id 4 \ 59 | --load_expr 'delf' \ 60 | --mode 'pca' \ 61 | --stage 'inference' \ 62 | --batch_size 1 \ 63 | --input_path , but it is hardcoded. 64 | --output_path , but it is hardcoded. 65 | ~~~ 66 | 67 | ### (2) extract dimension reduced DeLF 68 | ~~~shell 69 | $ cd extract/ 70 | $ python extractor.py 71 | --gpu_id 4 \ 72 | --load_expr 'delf' \ 73 | --mode 'delf' \ 74 | --stage 'inference' \ 75 | --batch_size 1 \ 76 | --attn_thres 0.31 \ 77 | --iou_thres 0.92 \ 78 | --top_k 1000 \ 79 | --use_pca True \ 80 | --pca_dims 40 \ 81 | --pca_parameters_path , but it is hardcoded. 82 | --input_path , but it is hardcoded. 83 | --output_path , but it is hardcoded. 84 | ~~~ 85 | 86 | 87 | ## Visualization 88 | You can visualize DeLF matching batween two arbitrary query images. 89 | Let's assume there exist two images, test/img1.jpg, test/img2.jpg. 90 | Run visualize.ipynb using Jupyter Notebook, and run each cells. 91 | You may get the result like below. 92 | 93 | __1) RANSAC Matching (Correspondance Matching + Geometric Verification)__ 94 | ![image](https://raw.githubusercontent.com/nashory/DeLF-pytorch/master/static/vis.png) 95 | 96 | __2) Attention Map__ 97 | ![image](https://raw.githubusercontent.com/nashory/DeLF-pytorch/master/static/attn.png) 98 | 99 | __Ranking Result on Oxf5k:__ 100 | * glr1k, glr2k: Trained DeLF model with a subset of google-landmark-dataset on kaggle, which contains top-K instances sorted by the # of images included. 101 | ** ldmk: Trained DeLF model with landmark dataset. (exactly same with the paper) 102 | 103 |
104 | __glr1k ranking result__
105 | 
106 | ![image](https://raw.githubusercontent.com/nashory/DeLF-pytorch/master/static/glr1k.png)
107 | 
108 | 
109 | 110 |
111 | __glr2k ranking result__
112 | 
113 | ![image](https://raw.githubusercontent.com/nashory/DeLF-pytorch/master/static/glr2k.png)
114 | 
115 | 
116 | 117 |
118 | __ldmk ranking result__
119 | 
120 | ![image](https://raw.githubusercontent.com/nashory/DeLF-pytorch/master/static/ldmk.png)
121 | 
122 | 
123 | 
124 | ## Benchmark Result on Oxf5k (compared to the original paper)
125 | __Note: DELF_TF is the original author's model, and the features were extracted using this nice repo.__ __(https://github.com/insikk/delf_enhanced)__
126 | + PYTORCH_LDMK: Trained with the landmark dataset.
127 | + PYTORCH_GLR1K: Trained with a subset of the google-landmark-dataset with 1k instance classes.
128 | + PYTORCH_GLR2K: Trained with a subset of the google-landmark-dataset with 2k instance classes.
129 | + PYTORCH_BNK_V3_BAL_HANA: A private currency dataset I personally own, used only as a sanity check.
130 | 
131 | Classes | DELF_TF | PYTORCH_LDMK | PYTORCH_GLR1K | PYTORCH_GLR2K | PYTORCH_BNK_V3_BAL_HANA
132 | -- | -- | -- | -- | -- | --
133 | mAP | 0.851307 | 0.849373 | 0.87828 | 0.866517 | 0.489614
134 | all_souls_1 | 0.751052 | 0.767453 | 0.916059 | 0.886243 | 0.0584418
135 | all_souls_2 | 0.517995 | 0.645628 | 0.708546 | 0.767904 | 0.287783
136 | all_souls_3 | 0.626499 | 0.760189 | 0.881578 | 0.903977 | 0.347261
137 | all_souls_4 | 0.968566 | 0.930445 | 0.967221 | 0.980288 | 0.515091
138 | all_souls_5 | 0.735256 | 0.827341 | 0.899803 | 0.911414 | 0.117378
139 | ashmolean_1 | 0.83206 | 0.768585 | 0.829522 | 0.860364 | 0.157126
140 | ashmolean_2 | 0.844329 | 0.803305 | 0.814522 | 0.88631 | 0.194069
141 | ashmolean_3 | 0.8407 | 0.863916 | 0.86428 | 0.841624 | 0.20158
142 | ashmolean_4 | 0.857416 | 0.730968 | 0.816007 | 0.829129 | 0.353456
143 | ashmolean_5 | 0.77901 | 0.84768 | 0.808717 | 0.875755 | 0.106619
144 | balliol_1 | 0.917435 | 0.818512 | 0.914453 | 0.857404 | 0.362258
145 | balliol_2 | 0.462124 | 0.5546 | 0.68825 | 0.632167 | 0.0984046
146 | balliol_3 | 0.710849 | 0.72742 | 0.80883 | 0.729275 | 0.209934
147 | balliol_4 | 0.658099 | 0.681549 | 0.749764 | 0.667446 | 0.342497
148 | balliol_5 | 0.739436 | 0.689549 | 0.80835 | 0.716029 | 0.319832
149 | bodleian_1 | 0.7943 | 0.797353 | 0.833887 | 0.851872 | 0.350422
150 | bodleian_2 | 0.828246 | 0.549165 | 0.520681 | 0.413119 | 0.643002
151 | bodleian_3 | 0.84655 | 0.844758 | 0.954003 | 0.841856 | 0.799652
152 | bodleian_4 | 0.726362 | 0.732197 | 0.916468 | 0.84604 | 0.476852
153 | bodleian_5 | 0.815629 | 0.864863 | 0.915992 | 0.847784 | 0.773505
154 | christ_church_1 | 0.953197 | 0.97743 | 0.96955 | 0.987822 | 0.866622
155 | christ_church_2 | 0.960692 | 0.950959 | 0.975525 | 0.979186 | 0.783949
156 | christ_church_3 | 0.932694 | 0.951987 | 0.940492 | 0.942081 | 0.263114
157 | christ_church_4 | 0.965374 | 0.979779 | 0.970264 | 0.981529 | 0.784185
158 | christ_church_5 | 0.971503 | 0.971411 | 0.976488 | 0.983004 | 0.312071
159 | cornmarket_1 | 0.690551 | 0.722799 | 0.692261 | 0.681911 | 0.492891
160 | cornmarket_2 | 0.727338 | 0.382168 | 0.32282 | 0.184599 | 0.169908
161 | cornmarket_3 | 0.707911 | 0.650324 | 0.696718 | 0.672553 | 0.379656
162 | cornmarket_4 | 0.65958 | 0.789562 | 0.656362 | 0.669228 | 0.273514
163 | cornmarket_5 | 0.68901 | 0.814039 | 0.606983 | 0.558519 | 0.19587
164 | hertford_1 | 0.92893 | 0.915811 | 0.957557 | 0.951947 | 0.562145
165 | hertford_2 | 0.960313 | 0.942536 | 0.937546 | 0.951293 | 0.524951
166 | hertford_3 | 0.936073 | 0.959108 | 0.97494 | 0.941641 | 0.570177
167 | hertford_4 | 0.898146 | 0.914434 | 0.924889 | 0.927225 | 0.679879
168 | hertford_5 | 0.975377 | 0.929499 | 0.946097 | 0.94726 | 0.235865
169 | keble_1 | 1 | 1 | 1 | 1 | 0.954762
170 | keble_2 | 1 | 0.944161 | 1 | 1 | 0.921088
171 | keble_3 | 1 | 0.932568 | 1 | 1 | 0.931319
172 | keble_4 | 1 | 1 | 1 | 1 | 0.331796
173 | keble_5 | 1 | 0.87432 | 1 | 1 | 0.944161
174 | magdalen_1 | 0.710288 | 0.766209 |
0.819577 | 0.861361 | 0.109972 175 | magdalen_2 | 0.830566 | 0.928487 | 0.914451 | 0.926896 | 0.164253 176 | magdalen_3 | 0.759041 | 0.832379 | 0.872577 | 0.896532 | 0.168931 177 | magdalen_4 | 0.853145 | 0.877747 | 0.880979 | 0.844535 | 0.0728258 178 | magdalen_5 | 0.761443 | 0.77776 | 0.841862 | 0.791102 | 0.175314 179 | pitt_rivers_1 | 1 | 1 | 1 | 1 | 0.647935 180 | pitt_rivers_2 | 1 | 1 | 1 | 1 | 1 181 | pitt_rivers_3 | 1 | 1 | 1 | 1 | 0.746479 182 | pitt_rivers_4 | 1 | 1 | 1 | 1 | 0.599398 183 | pitt_rivers_5 | 1 | 1 | 1 | 1 | 1 184 | radcliffe_camera_1 | 0.93144 | 0.916562 | 0.943584 | 0.95298 | 0.860801 185 | radcliffe_camera_2 | 0.961224 | 0.980161 | 0.980304 | 0.982237 | 0.936467 186 | radcliffe_camera_3 | 0.925759 | 0.908404 | 0.949748 | 0.959252 | 0.871228 187 | radcliffe_camera_4 | 0.979608 | 0.98273 | 0.983941 | 0.988227 | 0.787773 188 | radcliffe_camera_5 | 0.90082 | 0.936742 | 0.952967 | 0.949522 | 0.894346 189 | 190 | 191 | 192 | 193 | ## Author 194 | Minchul Shin([@nashory](https://github.com/nashory)) 195 | contact: min.nashory@navercorp.com 196 | 197 | ![image](https://camo.githubusercontent.com/e053bc3e1b63635239e8a44574e819e62ab3e3f4/687474703a2f2f692e67697068792e636f6d2f49634a366e36564a4e6a524e532e676966) 198 | -------------------------------------------------------------------------------- /extract/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ./output 5 | ./debug 6 | 7 | -------------------------------------------------------------------------------- /extract/.ipynb_checkpoints/visualize-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# query_list = ['./data/oxford5k_images/hertford_000056.jpg', './data/oxford5k_images/hertford_000056.jpg']" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "/home1/irteam/nashory/workspace/kaggle/google-landmark-challenge/delf/venv/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 22 | " from ._conv import register_converters as _register_converters\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "import os, sys, time\n", 28 | "sys.path.append('../')\n", 29 | "sys.path.append('../train')\n", 30 | "\n", 31 | "from PIL import Image\n", 32 | "from io import BytesIO\n", 33 | "import matplotlib.image as mpimg\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "\n", 36 | "from train.delf import Delf_V1\n", 37 | "from feeder import Feeder\n", 38 | "import matcher\n", 39 | "\n", 40 | "def resize_image(image, target_size=800):\n", 41 | " def calc_by_ratio(a, b):\n", 42 | " return int(a * target_size / float(b))\n", 43 | "\n", 44 | " size = image.size\n", 45 | " if size[0] < size[1]:\n", 46 | " w = calc_by_ratio(size[0], size[1])\n", 47 | " h = target_size\n", 48 | " else:\n", 49 | " w = target_size\n", 50 | " h = calc_by_ratio(size[1], size[0])\n", 51 | "\n", 52 | " image = image.resize((w, h), Image.BILINEAR)\n", 53 | " return image\n", 54 | "\n", 55 | "\n", 56 | "def get_and_cache_image(image_path, basewidth=None):\n", 57 | " image = Image.open(image_path)\n", 58 | " if basewidth is not None:\n", 59 | " image = resize_image(image, basewidth)\n", 60 | " imgByteArr = BytesIO()\n", 61 | " image.save(imgByteArr, format='PNG')\n", 62 | " imgByteArr = imgByteArr.getvalue()\n", 63 | " return image, imgByteArr\n", 64 | "\n", 65 | "\n", 66 | "def get_result(feeder, query):\n", 67 | " pil_image = []\n", 68 | " byte_image = []\n", 69 | " for _, v in enumerate(query):\n", 70 | " pil, byte = get_and_cache_image(v)\n", 71 | " pil_image.append(pil)\n", 72 | " byte_image.append(byte)\n", 73 | "\n", 74 | " # feed and get output.\n", 75 | " outputs = feeder.feed_to_compare(query, pil_image)\n", 76 | " \n", 77 | " att1 = matcher.get_attention_image_byte(outputs[0]['attention_np_list'])\n", 78 | " att2 = matcher.get_attention_image_byte(outputs[1]['attention_np_list'])\n", 79 | "\n", 80 | " side_by_side_comp_img_byte, score = matcher.get_ransac_image_byte(\n", 81 | " byte_image[0],\n", 82 | " outputs[0]['location_np_list'],\n", 83 | " outputs[0]['descriptor_np_list'],\n", 84 | " byte_image[1],\n", 85 | " outputs[1]['location_np_list'],\n", 86 | " outputs[1]['descriptor_np_list'])\n", 87 | " print('matching inliner num:', score)\n", 88 | " return side_by_side_comp_img_byte, att1, att2\n", 89 | "\n" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 2, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "load DeLF pytorch model...\n", 102 | "{'arch': 'resnet50', 'expr': 'dummy', 'load_from': '../train/repo/delf_real_clean/keypoint/ckpt/fix.pth.tar', 'ncls': 'dummy', 'stage': 'inference', 'target_layer': 'layer3', 'use_random_gamma_rescale': False}\n", 103 | "[inference] loading resnet50 pretrained ImageNet weights ... 
It may take few seconds...\n", 104 | "deep copied weights from layer \"conv1\" ...\n", 105 | "deep copied weights from layer \"bn1\" ...\n", 106 | "deep copied weights from layer \"relu\" ...\n", 107 | "deep copied weights from layer \"maxpool\" ...\n", 108 | "deep copied weights from layer \"layer1\" ...\n", 109 | "deep copied weights from layer \"layer2\" ...\n", 110 | "deep copied weights from layer \"layer3\" ...\n", 111 | "deep copied weights from layer \"layer4\" ...\n", 112 | "loaded weights from module \"base\" ...\n", 113 | "loaded weights from module \"attn\" ...\n", 114 | "loaded weights from module \"pool\" ...\n", 115 | "load model from \"../train/repo/delf_real_clean/keypoint/ckpt/fix.pth.tar\"\n", 116 | "load PCA parameters...\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "import numpy as np\n", 122 | "from matplotlib.pyplot import imshow\n", 123 | "\n", 124 | "feeder_config = {\n", 125 | " 'GPU_ID': 6,\n", 126 | " 'IOU_THRES': 0.92,\n", 127 | " 'ATTN_THRES': 0.37,\n", 128 | " 'TARGET_LAYER': 'layer3',\n", 129 | " 'TOP_K': 1000,\n", 130 | " 'PCA_PARAMETERS_PATH':'./output/pca/delf_real/pca.h5',\n", 131 | " 'PCA_DIMS':40,\n", 132 | " 'USE_PCA': True,\n", 133 | " 'SCALE_LIST': [0.25, 0.3535, 0.5, 0.7071, 1.0, 1.4142, 2.0],\n", 134 | " \n", 135 | " 'LOAD_FROM': '../train/repo/delf_real_clean/keypoint/ckpt/fix.pth.tar',\n", 136 | " 'ARCH': 'resnet50',\n", 137 | " 'EXPR': 'dummy',\n", 138 | " 'TARGET_LAYER': 'layer3',\n", 139 | "}\n", 140 | "myfeeder = Feeder(feeder_config)\n", 141 | "\n" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 3, 147 | "metadata": { 148 | "scrolled": true 149 | }, 150 | "outputs": [ 151 | { 152 | "ename": "TypeError", 153 | "evalue": "pic should be PIL Image or ndarray. Got ", 154 | "output_type": "error", 155 | "traceback": [ 156 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 157 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 158 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;31m# test 1 (good)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0mquery\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'test/wrongcases/keble/query/keble_4.png'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'test/wrongcases/keble/db/keble_000214.jpg'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m \u001b[0mresult_image_byte\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0matt1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0matt2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmyfeeder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0mresult_image\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBytesIO\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult_image_byte\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 159 | "\u001b[0;32m\u001b[0m in 
\u001b[0;36mget_result\u001b[0;34m(feeder, query)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;31m# feed and get output.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeeder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpil_image\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0matt1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmatcher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_attention_image_byte\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'attention_np_list'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 160 | "\u001b[0;32m~/nashory/workspace/kaggle/google-landmark-challenge/delf/extract/feeder.py\u001b[0m in \u001b[0;36mfeed\u001b[0;34m(self, pil_image, filename)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfeed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpil_image\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'dummy'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpil_image\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfeed_to_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquery_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpil_image\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 161 | "\u001b[0;32m~/nashory/workspace/kaggle/google-landmark-challenge/delf/extract/feeder.py\u001b[0m in \u001b[0;36m__get_result__\u001b[0;34m(self, path, image)\u001b[0m\n\u001b[1;32m 111\u001b[0m image):\n\u001b[1;32m 112\u001b[0m \u001b[0;31m# load tensor image\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 113\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__transform__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 114\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munsqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 162 | "\u001b[0;32m~/nashory/workspace/kaggle/google-landmark-challenge/delf/extract/feeder.py\u001b[0m in \u001b[0;36m__transform__\u001b[0;34m(self, image)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__transform__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mimage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mToTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__print_result__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 163 | "\u001b[0;32m~/nashory/workspace/kaggle/google-landmark-challenge/delf/venv/lib/python3.6/site-packages/torchvision/transforms/transforms.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, pic)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mConverted\u001b[0m \u001b[0mimage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \"\"\"\n\u001b[0;32m---> 61\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpic\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 164 | "\u001b[0;32m~/nashory/workspace/kaggle/google-landmark-challenge/delf/venv/lib/python3.6/site-packages/torchvision/transforms/functional.py\u001b[0m in \u001b[0;36mto_tensor\u001b[0;34m(pic)\u001b[0m\n\u001b[1;32m 42\u001b[0m \"\"\"\n\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_is_pil_image\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpic\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_is_numpy_image\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpic\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 44\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'pic should be PIL Image or ndarray. Got {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpic\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpic\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 165 | "\u001b[0;31mTypeError\u001b[0m: pic should be PIL Image or ndarray. 
Got " 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "\n", 171 | "'''\n", 172 | "###### keble_1.png\n", 173 | "# test 1 (good)\n", 174 | "query = ['test/wrongcases/keble/query/keble_1.png', 'test/wrongcases/keble/db/keble_000214.jpg']\n", 175 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 176 | "plt.figure(figsize=(16,12))\n", 177 | "result_image = Image.open(BytesIO(result_image_byte))\n", 178 | "imshow(np.asarray(result_image), aspect='auto')\n", 179 | "\n", 180 | "# test 2 (good)\n", 181 | "query = ['test/wrongcases/keble/query/keble_1.png', 'test/wrongcases/keble/db/keble_000227.jpg']\n", 182 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 183 | "plt.figure(figsize=(16,12))\n", 184 | "result_image = Image.open(BytesIO(result_image_byte))\n", 185 | "imshow(np.asarray(result_image), aspect='auto')\n", 186 | "\n", 187 | "# test 3 (ok)\n", 188 | "query = ['test/wrongcases/keble/query/keble_1.png', 'test/wrongcases/keble/db/keble_000016.jpg']\n", 189 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 190 | "plt.figure(figsize=(16,12))\n", 191 | "result_image = Image.open(BytesIO(result_image_byte))\n", 192 | "imshow(np.asarray(result_image), aspect='auto')\n", 193 | "\n", 194 | "# test 4 (junk)\n", 195 | "query = ['test/wrongcases/keble/query/keble_1.png', 'test/wrongcases/keble/db/keble_000234.jpg']\n", 196 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 197 | "plt.figure(figsize=(16,12))\n", 198 | "result_image = Image.open(BytesIO(result_image_byte))\n", 199 | "imshow(np.asarray(result_image), aspect='auto')\n", 200 | "\n", 201 | "# test 5 (junk)\n", 202 | "query = ['test/wrongcases/keble/query/keble_1.png', 'test/wrongcases/keble/db/keble_000233.jpg']\n", 203 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 204 | "plt.figure(figsize=(16,12))\n", 205 | "result_image = Image.open(BytesIO(result_image_byte))\n", 206 | "imshow(np.asarray(result_image), aspect='auto')\n", 207 | "\n", 208 | "# test 6 (arbitrary)\n", 209 | "query = ['test/wrongcases/keble/query/keble_1.png', 'test/wrongcases/keble/db/keble_000199.jpg']\n", 210 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 211 | "plt.figure(figsize=(16,12))\n", 212 | "result_image = Image.open(BytesIO(result_image_byte))\n", 213 | "imshow(np.asarray(result_image), aspect='auto')\n", 214 | "\n", 215 | "# test 7 (arbitrary)\n", 216 | "query = ['test/wrongcases/keble/query/keble_1.png', 'test/wrongcases/keble/db/keble_000111.jpg']\n", 217 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 218 | "plt.figure(figsize=(16,12))\n", 219 | "result_image = Image.open(BytesIO(result_image_byte))\n", 220 | "imshow(np.asarray(result_image), aspect='auto')\n", 221 | "\n", 222 | "# test 8 (arbitrary)\n", 223 | "query = ['test/wrongcases/keble/query/keble_1.png', 'test/wrongcases/keble/db/keble_000036.jpg']\n", 224 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 225 | "plt.figure(figsize=(16,12))\n", 226 | "result_image = Image.open(BytesIO(result_image_byte))\n", 227 | "imshow(np.asarray(result_image), aspect='auto')\n", 228 | "'''\n", 229 | "\n", 230 | "'''\n", 231 | "###### keble_4.png\n", 232 | "# test 1 (good)\n", 233 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000214.jpg']\n", 234 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 235 | 
"plt.figure(figsize=(16,12))\n", 236 | "result_image = Image.open(BytesIO(result_image_byte))\n", 237 | "imshow(np.asarray(result_image), aspect='auto')\n", 238 | "\n", 239 | "# test 2 (good)\n", 240 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000227.jpg']\n", 241 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 242 | "plt.figure(figsize=(16,12))\n", 243 | "result_image = Image.open(BytesIO(result_image_byte))\n", 244 | "imshow(np.asarray(result_image), aspect='auto')\n", 245 | "\n", 246 | "# test 3 (ok)\n", 247 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000016.jpg']\n", 248 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 249 | "plt.figure(figsize=(16,12))\n", 250 | "result_image = Image.open(BytesIO(result_image_byte))\n", 251 | "imshow(np.asarray(result_image), aspect='auto')\n", 252 | "\n", 253 | "# test 4 (junk)\n", 254 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000234.jpg']\n", 255 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 256 | "plt.figure(figsize=(16,12))\n", 257 | "result_image = Image.open(BytesIO(result_image_byte))\n", 258 | "imshow(np.asarray(result_image), aspect='auto')\n", 259 | "\n", 260 | "# test 5 (junk)\n", 261 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000233.jpg']\n", 262 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 263 | "plt.figure(figsize=(16,12))\n", 264 | "result_image = Image.open(BytesIO(result_image_byte))\n", 265 | "imshow(np.asarray(result_image), aspect='auto')\n", 266 | "\n", 267 | "# test 6 (arbitrary)\n", 268 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000199.jpg']\n", 269 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 270 | "plt.figure(figsize=(16,12))\n", 271 | "result_image = Image.open(BytesIO(result_image_byte))\n", 272 | "imshow(np.asarray(result_image), aspect='auto')\n", 273 | "\n", 274 | "# test 7 (arbitrary)\n", 275 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000111.jpg']\n", 276 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 277 | "plt.figure(figsize=(16,12))\n", 278 | "result_image = Image.open(BytesIO(result_image_byte))\n", 279 | "imshow(np.asarray(result_image), aspect='auto')\n", 280 | "\n", 281 | "# test 8 (arbitrary)\n", 282 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000036.jpg']\n", 283 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 284 | "plt.figure(figsize=(16,12))\n", 285 | "result_image = Image.open(BytesIO(result_image_byte))\n", 286 | "imshow(np.asarray(result_image), aspect='auto')\n", 287 | "\n", 288 | "\n", 289 | "###### keble_5.png\n", 290 | "# test 1 (good)\n", 291 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000214.jpg']\n", 292 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 293 | "plt.figure(figsize=(16,12))\n", 294 | "result_image = Image.open(BytesIO(result_image_byte))\n", 295 | "imshow(np.asarray(result_image), aspect='auto')\n", 296 | "\n", 297 | "# test 2 (good)\n", 298 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000227.jpg']\n", 299 | "result_image_byte, att1, att2 = 
get_result(feeder_config, delf, query)\n", 300 | "plt.figure(figsize=(16,12))\n", 301 | "result_image = Image.open(BytesIO(result_image_byte))\n", 302 | "imshow(np.asarray(result_image), aspect='auto')\n", 303 | "\n", 304 | "# test 3 (ok)\n", 305 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000016.jpg']\n", 306 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 307 | "plt.figure(figsize=(16,12))\n", 308 | "result_image = Image.open(BytesIO(result_image_byte))\n", 309 | "imshow(np.asarray(result_image), aspect='auto')\n", 310 | "\n", 311 | "# test 4 (junk)\n", 312 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000234.jpg']\n", 313 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 314 | "plt.figure(figsize=(16,12))\n", 315 | "result_image = Image.open(BytesIO(result_image_byte))\n", 316 | "imshow(np.asarray(result_image), aspect='auto')\n", 317 | "\n", 318 | "# test 5 (junk)\n", 319 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000233.jpg']\n", 320 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 321 | "plt.figure(figsize=(16,12))\n", 322 | "result_image = Image.open(BytesIO(result_image_byte))\n", 323 | "imshow(np.asarray(result_image), aspect='auto')\n", 324 | "\n", 325 | "# test 6 (arbitrary)\n", 326 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000199.jpg']\n", 327 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 328 | "plt.figure(figsize=(16,12))\n", 329 | "result_image = Image.open(BytesIO(result_image_byte))\n", 330 | "imshow(np.asarray(result_image), aspect='auto')\n", 331 | "\n", 332 | "# test 7 (arbitrary)\n", 333 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000111.jpg']\n", 334 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 335 | "plt.figure(figsize=(16,12))\n", 336 | "result_image = Image.open(BytesIO(result_image_byte))\n", 337 | "imshow(np.asarray(result_image), aspect='auto')\n", 338 | "\n", 339 | "# test 8 (arbitrary)\n", 340 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000036.jpg']\n", 341 | "result_image_byte, att1, att2 = get_result(feeder_config, delf, query)\n", 342 | "plt.figure(figsize=(16,12))\n", 343 | "result_image = Image.open(BytesIO(result_image_byte))\n", 344 | "imshow(np.asarray(result_image), aspect='auto')\n", 345 | "'''\n", 346 | "'''\n", 347 | "# deb\n", 348 | "query = ['test/wrongcases/keble/query/keble_5.png', 'test/wrongcases/keble/db/keble_000036.jpg']\n", 349 | "result_image_byte, att1, att2 = get_result(myfeeder, query)\n", 350 | "plt.figure(figsize=(16,12))\n", 351 | "result_image = Image.open(BytesIO(result_image_byte))\n", 352 | "imshow(np.asarray(result_image), aspect='auto')\n", 353 | "'''\n", 354 | "\n", 355 | "###### keble_4.png\n", 356 | "# test 1 (good)\n", 357 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000214.jpg']\n", 358 | "result_image_byte, att1, att2 = get_result(myfeeder, query)\n", 359 | "plt.figure(figsize=(16,12))\n", 360 | "result_image = Image.open(BytesIO(result_image_byte))\n", 361 | "imshow(np.asarray(result_image), aspect='auto')\n", 362 | "\n", 363 | "# test 2 (good)\n", 364 | "query = ['test/wrongcases/keble/query/keble_4.png', 'test/wrongcases/keble/db/keble_000227.jpg']\n", 365 | 
"result_image_byte, att1, att2 = get_result(myfeeder, query)\n", 366 | "plt.figure(figsize=(16,12))\n", 367 | "result_image = Image.open(BytesIO(result_image_byte))\n", 368 | "imshow(np.asarray(result_image), aspect='auto')\n" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [] 391 | } 392 | ], 393 | "metadata": { 394 | "kernelspec": { 395 | "display_name": "venv", 396 | "language": "python", 397 | "name": "venv" 398 | }, 399 | "language_info": { 400 | "codemirror_mode": { 401 | "name": "ipython", 402 | "version": 3 403 | }, 404 | "file_extension": ".py", 405 | "mimetype": "text/x-python", 406 | "name": "python", 407 | "nbconvert_exporter": "python", 408 | "pygments_lexer": "ipython3", 409 | "version": "3.6.4" 410 | } 411 | }, 412 | "nbformat": 4, 413 | "nbformat_minor": 2 414 | } 415 | -------------------------------------------------------------------------------- /extract/README.md: -------------------------------------------------------------------------------- 1 | 2 | You can train pca and get delf feature files by simply changing the hyperparameters in `extract.py`. 3 | 4 | ### Hyperparameters 5 | ~~~ 6 | + MODE: 7 | 'pca' or 'delf'. 8 | 'pca': extract feature to get pca matrix. 9 | 'delf': extract delf feature and save it to file. 10 | + USE_PCA: 11 | if you want to use pca dimesion reduction when delf feature extraction. 12 | this flag is only for MODE='delf'. 13 | 14 | + PCA_DIMS: 15 | final dimension after dimension reductin by pca. 16 | 17 | + PCA_PARAMETERS_PATH: 18 | when MODE=='pca': where to save pca.h5 file. (pca.hy5 file includes calculated pca matrix, pca vars, pca means) 19 | when MODE='delf': where to load pca matrix from to extract delf feature. 20 | 21 | + INPUT_PATH: 22 | path to input image to extract feature. 23 | 24 | + OUTPUT_PATH: 25 | path to output delf feature file. 26 | this option is only for MODE='delf' 27 | 28 | + LOAD_FROM: 29 | path to pytorch model you wish to use as a feature extractor. 30 | ~~~ 31 | 32 | 33 | ### How to train pca? 34 | modify hyperparameters in `extract.py`, 35 | and run `python extract.py` 36 | 37 | ~~~ 38 | (example) 39 | MODE = 'pca' 40 | GPU_ID = 0 41 | IOU_THRES = 0.98 42 | ATTN_THRES = 0.17 43 | TOP_K = 1000 44 | USE_PCA = False 45 | PCA_DIMS = 40 46 | SCALE_LIST = [0.25, 0.3535, 0.5, 0.7071, 1.0, 1.4142, 2.0] 47 | ARCH = 'resnet50' 48 | EXPR = 'dummy' 49 | TARGET_LAYER = 'layer3' 50 | LOAD_FROM = 'xxx' 51 | PCA_PARAMETERS_PATH = 'xxx' 52 | INPUT_PATH = 'xxx' 53 | OUTPUT_PATH = 'dummy' 54 | 55 | python extract.py 56 | ~~~ 57 | 58 | 59 | ### How to extract delf feature? 60 | modify hyperparameters in `extract.py`, 61 | and run `python extract.py` 62 | 63 | ~~~ 64 | (example) 65 | MODE = 'delf' 66 | GPU_ID = 0 67 | IOU_THRES = 0.98 68 | ATTN_THRES = 0.17 69 | TOP_K = 1000 70 | USE_PCA = True 71 | PCA_DIMS = 40 72 | SCALE_LIST = [0.25, 0.3535, 0.5, 0.7071, 1.0, 1.4142, 2.0] 73 | ARCH = 'resnet50' 74 | EXPR = 'dummy' 75 | TARGET_LAYER = 'layer3' 76 | LOAD_FROM = 'yyy' 77 | PCA_PARAMETERS_PATH = 'yyy' 78 | INPUT_PATH = 'yyy' 79 | OUTPUT_PATH = './output.delf' 80 | 81 | python extract.py 82 | ~~~ 83 | 84 | ### [!!!] BE CAREFUL [!!!] 
85 | + SIZE LIMIT: 86 | If width * height > 1400*1400, the feature will be passed to prevent GPU memory overflow (24GB). 87 | Make sure the input image size(w x h) is less than 1400 * 1400. 88 | 89 | -------------------------------------------------------------------------------- /extract/extractor.py: -------------------------------------------------------------------------------- 1 | 2 | '''extractor.py 3 | extract DeLF local features 4 | ''' 5 | 6 | import os, sys, time 7 | sys.path.append('../') 8 | sys.path.append('../train') 9 | import argparse 10 | 11 | import torch 12 | import torch.nn 13 | import torch 14 | from torch.autograd import Variable 15 | import torchvision.transforms as transforms 16 | import torchvision.datasets as datasets 17 | import numpy as np 18 | import h5py 19 | import pickle 20 | import copy 21 | 22 | import delf_helper 23 | from train.delf import Delf_V1 24 | from pca import DelfPCA 25 | from folder import ImageFolder 26 | from utils import mkdir_p, Bar, AverageMeter 27 | 28 | __DEBUG__ = False 29 | 30 | '''helper functions. 31 | ''' 32 | def __cuda__(x): 33 | if torch.cuda.is_available(): 34 | return x.cuda() 35 | else: 36 | return x 37 | 38 | def __is_cuda__(): 39 | return torch.cuda.is_available() 40 | 41 | def __to_var__(x, volatile=False): 42 | return Variable(x, volatile=volatile) 43 | 44 | def __to_tensor__(x): 45 | return x.data 46 | 47 | def __build_delf_config__(data): 48 | parser = argparse.ArgumentParser('delf-config') 49 | parser.add_argument('--stage', type=str, default='inference') 50 | parser.add_argument('--expr', type=str, default='dummy') 51 | parser.add_argument('--ncls', type=str, default='dummy') 52 | parser.add_argument('--use_random_gamma_rescale', type=str, default=False) 53 | parser.add_argument('--arch', type=str, default=data['ARCH']) 54 | parser.add_argument('--load_from', type=str, default=data['LOAD_FROM']) 55 | parser.add_argument('--target_layer', type=str, default=data['TARGET_LAYER']) 56 | delf_config, _ = parser.parse_known_args() 57 | 58 | # print config. 59 | state = {k: v for k, v in delf_config._get_kwargs()} 60 | print(state) 61 | return delf_config 62 | 63 | 64 | class FeatureExtractor(): 65 | def __init__(self, 66 | extractor_config): 67 | 68 | # environment setting. 69 | os.environ['CUDA_VISIBLE_DEVICES'] = str(extractor_config.get('GPU_ID')) 70 | 71 | # parameters. 
72 | self.title = 'DeLF-Inference' 73 | self.mode = extractor_config.get('MODE') 74 | self.ncls = extractor_config.get('NCLS') 75 | self.iou_thres = extractor_config.get('IOU_THRES') 76 | self.attn_thres = extractor_config.get('ATTN_THRES') 77 | self.top_k = extractor_config.get('TOP_K') 78 | self.target_layer = extractor_config.get('TARGET_LAYER') 79 | self.scale_list = extractor_config.get('SCALE_LIST') 80 | self.use_pca = extractor_config.get('USE_PCA') 81 | self.input_path = extractor_config.get('INPUT_PATH') 82 | self.output_path = extractor_config.get('OUTPUT_PATH') 83 | 84 | 85 | # load pytorch model 86 | print('load DeLF pytorch model...') 87 | delf_config = __build_delf_config__(extractor_config) 88 | self.model = Delf_V1( 89 | ncls = delf_config.ncls, 90 | load_from = delf_config.load_from, 91 | arch = delf_config.arch, 92 | stage = delf_config.stage, 93 | target_layer = delf_config.target_layer, 94 | use_random_gamma_rescale = False) 95 | self.model.eval() 96 | self.model = __cuda__(self.model) 97 | 98 | # load pca matrix 99 | if self.mode.lower() in ['delf']: 100 | if self.use_pca: 101 | print('load PCA parameters...') 102 | h5file = h5py.File(extractor_config.get('PCA_PARAMETERS_PATH'), 'r') 103 | self.pca_mean = h5file['.']['pca_mean'].value 104 | self.pca_vars = h5file['.']['pca_vars'].value 105 | self.pca_matrix = h5file['.']['pca_matrix'].value 106 | self.pca_dims = extractor_config.get('PCA_DIMS') 107 | self.use_pca = extractor_config.get('USE_PCA') 108 | else: 109 | print('PCA will not be applied...') 110 | self.pca_mean = None 111 | self.pca_vars = None 112 | self.pca_matrix = None 113 | self.pca_dims = None 114 | 115 | # PCA. 116 | if self.mode.lower() in ['pca']: 117 | self.pca = DelfPCA( 118 | pca_n_components = extractor_config.get('PCA_DIMS'), 119 | pca_whitening = True, 120 | pca_parameters_path = extractor_config.get('PCA_PARAMETERS_PATH')) 121 | 122 | # set receptive field, stride, padding. 123 | if self.target_layer in ['layer3']: 124 | self.rf = 291.0 125 | self.stride = 16.0 126 | self.padding = 145.0 127 | elif self.target_layer in ['layer4']: 128 | self.rf = 483.0 129 | self.stride = 32.0 130 | self.padding = 241.0 131 | else: 132 | raise ValueError('Unsupported target_layer: {}'.format(self.target_layer)) 133 | 134 | def __adjust_pixel_range__(self, 135 | x, 136 | range_from=[0,1], 137 | range_to=[-1,1]): 138 | ''' 139 | adjust pixel range from to . 140 | ''' 141 | if not range_from == range_to: 142 | scale = float(range_to[1]-range_to[0])/float(range_from[1]-range_from[0]) 143 | bias = range_to[0]-range_from[0]*scale 144 | x = x.mul(scale).add(bias) 145 | return x 146 | 147 | 148 | def __extract_delf_feature__(self, x, filename, mode='pca'): 149 | '''extract raw features from image batch. 
150 | x: Input FloatTensor, [b x c x w x h] 151 | output: Output FloatTensor, [b x c x dim x dim] 152 | ''' 153 | if mode == 'pca': 154 | use_pca = False 155 | pca_mean = 'dummy_pca_mean', 156 | pca_vars = 'dummy_pca_vars', 157 | pca_matrix = 'dummy_pca_matrix', 158 | pca_dims = 'dummy_pca_dims', 159 | workers = 4 160 | else: 161 | assert mode == 'delf', 'mode must be either pca or delf' 162 | use_pca = copy.deepcopy(self.use_pca) 163 | pca_mean = copy.deepcopy(self.pca_mean) 164 | pca_vars = copy.deepcopy(self.pca_vars) 165 | pca_matrix = copy.deepcopy(self.pca_matrix) 166 | pca_dims = copy.deepcopy(self.pca_dims) 167 | workers = 4 168 | try: 169 | output = delf_helper.GetDelfFeatureFromMultiScale( 170 | x = x, 171 | model = self.model, 172 | filename = filename, 173 | pca_mean = pca_mean, 174 | pca_vars = pca_vars, 175 | pca_matrix = pca_matrix, 176 | pca_dims = pca_dims, 177 | rf = self.rf, 178 | stride = self.stride, 179 | padding = self.padding, 180 | top_k = self.top_k, 181 | scale_list = self.scale_list, 182 | iou_thres = self.iou_thres, 183 | attn_thres = self.attn_thres, 184 | use_pca = use_pca, 185 | workers = workers) 186 | if mode == 'pca': 187 | descriptor_np_list = output['descriptor_np_list'] 188 | descriptor = [descriptor_np_list[i,:] for i in range(descriptor_np_list.shape[0])] 189 | return descriptor 190 | else: 191 | return output 192 | except Exception as e: 193 | print('\n[Error] filename:{}, error message:{}'.format(filename, e)) 194 | return None 195 | 196 | 197 | def __save_delf_features_to_file__(self, 198 | data, 199 | filename): 200 | '''save final local features after delf-postprocessing(PCA, NMS) 201 | use pickle to save features. 202 | Args: 203 | data = [{ 204 | filename: 205 | location_np_list: 206 | descriptor_np_list: 207 | feature_scale_np_list: 208 | attention_score_np_list: 209 | attention_np_list: 210 | }, ... ] 211 | ''' 212 | with open(filename, 'wb') as handle: 213 | pickle.dump(data, handle, protocol=2) # use protocol <= 2 for python2 compatibility. 214 | print('\nsaved DeLF feature at {}'.format(filename)) 215 | 216 | 217 | def __save_raw_features_to_file__(self, 218 | feature_maps, 219 | filename): 220 | '''save feature to local file. 221 | feature_maps: list of descriptor tensors in batch. [x1, x2, x3, x4 ...], x1 = Tensor([c x w x h]) 222 | output_path: path to save file. 223 | 224 | save: 225 | list of descriptors converted to numpy array. [d1, d2, d3, ...] 226 | ''' 227 | np_feature_maps = [] 228 | np_feature_maps = [x.numpy() for _, x in enumerate(feature_maps)] 229 | np_feature_maps = np.asarray(np_feature_maps) 230 | 231 | # save features, labels to h5 file. 232 | h5file = h5py.File(filename, 'w') 233 | h5file.create_dataset('feature_maps', data=np_feature_maps) 234 | h5file.close() 235 | 236 | 237 | def extract(self, input_path, output_path): 238 | '''extract features from single image without batch process. 239 | ''' 240 | assert self.mode.lower() in ['pca', 'delf'] 241 | batch_timer = AverageMeter() 242 | data_timer = AverageMeter() 243 | since = time.time() 244 | 245 | # dataloader. 
246 | dataset = ImageFolder( 247 | root = input_path, 248 | transform = transforms.ToTensor()) 249 | self.dataloader = torch.utils.data.DataLoader( 250 | dataset = dataset, 251 | batch_size = 1, 252 | shuffle = True, 253 | num_workers = 0) 254 | feature_maps = [] 255 | if self.mode.lower() in ['pca']: 256 | bar = Bar('[{}]{}'.format(self.mode.upper(), self.title), max=len(self.dataloader)) 257 | for batch_idx, (inputs, _, filename) in enumerate(self.dataloader): 258 | # image size upper limit. 259 | if not (len(inputs.size()) == 4): 260 | if __DEBUG__: 261 | print('wrong input dimenstion! ({},{})'.format(filename, input.size())) 262 | continue; 263 | if not (inputs.size(2)*inputs.size(3) <= 1200*1200): 264 | if __DEBUG__: 265 | print('passed: image size too large! ({},{})'.format(filename, inputs.size())) 266 | continue; 267 | if not (inputs.size(2) >= 112 and inputs.size(3) >= 112): 268 | if __DEBUG__: 269 | print('passed: image size too small! ({},{})'.format(filename, inputs.size())) 270 | continue; 271 | 272 | data_timer.update(time.time() - since) 273 | # prepare inputs 274 | if __is_cuda__(): 275 | inputs = __cuda__(inputs) 276 | inputs = __to_var__(inputs) 277 | 278 | # get delf feature only for pca calculation. 279 | pca_feature = self.__extract_delf_feature__(inputs.data, filename, mode='pca') 280 | if pca_feature is not None: 281 | feature_maps.extend(pca_feature) 282 | 283 | batch_timer.update(time.time() - since) 284 | since = time.time() 285 | 286 | # progress 287 | log_msg = ('\n[Extract][Processing:({batch}/{size})] '+ \ 288 | 'eta: (data:{data:.3f}s),(batch:{bt:.3f}s),(total:{tt:})') \ 289 | .format( 290 | batch=batch_idx + 1, 291 | size=len(self.dataloader), 292 | data=data_timer.val, 293 | bt=batch_timer.val, 294 | tt=bar.elapsed_td) 295 | print(log_msg) 296 | bar.next() 297 | print('\nnumber of selected features so far: {}'.format(len(feature_maps))) 298 | if len(feature_maps) >= 10000000: # UPPER LIMIT. 299 | break; 300 | 301 | # free GPU cache every. 302 | if batch_idx % 10 == 0: 303 | torch.cuda.empty_cache() 304 | if __DEBUG__: 305 | print('GPU Memory flushed !!!!!!!!!') 306 | 307 | # trian PCA. 308 | self.pca(feature_maps) 309 | 310 | else: 311 | bar = Bar('[{}]{}'.format(self.mode.upper(), self.title), max=len(self.dataloader)) 312 | assert self.mode.lower() in ['delf'] 313 | feature_maps = [] 314 | for batch_idx, (inputs, labels, filename) in enumerate(self.dataloader): 315 | # image size upper limit. 316 | if not (len(inputs.size()) == 4): 317 | if __DEBUG__: 318 | print('wrong input dimenstion! ({},{})'.format(filename, input.size())) 319 | continue; 320 | if not (inputs.size(2)*inputs.size(3) <= 1200*1200): 321 | if __DEBUG__: 322 | print('passed: image size too large! ({},{})'.format(filename, inputs.size())) 323 | continue; 324 | if not (inputs.size(2) >= 112 and inputs.size(3) >= 112): 325 | if __DEBUG__: 326 | print('passed: image size too small! ({},{})'.format(filename, inputs.size())) 327 | continue; 328 | 329 | data_timer.update(time.time() - since) 330 | # prepare inputs 331 | if __is_cuda__(): 332 | inputs = __cuda__(inputs) 333 | inputs = __to_var__(inputs) 334 | 335 | # get delf everything (score, feature, etc.) 336 | delf_feature = self.__extract_delf_feature__(inputs.data, filename, mode='delf') 337 | if delf_feature is not None: 338 | feature_maps.append(delf_feature) 339 | 340 | # log. 
341 | batch_timer.update(time.time() - since) 342 | since = time.time() 343 | log_msg = ('\n[Extract][Processing:({batch}/{size})] '+ \ 344 | 'eta: (data:{data:.3f}s),(batch:{bt:.3f}s),(total:{tt:})') \ 345 | .format( 346 | batch=batch_idx + 1, 347 | size=len(self.dataloader), 348 | data=data_timer.val, 349 | bt=batch_timer.val, 350 | tt=bar.elapsed_td) 351 | print(log_msg) 352 | bar.next() 353 | 354 | # free GPU cache every. 355 | if batch_idx % 10 == 0: 356 | torch.cuda.empty_cache() 357 | if __DEBUG__: 358 | print('GPU Memory flushed !!!!!!!!!') 359 | 360 | # use pickle to save DeLF features. 361 | self.__save_delf_features_to_file__(feature_maps, output_path) 362 | 363 | 364 | if __name__ == "__main__": 365 | MODE = 'delf' # either "delf" or "pca" 366 | GPU_ID = 7 367 | IOU_THRES = 0.98 368 | ATTN_THRES = 0.17 369 | TOP_K = 1000 370 | USE_PCA = True 371 | PCA_DIMS = 40 372 | SCALE_LIST = [0.25, 0.3535, 0.5, 0.7071, 1.0, 1.4142, 2.0] 373 | ARCH = 'resnet50' 374 | EXPR = 'dummy' 375 | TARGET_LAYER = 'layer3' 376 | 377 | MODEL_NAME = 'ldmk' 378 | 379 | LOAD_FROM = 'archive/model/{}/keypoint/ckpt/fix.pth.tar'.format(MODEL_NAME) 380 | PCA_PARAMETERS_PATH = 'archive/pca/{}/pca.h5'.format(MODEL_NAME) 381 | 382 | extractor_config = { 383 | # params for feature extraction. 384 | 'MODE': MODE, 385 | 'GPU_ID': GPU_ID, 386 | 'IOU_THRES': IOU_THRES, 387 | 'ATTN_THRES': ATTN_THRES, 388 | 'TOP_K': TOP_K, 389 | 'PCA_PARAMETERS_PATH': PCA_PARAMETERS_PATH, 390 | 'PCA_DIMS': PCA_DIMS, 391 | 'USE_PCA': USE_PCA, 392 | 'SCALE_LIST': SCALE_LIST, 393 | 394 | # params for model load. 395 | 'LOAD_FROM': LOAD_FROM, 396 | 'ARCH': ARCH, 397 | 'EXPR': EXPR, 398 | 'TARGET_LAYER': TARGET_LAYER, 399 | } 400 | 401 | 402 | extractor = FeatureExtractor(extractor_config) 403 | if MODE.lower() in ['pca']: 404 | OUTPUT_PATH = 'dummy' 405 | INPUT_PATH = 'your_path_to_dataset' 406 | extractor.extract(INPUT_PATH, OUTPUT_PATH) 407 | 408 | elif MODE.lower() in ['delf']: 409 | # query 410 | INPUT_PATH = 'your_path_to_dataset' 411 | OUTPUT_PATH = 'archive/delf.batch/{}/oxf5k_query.delf'.format(MODEL_NAME) 412 | extractor.extract(INPUT_PATH, OUTPUT_PATH) 413 | # index 414 | INPUT_PATH = 'data/oxf5k/index' 415 | OUTPUT_PATH = 'archive/delf.batch/{}/oxf5k_index.delf'.format(MODEL_NAME) 416 | extractor.extract(INPUT_PATH, OUTPUT_PATH) 417 | 418 | 419 | 420 | 421 | 422 | -------------------------------------------------------------------------------- /extract/folder.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from PIL import Image 4 | 5 | import os 6 | import os.path 7 | 8 | 9 | def has_file_allowed_extension(filename, extensions): 10 | """Checks if a file is an allowed extension. 
11 | 12 | Args: 13 | filename (string): path to a file 14 | 15 | Returns: 16 | bool: True if the filename ends with a known image extension 17 | """ 18 | filename_lower = filename.lower() 19 | return any(filename_lower.endswith(ext) for ext in extensions) 20 | 21 | 22 | def find_classes(dir): 23 | classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] 24 | classes.sort() 25 | class_to_idx = {classes[i]: i for i in range(len(classes))} 26 | return classes, class_to_idx 27 | 28 | 29 | def make_dataset(dir, class_to_idx, extensions): 30 | images = [] 31 | dir = os.path.expanduser(dir) 32 | for target in sorted(os.listdir(dir)): 33 | d = os.path.join(dir, target) 34 | if not os.path.isdir(d): 35 | continue 36 | 37 | for root, _, fnames in sorted(os.walk(d)): 38 | for fname in sorted(fnames): 39 | if has_file_allowed_extension(fname, extensions): 40 | path = os.path.join(root, fname) 41 | item = (path, class_to_idx[target]) 42 | images.append(item) 43 | 44 | return images 45 | 46 | 47 | class DatasetFolder(data.Dataset): 48 | """A generic data loader where the samples are arranged in this way: :: 49 | 50 | root/class_x/xxx.ext 51 | root/class_x/xxy.ext 52 | root/class_x/xxz.ext 53 | 54 | root/class_y/123.ext 55 | root/class_y/nsdf3.ext 56 | root/class_y/asd932_.ext 57 | 58 | Args: 59 | root (string): Root directory path. 60 | loader (callable): A function to load a sample given its path. 61 | extensions (list[string]): A list of allowed extensions. 62 | transform (callable, optional): A function/transform that takes in 63 | a sample and returns a transformed version. 64 | E.g, ``transforms.RandomCrop`` for images. 65 | target_transform (callable, optional): A function/transform that takes 66 | in the target and transforms it. 67 | 68 | Attributes: 69 | classes (list): List of the class names. 70 | class_to_idx (dict): Dict with items (class_name, class_index). 71 | samples (list): List of (sample path, class_index) tuples 72 | """ 73 | 74 | def __init__(self, root, loader, extensions, transform=None, target_transform=None): 75 | classes, class_to_idx = find_classes(root) 76 | samples = make_dataset(root, class_to_idx, extensions) 77 | if len(samples) == 0: 78 | raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n" 79 | "Supported extensions are: " + ",".join(extensions))) 80 | 81 | self.root = root 82 | self.loader = loader 83 | self.extensions = extensions 84 | 85 | self.classes = classes 86 | self.class_to_idx = class_to_idx 87 | self.samples = samples 88 | 89 | self.transform = transform 90 | self.target_transform = target_transform 91 | 92 | def __getitem__(self, index): 93 | """ 94 | Args: 95 | index (int): Index 96 | 97 | Returns: 98 | tuple: (sample, target) where target is class_index of the target class. 
99 | """ 100 | path, target = self.samples[index] 101 | sample = self.loader(path) 102 | if self.transform is not None: 103 | sample = self.transform(sample) 104 | if self.target_transform is not None: 105 | target = self.target_transform(target) 106 | 107 | return sample, target, str(os.path.basename(path)) 108 | 109 | def __len__(self): 110 | return len(self.samples) 111 | 112 | def __repr__(self): 113 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' 114 | fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) 115 | fmt_str += ' Root Location: {}\n'.format(self.root) 116 | tmp = ' Transforms (if any): ' 117 | fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 118 | tmp = ' Target Transforms (if any): ' 119 | fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 120 | return fmt_str 121 | 122 | 123 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'] 124 | 125 | 126 | def pil_loader(path): 127 | # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) 128 | with open(path, 'rb') as f: 129 | img = Image.open(f) 130 | return img.convert('RGB') 131 | 132 | 133 | def accimage_loader(path): 134 | import accimage 135 | try: 136 | return accimage.Image(path) 137 | except IOError: 138 | # Potentially a decoding problem, fall back to PIL.Image 139 | return pil_loader(path) 140 | 141 | 142 | def default_loader(path): 143 | from torchvision import get_image_backend 144 | if get_image_backend() == 'accimage': 145 | return accimage_loader(path) 146 | else: 147 | return pil_loader(path) 148 | 149 | 150 | class ImageFolder(DatasetFolder): 151 | """A generic data loader where the images are arranged in this way: :: 152 | 153 | root/dog/xxx.png 154 | root/dog/xxy.png 155 | root/dog/xxz.png 156 | 157 | root/cat/123.png 158 | root/cat/nsdf3.png 159 | root/cat/asd932_.png 160 | 161 | Args: 162 | root (string): Root directory path. 163 | transform (callable, optional): A function/transform that takes in an PIL image 164 | and returns a transformed version. E.g, ``transforms.RandomCrop`` 165 | target_transform (callable, optional): A function/transform that takes in the 166 | target and transforms it. 167 | loader (callable, optional): A function to load an image given its path. 168 | 169 | Attributes: 170 | classes (list): List of the class names. 171 | class_to_idx (dict): Dict with items (class_name, class_index). 172 | imgs (list): List of (image path, class_index) tuples 173 | """ 174 | def __init__(self, root, transform=None, target_transform=None, 175 | loader=default_loader): 176 | super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS, 177 | transform=transform, 178 | target_transform=target_transform) 179 | self.imgs = self.samples 180 | -------------------------------------------------------------------------------- /extract/pca.py: -------------------------------------------------------------------------------- 1 | 2 | '''pca.py 3 | calculate PCA for given features, and save output into file. 
4 | '''
5 | 
6 | import os
7 | import sys
8 | import time
9 | import glob
10 | 
11 | import numpy as np
12 | import h5py
13 | from sklearn.preprocessing import StandardScaler
14 | from sklearn.decomposition import PCA
15 | 
16 | 
17 | class DelfPCA():
18 |     def __init__(self,
19 |                  pca_n_components,
20 |                  pca_whitening=True,
21 |                  pca_parameters_path='./output/pca/pca.h5'):
22 |         self.pca_n_components = pca_n_components
23 |         self.pca_whitening = pca_whitening
24 |         self.pca_parameters_path = pca_parameters_path
25 | 
26 |     def __call__(self,
27 |                  feature_maps):
28 |         '''train PCA.
29 |         Args:
30 |             feature_maps: list of feature tensors,
31 |                 feature_maps = [f1, f2, f3 ...],
32 |                 f1 = FloatTensor(fmap_depth)
33 |         Saves:
34 |             pca_matrix,
35 |             pca_mean,
36 |             pca_vars (written to pca_parameters_path as an h5 file).
37 |         '''
38 | 
39 |         # calculate pca.
40 |         pca = PCA(whiten=self.pca_whitening)
41 |         pca.fit(np.array(feature_maps))
42 |         pca_matrix = pca.components_
43 |         pca_mean = pca.mean_
44 |         pca_vars = pca.explained_variance_
45 | 
46 |         # print pca summary.
47 |         print('================= PCA RESULT ==================')
48 |         print('pca_matrix: {}'.format(pca_matrix.shape))
49 |         print('pca_mean: {}'.format(pca_mean.shape))
50 |         print('pca_vars: {}'.format(pca_vars.shape))
51 |         print('===============================================')
52 | 
53 |         # save pca parameters to h5 file.
54 |         filename = os.path.join(self.pca_parameters_path)
55 |         h5file = h5py.File(filename, 'w')
56 |         h5file.create_dataset('pca_matrix', data=pca_matrix)
57 |         h5file.create_dataset('pca_mean', data=pca_mean)
58 |         h5file.create_dataset('pca_vars', data=pca_vars)
59 |         h5file.close()
60 | 
61 | 
--------------------------------------------------------------------------------
/helper/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | from .matcher import *
5 | from .feeder import *
6 | from .delf_helper import *
7 | 
8 | 
9 | 
10 | 
--------------------------------------------------------------------------------
/helper/delf_helper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | delf_helper.py
6 | helper functions to extract DeLF features.
7 | """
8 | 
9 | 
10 | import os, sys, time
11 | 
12 | import numpy as np
13 | import h5py
14 | import torch
15 | import torch.nn.functional as F
16 | import torchvision.transforms as transforms
17 | from torch.autograd import Variable
18 | from concurrent.futures import ThreadPoolExecutor, as_completed # for multi-threaded extraction
19 | 
20 | __DEBUG__ = False
21 | 
22 | def GenerateCoordinates(h,w):
23 |     '''generate coordinates.
24 |     Returns: [h*w, 2] FloatTensor
25 |     '''
26 |     x = torch.floor(torch.arange(0, w*h) / w)
27 |     y = torch.arange(0, w).repeat(h)
28 | 
29 |     coord = torch.stack([x,y], dim=1)
30 |     return coord
31 | 
32 | def CalculateReceptiveBoxes(height,
33 |                             width,
34 |                             rf,
35 |                             stride,
36 |                             padding):
37 | 
38 |     '''
39 |     calculate receptive boxes from the original image for each feature point.
40 |     Args:
41 |         height: The height of feature map.
42 |         width: The width of feature map.
43 |         rf: The receptive field size.
44 |         stride: The effective stride between two adjacent feature points.
45 |         padding: The effective padding size.
46 | 
47 |     Returns:
48 |         rf_boxes: [N, 4] receptive boxes tensor. (N = height x width).
49 |         each box is represented by [ymin, xmin, ymax, xmax].
50 | ''' 51 | coordinates = GenerateCoordinates(h=height, 52 | w=width) 53 | # create [ymin, xmin, ymax, xmax] 54 | point_boxes = torch.cat([coordinates, coordinates], dim=1) 55 | bias = torch.FloatTensor([-padding, -padding, -padding + rf - 1, -padding + rf - 1]) 56 | rf_boxes = stride * point_boxes + bias 57 | return rf_boxes 58 | 59 | def CalculateKeypointCenters(rf_boxes): 60 | '''compute feature centers, from receptive field boxes (rf_boxes). 61 | Args: 62 | rf_boxes: [N, 4] FloatTensor. 63 | Returns: 64 | centers: [N, 2] FloatTensor. 65 | ''' 66 | xymin = torch.index_select(rf_boxes, dim=1, index=torch.LongTensor([0,1])) 67 | xymax = torch.index_select(rf_boxes, dim=1, index=torch.LongTensor([2,3])) 68 | return (xymax + xymin) / 2.0 69 | 70 | def ApplyPcaAndWhitening(data, 71 | pca_matrix, 72 | pca_mean, 73 | pca_vars, 74 | pca_dims, 75 | use_whitening=False): 76 | '''apply PCA/Whitening to data. 77 | Args: 78 | data: [N, dim] FloatTensor containing data which undergoes PCA/Whitening. 79 | pca_matrix: [dim, dim] numpy array PCA matrix, row-major. 80 | pca_mean: [dim] numpy array mean to subtract before projection. 81 | pca_dims: # of dimenstions to use in output data, of type int. 82 | pca_vars: [dim] numpy array containing PCA variances. 83 | Only used if use_whitening is True. 84 | use_whitening: Whether whitening is to be used. usually recommended. 85 | Returns: 86 | output: [N, output_dim] FloatTensor with output of PCA/Whitening operation. 87 | (Warning: element 0 in pca_variances might produce nan/inf value.) 88 | ''' 89 | pca_mean = torch.from_numpy(pca_mean).float() 90 | pca_vars = torch.from_numpy(pca_vars).float() 91 | pca_matrix = torch.from_numpy(pca_matrix).float() 92 | 93 | data = data - pca_mean 94 | output = data.matmul(pca_matrix.narrow(0, 0, pca_dims).transpose(0,1)) 95 | 96 | if use_whitening: 97 | output = output.div((pca_vars.narrow(0, 0, pca_dims) ** 0.5)) 98 | return output 99 | 100 | def GetDelfFeatureFromMultiScale( 101 | x, 102 | model, 103 | filename, 104 | pca_mean, 105 | pca_vars, 106 | pca_matrix, 107 | pca_dims, 108 | rf, 109 | stride, 110 | padding, 111 | top_k, 112 | scale_list, 113 | iou_thres, 114 | attn_thres, 115 | use_pca=False, 116 | workers=8): 117 | '''GetDelfFeatureFromMultiScale 118 | warning: use workers = 1 for serving otherwise out of memory error could occurs. 119 | (because uwsgi uses multi-threads by itself.) 120 | ''' 121 | 122 | # helper func. 123 | def __concat_tensors_in_list__(tensor_list, dim): 124 | res = None 125 | tensor_list = [x for x in tensor_list if x is not None] 126 | for tensor in tensor_list: 127 | if res is None: 128 | res = tensor 129 | else: 130 | res = torch.cat((res, tensor), dim=dim) 131 | return res 132 | 133 | # extract features for each scale, and concat. 134 | output_boxes = [] 135 | output_features = [] 136 | output_scores = [] 137 | output_scales = [] 138 | output_original_scale_attn = None 139 | 140 | # multi-threaded feature extraction from different scales. 141 | with ThreadPoolExecutor(max_workers=workers) as pool: 142 | # assign jobs. 
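        # (note: each future below is keyed by its image scale; as_completed() yields results in
        #  completion order, so per-scale outputs arrive in arbitrary order and are concatenated afterwards.)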
143 |         futures = {
144 |             pool.submit(
145 |                 GetDelfFeatureFromSingleScale,
146 |                 x,
147 |                 model,
148 |                 scale,
149 |                 pca_mean,
150 |                 pca_vars,
151 |                 pca_matrix,
152 |                 pca_dims,
153 |                 rf,
154 |                 stride,
155 |                 padding,
156 |                 attn_thres,
157 |                 use_pca):
158 |             scale for scale in scale_list
159 |         }
160 |         for future in as_completed(futures):
161 |             (selected_boxes, selected_features,
162 |              selected_scales, selected_scores,
163 |              selected_original_scale_attn) = future.result()
164 |             # append to list.
165 |             output_boxes.append(selected_boxes) if selected_boxes is not None else output_boxes
166 |             output_features.append(selected_features) if selected_features is not None else output_features
167 |             output_scales.append(selected_scales) if selected_scales is not None else output_scales
168 |             output_scores.append(selected_scores) if selected_scores is not None else output_scores
169 |             if selected_original_scale_attn is not None:
170 |                 output_original_scale_attn = selected_original_scale_attn
171 | 
172 |     # if scale == 1.0 is not included in the scale list, just use a noisy attention image.
173 |     if output_original_scale_attn is None:
174 |         output_original_scale_attn = x.clone().uniform_()
175 | 
176 |     # concat tensors processed from different scales.
177 |     output_boxes = __concat_tensors_in_list__(output_boxes, dim=0)
178 |     output_features = __concat_tensors_in_list__(output_features, dim=0)
179 |     output_scales = __concat_tensors_in_list__(output_scales, dim=0)
180 |     output_scores = __concat_tensors_in_list__(output_scores, dim=0)
181 | 
182 |     # perform Non Max Suppression(NMS) to select top-k bboxes according to the attn_score.
183 |     keep_indices, count = nms(boxes = output_boxes,
184 |                               scores = output_scores,
185 |                               overlap = iou_thres,
186 |                               top_k = top_k)
187 |     keep_indices = keep_indices[:top_k]
188 |     output_boxes = torch.index_select(output_boxes, dim=0, index=keep_indices)
189 |     output_features = torch.index_select(output_features, dim=0, index=keep_indices)
190 |     output_scales = torch.index_select(output_scales, dim=0, index=keep_indices)
191 |     output_scores = torch.index_select(output_scores, dim=0, index=keep_indices)
192 |     output_locations = CalculateKeypointCenters(output_boxes)
193 | 
194 |     data = {
195 |         'filename':filename,
196 |         'location_np_list':output_locations.cpu().numpy(),
197 |         'descriptor_np_list':output_features.cpu().numpy(),
198 |         'feature_scale_np_list':output_scales.cpu().numpy(),
199 |         'attention_score_np_list':output_scores.cpu().numpy(),
200 |         'attention_np_list':output_original_scale_attn.cpu().numpy()
201 |     }
202 | 
203 |     # free GPU memory.
204 |     del output_locations
205 |     del output_boxes, selected_boxes
206 |     del output_features, selected_features
207 |     del output_scales, selected_scales
208 |     del output_scores, selected_scores
209 |     del output_original_scale_attn, selected_original_scale_attn
210 |     #torch.cuda.empty_cache() # it releases all unoccupied cached memory!!
(but it makes process slow) 211 | 212 | if __DEBUG__: 213 | #PrintGpuMemoryStats() 214 | PrintResult(data) 215 | return data 216 | 217 | def PrintGpuMemoryStats(): 218 | '''PyTorch >= 0.5.0 219 | ''' 220 | print 221 | print('\n----------------------------------------------------------') 222 | print('[Monitor] max GPU Memory Used by Tensor: {}'.format(torch.cuda.max_memory_allocated())) 223 | print('[Monitor] max GPU Memory Used by Cache: {}'.format(torch.cuda.max_memory_cached())) 224 | print('----------------------------------------------------------') 225 | 226 | def PrintResult(data): 227 | print('\n----------------------------------------------------------') 228 | print('filename: ', data['filename']) 229 | print("location_np_list shape: ", data['location_np_list'].shape) 230 | print("descriptor_np_list shape: ", data['descriptor_np_list'].shape) 231 | print("feature_scale_np_list shape: ", data['feature_scale_np_list'].shape) 232 | print("attention_score_np_list shape: ", data['attention_score_np_list'].shape) 233 | print("attention_np_list shape: ", data['attention_np_list'].shape) 234 | print('----------------------------------------------------------\n') 235 | 236 | def GetDelfFeatureFromSingleScale( 237 | x, 238 | model, 239 | scale, 240 | pca_mean, 241 | pca_vars, 242 | pca_matrix, 243 | pca_dims, 244 | rf, 245 | stride, 246 | padding, 247 | attn_thres, 248 | use_pca): 249 | 250 | # scale image then get features and attention. 251 | new_h = int(round(x.size(2)*scale)) 252 | new_w = int(round(x.size(3)*scale)) 253 | scaled_x = F.upsample(x, size=(new_h, new_w), mode='bilinear') 254 | scaled_features, scaled_scores = model.forward_for_serving(scaled_x) 255 | 256 | # save original size attention (used for attention visualization.) 257 | selected_original_scale_attn = None 258 | if scale == 1.0: 259 | selected_original_scale_attn = torch.clamp(scaled_scores*255, 0, 255) # 1 1 h w 260 | 261 | # calculate receptive field boxes. 262 | rf_boxes = CalculateReceptiveBoxes( 263 | height=scaled_features.size(2), 264 | width=scaled_features.size(3), 265 | rf=rf, 266 | stride=stride, 267 | padding=padding) 268 | 269 | # re-projection back to original image space. 270 | rf_boxes = rf_boxes / scale 271 | scaled_scores = scaled_scores.view(-1) 272 | scaled_features = scaled_features.view(scaled_features.size(1), -1).t() 273 | 274 | # do post-processing for dimension reduction by PCA. 275 | scaled_features = DelfFeaturePostProcessing( 276 | rf_boxes, 277 | scaled_features, 278 | pca_mean, 279 | pca_vars, 280 | pca_matrix, 281 | pca_dims, 282 | use_pca) 283 | 284 | # use attention score to select feature. 285 | indices = None 286 | while(indices is None or len(indices) == 0): 287 | indices = torch.gt(scaled_scores, attn_thres).nonzero().squeeze() 288 | attn_thres = attn_thres * 0.5 # use lower threshold if no indexes are found. 
289 | if attn_thres < 0.001: 290 | break; 291 | 292 | try: 293 | selected_boxes = torch.index_select(rf_boxes, dim=0, index=indices) 294 | selected_features = torch.index_select(scaled_features, dim=0, index=indices) 295 | selected_scores = torch.index_select(scaled_scores, dim=0, index=indices) 296 | selected_scales = torch.ones_like(selected_scores) * scale 297 | except Exception as e: 298 | selected_boxes = None 299 | selected_features = None 300 | selected_scores = None 301 | selected_scales = None 302 | print(e) 303 | pass; 304 | 305 | return selected_boxes, selected_features, selected_scales, selected_scores, selected_original_scale_attn 306 | 307 | 308 | def DelfFeaturePostProcessing( 309 | boxes, 310 | descriptors, 311 | pca_mean, 312 | pca_vars, 313 | pca_matrix, 314 | pca_dims, 315 | use_pca): 316 | 317 | ''' Delf feature post-processing. 318 | (1) apply L2 Normalization. 319 | (2) apply PCA and Whitening. 320 | (3) apply L2 Normalization once again. 321 | Args: 322 | descriptors: (w x h, fmap_depth) descriptor Tensor. 323 | Retturn: 324 | descriptors: (w x h, pca_dims) desciptor Tensor. 325 | ''' 326 | 327 | locations = CalculateKeypointCenters(boxes) 328 | 329 | # L2 Normalization. 330 | descriptors = descriptors.squeeze() 331 | l2_norm = descriptors.norm(p=2, dim=1, keepdim=True) # (1, w x h) 332 | descriptors = descriptors.div(l2_norm.expand_as(descriptors)) # (N, w x h) 333 | 334 | if use_pca: 335 | # apply PCA and Whitening. 336 | descriptors = ApplyPcaAndWhitening( 337 | descriptors, 338 | pca_matrix, 339 | pca_mean, 340 | pca_vars, 341 | pca_dims, 342 | True) 343 | # L2 Normalization (we found L2 Norm is not helpful. DO NOT UNCOMMENT THIS.) 344 | #descriptors = descriptors.view(descriptors.size(0), -1) # (N, w x h) 345 | #l2_norm = descriptors.norm(p=2, dim=0, keepdim=True) # (1, w x h) 346 | #descriptors = descriptors.div(l2_norm.expand_as(descriptors)) # (N, w x h) 347 | 348 | return descriptors 349 | 350 | 351 | # Original author: Francisco Massa: 352 | # https://github.com/fmassa/object-detection.torch 353 | # Ported to PyTorch by Max deGroot (02/01/2017) 354 | def nms(boxes, scores, overlap=0.5, top_k=200): 355 | """Apply non-maximum suppression at test time to avoid detecting too many 356 | overlapping bounding boxes for a given object. 357 | Args: 358 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 359 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 360 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 361 | top_k: (int) The Maximum number of box preds to consider. 362 | Returns: 363 | The indices of the kept boxes with respect to num_priors. 
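        count: (int) the number of boxes actually kept; the function returns the tuple (keep, count).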
364 | """ 365 | 366 | keep = scores.new(scores.size(0)).zero_().long() 367 | if boxes.numel() == 0: 368 | return keep 369 | y1 = boxes[:, 0] 370 | x1 = boxes[:, 1] 371 | y2 = boxes[:, 2] 372 | x2 = boxes[:, 3] 373 | area = torch.mul(x2 - x1, y2 - y1) 374 | v, idx = scores.sort(0) # sort in ascending order 375 | # I = I[v >= 0.01] 376 | idx = idx[-top_k:] # indices of the top-k largest vals 377 | xx1 = boxes.new() 378 | yy1 = boxes.new() 379 | xx2 = boxes.new() 380 | yy2 = boxes.new() 381 | w = boxes.new() 382 | h = boxes.new() 383 | 384 | # keep = torch.Tensor() 385 | count = 0 386 | while idx.numel() > 0: 387 | i = idx[-1] # index of current largest val 388 | # keep.append(i) 389 | keep[count] = i 390 | count += 1 391 | if idx.size(0) == 1: 392 | break 393 | idx = idx[:-1] # remove kept element from view 394 | # load bboxes of next highest vals 395 | torch.index_select(x1, 0, idx, out=xx1) 396 | torch.index_select(y1, 0, idx, out=yy1) 397 | torch.index_select(x2, 0, idx, out=xx2) 398 | torch.index_select(y2, 0, idx, out=yy2) 399 | # store element-wise max with next highest score 400 | xx1 = torch.clamp(xx1, min=x1[i]) 401 | yy1 = torch.clamp(yy1, min=y1[i]) 402 | xx2 = torch.clamp(xx2, max=x2[i]) 403 | yy2 = torch.clamp(yy2, max=y2[i]) 404 | w.resize_as_(xx2) 405 | h.resize_as_(yy2) 406 | w = xx2 - xx1 407 | h = yy2 - yy1 408 | # check sizes of xx1 and xx2.. after each iteration 409 | w = torch.clamp(w, min=0.0) 410 | h = torch.clamp(h, min=0.0) 411 | inter = w*h 412 | # IoU = i / (area(a) + area(b) - i) 413 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 414 | union = (rem_areas - inter) + area[i] 415 | IoU = inter/union # store result in iou 416 | # keep only elements with an IoU <= overlap 417 | idx = idx[IoU.le(overlap)] 418 | return keep, count 419 | 420 | 421 | 422 | -------------------------------------------------------------------------------- /helper/feeder.py: -------------------------------------------------------------------------------- 1 | '''feeder.py 2 | ''' 3 | 4 | import os, sys, time 5 | sys.path.append('../') 6 | import argparse 7 | 8 | from PIL import Image 9 | import h5py 10 | import torch 11 | import torchvision.transforms as transforms 12 | 13 | import helper.delf_helper as delf_helper 14 | from train.delf import Delf_V1 15 | 16 | __DEBUG__ = False 17 | 18 | def __cuda__(x): 19 | if torch.cuda.is_available(): 20 | return x.cuda() 21 | else: 22 | return x 23 | 24 | def __build_delf_config__(data): 25 | parser = argparse.ArgumentParser('delf-config') 26 | parser.add_argument('--stage', type=str, default='inference') 27 | parser.add_argument('--expr', type=str, default='dummy') 28 | parser.add_argument('--ncls', type=str, default='dummy') 29 | parser.add_argument('--use_random_gamma_rescale', type=str, default=False) 30 | parser.add_argument('--arch', type=str, default=data['ARCH']) 31 | parser.add_argument('--load_from', type=str, default=data['LOAD_FROM']) 32 | parser.add_argument('--target_layer', type=str, default=data['TARGET_LAYER']) 33 | delf_config, _ = parser.parse_known_args() 34 | 35 | # print config. 36 | state = {k: v for k, v in delf_config._get_kwargs()} 37 | print(state) 38 | return delf_config 39 | 40 | 41 | class Feeder(): 42 | def __init__(self, 43 | feeder_config): 44 | # environment setting. 45 | os.environ['CUDA_VISIBLE_DEVICES'] = str(feeder_config.get('GPU_ID')) 46 | 47 | # parameters. 
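        # keypoint-selection hyper-parameters (NMS IoU threshold, attention threshold, top-k, scales),
        # supplied via the feeder_config dict.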
48 | self.iou_thres = feeder_config.get('IOU_THRES') 49 | self.attn_thres = feeder_config.get('ATTN_THRES') 50 | self.top_k = feeder_config.get('TOP_K') 51 | self.target_layer = feeder_config.get('TARGET_LAYER') 52 | self.scale_list = feeder_config.get('SCALE_LIST') 53 | self.workers = feeder_config.get('WORKERS') 54 | 55 | # load pytorch model 56 | print('load DeLF pytorch model...') 57 | delf_config = __build_delf_config__(feeder_config) 58 | self.model = Delf_V1( 59 | ncls = delf_config.ncls, 60 | load_from = delf_config.load_from, 61 | arch = delf_config.arch, 62 | stage = delf_config.stage, 63 | target_layer = delf_config.target_layer, 64 | use_random_gamma_rescale = False) 65 | self.model.eval() 66 | self.model = __cuda__(self.model) 67 | 68 | # load pca matrix 69 | print('load PCA parameters...') 70 | h5file = h5py.File(feeder_config.get('PCA_PARAMETERS_PATH'), 'r') 71 | self.pca_mean = h5file['.']['pca_mean'].value 72 | self.pca_vars = h5file['.']['pca_vars'].value 73 | self.pca_matrix = h5file['.']['pca_matrix'].value 74 | self.pca_dims = feeder_config.get('PCA_DIMS') 75 | self.use_pca = feeder_config.get('USE_PCA') 76 | 77 | # !!! stride value in tensorflow inference code is not applicable for pytorch, because pytorch works differently. 78 | # !!! make sure to use stride=16 for target_layer=='layer3'. 79 | if self.target_layer in ['layer3']: 80 | self.fmap_depth = 1024 81 | self.rf = 291.0 82 | self.stride = 16.0 83 | self.padding = 145.0 84 | elif self.target_layer in ['layer4']: 85 | self.fmap_depth = 2048 86 | self.rf = 483.0 87 | self.stride = 32.0 88 | self.padding = 241.0 89 | else: 90 | raise ValueError('Unsupported target_layer: {}'.format(self.target_layer)) 91 | 92 | 93 | def __resize_image__(self, image, target_size): 94 | return 'resize image.' 95 | 96 | def __transform__(self, image): 97 | transform = transforms.ToTensor() 98 | return transform(image) 99 | 100 | def __print_result__(self, data): 101 | print('----------------------------------------------------------') 102 | print('filename: ', data['filename']) 103 | print("location_np_list shape: ", data['location_np_list'].shape) 104 | print("descriptor_np_list shape: ", data['descriptor_np_list'].shape) 105 | print("feature_scale_np_list shape: ", data['feature_scale_np_list'].shape) 106 | print("attention_score_np_list shape: ", data['attention_score_np_list'].shape) 107 | print("attention_np_list shape: ", data['attention_np_list'].shape) 108 | print('----------------------------------------------------------') 109 | 110 | def __get_result__(self, 111 | path, 112 | image): 113 | # load tensor image 114 | x = __cuda__(self.__transform__(image)) 115 | x = x.unsqueeze(0) 116 | 117 | # extract feature. 
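        # GetDelfFeatureFromMultiScale returns a dict of numpy arrays: keypoint locations, descriptors,
        # per-keypoint scales, attention scores, and the original-scale attention map.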
118 | data = delf_helper.GetDelfFeatureFromMultiScale( 119 | x = x, 120 | model = self.model, 121 | filename = path, 122 | pca_mean = self.pca_mean, 123 | pca_vars = self.pca_vars, 124 | pca_matrix = self.pca_matrix, 125 | pca_dims = self.pca_dims, 126 | rf = self.rf, 127 | stride = self.stride, 128 | padding = self.padding, 129 | top_k = self.top_k, 130 | scale_list = self.scale_list, 131 | iou_thres = self.iou_thres, 132 | attn_thres = self.attn_thres, 133 | use_pca = self.use_pca, 134 | workers = self.workers) 135 | 136 | if __DEBUG__: 137 | self.__print_result__(data) 138 | return data 139 | 140 | def feed(self, pil_image, filename='dummy'): 141 | return self.__get_result__(filename, pil_image) 142 | 143 | def feed_to_compare(self, query_path, pil_image): 144 | '''feed_to_compare 145 | used to visualize mathcing between two query images. 146 | ''' 147 | assert len(pil_image)==2, 'length of query list should be 2.' 148 | outputs = [] 149 | for i in range(2): 150 | outputs.append(self.__get_result__(query_path[i], pil_image[i])) 151 | return outputs 152 | 153 | -------------------------------------------------------------------------------- /helper/matcher.py: -------------------------------------------------------------------------------- 1 | '''matcher.py 2 | Matches two images using their DELF features. 3 | The matching is done using feature-based nearest-neighbor search, followed by 4 | geometric verification using RANSAC. 5 | The DELF features can be extracted using the extract_features.py script. 6 | ''' 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os, sys, time 12 | 13 | import numpy as np 14 | from PIL import Image 15 | import io 16 | from io import BytesIO 17 | import matplotlib.image as mpimg 18 | import matplotlib.pyplot as plt 19 | from scipy.spatial import cKDTree 20 | from skimage.feature import plot_matches 21 | from skimage.measure import ransac 22 | from skimage.transform import AffineTransform 23 | 24 | import cv2 25 | 26 | _DISTANCE_THRESHOLD = 3.4 # Adjust this value depending on your dataset. 27 | # This value needs to be engineered for optimized result. 28 | IMAGE_SIZE = (16, 12) 29 | 30 | def load_image_into_numpy_array(image): 31 | if image.mode == "P": # PNG palette mode 32 | image = image.convert('RGBA') 33 | # image.palette = None # PIL Bug Workaround 34 | 35 | (im_width, im_height) = image.size 36 | imgarray = np.asarray(image).reshape( 37 | (im_height, im_width, -1)).astype(np.uint8) 38 | 39 | return imgarray[:, :, :3] # truncate alpha channel if exists. 40 | 41 | def read_image(image_path): 42 | with open(image_path, 'rb') as image_fp: 43 | image = Image.open(image_fp) 44 | # the array based representation of the image will be used later in order to prepare the 45 | # result image with boxes and labels on it. 46 | image_np = load_image_into_numpy_array(image) 47 | return image_np 48 | 49 | def get_inliers(locations_1, descriptors_1, locations_2, descriptors_2): 50 | 51 | num_features_1 = locations_1.shape[0] 52 | num_features_2 = locations_2.shape[0] 53 | 54 | # Find nearest-neighbor matches using a KD tree. 55 | d1_tree = cKDTree(descriptors_1) 56 | distances, indices = d1_tree.query( 57 | descriptors_2, distance_upper_bound=_DISTANCE_THRESHOLD) 58 | 59 | # Select feature locations for putative matches. 
60 |     locations_2_to_use = np.array([
61 |         locations_2[i,] for i in range(num_features_2)
62 |         if indices[i] != num_features_1
63 |     ])
64 |     locations_1_to_use = np.array([
65 |         locations_1[indices[i],] for i in range(num_features_2)
66 |         if indices[i] != num_features_1
67 |     ])
68 | 
69 |     # Perform geometric verification using RANSAC.
70 |     model_robust, inliers = ransac(
71 |         (locations_1_to_use, locations_2_to_use),
72 |         AffineTransform,
73 |         min_samples=3,
74 |         residual_threshold=20,
75 |         max_trials=1000)
76 |     return inliers, locations_1_to_use, locations_2_to_use
77 | 
78 | 
79 | def get_attention_image_byte(att_score):
80 |     print('attn_score shape: {}'.format(att_score.shape))
81 |     attention_np = np.squeeze(att_score, (0, 1)).astype(np.uint8)
82 | 
83 |     im = Image.fromarray(np.dstack((attention_np, attention_np, attention_np)))
84 |     buf = io.BytesIO()
85 |     im.save(buf, 'PNG')
86 |     return buf.getvalue()
87 | 
88 | 
89 | def get_ransac_image_byte(img_1, locations_1, descriptors_1, img_2, locations_2, descriptors_2, save_path=None, use_opencv_match_vis=True):
90 |     """
91 |     Args:
92 |         img_1: image bytes. JPEG, PNG
93 |         img_2: image bytes. JPEG, PNG
94 |     Return:
95 |         ransac result PNG image as bytes
96 |         score: number of matching inliers
97 |     """
98 | 
99 |     # Convert image bytes to 3-channel numpy arrays.
100 |     with Image.open(BytesIO(img_1)) as img:
101 |         img_1 = load_image_into_numpy_array(img)
102 |     with Image.open(BytesIO(img_2)) as img:
103 |         img_2 = load_image_into_numpy_array(img)
104 | 
105 |     inliers, locations_1_to_use, locations_2_to_use = get_inliers(
106 |         locations_1,
107 |         descriptors_1,
108 |         locations_2,
109 |         descriptors_2)
110 | 
111 |     # Visualize correspondences, and save to file.
112 |     if not use_opencv_match_vis: fig, ax = plt.subplots(figsize=IMAGE_SIZE)  # canvas is only needed for the matplotlib branch below.
113 |     inlier_idxs = np.nonzero(inliers)[0]
114 |     score = sum(inliers)
115 |     if score is None:
116 |         score = 0
117 | 
118 | 
119 |     if use_opencv_match_vis:
120 |         inlier_matches = []
121 |         for idx in inlier_idxs:
122 |             inlier_matches.append(cv2.DMatch(idx, idx, 0))
123 | 
124 |         kp1 = []
125 |         for point in locations_1_to_use:
126 |             kp = cv2.KeyPoint(point[1], point[0], _size=1)
127 |             kp1.append(kp)
128 | 
129 |         kp2 = []
130 |         for point in locations_2_to_use:
131 |             kp = cv2.KeyPoint(point[1], point[0], _size=1)
132 |             kp2.append(kp)
133 | 
134 | 
135 |         ransac_img = cv2.drawMatches(img_1, kp1, img_2, kp2, inlier_matches, None, flags=0)
136 |         ransac_img = cv2.cvtColor(ransac_img, cv2.COLOR_BGR2RGB)
137 |         image_byte = cv2.imencode('.png', ransac_img)[1].tostring()
138 | 
139 |     else:
140 |         plot_matches(
141 |             ax,
142 |             img_1,
143 |             img_2,
144 |             locations_1_to_use,
145 |             locations_2_to_use,
146 |             np.column_stack((inlier_idxs, inlier_idxs)),
147 |             matches_color='b')
148 |         ax.axis('off')
149 |         extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
150 |         buf = io.BytesIO()
151 |         fig.savefig(buf, bbox_inches=extent, format='png')
152 |         plt.close('all') # close resources.
153 | image_byte = buf.getvalue() 154 | 155 | return image_byte, score 156 | 157 | 158 | -------------------------------------------------------------------------------- /static/attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nashory/DeLF-pytorch/5577a447a0330b9e976cff56a10fc91669216b8c/static/attn.png -------------------------------------------------------------------------------- /static/glr1k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nashory/DeLF-pytorch/5577a447a0330b9e976cff56a10fc91669216b8c/static/glr1k.png -------------------------------------------------------------------------------- /static/glr2k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nashory/DeLF-pytorch/5577a447a0330b9e976cff56a10fc91669216b8c/static/glr2k.png -------------------------------------------------------------------------------- /static/ldmk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nashory/DeLF-pytorch/5577a447a0330b9e976cff56a10fc91669216b8c/static/ldmk.png -------------------------------------------------------------------------------- /static/vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nashory/DeLF-pytorch/5577a447a0330b9e976cff56a10fc91669216b8c/static/vis.png -------------------------------------------------------------------------------- /train/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.sh 3 | 4 | -------------------------------------------------------------------------------- /train/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | config.py 3 | """ 4 | 5 | import argparse 6 | import time 7 | import torchvision.models as models 8 | 9 | # helper func. 10 | def str2bool(v): 11 | return v.lower() in ('true', '1') 12 | 13 | 14 | # Parser 15 | parser = argparse.ArgumentParser('delf') 16 | 17 | # Common options. 
18 | parser.add_argument('--gpu_id', 19 | default='4', 20 | type=str, 21 | help='id(s) for CUDA_VISIBLE_DEVICES') 22 | parser.add_argument('--manualSeed', 23 | type=int, 24 | default=int(time.time()), 25 | help='manual seed') 26 | # Experiment 27 | parser.add_argument('--expr', 28 | default='devel', 29 | type=str, 30 | help='experiment name') 31 | parser.add_argument('--load_from', 32 | default='dummy', 33 | type=str, 34 | help='from which experiment the model be loaded') 35 | # Datasets 36 | parser.add_argument('--stage', 37 | default='finetune', 38 | type=str, 39 | help='target stage: finetune | keypoint') 40 | parser.add_argument('--train_path_for_pretraining', 41 | default='../../data/landmarks/landmarks_full_train', 42 | type=str) 43 | parser.add_argument('--val_path_for_pretraining', 44 | default='../../data/landmarks/landmarks_full_val', 45 | type=str) 46 | parser.add_argument('--train_path_for_finetuning', 47 | default='../../data/landmarks/landmarks_clean_train', 48 | type=str) 49 | parser.add_argument('--val_path_for_finetuning', 50 | default='../../data/landmarks/landmarks_clean_val', 51 | type=str) 52 | parser.add_argument('--workers', 53 | default=20, 54 | type=int, 55 | help='number of data loading workers (default: 4)') 56 | # preprocessing 57 | parser.add_argument('--finetune_sample_size', 58 | default=256, 59 | type=int, 60 | help='finetune resize (default: 256)') 61 | parser.add_argument('--finetune_crop_size', 62 | default=224, 63 | type=int, 64 | help='finetune crop (default: 224)') 65 | parser.add_argument('--keypoint_sample_size', 66 | default=900, 67 | type=int, 68 | help='keypoint (default: 900)') 69 | parser.add_argument('--keypoint_crop_size', 70 | default=720, 71 | type=int, 72 | help='keypoint (default: 720)') 73 | parser.add_argument('--target_layer', 74 | default='layer3', 75 | type=str, 76 | help='target layer you wish to extract local features from: layer3 | layer4') 77 | parser.add_argument('--use_random_gamma_rescale', 78 | type=str2bool, 79 | default=True, 80 | help='apply gamma rescaling in range of [0.3535, 1.0]') 81 | # training parameters 82 | parser.add_argument('--finetune_epoch', 83 | default=30, 84 | type=int, 85 | help='number of total epochs for finetune stage.') 86 | parser.add_argument('--keypoint_epoch', 87 | default=30, 88 | type=int, 89 | help='number of total epochs for keypoint stage.') 90 | parser.add_argument('--lr', 91 | default=0.008, 92 | type=float, 93 | help='initial learning rate') 94 | parser.add_argument('--lr_gamma', 95 | default=0.5, 96 | type=float, 97 | help='decay factor of learning rate') 98 | parser.add_argument('--lr_stepsize', 99 | default=10, 100 | type=int, 101 | help='decay learning rate at every specified epoch.') 102 | parser.add_argument('--weight_decay', 103 | default=0.0001, 104 | type=float, 105 | help='weight decay (l2 penalty)') 106 | parser.add_argument('--optim', 107 | default='sgd', 108 | type=str, 109 | help='optimizer: rmsprop | sgd | adam') 110 | parser.add_argument('--train_batch_size', 111 | default=8, 112 | type=int, 113 | help='train batchsize (default: 16)') 114 | parser.add_argument('--val_batch_size', 115 | default=8, 116 | type=int, 117 | help='val batchsize (default: 16)') 118 | parser.add_argument('--ncls', 119 | default=586, 120 | type=int, 121 | help='number of classes') 122 | parser.add_argument('--lr_decay', 123 | default=0.5, 124 | type=float, 125 | help='lr decay factor') 126 | parser.add_argument('--arch', 127 | metavar='ARCH', 128 | default='resnet50', 129 | choices=['resnet50, 
resnet101, resnet152'], 130 | help='only support resnet50 at the moment.') 131 | 132 | ## parse and save config. 133 | config, _ = parser.parse_known_args() 134 | 135 | 136 | -------------------------------------------------------------------------------- /train/dataloader.py: -------------------------------------------------------------------------------- 1 | 2 | #-*- coding: utf-8 -*- 3 | 4 | ''' 5 | dataloader.py 6 | ''' 7 | 8 | import sys, os, time 9 | 10 | import torch 11 | import torchvision.transforms as transforms 12 | import torchvision.datasets as datasets 13 | from PIL import Image, ImageFile 14 | Image.MAX_IMAGE_PIXELS = 1000000000 # to avoid error "https://github.com/zimeon/iiif/issues/11" 15 | Image.warnings.simplefilter('error', Image.DecompressionBombWarning) 16 | ImageFile.LOAD_TRUNCATED_IMAGES = True # to avoid error "https://github.com/python-pillow/Pillow/issues/1510" 17 | 18 | def get_loader( 19 | train_path, 20 | val_path, 21 | stage, 22 | train_batch_size, 23 | val_batch_size, 24 | sample_size, 25 | crop_size, 26 | workers): 27 | 28 | if stage in ['finetune']: 29 | # for train 30 | prepro = [] 31 | prepro.append(transforms.Resize(size=sample_size)) 32 | prepro.append(transforms.CenterCrop(size=sample_size)) 33 | prepro.append(transforms.RandomCrop(size=crop_size, padding=0)) 34 | prepro.append(transforms.RandomHorizontalFlip()) 35 | #prepro.append(transforms.RandomRotation((-15, 15))) # experimental. 36 | prepro.append(transforms.ToTensor()) 37 | train_transform = transforms.Compose(prepro) 38 | train_path = train_path 39 | 40 | # for val 41 | prepro = [] 42 | prepro.append(transforms.Resize(size=sample_size)) 43 | prepro.append(transforms.CenterCrop(size=crop_size)) 44 | prepro.append(transforms.ToTensor()) 45 | val_transform = transforms.Compose(prepro) 46 | val_path = val_path 47 | 48 | elif stage in ['keypoint']: 49 | # for train 50 | prepro = [] 51 | prepro.append(transforms.Resize(size=sample_size)) 52 | prepro.append(transforms.CenterCrop(size=sample_size)) 53 | prepro.append(transforms.RandomCrop(size=crop_size, padding=0)) 54 | prepro.append(transforms.RandomHorizontalFlip()) 55 | #prepro.append(transforms.RandomRotation((-15, 15))) # experimental. 56 | prepro.append(transforms.ToTensor()) 57 | train_transform = transforms.Compose(prepro) 58 | train_path = train_path 59 | 60 | # for val 61 | prepro = [] 62 | prepro.append(transforms.Resize(size=sample_size)) 63 | prepro.append(transforms.CenterCrop(size=crop_size)) 64 | prepro.append(transforms.ToTensor()) 65 | val_transform = transforms.Compose(prepro) 66 | val_path = val_path 67 | 68 | # image folder dataset. 
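    # torchvision's ImageFolder expects the usual root/<class_name>/<image> directory layout for both the train and val paths.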
69 | train_dataset = datasets.ImageFolder(root = train_path, 70 | transform = train_transform) 71 | val_dataset = datasets.ImageFolder(root = val_path, 72 | transform = val_transform) 73 | 74 | # return train/val dataloader 75 | train_loader = torch.utils.data.DataLoader(dataset = train_dataset, 76 | batch_size = train_batch_size, 77 | shuffle = True, 78 | num_workers = workers) 79 | val_loader = torch.utils.data.DataLoader(dataset = val_dataset, 80 | batch_size = val_batch_size, 81 | shuffle = False, 82 | num_workers = workers) 83 | 84 | return train_loader, val_loader 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /train/delf.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import os, sys, time 9 | sys.path.append('../') 10 | import random 11 | import logging 12 | import copy 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.functional as F 17 | import torchvision.models as models 18 | import torchvision.transforms as transforms 19 | from torch.autograd import Variable 20 | 21 | from train.layers import ( 22 | CMul, 23 | Flatten, 24 | ConcatTable, 25 | Identity, 26 | Reshape, 27 | SpatialAttention2d, 28 | WeightedSum2d) 29 | 30 | 31 | ''' helper functions 32 | ''' 33 | 34 | def __unfreeze_weights__(module_dict, freeze=[]): 35 | for _, v in enumerate(freeze): 36 | module = module_dict[v] 37 | for param in module.parameters(): 38 | param.requires_grad = True 39 | 40 | def __freeze_weights__(module_dict, freeze=[]): 41 | for _, v in enumerate(freeze): 42 | module = module_dict[v] 43 | for param in module.parameters(): 44 | param.requires_grad = False 45 | 46 | def __print_freeze_status__(model): 47 | '''print freeze stagus. only for debugging purpose. 
48 | ''' 49 | for i, module in enumerate(model.named_children()): 50 | for param in module[1].parameters(): 51 | print('{}:{}'.format(module[0], str(param.requires_grad))) 52 | 53 | def __load_weights_from__(module_dict, load_dict, modulenames): 54 | for modulename in modulenames: 55 | module = module_dict[modulename] 56 | print('loaded weights from module "{}" ...'.format(modulename)) 57 | module.load_state_dict(load_dict[modulename]) 58 | 59 | def __deep_copy_module__(module, exclude=[]): 60 | modules = {} 61 | for name, m in module.named_children(): 62 | if name not in exclude: 63 | modules[name] = copy.deepcopy(m) 64 | print('deep copied weights from layer "{}" ...'.format(name)) 65 | return modules 66 | 67 | def __cuda__(model): 68 | if torch.cuda.is_available(): 69 | model.cuda() 70 | return model 71 | 72 | 73 | '''Delf 74 | ''' 75 | 76 | class Delf_V1(nn.Module): 77 | def __init__( 78 | self, 79 | ncls=None, 80 | load_from=None, 81 | arch='resnet50', 82 | stage='inference', 83 | target_layer='layer3', 84 | use_random_gamma_rescale=False): 85 | 86 | super(Delf_V1, self).__init__() 87 | 88 | self.arch = arch 89 | self.stage = stage 90 | self.target_layer = target_layer 91 | self.load_from = load_from 92 | self.use_random_gamma_rescale = use_random_gamma_rescale 93 | 94 | self.module_list = nn.ModuleList() 95 | self.module_dict = {} 96 | self.end_points = {} 97 | 98 | in_c = self.__get_in_c__() 99 | if self.stage in ['finetune']: 100 | use_pretrained_base = True 101 | exclude = ['avgpool', 'fc'] 102 | 103 | elif self.stage in ['keypoint']: 104 | use_pretrained_base = False 105 | self.use_l2_normalized_feature = True 106 | if self.target_layer in ['layer3']: 107 | exclude = ['layer4', 'avgpool', 'fc'] 108 | if self.target_layer in ['layer4']: 109 | exclude = ['avgpool', 'fc'] 110 | 111 | else: 112 | assert self.stage in ['inference'] 113 | use_pretrained_base = False 114 | self.use_l2_normalized_feature = True 115 | if self.target_layer in ['layer3']: 116 | exclude = ['layer4', 'avgpool', 'fc'] 117 | if self.target_layer in ['layer4']: 118 | exclude = ['avgpool', 'fc'] 119 | 120 | if self.arch in ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']: 121 | print('[{}] loading {} pretrained ImageNet weights ... It may take few seconds...' 122 | .format(self.stage, self.arch)) 123 | module = models.__dict__[self.arch](pretrained=use_pretrained_base) 124 | module_state_dict = __deep_copy_module__(module, exclude=exclude) 125 | module = None 126 | 127 | # endpoint: base 128 | submodules = [] 129 | submodules.append(module_state_dict['conv1']) 130 | submodules.append(module_state_dict['bn1']) 131 | submodules.append(module_state_dict['relu']) 132 | submodules.append(module_state_dict['maxpool']) 133 | submodules.append(module_state_dict['layer1']) 134 | submodules.append(module_state_dict['layer2']) 135 | submodules.append(module_state_dict['layer3']) 136 | self.__register_module__('base', submodules) 137 | 138 | # build structure. 
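            # finetune: base -> layer4 -> average pooling -> logits (plain classification head).
            # keypoint / inference: base -> 1x1-conv spatial attention -> attention-weighted sum pooling.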
139 | if self.stage in ['finetune']: 140 | # endpoint: layer4, pool 141 | self.__register_module__('layer4', module_state_dict['layer4']) 142 | self.__register_module__('pool', nn.AvgPool2d( 143 | kernel_size=7, stride=1, padding=0, 144 | ceil_mode=False, count_include_pad=True)) 145 | elif self.stage in ['keypoint', 'inference']: 146 | # endpoint: attn, pool 147 | self.__register_module__('attn', SpatialAttention2d(in_c=in_c, act_fn='relu')) 148 | self.__register_module__('pool', WeightedSum2d()) 149 | 150 | 151 | if self.stage not in ['inference']: 152 | # endpoint: logit 153 | submodules = [] 154 | submodules.append(nn.Conv2d(in_c, ncls, 1)) 155 | submodules.append(Flatten()) 156 | self.__register_module__('logits', submodules) 157 | 158 | # load weights. 159 | if self.stage in ['keypoint']: 160 | load_dict = torch.load(self.load_from) 161 | __load_weights_from__(self.module_dict, load_dict, modulenames=['base']) 162 | __freeze_weights__(self.module_dict, freeze=['base']) 163 | print('load model from "{}"'.format(load_from)) 164 | elif self.stage in ['inference']: 165 | load_dict = torch.load(self.load_from) 166 | __load_weights_from__(self.module_dict, load_dict, modulenames=['base','attn','pool']) 167 | print('load model from "{}"'.format(load_from)) 168 | 169 | 170 | def __register_module__(self, modulename, module): 171 | if isinstance(module, list) or isinstance(module, tuple): 172 | module = nn.Sequential(*module) 173 | self.module_list.append(module) 174 | self.module_dict[modulename] = module 175 | 176 | def __get_in_c__(self): 177 | # adjust input channels according to arch. 178 | if self.arch in ['resnet18', 'resnet34']: 179 | in_c = 512 180 | elif self.arch in ['resnet50', 'resnet101', 'resnet152']: 181 | if self.stage in ['finetune']: 182 | in_c = 2048 183 | elif self.stage in ['keypoint', 'inference']: 184 | if self.target_layer in ['layer3']: 185 | in_c = 1024 186 | elif self.target_layer in ['layer4']: 187 | in_c = 2048 188 | return in_c 189 | 190 | def __forward_and_save__(self, x, modulename): 191 | module = self.module_dict[modulename] 192 | x = module(x) 193 | self.end_points[modulename] = x 194 | return x 195 | 196 | def __forward_and_save_feature__(self, x, model, name): 197 | x = model(x) 198 | self.end_points[name] = x.data 199 | return x 200 | 201 | def __gamma_rescale__(self, x, min_scale=0.3535, max_scale=1.0): 202 | '''max_scale > 1.0 may cause training failure. 203 | ''' 204 | h, w = x.size(2), x.size(3) 205 | assert w == h, 'input must be square image.' 
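        # gamma is drawn uniformly from [min_scale, max_scale] and applied to both sides, so with the
        # defaults the crop is rescaled to between ~0.35x and 1.0x of its side length (0.3535 ≈ 2^-1.5).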
206 | gamma = random.uniform(min_scale, max_scale) 207 | new_h, new_w = int(h*gamma), int(w*gamma) 208 | x = F.upsample(x, size=(new_h, new_w), mode='bilinear') 209 | return x 210 | 211 | def get_endpoints(self): 212 | return self.end_points 213 | 214 | def get_feature_at(self, modulename): 215 | return copy.deepcopy(self.end_points[modulename].data.cpu()) 216 | 217 | def write_to(self, state): 218 | if self.stage in ['finetune']: 219 | state['base'] = self.module_dict['base'].state_dict() 220 | state['layer4'] = self.module_dict['layer4'].state_dict() 221 | state['pool'] = self.module_dict['pool'].state_dict() 222 | state['logits'] = self.module_dict['logits'].state_dict() 223 | elif self.stage in ['keypoint']: 224 | state['base'] = self.module_dict['base'].state_dict() 225 | state['attn'] = self.module_dict['attn'].state_dict() 226 | state['pool'] = self.module_dict['pool'].state_dict() 227 | state['logits'] = self.module_dict['logits'].state_dict() 228 | else: 229 | assert self.stage in ['inference'] 230 | raise ValueError('inference does not support model saving!') 231 | 232 | def forward_for_serving(self, x): 233 | ''' 234 | This function directly returns attention score and raw features 235 | without saving to endpoint dict. 236 | ''' 237 | x = self.__forward_and_save__(x, 'base') 238 | if self.target_layer in ['layer4']: 239 | x = self.__forward_and_save__(x, 'layer4') 240 | ret_x = x 241 | if self.use_l2_normalized_feature: 242 | attn_x = F.normalize(x, p=2, dim=1) 243 | else: 244 | attn_x = x 245 | attn_score = self.__forward_and_save__(x, 'attn') 246 | ret_s = attn_score 247 | return ret_x.data.cpu(), ret_s.data.cpu() 248 | 249 | def forward(self, x): 250 | if self.stage in ['finetune']: 251 | x = self.__forward_and_save__(x, 'base') 252 | x = self.__forward_and_save__(x, 'layer4') 253 | x = self.__forward_and_save__(x, 'pool') 254 | x = self.__forward_and_save__(x, 'logits') 255 | elif self.stage in ['keypoint']: 256 | if self.use_random_gamma_rescale: 257 | x = self.__gamma_rescale__(x) 258 | x = self.__forward_and_save__(x, 'base') 259 | if self.target_layer in ['layer4']: 260 | x = self.__forward_and_save__(x, 'layer4') 261 | if self.use_l2_normalized_feature: 262 | attn_x = F.normalize(x, p=2, dim=1) 263 | else: 264 | attn_x = x 265 | attn_score = self.__forward_and_save__(x, 'attn') 266 | x = self.__forward_and_save__([attn_x, attn_score], 'pool') 267 | x = self.__forward_and_save__(x, 'logits') 268 | 269 | elif self.stage in ['inference']: 270 | x = self.__forward_and_save__(x, 'base') 271 | if self.target_layer in ['layer4']: 272 | x = self.__forward_and_save__(x, 'layer4') 273 | if self.use_l2_normalized_feature: 274 | attn_x = F.normalize(x, p=2, dim=1) 275 | else: 276 | attn_x = x 277 | attn_score = self.__forward_and_save__(x, 'attn') 278 | x = self.__forward_and_save__([attn_x, attn_score], 'pool') 279 | 280 | else: 281 | raise ValueError('unsupported stage parameter: {}'.format(self.stage)) 282 | return x 283 | 284 | if __name__=="__main__": 285 | pass; 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | -------------------------------------------------------------------------------- /train/layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | 9 | '''custom layers 10 | ''' 11 | class Flatten(nn.Module): 12 | def __init__(self): 13 | super(Flatten, self).__init__() 14 | 15 | def forward(self, x): 16 | 
return x.view(x.size(0), -1) 17 | 18 | def __repr__(self): 19 | return self.__class__.__name__ 20 | 21 | 22 | class ConcatTable(nn.Module): 23 | '''ConcatTable container in Torch7. 24 | ''' 25 | def __init__(self, layer1, layer2): 26 | super(ConcatTable, self).__init__() 27 | self.layer1 = layer1 28 | self.layer2 = layer2 29 | 30 | def forward(self,x): 31 | return [self.layer1(x), self.layer2(x)] 32 | 33 | 34 | class Identity(nn.Module): 35 | ''' 36 | nn.Identity in Torch7. 37 | ''' 38 | def __init__(self): 39 | super(Identity, self).__init__() 40 | def forward(self, x): 41 | return x 42 | def __repr__(self): 43 | return self.__class__.__name__ + ' (skip connection)' 44 | 45 | 46 | class Reshape(nn.Module): 47 | ''' 48 | nn.Reshape in Torch7. 49 | ''' 50 | def __init__(self, shape): 51 | super(Reshape, self).__init__() 52 | self.shape = shape 53 | def forward(self, x): 54 | return x.view(self.shape) 55 | def __repr__(self): 56 | return self.__class__.__name__ + ' (reshape to size: {})'.format(" ".join(str(x) for x in self.shape)) 57 | 58 | 59 | class CMul(nn.Module): 60 | ''' 61 | nn.CMul in Torch7. 62 | ''' 63 | def __init__(self): 64 | super(CMul, self).__init__() 65 | def forward(self, x): 66 | return x[0]*x[1] 67 | def __repr__(self): 68 | return self.__class__.__name__ 69 | 70 | 71 | class WeightedSum2d(nn.Module): 72 | def __init__(self): 73 | super(WeightedSum2d, self).__init__() 74 | def forward(self, x): 75 | x, weights = x 76 | assert x.size(2) == weights.size(2) and x.size(3) == weights.size(3),\ 77 | 'err: h, w of tensors x({}) and weights({}) must be the same.'\ 78 | .format(x.size, weights.size) 79 | y = x * weights # element-wise multiplication 80 | y = y.view(-1, x.size(1), x.size(2) * x.size(3)) # b x c x hw 81 | return torch.sum(y, dim=2).view(-1, x.size(1), 1, 1) # b x c x 1 x 1 82 | def __repr__(self): 83 | return self.__class__.__name__ 84 | 85 | 86 | class SpatialAttention2d(nn.Module): 87 | ''' 88 | SpatialAttention2d 89 | 2-layer 1x1 conv network with softplus activation. 90 | attention score normalization will be added for experiment. 91 | ''' 92 | def __init__(self, in_c, act_fn='relu'): 93 | super(SpatialAttention2d, self).__init__() 94 | self.conv1 = nn.Conv2d(in_c, 512, 1, 1) # 1x1 conv 95 | if act_fn.lower() in ['relu']: 96 | self.act1 = nn.ReLU() 97 | elif act_fn.lower() in ['leakyrelu', 'leaky', 'leaky_relu']: 98 | self.act1 = nn.LeakyReLU() 99 | self.conv2 = nn.Conv2d(512, 1, 1, 1) # 1x1 conv 100 | self.softplus = nn.Softplus(beta=1, threshold=20) # use default setting. 101 | 102 | def forward(self, x): 103 | ''' 104 | x : spatial feature map. 
(b x c x w x h) 105 | s : softplus attention score 106 | ''' 107 | x = self.conv1(x) 108 | x = self.act1(x) 109 | x = self.conv2(x) 110 | x = self.softplus(x) 111 | return x 112 | 113 | def __repr__(self): 114 | return self.__class__.__name__ 115 | 116 | -------------------------------------------------------------------------------- /train/main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | main.py 3 | ''' 4 | 5 | import os, sys, time 6 | sys.path.append('../') 7 | import shutil 8 | import random 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.parallel 13 | import torch.backends.cudnn as cudnn 14 | import torch.optim as optim 15 | import torch.utils.data as data 16 | import torchvision.transforms as transforms 17 | import torchvision.datasets as datasets 18 | import torchvision.models as models 19 | 20 | from config import config 21 | 22 | 23 | def main(): 24 | # print config. 25 | state = {k: v for k, v in config._get_kwargs()} 26 | print(state) 27 | 28 | # if use cuda. 29 | os.environ['CUDA_VISIBLE_DEVICES'] = config.gpu_id 30 | use_cuda = torch.cuda.is_available() 31 | 32 | # Random seed 33 | if config.manualSeed is None: 34 | config.manualSeed = random.randint(1, 10000) 35 | random.seed(config.manualSeed) 36 | torch.manual_seed(config.manualSeed) 37 | if use_cuda: 38 | torch.cuda.manual_seed_all(config.manualSeed) 39 | torch.backends.cudnn.benchmark = True # speed up training. 40 | 41 | # data loader 42 | from dataloader import get_loader 43 | if config.stage in ['finetune']: 44 | sample_size = config.finetune_sample_size 45 | crop_size = config.finetune_crop_size 46 | elif config.stage in ['keypoint']: 47 | sample_size = config.keypoint_sample_size 48 | crop_size = config.keypoint_crop_size 49 | 50 | # dataloader for pretrain 51 | train_loader_pt, val_loader_pt = get_loader( 52 | train_path = config.train_path_for_pretraining, 53 | val_path = config.val_path_for_pretraining, 54 | stage = config.stage, 55 | train_batch_size = config.train_batch_size, 56 | val_batch_size = config.val_batch_size, 57 | sample_size = sample_size, 58 | crop_size = crop_size, 59 | workers = config.workers) 60 | # dataloader for finetune 61 | train_loader_ft, val_loader_ft = get_loader( 62 | train_path = config.train_path_for_finetuning, 63 | val_path = config.val_path_for_finetuning, 64 | stage = config.stage, 65 | train_batch_size = config.train_batch_size, 66 | val_batch_size = config.val_batch_size, 67 | sample_size = sample_size, 68 | crop_size = crop_size, 69 | workers = config.workers) 70 | 71 | 72 | # load model 73 | from delf import Delf_V1 74 | model = Delf_V1( 75 | ncls = config.ncls, 76 | load_from = config.load_from, 77 | arch = config.arch, 78 | stage = config.stage, 79 | target_layer = config.target_layer, 80 | use_random_gamma_rescale = config.use_random_gamma_rescale) 81 | 82 | # solver 83 | from solver import Solver 84 | solver = Solver(config=config, model=model) 85 | if config.stage in ['finetune']: 86 | epochs = config.finetune_epoch 87 | elif config.stage in ['keypoint']: 88 | epochs = config.keypoint_epoch 89 | 90 | # train/test for N-epochs. 
(50%: pretain with datasetA, 50%: finetune with datasetB) 91 | for epoch in range(epochs): 92 | if epoch < int(epochs * 0.5): 93 | print('[{:.1f}] load pretrain dataset: {}'.format( 94 | float(epoch) / epochs, 95 | config.train_path_for_pretraining)) 96 | train_loader = train_loader_pt 97 | val_loader = val_loader_pt 98 | else: 99 | print('[{:.1f}] load finetune dataset: {}'.format( 100 | float(epoch) / epochs, 101 | config.train_path_for_finetuning)) 102 | train_loader = train_loader_ft 103 | val_loader = val_loader_ft 104 | 105 | solver.train('train', epoch, train_loader, val_loader) 106 | solver.train('val', epoch, train_loader, val_loader) 107 | 108 | print('Congrats! You just finished DeLF training.') 109 | 110 | 111 | if __name__ == '__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /train/solver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | ''' 5 | PyTorch Implementation of training DeLF feature. 6 | Solver for step 1 (finetune local descriptor) 7 | nashory, 2018.04 8 | ''' 9 | import os, sys, time 10 | import shutil 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.optim as optim 15 | from torch.autograd import Variable 16 | 17 | from utils import Bar, Logger, AverageMeter, compute_precision_top_k, mkdir_p 18 | 19 | '''helper functions. 20 | ''' 21 | def __cuda__(x): 22 | if torch.cuda.is_available(): 23 | return x.cuda() 24 | else: 25 | return x 26 | 27 | def __is_cuda__(): 28 | return torch.cuda.is_available() 29 | 30 | def __to_var__(x, volatile=False): 31 | return Variable(x, volatile=volatile) 32 | 33 | def __to_tensor__(x): 34 | return x.data 35 | 36 | 37 | class Solver(object): 38 | def __init__(self, config, model): 39 | self.state = {k: v for k, v in config._get_kwargs()} 40 | self.config = config 41 | self.epoch = 0 # global epoch. 42 | self.best_acc = 0 # global best accuracy. 43 | self.prefix = os.path.join('repo', config.expr) 44 | 45 | # ship model to cuda 46 | self.model = __cuda__(model) 47 | 48 | # define criterion and optimizer 49 | self.criterion = nn.CrossEntropyLoss() 50 | if config.optim.lower() in ['rmsprop']: 51 | self.optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, self.model.parameters()), 52 | lr=config.lr, 53 | weight_decay=config.weight_decay) 54 | elif config.optim.lower() in ['sgd']: 55 | self.optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.model.parameters()), 56 | lr=config.lr, 57 | weight_decay=config.weight_decay) 58 | elif config.optim.lower() in ['adam']: 59 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.model.parameters()), 60 | lr=config.lr, 61 | weight_decay=config.weight_decay) 62 | 63 | # decay learning rate by a factor of 0.5 every 10 epochs 64 | self.lr_scheduler = optim.lr_scheduler.StepLR( 65 | self.optimizer, 66 | step_size=config.lr_stepsize, 67 | gamma=config.lr_gamma) 68 | 69 | # create directory to save result if not exist. 70 | self.ckpt_path = os.path.join(self.prefix, config.stage, 'ckpt') 71 | self.log_path = os.path.join(self.prefix, config.stage, 'log') 72 | self.image_path = os.path.join(self.prefix, config.stage, 'image') 73 | mkdir_p(self.ckpt_path) 74 | mkdir_p(self.log_path) 75 | mkdir_p(self.image_path) 76 | 77 | # set logger. 
78 | self.logger = {} 79 | self.title = 'DeLF-{}'.format(config.stage.upper()) 80 | self.logger['train'] = Logger(os.path.join(self.prefix, config.stage, 'log/train.log')) 81 | self.logger['val'] = Logger(os.path.join(self.prefix, config.stage, 'log/val.log')) 82 | self.logger['train'].set_names( 83 | ['epoch','lr', 'loss', 'top1_accu', 'top3_accu', 'top5_accu']) 84 | self.logger['val'].set_names( 85 | ['epoch','lr', 'loss', 'top1_accu', 'top3_accu', 'top5_accu']) 86 | 87 | def __exit__(self): 88 | self.train_logger.close() 89 | self.val_logger.close() 90 | 91 | 92 | def __adjust_pixel_range__(self, 93 | x, 94 | range_from=[0,1], 95 | range_to=[-1,1]): 96 | ''' 97 | adjust pixel range from to . 98 | ''' 99 | if not range_from == range_to: 100 | scale = float(range_to[1]-range_to[0])/float(range_from[1]-range_from[0]) 101 | bias = range_to[0]-range_from[0]*scale 102 | x = x.mul(scale).add(bias) 103 | return x 104 | 105 | def __save_checkpoint__(self, state, ckpt='ckpt', filename='checkpoint.pth.tar'): 106 | filepath = os.path.join(ckpt, filename) 107 | torch.save(state, filepath) 108 | 109 | def __solve__(self, mode, epoch, dataloader): 110 | '''solve 111 | mode: train / val 112 | ''' 113 | batch_timer = AverageMeter() 114 | data_timer = AverageMeter() 115 | prec_losses = AverageMeter() 116 | prec_top1 = AverageMeter() 117 | prec_top3 = AverageMeter() 118 | prec_top5 = AverageMeter() 119 | 120 | if mode in ['val']: 121 | pass; 122 | #confusion_matrix = ConusionMeter() 123 | 124 | since = time.time() 125 | bar = Bar('[{}]{}'.format(mode.upper(), self.title), max=len(dataloader)) 126 | for batch_idx, (inputs, labels) in enumerate(dataloader): 127 | # measure data loading time 128 | data_timer.update(time.time() - since) 129 | 130 | # wrap inputs in variable 131 | if mode in ['train']: 132 | if __is_cuda__(): 133 | inputs = inputs.cuda() 134 | labels = labels.cuda(async=True) 135 | inputs = __to_var__(inputs) 136 | labels = __to_var__(labels) 137 | elif mode in ['val']: 138 | if __is_cuda__(): 139 | inputs = inputs.cuda() 140 | labels = labels.cuda(async=True) 141 | inputs = __to_var__(inputs, volatile=True) 142 | labels = __to_var__(labels, volatile=False) 143 | 144 | # forward 145 | outputs = self.model(inputs) 146 | loss = self.criterion(outputs, labels) 147 | 148 | # backward + optimize 149 | if mode in ['train']: 150 | self.optimizer.zero_grad() 151 | loss.backward() 152 | self.optimizer.step() 153 | 154 | # statistics 155 | prec_1, prec_3, prec_5 = compute_precision_top_k( 156 | __to_tensor__(outputs), 157 | __to_tensor__(labels), 158 | top_k=(1,3,5)) 159 | batch_size = inputs.size(0) 160 | prec_losses.update(__to_tensor__(loss)[0], batch_size) 161 | prec_top1.update(prec_1[0], batch_size) 162 | prec_top3.update(prec_3[0], batch_size) 163 | prec_top5.update(prec_5[0], batch_size) 164 | 165 | # measure elapsed time 166 | batch_timer.update(time.time() - since) 167 | since = time.time() 168 | 169 | # progress 170 | log_msg = ('\n[{mode}][epoch:{epoch}][iter:({batch}/{size})]'+ 171 | '[lr:{lr}] loss: {loss:.4f} | top1: {top1:.4f} | ' + 172 | 'top3: {top3:.4f} | top5: {top5:.4f} | eta: ' + 173 | '(data:{dt:.3f}s),(batch:{bt:.3f}s),(total:{tt:})') \ 174 | .format( 175 | mode=mode, 176 | epoch=self.epoch+1, 177 | batch=batch_idx+1, 178 | size=len(dataloader), 179 | lr=self.lr_scheduler.get_lr()[0], 180 | loss=prec_losses.avg, 181 | top1=prec_top1.avg, 182 | top3=prec_top3.avg, 183 | top5=prec_top5.avg, 184 | dt=data_timer.val, 185 | bt=batch_timer.val, 186 | tt=bar.elapsed_td) 187 | 
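            # one progress line per mini-batch: loss and top-k values come from
            # the running AverageMeters above, dt/bt from data_timer/batch_timer,
            # and elapsed_td from the progress package's Bar.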
print(log_msg) 188 | bar.next() 189 | bar.finish() 190 | 191 | # write to logger 192 | self.logger[mode].append([self.epoch+1, 193 | self.lr_scheduler.get_lr()[0], 194 | prec_losses.avg, 195 | prec_top1.avg, 196 | prec_top3.avg, 197 | prec_top5.avg]) 198 | 199 | # save model 200 | if mode == 'val' and prec_top1.avg > self.best_acc: 201 | print('best_acc={}, new_best_acc={}'.format(self.best_acc, prec_top1.avg)) 202 | self.best_acc = prec_top1.avg 203 | state = { 204 | 'epoch': self.epoch, 205 | 'acc': self.best_acc, 206 | 'optimizer': self.optimizer.state_dict(), 207 | } 208 | self.model.write_to(state) 209 | filename = 'bestshot.pth.tar' 210 | self.__save_checkpoint__(state, ckpt=self.ckpt_path, filename=filename) 211 | 212 | 213 | def train(self, mode, epoch, train_loader, val_loader): 214 | self.epoch = epoch 215 | if mode in ['train']: 216 | self.model.train() 217 | self.lr_scheduler.step() 218 | dataloader = train_loader 219 | else: 220 | assert mode == 'val' 221 | self.model.eval() 222 | dataloader = val_loader 223 | self.__solve__(mode, epoch, dataloader) 224 | 225 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | useful utils. 4 | """ 5 | from .misc import * 6 | from .logger import * 7 | from .confusionmeter import * 8 | 9 | # progress bar 10 | import os, sys 11 | sys.path.append(os.path.join(os.path.dirname(__file__), "progress")) 12 | from progress.bar import Bar as Bar 13 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | # A simple torch style logger 2 | # (C) Wei YANG 2017 3 | from __future__ import absolute_import 4 | import matplotlib.pyplot as plt 5 | import os 6 | import sys 7 | import numpy as np 8 | 9 | __all__ = ['Logger', 'LoggerMonitor', 'savefig'] 10 | 11 | def savefig(fname, dpi=None): 12 | dpi = 150 if dpi == None else dpi 13 | plt.savefig(fname, dpi=dpi) 14 | 15 | def plot_overlap(logger, names=None): 16 | names = logger.names if names == None else names 17 | numbers = logger.numbers 18 | for _, name in enumerate(names): 19 | x = np.arange(len(numbers[name])) 20 | plt.plot(x, np.asarray(numbers[name])) 21 | return [logger.title + '(' + name + ')' for name in names] 22 | 23 | class Logger(object): 24 | '''Save training process to log file with simple plot function.''' 25 | def __init__(self, fpath, title=None, resume=False): 26 | self.file = None 27 | self.resume = resume 28 | self.title = '' if title == None else title 29 | if fpath is not None: 30 | if resume: 31 | self.file = open(fpath, 'r') 32 | name = self.file.readline() 33 | self.names = name.rstrip().split('\t') 34 | self.numbers = {} 35 | for _, name in enumerate(self.names): 36 | self.numbers[name] = [] 37 | 38 | for numbers in self.file: 39 | numbers = numbers.rstrip().split('\t') 40 | for i in range(0, len(numbers)): 41 | self.numbers[self.names[i]].append(numbers[i]) 42 | self.file.close() 43 | self.file = open(fpath, 'a') 44 | else: 45 | self.file = open(fpath, 'w') 46 | 47 | def set_names(self, names): 48 | if self.resume: 49 | pass 50 | # initialize numbers as empty list 51 | self.numbers = {} 52 | self.names = names 53 | for _, name in enumerate(self.names): 54 | self.file.write(name) 55 | self.file.write('\t') 56 | self.numbers[name] = [] 57 | self.file.write('\n') 58 | self.file.flush() 59 | 60 | 
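    # append() expects one value per column registered via set_names(); it
    # writes a tab-separated row and mirrors the values into self.numbers so
    # plot() can draw the curves without re-reading the file.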
61 | def append(self, numbers): 62 | assert len(self.names) == len(numbers), 'Numbers do not match names' 63 | for index, num in enumerate(numbers): 64 | self.file.write("{0:.6f}".format(num)) 65 | self.file.write('\t') 66 | self.numbers[self.names[index]].append(num) 67 | self.file.write('\n') 68 | self.file.flush() 69 | 70 | def plot(self, names=None): 71 | names = self.names if names == None else names 72 | numbers = self.numbers 73 | for _, name in enumerate(names): 74 | x = np.arange(len(numbers[name])) 75 | plt.plot(x, np.asarray(numbers[name])) 76 | plt.legend([self.title + '(' + name + ')' for name in names]) 77 | plt.grid(True) 78 | 79 | def close(self): 80 | if self.file is not None: 81 | self.file.close() 82 | 83 | class LoggerMonitor(object): 84 | '''Load and visualize multiple logs.''' 85 | def __init__ (self, paths): 86 | '''paths is a distionary with {name:filepath} pair''' 87 | self.loggers = [] 88 | for title, path in paths.items(): 89 | logger = Logger(path, title=title, resume=True) 90 | self.loggers.append(logger) 91 | 92 | def plot(self, names=None): 93 | plt.figure() 94 | plt.subplot(121) 95 | legend_text = [] 96 | for logger in self.loggers: 97 | legend_text += plot_overlap(logger, names) 98 | plt.legend(legend_text, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) 99 | plt.grid(True) 100 | 101 | if __name__ == '__main__': 102 | # # Example 103 | # logger = Logger('test.txt') 104 | # logger.set_names(['Train loss', 'Valid loss','Test loss']) 105 | 106 | # length = 100 107 | # t = np.arange(length) 108 | # train_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 109 | # valid_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 110 | # test_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 111 | 112 | # for i in range(0, length): 113 | # logger.append([train_loss[i], valid_loss[i], test_loss[i]]) 114 | # logger.plot() 115 | 116 | # Example: logger monitor 117 | paths = { 118 | 'resadvnet20':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet20/log.txt', 119 | 'resadvnet32':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet32/log.txt', 120 | 'resadvnet44':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet44/log.txt', 121 | } 122 | 123 | field = ['Valid Acc.'] 124 | 125 | monitor = LoggerMonitor(paths) 126 | monitor.plot(names=field) 127 | savefig('test.eps') -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | '''Some helper functions for PyTorch, including: 2 | - get_mean_and_std: calculate the mean and std value of dataset. 3 | - msr_init: net parameter initialization. 4 | - progress_bar: progress bar mimic xlua.progress. 
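    - mkdir_p: create a directory tree, ignoring "already exists" errors.
    - AverageMeter: running average / current value tracker used by the solver.
    - compute_precision_top_k: top-k precision of classifier outputs.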
5 | ''' 6 | 7 | from __future__ import print_function, absolute_import 8 | 9 | import errno 10 | import os 11 | import sys 12 | import time 13 | import math 14 | 15 | import torch.nn as nn 16 | import torch.nn.init as init 17 | from torch.autograd import Variable 18 | 19 | __all__ = ['get_mean_and_std', 'init_params', 'mkdir_p', 'AverageMeter', 'compute_precision_top_k'] 20 | 21 | def compute_precision_top_k(output, target, top_k=(1,)): 22 | """Computes the precision@k for the specified values of k""" 23 | maxk = max(top_k) 24 | batch_size = target.size(0) 25 | 26 | _, pred = output.topk(maxk, 1, True, True) 27 | pred = pred.t() 28 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 29 | 30 | res = [] 31 | for k in top_k: 32 | correct_k = correct[:k].view(-1).float().sum(0) 33 | res.append(correct_k.mul_(100.0 / batch_size)) 34 | return res 35 | 36 | def mkdir_p(path): 37 | '''make dir if not exist''' 38 | try: 39 | os.makedirs(path) 40 | except OSError as exc: # Python >2.5 41 | if exc.errno == errno.EEXIST and os.path.isdir(path): 42 | pass 43 | else: 44 | raise 45 | 46 | class AverageMeter(object): 47 | """Computes and stores the average and current value 48 | Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 49 | """ 50 | def __init__(self): 51 | self.reset() 52 | 53 | def reset(self): 54 | self.val = 0 55 | self.avg = 0 56 | self.sum = 0 57 | self.count = 0 58 | 59 | def update(self, val, n=1): 60 | self.val = val 61 | self.sum += val * n 62 | self.count += n 63 | self.avg = self.sum / self.count 64 | 65 | 66 | 67 | 68 | --------------------------------------------------------------------------------
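To make the metric bookkeeping in `train/solver.py` and `utils/misc.py` concrete, here is a minimal, self-contained sketch (not part of the repository) of the quantity `compute_precision_top_k` reports for one mini-batch and the batch-size-weighted running average the solver keeps with `AverageMeter`. The batch size, class count, and tensors below are dummy values.

~~~python
# illustrative sketch only: dummy logits/labels stand in for model(inputs)
# and the ground-truth class ids used in Solver.__solve__.
import torch

batch_size, ncls = 4, 10
logits = torch.randn(batch_size, ncls)            # stand-in for model output
labels = torch.randint(0, ncls, (batch_size,))    # stand-in for target labels

# top-k precision: a sample counts as correct if its label is among the
# k highest-scoring classes.
maxk = 5
_, pred = logits.topk(maxk, dim=1, largest=True, sorted=True)   # (batch, maxk)
hits = pred.eq(labels.unsqueeze(1))                              # (batch, maxk)
for k in (1, 3, 5):
    prec_k = hits[:, :k].any(dim=1).float().mean().item() * 100.0
    print('top-{} precision: {:.2f}%'.format(k, prec_k))

# per-epoch aggregation: the solver feeds each per-batch value into an
# AverageMeter weighted by batch size; this is the same running average.
total, count = 0.0, 0
for bsz, top1 in [(4, 75.0), (4, 50.0), (2, 100.0)]:
    total += top1 * bsz
    count += bsz
print('epoch top-1 precision: {:.2f}%'.format(total / count))
~~~

Weighting each per-batch percentage by its batch size (rather than taking a plain mean of batch values) keeps the epoch figure exact even when the last batch is smaller than the rest.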