├── .gitignore ├── README.md ├── TrainClassif.ipynb ├── convertJupyter.sh ├── create_mean_std_file.py ├── dataset ├── ReadImages.ipynb ├── ReadImages.py ├── __init__.py ├── collection.ipynb └── collection.py ├── datasetTools.ipynb ├── make_plots.ipynb ├── mean_std.ipynb ├── model ├── ModelDefinition.ipynb ├── ModelDefinition.py ├── RNN.ipynb ├── RNN.py ├── Untitled.ipynb ├── __init__.py ├── cours2.ipynb ├── custom_modules.py ├── nn_utils.py ├── siamese.ipynb └── siamese.py ├── pre_proc.ipynb ├── pre_process_dataset.py ├── test ├── __init__.py ├── classif_finetune_test.py ├── classif_regions_test.py ├── instance_avg.py ├── siamese_descriptor_test.py └── siamese_regions_test.py ├── train ├── __init__.py ├── classif_finetune.py ├── classif_finetune_p.py ├── classif_regions.py ├── classif_regions_p.py ├── global_p.py ├── siamese_descriptor.py ├── siamese_descriptor_p.py ├── siamese_regions.py └── siamese_regions_p.py ├── utils.py ├── utils ├── __init__.py ├── dataset.py ├── general.py ├── image.py ├── metrics.py ├── train_classif.py ├── train_general.py └── train_siamese.py └── visualize_cnn.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Python (from Github Python gitignore) 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | env/ 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | wheels/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | *.manifest 24 | *.spec 25 | pip-log.txt 26 | pip-delete-this-directory.txt 27 | .ipynb_checkpoints 28 | .python-version 29 | .env 30 | .venv 31 | venv/ 32 | ENV/ 33 | 34 | # custom 35 | data/ 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pytorch models definition and test for image retrieval 2 | 3 | This is the implementation of the paper : 4 | 5 | Portaz, M., Kohl, M., Chevallet, J. P., Quénot, G., & Mulhem, P. (2019). Object instance identification with fully convolutional networks. Multimedia Tools and Applications, 78(3), 2747-2764. 
6 | 7 | If you use it, please cite : 8 | 9 | @article{portaz2019object, 10 | title={Object instance identification with fully convolutional networks}, 11 | author={Portaz, Maxime and Kohl, Matthias and Chevallet, Jean-Pierre and Qu{\'e}not, Georges and Mulhem, Philippe}, 12 | journal={Multimedia Tools and Applications}, 13 | volume={78}, 14 | number={3}, 15 | pages={2747--2764}, 16 | year={2019}, 17 | publisher={Springer} 18 | } 19 | 20 | ## Test several approaches for images retrieval: 21 | * Feature Extraction from Pretrained CNN 22 | * Pretrained CNN finetuning 23 | * Siamese network from scratch 24 | * Siamese network with pretrained network 25 | 26 | # TrainClassif 27 | Finetune a CNN for classification over few examples 28 | Finetune only the classifier or the entire network 29 | 30 | # TrainSiamese 31 | Train a siamese network with pairs selection for image retrieval 32 | -------------------------------------------------------------------------------- /convertJupyter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | jupyter nbconvert --to python model/ModelDefinition.ipynb 3 | jupyter nbconvert --to python dataset/collection.ipynb 4 | jupyter nbconvert --to python dataset/ReadImages.ipynb 5 | jupyter nbconvert --to python TrainClassif.ipynb 6 | jupyter nbconvert --to python model/siamese.ipynb 7 | jupyter nbconvert --to python trainSiamese.ipynb 8 | jupyter nbconvert --to python model/RNN.ipynb 9 | -------------------------------------------------------------------------------- /create_mean_std_file.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import torch 4 | import torchvision.transforms as transforms 5 | import numpy as np 6 | from utils import get_images_labels, imread_rgb 7 | from utils import match_label_fou_clean2, match_label_video 8 | 9 | # create the mean std file needed to normalize images of a dataset 10 | 11 | # path to the training images of the dataset 12 | dataset_path = 'data/pre_proc/CLICIDE_video_224sq' 13 | # file to write the mean and std values to 14 | out_path = 'data/CLICIDE_224sq_train_ms.txt' 15 | # function to match labels, this is not necessary here 16 | match_labels = match_label_video 17 | # if the image size is constant, indicate it in format (C, H, W) 18 | # if the image size is not constant, use None here 19 | image_size = (3, 224, 224) 20 | dataset_full = get_images_labels(dataset_path, match_labels) 21 | 22 | mean = [0., 0., 0.] 23 | std = [0., 0., 0.] 24 | size = len(dataset_full) 25 | if image_size is not None: 26 | T = torch.Tensor(size, *(image_size)) 27 | for i, (im, _) in enumerate(dataset_full): 28 | T[i] = transforms.ToTensor()(imread_rgb(im)) 29 | for i in range(3): 30 | mean[i] = T[:, i, :, :].mean() 31 | std[i] = T[:, i, :, :].std() 32 | else: 33 | # cannot take mean/std of whole dataset tensor. 34 | # need to compute mean of all pixels and std afterwards, pixel by pixel 35 | dataset_open = [] 36 | for im, _ in dataset_full: 37 | im_o = imread_rgb(im) / 255. 
# cv2 images are 0-255, torch tensors are 0-1 38 | im_size = im_o.shape[0] * im_o.shape[1] 39 | dataset_open.append((im_o, im_size)) 40 | for i in range(3): 41 | mean[i] += np.sum(im_o[:, :, i]) / (im_size * size) 42 | for im_o, im_size in dataset_open: 43 | for i in range(3): 44 | std[i] += np.sum(np.square(im_o[:, :, i] - mean[i])) / (im_size * size) 45 | for i in range(3): 46 | std[i] = np.sqrt(std[i]) 47 | 48 | with open(out_path, 'w') as outfile: 49 | outfile.write(' '.join(map(repr, mean))) 50 | outfile.write('\n') 51 | outfile.write(' '.join(map(repr, std))) 52 | outfile.write('\n') 53 | -------------------------------------------------------------------------------- /dataset/ReadImages.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "from __future__ import division\n", 14 | "import glob\n", 15 | "import os.path as path\n", 16 | "from PIL import Image\n", 17 | "import torchvision.transforms as transforms" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "collapsed": true, 25 | "deletable": true, 26 | "editable": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "def readImagesInCLass(folder='.'):\n", 31 | " \"\"\"\n", 32 | " Read a folder containing images with the structure :\n", 33 | " folder\n", 34 | " --class1\n", 35 | " --image1\n", 36 | " --image2\n", 37 | " --class2\n", 38 | " --image3\n", 39 | " --image3\n", 40 | " \n", 41 | " Return :\n", 42 | " list of couple : (image, class)\n", 43 | " \"\"\"\n", 44 | " \n", 45 | " exts = ('*.jpg', '*.JPG', '*.JPEG', \"*.png\")\n", 46 | " r = []\n", 47 | " for el in glob.iglob(path.join(folder, '*')):\n", 48 | " if path.isdir(el):\n", 49 | " for ext in exts:\n", 50 | " r.extend( [(im, el.split('/')[-1]) for im in glob.iglob(path.join(el, ext)) ] )\n", 51 | " return r" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": { 58 | "collapsed": true, 59 | "deletable": true, 60 | "editable": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "def readImageswithPattern(folder='.', matchFunc=lambda x:x.split('.')[0]):\n", 65 | " \"\"\"\n", 66 | " Read a folder containing images where the name of the class is in the filename\n", 67 | " the match function should return the class given the filename\n", 68 | " Return :\n", 69 | " list of couple : (image, class)\n", 70 | " \"\"\"\n", 71 | " exts = ('*.jpg', '*.JPG', '*.JPEG', \"*.png\")\n", 72 | " r = []\n", 73 | " for ext in exts:\n", 74 | " r.extend( [(im, matchFunc(im)) for im in glob.iglob(path.join(folder, ext)) ] )\n", 75 | " return r" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "metadata": { 82 | "collapsed": false, 83 | "deletable": true, 84 | "editable": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "def openAll(imageList, size=0 ):\n", 89 | " \"\"\"\n", 90 | " Open all images, return a list of PIL images\n", 91 | " \"\"\"\n", 92 | " if size == 0:\n", 93 | " return [Image.open(im) for im, c in imageList]\n", 94 | " else:\n", 95 | " return [Image.open(im).resize(size) for im, c in imageList]\n", 96 | " " 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "def openDict(imageList, 
size=(225,225)):\n", 108 | " \"\"\"\n", 109 | " Open all images, return a dictionnary of (image name : PIL image) and resize as the given size\n", 110 | " \"\"\"\n", 111 | " return {im: Image.open(im).resize(size) for im, c in imageList}" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 5, 117 | "metadata": { 118 | "collapsed": true, 119 | "deletable": true, 120 | "editable": true 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "def positiveCouples(dataset):\n", 125 | " \"\"\"\n", 126 | " Create all positive couples in the dataset\n", 127 | " \"\"\"\n", 128 | " return [ (im[0], im2[0], 1) for im in dataset for im2 in dataset if im[1]==im2[1]]" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "def negativeCouples(dataset):\n", 140 | " \"\"\"\n", 141 | " Create all negative couples in the dataset\n", 142 | " \"\"\"\n", 143 | " return [ (im[0], im2[0], -1) for im in dataset for im2 in dataset if im[1] != im2[1]]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 9, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "def createCouples(dataset):\n", 155 | " \"\"\"\n", 156 | " Create all couples in the dataset\n", 157 | " \"\"\"\n", 158 | " return [ (im[0], im2[0], 1) if im[1] == im2[1] else (im[0], im2[0], -1) for im in dataset for im2 in dataset]" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 10, 164 | "metadata": { 165 | "collapsed": false 166 | }, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "27271\n", 173 | "10502754\n", 174 | "Nb of p / nb of n : 0.260 %\n", 175 | "10530025\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "if __name__ == '__main__':\n", 181 | " dataset = readImageswithPattern('/video/CLICIDE', lambda x:x.split('/')[-1].split('-')[0]) #read Clicide dataset\n", 182 | " p = positiveCouples(dataset) #Clicide positives couples\n", 183 | " print(len(p)) #should be 27217\n", 184 | " n = negativeCouples(dataset) #Clicide negatives couples, all of them\n", 185 | " print(len(n)) #should be 10502754 (10M)\n", 186 | " print(\"Nb of p / nb of n : %.3f %%\" % (len(p)/len(n)*100)) #around 0.2% of positive examples\n", 187 | " a = createCouples(dataset)\n", 188 | " print(len(a))\n", 189 | " " 190 | ] 191 | } 192 | ], 193 | "metadata": { 194 | "kernelspec": { 195 | "display_name": "Python 2", 196 | "language": "python", 197 | "name": "python2" 198 | }, 199 | "language_info": { 200 | "codemirror_mode": { 201 | "name": "ipython", 202 | "version": 2 203 | }, 204 | "file_extension": ".py", 205 | "mimetype": "text/x-python", 206 | "name": "python", 207 | "nbconvert_exporter": "python", 208 | "pygments_lexer": "ipython2", 209 | "version": "2.7.9" 210 | } 211 | }, 212 | "nbformat": 4, 213 | "nbformat_minor": 2 214 | } 215 | -------------------------------------------------------------------------------- /dataset/ReadImages.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | from __future__ import division 7 | import glob 8 | import os.path as path 9 | from PIL import Image 10 | import torchvision.transforms as transforms 11 | 12 | 13 | # In[2]: 14 | 15 | def readImagesInCLass(folder='.'): 16 | """ 17 | Read a folder containing images with the structure : 18 | folder 19 | --class1 20 | 
--image1 21 | --image2 22 | --class2 23 | --image3 24 | --image3 25 | 26 | Return : 27 | list of couple : (image, class) 28 | """ 29 | 30 | exts = ('*.jpg', '*.JPG', '*.JPEG', "*.png") 31 | r = [] 32 | for el in glob.iglob(path.join(folder, '*')): 33 | if path.isdir(el): 34 | for ext in exts: 35 | r.extend( [(im, el.split('/')[-1]) for im in glob.iglob(path.join(el, ext)) ] ) 36 | return r 37 | 38 | 39 | # In[3]: 40 | 41 | def readImageswithPattern(folder='.', matchFunc=lambda x:x.split('.')[0]): 42 | """ 43 | Read a folder containing images where the name of the class is in the filename 44 | the match function should return the class given the filename 45 | Return : 46 | list of couple : (image, class) 47 | """ 48 | exts = ('*.jpg', '*.JPG', '*.JPEG', "*.png") 49 | r = [] 50 | for ext in exts: 51 | r.extend( [(im, matchFunc(im)) for im in glob.iglob(path.join(folder, ext)) ] ) 52 | return r 53 | 54 | 55 | # In[4]: 56 | 57 | def openAll(imageList, size=0 ): 58 | """ 59 | Open all images, return a list of PIL images 60 | """ 61 | if size == 0: 62 | return [Image.open(im) for im, c in imageList] 63 | else: 64 | return [Image.open(im).resize(size) for im, c in imageList] 65 | 66 | 67 | 68 | # In[ ]: 69 | 70 | def openDict(imageList, size=(225,225)): 71 | """ 72 | Open all images, return a dictionnary of (image name : PIL image) and resize as the given size 73 | """ 74 | return {im: Image.open(im).resize(size) for im, c in imageList} 75 | 76 | 77 | # In[5]: 78 | 79 | def positiveCouples(dataset): 80 | """ 81 | Create all positive couples in the dataset 82 | """ 83 | return [ (im[0], im2[0], 1) for im in dataset for im2 in dataset if im[1]==im2[1]] 84 | 85 | 86 | # In[6]: 87 | 88 | def negativeCouples(dataset): 89 | """ 90 | Create all negative couples in the dataset 91 | """ 92 | return [ (im[0], im2[0], -1) for im in dataset for im2 in dataset if im[1] != im2[1]] 93 | 94 | 95 | # In[9]: 96 | 97 | def createCouples(dataset): 98 | """ 99 | Create all couples in the dataset 100 | """ 101 | return [ (im[0], im2[0], 1) if im[1] == im2[1] else (im[0], im2[0], -1) for im in dataset for im2 in dataset] 102 | 103 | 104 | # In[10]: 105 | 106 | if __name__ == '__main__': 107 | dataset = readImageswithPattern('/video/CLICIDE', lambda x:x.split('/')[-1].split('-')[0]) #read Clicide dataset 108 | p = positiveCouples(dataset) #Clicide positives couples 109 | print(len(p)) #should be 27217 110 | n = negativeCouples(dataset) #Clicide negatives couples, all of them 111 | print(len(n)) #should be 10502754 (10M) 112 | print("Nb of p / nb of n : %.3f %%" % (len(p)/len(n)*100)) #around 0.2% of positive examples 113 | a = createCouples(dataset) 114 | print(len(a)) 115 | 116 | 117 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgreat/Instance-Search/2cea5f64a2d397047072a91788af81c0ea1c6d5e/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/collection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "import torch.utils.data\n", 13 | "import torchvision.transforms as transforms" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 
null, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "def ComputeMean(imagesList, h=299, w=299):\n", 25 | " \"\"\"\n", 26 | " TODO : make efficient\n", 27 | " Return the mean of the collection for each chanel RGB\n", 28 | " \"\"\"\n", 29 | " r,g,b = 0,0,0\n", 30 | " toT = transforms.ToTensor()\n", 31 | "\n", 32 | " #f = FloatProgress(min=0, max=len(imagesList))\n", 33 | " #display(f)\n", 34 | "\n", 35 | " for im in imagesList:\n", 36 | " #f.value += 1\n", 37 | " t = toT(im)\n", 38 | " for e in t[0].view(-1):\n", 39 | " r += e\n", 40 | " for e in t[1].view(-1):\n", 41 | " g += e\n", 42 | " for e in t[2].view(-1):\n", 43 | " b += e\n", 44 | " return r/(len(imagesList)*h*w), g/(len(imagesList)*h*w), b/(len(imagesList)*h*w) " 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "def ComputeStdDev(imagesList, mean):\n", 56 | " \"\"\"\n", 57 | " TODO : make efficient\n", 58 | " Return the std deviation for each channel over the collection\n", 59 | " \"\"\"\n", 60 | " toT = transforms.ToTensor()\n", 61 | " r,g,b = 0,0,0\n", 62 | " h = len(toT(imagesList[0])[0])\n", 63 | " w = len(toT(imagesList[0])[0][0])\n", 64 | " for im in imagesList:\n", 65 | " t = toT(im)\n", 66 | " for e in t[0].view(-1):\n", 67 | " r += (e - mean[0])**2\n", 68 | " for e in t[1].view(-1):\n", 69 | " g += (e - mean[1])**2\n", 70 | " for e in t[2].view(-1):\n", 71 | " b += (e - mean[2])**2\n", 72 | " return (r/(len(imagesList)*h*w))**0.5, (g/(len(imagesList)*h*w))**0.5, (b/(len(imagesList)*h*w))**0.5" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 2, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "def createConceptDict(imageList):\n", 84 | " \"\"\"\n", 85 | " Create a dictionnary that store for each concept the list of image path corresponding\n", 86 | " \"\"\"\n", 87 | " ConceptDict = {}\n", 88 | " for im in imageList:\n", 89 | " if im[1] in ConceptDict.keys():\n", 90 | " ConceptDict[im[1]].append(im[0])\n", 91 | " else:\n", 92 | " ConceptDict[im[1]] = [im[0]]\n", 93 | " return ConceptDict" 94 | ] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "Python 2", 100 | "language": "python", 101 | "name": "python2" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 2 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython2", 113 | "version": "2.7.9" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 2 118 | } 119 | -------------------------------------------------------------------------------- /dataset/collection.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | import tensorflow as tf 7 | import torch.utils.data 8 | import torchvision.transforms as transforms 9 | 10 | 11 | # In[ ]: 12 | 13 | def ComputeMean(imagesList, h=299, w=299): 14 | """ 15 | TODO : make efficient 16 | Return the mean of the collection for each chanel RGB 17 | """ 18 | r,g,b = 0,0,0 19 | toT = transforms.ToTensor() 20 | 21 | #f = FloatProgress(min=0, max=len(imagesList)) 22 | #display(f) 23 | 24 | for im in imagesList: 25 | #f.value += 1 26 | t = toT(im) 27 | for e in t[0].view(-1): 28 | r += e 29 | for e in t[1].view(-1): 30 | g += e 31 | for e in 
t[2].view(-1): 32 | b += e 33 | return r/(len(imagesList)*h*w), g/(len(imagesList)*h*w), b/(len(imagesList)*h*w) 34 | 35 | 36 | # In[ ]: 37 | 38 | def ComputeStdDev(imagesList, mean): 39 | """ 40 | TODO : make efficient 41 | Return the std deviation for each channel over the collection 42 | """ 43 | toT = transforms.ToTensor() 44 | r,g,b = 0,0,0 45 | h = len(toT(imagesList[0])[0]) 46 | w = len(toT(imagesList[0])[0][0]) 47 | for im in imagesList: 48 | t = toT(im) 49 | for e in t[0].view(-1): 50 | r += (e - mean[0])**2 51 | for e in t[1].view(-1): 52 | g += (e - mean[1])**2 53 | for e in t[2].view(-1): 54 | b += (e - mean[2])**2 55 | return (r/(len(imagesList)*h*w))**0.5, (g/(len(imagesList)*h*w))**0.5, (b/(len(imagesList)*h*w))**0.5 56 | 57 | 58 | # In[2]: 59 | 60 | def createConceptDict(imageList): 61 | """ 62 | Create a dictionnary that store for each concept the list of image path corresponding 63 | """ 64 | ConceptDict = {} 65 | for im in imageList: 66 | if im[1] in ConceptDict.keys(): 67 | ConceptDict[im[1]].append(im[0]) 68 | else: 69 | ConceptDict[im[1]] = [im[0]] 70 | return ConceptDict 71 | 72 | -------------------------------------------------------------------------------- /datasetTools.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "\"\"\"\n", 12 | "import glob\n", 13 | "with open('CliListTest.txt', \"w\") as f:\n", 14 | " l = glob.glob('/home/data/collection/GUIMUTEIC/CLICIDE/CLICIDEMAX/test/*.JPG')\n", 15 | " print(len(l))\n", 16 | " for a in l:\n", 17 | " if not 'wall' in a:\n", 18 | " f.write(a+\"\\n\")\n", 19 | "\"\"\"\n", 20 | "with open('FouList.txt', \"r\") as f:\n", 21 | " with open('FouConcept.txt' ,\"w\") as fout:\n", 22 | " a = set()\n", 23 | " for l in f:\n", 24 | " floor, nb, _ = l.split('/')[-1].split('_')\n", 25 | " a.add(floor+'_'+nb)\n", 26 | " for e in a:\n", 27 | " fout.write(e+'\\n')\n" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "kernelspec": { 33 | "display_name": "Python 2", 34 | "language": "python", 35 | "name": "python2" 36 | }, 37 | "language_info": { 38 | "codemirror_mode": { 39 | "name": "ipython", 40 | "version": 2 41 | }, 42 | "file_extension": ".py", 43 | "mimetype": "text/x-python", 44 | "name": "python", 45 | "nbconvert_exporter": "python", 46 | "pygments_lexer": "ipython2", 47 | "version": "2.7.9" 48 | } 49 | }, 50 | "nbformat": 4, 51 | "nbformat_minor": 2 52 | } 53 | -------------------------------------------------------------------------------- /mean_std.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import glob\n", 14 | "from os import path\n", 15 | "import torch\n", 16 | "import torchvision.transforms as transforms\n", 17 | "from PIL import Image\n", 18 | "from dataset.ReadImages import readImageswithPattern" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": true, 26 | "deletable": true, 27 | "editable": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "dataset_path = '/home/mrim/kohlm/nnForRetrieval/data/pre_proc/fourviere_227sq'\n", 32 | "dataset_test = 
'/home/mrim/kohlm/nnForRetrieval/data/pre_proc/fourviere_227sq/test'" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": { 39 | "collapsed": false, 40 | "deletable": true, 41 | "editable": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "def match(x):\n", 46 | " return x.split('/')[-1].split('-')[0]\n", 47 | "\n", 48 | "trainSetFull = readImageswithPattern(dataset_path, match)\n", 49 | "testSetFull = readImageswithPattern(dataset_test, match)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": { 56 | "collapsed": false, 57 | "deletable": true, 58 | "editable": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "labels = list(set([t[1] for t in trainSetFull]))\n", 63 | "size = sum(1 for _, lab in trainSetFull if lab in labels)\n", 64 | "T = torch.Tensor(size, 3, 227, 227)\n", 65 | "i = 0\n", 66 | "for img, lab in trainSetFull:\n", 67 | " if lab in labels:\n", 68 | " im = Image.open(img)\n", 69 | " T[i] = transforms.ToTensor()(im)\n", 70 | " i += 1" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": { 77 | "collapsed": false, 78 | "deletable": true, 79 | "editable": true 80 | }, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "[0.3643357148734363, 0.304306334270731, 0.2774018310814609]\n", 87 | "[0.21223013632973034, 0.2003156783406293, 0.19758758196073448]\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "mean = [0, 0, 0]\n", 93 | "std = [0, 0, 0]\n", 94 | "for i in range(3):\n", 95 | " mean[i] = T[:, i, :, :].mean()\n", 96 | " std[i] = T[:, i, :, :].std()\n", 97 | "print(mean)\n", 98 | "print(std)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": true, 106 | "deletable": true, 107 | "editable": true 108 | }, 109 | "outputs": [], 110 | "source": [] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python 2", 116 | "language": "python", 117 | "name": "python2" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 2 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython2", 129 | "version": "2.7.9" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 2 134 | } 135 | -------------------------------------------------------------------------------- /model/ModelDefinition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import torch\n", 12 | "import torch.nn as nn\n", 13 | "import torch.nn.parallel" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "class maxnet(nn.Module):\n", 25 | " def __init__(self, nbClass=464):\n", 26 | " super(maxnet, self).__init__()\n", 27 | " self.features = nn.Sequential(\n", 28 | " nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),\n", 29 | " nn.ReLU(True),\n", 30 | " nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1)),\n", 31 | " nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),\n", 32 | " nn.ReLU(True),\n", 33 | " nn.MaxPool2d((3, 3), stride=(2, 2), 
dilation=(1, 1)),\n", 34 | " nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),\n", 35 | " nn.ReLU(True),\n", 36 | " nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),\n", 37 | " nn.ReLU(True),\n", 38 | " nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),\n", 39 | " nn.ReLU(True),\n", 40 | " nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1))\n", 41 | " )\n", 42 | " self.classifier = nn.Sequential(\n", 43 | " nn.Dropout(),\n", 44 | " nn.Linear(256 * 6 * 6, 4096),\n", 45 | " nn.ReLU(inplace=True),\n", 46 | " nn.Dropout(),\n", 47 | " nn.Linear(4096, 4096),\n", 48 | " nn.ReLU(inplace=True),\n", 49 | " nn.Linear(4096, nbClass),\n", 50 | " )\n", 51 | "\n", 52 | " def forward(self, x):\n", 53 | " x = self.features(x)\n", 54 | " x = x.view(x.size(0), -1)\n", 55 | " x = self.classifier(x)\n", 56 | " return x" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def Maxnet(nbClass=464):\n", 68 | " return maxnet(nbClass)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "def copyParameters(net, modelBase):\n", 80 | " \"\"\"\n", 81 | " Copy parameters from a model to another\n", 82 | " \"\"\"\n", 83 | " #for each feature\n", 84 | " for i, f in enumerate(net.features):\n", 85 | " if type(f) is torch.nn.modules.conv.Conv2d:\n", 86 | " #we copy convolution parameters\n", 87 | " f.weight.data = modelBase.features[i].weight.data\n", 88 | " f.bias.data = modelBase.features[i].bias.data\n", 89 | "\n", 90 | " #for each classifier element\n", 91 | " for i, f in enumerate(net.classifier):\n", 92 | " if type(f) is torch.nn.modules.linear.Linear:\n", 93 | " #we copy fully connected parameters\n", 94 | " if f.weight.size() == modelBase.classifier[i].weight.size():\n", 95 | " f.weight.data = modelBase.classifier[i].weight.data\n", 96 | " f.bias.data = modelBase.classifier[i].bias.data" 97 | ] 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Python 2", 103 | "language": "python", 104 | "name": "python2" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 2 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython2", 116 | "version": "2.7.9" 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 2 121 | } 122 | -------------------------------------------------------------------------------- /model/ModelDefinition.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.parallel 9 | 10 | 11 | # In[2]: 12 | 13 | class maxnet(nn.Module): 14 | def __init__(self, nbClass=464): 15 | super(maxnet, self).__init__() 16 | self.features = nn.Sequential( 17 | nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)), 18 | nn.ReLU(True), 19 | nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1)), 20 | nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)), 21 | nn.ReLU(True), 22 | nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1)), 23 | nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 24 | nn.ReLU(True), 25 | nn.Conv2d(384, 256, kernel_size=(3, 3), 
stride=(1, 1), padding=(1, 1)), 26 | nn.ReLU(True), 27 | nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 28 | nn.ReLU(True), 29 | nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1)) 30 | ) 31 | self.classifier = nn.Sequential( 32 | nn.Dropout(), 33 | nn.Linear(256 * 6 * 6, 4096), 34 | nn.ReLU(inplace=True), 35 | nn.Dropout(), 36 | nn.Linear(4096, 4096), 37 | nn.ReLU(inplace=True), 38 | nn.Linear(4096, nbClass), 39 | ) 40 | 41 | def forward(self, x): 42 | x = self.features(x) 43 | x = x.view(x.size(0), -1) 44 | x = self.classifier(x) 45 | return x 46 | 47 | 48 | # In[ ]: 49 | 50 | def Maxnet(nbClass=464): 51 | return maxnet(nbClass) 52 | 53 | 54 | # In[ ]: 55 | 56 | def copyParameters(net, modelBase): 57 | """ 58 | Copy parameters from a model to another 59 | """ 60 | #for each feature 61 | for i, f in enumerate(net.features): 62 | if type(f) is torch.nn.modules.conv.Conv2d: 63 | #we copy convolution parameters 64 | f.weight.data = modelBase.features[i].weight.data 65 | f.bias.data = modelBase.features[i].bias.data 66 | 67 | #for each classifier element 68 | for i, f in enumerate(net.classifier): 69 | if type(f) is torch.nn.modules.linear.Linear: 70 | #we copy fully connected parameters 71 | if f.weight.size() == modelBase.classifier[i].weight.size(): 72 | f.weight.data = modelBase.classifier[i].weight.data 73 | f.bias.data = modelBase.classifier[i].bias.data 74 | 75 | -------------------------------------------------------------------------------- /model/RNN.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[2]: 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torchvision.models as models 9 | from torch.autograd import Variable 10 | 11 | 12 | # In[ ]: 13 | 14 | class RNN(nn.Module): 15 | """ 16 | Define a RNN network 17 | """ 18 | def __init__(self, net, hidden_size): 19 | super(siamese, self).__init__() 20 | self.features = net 21 | self.rnn = nn.LSTMCell(input_size=net.classifier[len(net.classifier._modules)-1], hidden_size=hidden_size) 22 | 23 | def forward(self, x, hx, cx): 24 | x = self.features(x) 25 | x = self.rnn(x, hx, cx) 26 | return x, hx 27 | 28 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgreat/Instance-Search/2cea5f64a2d397047072a91788af81c0ea1c6d5e/model/__init__.py -------------------------------------------------------------------------------- /model/cours2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 71, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import torch\n", 12 | "import torch.nn as nn\n", 13 | "import torchvision.models as models\n", 14 | "import torchvision.transforms as transforms\n", 15 | "import random" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 5, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "class net1(nn.Module):\n", 27 | " def __init__(self):\n", 28 | " super(net1, self).__init__()\n", 29 | " self.layer1 = nn.Linear(225*225*3, 1000)\n", 30 | " self.relu = nn.ReLU(inplace=True) \n", 31 | " \n", 32 | " def forward(self, x):\n", 33 | " \"\"\"\n", 34 | " x est le vecteur d'entrée\n", 35 | " \"\"\"\n", 36 | " y = self.layer1(x)\n", 37 | " y = 
self.relu(y)\n", 38 | " return y" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 28, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "alex = models.alexnet(pretrained=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 31, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "AlexNet (\n", 64 | " (features): Sequential (\n", 65 | " (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))\n", 66 | " (1): ReLU (inplace)\n", 67 | " (2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))\n", 68 | " (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n", 69 | " (4): ReLU (inplace)\n", 70 | " (5): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))\n", 71 | " (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 72 | " (7): ReLU (inplace)\n", 73 | " (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 74 | " (9): ReLU (inplace)\n", 75 | " (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 76 | " (11): ReLU (inplace)\n", 77 | " (12): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))\n", 78 | " )\n", 79 | " (classifier): Sequential (\n", 80 | " (0): Dropout (p = 0.5)\n", 81 | " (1): Linear (9216 -> 4096)\n", 82 | " (2): ReLU (inplace)\n", 83 | " (3): Dropout (p = 0.5)\n", 84 | " (4): Linear (4096 -> 4096)\n", 85 | " (5): ReLU (inplace)\n", 86 | " (6): Linear (4096 -> 1000)\n", 87 | " )\n", 88 | ")\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "print(alex)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 23, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "import PIL.Image as Image\n", 105 | "im = Image.open(\"/video/CLICIDE/10A-0.JPG\")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 27, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "t = torch.Tensor(1, 3, 225, 225)\n", 117 | "trans = transforms.ToTensor()\n", 118 | "t[0] = trans(im.resize( (225, 225) ))" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 29, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "output = alex(Variable(t))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 32, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "(Variable containing:\n", 143 | " 9.5034\n", 144 | " [torch.FloatTensor of size 1x1], Variable containing:\n", 145 | " 669\n", 146 | " [torch.LongTensor of size 1x1])" 147 | ] 148 | }, 149 | "execution_count": 32, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "output.max(1)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 33, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "class AlexNet(nn.Module):\n", 167 | " def __init__(self, num_classes=1000):\n", 168 | " super(AlexNet, self).__init__()\n", 169 | " self.features = nn.Sequential(\n", 170 | " nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),\n", 171 | " nn.ReLU(inplace=True),\n", 172 | " nn.MaxPool2d(kernel_size=3, stride=2),\n", 173 | " nn.Conv2d(64, 192, kernel_size=5, 
padding=2),\n", 174 | " nn.ReLU(inplace=True),\n", 175 | " nn.MaxPool2d(kernel_size=3, stride=2),\n", 176 | " nn.Conv2d(192, 384, kernel_size=3, padding=1),\n", 177 | " nn.ReLU(inplace=True),\n", 178 | " nn.Conv2d(384, 256, kernel_size=3, padding=1),\n", 179 | " nn.ReLU(inplace=True),\n", 180 | " nn.Conv2d(256, 256, kernel_size=3, padding=1),\n", 181 | " nn.ReLU(inplace=True),\n", 182 | " nn.MaxPool2d(kernel_size=3, stride=2),\n", 183 | " )\n", 184 | " self.classifier = nn.Sequential(\n", 185 | " nn.Dropout(),\n", 186 | " nn.Linear(256 * 6 * 6, 4096),\n", 187 | " nn.ReLU(inplace=True),\n", 188 | " nn.Dropout(),\n", 189 | " nn.Linear(4096, 4096),\n", 190 | " nn.ReLU(inplace=True),\n", 191 | " nn.Linear(4096, num_classes),\n", 192 | " )\n", 193 | "\n", 194 | " def forward(self, x):\n", 195 | " x = self.features(x)\n", 196 | " x = x.view(x.size(0), 256 * 6 * 6)\n", 197 | " x = self.classifier(x)\n", 198 | " return x\n", 199 | "\n" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 47, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "AlexNet (\n", 213 | " (features): Sequential (\n", 214 | " (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))\n", 215 | " (1): ReLU (inplace)\n", 216 | " (2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))\n", 217 | " (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n", 218 | " (4): ReLU (inplace)\n", 219 | " (5): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))\n", 220 | " (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 221 | " (7): ReLU (inplace)\n", 222 | " (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 223 | " (9): ReLU (inplace)\n", 224 | " (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 225 | " (11): ReLU (inplace)\n", 226 | " (12): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))\n", 227 | " )\n", 228 | " (classifier): Sequential (\n", 229 | " (0): Dropout (p = 0.5)\n", 230 | " (1): Linear (9216 -> 4096)\n", 231 | " (2): ReLU (inplace)\n", 232 | " (3): Dropout (p = 0.5)\n", 233 | " (4): Linear (4096 -> 4096)\n", 234 | " (5): ReLU (inplace)\n", 235 | " (6): Linear (4096 -> 464)\n", 236 | " )\n", 237 | ")" 238 | ] 239 | }, 240 | "execution_count": 47, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "alex464 = models.AlexNet(464)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 54, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [ 256 | { 257 | "name": "stdout", 258 | "output_type": "stream", 259 | "text": [ 260 | "\n", 261 | "\n", 262 | "\n", 263 | "\n", 264 | "\n", 265 | "\n", 266 | "\n" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "for c in alex.classifier:\n", 272 | " print(type(c))" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 60, 278 | "metadata": { 279 | "collapsed": true 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "def copyParameters(net, netBase):\n", 284 | " for i, f in enumerate(net.features):\n", 285 | " if type(f) is torch.nn.modules.conv.Conv2d:\n", 286 | " f.weight.data = netBase.features[i].weight.data\n", 287 | " f.bias.data = netBase.features[i].bias.data\n", 288 | " for i, c in enumerate(net.classifier):\n", 289 | " if type(c) is torch.nn.modules.linear.Linear:\n", 290 | " if c.weight.size() == netBase.classifier[i].weight.size():\n", 291 
| " c.weight.data = netBase.classifier[i].weight.data\n", 292 | " c.bias.data = netBase.classifier[i].bias.data" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 56, 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "copyParameters(alex464, models.alexnet(pretrained=True))" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 63, 309 | "metadata": { 310 | "collapsed": false 311 | }, 312 | "outputs": [], 313 | "source": [ 314 | "import torch.optim as optim\n", 315 | "criterion = nn.loss.CrossEntropyLoss()\n", 316 | "optimizer = optim.SGD(alex464.parameters(), lr=0.01, momentum=0.9)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 116, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "import os.path as path\n", 328 | "import glob\n", 329 | "def readTrainingSet(folder=\".\"):\n", 330 | " \"\"\"\n", 331 | " Lit un dossier contenant des images\n", 332 | " Retourne la liste d'image avec leur classe\n", 333 | " \"\"\"\n", 334 | " matchFunc = lambda x: x.split('/')[-1].split('-')[0]\n", 335 | " \n", 336 | " exts = ('*.jpg', '*.JPG', '*.JPEG', \"*.png\")\n", 337 | " r = []\n", 338 | " for ext in exts:\n", 339 | " r.extend( [(im, matchFunc(im)) for im in glob.iglob(path.join(folder, ext)) if not 'wall' in im] )\n", 340 | " return r" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 117, 346 | "metadata": { 347 | "collapsed": false 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "trainset = readTrainingSet(\"/video/CLICIDE/\")" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 118, 357 | "metadata": { 358 | "collapsed": false 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "listLabel = [t[1] for t in trainset if not 'wall' in t[1]]" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 119, 368 | "metadata": { 369 | "collapsed": false 370 | }, 371 | "outputs": [ 372 | { 373 | "name": "stdout", 374 | "output_type": "stream", 375 | "text": [ 376 | "464\n" 377 | ] 378 | } 379 | ], 380 | "source": [ 381 | "s = set(listLabel)\n", 382 | "s = list(s)\n", 383 | "print(len(s))" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": { 390 | "collapsed": false, 391 | "scrolled": true 392 | }, 393 | "outputs": [ 394 | { 395 | "name": "stdout", 396 | "output_type": "stream", 397 | "text": [ 398 | "[1, 10] loss: 6.143\n", 399 | "[1, 20] loss: 6.084\n", 400 | "[1, 30] loss: 6.041\n", 401 | "[1, 40] loss: 5.915\n", 402 | "[1, 50] loss: 5.707\n", 403 | "[1, 60] loss: 5.503\n", 404 | "[1, 70] loss: 5.481\n", 405 | "[1, 80] loss: 5.295\n", 406 | "[1, 90] loss: 5.075\n", 407 | "[1, 100] loss: 4.737\n", 408 | "[2, 10] loss: 3.990\n", 409 | "[2, 20] loss: 4.320\n", 410 | "[2, 30] loss: 4.300\n", 411 | "[2, 40] loss: 3.964\n", 412 | "[2, 50] loss: 4.021\n", 413 | "[2, 60] loss: 3.972\n", 414 | "[2, 70] loss: 4.121\n", 415 | "[2, 80] loss: 4.176\n", 416 | "[2, 90] loss: 4.125\n", 417 | "[2, 100] loss: 3.864\n", 418 | "[3, 10] loss: 2.850\n", 419 | "[3, 20] loss: 3.330\n", 420 | "[3, 30] loss: 3.021\n", 421 | "[3, 40] loss: 3.085\n", 422 | "[3, 50] loss: 3.290\n", 423 | "[3, 60] loss: 3.062\n", 424 | "[3, 70] loss: 2.940\n", 425 | "[3, 80] loss: 2.637\n", 426 | "[3, 90] loss: 2.795\n", 427 | "[3, 100] loss: 2.678\n", 428 | "[4, 10] loss: 1.977\n", 429 | "[4, 70] loss: 1.932\n", 430 | "[4, 80] loss: 2.296\n", 431 | 
"[4, 90] loss: 2.013\n", 432 | "[4, 100] loss: 2.111\n", 433 | "[5, 10] loss: 1.562\n", 434 | "[5, 20] loss: 1.551\n", 435 | "[5, 30] loss: 1.635\n", 436 | "[5, 40] loss: 1.625\n", 437 | "[5, 50] loss: 1.590\n", 438 | "[5, 60] loss: 1.645\n", 439 | "[5, 70] loss: 1.597\n", 440 | "[5, 80] loss: 1.866\n", 441 | "[5, 90] loss: 2.272\n", 442 | "[5, 100] loss: 2.033\n", 443 | "[6, 10] loss: 1.091\n", 444 | "[6, 20] loss: 1.133\n", 445 | "[6, 30] loss: 1.268\n", 446 | "[6, 40] loss: 0.939\n", 447 | "[6, 50] loss: 0.975\n", 448 | "[6, 60] loss: 1.105\n", 449 | "[6, 70] loss: 1.212\n", 450 | "[6, 80] loss: 1.435\n", 451 | "[6, 90] loss: 1.345\n", 452 | "[6, 100] loss: 1.096\n", 453 | "[7, 10] loss: 0.758\n", 454 | "[7, 20] loss: 0.764\n", 455 | "[7, 30] loss: 0.777\n", 456 | "[7, 40] loss: 0.684\n", 457 | "[7, 50] loss: 0.759\n", 458 | "[7, 60] loss: 0.750\n", 459 | "[7, 70] loss: 0.798\n", 460 | "[7, 80] loss: 0.874\n", 461 | "[7, 90] loss: 0.891\n", 462 | "[7, 100] loss: 0.773\n", 463 | "[8, 10] loss: 0.472\n", 464 | "[8, 20] loss: 0.424\n", 465 | "[8, 30] loss: 0.456\n", 466 | "[8, 40] loss: 0.737\n", 467 | "[8, 50] loss: 0.534\n", 468 | "[8, 60] loss: 0.558\n", 469 | "[8, 70] loss: 0.593\n", 470 | "[8, 80] loss: 0.607\n", 471 | "[8, 90] loss: 0.521\n", 472 | "[8, 100] loss: 0.552\n", 473 | "[9, 10] loss: 0.429\n", 474 | "[9, 20] loss: 0.602\n", 475 | "[9, 30] loss: 0.457\n", 476 | "[9, 40] loss: 0.694\n", 477 | "[9, 50] loss: 0.546\n", 478 | "[9, 60] loss: 0.453\n", 479 | "[9, 70] loss: 0.541\n", 480 | "[9, 80] loss: 0.514\n", 481 | "[9, 90] loss: 0.590\n", 482 | "[9, 100] loss: 0.704\n", 483 | "[10, 10] loss: 0.502\n", 484 | "[10, 20] loss: 0.533\n" 485 | ] 486 | } 487 | ], 488 | "source": [ 489 | "batchSize = 32\n", 490 | "alex464\n", 491 | "trans = transforms.ToTensor()\n", 492 | "for epoch in range(10):\n", 493 | " \"\"\"\n", 494 | " On parcourt l'ensemble du training set\n", 495 | " \"\"\"\n", 496 | " alex464.train()\n", 497 | " running_loss = 0.0\n", 498 | " random.shuffle(trainset) \n", 499 | " for i in range(len(trainset)/batchSize):\n", 500 | " \"\"\"\n", 501 | " 1. Charge batchSize images\n", 502 | " 2. 
Backprop\n", 503 | " \"\"\"\n", 504 | " inputs = torch.Tensor(batchSize, 3, 225, 225)\n", 505 | " for j in range(batchSize):\n", 506 | " inputs[j] = trans(Image.open(trainset[i*batchSize+j][0]).resize( (225, 225) ))\n", 507 | " inputs = Variable(inputs)\n", 508 | " \n", 509 | " lab = Variable(torch.LongTensor([s.index(trainset[i*batchSize+j][1]) for j in range(batchSize)]))\n", 510 | " optimizer.zero_grad()\n", 511 | " \n", 512 | " outputs = alex464(inputs)\n", 513 | " loss = criterion(outputs, lab)\n", 514 | " loss.backward()\n", 515 | " optimizer.step()\n", 516 | " \n", 517 | " running_loss += loss.data[0]\n", 518 | " if i % 10 == 9: # print every 10 mini-batches\n", 519 | " print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss / 10))\n", 520 | " running_loss = 0.0\n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \"\"\"\n", 525 | " Eval\n", 526 | " \"\"\"" 527 | ] 528 | } 529 | ], 530 | "metadata": { 531 | "kernelspec": { 532 | "display_name": "Python 2", 533 | "language": "python", 534 | "name": "python2" 535 | }, 536 | "language_info": { 537 | "codemirror_mode": { 538 | "name": "ipython", 539 | "version": 2 540 | }, 541 | "file_extension": ".py", 542 | "mimetype": "text/x-python", 543 | "name": "python", 544 | "nbconvert_exporter": "python", 545 | "pygments_lexer": "ipython2", 546 | "version": "2.7.9" 547 | } 548 | }, 549 | "nbformat": 4, 550 | "nbformat_minor": 2 551 | } 552 | -------------------------------------------------------------------------------- /model/custom_modules.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Function 6 | from torch.nn.parameter import Parameter 7 | import numpy as np 8 | 9 | 10 | # function to shift an input with a trainable parameter 11 | class ShiftFun(Function): 12 | 13 | def __init__(self): 14 | super(ShiftFun, self).__init__() 15 | 16 | def forward(self, input, param): 17 | self.save_for_backward(input, param) 18 | return input + param.view(1, -1).expand_as(input) 19 | 20 | def backward(self, grad_output): 21 | input, param = self.saved_tensors 22 | grad_input = grad_output.clone() 23 | buf = param.clone().resize_(input.size(0)).fill_(1) 24 | grad_param = torch.mv(grad_output.t(), buf) 25 | return grad_input, grad_param 26 | 27 | 28 | class Shift(nn.Module): 29 | 30 | def __init__(self, n_features): 31 | super(Shift, self).__init__() 32 | self.param = Parameter(torch.Tensor(n_features)) 33 | self.reset_parameters() 34 | 35 | def reset_parameters(self): 36 | self.param.data.fill_(0) 37 | 38 | def forward(self, input): 39 | return ShiftFun()(input, self.param) 40 | 41 | 42 | # autograd function to normalize an input over the rows 43 | # (each vector of a batch is normalized) 44 | # the backward step follows the implementation of 45 | # torch.legacy.nn.Normalize closely 46 | class NormalizeL2Fun(Function): 47 | 48 | def __init__(self, eps=1e-10): 49 | super(NormalizeL2Fun, self).__init__() 50 | self.eps = eps 51 | 52 | def forward(self, input): 53 | self.save_for_backward(input) 54 | self.norm2 = input.pow(2).sum(1).add_(self.eps) 55 | self.norm = self.norm2.pow(0.5) 56 | output = input / self.norm.expand_as(input) 57 | return output 58 | 59 | def backward(self, grad_output): 60 | input = self.saved_tensors[0] 61 | grad_input = self.norm2.expand_as(input) * grad_output 62 | cross = (input * grad_output).sum(1) 63 | buf = input * cross.expand_as(input) 64 | grad_input.add_(-1, buf) 65 | cross = 
self.norm2 * self.norm 66 | grad_input.div_(cross.expand_as(grad_input)) 67 | return grad_input 68 | 69 | 70 | class NormalizeL2(nn.Module): 71 | 72 | def __init__(self): 73 | super(NormalizeL2, self).__init__() 74 | 75 | def forward(self, input): 76 | return NormalizeL2Fun()(input) 77 | 78 | 79 | # metric loss according to Chopra et al "Learning a Similarity Metric Discriminatively, with Application to Face Verification" 80 | # since we assume normalized vectors, we use Q=2 81 | class MetricLossFun(Function): 82 | 83 | def __init__(self, size_average=True): 84 | super(MetricLossFun, self).__init__() 85 | self.size_average = size_average 86 | 87 | # TODO: more things could be done inplace 88 | # this is difficult and probs unnecessary though 89 | def terms(self, input1, input2, y): 90 | diff = input1 - input2 91 | energy = diff.norm(1, 1) 92 | e = (energy * 0).add_(np.e) # fill with e, same shape as energy 93 | exp_term = e.pow_((-2.77 * energy).div_(2)) 94 | return diff, energy, exp_term 95 | 96 | # target takes values in 1 (good), -1 (bad) so (1-target)/2 is 0 for good pairs and 1 for bad ones, (1+target) / 2 inverse 97 | def forward(self, input1, input2, y): 98 | self.save_for_backward(input1, input2, y) 99 | _, energy, exp_term = self.terms(input1, input2, y) 100 | loss = energy.mul_(energy).mul_(1 + y).div_(2) 101 | loss.add_(exp_term.mul_(1 - y).mul_(2)) 102 | loss = loss.sum(0).view(1) 103 | if self.size_average: 104 | loss.div_(y.size(0)) 105 | return loss 106 | 107 | def backward(self, grad_output): 108 | input1, input2, y = self.saved_tensors 109 | diff, energy, exp_term = self.terms(input1, input2, y) 110 | diff[diff.lt(0)] = -1 111 | diff[diff.ge(0)] = 1 112 | energy = energy.expand_as(input1) 113 | exp_term = exp_term.expand_as(input1) 114 | y_g = (1 + y).view(-1, 1).expand_as(input1) 115 | y_i = (1 - y).view(-1, 1).expand_as(input1) 116 | y_g = y_g.mul(diff).mul_(energy) 117 | y_i = y_i.mul(2.77).mul_(diff).mul_(exp_term) 118 | grad1 = y_g.add_(-1, y_i) 119 | grad2 = -grad1 120 | if self.size_average: 121 | grad1.div_(y.size(0)) 122 | grad2.div_(y.size(0)) 123 | g = grad_output[0] 124 | if g != 1: 125 | grad1.mul_(g) 126 | grad2.mul_(g) 127 | return grad1, grad2, None 128 | 129 | 130 | class MetricLoss(nn.Module): 131 | 132 | def __init__(self, size_average=True): 133 | super(MetricLoss, self).__init__() 134 | self.size_average = size_average 135 | 136 | def forward(self, input1, input2, target): 137 | return MetricLossFun(self.size_average)(input1, input2, target) 138 | 139 | 140 | class TripletLossFun(Function): 141 | 142 | def __init__(self, margin, size_average=True, normalized=True): 143 | super(TripletLossFun, self).__init__() 144 | self.size_average = size_average 145 | self.margin = margin 146 | self.normalized = normalized 147 | 148 | # calculate for each sample i: 149 | # 1/2 (||anchor_i - pos_i||^2 - ||anchor_i - neg_i||^2 + 2margin) 150 | # then clamp to positive values and sum over all samples 151 | # when normalized, ||x1-x2||^2 = 2 - 2x1.x2 152 | # so the loss for i becomes: anchor_i . neg_i - anchor_i . 
pos_i + margin 153 | def forward(self, anchor, pos, neg): 154 | self.save_for_backward(anchor, pos, neg) 155 | if self.normalized: 156 | loss = (anchor * neg).sum(1) 157 | loss.add_(-1, (anchor * pos).sum(1)) 158 | loss.add_(self.margin) 159 | else: 160 | sqdiff_pos = (anchor - pos).pow_(2) 161 | sqdiff_neg = (anchor - neg).pow_(2) 162 | loss = sqdiff_pos.sum(1) 163 | loss.add_(-1, sqdiff_neg.sum(1)) 164 | loss.add_(self.margin * 2) 165 | loss.div_(2) 166 | self.clamp = torch.le(loss, 0) 167 | loss[self.clamp] = 0 168 | loss = loss.sum(0).view(1) 169 | if self.size_average: 170 | loss.div_(anchor.size(0)) 171 | return loss 172 | 173 | def backward(self, grad_output): 174 | # grad_pos = -(anchor_i - pos_i) for sample i 175 | # grad_neg = (anchor_i - neg_i) 176 | # grad_anchor = (anchor_i - pos_i) - (anchor_i - neg_i) 177 | # = (neg_i - pos_i) 178 | # if normalized: grad_pos = -anchor_i, grad_neg = anchor_i 179 | # grad_anchor = neg_i - pos_i 180 | anchor, pos, neg = self.saved_tensors 181 | if self.normalized: 182 | grad_anchor = neg - pos 183 | grad_pos = -anchor 184 | grad_neg = -grad_pos 185 | else: 186 | grad_anchor = neg - pos 187 | grad_pos = pos - anchor 188 | grad_neg = anchor - neg 189 | c = self.clamp.expand_as(anchor) 190 | grad_anchor[c] = 0 191 | grad_pos[c] = 0 192 | grad_neg[c] = 0 193 | 194 | if self.size_average: 195 | grad_anchor.div_(anchor.size(0)) 196 | grad_pos.div_(anchor.size(0)) 197 | grad_neg.div_(anchor.size(0)) 198 | g = grad_output[0] 199 | if g != 1: 200 | grad_anchor = grad_anchor.mul_(g) 201 | grad_pos = grad_pos.mul_(g) 202 | grad_neg = grad_neg.mul_(g) 203 | return grad_anchor, grad_pos, grad_neg 204 | 205 | 206 | class TripletLoss(nn.Module): 207 | 208 | def __init__(self, margin, size_average=True, normalized=True): 209 | super(TripletLoss, self).__init__() 210 | self.size_average = size_average 211 | self.margin = margin 212 | self.normalized = normalized 213 | 214 | def forward(self, anchor, pos, neg): 215 | return TripletLossFun(self.margin, self.size_average, self.normalized)(anchor, pos, neg) 216 | -------------------------------------------------------------------------------- /model/nn_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torchvision.models as models 3 | 4 | 5 | # n < 0 sets all modules/blocks to be untrained 6 | def set_untrained_blocks(containers, n): 7 | # first make sure everything is trainable (not trainable if n<0) 8 | for container in containers: 9 | for m in container: 10 | for p in m.parameters(): 11 | p.requires_grad = n >= 0 12 | 13 | count = 0 14 | for seq in containers: 15 | for m in seq: 16 | if count >= n: 17 | break 18 | if sum(1 for _ in m.parameters()) <= 0: 19 | # exclude modules without params from count 20 | continue 21 | for p in m.parameters(): 22 | p.requires_grad = False 23 | count += 1 24 | 25 | 26 | def convolutionalize(fc, in_size2d): 27 | # Turn an FC layer into Conv2D layer by copying weights the right way 28 | out_size = fc.out_features 29 | in_size_total = fc.in_features 30 | if in_size_total % (in_size2d[0] * in_size2d[1]) != 0: 31 | raise ValueError('FC in_feature size {0} is not divisible by in_size2d {1}'.format(in_size_total, in_size2d)) 32 | in_channels = in_size_total // (in_size2d[0] * in_size2d[1]) 33 | has_bias = fc.bias is not None 34 | conv = nn.Conv2d(in_channels, out_size, in_size2d, bias=has_bias) 35 | if has_bias: 36 | conv.bias.data = fc.bias.data.clone() 37 | for i in range(out_size): 38 | conv.weight.data[i] 
= fc.weight.data[i].view(in_channels, *in_size2d).clone() 39 | return conv 40 | 41 | 42 | def get_feature_size(seq, factor=1, default=-1): 43 | feature_size = default 44 | for module in seq: 45 | if isinstance(module, models.resnet.Bottleneck): 46 | feature_size = module.conv3.out_channels * factor 47 | if isinstance(module, models.resnet.BasicBlock): 48 | feature_size = module.conv2.out_channels * factor 49 | if isinstance(module, nn.modules.Conv2d): 50 | feature_size = module.out_channels * factor 51 | if isinstance(module, nn.modules.linear.Linear): 52 | feature_size = module.out_features 53 | return feature_size 54 | 55 | 56 | def extract_layers(net): 57 | if hasattr(net, 'features') and hasattr(net, 'feature_reduc') and hasattr(net, 'classifier'): 58 | return net.features, net.feature_reduc, net.classifier 59 | if isinstance(net, models.ResNet): 60 | features = [net.conv1, net.bn1, net.relu, net.maxpool] 61 | features.extend(net.layer1) 62 | features.extend(net.layer2) 63 | features.extend(net.layer3) 64 | features.extend(net.layer4) 65 | features = nn.Sequential(*features) 66 | feature_reduc = nn.Sequential(net.avgpool) 67 | classifier = nn.Sequential(net.fc) 68 | else: 69 | features, classifier = net.features, net.classifier 70 | feature_reduc = nn.Sequential() 71 | return features, feature_reduc, classifier 72 | 73 | 74 | def copy_bn_params(m, base_m): 75 | if m.weight is not None: 76 | m.weight.data.copy_(base_m.weight.data) 77 | if m.bias is not None: 78 | m.bias.data.copy_(base_m.bias.data) 79 | m.running_mean.copy_(base_m.running_mean) 80 | m.running_var.copy_(base_m.running_var) 81 | 82 | 83 | def copy_bn_all(seq, base_seq): 84 | for m, base_m in zip(seq, base_seq): 85 | if isinstance(m, nn.Sequential): 86 | copy_bn_all(m, base_m) 87 | if isinstance(m, nn.BatchNorm2d): 88 | copy_bn_params(m, base_m) 89 | if isinstance(m, models.resnet.BasicBlock): 90 | copy_bn_params(m.bn1, base_m.bn1) 91 | copy_bn_params(m.bn2, base_m.bn2) 92 | if m.downsample is None: 93 | continue 94 | copy_bn_all(m.downsample, base_m.downsample) 95 | if isinstance(m, models.resnet.Bottleneck): 96 | copy_bn_params(m.bn1, base_m.bn1) 97 | copy_bn_params(m.bn2, base_m.bn2) 98 | copy_bn_params(m.bn3, base_m.bn3) 99 | if m.downsample is None: 100 | continue 101 | copy_bn_all(m.downsample, base_m.downsample) 102 | 103 | 104 | def bn_new_params(bn, **kwargs): 105 | w, b, rm, rv = bn.weight, bn.bias, bn.running_mean, bn.running_var 106 | new_bn = nn.BatchNorm2d(bn.num_features, **kwargs) 107 | if w and new_bn.weight: 108 | new_bn.weight.data = w.data.clone() 109 | if b and new_bn.bias: 110 | new_bn.bias.data = b.data.clone() 111 | new_bn.running_mean = rm.clone() 112 | new_bn.running_var = rv.clone() 113 | return new_bn 114 | 115 | 116 | def set_batch_norm_params(seq, **kwargs): 117 | for name, block in seq._modules.items(): 118 | if isinstance(block, nn.Sequential): 119 | set_batch_norm_params(block, **kwargs) 120 | if isinstance(block, nn.BatchNorm2d): 121 | seq._modules[name] = bn_new_params(block, **kwargs) 122 | if isinstance(block, models.resnet.BasicBlock): 123 | block.bn1 = bn_new_params(block.bn1, **kwargs) 124 | block.bn2 = bn_new_params(block.bn2, **kwargs) 125 | if block.downsample is None: 126 | continue 127 | set_batch_norm_params(block.downsample, **kwargs) 128 | if isinstance(block, models.resnet.Bottleneck): 129 | block.bn1 = bn_new_params(block.bn1, **kwargs) 130 | block.bn2 = bn_new_params(block.bn2, **kwargs) 131 | block.bn3 = bn_new_params(block.bn3, **kwargs) 132 | if block.downsample is 
None: 133 | continue 134 | set_batch_norm_params(block.downsample, **kwargs) 135 | 136 | 137 | def set_batch_norm_train(seq, train): 138 | for block in seq: 139 | if isinstance(block, nn.Sequential): 140 | set_batch_norm_train(block, train) 141 | if isinstance(block, nn.BatchNorm2d): 142 | block.train(mode=train) 143 | if isinstance(block, models.resnet.BasicBlock): 144 | block.bn1.train(mode=train) 145 | block.bn2.train(mode=train) 146 | if block.downsample is None: 147 | continue 148 | set_batch_norm_train(block.downsample, train) 149 | if isinstance(block, models.resnet.Bottleneck): 150 | block.bn1.train(mode=train) 151 | block.bn2.train(mode=train) 152 | block.bn3.train(mode=train) 153 | if block.downsample is None: 154 | continue 155 | set_batch_norm_train(block.downsample, train) 156 | 157 | 158 | # net is assumed to have only one component containing BatchNorm modules: 159 | # net.features 160 | def set_net_train(net, train, bn_train=False): 161 | net.train(mode=train) 162 | if train and not bn_train: 163 | set_batch_norm_train(net.features, False) 164 | -------------------------------------------------------------------------------- /model/siamese.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 52, 6 | "metadata": { 7 | "collapsed": true, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import torch\n", 14 | "import torch.nn as nn\n", 15 | "import torchvision.models as models\n", 16 | "from torch.autograd import Variable" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 38, 22 | "metadata": { 23 | "collapsed": true, 24 | "deletable": true, 25 | "editable": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "class Siamese1(nn.Module):\n", 30 | " \"\"\"\n", 31 | " Define a siamese network\n", 32 | " Given a module, it will duplicate it with weight sharing, concatenate the output and add a linear classifier \n", 33 | " \"\"\"\n", 34 | " def __init__(self, net):\n", 35 | " super(siamese, self).__init__()\n", 36 | " self.features = net\n", 37 | " self.classifier = nn.Linear(net.classifier[len(net.classifier._modules)-1].out_features*2, 1)\n", 38 | " \n", 39 | " def forward(self, x1, x2):\n", 40 | " x = torch.cat( (self.features(x1), self.features(x2)), 1)\n", 41 | " x = self.classifier(x)\n", 42 | " return x" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 49, 48 | "metadata": { 49 | "collapsed": true, 50 | "deletable": true, 51 | "editable": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "class Siamese2(nn.Module):\n", 56 | " \"\"\"\n", 57 | " Define a siamese network\n", 58 | " Given a module, it will duplicate it with weight sharing, concatenate the output and add a linear classifier \n", 59 | " \"\"\"\n", 60 | " def __init__(self, net):\n", 61 | " super(Siamese2, self).__init__()\n", 62 | " self.features = net\n", 63 | " \n", 64 | " def forward(self, x1, x2):\n", 65 | " return (self.features(x1), self.features(x2))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 39, 71 | "metadata": { 72 | "collapsed": false, 73 | "deletable": true, 74 | "editable": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "def siamese():\n", 79 | " return Siamese2(models.alexnet(pretrained=True))" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 53, 85 | "metadata": { 86 | "collapsed": false, 87 | "deletable": true, 88 | "editable": true 
89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "if __name__=='__main__':\n", 93 | " t = Variable(torch.Tensor(1,3,225,225))\n", 94 | " s = Siamese2(models.alexnet(pretrained=True))\n", 95 | " o = s(t, t)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# autograd function to normalize an input over the rows\n", 107 | "# (each vector of a batch is normalized)\n", 108 | "# the backward step follows the implementation of\n", 109 | "# torch.legacy.nn.Normalize closely\n", 110 | "class Normalize2DL2(Function):\n", 111 | "\n", 112 | " def __init__(self, eps=1e-10):\n", 113 | " super(Normalize2DL2, self).__init__()\n", 114 | " self.eps = eps\n", 115 | "\n", 116 | " def forward(self, input):\n", 117 | " self.norm2 = input.pow(2).sum(1).add_(self.eps)\n", 118 | " self.norm = self.norm2.pow(0.5)\n", 119 | " output = input / self.norm.expand_as(input)\n", 120 | " self.save_for_backward(input)\n", 121 | " return output\n", 122 | "\n", 123 | " def backward(self, grad_output):\n", 124 | " input = self.saved_tensors[0]\n", 125 | " gradInput = self.norm2.expand_as(input) * grad_output\n", 126 | " cross = (input * grad_output).sum(1)\n", 127 | " buf = input * cross.expand_as(input)\n", 128 | " gradInput.add_(-1, buf)\n", 129 | " cross = self.norm2 * self.norm\n", 130 | " gradInput.div_(cross.expand_as(gradInput))\n", 131 | " return gradInput" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "class NormalizeL2(nn.Module):\n", 143 | "\n", 144 | " def __init__(self):\n", 145 | " super(NormalizeL2, self).__init__()\n", 146 | "\n", 147 | " def forward(self, input):\n", 148 | " return Normalize2DL2()(input)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": { 155 | "collapsed": true 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "def extract_layers(net):\n", 160 | " if isinstance(net, models.ResNet):\n", 161 | " features = [net.conv1, net.bn1, net.relu, net.maxpool]\n", 162 | " features.extend(net.layer1)\n", 163 | " features.extend(net.layer2)\n", 164 | " features.extend(net.layer3)\n", 165 | " features.extend(net.layer4)\n", 166 | " features = nn.Sequential(*features)\n", 167 | " feature_reduc = nn.Sequential(net.avgpool)\n", 168 | " classifier = nn.Sequential(net.fc)\n", 169 | " else:\n", 170 | " features, classifier = net.features, net.classifier\n", 171 | " feature_reduc = nn.Sequential()\n", 172 | " return features, feature_reduc, classifier" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "class TuneClassif(nn.Module):\n", 184 | " \"\"\"\n", 185 | " Image classification network based on a pretrained network\n", 186 | " which is then finetuned to a different dataset\n", 187 | " It's assumed that the last layer of the given network\n", 188 | " is a fully connected (linear) one\n", 189 | " untrained_blocks specifies how many layers or blocks of layers are\n", 190 | " left untrained (only layers with parameters are counted). 
for ResNet, each 'BottleNeck' or 'BasicBlock' (block containing skip connection for residual) is considered as one block\n", 191 | " \"\"\"\n", 192 | "\n", 193 | " def __init__(self, net, num_classes, untrained_blocks=-1):\n", 194 | " super(TuneClassif, self).__init__()\n", 195 | " features, feature_reduc, classifier = extract_layers(net)\n", 196 | " if untrained_blocks < 0:\n", 197 | " untrained_blocks = sum(1 for _ in features) + sum(1 for _ in classifier)\n", 198 | " self.features = features\n", 199 | " self.feature_reduc = feature_reduc\n", 200 | " self.classifier = classifier\n", 201 | " # make sure we never retrain the first few layers\n", 202 | " # this is usually not needed\n", 203 | " seqs = [self.features, self.feature_reduc, self.classifier]\n", 204 | "\n", 205 | " def has_param(m):\n", 206 | " return sum(1 for _ in m.parameters()) > 0\n", 207 | " count = 0\n", 208 | " for module in (m for seq in seqs for m in seq if has_param(m)):\n", 209 | " if count >= untrained_blocks:\n", 210 | " break\n", 211 | " count += 1\n", 212 | " for p in module.parameters():\n", 213 | " p.requires_grad = False\n", 214 | "\n", 215 | " for name, module in self.classifier._modules.items():\n", 216 | " if module is classifier[len(classifier._modules) - 1]:\n", 217 | " self.classifier._modules[name] = nn.Linear(module.in_features, num_classes)\n", 218 | "\n", 219 | " def forward(self, x):\n", 220 | " x = self.features(x)\n", 221 | " x = self.feature_reduc(x)\n", 222 | " x = x.view(x.size(0), -1)\n", 223 | " x = self.classifier(x)\n", 224 | " return x" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "collapsed": true 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "class Siamese1(nn.Module):\n", 236 | " \"\"\"\n", 237 | " Define a siamese network\n", 238 | " Given a module, it will duplicate it with weight sharing, concatenate the output and add a linear classifier\n", 239 | " \"\"\"\n", 240 | " def __init__(self, net, num_classes=100, feature_dim=100, feature_size2d=(6, 6)):\n", 241 | " super(Siamese1, self).__init__()\n", 242 | " self.features = net.features\n", 243 | " spatial_factor = 4\n", 244 | " self.spatial_feature_reduc = nn.Sequential(\n", 245 | " nn.AvgPool2d(spatial_factor)\n", 246 | " )\n", 247 | " factor = feature_size2d[0] / spatial_factor * feature_size2d[1] / spatial_factor\n", 248 | " for module in self.features:\n", 249 | " if isinstance(module, models.resnet.Bottleneck):\n", 250 | " in_features = module.conv3.out_channels * factor\n", 251 | " if isinstance(module, models.resnet.BasicBlock):\n", 252 | " in_features = module.conv2.out_channels * factor\n", 253 | " if isinstance(module, nn.modules.Conv2d):\n", 254 | " in_features = module.out_channels * factor\n", 255 | " if feature_dim <= 0:\n", 256 | " for module in net.classifier:\n", 257 | " if isinstance(module, nn.modules.linear.Linear):\n", 258 | " out_features = module.out_features\n", 259 | " else:\n", 260 | " out_features = feature_dim\n", 261 | " self.feature_reduc1 = nn.Sequential(\n", 262 | " nn.Dropout(0.5),\n", 263 | " NormalizeL2(),\n", 264 | " nn.Linear(in_features, out_features)\n", 265 | " )\n", 266 | " self.feature_reduc2 = NormalizeL2()\n", 267 | "\n", 268 | " def forward_single(self, x):\n", 269 | " x = self.features(x)\n", 270 | " x = self.spatial_feature_reduc(x)\n", 271 | " x = x.view(x.size(0), -1)\n", 272 | " x = self.feature_reduc1(x)\n", 273 | " x = self.feature_reduc2(x)\n", 274 | " return x\n", 275 | "\n", 276 | " def forward(self, x1, 
x2=None, x3=None):\n", 277 | " if self.training:\n", 278 | " return self.forward_single(x1), self.forward_single(x2), self.forward_single(x3)\n", 279 | " else:\n", 280 | " return self.forward_single(x1)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "collapsed": true 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "# metric loss according to Chopra et al \"Learning a Similarity Metric Discriminatively, with Application to Face Verification\"\n", 292 | "# since we assume normalized vectors, we use Q=2\n", 293 | "class MetricL(Function):\n", 294 | "\n", 295 | " def __init__(self, size_average=True):\n", 296 | " super(MetricL, self).__init__()\n", 297 | " self.size_average = size_average\n", 298 | "\n", 299 | " # TODO: everything could be done inplace,\n", 300 | " # more difficult though (for norm see torch.nn._functions.loss.Cosine...)\n", 301 | " def terms(self, input1, input2, y):\n", 302 | " diff = input1 - input2\n", 303 | " energy = diff.norm(1, 1)\n", 304 | " e = energy * 0 + np.e\n", 305 | " exp_term = torch.pow(e, -2.77 * energy / 2)\n", 306 | " return diff, energy, exp_term\n", 307 | "\n", 308 | " # target takes values in 1 (good), -1 (bad) so (1-target)/2 is 0 for good pairs and 1 for bad ones, (1+target) / 2 inverse\n", 309 | " def forward(self, input1, input2, y):\n", 310 | " _, energy, exp_term = self.terms(input1, input2, y)\n", 311 | " loss_g = (1 + y) * energy * energy / 2\n", 312 | " loss_i = (1 - y) * 2 * exp_term\n", 313 | " loss = (loss_g + loss_i).sum(0).view(1)\n", 314 | " if self.size_average:\n", 315 | " loss.div_(y.size(0))\n", 316 | " self.save_for_backward(input1, input2, y)\n", 317 | " return loss\n", 318 | "\n", 319 | " def backward(self, grad_output):\n", 320 | " input1, input2, y = self.saved_tensors\n", 321 | " diff, energy, exp_term = self.terms(input1, input2, y)\n", 322 | " diff[diff.lt(0)] = -1\n", 323 | " diff[diff.ge(0)] = 1\n", 324 | " y_g = (1 + y).view(-1, 1).expand_as(input1)\n", 325 | " y_i = (1 - y).view(-1, 1).expand_as(input1)\n", 326 | " energy = energy.expand_as(input1)\n", 327 | " exp_term = exp_term.expand_as(input1)\n", 328 | " grad1 = y_g * diff * energy - 2.77 * y_i * diff * exp_term\n", 329 | " grad2 = -grad1\n", 330 | " if self.size_average:\n", 331 | " grad1.div_(y.size(0))\n", 332 | " grad2.div_(y.size(0))\n", 333 | " if grad_output[0] != 1:\n", 334 | " grad1.mul_(grad_output)\n", 335 | " grad2.mul_(grad_output)\n", 336 | " return grad1, grad2, None" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": { 343 | "collapsed": true 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "class MetricLoss(nn.Module):\n", 348 | "\n", 349 | " def __init__(self, size_average=True):\n", 350 | " super(MetricLoss, self).__init__()\n", 351 | " self.size_average = size_average\n", 352 | "\n", 353 | " def forward(self, input1, input2, target):\n", 354 | " return MetricL(self.size_average)(input1, input2, target)\n", 355 | "\n", 356 | "\n", 357 | "class TripletL(Function):\n", 358 | "\n", 359 | " def __init__(self, margin, size_average=True):\n", 360 | " super(TripletL, self).__init__()\n", 361 | " self.size_average = size_average\n", 362 | " self.margin = margin\n", 363 | "\n", 364 | " def forward(self, anchor, pos, neg):\n", 365 | " sqdiff = anchor.add(-1, pos).pow_(2)\n", 366 | " sqdiff = anchor.add(-1, neg).pow_(2)\n", 367 | " loss = sqdiff.sum(1)\n", 368 | " loss.add_(-1, sqdiff.sum(1))\n", 369 | " loss.add_(self.margin)\n", 370 | " 
self.clamp = torch.lt(loss, 0)\n", 371 | " loss[self.clamp] = 0\n", 372 | " loss = loss.sum(0).view(1)\n", 373 | " if self.size_average:\n", 374 | " loss.div_(anchor.size(0))\n", 375 | " self.save_for_backward(anchor, pos, neg)\n", 376 | " return loss\n", 377 | "\n", 378 | " def backward(self, grad_output):\n", 379 | " # grad_pos = -2(x_anchor - x_pos)\n", 380 | " # grad_neg = 2(x_anchor - x_neg)\n", 381 | " # grad_anchor = 2(x_anchor - x_pos) - 2(x_anchor - x_neg)\n", 382 | " # = -(grad_pos + grad_neg)\n", 383 | " anchor, pos, neg = self.saved_tensors\n", 384 | " c = self.clamp.expand_as(anchor)\n", 385 | " anchor[c] = 0\n", 386 | " pos[c] = 0\n", 387 | " neg[c] = 0\n", 388 | " anchor_sum = anchor.sum(0)\n", 389 | " grad_pos = anchor_sum.add(-1, pos.sum(0)).mul_(-2)\n", 390 | " grad_neg = anchor_sum.add_(-1, neg.sum(0)).mul_(2)\n", 391 | " grad_anchor = grad_pos.add(grad_neg).mul_(-1)\n", 392 | "\n", 393 | " if self.size_average:\n", 394 | " grad_anchor.div_(anchor.size(0))\n", 395 | " grad_pos.div_(anchor.size(0))\n", 396 | " grad_neg.div_(anchor.size(0))\n", 397 | " if grad_output[0] != 1:\n", 398 | " grad_anchor = grad_anchor.mul_(grad_output)\n", 399 | " grad_pos = grad_pos.mul_(grad_output)\n", 400 | " grad_neg = grad_neg.mul_(grad_output)\n", 401 | " grad_anchor = grad_anchor.expand_as(anchor)\n", 402 | " grad_pos = grad_pos.expand_as(anchor)\n", 403 | " grad_neg = grad_neg.expand_as(anchor)\n", 404 | " return grad_anchor, grad_pos, grad_neg\n", 405 | "\n", 406 | "\n", 407 | "class TripletLoss(nn.Module):\n", 408 | "\n", 409 | " def __init__(self, margin, size_average=True):\n", 410 | " super(TripletLoss, self).__init__()\n", 411 | " self.size_average = size_average\n", 412 | " self.margin = margin\n", 413 | "\n", 414 | " def forward(self, anchor, pos, neg):\n", 415 | " return TripletL(self.margin, self.size_average)(anchor, pos, neg)" 416 | ] 417 | } 418 | ], 419 | "metadata": { 420 | "kernelspec": { 421 | "display_name": "Python 2", 422 | "language": "python", 423 | "name": "python2" 424 | }, 425 | "language_info": { 426 | "codemirror_mode": { 427 | "name": "ipython", 428 | "version": 2 429 | }, 430 | "file_extension": ".py", 431 | "mimetype": "text/x-python", 432 | "name": "python", 433 | "nbconvert_exporter": "python", 434 | "pygments_lexer": "ipython2", 435 | "version": "2.7.9" 436 | } 437 | }, 438 | "nbformat": 4, 439 | "nbformat_minor": 2 440 | } 441 | -------------------------------------------------------------------------------- /model/siamese.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | from nn_utils import * 7 | from custom_modules import * 8 | 9 | 10 | class TuneClassif(nn.Module): 11 | """ 12 | Image classification network based on a pretrained network 13 | which is then finetuned to a different dataset 14 | It's assumed that the last layer of the given network 15 | is a fully connected (linear) one 16 | untrained specifies how many layers or blocks of layers are 17 | left untrained (only layers with parameters are counted). 
for ResNet, each 'BottleNeck' or 'BasicBlock' (block containing skip connection for residual) is considered as one block 18 | """ 19 | 20 | def __init__(self, net, num_classes, untrained=-1, reduc=True): 21 | super(TuneClassif, self).__init__() 22 | self.features, self.feature_reduc, self.classifier = extract_layers(net) 23 | # make sure we never retrain the first few layers 24 | # this is usually not needed 25 | set_untrained_blocks([self.features, self.classifier], untrained) 26 | 27 | # replace last module of classifier with a reduced one 28 | last_module = self.classifier[len(self.classifier._modules) - 1] 29 | if not isinstance(last_module, nn.Linear) or last_module.out_features != num_classes: 30 | for name, module in self.classifier._modules.items(): 31 | if module is last_module: 32 | self.classifier._modules[name] = nn.Linear(module.in_features, num_classes) 33 | 34 | self.feature_size = num_classes 35 | # if no reduc is wanted, remove it 36 | if not reduc: 37 | factor = 1 38 | for m in self.feature_reduc: 39 | try: 40 | factor *= (m.kernel_size[0] * m.kernel_size[1]) 41 | except TypeError: 42 | factor *= m.kernel_size * m.kernel_size 43 | # increase the number of input features on first classifier module 44 | for name, module in self.classifier._modules.items(): 45 | if module is self.classifier[0]: 46 | self.classifier._modules[name] = nn.Linear(module.in_features * factor, module.out_features) 47 | self.feature_reduc = nn.Sequential() 48 | 49 | def forward(self, x): 50 | x = self.features(x) 51 | x = self.feature_reduc(x) 52 | x = x.view(x.size(0), -1) 53 | x = self.classifier(x) 54 | return x 55 | 56 | 57 | class TuneClassifSub(TuneClassif): 58 | """ 59 | Image classification network based on a pretrained network 60 | which is then finetuned to a different dataset, as above 61 | Here, all sub-parts of the image are classified by 62 | convolutionalizing the linear classification layers 63 | """ 64 | def __init__(self, net, num_classes, feature_size2d, untrained=-1): 65 | super(TuneClassifSub, self).__init__(net, num_classes, untrained, reduc=True) 66 | reduc_count = sum(1 for _ in self.feature_reduc) 67 | if reduc_count > 0: 68 | # in a ResNet, apply stride 1 feature size avg pool reduction 69 | self.feature_reduc = nn.Sequential( 70 | nn.AvgPool2d(feature_size2d, stride=1) 71 | ) 72 | # convolutionalize the linear layers in classifier 73 | count = 0 74 | for name, module in self.classifier._modules.items(): 75 | if isinstance(module, nn.modules.linear.Linear): 76 | size2d = feature_size2d 77 | if reduc_count > 0 or count > 0: 78 | size2d = (1, 1) 79 | self.classifier._modules[name] = convolutionalize(module, size2d) 80 | count += 1 81 | 82 | def forward_single(self, x): 83 | x = self.features(x) 84 | x = self.feature_reduc(x) 85 | x = self.classifier(x) 86 | return x 87 | 88 | def forward(self, *scales): 89 | return [self.forward_single(x) for x in scales] 90 | 91 | 92 | class DescriptorNet(nn.Module): 93 | """ 94 | Define a siamese network 95 | Given a network, obtain its features, then apply spatial reduction 96 | (optional) and a norm, shift+linear, norm reduction to obtain a 97 | descriptor. 
98 | TODO description 99 | """ 100 | def __init__(self, net, feature_dim, feature_size2d, untrained=-1): 101 | super(DescriptorNet, self).__init__() 102 | self.features, _, classifier = extract_layers(net) 103 | set_untrained_blocks([self.features], untrained) 104 | factor = feature_size2d[0] * feature_size2d[1] 105 | in_features = get_feature_size(self.features, factor) 106 | if feature_dim <= 0: 107 | self.feature_size = get_feature_size(classifier) 108 | else: 109 | self.feature_size = feature_dim 110 | self.feature_reduc1 = nn.Sequential( 111 | NormalizeL2(), 112 | Shift(in_features), 113 | nn.Linear(in_features, self.feature_size) 114 | ) 115 | self.feature_reduc2 = NormalizeL2() 116 | 117 | def forward_single(self, x): 118 | x = self.features(x) 119 | x = x.view(x.size(0), -1) 120 | x = self.feature_reduc1(x) 121 | x = self.feature_reduc2(x) 122 | return x 123 | 124 | def forward(self, x1, x2=None, x3=None): 125 | if self.training and x3 is not None: 126 | return self.forward_single(x1), self.forward_single(x2), self.forward_single(x3) 127 | elif self.training: 128 | return self.forward_single(x1), self.forward_single(x2) 129 | else: 130 | return self.forward_single(x1) 131 | 132 | 133 | class RegionDescriptorNet(nn.Module): 134 | """ 135 | Define a siamese network 136 | Given a network, obtain its features and apply spatial reduction 137 | (optional). The feature maps can have any size here, so we apply 138 | a classifier (obtained from the given network) to all locations 139 | in the feature map. Finally, we sum the features in those regions 140 | obtaining the highest classification values and apply normalization, 141 | shifting, linear, normalization to obtain a global descriptor. 142 | In order to allow training for both the descriptor and the classifier, 143 | the classification values are output as well as the descriptor 144 | for all input images. 
145 | 146 | Use the k highest values from the classifier to obtain descriptor 147 | """ 148 | def __init__(self, net, k, feature_dim, feature_size2d, untrained=-1): 149 | super(RegionDescriptorNet, self).__init__() 150 | self.k = k 151 | self.feature_size2d = feature_size2d 152 | self.features, self.feature_reduc, self.classifier = extract_layers(net) 153 | 154 | # factor = 1 155 | factor = feature_size2d[0] * feature_size2d[1] 156 | in_features = get_feature_size(self.features, factor) 157 | if feature_dim <= 0: 158 | self.feature_size = get_feature_size(classifier) 159 | else: 160 | self.feature_size = feature_dim 161 | reduc_count = sum(1 for _ in self.feature_reduc) 162 | if reduc_count > 0: 163 | # we are a ResNet or similar, apply feature_size AvgPool stride 1 164 | self.feature_reduc = nn.Sequential( 165 | nn.AvgPool2d(feature_size2d, stride=1) 166 | ) 167 | # convolutionalize the linear layers in classifier 168 | count = 0 169 | for name, module in self.classifier._modules.items(): 170 | if isinstance(module, nn.modules.linear.Linear): 171 | size2d = feature_size2d 172 | if reduc_count > 0 or count > 0: 173 | size2d = (1, 1) 174 | self.classifier._modules[name] = convolutionalize(module, size2d) 175 | count += 1 176 | set_untrained_blocks([self.features, self.classifier], untrained) 177 | self.feature_reduc1 = nn.Sequential( 178 | NormalizeL2(), 179 | Shift(in_features), 180 | nn.Linear(in_features, self.feature_size) 181 | ) 182 | self.feature_reduc2 = NormalizeL2() 183 | 184 | # this can only be done using a single input (batch size: 1) TODO 185 | def forward_single(self, x): 186 | x = self.features(x) 187 | c = self.feature_reduc(x) 188 | c = self.classifier(c) 189 | # get maximal classification values and choose indexes with 190 | # highest maximal classification 191 | c_maxv, _ = c.max(1) 192 | c_maxv = c_maxv.view(-1) 193 | k = min(c_maxv.size(0), self.k) 194 | _, flat_idx = c_maxv.topk(k) 195 | 196 | # transform flat classification indexes to feature indexes 197 | # first, flat index -> 2d classification index, then add 198 | # feature size to obtain the region in feature map 199 | def feature_idx(flat_idx): 200 | cls_idx = flat_idx // c.size(3), flat_idx % c.size(3) 201 | return (cls_idx[0], cls_idx[0] + self.feature_size2d[0], 202 | cls_idx[1], cls_idx[1] + self.feature_size2d[1]) 203 | top_idx = [feature_idx(int(i)) for i in flat_idx.data] 204 | # needed for output 205 | tmp = c_maxv.data.clone().resize_(c.size(0), self.feature_size) 206 | acc = Variable(tmp.fill_(0)) 207 | tmp = c_maxv.data.clone().resize_(c.size(0), c.size(1), self.k) 208 | cls_out = Variable(tmp.fill_(0)) 209 | 210 | # for all top maximal classification indexes, output the actual 211 | # classification values at those indexes 212 | # for the descriptor, use the feature indexes and then reduce 213 | # accumulate regional descriptors using addition 214 | i = 0 215 | for x1, x2, y1, y2 in top_idx: 216 | cls_out[:, :, i] = c[:, :, x1, y1] 217 | i += 1 218 | region = x[:, :, x1:x2, y1:y2].contiguous().view(x.size(0), -1) 219 | region = self.feature_reduc1(region) 220 | acc = acc + region 221 | # finally, perform final reduction (normalization) 222 | x = self.feature_reduc2(acc) 223 | return x, cls_out 224 | 225 | def forward(self, x1, x2=None, x3=None): 226 | if self.training and x3 is not None: 227 | return self.forward_single(x1), self.forward_single(x2), self.forward_single(x3) 228 | elif self.training: 229 | return self.forward_single(x1), self.forward_single(x2) 230 | else: 231 | return 
self.forward_single(x1)[0] 232 | -------------------------------------------------------------------------------- /pre_process_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import cv2 4 | import numpy as np 5 | from os import path 6 | from utils import get_images_labels, match_label_fou_clean2, match_label_video 7 | 8 | # resize all images of a dataset and place them into a new folder 9 | 10 | # path to folders containing train and test images 11 | dataset = '/home/mrim/data/collection/GUIMUTEIC/FOURVIERE_CLEAN2/TRAIN_I' 12 | dataset_test = '/home/mrim/data/collection/GUIMUTEIC/FOURVIERE_CLEAN2/TEST_I' 13 | # function to match the labels in image names 14 | match_labels = match_label_fou_clean2 15 | # paths where the resized images are placed 16 | out_path = './data/pre_proc/fourviere_clean2_448' 17 | out_path_test = './data/pre_proc/fourviere_clean2_448/test' 18 | 19 | # training and test sets (scaled to 300 on the small side) 20 | dataSetFull = get_images_labels(dataset, match_labels) 21 | testSetFull = get_images_labels(dataset_test, match_labels) 22 | 23 | 24 | # resize function 25 | def resize(dataset, out_path, max_ar, newsize1, newsize2=None): 26 | for im, lab in dataset: 27 | im_o = cv2.imread(im) 28 | h, w, _ = im_o.shape 29 | if max_ar >= 1. and ((h > w and float(h) / w > max_ar) or (h < w and float(w) / h > max_ar)): 30 | # force a max aspect ratio of max_ar by padding image with random uniform noise 31 | def pad_rand(vector, pad_width, iaxis, kwargs): 32 | if pad_width[0] > 0: 33 | vector[:pad_width[0]] = np.random.randint(256, size=pad_width[0]) 34 | if pad_width[1] > 0: 35 | vector[-pad_width[1]:] = np.random.randint(256, size=pad_width[1]) 36 | return vector 37 | if h > w: 38 | ow = int(np.ceil(float(h) / max_ar)) 39 | w_pad = (ow - w) // 2 40 | w_mod = (ow - w) % 2 41 | im_o = np.pad(im_o, ((0, 0), (w_pad + w_mod, w_pad), (0, 0)), pad_rand) 42 | else: 43 | oh = int(np.ceil(float(w) / max_ar)) 44 | h_pad = (oh - h) // 2 45 | h_mod = (oh - h) % 2 46 | im_o = np.pad(im_o, ((h_pad + h_mod, h_pad), (0, 0), (0, 0)), pad_rand) 47 | h, w, _ = im_o.shape 48 | if newsize2 is None: 49 | if (w <= h and w == newsize1) or (h <= w and h == newsize1): 50 | ow, oh = w, h 51 | elif (w < h): 52 | ow, oh = newsize1, int(round(float(newsize1 * h) / w)) 53 | else: 54 | ow, oh = int(round(float(newsize1 * w) / h)), newsize1 55 | else: 56 | ow, oh = newsize1, newsize2 57 | if ow == w and oh == h: 58 | im_out = im_o 59 | else: 60 | im_out = cv2.resize(im_o, (ow, oh), interpolation=cv2.INTER_CUBIC) 61 | out_p = path.join(out_path, im.split('/')[-1]) 62 | print('/'.join(im.split('/')[-3:]), '->', '/'.join(out_p.split('/')[-3:])) 63 | cv2.imwrite(out_p, im_out) 64 | 65 | 66 | resize(dataSetFull, out_path, 2.0, 448) 67 | resize(testSetFull, out_path_test, 2.0, 448) 68 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgreat/Instance-Search/2cea5f64a2d397047072a91788af81c0ea1c6d5e/test/__init__.py -------------------------------------------------------------------------------- /test/classif_finetune_test.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import traceback 4 | import sys 5 | import getopt 6 | import torchvision.transforms as transforms 7 | from model.nn_utils import 
set_net_train 8 | from utils import * 9 | from train.classif_finetune import P, labels, test_classif_net 10 | from train.global_p import * 11 | from train.classif_finetune import get_embeddings, get_class_net 12 | from instance_avg import instance_avg 13 | 14 | 15 | def usage(): 16 | print('Usage: ' + sys.argv[0] + ' [options]') 17 | prefix = 'Options:\n\tRequired:\n' 18 | o1 = ('--dataset=\t\tThe path to the dataset containing all ' + 19 | 'reference images. It should contain a sub-folder "test" ' + 20 | 'containing all test images\n') 21 | o2 = ('--model=\t\tEither AlexNet or ResNet152 to specify the ' + 22 | 'type of model.\n') 23 | o3 = ('--weights=\t\tThe filename containing weights of a ' + 24 | 'network trained for sub-region classification.\n') 25 | o4 = ('--device=\t\tThe GPU device used for testing. ' + 26 | 'If negative, CPU is used.\n') 27 | o5 = ('--classify=\t\tTrue/yes/y/1 if the classification ' + 28 | 'feature should be tested. Otherwise, convolutional features ' + 29 | 'are tested.\n') 30 | o6 = ('--batch=\t\tThe batch size to use.\n') 31 | o7 = ('--dba=\t\tUse DBA with given k. If k = 0, do not use DBA. ' + 32 | 'If k<0, use all neighbors within the same instance.\n') 33 | o8 = '--help\t\tShow this help\n' 34 | print(prefix + o1 + o2 + o3 + o4 + o5 + o6 + o7 + o8) 35 | 36 | 37 | def main(dataset_full, model, weights, device, classify, batch_size, dba): 38 | # training and test sets 39 | dataset_id = parse_dataset_id(dataset_full) 40 | match_labels = match_label_functions[dataset_id] 41 | train_set_full = get_images_labels(dataset_full, match_labels) 42 | test_set_full = get_images_labels(dataset_full + '/test', match_labels) 43 | 44 | labels_list = [t[1] for t in train_set_full] 45 | # setup global params so that testing functions work properly 46 | labels.extend(sorted(list(set(labels_list)))) 47 | P.test_pre_proc = True # we always pre process images 48 | P.cuda_device = device 49 | P.image_input_size = image_sizes[dataset_id] 50 | P.test_batch_size = batch_size 51 | P.preload_net = weights 52 | P.cnn_model = model 53 | P.feature_size2d = feature_sizes[model, image_sizes[dataset_id]] 54 | P.embeddings_classify = classify 55 | out_size = len(labels) if classify else flat_feature_sizes[model, P.image_input_size] 56 | P.feature_dim = out_size 57 | 58 | print('Loading and transforming train/test sets.') 59 | 60 | # open the images (and transform already if possible) 61 | # do that only if it fits in memory ! 
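# Rough sketch of the retrieval evaluation performed below. The embeddings
# returned by get_embeddings are L2-normalized, so the torch.mm product is a
# cosine-similarity matrix between test and reference images. precision1 and
# mean_avg_precision are helpers from utils whose implementation is not shown
# in this file; the semantics assumed here are approximately:
#
#   sim = torch.mm(test_embeddings, ref_embeddings.t())  # (n_test, n_ref)
#   _, nearest = sim.max(1)  # most similar reference for each test image
#   # precision@1: fraction of test images whose nearest reference shares
#   # their instance label; mAP averages precision over the whole ranking
#   # rather than only the top result.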
62 | m, s = read_mean_std(mean_std_files[dataset_id]) 63 | test_trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(m, s)]) 64 | test_set, test_train_set = [], [] 65 | for im, lab in train_set_full: 66 | im_o = imread_rgb(im) 67 | test_train_set.append((test_trans(im_o), lab, im)) 68 | 69 | for im, lab in test_set_full: 70 | if lab not in labels: 71 | continue 72 | im_o = imread_rgb(im) 73 | test_set.append((test_trans(im_o), lab, im)) 74 | 75 | print('Testing network on dataset with ID {0}'.format(dataset_id)) 76 | class_net = get_class_net() 77 | set_net_train(class_net, False) 78 | c, t = test_classif_net(class_net, test_set) 79 | print('Classification (TEST): {0} / {1} - acc: {2:.4f}'.format(c, t, float(c) / t)) 80 | test_embeddings = get_embeddings(class_net, test_set, device, out_size) 81 | ref_embeddings = get_embeddings(class_net, test_train_set, device, out_size) 82 | sim = torch.mm(test_embeddings, ref_embeddings.t()) 83 | prec1, c, t, _, _ = precision1(sim, test_set, test_train_set) 84 | mAP = mean_avg_precision(sim, test_set, test_train_set) 85 | print('Descriptor (TEST): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(c, t, prec1, mAP)) 86 | if dba == 0: 87 | return 88 | print('Testing using instance feature augmentation') 89 | dba_embeddings, dba_set = instance_avg(device, ref_embeddings, 90 | test_train_set, labels, dba) 91 | sim = torch.mm(test_embeddings, dba_embeddings.t()) 92 | prec1, c, t, _, _ = precision1(sim, test_set, dba_set) 93 | mAP = mean_avg_precision(sim, test_set, dba_set) 94 | print('Descriptor (TEST DBA k={4}): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(c, t, prec1, mAP, dba)) 95 | 96 | 97 | if __name__ == '__main__': 98 | options_l = (['help', 'dataset=', 'model=', 'weights=', 'device=', 99 | 'classify=', 'batch=', 'dba=']) 100 | try: 101 | opts, args = getopt.getopt(sys.argv[1:], '', options_l) 102 | except getopt.GetoptError: 103 | usage() 104 | sys.exit(2) 105 | dataset_full, model, weights, device = None, None, None, None 106 | classify, batch_size, dba = None, None, -1 107 | for opt, arg in opts: 108 | if opt in ('--help'): 109 | usage() 110 | sys.exit() 111 | elif opt in ('--dataset'): 112 | dataset_full = check_folder(arg, 'dataset', True, usage) 113 | elif opt in ('--model'): 114 | model = check_model(arg, usage) 115 | elif opt in ('--weights'): 116 | weights = check_file(arg, 'initialization weights', True, usage) 117 | elif opt in ('--device'): 118 | device = check_int(arg, 'device', usage) 119 | elif opt in ('--classify'): 120 | classify = check_bool(arg, 'classify', usage) 121 | elif opt in ('--batch'): 122 | batch_size = check_int(arg, 'batch', usage) 123 | elif opt in ('--dba'): 124 | dba = check_int(arg, 'dba', usage) 125 | if (dataset_full is None or model is None or 126 | device is None or classify is None or batch_size is None): 127 | print('One or more required arguments is missing.') 128 | usage() 129 | sys.exit(2) 130 | 131 | with torch.cuda.device(device): 132 | try: 133 | main(dataset_full, model, weights, device, classify, 134 | batch_size, dba) 135 | except: 136 | log_detail(P, None, traceback.format_exc()) 137 | raise 138 | -------------------------------------------------------------------------------- /test/classif_regions_test.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import traceback 4 | import sys 5 | import getopt 6 | import torchvision.transforms as transforms 7 | from model.nn_utils import set_net_train 8 | from utils 
import * 9 | from train.classif_regions import P, labels, test_classif_net 10 | from train.classif_regions import get_embeddings, get_class_net 11 | from instance_avg import instance_avg 12 | 13 | 14 | def usage(): 15 | print('Usage: ' + sys.argv[0] + ' [options]') 16 | prefix = 'Options:\n\tRequired:\n' 17 | o1 = ('--dataset=\t\tThe path to the dataset containing all ' + 18 | 'reference images. It should contain a sub-folder "test" ' + 19 | 'containing all test images\n') 20 | o2 = ('--model=\t\tEither AlexNet or ResNet152 to specify the ' + 21 | 'type of model.\n') 22 | o3 = ('--weights=\t\tThe filename containing weights of a ' + 23 | 'network trained for sub-region classification.\n') 24 | o4 = ('--device=\t\tThe GPU device used for testing. ' + 25 | 'If negative, CPU is used.\n') 26 | o5 = ('--dba=\t\tUse DBA with given k. If k = 0, do not use DBA. ' + 27 | 'If k<0, use all neighbors within the same instance.\n') 28 | o6 = '--help\t\tShow this help\n' 29 | print(prefix + o1 + o2 + o3 + o4 + o5 + o6) 30 | 31 | 32 | def main(dataset_full, model, weights, device, dba): 33 | # training and test sets 34 | dataset_id = parse_dataset_id(dataset_full) 35 | match_labels = match_label_functions[dataset_id] 36 | train_set_full = get_images_labels(dataset_full, match_labels) 37 | test_set_full = get_images_labels(dataset_full + '/test', match_labels) 38 | 39 | labels_list = [t[1] for t in train_set_full] 40 | # setup global params so that testing functions work properly 41 | labels.extend(sorted(list(set(labels_list)))) 42 | P.test_pre_proc = True # we always pre process images 43 | P.cuda_device = device 44 | P.preload_net = weights 45 | P.cnn_model = model 46 | P.feature_size2d = feature_sizes[model, image_sizes[dataset_id]] 47 | P.bn_model = '' # only useful for training 48 | 49 | print('Loading and transforming train/test sets.') 50 | 51 | # open the images (and transform already if possible) 52 | # do that only if it fits in memory ! 
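# Note on the DBA step used at the end of main(): instance_avg (defined in
# test/instance_avg.py) replaces each reference embedding by a weighted sum
# of itself and its k nearest neighbours belonging to the same instance,
# then re-normalizes. With num_neighbors = k, the weighting applied there is:
#
#   agg_i   = e_i + sum_{j=0..k-1} ((k - j) / (k + 1)) * e_neighbour_j
#   new_e_i = agg_i / (||agg_i|| + 1e-10)
#
# so closer neighbours contribute more, and a negative k uses all images of
# the same instance as neighbours.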
53 | m, s = read_mean_std(mean_std_files[dataset_id]) 54 | test_trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(m, s)]) 55 | test_set, test_train_set = [], [] 56 | for im, lab in train_set_full: 57 | im_o = imread_rgb(im) 58 | test_train_set.append((test_trans(im_o), lab, im)) 59 | 60 | for im, lab in test_set_full: 61 | if lab not in labels: 62 | continue 63 | im_o = imread_rgb(im) 64 | test_set.append((test_trans(im_o), lab, im)) 65 | 66 | print('Testing network on dataset with ID {0}'.format(dataset_id)) 67 | class_net = get_class_net() 68 | set_net_train(class_net, False) 69 | c, t = test_classif_net(class_net, test_set) 70 | print('Classification (TEST): {0} / {1} - acc: {2:.4f}'.format(c, t, float(c) / t)) 71 | test_embeddings = get_embeddings(class_net, test_set, device, len(labels)) 72 | ref_embeddings = get_embeddings(class_net, test_train_set, device, len(labels)) 73 | sim = torch.mm(test_embeddings, ref_embeddings.t()) 74 | prec1, c, t, _, _ = precision1(sim, test_set, test_train_set) 75 | mAP = mean_avg_precision(sim, test_set, test_train_set) 76 | print('Descriptor (TEST): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(c, t, prec1, mAP)) 77 | if dba == 0: 78 | return 79 | print('Testing using instance feature augmentation') 80 | dba_embeddings, dba_set = instance_avg(device, ref_embeddings, 81 | test_train_set, labels, dba) 82 | sim = torch.mm(test_embeddings, dba_embeddings.t()) 83 | prec1, c, t, _, _ = precision1(sim, test_set, dba_set) 84 | mAP = mean_avg_precision(sim, test_set, dba_set) 85 | print('Descriptor (TEST DBA k={4}): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(c, t, prec1, mAP, dba)) 86 | 87 | 88 | if __name__ == '__main__': 89 | options_l = (['help', 'dataset=', 'model=', 'weights=', 'device=', 'dba=']) 90 | try: 91 | opts, args = getopt.getopt(sys.argv[1:], '', options_l) 92 | except getopt.GetoptError: 93 | usage() 94 | sys.exit(2) 95 | dataset_full, model, weights, device, dba = None, None, None, None, -1 96 | for opt, arg in opts: 97 | if opt in ('--help'): 98 | usage() 99 | sys.exit() 100 | elif opt in ('--dataset'): 101 | dataset_full = check_folder(arg, 'dataset', True, usage) 102 | elif opt in ('--model'): 103 | model = check_model(arg, usage) 104 | elif opt in ('--weights'): 105 | weights = check_file(arg, 'initialization weights', True, usage) 106 | elif opt in ('--device'): 107 | device = check_int(arg, 'device', usage) 108 | elif opt in ('--dba'): 109 | dba = check_int(arg, 'dba', usage) 110 | if (dataset_full is None or model is None or 111 | weights is None or device is None): 112 | print('One or more required arguments is missing.') 113 | usage() 114 | sys.exit(2) 115 | 116 | with torch.cuda.device(device): 117 | try: 118 | main(dataset_full, model, weights, device, dba) 119 | except: 120 | log_detail(P, None, traceback.format_exc()) 121 | raise 122 | -------------------------------------------------------------------------------- /test/instance_avg.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import torch 4 | from utils import get_lab_indicators 5 | 6 | 7 | def instance_avg(device, embeddings, dataset, labels, k=-1): 8 | # create new embeddings for the dataset, where each embedding 9 | # is replaced with a weighted sum of the k nearest neighbors 10 | # within its instance. 
if k is negative, all neighbors of the instance 11 | # are used 12 | sim = torch.mm(embeddings, embeddings.t()) 13 | # for each embedding, set the similarities to embeddings of different 14 | # labels to -2, plus to itself, so the maximal similarities are always 15 | # neighbors of the same instance 16 | lab_ind = get_lab_indicators(dataset, device) 17 | new_embeddings = embeddings.clone() 18 | for i, (_, lab, _) in enumerate(dataset): 19 | num_neighbors = lab_ind[lab].sum() - 1 20 | if k >= 0 and k < num_neighbors: 21 | num_neighbors = k 22 | if num_neighbors <= 0: 23 | new_embeddings[i] = embeddings[i] 24 | continue 25 | sim[i, i] = -2 26 | sim[i][1 - lab_ind[lab]] = -2 27 | _, best_neighbors = torch.sort(sim[i], dim=0, descending=True) 28 | agg_embedding = embeddings[i].clone() 29 | for j in range(num_neighbors): 30 | weight = (num_neighbors - j) / float(num_neighbors + 1) 31 | agg_embedding += embeddings[best_neighbors[j]] * weight 32 | new_embeddings[i] = agg_embedding / (agg_embedding.norm() + 1e-10) 33 | return new_embeddings, dataset 34 | 35 | 36 | # a method of simply averaging the descriptors for each instance 37 | # this is less useful as it may pull outliers into the average 38 | # def instance_avg(device, embeddings, dataset, labels): 39 | # # create a fictional dataset with one entry per label, with 40 | # # its embeddings as the average of all descriptors of each label 41 | # fictional_set = [(None, lab, None) for lab in labels] 42 | # new_embeddings = tensor(device, len(labels), embeddings.size(1)) 43 | # avg = {lab: tensor(device, embeddings.size(1)).fill_(0) 44 | # for lab in labels} 45 | # for embedding, (_, lab, _) in zip(embeddings, dataset): 46 | # avg[lab] += embedding # no need to average since we normalize 47 | # for i, lab in enumerate(labels): 48 | # new_embeddings[i] = avg[lab] / (avg[lab].norm() + 1e-10) 49 | # return new_embeddings, fictional_set 50 | -------------------------------------------------------------------------------- /test/siamese_descriptor_test.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import traceback 4 | import sys 5 | import getopt 6 | import torchvision.transforms as transforms 7 | from model.nn_utils import set_net_train 8 | from utils import * 9 | from train.siamese_descriptor import P, labels 10 | from train.siamese_descriptor import get_embeddings, get_siamese_net 11 | from instance_avg import instance_avg 12 | 13 | 14 | def usage(): 15 | print('Usage: ' + sys.argv[0] + ' [options]') 16 | prefix = 'Options:\n\tRequired:\n' 17 | o1 = ('--dataset=\t\tThe path to the dataset containing all ' + 18 | 'reference images. It should contain a sub-folder "test" ' + 19 | 'containing all test images\n') 20 | o2 = ('--model=\t\tEither AlexNet or ResNet152 to specify the ' + 21 | 'type of model.\n') 22 | o3 = ('--weights=\t\tThe filename containing weights of a ' + 23 | 'network trained for sub-region classification.\n') 24 | o4 = ('--device=\t\tThe GPU device used for testing. ' + 25 | 'If negative, CPU is used.\n') 26 | o5 = ('--feature-dim=\t\tThe feature dimensionality of the network.\n') 27 | o6 = ('--batch=\t\tThe batch size to use.\n') 28 | o7 = ('--dba=\t\tUse DBA with given k. If k = 0, do not use DBA. 
' + 29 | 'If k<0, use all neighbors within the same instance.\n') 30 | o8 = '--help\t\tShow this help\n' 31 | print(prefix + o1 + o2 + o3 + o4 + o5 + o6 + o7 + o8) 32 | 33 | 34 | def main(dataset_full, model, weights, device, feature_dim, batch_size, dba): 35 | # training and test sets 36 | dataset_id = parse_dataset_id(dataset_full) 37 | match_labels = match_label_functions[dataset_id] 38 | train_set_full = get_images_labels(dataset_full, match_labels) 39 | test_set_full = get_images_labels(dataset_full + '/test', match_labels) 40 | 41 | labels_list = [t[1] for t in train_set_full] 42 | # setup global params so that testing functions work properly 43 | labels.extend(sorted(list(set(labels_list)))) 44 | P.num_classes = len(labels) 45 | P.test_pre_proc = True # we always pre process images 46 | P.cuda_device = device 47 | P.image_input_size = image_sizes[dataset_id] 48 | P.preload_net = weights 49 | P.cnn_model = model 50 | P.feature_size2d = feature_sizes[model, image_sizes[dataset_id]] 51 | P.classif_model = '' # only useful for training 52 | P.feature_dim = feature_dim 53 | P.test_batch_size = batch_size 54 | 55 | print('Loading and transforming train/test sets.') 56 | 57 | # open the images (and transform already if possible) 58 | # do that only if it fits in memory ! 59 | m, s = read_mean_std(mean_std_files[dataset_id]) 60 | test_trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(m, s)]) 61 | test_set, test_train_set = [], [] 62 | for im, lab in train_set_full: 63 | im_o = imread_rgb(im) 64 | test_train_set.append((test_trans(im_o), lab, im)) 65 | 66 | for im, lab in test_set_full: 67 | if lab not in labels: 68 | continue 69 | im_o = imread_rgb(im) 70 | test_set.append((test_trans(im_o), lab, im)) 71 | 72 | print('Testing network on dataset with ID {0}'.format(dataset_id)) 73 | net = get_siamese_net() 74 | set_net_train(net, False) 75 | test_embeddings = get_embeddings(net, test_set, device, net.feature_size) 76 | ref_embeddings = get_embeddings(net, test_train_set, device, net.feature_size) 77 | sim = torch.mm(test_embeddings, ref_embeddings.t()) 78 | prec1, c, t, _, _ = precision1(sim, test_set, test_train_set) 79 | mAP = mean_avg_precision(sim, test_set, test_train_set) 80 | print('Descriptor (TEST): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(c, t, prec1, mAP)) 81 | if dba == 0: 82 | return 83 | print('Testing using instance feature augmentation') 84 | dba_embeddings, dba_set = instance_avg(device, ref_embeddings, 85 | test_train_set, labels, dba) 86 | sim = torch.mm(test_embeddings, dba_embeddings.t()) 87 | prec1, c, t, _, _ = precision1(sim, test_set, dba_set) 88 | mAP = mean_avg_precision(sim, test_set, dba_set) 89 | print('Descriptor (TEST DBA k={4}): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(c, t, prec1, mAP, dba)) 90 | 91 | 92 | if __name__ == '__main__': 93 | options_l = (['help', 'dataset=', 'model=', 'weights=', 'device=', 94 | 'feature-dim=', 'batch=', 'dba=']) 95 | try: 96 | opts, args = getopt.getopt(sys.argv[1:], '', options_l) 97 | except getopt.GetoptError: 98 | usage() 99 | sys.exit(2) 100 | dataset_full, model, weights, device = None, None, None, None 101 | feature_dim, batch_size, dba = None, None, -1 102 | for opt, arg in opts: 103 | if opt in ('--help'): 104 | usage() 105 | sys.exit() 106 | elif opt in ('--dataset'): 107 | dataset_full = check_folder(arg, 'dataset', True, usage) 108 | elif opt in ('--model'): 109 | model = check_model(arg, usage) 110 | elif opt in ('--weights'): 111 | weights = check_file(arg, 'initialization 
weights', True, usage) 112 | elif opt in ('--device'): 113 | device = check_int(arg, 'device', usage) 114 | elif opt in ('--feature-dim'): 115 | feature_dim = check_int(arg, 'feature-dim', usage) 116 | elif opt in ('--batch'): 117 | batch_size = check_int(arg, 'batch', usage) 118 | elif opt in ('--dba'): 119 | dba = check_int(arg, 'dba', usage) 120 | if (dataset_full is None or model is None or 121 | weights is None or device is None or 122 | feature_dim is None or batch_size is None): 123 | print('One or more required arguments is missing.') 124 | usage() 125 | sys.exit(2) 126 | 127 | with torch.cuda.device(device): 128 | try: 129 | main(dataset_full, model, weights, device, feature_dim, 130 | batch_size, dba) 131 | except: 132 | log_detail(P, None, traceback.format_exc()) 133 | raise 134 | -------------------------------------------------------------------------------- /test/siamese_regions_test.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import traceback 4 | import sys 5 | import getopt 6 | import torchvision.transforms as transforms 7 | from model.nn_utils import set_net_train 8 | from utils import * 9 | from train.siamese_regions import P, labels 10 | from train.siamese_regions import get_embeddings, get_siamese_net 11 | from instance_avg import instance_avg 12 | 13 | 14 | def usage(): 15 | print('Usage: ' + sys.argv[0] + ' [options]') 16 | prefix = 'Options:\n\tRequired:\n' 17 | o1 = ('--dataset=\t\tThe path to the dataset containing all ' + 18 | 'reference images. It should contain a sub-folder "test" ' + 19 | 'containing all test images\n') 20 | o2 = ('--model=\t\tEither AlexNet or ResNet152 to specify the ' + 21 | 'type of model.\n') 22 | o3 = ('--weights=\t\tThe filename containing weights of a ' + 23 | 'network trained for sub-region classification.\n') 24 | o4 = ('--device=\t\tThe GPU device used for testing. ' + 25 | 'If negative, CPU is used.\n') 26 | o5 = ('--feature-dim=\t\tThe feature dimensionality of the network.\n') 27 | o6 = ('--regions-k=\t\tThe number of highest-scoring regions used to build the descriptor.\n') 28 | o7 = ('--dba=\t\tUse DBA with given k. If k = 0, do not use DBA. ' + 29 | 'If k<0, use all neighbors within the same instance.\n') 30 | o8 = '--help\t\tShow this help\n' 31 | print(prefix + o1 + o2 + o3 + o4 + o5 + o6 + o7 + o8) 32 | 33 | 34 | def main(dataset_full, model, weights, device, feature_dim, regions_k, dba): 35 | # training and test sets 36 | dataset_id = parse_dataset_id(dataset_full) 37 | match_labels = match_label_functions[dataset_id] 38 | train_set_full = get_images_labels(dataset_full, match_labels) 39 | test_set_full = get_images_labels(dataset_full + '/test', match_labels) 40 | 41 | labels_list = [t[1] for t in train_set_full] 42 | # setup global params so that testing functions work properly 43 | labels.extend(sorted(list(set(labels_list)))) 44 | P.num_classes = len(labels) 45 | P.test_pre_proc = True # we always pre process images 46 | P.cuda_device = device 47 | P.preload_net = weights 48 | P.cnn_model = model 49 | P.feature_size2d = feature_sizes[model, image_sizes[dataset_id]] 50 | P.classif_model = '' # only useful for training 51 | P.feature_dim = feature_dim 52 | P.regions_k = regions_k 53 | 54 | print('Loading and transforming train/test sets.') 55 | 56 | # open the images (and transform already if possible) 57 | # do that only if it fits in memory !
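# What get_embeddings computes in this script: the region network
# (RegionDescriptorNet in model/siamese.py) classifies every spatial position
# of the convolutional feature map, keeps the regions_k positions with the
# highest maximal class score, reduces each corresponding feature region with
# its normalize/shift/linear block, sums these regional descriptors and
# L2-normalizes the sum. In eval mode, forward() returns only this global
# descriptor, which is what the similarity and precision/mAP computations
# below consume.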
58 | m, s = read_mean_std(mean_std_files[dataset_id]) 59 | test_trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(m, s)]) 60 | test_set, test_train_set = [], [] 61 | for im, lab in train_set_full: 62 | im_o = imread_rgb(im) 63 | test_train_set.append((test_trans(im_o), lab, im)) 64 | 65 | for im, lab in test_set_full: 66 | if lab not in labels: 67 | continue 68 | im_o = imread_rgb(im) 69 | test_set.append((test_trans(im_o), lab, im)) 70 | 71 | print('Testing network on dataset with ID {0}'.format(dataset_id)) 72 | net = get_siamese_net() 73 | set_net_train(net, False) 74 | test_embeddings = get_embeddings(net, test_set, device, net.feature_size) 75 | ref_embeddings = get_embeddings(net, test_train_set, device, net.feature_size) 76 | sim = torch.mm(test_embeddings, ref_embeddings.t()) 77 | prec1, c, t, _, _ = precision1(sim, test_set, test_train_set) 78 | mAP = mean_avg_precision(sim, test_set, test_train_set) 79 | print('Descriptor (TEST): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(c, t, prec1, mAP)) 80 | if dba == 0: 81 | return 82 | print('Testing using instance feature augmentation') 83 | dba_embeddings, dba_set = instance_avg(device, ref_embeddings, 84 | test_train_set, labels, dba) 85 | sim = torch.mm(test_embeddings, dba_embeddings.t()) 86 | prec1, c, t, _, _ = precision1(sim, test_set, dba_set) 87 | mAP = mean_avg_precision(sim, test_set, dba_set) 88 | print('Descriptor (TEST DBA k={4}): {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}'.format(c, t, prec1, mAP, dba)) 89 | 90 | 91 | if __name__ == '__main__': 92 | options_l = (['help', 'dataset=', 'model=', 'weights=', 'device=', 93 | 'feature-dim=', 'regions-k=', 'dba=']) 94 | try: 95 | opts, args = getopt.getopt(sys.argv[1:], '', options_l) 96 | except getopt.GetoptError: 97 | usage() 98 | sys.exit(2) 99 | dataset_full, model, weights, device = None, None, None, None 100 | feature_dim, regions_k, dba = None, None, -1 101 | for opt, arg in opts: 102 | if opt in ('--help'): 103 | usage() 104 | sys.exit() 105 | elif opt in ('--dataset'): 106 | dataset_full = check_folder(arg, 'dataset', True, usage) 107 | elif opt in ('--model'): 108 | model = check_model(arg, usage) 109 | elif opt in ('--weights'): 110 | weights = check_file(arg, 'initialization weights', True, usage) 111 | elif opt in ('--device'): 112 | device = check_int(arg, 'device', usage) 113 | elif opt in ('--feature-dim'): 114 | feature_dim = check_int(arg, 'feature-dim', usage) 115 | elif opt in ('--regions-k'): 116 | regions_k = check_int(arg, 'regions-k', usage) 117 | elif opt in ('--dba'): 118 | dba = check_int(arg, 'dba', usage) 119 | if (dataset_full is None or model is None or 120 | weights is None or device is None or 121 | feature_dim is None or regions_k is None): 122 | print('One or more required arguments is missing.') 123 | usage() 124 | sys.exit(2) 125 | 126 | with torch.cuda.device(device): 127 | try: 128 | main(dataset_full, model, weights, device, feature_dim, 129 | regions_k, dba) 130 | except: 131 | log_detail(P, None, traceback.format_exc()) 132 | raise 133 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgreat/Instance-Search/2cea5f64a2d397047072a91788af81c0ea1c6d5e/train/__init__.py -------------------------------------------------------------------------------- /train/classif_finetune.py: -------------------------------------------------------------------------------- 
1 | # -*- encoding: utf-8 -*- 2 | 3 | import traceback 4 | import random 5 | import torch 6 | import torch.optim as optim 7 | import torch.nn as nn 8 | import torchvision.models as models 9 | import torchvision.transforms as transforms 10 | from torch.autograd import Variable 11 | from classif_finetune_p import P 12 | from utils import move_device, tensor_t, tensor, fold_batches, train_gen 13 | from utils import imread_rgb, log, log_detail, test_print_classif 14 | from utils import test_print_descriptor, get_images_labels 15 | from model.siamese import TuneClassif 16 | from model.custom_modules import NormalizeL2Fun 17 | 18 | # keep labels as global variable. they are initialized after 19 | # train set has been loaded and then kept constant 20 | labels = [] 21 | train_type = P.cnn_model.lower() + ' Classification simple fine-tuning' 22 | 23 | 24 | # test a classifier model. it should be in eval mode 25 | def test_classif_net(net, test_set): 26 | """ 27 | Test the network accuracy on a test_set 28 | Return the number of success and the number of evaluations done 29 | """ 30 | trans = P.test_trans 31 | if P.test_pre_proc: 32 | trans = transforms.Compose([]) 33 | 34 | def eval_batch_test(last, i, is_final, batch): 35 | correct, total = last 36 | n = len(batch) 37 | test_in = tensor(P.cuda_device, n, *P.image_input_size) 38 | for j, (testIm, _, _) in enumerate(batch): 39 | test_in[j] = trans(testIm) 40 | out = net(Variable(test_in, volatile=True)).data 41 | # first get all maximal values for classification 42 | # then, use the spatial region with the highest maximal value 43 | # to make a prediction 44 | _, predicted = torch.max(out, 1) 45 | total += n 46 | correct += sum(labels.index(testLabel) == predicted[j][0] for j, (_, testLabel, _) in enumerate(batch)) 47 | return correct, total 48 | 49 | # batch size has to be 1 here 50 | return fold_batches(eval_batch_test, (0, 0), test_set, P.test_batch_size) 51 | 52 | 53 | def train_classif(net, train_set, testset_tuple, criterion, optimizer, best_score=0): 54 | # trans is a list of transforms for each scale here 55 | trans = P.train_trans 56 | if P.train_pre_proc: 57 | trans = transforms.Compose([]) 58 | 59 | # images are already pre-processed in all cases 60 | def create_epoch(epoch, train_set, testset_tuple): 61 | random.shuffle(train_set) 62 | # labels are needed for stats 63 | return train_set, {} 64 | 65 | def create_batch(batch, n): 66 | train_in = tensor(P.cuda_device, n, *P.image_input_size) 67 | labels_in = tensor_t(torch.LongTensor, P.cuda_device, n) 68 | for j, (im, lab, _) in enumerate(batch): 69 | train_in[j] = trans(im) 70 | labels_in[j] = labels.index(lab) 71 | return [train_in], [labels_in] 72 | 73 | def create_loss(t_out, labels_list): 74 | return criterion(t_out, labels_list[0]), None 75 | 76 | train_gen(train_type, P, test_print_classif, test_classif_net, net, 77 | train_set, testset_tuple, optimizer, create_epoch, create_batch, 78 | create_loss, best_score=best_score) 79 | 80 | 81 | # get the embeddings as the normalized output of the classification 82 | def get_embeddings(net, dataset, device, out_size): 83 | trans = P.test_trans 84 | if P.test_pre_proc: 85 | trans = transforms.Compose([]) 86 | 87 | if not P.embeddings_classify: 88 | # remove classifier and add back later 89 | classifier = net.classifier 90 | net.classifier = nn.Sequential() 91 | 92 | def batch(last, i, is_final, batch): 93 | embeddings = last 94 | n = len(batch) 95 | test_in = tensor(P.cuda_device, n, *P.image_input_size) 96 | for j, (testIm, _, _) in 
enumerate(batch): 97 | test_in[j] = trans(testIm) 98 | out = net(Variable(test_in, volatile=True)) 99 | # we have the classification values. just normalize 100 | out = NormalizeL2Fun()(out) 101 | out = out.data 102 | for j in range(n): 103 | embeddings[i + j] = out[j] 104 | return embeddings 105 | 106 | init = tensor(device, len(dataset), out_size) 107 | embeddings = fold_batches(batch, init, dataset, P.test_batch_size) 108 | if not P.embeddings_classify: 109 | net.classifier = classifier 110 | return embeddings 111 | 112 | 113 | def get_class_net(): 114 | model = models.alexnet 115 | if P.cnn_model.lower() == 'resnet152': 116 | model = models.resnet152 117 | net = TuneClassif(model(pretrained=True), len(labels), untrained=P.untrained_blocks) 118 | if P.preload_net: 119 | net.load_state_dict(torch.load(P.preload_net, map_location=lambda storage, location: storage.cpu())) 120 | net = move_device(net, P.cuda_device) 121 | return net 122 | 123 | 124 | def main(): 125 | # training and test sets 126 | train_set_full = get_images_labels(P.dataset_full, P.match_labels) 127 | test_set_full = get_images_labels(P.dataset_full + '/test', P.match_labels) 128 | 129 | labels_list = [t[1] for t in train_set_full] 130 | # we have to give a number to each label, 131 | # so we need a list here for the index 132 | labels.extend(sorted(list(set(labels_list)))) 133 | 134 | log(P, 'Loading and transforming train/test sets.') 135 | 136 | # open the images (and transform already if possible) 137 | # do that only if it fits in memory ! 138 | train_set, test_train_set, test_set = [], [], [] 139 | train_pre_f = P.train_trans if P.train_pre_proc else transforms.Compose([]) 140 | test_pre_f = P.test_trans if P.test_pre_proc else transforms.Compose([]) 141 | for im, lab in train_set_full: 142 | im_o = imread_rgb(im) 143 | train_set.append((train_pre_f(im_o), lab, im)) 144 | test_train_set.append((test_pre_f(im_o), lab, im)) 145 | 146 | for im, lab in test_set_full: 147 | if lab not in labels: 148 | continue 149 | im_o = imread_rgb(im) 150 | test_set.append((test_pre_f(im_o), lab, im)) 151 | 152 | class_net = get_class_net() 153 | optimizer = optim.SGD((p for p in class_net.parameters() if p.requires_grad), lr=P.train_lr, momentum=P.train_momentum, weight_decay=P.train_weight_decay) 154 | criterion = nn.CrossEntropyLoss(size_average=P.train_loss_avg) 155 | testset_tuple = (test_set, test_train_set) 156 | if P.test_upfront: 157 | log(P, 'Upfront testing of classification model') 158 | score = test_print_classif(train_type, P, class_net, testset_tuple, test_classif_net) 159 | else: 160 | score = 0 161 | if P.train: 162 | log(P, 'Starting classification training') 163 | train_classif(class_net, train_set, testset_tuple, criterion, optimizer, best_score=score) 164 | log(P, 'Finished classification training') 165 | if P.test_descriptor_net: 166 | log(P, 'Testing as descriptor') 167 | test_print_descriptor(train_type, P, class_net, testset_tuple, get_embeddings) 168 | 169 | 170 | if __name__ == '__main__': 171 | with torch.cuda.device(P.cuda_device): 172 | try: 173 | main() 174 | except: 175 | log_detail(P, None, traceback.format_exc()) 176 | raise 177 | -------------------------------------------------------------------------------- /train/classif_finetune_p.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from datetime import datetime 4 | from utils import * # image transforms, general utilities 5 | from global_p import * # global config 6 | 7 | # in 
AlexNet, there are 5 convolutional layers with parameters 8 | # and 3 FC layers in the classifier 9 | # in ResNet, before first layer, there are 2 modules with parameters. 10 | # then number of blocks per layers: 11 | # ResNet152 - layer 1: 3, layer 2: 8, layer 3: 36, layer 4: 3 12 | # ResNet50 - layer 1: 3, layer 2: 4, layer 3: 6, layer 4: 3 13 | # finally, a single FC layer is used as classifier 14 | untrained_blocks = { 15 | 'alexnet': 4, 16 | 'resnet152': 2 + 3 + 8 + 36 17 | } 18 | 19 | 20 | # parameters for the sub-regions classification training with AlexNet 21 | class Params(object): 22 | 23 | def __init__(self): 24 | # general parameters 25 | self.cnn_model = 'AlexNet' 26 | self.dataset_full = 'data/pre_proc/CLICIDE_video_224sq' 27 | self.cuda_device = 0 28 | self.dataset_id = parse_dataset_id(self.dataset_full) 29 | # the file containing mean and standard deviation values 30 | # for a new dataset, simply use the filename here or add it to the 31 | # global_p module parameters 32 | # (this is valid for the following parameters, too) 33 | self.mean_std_file = mean_std_files[self.dataset_id] 34 | # the function for obtaining labels from a filename in the dataset 35 | # this function takes a filename and returns a unique label 36 | self.match_labels = match_label_functions[self.dataset_id] 37 | # input size. this is usually always (3, 224, 224) unless larger 38 | # fixed-size images should be used 39 | self.image_input_size = image_sizes[self.dataset_id] 40 | # the number of different labels in the dataset 41 | self.num_classes = num_classes[self.dataset_id] 42 | # the 2D size of the convolutional features of the base network 43 | self.feature_size2d = feature_sizes[(self.cnn_model.lower(), self.image_input_size)] 44 | # the number of blocks in the base network that should not be trained 45 | # (starting from the lowest and going to higher layers/blocks) 46 | # usually, block represents a layer with parameters, 47 | # for ResNet or equivalent, block is a whole block of layers 48 | self.untrained_blocks = untrained_blocks[self.cnn_model.lower()] 49 | 50 | # read mean and standard of dataset here to define transforms already 51 | m, s = read_mean_std(self.mean_std_file) 52 | 53 | # Classification net general and test params 54 | self.preload_net = '' # allows to continue training a network 55 | self.test_upfront = True 56 | self.train = True 57 | self.test_batch_size = 64 58 | self.test_pre_proc = True 59 | self.test_trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(m, s)]) 60 | 61 | # Classification net training params 62 | self.train_epochs = 50 63 | self.train_batch_size = 32 64 | self.train_micro_batch = 0 65 | self.train_aug_rot = r = 180 66 | self.train_aug_hrange = hr = 0.25 67 | self.train_aug_vrange = vr = 0.25 68 | self.train_aug_hsrange = hsr = 0.25 69 | self.train_aug_vsrange = vsr = 0.25 70 | self.train_aug_hflip = hflip = True 71 | trans = transforms.Compose([random_affine_noisy_cv(rotation=r, h_range=hr, v_range=vr, hs_range=hsr, vs_range=vsr, h_flip=hflip), transforms.ToTensor(), transforms.Normalize(m, s)]) 72 | # transformation for each scale 73 | self.train_trans = trans 74 | self.train_pre_proc = False 75 | 76 | self.train_lr = 1e-2 77 | self.train_momentum = 0.9 78 | self.train_weight_decay = 5e-4 79 | self.train_optim = 'SGD' 80 | self.train_annealing = {30: 0.1} 81 | self.train_loss_avg = True 82 | self.train_loss_int = 10 83 | self.train_test_int = 0 84 | # the batch norm layer cannot be trained if the micro-batch size 85 | # is too small, 
as global variances/means cannot be properly 86 | # approximated in this case. so train only when having a batch 87 | # of at least 16 88 | self.train_bn = self.train_micro_batch >= 16 or (self.train_micro_batch <= 0 and (self.train_batch_size >= 16 or self.train_batch_size <= 0)) 89 | 90 | # Descriptor net parameters 91 | # if True, test the network as a descriptor 92 | # (using the normalized classification output): 93 | self.test_descriptor_net = True 94 | # the threshold (in Bytes) for embeddings to be computed on GPU 95 | self.embeddings_cuda_size = 2 ** 30 96 | # if True, use classifier output for embeddings. 97 | # else use convolutional features 98 | self.embeddings_classify = False 99 | self.feature_dim = self.num_classes if self.embeddings_classify else flat_feature_sizes[(self.cnn_model.lower(), self.image_input_size)] 100 | # UUID for these parameters (current time) 101 | self.uuid = datetime.now() 102 | self.save_dir = 'data' 103 | self.log_file = path.join(self.save_dir, unique_str(self) + '.log') 104 | 105 | 106 | # global test params: 107 | P = Params() 108 | -------------------------------------------------------------------------------- /train/classif_regions.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import traceback 4 | import random 5 | import torch 6 | import torch.optim as optim 7 | import torch.nn as nn 8 | import torchvision.models as models 9 | import torchvision.transforms as transforms 10 | from torch.autograd import Variable 11 | from classif_regions_p import P 12 | from utils import move_device, tensor_t, tensor, fold_batches, train_gen 13 | from utils import imread_rgb, log, log_detail, test_print_classif 14 | from utils import test_print_descriptor, get_images_labels 15 | from model.siamese import TuneClassif, TuneClassifSub 16 | from model.custom_modules import NormalizeL2Fun 17 | 18 | # keep labels as global variable. they are initialized after 19 | # train set has been loaded and then kept constant 20 | labels = [] 21 | train_type = P.cnn_model.lower() + ' Classification sub-regions' 22 | 23 | 24 | # test a classifier model. 
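# Illustrative sketch (not part of the repository): the read-out performed by
# test_classif_net just below. The sub-regions network returns one score per
# class and per spatial position; the prediction is the class at the position
# whose best score is highest. Written for a recent PyTorch with illustrative
# names; the original code does this with successive torch.max calls.
import torch

def predict_from_spatial_scores(score_map):
    # score_map: tensor of shape (num_classes, H, W)
    best_per_pos, best_class_per_pos = score_map.max(dim=0)     # both (H, W)
    flat_pos = best_per_pos.view(-1).argmax()                   # most activated position
    return best_class_per_pos.view(-1)[flat_pos].item()         # predicted class index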
it should be in eval mode 25 | def test_classif_net(net, test_set): 26 | """ 27 | Test the network accuracy on a test_set 28 | Return the number of success and the number of evaluations done 29 | """ 30 | trans = P.test_trans 31 | if P.test_pre_proc: 32 | trans = transforms.Compose([]) 33 | 34 | def eval_batch_test(last, i, is_final, batch): 35 | correct, total = last 36 | im_trans = trans(batch[0][0]) 37 | test_in = move_device(im_trans.unsqueeze(0), P.cuda_device) 38 | out = net(Variable(test_in, volatile=True))[0].data 39 | # first get all maximal values for classification 40 | # then, use the spatial region with the highest maximal value 41 | # to make a prediction 42 | max_pred, predicted = torch.max(out, 1) 43 | _, max_subp = torch.max(max_pred.view(-1), 0) 44 | predicted = predicted.view(-1)[max_subp[0]] 45 | total += 1 46 | correct += (labels.index(batch[0][1]) == predicted) 47 | 48 | return correct, total 49 | 50 | # batch size has to be 1 here 51 | return fold_batches(eval_batch_test, (0, 0), test_set, 1) 52 | 53 | 54 | def train_classif_subparts(net, train_set, testset_tuple, criterion, optimizer, best_score=0): 55 | # trans is a list of transforms for each scale here 56 | trans_scales = P.train_trans 57 | for i, t in enumerate(trans_scales): 58 | if P.train_pre_proc[i]: 59 | trans_scales[i] = transforms.Compose([]) 60 | 61 | # images are already pre-processed in all cases 62 | def create_epoch(epoch, train_set, testset_tuple): 63 | random.shuffle(train_set) 64 | # labels are needed for stats 65 | return train_set, {} 66 | 67 | def create_batch(batch, n): 68 | # must proceed image by image (since different input sizes) 69 | # each image/batch is composed of multiple scales 70 | n_sc = len(batch[0][0]) 71 | train_in_scales = [] 72 | labels_in = tensor_t(torch.LongTensor, P.cuda_device, 1) 73 | labels_in.fill_(labels.index(batch[0][1])) 74 | for j in range(n_sc): 75 | im = trans_scales[j](batch[0][0][j]) 76 | train_in = move_device(im.unsqueeze(0), P.cuda_device) 77 | train_in_scales.append(train_in) 78 | return train_in_scales, [labels_in] 79 | 80 | def create_loss(scales_out, labels_list): 81 | # scales_out is a list over all scales, 82 | # with all sub-region classifications for each scale 83 | labels_in = labels_list[0] 84 | loss = None 85 | for s, t_out in enumerate(scales_out): 86 | # batch size is 1, only consider this output 87 | t_out0 = t_out[0] 88 | # all spatial outputs are of shape (num_classes, width, height) 89 | # make a 'batch' as follows: (width * height, num_classes) 90 | # then apply loss to the whole batch, and accumulate over scales 91 | t_out_all = t_out0.view(t_out0.size(0), -1).t() 92 | if loss is None: 93 | loss = criterion(t_out_all, labels_in.expand(t_out_all.size(0))) 94 | else: 95 | loss += criterion(t_out_all, labels_in.expand(t_out_all.size(0))) 96 | if P.train_loss_avg: 97 | loss /= len(scales_out) 98 | return loss, None 99 | 100 | train_gen(train_type, P, test_print_classif, test_classif_net, net, 101 | train_set, testset_tuple, optimizer, create_epoch, create_batch, 102 | create_loss, best_score=best_score) 103 | 104 | 105 | # get the embeddings as the normalized output of the classification 106 | # values where the highest maximal activation occurred 107 | def get_embeddings(net, dataset, device, out_size): 108 | test_trans = P.test_trans 109 | if P.test_pre_proc: 110 | test_trans = transforms.Compose([]) 111 | 112 | def batch(last, i, is_final, batch): 113 | embeddings = last 114 | im_trans = test_trans(batch[0][0]) 115 | test_in = 
move_device(im_trans.unsqueeze(0), P.cuda_device) 116 | out = net(Variable(test_in, volatile=True))[0].data 117 | # first, determine location of highest maximal activation 118 | max_pred, _ = out.max(1) 119 | max_pred1, max_i1 = max_pred.max(2) 120 | _, max_i2 = max_pred1.max(3) 121 | i2 = max_i2.view(-1)[0] 122 | i1 = max_i1.view(-1)[i2] 123 | # we have the indexes of the highest maximal activation, 124 | # get the classification values at this point and normalize 125 | out = out[:, :, i1, i2] 126 | out = NormalizeL2Fun()(Variable(out, volatile=True)) 127 | out = out.data 128 | embeddings[i] = out[0] 129 | return embeddings 130 | 131 | init = tensor(device, len(dataset), out_size) 132 | return fold_batches(batch, init, dataset, 1) 133 | 134 | 135 | def get_class_net(): 136 | model = models.alexnet 137 | if P.cnn_model.lower() == 'resnet152': 138 | model = models.resnet152 139 | if P.bn_model: 140 | bn_model = TuneClassif(model(), len(labels)) 141 | bn_model.load_state_dict(torch.load(P.bn_model, map_location=lambda storage, location: storage.cpu())) 142 | # copy_bn_all(net.features, bn_model.features) 143 | else: 144 | bn_model = model(pretrained=True) 145 | net = TuneClassifSub(bn_model, len(labels), P.feature_size2d, untrained=P.untrained_blocks) 146 | if P.preload_net: 147 | net.load_state_dict(torch.load(P.preload_net, map_location=lambda storage, location: storage.cpu())) 148 | net = move_device(net, P.cuda_device) 149 | return net 150 | 151 | 152 | def main(): 153 | # training and test sets 154 | train_set_full = get_images_labels(P.dataset_full, P.match_labels) 155 | test_set_full = get_images_labels(P.dataset_full + '/test', P.match_labels) 156 | 157 | labels_list = [t[1] for t in train_set_full] 158 | # we have to give a number to each label, 159 | # so we need a list here for the index 160 | labels.extend(sorted(list(set(labels_list)))) 161 | 162 | log(P, 'Loading and transforming train/test sets.') 163 | 164 | # open the images (and transform already if possible) 165 | # do that only if it fits in memory ! 
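# Illustrative sketch (not part of the repository): the form of the descriptors
# produced by get_embeddings above. The classification values taken at the most
# activated position are passed through NormalizeL2Fun (model/custom_modules.py),
# i.e. scaled to unit L2 norm, so that the torch.mm of two embedding matrices in
# the test scripts yields cosine similarities. A plain restatement for a recent
# PyTorch, with illustrative names:
def l2_normalize_rows(x, eps=1e-12):
    # x: (N, d) tensor of raw descriptor values
    norms = x.norm(p=2, dim=1, keepdim=True)
    return x / norms.clamp(min=eps)     # each row becomes a unit-length descriptor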
166 | train_set, test_train_set, test_set = [], [], [] 167 | train_pre_f = [t if pre_proc else transforms.Compose([]) for t, pre_proc in zip(P.train_trans, P.train_pre_proc)] 168 | test_pre_f = P.test_trans if P.test_pre_proc else transforms.Compose([]) 169 | train_scales = P.train_sub_scales 170 | for im, lab in train_set_full: 171 | im_o = imread_rgb(im) 172 | scales = [t(im_o) for t in train_scales] 173 | train_set.append((scales, lab, im)) 174 | for j, t in enumerate(train_pre_f): 175 | scales[j] = t(scales[j]) 176 | im_pre_test = test_pre_f(im_o) if test_pre_f else im_o 177 | test_train_set.append((im_pre_test, lab, im)) 178 | 179 | for im, lab in test_set_full: 180 | if lab not in labels: 181 | continue 182 | im_o = imread_rgb(im) 183 | test_set.append((test_pre_f(im_o), lab, im)) 184 | 185 | class_net = get_class_net() 186 | optimizer = optim.SGD((p for p in class_net.parameters() if p.requires_grad), lr=P.train_lr, momentum=P.train_momentum, weight_decay=P.train_weight_decay) 187 | criterion = nn.CrossEntropyLoss(size_average=P.train_loss_avg) 188 | testset_tuple = (test_set, test_train_set) 189 | if P.test_upfront: 190 | log(P, 'Upfront testing of classification model') 191 | score = test_print_classif(train_type, P, class_net, testset_tuple, test_classif_net) 192 | else: 193 | score = 0 194 | if P.train: 195 | log(P, 'Starting classification training') 196 | train_classif_subparts(class_net, train_set, testset_tuple, criterion, optimizer, best_score=score) 197 | log(P, 'Finished classification training') 198 | if P.test_descriptor_net: 199 | log(P, 'Testing as descriptor') 200 | test_print_descriptor(train_type, P, class_net, testset_tuple, get_embeddings) 201 | 202 | 203 | if __name__ == '__main__': 204 | with torch.cuda.device(P.cuda_device): 205 | try: 206 | main() 207 | except: 208 | log_detail(P, None, traceback.format_exc()) 209 | raise 210 | -------------------------------------------------------------------------------- /train/classif_regions_p.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from datetime import datetime 4 | from utils import * # image transforms, general utilities 5 | from global_p import * # global config 6 | 7 | # in AlexNet, there are 5 convolutional layers with parameters 8 | # and 3 FC layers in the classifier 9 | # in ResNet, before first layer, there are 2 modules with parameters. 
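# Illustrative sketch (not part of the repository): what the untrained_blocks
# counts defined below mean operationally. The model wrappers in model/siamese.py
# receive this count through their untrained= argument; leaving the first n
# parameterised blocks untrained amounts to clearing requires_grad on their
# parameters, which the optimizers above respect by filtering on p.requires_grad.
# Names below are illustrative, assuming blocks ordered from input to output.
def freeze_first_blocks(blocks, n):
    for block in blocks[:n]:
        for p in block.parameters():
            p.requires_grad = False    # excluded from gradient updates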
10 | # then number of blocks per layers: 11 | # ResNet152 - layer 1: 3, layer 2: 8, layer 3: 36, layer 4: 3 12 | # ResNet50 - layer 1: 3, layer 2: 4, layer 3: 6, layer 4: 3 13 | # finally, a single FC layer is used as classifier 14 | untrained_blocks = { 15 | 'alexnet': 4, 16 | 'resnet152': 2 + 3 + 8 + 36 17 | } 18 | 19 | 20 | # parameters for the sub-regions classification training with AlexNet 21 | class Params(object): 22 | 23 | def __init__(self): 24 | # general parameters 25 | self.cnn_model = 'ResNet152' 26 | self.dataset_full = 'data/pre_proc/fourviere_clean2_448' 27 | self.cuda_device = 0 28 | self.dataset_id = parse_dataset_id(self.dataset_full) 29 | # the file containing mean and standard deviation values 30 | # for a new dataset, simply use the filename here or add it to the 31 | # global_p module parameters 32 | # (this is valid for the following parameters, too) 33 | self.mean_std_file = mean_std_files[self.dataset_id] 34 | # the function for obtaining labels from a filename in the dataset 35 | # this function takes a filename and returns a unique label 36 | self.match_labels = match_label_functions[self.dataset_id] 37 | # input size. this is usually always (3, 224, 224) unless larger 38 | # fixed-size images should be used 39 | self.image_input_size = image_sizes[self.dataset_id] 40 | # the number of different labels in the dataset 41 | self.num_classes = num_classes[self.dataset_id] 42 | # the 2D size of the convolutional features of the base network 43 | self.feature_size2d = feature_sizes[(self.cnn_model.lower(), self.image_input_size)] 44 | # the number of blocks in the base network that should not be trained 45 | # (starting from the lowest and going to higher layers/blocks) 46 | # usually, block represents a layer with parameters, 47 | # for ResNet or equivalent, block is a whole block of layers 48 | self.untrained_blocks = untrained_blocks[self.cnn_model.lower()] 49 | 50 | # read mean and standard of dataset here to define transforms already 51 | m, s = read_mean_std(self.mean_std_file) 52 | 53 | # Classification net general and test params 54 | self.preload_net = '' # allows to continue training a network 55 | self.bn_model = 'data/final_classif_ft/fou_resnet152.pth.tar' 56 | self.test_upfront = True 57 | self.train = True 58 | self.test_pre_proc = True 59 | self.test_trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(m, s)]) 60 | 61 | # Classification net training params 62 | self.train_epochs = 50 63 | self.train_batch_size = 32 64 | self.train_micro_batch = 1 # has to be 1 65 | self.train_aug_rot = r = 45 66 | self.train_aug_hrange = hr = 0 67 | self.train_aug_vrange = vr = 0 68 | self.train_aug_hsrange = hsr = 0.25 69 | self.train_aug_vsrange = vsr = 0.25 70 | self.train_aug_hflip = hflip = True 71 | trans = transforms.Compose([random_affine_noisy_cv(rotation=r, h_range=hr, v_range=vr, hs_range=hsr, vs_range=vsr, h_flip=hflip), transforms.ToTensor(), transforms.Normalize(m, s)]) 72 | # list of transforms for all scales 73 | # the train_trans parameter should be a list of same 74 | # length representing the train transformation for each scale 75 | self.train_sub_scales = [transforms.Compose([]), scale_cv(224)] 76 | # transformation for each scale 77 | self.train_trans = [trans, trans] 78 | self.train_pre_proc = [False, False] 79 | 80 | self.train_lr = 1e-3 81 | self.train_momentum = 0.9 82 | self.train_weight_decay = 5e-4 83 | self.train_optim = 'SGD' 84 | self.train_annealing = {30: 0.1} 85 | self.train_loss_avg = True 86 | self.train_loss_int 
= 10 87 | self.train_test_int = 0 88 | # the batch norm layer cannot be trained if the micro-batch size 89 | # is too small, as global variances/means cannot be properly 90 | # approximated in this case. so train only when having a batch 91 | # of at least 16 92 | self.train_bn = False 93 | 94 | # Descriptor net parameters 95 | # if True, test the network as a descriptor 96 | # (using the normalized classification output): 97 | self.test_descriptor_net = True 98 | # the threshold (in Bytes) for embeddings to be computed on GPU 99 | self.embeddings_cuda_size = 2 ** 30 100 | self.feature_dim = self.num_classes 101 | 102 | # UUID for these parameters (current time) 103 | self.uuid = datetime.now() 104 | self.save_dir = 'data' 105 | self.log_file = path.join(self.save_dir, unique_str(self) + '.log') 106 | 107 | 108 | # global test params: 109 | P = Params() 110 | -------------------------------------------------------------------------------- /train/global_p.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | # from utils import trans_str, fun_str 4 | 5 | 6 | def match_label_fou_clean2(x): 7 | s = x.split('/')[-1].split('_') 8 | return s[0] + s[1] 9 | 10 | 11 | def match_label_video(x): 12 | return x.split('/')[-1].split('-')[0] 13 | 14 | 15 | def match_label_oxford(x): 16 | return x.split('/')[-1].split('_')[0] 17 | 18 | 19 | image_sizes = { 20 | 'CLICIDE': (3, 224, 224), 21 | 'CLICIDE_max_224sq': (3, 224, 224), 22 | 'CLICIDE_video_227sq': (3, 227, 227), 23 | 'CLICIDE_video_224sq': (3, 224, 224), 24 | 'CLICIDE_video_384': (3, 224, 224), 25 | 'CLICIDE_video_448': (3, 224, 224), 26 | 'fourviere_clean2_224sq': (3, 224, 224), 27 | 'fourviere_clean2_384': (3, 224, 224), 28 | 'fourviere_clean2_448': (3, 224, 224), 29 | 'oxford5k_video_224sq': (3, 224, 224), 30 | 'oxford5k_video_384': (3, 224, 224) 31 | } 32 | 33 | num_classes = { 34 | 'CLICIDE': 464, 35 | 'CLICIDE_max_224sq': 464, 36 | 'CLICIDE_video_227sq': 464, 37 | 'CLICIDE_video_224sq': 464, 38 | 'CLICIDE_video_384': 464, 39 | 'CLICIDE_video_448': 464, 40 | 'fourviere_clean2_224sq': 311, 41 | 'fourviere_clean2_384': 311, 42 | 'fourviere_clean2_448': 311, 43 | 'oxford5k_video_224sq': 17, 44 | 'oxford5k_video_384': 17 45 | } 46 | 47 | feature_sizes = { 48 | ('alexnet', (3, 224, 224)): (6, 6), 49 | ('resnet152', (3, 224, 224)): (7, 7), 50 | ('resnet152', (3, 227, 227)): (8, 8) 51 | } 52 | 53 | flat_feature_sizes = { 54 | ('alexnet', (3, 224, 224)): 9216, 55 | ('resnet152', (3, 224, 224)): 2048 56 | } 57 | 58 | mean_std_files = { 59 | 'CLICIDE': 'data/CLICIDE_224sq_train_ms.txt', 60 | 'CLICIDE_video_227sq': 'data/cli.txt', 61 | 'CLICIDE_video_224sq': 'data/CLICIDE_224sq_train_ms.txt', 62 | 'CLICIDE_max_224sq': 'data/CLICIDE_224sq_train_ms.txt', 63 | 'CLICIDE_video_384': 'data/CLICIDE_384_train_ms.txt', 64 | 'CLICIDE_video_448': 'data/CLICIDE_448_train_ms.txt', 65 | 'fourviere_clean2_224sq': 'data/fourviere_224sq_train_ms.txt', 66 | 'fourviere_clean2_384': 'data/fourviere_384_train_ms.txt', 67 | 'fourviere_clean2_448': 'data/fourviere_448_train_ms.txt', 68 | 'oxford5k_video_224sq': 'data/oxford5k_224sq_train_ms.txt', 69 | 'oxford5k_video_384': 'data/oxford5k_384_train_ms.txt', 70 | } 71 | 72 | match_label_functions = { 73 | 'CLICIDE': match_label_video, 74 | 'CLICIDE_video_227sq': match_label_video, 75 | 'CLICIDE_max_224sq': match_label_video, 76 | 'CLICIDE_video_224sq': match_label_video, 77 | 'CLICIDE_video_384': match_label_video, 78 | 'CLICIDE_video_448': match_label_video, 
79 | 'fourviere_clean2_224sq': match_label_fou_clean2, 80 | 'fourviere_clean2_384': match_label_fou_clean2, 81 | 'fourviere_clean2_448': match_label_fou_clean2, 82 | 'oxford5k_video_224sq': match_label_oxford, 83 | 'oxford5k_video_384': match_label_oxford 84 | } 85 | -------------------------------------------------------------------------------- /train/siamese_descriptor.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import traceback 4 | import random 5 | import numpy as np 6 | import torch 7 | import torch.optim as optim 8 | import torchvision.models as models 9 | import torchvision.transforms as transforms 10 | from torch.autograd import Variable 11 | from siamese_descriptor_p import P 12 | from utils import move_device, tensor_t, tensor, fold_batches, train_gen 13 | from utils import imread_rgb, log, log_detail, get_lab_indicators 14 | from utils import get_images_labels, get_similarities, embeddings_device_dim 15 | from utils import test_print_descriptor, choose_rand_neg, get_pos_couples 16 | from model.siamese import TuneClassif, DescriptorNet 17 | from model.custom_modules import TripletLoss 18 | 19 | # keep labels as global variable. they are initialized after 20 | # train set has been loaded and then kept constant 21 | labels = [] 22 | train_type = P.cnn_model.lower() + ' Siamese descriptor' 23 | 24 | 25 | def get_embeddings(net, dataset, device, out_size): 26 | test_trans = P.test_trans 27 | if P.test_pre_proc: 28 | test_trans = transforms.Compose([]) 29 | 30 | def batch(last, i, is_final, batch): 31 | embeddings = last 32 | test_in = tensor(P.cuda_device, len(batch), *P.image_input_size) 33 | for j, (im, _, _) in enumerate(batch): 34 | test_in[j] = test_trans(im) 35 | out = net(Variable(test_in, volatile=True)).data 36 | for j, embedding in enumerate(out): 37 | embeddings[i + j] = embedding 38 | return embeddings 39 | 40 | init = tensor(device, len(dataset), out_size) 41 | return fold_batches(batch, init, dataset, P.test_batch_size) 42 | 43 | 44 | # train using triplets, constructing triplets from all positive couples 45 | def train_siam_triplets_pos_couples(net, train_set, testset_tuple, criterion, optimizer, best_score=0): 46 | """ 47 | TODO 48 | """ 49 | train_trans = P.train_trans 50 | if P.train_pre_proc: 51 | train_trans = transforms.Compose([]) 52 | 53 | couples = get_pos_couples(train_set) 54 | sim_device, _ = embeddings_device_dim(P, net, len(train_set), sim_matrix=True) 55 | lab_indicators = get_lab_indicators(train_set, sim_device) 56 | num_pos = sum(len(couples[lab]) for lab in couples) 57 | log(P, '#pos (without order, with duplicates):{0}'.format(num_pos)) 58 | 59 | # fold over positive couples here and choose negative for each pos 60 | # need to make sure the couples are evenly distributed 61 | # such that all batches can have couples from every instance 62 | def shuffle_couples(couples): 63 | for lab in couples: 64 | random.shuffle(couples[lab]) 65 | # get x such that only 20% of labels have more than x couples 66 | a = np.array([len(couples[lab]) for lab in couples]) 67 | x = int(np.percentile(a, 80)) 68 | out = [] 69 | keys = couples.keys() 70 | random.shuffle(keys) 71 | # append the elements to out in a strided way 72 | # (up to x elements per label) 73 | for count in range(x): 74 | for lab in keys: 75 | if count >= len(couples[lab]): 76 | continue 77 | out.append(couples[lab][count]) 78 | # the last elements in the longer lists are inserted at random 79 | for lab in keys: 80 | for i 
in range(x, len(couples[lab])): 81 | out.insert(random.randrange(len(out)), couples[lab][i]) 82 | return out 83 | 84 | def create_epoch(epoch, couples, testset_tuple): 85 | test_ref_set = testset_tuple[1] 86 | # use the test-train set to obtain embeddings and similarities 87 | # (since it may be transformed differently than train set) 88 | similarities, _ = get_similarities(P, get_embeddings, net, test_ref_set) 89 | 90 | # shuffle the couples 91 | shuffled = shuffle_couples(couples) 92 | return shuffled, {'epoch': epoch, 'similarities': similarities} 93 | 94 | def create_batch(batch, n, epoch, similarities): 95 | # one image at a time. batch is always of size 1 96 | train_in1 = tensor(P.cuda_device, n, *P.image_input_size) 97 | train_in2 = tensor(P.cuda_device, n, *P.image_input_size) 98 | train_in3 = tensor(P.cuda_device, n, *P.image_input_size) 99 | labels_in = tensor_t(torch.LongTensor, P.cuda_device, n) 100 | # we get positive couples. find negatives for them 101 | for j, (lab, (i1, i2), (im1, im2)) in enumerate(batch): 102 | im3 = None 103 | # choose a semi-hard negative. see FaceNet 104 | # paper by Schroff et al for details. 105 | # essentially, choose hardest negative that is still 106 | # easier than the positive. this should avoid 107 | # collapsing the model at beginning of training 108 | ind_exl = lab_indicators[lab] 109 | sim_pos = similarities[i1, i2] 110 | if epoch < P.train_epoch_switch: 111 | # exclude all positives as well as any that are 112 | # more similar than sim_pos 113 | ind_exl = ind_exl | similarities[i1].ge(sim_pos) 114 | if ind_exl.sum() >= similarities.size(0): 115 | p = 'cant find semi-hard neg for' 116 | s = 'falling back to random neg' 117 | n_pos = lab_indicators[lab].sum() 118 | n_ge = similarities[i1].ge(sim_pos).sum() 119 | n_tot = similarities.size(0) 120 | print('{0} {1}-{2}-{3} (#pos:{4}, #ge:{5}, #total:{6}), {7}'.format(p, i1, i2, lab, n_pos, n_ge, n_tot, s)) 121 | else: 122 | # similarities must be in [-1, 1] 123 | # set all similarities of excluded indexes to -2 124 | # then take argmax (highest similarity not excluded) 125 | sims = similarities[i1].clone() 126 | sims[ind_exl] = -2 127 | _, k = sims.max(0) 128 | im3 = train_set[k[0]][0] 129 | if im3 is None: 130 | # default to random negative 131 | im3 = choose_rand_neg(train_set, lab) 132 | # one image at a time 133 | train_in1[j] = train_trans(im1) 134 | train_in2[j] = train_trans(im2) 135 | train_in3[j] = train_trans(im3) 136 | labels_in[j] = labels.index(lab) 137 | # return input tensors and labels 138 | return [train_in1, train_in2, train_in3], [labels_in] 139 | 140 | def create_loss(out, labels_list): 141 | # out is a tuple of 3 descriptors 142 | # simply apply triplet loss 143 | loss = criterion(*out) 144 | return loss, None 145 | 146 | train_gen(train_type, P, test_print_descriptor, get_embeddings, net, 147 | couples, testset_tuple, optimizer, create_epoch, create_batch, 148 | create_loss, best_score=best_score) 149 | 150 | 151 | def get_siamese_net(): 152 | model = models.alexnet 153 | if P.cnn_model.lower() == 'resnet152': 154 | model = models.resnet152 155 | class_net = TuneClassif(model(pretrained=True), P.num_classes, untrained=P.untrained_blocks) 156 | if P.classif_model: 157 | class_net.load_state_dict(torch.load(P.classif_model, map_location=lambda storage, location: storage.cpu())) 158 | net = DescriptorNet(class_net, P.feature_dim, P.feature_size2d, untrained=P.untrained_blocks) 159 | if P.preload_net: 160 | net.load_state_dict(torch.load(P.preload_net, map_location=lambda 
storage, location: storage.cpu())) 161 | net = move_device(net, P.cuda_device) 162 | return net 163 | 164 | 165 | def main(): 166 | # training and test sets 167 | train_set_full = get_images_labels(P.dataset_full, P.match_labels) 168 | test_set_full = get_images_labels(P.dataset_full + '/test', P.match_labels) 169 | 170 | labels_list = [t[1] for t in train_set_full] 171 | # we have to give a number to each label, 172 | # so we need a list here for the index 173 | labels.extend(sorted(list(set(labels_list)))) 174 | 175 | log(P, 'Loading and transforming train/test sets.') 176 | 177 | train_set, test_train_set, test_set = [], [], [] 178 | train_pre_f = P.train_trans if P.train_pre_proc else transforms.Compose([]) 179 | test_pre_f = P.test_trans if P.test_pre_proc else transforms.Compose([]) 180 | for im, lab in train_set_full: 181 | im_o = imread_rgb(im) 182 | train_set.append((train_pre_f(im_o), lab, im)) 183 | test_train_set.append((test_pre_f(im_o), lab, im)) 184 | 185 | for im, lab in test_set_full: 186 | if lab not in labels: 187 | continue 188 | im_o = imread_rgb(im) 189 | test_set.append((test_pre_f(im_o), lab, im)) 190 | 191 | siam_net = get_siamese_net() 192 | optimizer = optim.SGD((p for p in siam_net.parameters() if p.requires_grad), lr=P.train_lr, momentum=P.train_momentum, weight_decay=P.train_weight_decay) 193 | criterion = TripletLoss(P.triplet_margin, P.train_loss_avg) 194 | testset_tuple = (test_set, test_train_set) 195 | if P.test_upfront: 196 | log(P, 'Upfront testing of descriptor model') 197 | score = test_print_descriptor(train_type, P, siam_net, testset_tuple, get_embeddings) 198 | else: 199 | score = 0 200 | if P.train: 201 | log(P, 'Starting region-descriptor training') 202 | train_siam_triplets_pos_couples(siam_net, train_set, testset_tuple, criterion, optimizer, best_score=score) 203 | log(P, 'Finished region-descriptor training') 204 | if P.test_descriptor_net: 205 | log(P, 'Testing as descriptor') 206 | # set best score high enough such that it will never be saved 207 | test_print_descriptor(train_type, P, siam_net, testset_tuple, get_embeddings, best_score=len(test_set) + 1) 208 | 209 | 210 | if __name__ == '__main__': 211 | with torch.cuda.device(P.cuda_device): 212 | try: 213 | main() 214 | except: 215 | log_detail(P, None, traceback.format_exc()) 216 | raise 217 | -------------------------------------------------------------------------------- /train/siamese_descriptor_p.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from datetime import datetime 4 | from utils import * # image transforms, general utilities 5 | from global_p import * # global config 6 | 7 | # in AlexNet, there are 5 convolutional layers with parameters 8 | # and 3 FC layers in the classifier 9 | # in ResNet, before first layer, there are 2 modules with parameters. 
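# Illustrative sketch (not part of the repository): the triplet objective behind
# the TripletLoss criterion used by the script above and configured below via
# triplet_margin. Whether the repository version compares Euclidean distances or
# cosine similarities is not shown here; the cosine form on L2-normalized
# descriptors is used purely as an example (recent PyTorch, illustrative names).
import torch

def triplet_margin_loss(anchor, positive, negative, margin=0.1):
    # all inputs: (N, d) tensors of unit-length descriptors
    sim_pos = (anchor * positive).sum(dim=1)    # anchor-positive cosine similarity
    sim_neg = (anchor * negative).sum(dim=1)    # anchor-negative cosine similarity
    # hinge: the positive must beat the negative by at least the margin
    return torch.clamp(margin - sim_pos + sim_neg, min=0).mean()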
10 | # then number of blocks per layers: 11 | # ResNet152 - layer 1: 3, layer 2: 8, layer 3: 36, layer 4: 3 12 | # ResNet50 - layer 1: 3, layer 2: 4, layer 3: 6, layer 4: 3 13 | # finally, a single FC layer is used as classifier 14 | untrained_blocks = { 15 | 'alexnet': 4, 16 | 'resnet152': 2 + 3 + 8 + 36 17 | } 18 | 19 | 20 | # parameters for the sub-regions classification training with AlexNet 21 | class Params(object): 22 | 23 | def __init__(self): 24 | # general parameters 25 | self.cnn_model = 'ResNet152' 26 | self.dataset_full = 'data/pre_proc/CLICIDE_video_224sq' 27 | self.cuda_device = 0 28 | self.dataset_id = parse_dataset_id(self.dataset_full) 29 | # the file containing mean and standard deviation values 30 | # for a new dataset, simply use the filename here or add it to the 31 | # global_p module parameters 32 | # (this is valid for the following parameters, too) 33 | self.mean_std_file = mean_std_files[self.dataset_id] 34 | # the function for obtaining labels from a filename in the dataset 35 | # this function takes a filename and returns a unique label 36 | self.match_labels = match_label_functions[self.dataset_id] 37 | # input size. this is usually always (3, 224, 224) unless larger 38 | # fixed-size images should be used 39 | self.image_input_size = image_sizes[self.dataset_id] 40 | # the number of different labels in the dataset 41 | self.num_classes = num_classes[self.dataset_id] 42 | # the 2D size of the convolutional features of the base network 43 | self.feature_size2d = feature_sizes[(self.cnn_model.lower(), self.image_input_size)] 44 | # the number of blocks in the base network that should not be trained 45 | # (starting from the lowest and going to higher layers/blocks) 46 | # usually, block represents a layer with parameters, 47 | # for ResNet or equivalent, block is a whole block of layers 48 | self.untrained_blocks = untrained_blocks[self.cnn_model.lower()] 49 | 50 | # read mean and standard of dataset here to define transforms already 51 | m, s = read_mean_std(self.mean_std_file) 52 | 53 | # Classification net general and test params 54 | self.preload_net = '' # allows to continue training a network 55 | self.classif_model = 'data/final_classif_ft/cli_resnet152.pth.tar' 56 | self.test_upfront = True 57 | self.train = True 58 | self.test_batch_size = 32 59 | self.test_pre_proc = True 60 | self.test_trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(m, s)]) 61 | 62 | # Classification net training params 63 | self.train_epochs = 20 64 | self.train_batch_size = 64 65 | self.train_micro_batch = 8 66 | self.train_aug_rot = r = 45 67 | self.train_aug_hrange = hr = 0 68 | self.train_aug_vrange = vr = 0 69 | self.train_aug_hsrange = hsr = 0.25 70 | self.train_aug_vsrange = vsr = 0.25 71 | self.train_aug_hflip = hflip = True 72 | trans = transforms.Compose([random_affine_noisy_cv(rotation=r, h_range=hr, v_range=vr, hs_range=hsr, vs_range=vsr, h_flip=hflip), transforms.ToTensor(), transforms.Normalize(m, s)]) 73 | 74 | # transformation for each scale 75 | self.train_trans = trans 76 | self.train_pre_proc = False 77 | 78 | self.train_lr = 1e-3 79 | self.train_momentum = 0.9 80 | self.train_weight_decay = 5e-4 81 | self.train_optim = 'SGD' 82 | self.train_annealing = {} 83 | self.train_loss_avg = False 84 | self.train_loss_int = 10 85 | self.train_test_int = 0 86 | # the batch norm layer cannot be trained if the micro-batch size 87 | # is too small, as global variances/means cannot be properly 88 | # approximated in this case. 
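# Illustrative sketch (not part of the repository): what disabling batch-norm
# training (the train_bn flag below) amounts to in practice, assuming a recent
# PyTorch. With micro-batches of only a few images the batch statistics are
# meaningless, so the layers keep their stored running statistics and their
# affine parameters are not updated. Names are illustrative.
import torch.nn as nn

def freeze_batch_norm(net):
    for m in net.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()                        # keep using the running mean/variance
            for p in m.parameters():
                p.requires_grad = False     # do not update the affine weight/bias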
so train only when having a batch 89 | # of at least 16 90 | self.train_bn = self.train_micro_batch >= 16 or (self.train_micro_batch <= 0 and (self.train_batch_size >= 16 or self.train_batch_size <= 0)) 91 | 92 | # Descriptor net parameters 93 | # if True, test the network as a descriptor 94 | # (using the normalized classification output): 95 | self.test_descriptor_net = True 96 | # the threshold (in Bytes) for embeddings to be computed on GPU 97 | self.embeddings_cuda_size = 2 ** 30 98 | self.feature_dim = 2048 99 | self.triplet_margin = 0.1 100 | # number of epochs after which semi-hard triplet choice switches 101 | # to hard triplet choice 102 | self.train_epoch_switch = 2 103 | 104 | # UUID for these parameters (current time) 105 | self.uuid = datetime.now() 106 | self.save_dir = 'data' 107 | self.log_file = path.join(self.save_dir, unique_str(self) + '.log') 108 | 109 | 110 | # global test params: 111 | P = Params() 112 | -------------------------------------------------------------------------------- /train/siamese_regions.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import traceback 4 | import random 5 | import numpy as np 6 | import torch 7 | import torch.optim as optim 8 | import torch.nn as nn 9 | import torchvision.models as models 10 | import torchvision.transforms as transforms 11 | from torch.autograd import Variable 12 | from siamese_regions_p import P 13 | from utils import move_device, tensor_t, tensor, fold_batches, train_gen 14 | from utils import imread_rgb, log, log_detail, get_lab_indicators 15 | from utils import get_images_labels, get_similarities, embeddings_device_dim 16 | from utils import test_print_descriptor, choose_rand_neg, get_pos_couples 17 | from model.siamese import TuneClassifSub, RegionDescriptorNet 18 | from model.custom_modules import TripletLoss 19 | 20 | # keep labels as global variable. 
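# Illustrative sketch (not part of the repository): the semi-hard negative
# choice (after Schroff et al., FaceNet) used by the triplet training in this
# file and in siamese_descriptor.py above. For an anchor, take the most similar
# training image that is neither a positive nor more similar than the chosen
# positive, and signal a fallback to a random negative when no such image
# exists. Written for a recent PyTorch; names are illustrative.
def pick_semi_hard_negative(sim_row, positive_mask, sim_anchor_pos):
    # sim_row: similarities of the anchor to every training image, in [-1, 1]
    # positive_mask: boolean tensor marking images sharing the anchor's label
    excluded = positive_mask | (sim_row >= sim_anchor_pos)
    if excluded.all():
        return None                   # caller falls back to a random negative
    sims = sim_row.clone()
    sims[excluded] = -2.0             # below any valid cosine similarity
    return sims.argmax().item()       # index of the hardest admissible negative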
they are initialized after 21 | # train set has been loaded and then kept constant 22 | labels = [] 23 | train_type = P.cnn_model.lower() + ' Siamese sub-regions' 24 | 25 | 26 | def get_embeddings(net, dataset, device, out_size): 27 | test_trans = P.test_trans 28 | if P.test_pre_proc: 29 | test_trans = transforms.Compose([]) 30 | 31 | def batch(last, i, is_final, batch): 32 | embeddings = last 33 | # one image at a time 34 | test_in = move_device(test_trans(batch[0][0]).unsqueeze(0), P.cuda_device) 35 | 36 | out = net(Variable(test_in, volatile=True)).data 37 | embeddings[i] = out[0] 38 | return embeddings 39 | 40 | init = tensor(device, len(dataset), out_size) 41 | return fold_batches(batch, init, dataset, 1) 42 | 43 | 44 | # train using triplets, constructing triplets from all positive couples 45 | def train_siam_triplets_pos_couples(net, train_set, testset_tuple, criterion, criterion2, optimizer, best_score=0): 46 | """ 47 | TODO 48 | """ 49 | train_trans = P.train_trans 50 | if P.train_pre_proc: 51 | train_trans = transforms.Compose([]) 52 | 53 | couples = get_pos_couples(train_set) 54 | sim_device, _ = embeddings_device_dim(P, net, len(train_set), sim_matrix=True) 55 | lab_indicators = get_lab_indicators(train_set, sim_device) 56 | num_pos = sum(len(couples[lab]) for lab in couples) 57 | log(P, '#pos (without order, with duplicates):{0}'.format(num_pos)) 58 | 59 | # fold over positive couples here and choose negative for each pos 60 | # need to make sure the couples are evenly distributed 61 | # such that all batches can have couples from every instance 62 | def shuffle_couples(couples): 63 | for lab in couples: 64 | random.shuffle(couples[lab]) 65 | # get x such that only 20% of labels have more than x couples 66 | a = np.array([len(couples[lab]) for lab in couples]) 67 | x = int(np.percentile(a, 80)) 68 | out = [] 69 | keys = couples.keys() 70 | random.shuffle(keys) 71 | # append the elements to out in a strided way 72 | # (up to x elements per label) 73 | for count in range(x): 74 | for lab in keys: 75 | if count >= len(couples[lab]): 76 | continue 77 | out.append(couples[lab][count]) 78 | # the last elements in the longer lists are inserted at random 79 | for lab in keys: 80 | for i in range(x, len(couples[lab])): 81 | out.insert(random.randrange(len(out)), couples[lab][i]) 82 | return out 83 | 84 | def create_epoch(epoch, couples, testset_tuple): 85 | test_ref_set = testset_tuple[1] 86 | # use the test-train set to obtain embeddings and similarities 87 | # (since it may be transformed differently than train set) 88 | similarities, _ = get_similarities(P, get_embeddings, net, test_ref_set) 89 | 90 | # shuffle the couples 91 | shuffled = shuffle_couples(couples) 92 | return shuffled, {'epoch': epoch, 'similarities': similarities} 93 | 94 | def create_batch(batch, n, epoch, similarities): 95 | # one image at a time. batch is always of size 1 96 | lab, (i1, i2), (im1, im2) = batch[0] 97 | labels_in = tensor_t(torch.LongTensor, P.cuda_device, 1) 98 | labels_in[0] = labels.index(lab) 99 | # we get a positive couple. find negative for it 100 | im3 = None 101 | # choose a semi-hard negative. see FaceNet 102 | # paper by Schroff et al for details. 103 | # essentially, choose hardest negative that is still 104 | # easier than the positive. 
this should avoid 105 | # collapsing the model at beginning of training 106 | ind_exl = lab_indicators[lab] 107 | sim_pos = similarities[i1, i2] 108 | if epoch < P.train_epoch_switch: 109 | # exclude all positives as well as any that are 110 | # more similar than sim_pos 111 | ind_exl = ind_exl | similarities[i1].ge(sim_pos) 112 | if ind_exl.sum() >= similarities.size(0): 113 | p = 'cant find semi-hard neg for' 114 | s = 'falling back to random neg' 115 | n_pos = lab_indicators[lab].sum() 116 | n_ge = similarities[i1].ge(sim_pos).sum() 117 | n_tot = similarities.size(0) 118 | print('{0} {1}-{2}-{3} (#pos:{4}, #ge:{5}, #total:{6}), {7}'.format(p, i1, i2, lab, n_pos, n_ge, n_tot, s)) 119 | else: 120 | # similarities must be in [-1, 1] 121 | # set all similarities of excluded indexes to -2 122 | # then take argmax (highest similarity not excluded) 123 | sims = similarities[i1].clone() 124 | sims[ind_exl] = -2 125 | _, k = sims.max(0) 126 | im3 = train_set[k[0]][0] 127 | if im3 is None: 128 | # default to random negative 129 | im3 = choose_rand_neg(train_set, lab) 130 | # one image at a time 131 | train_in1 = move_device(train_trans(im1).unsqueeze(0), P.cuda_device) 132 | train_in2 = move_device(train_trans(im2).unsqueeze(0), P.cuda_device) 133 | train_in3 = move_device(train_trans(im3).unsqueeze(0), P.cuda_device) 134 | # return input tensors and labels 135 | return [train_in1, train_in2, train_in3], [labels_in] 136 | 137 | def create_loss(out, labels_list): 138 | # out is a tuple of 3 tuples, each for the descriptor 139 | # and a tensor with all classification results for the highest 140 | # classification values. the first loss is a simple loss on the 141 | # descriptors. the second loss is a classification loss for 142 | # each sub-region of the anchor input (first input). 143 | # we simply sum-aggregate here 144 | loss = criterion(*(t for t, _ in out)) 145 | cls_out = out[0][1] # classification values for anchor 146 | # there is only 1 batch of k classification values, so cls_out 147 | # has dimension (1, num_classes, k). 
need to get (k, num_classes) 148 | cls_out_all = cls_out.squeeze(0).t() 149 | loss2 = criterion2(cls_out_all, labels_list[0].expand(cls_out_all.size(0))) 150 | return loss, loss2 151 | 152 | train_gen(train_type, P, test_print_descriptor, get_embeddings, net, 153 | couples, testset_tuple, optimizer, create_epoch, create_batch, 154 | create_loss, best_score=best_score) 155 | 156 | 157 | def get_siamese_net(): 158 | model = models.alexnet 159 | if P.cnn_model.lower() == 'resnet152': 160 | model = models.resnet152 161 | class_net = TuneClassifSub(model(pretrained=True), P.num_classes, P.feature_size2d, untrained=P.untrained_blocks) 162 | if P.classif_model: 163 | class_net.load_state_dict(torch.load(P.classif_model, map_location=lambda storage, location: storage.cpu())) 164 | net = RegionDescriptorNet(class_net, P.regions_k, P.feature_dim, P.feature_size2d, untrained=P.untrained_blocks) 165 | if P.preload_net: 166 | net.load_state_dict(torch.load(P.preload_net, map_location=lambda storage, location: storage.cpu())) 167 | net = move_device(net, P.cuda_device) 168 | return net 169 | 170 | 171 | def main(): 172 | # training and test sets 173 | train_set_full = get_images_labels(P.dataset_full, P.match_labels) 174 | test_set_full = get_images_labels(P.dataset_full + '/test', P.match_labels) 175 | 176 | labels_list = [t[1] for t in train_set_full] 177 | # we have to give a number to each label, 178 | # so we need a list here for the index 179 | labels.extend(sorted(list(set(labels_list)))) 180 | 181 | log(P, 'Loading and transforming train/test sets.') 182 | 183 | train_set, test_train_set, test_set = [], [], [] 184 | train_pre_f = P.train_trans if P.train_pre_proc else transforms.Compose([]) 185 | test_pre_f = P.test_trans if P.test_pre_proc else transforms.Compose([]) 186 | for im, lab in train_set_full: 187 | im_o = imread_rgb(im) 188 | train_set.append((train_pre_f(im_o), lab, im)) 189 | test_train_set.append((test_pre_f(im_o), lab, im)) 190 | 191 | for im, lab in test_set_full: 192 | if lab not in labels: 193 | continue 194 | im_o = imread_rgb(im) 195 | test_set.append((test_pre_f(im_o), lab, im)) 196 | 197 | siam_net = get_siamese_net() 198 | optimizer = optim.SGD((p for p in siam_net.parameters() if p.requires_grad), lr=P.train_lr, momentum=P.train_momentum, weight_decay=P.train_weight_decay) 199 | criterion = TripletLoss(P.triplet_margin, P.train_loss_avg) 200 | criterion2 = nn.CrossEntropyLoss(size_average=P.train_loss2_avg) 201 | testset_tuple = (test_set, test_train_set) 202 | if P.test_upfront: 203 | log(P, 'Upfront testing of descriptor model') 204 | score = test_print_descriptor(train_type, P, siam_net, testset_tuple, get_embeddings) 205 | else: 206 | score = 0 207 | if P.train: 208 | log(P, 'Starting region-descriptor training') 209 | train_siam_triplets_pos_couples(siam_net, train_set, testset_tuple, criterion, criterion2, optimizer, best_score=score) 210 | log(P, 'Finished region-descriptor training') 211 | if P.test_descriptor_net: 212 | log(P, 'Testing as descriptor') 213 | # set best score high enough such that it will never be saved 214 | test_print_descriptor(train_type, P, siam_net, testset_tuple, get_embeddings, best_score=len(test_set) + 1) 215 | 216 | 217 | if __name__ == '__main__': 218 | with torch.cuda.device(P.cuda_device): 219 | try: 220 | main() 221 | except: 222 | log_detail(P, None, traceback.format_exc()) 223 | raise 224 | -------------------------------------------------------------------------------- /train/siamese_regions_p.py: 
-------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from datetime import datetime 4 | from utils import * # image transforms, general utilities 5 | from global_p import * # global config 6 | 7 | # in AlexNet, there are 5 convolutional layers with parameters 8 | # and 3 FC layers in the classifier 9 | # in ResNet, before first layer, there are 2 modules with parameters. 10 | # then number of blocks per layers: 11 | # ResNet152 - layer 1: 3, layer 2: 8, layer 3: 36, layer 4: 3 12 | # ResNet50 - layer 1: 3, layer 2: 4, layer 3: 6, layer 4: 3 13 | # finally, a single FC layer is used as classifier 14 | untrained_blocks = { 15 | 'alexnet': 4, 16 | 'resnet152': 2 + 3 + 8 + 36 17 | } 18 | 19 | 20 | # parameters for the sub-regions classification training with AlexNet 21 | class Params(object): 22 | 23 | def __init__(self): 24 | # general parameters 25 | self.cnn_model = 'ResNet152' 26 | self.dataset_full = 'data/pre_proc/CLICIDE_video_448' 27 | self.cuda_device = 0 28 | self.dataset_id = parse_dataset_id(self.dataset_full) 29 | # the file containing mean and standard deviation values 30 | # for a new dataset, simply use the filename here or add it to the 31 | # global_p module parameters 32 | # (this is valid for the following parameters, too) 33 | self.mean_std_file = mean_std_files[self.dataset_id] 34 | # the function for obtaining labels from a filename in the dataset 35 | # this function takes a filename and returns a unique label 36 | self.match_labels = match_label_functions[self.dataset_id] 37 | # input size. this is usually always (3, 224, 224) unless larger 38 | # fixed-size images should be used 39 | self.image_input_size = image_sizes[self.dataset_id] 40 | # the number of different labels in the dataset 41 | self.num_classes = num_classes[self.dataset_id] 42 | # the 2D size of the convolutional features of the base network 43 | self.feature_size2d = feature_sizes[(self.cnn_model.lower(), self.image_input_size)] 44 | # the number of blocks in the base network that should not be trained 45 | # (starting from the lowest and going to higher layers/blocks) 46 | # usually, block represents a layer with parameters, 47 | # for ResNet or equivalent, block is a whole block of layers 48 | self.untrained_blocks = untrained_blocks[self.cnn_model.lower()] 49 | 50 | # read mean and standard of dataset here to define transforms already 51 | m, s = read_mean_std(self.mean_std_file) 52 | 53 | # Classification net general and test params 54 | self.preload_net = '' # allows to continue training a network 55 | self.classif_model = 'data/final_classif_sub/cli_resnet152.pth.tar' 56 | self.test_upfront = True 57 | self.train = True 58 | self.test_pre_proc = True 59 | self.test_trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(m, s)]) 60 | 61 | # Classification net training params 62 | self.train_epochs = 20 63 | self.train_batch_size = 64 64 | self.train_micro_batch = 1 # has to be 1 65 | self.train_aug_rot = r = 45 66 | self.train_aug_hrange = hr = 0 67 | self.train_aug_vrange = vr = 0 68 | self.train_aug_hsrange = hsr = 0.25 69 | self.train_aug_vsrange = vsr = 0.25 70 | self.train_aug_hflip = hflip = True 71 | trans = transforms.Compose([random_affine_noisy_cv(rotation=r, h_range=hr, v_range=vr, hs_range=hsr, vs_range=vsr, h_flip=hflip), transforms.ToTensor(), transforms.Normalize(m, s)]) 72 | 73 | # transformation for each scale 74 | self.train_trans = trans 75 | self.train_pre_proc = False 76 | 77 | self.train_lr = 
1e-4 78 | self.train_momentum = 0.9 79 | self.train_weight_decay = 0. 80 | self.train_optim = 'SGD' 81 | self.train_annealing = {} 82 | self.train_loss_avg = False 83 | self.train_loss_int = 10 84 | self.train_test_int = 0 85 | # the batch norm layer cannot be trained if the micro-batch size 86 | # is too small, as global variances/means cannot be properly 87 | # approximated in this case. so train only when having a batch 88 | # of at least 16 89 | self.train_bn = self.train_micro_batch >= 16 or (self.train_micro_batch <= 0 and (self.train_batch_size >= 16 or self.train_batch_size <= 0)) 90 | 91 | # Descriptor net parameters 92 | # if True, test the network as a descriptor 93 | # (using the normalized classification output): 94 | self.test_descriptor_net = True 95 | # the threshold (in Bytes) for embeddings to be computed on GPU 96 | self.embeddings_cuda_size = 2 ** 30 97 | self.feature_dim = 2048 98 | self.regions_k = 6 99 | self.train_loss2_alpha = 1.0 100 | self.train_loss2_avg = True 101 | self.triplet_margin = 0.1 102 | # number of epochs after which semi-hard triplet choice switches 103 | # to hard triplet choice 104 | self.train_epoch_switch = 2 105 | 106 | # UUID for these parameters (current time) 107 | self.uuid = datetime.now() 108 | self.save_dir = 'data' 109 | self.log_file = path.join(self.save_dir, unique_str(self) + '.log') 110 | 111 | 112 | # global test params: 113 | P = Params() 114 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from dataset import * 2 | from general import * 3 | from image import * 4 | from metrics import * 5 | from train_classif import * 6 | from train_general import * 7 | from train_siamese import * 8 | -------------------------------------------------------------------------------- /utils/dataset.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import itertools 4 | import glob 5 | import random 6 | import torch 7 | from os import path 8 | from general import tensor_t 9 | 10 | 11 | def get_images_labels(folder='.', label_f=lambda x: x.split('.')[0]): 12 | """ 13 | Read a folder containing images where the name of the class is in the filename 14 | the label function should return the label given the filename 15 | Return : 16 | list of couple : (image filename, label) 17 | """ 18 | exts = ('*.jpg', '*.JPG', '*.JPEG', "*.png") 19 | r = [] 20 | for ext in exts: 21 | r.extend([(im, label_f(im)) for im in glob.iglob(path.join(folder, ext))]) 22 | return r 23 | 24 | 25 | # get couples of images as a dict with images as keys and all 26 | # images of same label as values 27 | def get_pos_couples_ibi(dataset, duplicate=True): 28 | couples = {} 29 | for (_, l1, name1), (im2, l2, name2) in itertools.product(dataset, dataset): 30 | if l1 != l2 or (name1 is name2 and not duplicate): 31 | continue 32 | if name1 in couples: 33 | couples[name1].append(im2) 34 | else: 35 | couples[name1] = [im2] 36 | return couples 37 | 38 | 39 | # get the positive couples of a dataset as a dict with labels as keys 40 | def get_pos_couples(dataset, duplicate=True): 41 | couples = {} 42 | comb = itertools.combinations_with_replacement 43 | if not duplicate: 44 | comb = itertools.combinations 45 | for (i1, (x1, l1, _)), (i2, (x2, l2, _)) in comb(enumerate(dataset), 2): 46 | if l1 != l2: 47 | continue 48 | t = (l1, (i1, i2), (x1, x2)) 49 | if l1 in couples: 50 | 
couples[l1].append(t) 51 | else: 52 | couples[l1] = [t] 53 | return couples 54 | 55 | 56 | # return a random negative for the given label and train set 57 | def choose_rand_neg(train_set, lab): 58 | im_neg, lab_neg, _ = random.choice(train_set) 59 | while (lab_neg == lab): 60 | im_neg, lab_neg, _ = random.choice(train_set) 61 | return im_neg 62 | 63 | 64 | # get byte tensors indicating the indexes of images having a different label 65 | def get_lab_indicators(dataset, device): 66 | n = len(dataset) 67 | indicators = {} 68 | for _, lab1, _ in dataset: 69 | if lab1 in indicators: 70 | continue 71 | indicator = tensor_t(torch.ByteTensor, device, n).fill_(0) 72 | for i2, (_, lab2, _) in enumerate(dataset): 73 | if lab1 == lab2: 74 | indicator[i2] = 1 75 | indicators[lab1] = indicator 76 | return indicators 77 | -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | import sys 5 | import tempfile 6 | import inspect 7 | import types 8 | import torch 9 | import os 10 | 11 | 12 | # to check an option specifying a file that should or should not exist 13 | def check_file(arg, name, should_exist, usage): 14 | if should_exist and not os.path.isfile(arg): 15 | print('Cannot find {0} file at path {1}\n'.format(name, arg)) 16 | usage() 17 | sys.exit(2) 18 | if not should_exist and os.path.isfile(arg): 19 | print('Cannot overwrite {0} file at path {1}\n'.format(name, arg)) 20 | usage() 21 | sys.exit(2) 22 | return arg 23 | 24 | 25 | # to check an option specifying a folder that should or should not exist 26 | def check_folder(arg, name, should_exist, usage): 27 | if should_exist and not os.path.isdir(arg): 28 | print('Cannot find {0} folder at path {1}\n'.format(name, arg)) 29 | usage() 30 | sys.exit(2) 31 | if not should_exist and os.path.isdir(arg): 32 | print('Cannot overwrite {0} folder at path {1}\n'.format(name, arg)) 33 | usage() 34 | sys.exit(2) 35 | return arg 36 | 37 | 38 | # to check an option specifying the model 39 | def check_model(arg, usage): 40 | if arg.lower() == 'alexnet' or arg.lower() == 'resnet152': 41 | return arg.lower() 42 | print('Model {0} is not a valid model'.format(arg)) 43 | usage() 44 | sys.exit(2) 45 | 46 | 47 | # to check an option specifying an integer 48 | def check_int(arg, name, usage): 49 | try: 50 | return int(arg) 51 | except ValueError: 52 | print('{0} was given as {1}. This is not an integer.\n' 53 | .format(name, arg)) 54 | usage() 55 | sys.exit(2) 56 | 57 | 58 | # to check an option specifying a boolean 59 | def check_bool(arg, name, usage): 60 | arg = arg.lower() 61 | if arg == '': 62 | print('{0} was not given. 
It should be a boolean (true/yes/y/1 for True and otherwise False).'.format(name)) 63 | usage() 64 | sys.exit(2) 65 | if arg == 'true' or arg == 'yes' or arg == 'y' or arg == '1': 66 | return True 67 | return False 68 | 69 | 70 | def parse_dataset_id(dataset_full): 71 | if dataset_full.endswith('/'): 72 | dataset_full = dataset_full[:-1] 73 | return dataset_full.split('/')[-1] 74 | 75 | 76 | def read_mean_std(fname): 77 | with open(fname) as f: 78 | mean = map(float, f.readline().split(' ')) 79 | std = map(float, f.readline().split(' ')) 80 | return mean, std 81 | 82 | 83 | def fun_str(f): 84 | if f.__class__ in (types.FunctionType, types.BuiltinFunctionType, types.BuiltinMethodType): 85 | return f.__name__ 86 | else: 87 | return f.__class__.__name__ 88 | 89 | 90 | def trans_str(trans): 91 | return ','.join(fun_str(t) for t in trans.transforms) 92 | 93 | 94 | def move_device(obj, device): 95 | if device >= 0: 96 | return obj.cuda() 97 | else: 98 | return obj.cpu() 99 | 100 | 101 | def tensor_t(t, device, *sizes): 102 | return move_device(t(*sizes), device) 103 | 104 | 105 | def tensor(device, *sizes): 106 | return tensor_t(torch.Tensor, device, *sizes) 107 | 108 | 109 | def unique_str(P): 110 | return P.uuid.strftime('%Y%m%d-%H%M%S-%f') 111 | 112 | 113 | def save(P, f, prefix): 114 | f.write('{0}\n\n'.format(prefix)) 115 | # for name, value in sorted(vars(P).items()): 116 | # if name == 'uuid': 117 | # continue 118 | # if name in ('test_trans', 'train_trans', 'train_sub_scales'): 119 | # if type(value) is list or type(value) is tuple: 120 | # value = ', '.join(trans_str(t) for t in value) 121 | # else: 122 | # value = trans_str(value) 123 | # elif name in ('match_labels_f'): 124 | # value = fun_str(value) 125 | # f.write('{0}:{1}\n'.format(name, value)) 126 | f.write(inspect.getsource(P.__class__)) 127 | f.close() 128 | 129 | 130 | def save_uuid(P, prefix): 131 | f = tempfile.NamedTemporaryFile(dir=P.save_dir, delete=False) 132 | save(P, f, prefix) 133 | # the following will not work on Windows (would need to add a remove first) 134 | os.rename(f.name, os.path.join(P.save_dir, unique_str(P) + '.params')) 135 | 136 | 137 | def log_detail(P, p_file, *args): 138 | if p_file: 139 | print(*args, file=p_file) 140 | if P.log_file: 141 | with open(P.log_file, 'a') as f: 142 | print(*args, file=f) 143 | 144 | 145 | def log(P, *args): 146 | log_detail(P, sys.stdout, *args) 147 | 148 | 149 | def mod_param(p_file, param, new_value): 150 | with open(p_file) as f_in: 151 | with open(p_file + '.tmp', 'w') as f_out: 152 | for line in f_in: 153 | if (line.strip().startswith('self.' 
+ param) and 154 | len(line.split('=')) == 2): 155 | new_line = (line.split('=')[0] + '= \'' + 156 | new_value + '\'\n') 157 | f_out.write(new_line) 158 | else: 159 | f_out.write(line) 160 | # as above, in Windows a remove would be needed first 161 | os.rename(p_file + '.tmp', p_file) 162 | -------------------------------------------------------------------------------- /utils/image.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import torchvision.transforms as transforms 4 | from PIL import Image 5 | import cv2 6 | from scipy.ndimage import interpolation 7 | import numpy as np 8 | import random 9 | 10 | 11 | # ---------------------- Image transformations ----------------- 12 | def norm_image_t(tensor): 13 | m, s = [], []  # two separate lists: per-channel means and stds 14 | for t in tensor: 15 | m.append(t.mean()) 16 | s.append(t.std()) 17 | return transforms.Normalize(m, s)(tensor) 18 | 19 | 20 | # pad a PIL image to a square 21 | def pad_square(img): 22 | longer_side = max(img.size) 23 | h_pad = (longer_side - img.size[0]) // 2 24 | h_mod = (longer_side - img.size[0]) % 2 25 | v_pad = (longer_side - img.size[1]) // 2 26 | v_mod = (longer_side - img.size[1]) % 2 27 | return img.crop((-h_pad - h_mod, -v_pad - v_mod, img.size[0] + h_pad, img.size[1] + v_pad)) 28 | 29 | 30 | # randomly rotate, shift and scale vertically and horizontally a PIL image with given angle in degrees and shifting/scaling ratios 31 | # inspired by http://stackoverflow.com/questions/7501009/affine-transform-in-pil-python 32 | def random_affine(rotation=0, h_range=0, v_range=0, hs_range=0, vs_range=0): 33 | rotation = rotation * (np.pi / 180) 34 | 35 | def rand_affine(im): 36 | angle = random.uniform(-rotation, rotation) 37 | x, y = im.size[0] / 2, im.size[1] / 2 38 | nx = x + random.uniform(-h_range, h_range) * im.size[0] 39 | ny = y + random.uniform(-v_range, v_range) * im.size[1] 40 | sx = 1 + random.uniform(-hs_range, hs_range) 41 | sy = 1 + random.uniform(-vs_range, vs_range) 42 | cos, sin = np.cos(angle), np.sin(angle) 43 | a, b = cos / sx, sin / sx 44 | c = x - nx * a - ny * b 45 | d, e = -sin / sy, cos / sy 46 | f = y - nx * d - ny * e 47 | return im.transform(im.size, Image.AFFINE, (a, b, c, d, e, f), resample=Image.BICUBIC) 48 | return rand_affine 49 | 50 | 51 | def pad_square_cv(img): 52 | longer_side = max(img.shape[:2]) 53 | v_pad = (longer_side - img.shape[0]) // 2 54 | v_mod = (longer_side - img.shape[0]) % 2 55 | h_pad = (longer_side - img.shape[1]) // 2 56 | h_mod = (longer_side - img.shape[1]) % 2 57 | return np.pad(img, ((v_pad + v_mod, v_pad), (h_pad + h_mod, h_pad), (0, 0)), 'constant', constant_values=((0, 0), (0, 0), (0, 0))) 58 | 59 | 60 | def scale_cv(new_size, inter=cv2.INTER_CUBIC): 61 | if isinstance(new_size, tuple): 62 | def sc_cv(img): 63 | return cv2.resize(img, new_size, interpolation=inter) 64 | return sc_cv 65 | else: 66 | def sc_cv(img): 67 | h, w, _ = img.shape 68 | if (w <= h and w == new_size) or (h <= w and h == new_size): 69 | return img 70 | if w < h: 71 | ow = new_size 72 | oh = int(round(float(new_size * h) / w)) 73 | return cv2.resize(img, (ow, oh), interpolation=inter) 74 | else: 75 | oh = new_size 76 | ow = int(round(float(new_size * w) / h)) 77 | return cv2.resize(img, (ow, oh), interpolation=inter) 78 | return sc_cv 79 | 80 | 81 | def center_crop_cv(size): 82 | if not isinstance(size, tuple): 83 | size = (int(size), int(size)) 84 | 85 | def cent_crop_cv(img): 86 | h, w, _ = img.shape 87 | th, tw = size 88 | x1 = int(round((w - tw) / 2.)) 89 | y1
= int(round((h - th) / 2.)) 90 | return img[y1:y1 + th, x1:x1 + tw] 91 | return cent_crop_cv 92 | 93 | 94 | def random_crop_cv(size): 95 | if not isinstance(size, tuple): 96 | size = (int(size), int(size)) 97 | 98 | def rand_crop_cv(img): 99 | h, w, _ = img.shape 100 | th, tw = size 101 | if w == tw and h == th: 102 | return img 103 | x1 = random.randint(0, w - tw) 104 | y1 = random.randint(0, h - th) 105 | return img[y1:y1 + th, x1:x1 + tw] 106 | return rand_crop_cv 107 | 108 | 109 | # crop randomly using same aspect ratio as image 110 | # such that shorter side has given size 111 | def random_crop_keep_ar_cv(short_side): 112 | def rand_crop_cv(img): 113 | h, w, _ = img.shape 114 | if (h <= w and h == short_side) or (w <= h and w == short_side): 115 | return img 116 | if h < w: 117 | th = short_side 118 | tw = int(round(float(short_side * w) / h)) 119 | else: 120 | tw = short_side 121 | th = int(round(float(short_side * h) / w)) 122 | x1 = random.randint(0, w - tw) 123 | y1 = random.randint(0, h - th) 124 | return img[y1:y1 + th, x1:x1 + tw] 125 | return rand_crop_cv 126 | 127 | 128 | def affine_cv(img, angle, v_shift, h_shift, sx, sy, cval=0.): 129 | # apply translation first to allow the center to be 130 | # offset to any position when using rotation 131 | mat = np.array([ 132 | [sy * np.cos(angle), -sy * np.sin(angle), v_shift], 133 | [sx * np.sin(angle), sx * np.cos(angle), h_shift], 134 | [0., 0., 1.] 135 | ]) 136 | # make sure the transform is applied at the center of the image, 137 | # then reset it afterwards 138 | offset = (img.shape[0] / 2.0 + 0.5, img.shape[1] / 2.0 + 0.5) 139 | mat = np.dot(np.dot( 140 | np.array([ 141 | [1., 0., offset[0]], 142 | [0., 1., offset[1]], 143 | [0., 0., 1.]]), 144 | mat), 145 | np.array([ 146 | [1., 0., -offset[0]], 147 | [0., 1., -offset[1]], 148 | [0., 0., 1.]])) 149 | 150 | def t(channel): 151 | return interpolation.affine_transform(channel, mat[:2, :2], mat[:2, 2], cval=cval) 152 | # apply transformation to each channel separately 153 | return np.dstack(map(t, (img[:, :, i] for i in range(img.shape[2])))) 154 | 155 | 156 | def random_affine_scale_cv(range_low, range_high): 157 | def rand_aff_scale_cv(img): 158 | scale = random.uniform(range_low, range_high) 159 | return affine_cv(img, 0., 0., 0., scale, scale) 160 | return rand_aff_scale_cv 161 | 162 | 163 | def affine_scale_noisy_cv(scale): 164 | def aff_scale_noisy(img): 165 | img = affine_cv(img.astype(float), 0., 0., 0., scale, scale, cval=.1) 166 | img[img == .1] = np.random.randint(256, size=np.sum(img == .1)) 167 | return img.astype(np.uint8) 168 | return aff_scale_noisy 169 | 170 | 171 | def random_affine_noisy_cv(rotation=0, h_range=0, v_range=0, hs_range=0, vs_range=0, h_flip=False): 172 | rotation = rotation * (np.pi / 180) 173 | 174 | def rand_aff_noisy_cv(img): 175 | # compose the affine transformation applied to x 176 | angle = np.random.uniform(-rotation, rotation) 177 | # shift needs to be scaled by size of image in that dimension 178 | v_shift = np.random.uniform(-v_range, v_range) * img.shape[0] 179 | h_shift = np.random.uniform(-h_range, h_range) * img.shape[1] 180 | sx = 1 + random.uniform(-hs_range, hs_range) 181 | sy = 1 + random.uniform(-vs_range, vs_range) 182 | if h_flip and random.random() < 0.5: 183 | sx = -sx 184 | img = affine_cv(img.astype(float), angle, v_shift, h_shift, sx, sy, cval=.1) 185 | img[img == .1] = np.random.randint(256, size=np.sum(img == .1)) 186 | return img.astype(np.uint8) 187 | return rand_aff_noisy_cv 188 | 189 | 190 | def 
random_affine_cv(rotation=0, h_range=0, v_range=0, hs_range=0, vs_range=0, h_flip=False): 191 | rotation = rotation * (np.pi / 180) 192 | 193 | def rand_affine_cv(img): 194 | # compose the affine transformation applied to x 195 | angle = np.random.uniform(-rotation, rotation) 196 | # shift needs to be scaled by size of image in that dimension 197 | v_shift = np.random.uniform(-v_range, v_range) * img.shape[0] 198 | h_shift = np.random.uniform(-h_range, h_range) * img.shape[1] 199 | sx = 1 + random.uniform(-hs_range, hs_range) 200 | sy = 1 + random.uniform(-vs_range, vs_range) 201 | if h_flip and random.random() < 0.5: 202 | sx = -sx 203 | return affine_cv(img, angle, v_shift, h_shift, sx, sy) 204 | return rand_affine_cv 205 | 206 | 207 | def random_h_flip_cv(img): 208 | return img[:, ::-1, :].copy() if random.random() < 0.5 else img 209 | 210 | 211 | def imread_rgb(fname): 212 | # read and convert image from BGR to RGB 213 | im = cv2.imread(fname) 214 | return cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 215 | 216 | 217 | def tensor_2_bgr(tensor): 218 | # convert RGB tensor to BGR numpy array as used in OpenCV 219 | return cv2.cvtColor(tensor.numpy(), cv2.COLOR_RGB2BGR) 220 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | 4 | # Evaluation metrics (Precision@1 and mAP) given similarity matrix 5 | # Similarity matrix must have size 'test set size' x 'ref set size' 6 | # and contains in each row the similarity of that test (query) image 7 | # with all ref images 8 | def precision1(sim, test_set, ref_set, kth=1): 9 | total = sim.size(0) 10 | if kth <= 1: 11 | max_sim, max_idx = sim.max(1) 12 | else: 13 | max_sim, max_idx = sim.kthvalue(sim.size(1) - kth + 1, 1) 14 | max_label = [] 15 | for i in range(sim.size(0)): 16 | # get label from ref set which obtained highest score 17 | max_label.append(ref_set[max_idx[i, 0]][1]) 18 | correct = sum(test_label == max_label[j] for j, (_, test_label, _) in enumerate(test_set)) 19 | return float(correct) / total, correct, total, max_sim, max_label 20 | 21 | 22 | # according to Oxford buildings dataset definition of AP 23 | # the kth argument allows to ignore the k highest ranked elements of ref set 24 | # this is used to compute AP even for the train set against train set 25 | def avg_precision(sim, i, test_set, ref_set, kth=1): 26 | test_label = test_set[i][1] 27 | n_pos = sum(test_label == ref_label for _, ref_label, _ in ref_set) 28 | n_pos -= (kth - 1) 29 | if n_pos <= 0: 30 | return None 31 | old_recall, old_precision, ap = 0.0, 1.0, 0.0 32 | intersect_size, j = 0, 0 33 | _, ranked_list = sim[i].sort(dim=0, descending=True) 34 | for n, k in enumerate(ranked_list): 35 | if n + 1 < kth: 36 | continue 37 | if ref_set[k][1] == test_label: 38 | intersect_size += 1 39 | 40 | recall = intersect_size / float(n_pos) 41 | precision = intersect_size / (j + 1.0) 42 | ap += (recall - old_recall) * ((old_precision + precision) / 2.0) 43 | old_recall, old_precision = recall, precision 44 | j += 1 45 | return ap 46 | 47 | 48 | def mean_avg_precision(sim, test_set, ref_set, kth=1): 49 | aps = [] 50 | for i in range(sim.size(0)): 51 | # compute ap for each test image 52 | ap = avg_precision(sim, i, test_set, ref_set, kth) 53 | if ap is not None: 54 | aps.append(ap) 55 | return sum(aps) / float(len(aps)) 56 | -------------------------------------------------------------------------------- 
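The metrics above expect a similarity matrix of size 'test set size' x 'ref set size' and test/ref sets given as (image, label, name) triples. A small usage sketch follows; it is not part of the repository: the toy labels and file names are made up, and it assumes the old 0.x PyTorch API this code targets (where sim.max(1) keeps the reduced dimension, as precision1 relies on).

# toy check of the retrieval metrics: 2 query images against 3 reference images
import torch
from metrics import precision1, mean_avg_precision  # assumes utils/ is on sys.path

test_set = [(None, 'obj_a', 'qa.jpg'), (None, 'obj_b', 'qb.jpg')]
ref_set = [(None, 'obj_a', 'ra1.jpg'), (None, 'obj_a', 'ra2.jpg'), (None, 'obj_b', 'rb1.jpg')]
# row i holds the similarities of query i to every reference image
sim = torch.Tensor([[0.9, 0.8, 0.1],
                    [0.2, 0.3, 0.7]])
p1, correct, total, max_sim, max_label = precision1(sim, test_set, ref_set)
mAP = mean_avg_precision(sim, test_set, ref_set)
# both queries rank their own class first, so p1 and mAP should both come out as 1.0
print(p1, correct, total, mAP)

--------------------------------------------------------------------------------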
/utils/train_classif.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import torch 4 | from model.nn_utils import set_net_train 5 | from os import path 6 | from general import log, save_uuid, unique_str 7 | 8 | 9 | # Generic function to test and print stats when training a classification net 10 | def test_print_classif(train_type, P, net, testset_tuple, test_net, best_score=0, epoch=0): 11 | test_set, test_train_set = testset_tuple 12 | set_net_train(net, False) 13 | c, t = test_net(net, test_set) 14 | if (c > best_score): 15 | best_score = c 16 | prefix = '{0}, EPOCH:{1}, SCORE:{2}'.format(train_type, epoch, c) 17 | save_uuid(P, prefix) 18 | torch.save(net.state_dict(), path.join(P.save_dir, unique_str(P) + "_best_classif.pth.tar")) 19 | log(P, 'TEST - correct: {0} / {1} - acc: {2}'.format(c, t, float(c) / t)) 20 | 21 | c, t = test_net(net, test_train_set) 22 | torch.save(net.state_dict(), path.join(P.save_dir, "model_classif_" + str(epoch) + ".pth.tar")) 23 | log(P, 'TRAIN - correct: {0} / {1} - acc: {2}'.format(c, t, float(c) / t)) 24 | set_net_train(net, True, bn_train=P.train_bn) 25 | return best_score 26 | -------------------------------------------------------------------------------- /utils/train_general.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import gc 4 | import functools 5 | import torch.optim as optim 6 | from torch.autograd import Variable 7 | from model.nn_utils import set_net_train 8 | from general import log 9 | 10 | 11 | # Generic function to output the stats 12 | def output_stats(train_type, P, test_print, test_net, net, testset_tuple, epoch, batch_count, is_final, loss, running_loss, score): 13 | disp_int = P.train_loss_int 14 | running_loss += loss 15 | if batch_count % disp_int == disp_int - 1: 16 | log(P, '[{0:d}, {1:5d}] loss: {2:.5f}'.format(epoch + 1, batch_count + 1, running_loss / disp_int)) 17 | running_loss = 0.0 18 | test_int = P.train_test_int 19 | if ((test_int > 0 and batch_count % test_int == test_int - 1) or 20 | (test_int <= 0 and is_final)): 21 | score = test_print(train_type, P, net, testset_tuple, test_net, score, epoch + 1) 22 | return running_loss, score 23 | 24 | 25 | # evaluate a function by batches of size batch_size on the set x 26 | # and fold over the returned values 27 | def fold_batches(f, init, x, batch_size, cut_end=False, add_args={}): 28 | nx = len(x) 29 | if batch_size <= 0: 30 | return f(init, 0, True, x, **add_args) 31 | 32 | def red(last, idx): 33 | end = min(idx + batch_size, nx) 34 | if cut_end and idx + batch_size > nx: 35 | return last 36 | is_final = end > nx - batch_size if cut_end else end == nx 37 | return f(last, idx, is_final, x[idx:end], **add_args) 38 | return functools.reduce(red, range(0, nx, batch_size), init) 39 | 40 | 41 | def anneal(net, optimizer, epoch, annealing_dict): 42 | if epoch not in annealing_dict: 43 | return optimizer 44 | default_group = optimizer.state_dict()['param_groups'][0] 45 | lr = default_group['lr'] * annealing_dict[epoch] 46 | momentum = default_group['momentum'] 47 | weight_decay = default_group['weight_decay'] 48 | return optim.SGD((p for p in net.parameters() if p.requires_grad), lr=lr, momentum=momentum, weight_decay=weight_decay) 49 | 50 | 51 | def micro_batch_gen(last, i, is_final, batch, P, net, create_batch, batch_args, create_loss): 52 | gc.collect() 53 | prev_val, mini_batch_size = last 54 | n = len(batch) 55 | tensors_in, 
labels_in = create_batch(batch, n, **batch_args) 56 | tensors_out = net(*(Variable(t) for t in tensors_in)) 57 | loss, loss2 = create_loss(tensors_out, [Variable(l) for l in labels_in]) 58 | loss_micro = loss * n / mini_batch_size if P.train_loss_avg else loss 59 | val = loss_micro.data[0] 60 | if loss2 is not None: 61 | loss2_micro = loss2 * n / mini_batch_size if P.train_loss2_avg else loss2 62 | loss_micro = loss_micro + P.train_loss2_alpha * loss2_micro 63 | val = val + P.train_loss2_alpha * loss2_micro.data[0] 64 | loss_micro.backward() 65 | return prev_val + val, mini_batch_size 66 | 67 | 68 | def mini_batch_gen(last, i, is_final, batch, train_type, P, test_print, test_net, net, optimizer, testset_tuple, epoch, micro_args): 69 | batch_count, score, running_loss = last 70 | optimizer.zero_grad() 71 | loss, _ = fold_batches(micro_batch_gen, (0.0, len(batch)), batch, P.train_micro_batch, add_args=micro_args) 72 | optimizer.step() 73 | running_loss, score = output_stats(train_type, P, test_print, test_net, net, testset_tuple, epoch, batch_count, is_final, loss, running_loss, score) 74 | return batch_count + 1, score, running_loss 75 | 76 | 77 | def train_gen(train_type, P, test_print, test_net, net, train_set, testset_tuple, optimizer, create_epoch, create_batch, create_loss, best_score=0): 78 | set_net_train(net, True, bn_train=P.train_bn) 79 | for epoch in range(P.train_epochs): 80 | # annealing 81 | optimizer = anneal(net, optimizer, epoch, P.train_annealing) 82 | 83 | dataset, batch_args = create_epoch(epoch, train_set, testset_tuple) 84 | 85 | micro_args = { 86 | 'P': P, 87 | 'net': net, 88 | 'create_batch': create_batch, 89 | 'batch_args': batch_args, 90 | 'create_loss': create_loss 91 | } 92 | mini_args = { 93 | 'train_type': train_type, 94 | 'P': P, 95 | 'test_print': test_print, 96 | 'test_net': test_net, 97 | 'net': net, 98 | 'optimizer': optimizer, 99 | 'testset_tuple': testset_tuple, 100 | 'epoch': epoch, 101 | 'micro_args': micro_args 102 | } 103 | 104 | init = 0, best_score, 0.0 # batch count, score, running loss 105 | _, best_score, _ = fold_batches(mini_batch_gen, init, dataset, P.train_batch_size, cut_end=True, add_args=mini_args) 106 | -------------------------------------------------------------------------------- /utils/train_siamese.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | import torch 4 | import random 5 | from os import path 6 | from model.nn_utils import set_net_train 7 | from general import tensor_t 8 | from general import log, save_uuid, unique_str 9 | from dataset import get_pos_couples 10 | from metrics import precision1, mean_avg_precision 11 | 12 | 13 | # get byte tensors indicating the indexes of images having a different label 14 | def get_lab_indicators(dataset, device): 15 | n = len(dataset) 16 | indicators = {} 17 | for _, lab1, _ in dataset: 18 | if lab1 in indicators: 19 | continue 20 | indicator = tensor_t(torch.ByteTensor, device, n).fill_(0) 21 | for i2, (_, lab2, _) in enumerate(dataset): 22 | if lab1 == lab2: 23 | indicator[i2] = 1 24 | indicators[lab1] = indicator 25 | return indicators 26 | 27 | 28 | # determine the device where embeddings should be stored 29 | # and the feature dimension for a descriptor 30 | def embeddings_device_dim(P, net, n, sim_matrix=False): 31 | # get best device for embeddings (and possibly similarity matrix), 32 | # as well as the feature vector size. 33 | # usually, this is the configured cuda device. 
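# (with 4 bytes per float32 value, the default budget of embeddings_cuda_size = 2**30 bytes holds about 131072 embeddings of dimension feature_dim = 2048)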
34 | # but it could be CPU if embeddings/number of items are too large 35 | device = P.cuda_device 36 | out_size = P.feature_dim 37 | if hasattr(net, 'feature_size') and out_size <= 0: 38 | out_size = net.feature_size 39 | if n * out_size * 4 > P.embeddings_cuda_size: 40 | device = -1 41 | if sim_matrix and n * n * 4 > P.embeddings_cuda_size: 42 | device = -1 43 | return device, out_size 44 | 45 | 46 | # get all similarities between pairs of images of the dataset 47 | # net is assumed to be in train mode 48 | def get_similarities(P, get_embeddings, net, dataset): 49 | set_net_train(net, False) 50 | n = len(dataset) 51 | d, o = embeddings_device_dim(P, net, n, sim_matrix=True) 52 | embeddings = get_embeddings(net, dataset, d, o) 53 | similarities = torch.mm(embeddings, embeddings.t()) 54 | set_net_train(net, True, bn_train=P.train_bn) 55 | return similarities, d 56 | 57 | 58 | # accuracy of a net giving feature vectors for each image, evaluated over test set and test ref set (where the images are searched for) 59 | # the model should be in eval mode 60 | # for each query image, this computes the highest-similarity match (precision at 1) as well as the average precision/ranking on the ref set 61 | def test_descriptor_net(P, get_embeddings, net, test_set, test_ref_set, kth=1): 62 | d, o = embeddings_device_dim(P, net, max(len(test_set), len(test_ref_set))) 63 | test_embeddings = get_embeddings(net, test_set, d, o) 64 | ref_embeddings = get_embeddings(net, test_ref_set, d, o) 65 | 66 | # calculate all similarities as a simple matrix multiplication 67 | # since embeddings are assumed to be normalized 68 | # the similarities here should always be on CPU 69 | # (kthvalue is only implemented there and we don't need GPU perf) 70 | sim = torch.mm(test_embeddings, ref_embeddings.t()).cpu() 71 | # stats 72 | prec1, correct, total, max_sim, max_label = precision1(sim, test_set, test_ref_set, kth) 73 | mAP = mean_avg_precision(sim, test_set, test_ref_set, kth) 74 | sum_pos = sum(sim[i, j] for i, (_, test_label, _) in enumerate(test_set) for j, (_, ref_label, _) in enumerate(test_ref_set) if test_label == ref_label) 75 | sum_neg = sim.sum() - sum_pos 76 | sum_max = max_sim.sum() 77 | lab_dict = dict([(lab, {}) for _, lab, _ in test_set]) 78 | for j, (_, lab, _) in enumerate(test_set): 79 | d = lab_dict[lab] 80 | lab = max_label[j] 81 | d[lab] = d.get(lab, 0) + 1  # count how often each ref label is predicted for this true label 82 | return prec1, correct, total, sum_pos, sum_neg, sum_max, mAP, lab_dict 83 | 84 | 85 | # Generic function to test and print stats when training a descriptor net 86 | def test_print_descriptor(train_type, P, net, testset_tuple, get_embeddings, best_score=0, epoch=0): 87 | def print_stats(prefix, p1, c, t, avg_pos, avg_neg, avg_max, mAP): 88 | s1 = 'Correct: {0} / {1} - acc: {2:.4f} - mAP:{3:.4f}\n'.format(c, t, p1, mAP) 89 | s2 = 'AVG cosine sim (sq dist) values: pos: {0:.4f} ({1:.4f}), neg: {2:.4f} ({3:.4f}), max: {4:.4f} ({5:.4f})'.format(avg_pos, 2 - 2 * avg_pos, avg_neg, 2 - 2 * avg_neg, avg_max, 2 - 2 * avg_max) 90 | log(P, prefix + s1 + s2) 91 | 92 | test_set, test_ref_set = testset_tuple 93 | set_net_train(net, False) 94 | prec1, correct, tot, sum_pos, sum_neg, sum_max, mAP, lab_dict = test_descriptor_net(P, get_embeddings, net, test_set, test_ref_set) 95 | # can save labels dictionary (predicted labels for all test labels) 96 | # TODO 97 | 98 | num_pos = sum(test_label == ref_label for _, test_label, _ in test_set for _, ref_label, _ in test_ref_set) 99 | num_neg = len(test_set) * len(test_ref_set) - num_pos 100 | 101 | if
(correct > best_score): 102 | best_score = correct 103 | prefix = '{0}, EPOCH:{1}, SCORE:{2}'.format(train_type, epoch, correct) 104 | save_uuid(P, prefix) 105 | torch.save(net.state_dict(), path.join(P.save_dir, unique_str(P) + "_best_siam.pth.tar")) 106 | print_stats('TEST - ', prec1, correct, tot, sum_pos / num_pos, sum_neg / num_neg, sum_max / len(test_set), mAP) 107 | torch.save(net.state_dict(), path.join(P.save_dir, "model_siam_" + str(epoch) + ".pth.tar")) 108 | 109 | # training set accuracy (choose second highest value, 110 | # as highest should almost certainly be the same image) 111 | # choose train samples with at least 2 other images for the query 112 | couples = get_pos_couples(test_ref_set) 113 | train_test_set = random.sample(test_ref_set, max(1, len(test_ref_set) // 10)) 114 | train_test_set = filter(lambda x: len(couples[x[1]]) >= 3, train_test_set) 115 | prec1, correct, tot, sum_pos, sum_neg, sum_max, mAP, _ = test_descriptor_net(P, get_embeddings, net, train_test_set, test_ref_set, kth=2) 116 | num_pos = sum(test_label == ref_label for _, test_label, _ in train_test_set for _, ref_label, _ in test_ref_set) 117 | num_neg = len(train_test_set) * len(test_ref_set) - num_pos 118 | print_stats('TRAIN - ', prec1, correct, tot, sum_pos / num_pos, sum_neg / num_neg, sum_max / len(train_test_set), mAP) 119 | set_net_train(net, True, bn_train=P.train_bn) 120 | return best_score 121 | --------------------------------------------------------------------------------
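The siamese test helpers above (get_similarities, test_descriptor_net, test_print_descriptor) all expect a get_embeddings(net, dataset, device, out_size) callable supplied by the training scripts under train/ (siamese_descriptor.py, siamese_regions.py, ...). Below is a minimal sketch of such a callable; it only illustrates the expected interface, not the repository's implementation: make_get_embeddings and trans are made-up names, images are assumed to be stored as file paths inside (image, label, name) triples, and the network is assumed to return one L2-normalized descriptor per image through the old Variable-based PyTorch API used throughout this code.

# hypothetical sketch of the get_embeddings callback used by utils/train_siamese.py
import torch
from torch.autograd import Variable
from general import tensor, move_device
from image import imread_rgb


def make_get_embeddings(P, trans):
    # trans: a torchvision-style transform turning an RGB numpy image into a CHW tensor
    def get_embeddings(net, dataset, device, out_size):
        # output buffer on the device chosen by embeddings_device_dim (GPU if it fits, else CPU)
        out = tensor(device, len(dataset), out_size)
        for i, (im, _, _) in enumerate(dataset):
            x = move_device(trans(imread_rgb(im)).unsqueeze(0), P.cuda_device)
            # forward pass without gradients; the descriptor is assumed to have size out_size
            feat = net(Variable(x, volatile=True)).data
            out[i] = move_device(feat.view(-1), device)
        return out
    return get_embeddings

--------------------------------------------------------------------------------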