├── .gitignore
├── .gitmodules
├── README.md
├── doc
    ├── images
    │   ├── PASCAL2.png
    │   ├── aff1.jpeg
    │   ├── aff2.jpeg
    │   ├── aff3.jpeg
    │   ├── cover.png
    │   ├── erc.jpeg
    │   ├── paint1.jpeg
    │   ├── paint2.jpeg
    │   ├── paint3.jpeg
    │   ├── query.jpeg
    │   └── words.jpeg
    ├── instructions.html
    └── instructions.md
├── exercise1.m
├── exercise2.m
├── exercise3.m
├── exercise4.m
├── extra
    ├── Makefile
    ├── annkmeans.m
    ├── bootstrap-data.sh
    ├── evaluate.m
    ├── experimental
    │   └── crawler
    │   │   ├── crawl_all_paintings.py
    │   │   └── crawl_wikipaintings.py
    ├── paintings.txt
    ├── preprocess.m
    ├── preprocess_paintings.m
    └── queries
    │   ├── mistery-building1.jpg
    │   ├── mistery-painting1.jpg
    │   ├── mistery-painting2.jpg
    │   └── mistery-painting3.jpg
├── findNeighbours.m
├── geometricVerification.m
├── getFeatures.m
├── getHistogram.m
├── getHistogramFromImage.m
├── loadIndex.m
├── matchWords.m
├── plotMatches.m
├── plotQueryImage.m
├── plotRetrievedImages.m
├── search.m
└── setup.m


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | data
3 | doc/base.ss
4 | doc/prism.css
5 | doc/prism.js
6 | vlfeat
7 | base.css
8 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "extra/practical"]
2 | 	path = extra/practical
3 | 	url = git@github.com:vedaldi/practical.git
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Object instance recognition practical
 2 | =====================================
 3 | 
 4 | > A computer vision practical by the Oxford Visual Geometry group,
 5 | > authored by Andrea Vedaldi and Andrew Zisserman.
 6 | 
 7 | Start from `doc/instructions.html`.
 8 | 
 9 | Package contents
10 | ----------------
11 | 
12 | The practical consists of four exercises, organized in the following
13 | files:
14 | 
15 | * `exercise1.m` -- Part I: Sparse features for matching specific
16 |   objects in images
17 | * `exercise2.m` -- Part II: Affine co-variant detectors
18 | * `exercise3.m` -- Part III: Towards large scale retrieval
19 | * `exercise4.m` -- Part IV: Large scale retrieval
20 | 
21 | The computer vision algorithms are implemented by
22 | [VLFeat](http://www.vlfeat.org). This package contains the following
23 | MATLAB functions:
24 | 
25 | * `findNeighbours.m`: Match features based on their descriptors.
26 | * `geometricVerification.m`: Geometrically verify feature matches.
27 | * `getFeatures.m`: Extract features from an image.
28 | * `getHistogramFromImage.m`: Get a feature histogram from an image.
29 | * `getHistogram.m`: Get a feature histogram from quantised features.
30 | * `loadIndex.m`: Load an image database with an inverted index.
31 | * `plotbox.m`: Plot boxes.
32 | * `plotMatches.m`: Plot feature matches.
33 | * `plotRetrievedImages.m`: Plot search results.
34 | * `plotQueryImage.m`: Plot the query image for a set of search results.
35 | * `search.m`: Search an image database.
36 | * `setup.m`: Set up MATLAB to use the required libraries.
37 | 
38 | Appendix: Installing from scratch
39 | ---------------------------------
40 | 
41 | 1. From Bash, run `./extra/bootstrap-data.sh`. This will download the
42 |    Oxford 5k practical data.
43 | 2. From MATLAB, run `addpath extra ; preprocess`. This will download
44 |    the VLFeat library (http://www.vlfeat.org) and compute a visual
45 |    index for the Oxford 5k data.
46 | 3. From MATLAB, run `addpath extra ; preprocess_paintings`. This will
47 |    download and index a number of painting images from Wikipedia.
48 | 4. From Bash, run `make -f extra/Makefile pack`. This will pack the
49 |    practical in archives that can be redistributed to students.
50 | 
51 | Changes
52 | -------
53 | 
54 | * *2018a* - Updated VLFeat.
55 | * *2015a* - Compatibility with MATLAB 2014b.
56 | * *2014a* - Improves documentation and packing.
57 | * *2013a* - Improves documentation, uses last version of VLFeat, bugfixes.
58 | * *2012*  - Adds co-variant feature detectors
59 | * *2011*  - Initial edition
60 | 
61 | License
62 | -------
63 | 
64 |     Copyright (c) 2011-13 Andrea Vedaldi and Andrew Zisserman
65 | 
66 |     Permission is hereby granted, free of charge, to any person
67 |     obtaining a copy of this software and associated documentation
68 |     files (the "Software"), to deal in the Software without
69 |     restriction, including without limitation the rights to use, copy,
70 |     modify, merge, publish, distribute, sublicense, and/or sell copies
71 |     of the Software, and to permit persons to whom the Software is
72 |     furnished to do so, subject to the following conditions:
73 | 
74 |     The above copyright notice and this permission notice shall be
75 |     included in all copies or substantial portions of the Software.
76 | 
77 |     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
78 |     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
79 |     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
80 |     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
81 |     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
82 |     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
83 |     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
84 |     DEALINGS IN THE SOFTWARE.
85 | 
86 | 


--------------------------------------------------------------------------------
/doc/images/PASCAL2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/PASCAL2.png


--------------------------------------------------------------------------------
/doc/images/aff1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/aff1.jpeg


--------------------------------------------------------------------------------
/doc/images/aff2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/aff2.jpeg


--------------------------------------------------------------------------------
/doc/images/aff3.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/aff3.jpeg


--------------------------------------------------------------------------------
/doc/images/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/cover.png


--------------------------------------------------------------------------------
/doc/images/erc.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/erc.jpeg


--------------------------------------------------------------------------------
/doc/images/paint1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/paint1.jpeg


--------------------------------------------------------------------------------
/doc/images/paint2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/paint2.jpeg


--------------------------------------------------------------------------------
/doc/images/paint3.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/paint3.jpeg


--------------------------------------------------------------------------------
/doc/images/query.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/query.jpeg


--------------------------------------------------------------------------------
/doc/images/words.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/doc/images/words.jpeg


--------------------------------------------------------------------------------
/doc/instructions.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 |   <meta charset="utf-8">
  5 |   <meta http-equiv="X-UA-Compatible" content="IE=edge">
  6 |   <title>VGG Practical</title>
  7 |   <link rel="stylesheet" href="base.css" />
  8 |   <link rel="stylesheet" href="prism.css" />
  9 | </head>
 10 | <body>
 11 | <h1 id="recognition-of-object-instances-practical">Recognition of object instances practical</h1>
 12 | <p>This is an <a href="http://www.robots.ox.ac.uk/~vgg">Oxford Visual Geometry Group</a> computer vision practical, authored by <a href="http://www.robots.ox.ac.uk/~vedaldi/">Andrea Vedaldi</a> and Andrew Zisserman (Release 2018a).</p>
 13 | <p><img width=500 src="images/cover.png" alt="cover"/></p>
 14 | <p>The goal of instance-level recognition is to match (recognize) a specific object or scene. Examples include recognizing a specific building, such as Notre Dame, or a specific painting, such as ''Starry Night'' by Van Gogh. The object is recognized despite changes in scale, camera viewpoint, illumination conditions and partial occlusion. An important application is image retrieval -- starting from an image of an object of interest (the query), search through an image dataset to obtain (or retrieve) those images that contain the target object.</p>
 15 | <p>The goal of this session is to get basic practical experience with the methods that enable specific object recognition. It includes: (i) using SIFT features to obtain sparse matches between two images; (ii) using affine co-variant detectors to cover changes in viewpoint; (iii) vector quantizing the SIFT descriptors into visual words to enable large scale retrieval; and (iv) constructing and using an image retrieval system to identify objects.</p>
 16 | <div class="toc">
 17 | <ul>
 18 | <li><a href="#recognition-of-object-instances-practical">Recognition of object instances practical</a><ul>
 19 | <li><a href="#getting-started">Getting started</a></li>
 20 | <li><a href="#part-i-sparse-features-for-matching-object-instances">Part I: Sparse features for matching object instances</a><ul>
 21 | <li><a href="#stage-ia-sift-features-detector">Stage I.A: SIFT features detector</a></li>
 22 | <li><a href="#stage-ib-sift-features-descriptors-and-matching-between-images">Stage I.B: SIFT features descriptors and matching between images</a></li>
 23 | <li><a href="#stage-ic-improving-sift-matching-using-lowes-second-nearest-neighbour-test">Stage I.C: Improving SIFT matching using Lowe’s second nearest neighbour test</a></li>
 24 | <li><a href="#stage-id-improving-sift-matching-using-a-geometric-transformation">Stage I.D: Improving SIFT matching using a geometric transformation</a></li>
 25 | </ul>
 26 | </li>
 27 | <li><a href="#part2">Part II: Affine co-variant detectors</a></li>
 28 | <li><a href="#part3">Part III: Towards large scale retrieval</a><ul>
 29 | <li><a href="#stage-iiia-accelerating-descriptor-matching-with-visual-words">Stage III.A: Accelerating descriptor matching with visual words</a></li>
 30 | <li><a href="#stage3b">Stage III.B: Searching with an inverted index</a></li>
 31 | <li><a href="#stage3c">Stage III.C: Geometric rescoring</a></li>
 32 | <li><a href="#stage3d">Stage III.D: Full system</a></li>
 33 | </ul>
 34 | </li>
 35 | <li><a href="#part-iv-large-scale-retrieval">Part IV: Large scale retrieval</a></li>
 36 | <li><a href="#links-and-further-work">Links and further work</a></li>
 37 | <li><a href="#acknowledgements">Acknowledgements</a></li>
 38 | <li><a href="#history">History</a></li>
 39 | </ul>
 40 | </li>
 41 | </ul>
 42 | </div>
 43 | <h2 id="getting-started">Getting started</h2>
 44 | <p>Read and understand the <a href="../overview/index.html#installation">requirements and installation instructions</a>. The download links for this practical are:</p>
 45 | <ul>
 46 | <li>Code and data: <a href="http://www.robots.ox.ac.uk/~vgg/share/practical-instance-recognition-2018a.tar.gz">practical-instance-recognition-2018a.tar.gz</a> 560MB</li>
 47 | <li>Code only: <a href="http://www.robots.ox.ac.uk/~vgg/share/practical-instance-recognition-2018a-code-only.tar.gz">practical-instance-recognition-2018a-code-only.tar.gz</a> 10MB</li>
 48 | <li>Data only: <a href="http://www.robots.ox.ac.uk/~vgg/share/practical-instance-recognition-2018a-data-only.tar.gz">practical-instance-recognition-2018a-data-only.tar.gz</a> 550MB</li>
 49 | <li><a href="https://github.com/vedaldi/practical-object-instance-recognition">Git repository</a> (for lab setters and developers)</li>
 50 | </ul>
 51 | <p>After the installation is complete, open and edit the script <code>exercise1.m</code> in the MATLAB editor. The script contains commented code and a description for all steps of this exercise, relative to Part I of this document. You can cut and paste this code into the MATLAB window to run it, and will need to modify it as you go through the session. Other files <code>exercise2.m</code>, <code>exercise3.m</code>, and <code>exercise4.m</code> are given for Part II, III, and IV.</p>
 52 | <p><strong>Note</strong>: the student packages contain only the code required to run the practical. The complete package, including code to preprocess the data, is available on GitHub.</p>
 53 | <h2 id="part-i-sparse-features-for-matching-object-instances">Part I: Sparse features for matching object instances</h2>
 54 | <h3 id="stage-ia-sift-features-detector">Stage I.A: SIFT features detector</h3>
 55 | <p>The SIFT feature has both a detector and a descriptor. We will start by computing and visualizing the SIFT feature detections for two images of the same object (a building facade). Load an image, rotate and scale it, and then display the original and transformed pair:</p>
 56 | <pre><code class="language-matlab">% Load an image
 57 | im1 = imread('data/oxbuild_lite/all_souls_000002.jpg') ;
 58 | % Let the second image be a rotated and scaled version of the first
 59 | im3 = imresize(imrotate(im1,35,'bilinear'),0.7) ;% Display the images
 60 | subplot(1,2,1) ; imagesc(im1) ; axis equal off ; hold on ;
 61 | subplot(1,2,2) ; imagesc(im3) ; axis equal off ;
 62 | </code></pre>
 63 | 
 64 | <p>A SIFT frame is a circle with an orientation and is specified by four parameters: the center $t_x$, $t_y$, the scale $s$, and the rotation $\theta$ (in radians), resulting in a vector of four parameters $(s, \theta, t_x, t_y)$. Now compute and visualise the SIFT feature detections (frames):</p>
 65 | <pre><code class="language-matlab">% Compute SIFT features for each
 66 | [frames1, descrs1] = getFeatures(im1, 'peakThreshold', 0.01) ;
 67 | [frames3, descrs3] = getFeatures(im3, 'peakThreshold', 0.01) ;
 68 | subplot(1,2,1) ; imagesc(im1) ; axis equal off ; hold on ;
 69 | vl_plotframe(frames1, 'linewidth', 2) ;
 70 | subplot(1,2,2) ; imagesc(im3) ; axis equal off ; hold on ;
 71 | vl_plotframe(frames3, 'linewidth', 2) ;
 72 | </code></pre>
 73 | 
 74 | <p>Examine the second image and its rotated and scaled version and convince yourself that the detections overlap the same scene regions (even though the circles have moved their image position and changed radius). It is helpful to zoom into a smaller image area using the MATLAB magnifying glass tool. This demonstrates that the detection process transforms (is co-variant) with translations, rotations and isotropic scalings. This class of transformations is known as a similarity or equiform.</p>
 75 | <blockquote>
 76 | <p><strong>Task:</strong> The number of detected features can be controlled by changing the <code>peakThreshold</code> option. A larger value will select features that correspond to higher contrast structures in the image. Try this now: run again the same code, but increase <code>peakThreshold</code> two or three times.</p>
 77 | </blockquote>
 78 | <p>Now repeat the exercise with a pair of natural images. Start by loading the second one:</p>
 79 | <pre><code class="language-matlab">% Load a second image
 80 | im2 = imread('data/oxbuild_lite/all_souls_000015.jpg') ;
 81 | </code></pre>
 82 | 
 83 | <p>and plot images and feature frames. Again you should see that many of the detections overlap the same scene region. Note that, while repeatability occurs for the pair of natural views, it is much better for the synthetically rotated pair.</p>
 84 | <blockquote>
 85 | <p><strong>Question:</strong> Note the change in density of detections across the image. Why does it change? Will it be a problem for matching? How could it be avoided?</p>
 86 | <p><strong>Question:</strong> Occasionally, a feature is detected multiple times, with different orientations. This may happen when the orientation  assignment is ambiguous. Which kind of image structure would result in ambiguous orientation assignment?</p>
 87 | </blockquote>
 88 | <h3 id="stage-ib-sift-features-descriptors-and-matching-between-images">Stage I.B: SIFT features descriptors and matching between images</h3>
 89 | <p>Next we will use the descriptor computed over each detection to match the detections between images. We will start with the simplest matching scheme (first nearest neighbour of descriptors) and then add more sophisticated methods to eliminate any mismatches.</p>
 90 | <ul>
 91 | <li>Visualize the SIFT descriptors for the detected feature frames with the function <code>vl_plotsiftdescriptor</code>. Then use <code>vl_plotframe</code> to overlay the corresponding frames.</li>
 92 | </ul>
 93 | <blockquote>
 94 | <p><strong>Question:</strong>  Note the descriptors are computed over a much larger region (shown in blue) than the detection (shown in green). Why?</p>
 95 | </blockquote>
 96 | <ul>
 97 | <li>
 98 | <p>Compute first nearest neighbours matches - for each SIFT descriptor in the first image, compute its nearest neighbour in the second image with the function <code>findNeighbours</code>.</p>
 99 | </li>
100 | <li>
101 | <p>Visualize the correspondences using lines joining matched SIFT features with the function <code>plotMatches</code>.</p>
102 | </li>
103 | </ul>
104 | <blockquote>
105 | <p><strong>Question:</strong>     Notice that there are many mismatches. Examine some of the mismatches to understand why the mistakes are being made. For example, is the change in lighting a problem? What additional constraints can be applied to remove the mismatches?</p>
106 | </blockquote>
107 | <p><strong>Hint:</strong> You can visualize a subset of the matches using:</p>
108 | <pre><code class="language-matlab">figure; plotMatches(im1,im2,frames1,frames2,matches(:,3:200:end));
109 | </code></pre>
110 | 
111 | <h3 id="stage-ic-improving-sift-matching-using-lowes-second-nearest-neighbour-test">Stage I.C: Improving SIFT matching using Lowe’s second nearest neighbour test</h3>
112 | <p>Lowe introduced a second nearest neighbour (2nd NN) test to identify, and hence remove, ambiguous matches. The idea is to identify distinctive matches by a threshold on the ratio of first to second NN distances. In the MATLAB file, the ratio is <code>nnThreshold</code> = 1NN distance / 2NN distance.</p>
113 | <ul>
114 | <li>Vary the ratio <code>nnThreshold</code> in a range from 0.1 to 0.9, and examine how the number of matches and number of mismatches changes.</li>
115 | <li>A value of <code>nnThreshold = 0.8</code> is often a good compromise between losing too many matches and rejecting mismatches.</li>
116 | </ul>
117 | <blockquote>
118 | <p><strong>Question:</strong> Examine some of the remaining mismatches  to understand why they have occurred. How could they be removed?</p>
119 | </blockquote>
120 | <h3 id="stage-id-improving-sift-matching-using-a-geometric-transformation">Stage I.D: Improving SIFT matching using a geometric transformation</h3>
121 | <p>In addition to the 2nd NN test, we can also require consistency between the matches and a geometric transformation between the images. For the moment we will look for matches that are consistent with a similarity transformation</p>
122 | <p>
123 | <script type="math/tex; mode=display">
124 | \begin{bmatrix}
125 | x' \\ y'
126 | \end{bmatrix}
127 | =
128 | sR(\theta)
129 | \begin{bmatrix}
130 | x \\ y
131 | \end{bmatrix}
132 | +
133 | \begin{bmatrix}
134 | t_x \\ t_y
135 | \end{bmatrix}
136 | </script>
137 | </p>
138 | <p>which consists of a rotation by $\theta$, an isotropic scaling (i.e. same in all directions) by $s$, and a translation by a vector $(t_x, t_y)$. This transformation is specified by four parameters $(s,\theta,t_x,t_y)$ and can be computed from a single correspondence between SIFT detections in each image.</p>
139 | <blockquote>
140 | <p><strong>Task:</strong> Work out how to compute this transformation from a single correspondence.</p>
141 | </blockquote>
142 | <p><strong>Hint:</strong> Recall from Stage I.A that a SIFT feature frame is an oriented circle and map one onto the other.</p>
143 | <p>The matches consistent with a similarity can then be found using a RANSAC inspired algorithm, implemented by the function <code>geometricVerification</code>:</p>
144 | <p><strong>RANSAC-like algorithm for geometric verification</strong></p>
145 | <ol>
146 | <li>For each tentative correspondence in turn:<ol>
147 | <li>compute the similarity transformation;</li>
148 | <li>map all the SIFT detections in one image to the other using this transformation;</li>
149 | <li>accept matches that are within a threshold distance to the mapped detection (inliers);</li>
150 | <li>count the number of accepted matches;</li>
151 | <li>optionally, fit a more accurate affine transformation or homography to the accepted matches and re-validate the matches.</li>
152 | </ol>
153 | </li>
154 | <li>Choose the transformation with the highest count of inliers.</li>
155 | </ol>
156 | <hr />
157 | <p>After this algorithm the inliers are consistent with the transformation and are retained, and most mismatches should now be removed.</p>
158 | <blockquote>
159 | <p><strong>Task:</strong> The figure generated by <code>plotMatches</code> supports the interactive visualisation of the transformation found by <code>geometricVerification</code>. Try hovering with the mouse on the figure and check that corresponding image points are highlighted in the two images.</p>
160 | </blockquote>
161 | <p><strong>Skip to <a href="#part2">Part 2</a> on fast track</strong></p>
162 | <blockquote>
163 | <p><strong>Task:</strong> Test this procedure by varying the threshold distance (edit <code>geometricVerification</code> and change the <code>opts.tolerance1</code>, <code>opts.tolerance2</code>, and <code>opts.tolerance3</code> parameters, where the last two thresholds are relative to the optional iterative fitting of an affine transformation or homography to the inliers). Note the number of inliers and number of mismatches.</p>
164 | </blockquote>
165 | <p>If more matches are required the geometric transformation can be used alone, without also requiring the 2nd NN test. Indeed, since the 1st NN may not be the correct match, a list of potential (putative) matches can be generated for each SIFT descriptor by including the 1st NN, 2nd NN, 3rd NN etc. Investigate how the number of correct matches (and time for computation) grows as the potential match list is extended, and the geometric transformation is used to select inliers. To this end:</p>
166 | <blockquote>
167 | <p><strong>Task:</strong> Change the code to include in the match list the 1st NN, 2nd NN, 3rd NN, … best matches for each feature.
168 | <strong>Task:</strong> Run geometric verification and check the number of verified matches using this expanded list.</p>
169 | </blockquote>
170 | <p><strong>Hint:</strong> You can use MATLAB’s tic and toc functions to measure the execution time of a snippet of code. For example</p>
171 | <pre><code class="language-matlab">tic ; pause(3) ; toc
172 | </code></pre>
173 | 
174 | <p>will pause MATLAB for three seconds and return an elapsed time approximately equal to 3. See <code>help tic</code> for details.</p>
175 | <h2 id="part2">Part II: Affine co-variant detectors</h2>
176 | <p>So far the change in viewpoint between images has been a similarity transformation. Now we consider more severe viewpoint changes - for example where an object is fronto-parallel in one view, and turns away from the camera in the other as in the graffiti wall images below:</p>
177 | <p><img src="images/aff1.jpeg" alt="aff1" width="200"/><img src="images/aff2.jpeg" alt="aff2" width="200"/><img src="images/aff3.jpeg" alt="aff3" width="200"/></p>
178 | <p>In this case, there is foreshortening (anisotropic scaling) and perspective distortions between the images (as well as in-plane rotation, translation and scaling). A circle in one image cannot cover the same scene area as a circle in the other, but an ellipse can. Affine co-variant detectors are designed to find such regions.</p>
179 | <p>In the following we will compare the number of matches using a similarity and affine co-variant detector as the viewpoint becomes progressively more extreme. The detectors are SIFT (for similarity) and SIFT+affine adaptation (for affine), while the descriptors are in both cases SIFT.</p>
180 | <blockquote>
181 | <p><strong>Task:</strong> Open and examine the script <code>exercise2.m</code> in the MATLAB editor. Run the script.</p>
182 | </blockquote>
183 | <p>Note the behaviour in the number of verified matches as the viewpoint becomes more extreme. Observe that the matches also identify the regions of the images that are in common.</p>
184 | <blockquote>
185 | <p><strong>Question:</strong> The transformation between the images induced by the plane is a planar homography. The detections are only affine co-variant (not as general as a planar homography). So how can descriptors computed on these detections possibly match?</p>
186 | </blockquote>
187 | <p><strong>Note:</strong> There are many other detector variants that could be used for this task. These can be activated by the method option of getFeatures.m (see also help <code>vl_covdet</code>).</p>
188 | <h2 id="part3">Part III: Towards large scale retrieval</h2>
189 | <p>In large scale retrieval the goal is to match a query image to a large database of images (for example the WWW or Wikipedia). The quality of a match is measured as the number of geometrically verified feature correspondences between the query and a database image. While the techniques discussed in Part I and II are sufficient to do this, in practice they require too much memory to store the SIFT descriptors for all the detections in all the database images. We explore next two key ideas: one to reduce the memory footprint and pre-compute descriptor matches; the other to speed up image retrieval.</p>
190 | <blockquote>
191 | <p><strong>Task:</strong> Open and edit the script <code>exercise3.m</code> in the MATLAB editor, and cut and paste to work through the following stages.</p>
192 | </blockquote>
193 | <h3 id="stage-iiia-accelerating-descriptor-matching-with-visual-words">Stage III.A: Accelerating descriptor matching with visual words</h3>
194 | <p>Instead of matching feature descriptors directly as done in Part I and II, descriptors are usually mapped first to discrete symbols, also called visual words, by means of a clustering technique like K-Means. The descriptors that are assigned to the same visual word are considered matched. Each of the rows in the following figure illustrates image patches that are mapped to the same visual word, and are hence indistinguishable by the representation.</p>
195 | <p><img src="images/words.jpeg" alt="words" width=400px/></p>
196 | <p>Then, matching two sets of feature descriptors (from two images) reduces to finding the intersection of two sets of symbols.</p>
197 | <blockquote>
198 | <p><strong>Tasks:</strong></p>
199 | <ul>
200 | <li>Load a visual word dictionary and an associated approximate nearest neighbour (ANN) matcher (the ANN matcher is used to determine the closest visual word to each descriptor and is based on a forest of KD trees).</li>
201 | <li>Given SIFT descriptors for two images, quantise them (assign them) into the corresponding visual words.</li>
202 | <li>Find corresponding features by looking for the same visual words in the two images and note the computation time.</li>
203 | <li>Geometrically verify these initial correspondences and count the number of inlier matches found.</li>
204 | <li>Find corresponding features by using the method of Part I and II, i.e. by comparing the descriptors directly, and note the computation time. Geometrically verify these initial correspondences and count the number of inlier matches found.</li>
205 | <li>Compare the speed and number of inliers when using visual words vs raw SIFT descriptors by means of the function <code>matchWords</code>. Note, you should repeat the timing (by running the matching again) as the first time you run it there may be a delay as certain MATLAB components are loaded into memory.</li>
206 | <li><strong>Optional:</strong> compare the speed and number of matches over another pair of images (from part I and II).</li>
207 | </ul>
208 | <p><strong>Questions:</strong></p>
209 | <ul>
210 | <li>The size of the vocabulary (the number of clusters) is an important parameter in visual word algorithms. How does the size affect the number of inliers and the difficulty of computing the transformation?</li>
211 | <li>In the above procedure the time required to convert the descriptors into visual words was not accounted for. Why?</li>
212 | <li>What is the speedup in searching a large, fixed database of 10, 100, 1000 images?</li>
213 | </ul>
214 | </blockquote>
215 | <p><strong>Skip to <a href="#stage3b">Stage III.B</a> on fast track</strong> </p>
216 | <p>Often multiple feature occurrences are mapped to the same visual word. In this case <code>matchWords</code> generates only one of the possible matches.</p>
217 | <blockquote>
218 | <p><strong>Tasks:</strong></p>
219 | <ul>
220 | <li>Modify <code>matchWords</code> to generate more than one match for cases in which multiple features are mapped to the same visual word. This can be achieved by increasing the value of <code>maxNumMatches</code>.</li>
221 | <li>Most of these additional matches are incorrect. Filter them out by running <code>geometricVerification</code>.</li>
222 | <li>Compare the number of inliers obtained before and after this modification.</li>
223 | </ul>
224 | </blockquote>
225 | <h3 id="stage3b">Stage III.B: Searching with an inverted index</h3>
226 | <p>While matching with visual words is much faster than doing so by comparing feature descriptors directly, scoring images directly based on the number of geometrically verified matches still entails fitting a geometric model, a relatively slow operation. Rather than scoring all the images in the database in this way, we are going to use an approximation and count the number of visual words shared between two images.</p>
227 | <p>To this end, one computes a histogram of the visual words in a query image and for each of  the database images. Then the number of visual words in common can be computed from the intersection of the two histograms.</p>
228 | <p>The histogram intersection can be thought of as a similarity measure between two histograms. In practice, this measure can be refined in several ways:</p>
229 | <ul>
230 | <li>By reducing the importance of common visual words. This is similar to a stop-words list and can be implemented by weighting each word by the 'inverse document frequency' (the inverse of the frequency of occurrence of that visual word over the entire database of images).</li>
231 | <li>By normalising the weighted histograms to unit vectors and using the cosine between them as similarity. This can be implemented easily as the inner product between normalised histograms.</li>
232 | </ul>
233 | <p>Computing histogram similarities can be implemented extremely efficiently using an inverted file index. In this exercise, inner products between normalized histograms are computed quite efficiently using MATLAB's built-in sparse matrix engine.</p>
234 | <p>We now apply this retrieval method to search using a query image within a 660 image subset of the Oxford 5k building image set.</p>
235 | <blockquote>
236 | <p><strong>Task:</strong> How many erroneously matched images do you count in the top results?
237 | <strong>Question:</strong> Why does the top image have a score of 1?</p>
238 | </blockquote>
239 | <h3 id="stage3c">Stage III.C: Geometric rescoring</h3>
240 | <p>Histogram-based retrieval results are good but far from perfect. Given a short list of top ranked images from the previous step, we are now going to re-score them based on the number of inlier matches after a geometric verification step.</p>
241 | <blockquote>
242 | <p><strong>Question:</strong> Why is the top score much larger than 1 now?
243 | <strong>Question:</strong> Are the retrieval results improved after geometric verification?</p>
244 | </blockquote>
245 | <h3 id="stage3d">Stage III.D: Full system</h3>
246 | <p>Now try the full system to retrieve matches to an unseen query image.</p>
247 | <p><img src="images/query.jpeg" alt="query" width=400px/></p>
248 | <h2 id="part-iv-large-scale-retrieval">Part IV: Large scale retrieval</h2>
249 | <p><strong>Skip and end here on fast track</strong></p>
250 | <p>The images below are all details of paintings. The goal of this last part of the practical is to identify the paintings that they came from. For this we selected a set of 1734 images of paintings from Wikipedia.</p>
251 | <p><img src="images/paint1.jpeg" alt="paint1" height=200px/><img src="images/paint2.jpeg" alt="paint2" height=200px/><img src="images/paint3.jpeg" alt="paint3" height=200px/></p>
252 | <p>To identify the details you can either:</p>
253 | <ol>
254 | <li>use your knowledge of art</li>
255 | <li>search through the 1734 Wikipedia images until you find matches</li>
256 | <li>build a recognition system and match the details automatically</li>
257 | </ol>
258 | <p>We follow route (3) here. Look through and run <code>exercise4.m</code>. This uses the techniques described in Part III in order to construct an index for 1734 Wikipedia images so that they may be searched quickly. Use the code to find which paintings these details come from.</p>
259 | <p>Note, although the index is stored locally, the matching images are downloaded from Wikipedia and displayed. Click on the image to reach the Wikipedia page for that painting (and hence identify it).</p>
260 | <blockquote>
261 | <p><strong>Task:</strong> Use the code to visually search Wikipedia for further paintings from Van Gogh downloaded from the Internet. </p>
262 | </blockquote>
263 | <p><strong>Note:</strong> the code supports URLs in place of filenames. </p>
264 | <p>Take note of the code output.</p>
265 | <blockquote>
266 | <p><strong>Questions:</strong></p>
267 | <ul>
268 | <li>How many features are there in the painting database?</li>
269 | <li>How much memory does the image database take?</li>
270 | <li>What are the stages of the search? And how long does each of the stages take for one of the query images?</li>
271 | </ul>
272 | </blockquote>
273 | <p>That completes this practical.</p>
274 | <h2 id="links-and-further-work">Links and further work</h2>
275 | <ul>
276 | <li>The code for this practical is written using the software package <a href="http://www.vlfeat.org">VLFeat</a>. This is a software library written in MATLAB and C, and is freely available as source code and binary.</li>
277 | <li>The images for this practical are taken from the <a href="http://www.robots.ox.ac.uk/~vgg/research/affine/">Affine Covariant Features dataset</a>, and the <a href="http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/">Oxford Buildings benchmark</a>.</li>
278 | <li>For a tutorial on large scale visual search and references to the literature, see the lectures by Josef Sivic and Cordelia Schmid <a href="http://www.di.ens.fr/willow/events/cvml2012/materials/">here</a>.</li>
279 | <li>For recent developments in large scale search (compact image descriptors, compression with product quantization), see <a href="https://sites.google.com/site/lsvr13/">these lectures</a> by Herve Jegou and Florent Perronnin.</li>
280 | </ul>
281 | <h2 id="acknowledgements">Acknowledgements</h2>
282 | <ul>
283 | <li>Guidance from Josef Sivic, Ivan Laptev and Cordelia Schmid</li>
284 | <li>Mircea Cimpoi for scripts for downloading and linking to Wikipedia paintings </li>
285 | <li>Comments from Relja Arandjelovic, Karen Simonyan, Omkar Parkhi, Meelis Lootus, Hossein Azizpour, Max Jaderberg</li>
286 | <li>Funding from ERC grant VisRec Grant No. 228180, and a PASCAL Harvest Grant.</li>
287 | </ul>
288 | <p><img src="images/erc.jpeg" alt="erc" height=100px/><img src="images/PASCAL2.png" alt="pascal2" height=100px/></p>
289 | <h2 id="history">History</h2>
290 | <ul>
291 | <li>Used in the Oxford AIMS CDT, 2014-18</li>
292 | <li>Used at <a href="http://www.di.ens.fr/willow/events/cvml2012/">ENS/INRIA Visual Recognition and Machine Learning Summer School, 2012</a>.</li>
293 | <li>Used at <a href="http://www.clsp.jhu.edu/workshops/archive/ws-12/summer-school/">JHU Summer School on Human Language Technology, 2012</a>.</li>
294 | </ul><script type="text/x-mathjax-config">
295 | MathJax.Hub.Config({
296 |     extensions: ["tex2jax.js"],
297 |     jax: ["input/TeX", "output/HTML-CSS"],
298 |     tex2jax: {
299 |       inlineMath: [ ['$','$'], ["\\(","\\)"] ],
300 |       displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
301 |       processEscapes: true
302 |     },
303 |   "HTML-CSS": { availableFonts: ["TeX"] },
304 |   TeX: { equationNumbers: { autoNumber: "AMS" } }
305 |   });
306 | if (typeof MathJaxListener !== 'undefined') {
307 |   MathJax.Hub.Register.StartupHook('End', function () {
308 |     MathJaxListener.invokeCallbackForKey_('End');
309 |   });
310 | }
311 | </script>
312 | <script type="text/javascript" src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
313 | <script type="text/javascript" src="prism.js"></script>
314 | </body>
315 | </html>
316 | 


--------------------------------------------------------------------------------
/doc/instructions.md:
--------------------------------------------------------------------------------
  1 | # Recognition of object instances practical
  2 | 
  3 | This is an [Oxford Visual Geometry Group](http://www.robots.ox.ac.uk/~vgg) computer vision practical, authored by [Andrea Vedaldi](http://www.robots.ox.ac.uk/~vedaldi/) and Andrew Zisserman (Release 2018a).
  4 | 
  5 | <img width=500 src="images/cover.png" alt="cover"/>
  6 | 
  7 | The goal of instance-level recognition is to match (recognize) a specific object or scene. Examples include recognizing a specific building, such as Notre Dame, or a specific painting, such as ''Starry Night'' by Van Gogh. The object is recognized despite changes in scale, camera viewpoint, illumination conditions and partial occlusion. An important application is image retrieval -- starting from an image of an object of interest (the query), search through an image dataset to obtain (or retrieve) those images that contain the target object.
  8 | 
  9 | The goal of this session is to get basic practical experience with the methods that enable specific object recognition. It includes: (i) using SIFT features to obtain sparse matches between two images; (ii) using affine co-variant detectors to cover changes in viewpoint; (iii) vector quantizing the SIFT descriptors into visual words to enable large scale retrieval; and (iv) constructing and using an image retrieval system to identify objects.
 10 | 
 11 | [TOC]
 12 | 
 13 | ## Getting started
 14 | 
 15 | Read and understand the [requirements and installation instructions](../overview/index.html#installation). The download links for this practical are:
 16 | 
 17 | * Code and data: [practical-instance-recognition-2018a.tar.gz](http://www.robots.ox.ac.uk/~vgg/share/practical-instance-recognition-2018a.tar.gz) 560MB
 18 | * Code only: [practical-instance-recognition-2018a-code-only.tar.gz](http://www.robots.ox.ac.uk/~vgg/share/practical-instance-recognition-2018a-code-only.tar.gz) 10MB
 19 | * Data only: [practical-instance-recognition-2018a-data-only.tar.gz](http://www.robots.ox.ac.uk/~vgg/share/practical-instance-recognition-2018a-data-only.tar.gz) 550MB
 20 | * [Git repository](https://github.com/vedaldi/practical-object-instance-recognition) (for lab setters and developers)
 21 | 
 22 | After the installation is complete, open and edit the script `exercise1.m` in the MATLAB editor. The script contains commented code and a description for all steps of this exercise, relative to Part I of this document. You can cut and paste this code into the MATLAB window to run it, and will need to modify it as you go through the session. Other files `exercise2.m`, `exercise3.m`, and `exercise4.m` are given for Part II, III, and IV.
 23 | 
 24 | **Note**: the student packages contain only the code required to run the practical. The complete package, including code to preprocess the data, is available on GitHub.
 25 | 
 26 | ## Part I: Sparse features for matching object instances
 27 | 
 28 | ### Stage I.A: SIFT features detector
 29 | 
 30 | The SIFT feature has both a detector and a descriptor. We will start by computing and visualizing the SIFT feature detections for two images of the same object (a building facade). Load an image, rotate and scale it, and then display the original and transformed pair:
 31 | 
 32 | ```matlab
 33 | % Load an image
 34 | im1 = imread('data/oxbuild_lite/all_souls_000002.jpg') ;
 35 | % Let the second image be a rotated and scaled version of the first, then display the images
 36 | im3 = imresize(imrotate(im1,35,'bilinear'),0.7) ;
 37 | subplot(1,2,1) ; imagesc(im1) ; axis equal off ; hold on ;
 38 | subplot(1,2,2) ; imagesc(im3) ; axis equal off ;
 39 | ```
 40 | 
 41 | A SIFT frame is a circle with an orientation and is specified by four parameters: the center $t_x$, $t_y$, the scale $s$, and the rotation $\theta$ (in radians), resulting in a vector of four parameters $(s, \theta, t_x, t_y)$. Now compute and visualise the SIFT feature detections (frames):
 42 | 
 43 | ```matlab
 44 | % Compute SIFT features for each
 45 | [frames1, descrs1] = getFeatures(im1, 'peakThreshold', 0.01) ;
 46 | [frames3, descrs3] = getFeatures(im3, 'peakThreshold', 0.01) ;
 47 | subplot(1,2,1) ; imagesc(im1) ; axis equal off ; hold on ;
 48 | vl_plotframe(frames1, 'linewidth', 2) ;
 49 | subplot(1,2,2) ; imagesc(im3) ; axis equal off ; hold on ;
 50 | vl_plotframe(frames3, 'linewidth', 2) ;
 51 | ```
 52 | 
 53 | Examine the original image and its rotated and scaled version and convince yourself that the detections overlap the same scene regions (even though the circles have moved their image position and changed radius). It is helpful to zoom into a smaller image area using the MATLAB magnifying glass tool. This demonstrates that the detection process transforms (is co-variant) with translations, rotations and isotropic scalings. This class of transformations is known as similarity (or equiform) transformations.
 54 | 
 55 | > **Task:** The number of detected features can be controlled by changing the `peakThreshold` option. A larger value will select features that correspond to higher contrast structures in the image. Try this now: run again the same code, but increase `peakThreshold` two or three times.
 56 | 
 57 | Now repeat the exercise with a pair of natural images. Start by loading the second one:
 58 | 
 59 | ```matlab
 60 | % Load a second image
 61 | im2 = imread('data/oxbuild_lite/all_souls_000015.jpg') ;
 62 | ```
 63 | 
 64 | and plot images and feature frames. Again you should see that many of the detections overlap the same scene region. Note that, while repeatability occurs for the pair of natural views, it is much better for the synthetically rotated pair.
 65 | 
 66 | > **Question:** Note the change in density of detections across the image. Why does it change? Will it be a problem for matching? How could it be avoided?
 67 | 
 68 | > **Question:** Occasionally, a feature is detected multiple times, with different orientations. This may happen when the orientation  assignment is ambiguous. Which kind of image structure would result in ambiguous orientation assignment?
 69 | 
 70 | ### Stage I.B: SIFT features descriptors and matching between images
 71 | 
 72 | Next we will use the descriptor computed over each detection to match the detections between images. We will start with the simplest matching scheme (first nearest neighbour of descriptors) and then add more sophisticated methods to eliminate any mismatches.
 73 | 
 74 | * Visualize the SIFT descriptors for the detected feature frames with the function `vl_plotsiftdescriptor`. Then use `vl_plotframe` to overlay the corresponding frames.
 75 | 
 76 | > **Question:**  Note the descriptors are computed over a much larger region (shown in blue) than the detection (shown in green). Why?
 77 |  
 78 | * Compute first nearest neighbours matches - for each SIFT descriptor in the first image, compute its nearest neighbour in the second image with the function `findNeighbours`.
 79 | 
 80 | * Visualize the correspondences using lines joining matched SIFT features with the function `plotMatches`.
 81 | 
 82 | > **Question:**     Notice that there are many mismatches. Examine some of the mismatches to understand why the mistakes are being made. For example, is the change in lighting a problem? What additional constraints can be applied to remove the mismatches?
 83 | 
 84 | **Hint:** You can visualize a subset of the matches using:
 85 | ```matlab
 86 | figure; plotMatches(im1,im2,frames1,frames2,matches(:,3:200:end));
 87 | ```
 88 | 
 89 | ### Stage I.C: Improving SIFT matching using Lowe’s second nearest neighbour test
 90 | 
 91 | Lowe introduced a second nearest neighbour (2nd NN) test to identify, and hence remove, ambiguous matches. The idea is to identify distinctive matches by a threshold on the ratio of first to second NN distances. In the MATLAB file, the ratio is `nnThreshold` = 1NN distance / 2NN distance.
 92 | 
 93 | * Vary the ratio `nnThreshold` in a range from 0.1 to 0.9, and examine how the number of matches and the number of mismatches change.
 94 | * A value of `nnThreshold = 0.8` is often a good compromise between losing too many matches and rejecting mismatches.
 95 | 
 96 | > **Question:** Examine some of the remaining mismatches to understand why they have occurred. How could they be removed?
 97 | 
 98 | ### Stage I.D: Improving SIFT matching using a geometric transformation
 99 | 
100 | In addition to the 2nd NN test, we can also require consistency between the matches and a geometric transformation between the images. For the moment we will look for matches that are consistent with a similarity transformation
101 | 
102 | $$
103 | \begin{bmatrix}
104 | x' \\ y'
105 | \end{bmatrix}
106 | =
107 | sR(\theta)
108 | \begin{bmatrix}
109 | x \\ y
110 | \end{bmatrix}
111 | +
112 | \begin{bmatrix}
113 | t_x \\ t_y
114 | \end{bmatrix}
115 | $$
116 | 
117 | which consists of a rotation by $\theta$, an isotropic scaling (i.e. same in all directions) by $s$, and a translation by a vector $(t_x, t_y)$. This transformation is specified by four parameters $(s,\theta,t_x,t_y)$ and can be computed from a single correspondence between SIFT detections in each image.
118 | 
119 | > **Task:** Work out how to compute this transformation from a single correspondence.
120 | 
121 | **Hint:** Recall from Stage I.A that a SIFT feature frame is an oriented circle and map one onto the other.
122 | 
123 | The matches consistent with a similarity can then be found using a RANSAC inspired algorithm, implemented by the function `geometricVerification`:
124 | 
125 | **RANSAC-like algorithm for geometric verification**
126 | 
127 | 1. For each tentative correspondence in turn:
128 |     1. compute the similarity transformation;
129 |     2. map all the SIFT detections in one image to the other using this transformation;
130 |     3. accept matches that are within a threshold distance to the mapped detection (inliers);
131 |     4. count the number of accepted matches;
132 |     5. optionally, fit a more accurate affine transformation or homography to the accepted matches and re-validate the matches.
133 | 2. Choose the transformation with the highest count of inliers.
134 | 
135 | ------
136 | 
137 | After this algorithm the inliers are consistent with the transformation and are retained, and most mismatches should now be removed.
138 | 
139 | > **Task:** The figure generated by `plotMatches` supports the interactive visualisation of the transformation found by `geometricVerification`. Try hovering with the mouse on the figure and check that corresponding image points are highlighted in the two images.
140 | 
141 | **Skip to [Part 2](#part2) on fast track**
142 | 
143 | > **Task:** Test this procedure by varying the threshold distance (edit `geometricVerification` and change the `opts.tolerance1`, `opts.tolerance2`, and `opts.tolerance3` parameters, where the last two thresholds are relative to the optional iterative fitting of an affine transformation or homography to the inliers). Note the number of inliers and number of mismatches.
144 | 
145 | If more matches are required the geometric transformation can be used alone, without also requiring the 2nd NN test. Indeed, since the 1st NN may not be the correct match, a list of potential (putative) matches can be generated for each SIFT descriptor by including the 1st NN, 2nd NN, 3rd NN etc. Investigate how the number of correct matches (and time for computation) grows as the potential match list is extended, and the geometric transformation is used to select inliers. To this end:
146 | 
147 | > **Task:** Change the code to include in the match list the 1st NN, 2nd NN, 3rd NN, … best matches for each feature.
148 | > **Task:** Run geometric verification and check the number of verified matches using this expanded list.
149 | 
150 | **Hint:** You can use MATLAB’s tic and toc functions to measure the execution time of a snippet of code. For example
151 | ```matlab
152 | tic ; pause(3) ; toc
153 | ```
154 | will pause MATLAB for three seconds and return an elapsed time approximately equal to 3. See `help tic` for details.
155 | 
156 | ## Part II: Affine co-variant detectors {#part2}
157 | 
158 | So far the change in viewpoint between images has been a similarity transformation. Now we consider more severe viewpoint changes - for example where an object is fronto-parallel in one view, and turns away from the camera in the other as in the graffiti wall images below:
159 | 
160 | <img src="images/aff1.jpeg" alt="aff1" width=200px/><img src="images/aff2.jpeg" alt="aff2" width=200px/><img src="images/aff3.jpeg" alt="aff3" width=200px/>
161 | 
162 | In this case, there is foreshortening (anisotropic scaling) and perspective distortions between the images (as well as in-plane rotation, translation and scaling). A circle in one image cannot cover the same scene area as a circle in the other, but an ellipse can. Affine co-variant detectors are designed to find such regions.
163 | 
164 | In the following we will compare the number of matches using a similarity and affine co-variant detector as the viewpoint becomes progressively more extreme. The detectors are SIFT (for similarity) and SIFT+affine adaptation (for affine), while the descriptors are in both cases SIFT.
165 | 
166 | > **Task:** Open and examine the script `exercise2.m` in the MATLAB editor. Run the script.
167 | 
168 | Note the behaviour in the number of verified matches as the viewpoint becomes more extreme. Observe that the matches also identify the regions of the images that are in common.
169 | 
170 | > **Question:** The transformation between the images induced by the plane is a planar homography. The detections are only affine co-variant (not as general as a planar homography). So how can descriptors computed on these detections possibly match?
171 | 
172 | **Note:** There are many other detector variants that could be used for this task. These can be activated by the `method` option of `getFeatures.m` (see also `help vl_covdet`).
173 | 
174 | ## Part III: Towards large scale retrieval {#part3}
175 | 
176 | In large scale retrieval the goal is to match a query image to a large database of images (for example the WWW or Wikipedia). The quality of a match is measured as the number of geometrically verified feature correspondences between the query and a database image. While the techniques discussed in Part I and II are sufficient to do this, in practice they require too much memory to store the SIFT descriptors for all the detections in all the database images. We explore next two key ideas: one to reduce the memory footprint and pre-compute descriptor matches; the other to speed up image retrieval.
177 | 
178 | > **Task:** Open and edit the script `exercise3.m` in the MATLAB editor, and cut and paste to work through the following stages.
179 | 
180 | ### Stage III.A: Accelerating descriptor matching with visual words
181 | 
182 | Instead of matching feature descriptors directly as done in Part I and II, descriptors are usually mapped first to discrete symbols, also called visual words, by means of a clustering technique like K-Means. The descriptors that are assigned to the same visual word are considered matched. Each of the rows in the following figure illustrates image patches that are mapped to the same visual word, and are hence indistinguishable by the representation.
183 | 
184 | <img src="images/words.jpeg" alt="words" width=400px/>
185 | 
186 | Then, matching two sets of feature descriptors (from two images) reduces to finding the intersection of two sets of symbols.
187 | 
188 | > **Tasks:**
189 | 
190 | > * Load a visual word dictionary and an associated approximate nearest neighbour (ANN) matcher (the ANN matcher is used to determine the closest visual word to each descriptor and is based on a forest of KD trees).
191 | > * Given SIFT descriptors for two images, quantise them (assign them) into the corresponding visual words.
192 | > * Find corresponding features by looking for the same visual words in the two images and note the computation time.
193 | > * Geometrically verify these initial correspondences and count the number of inlier matches found.
194 | > * Find corresponding features by using the method of Part I and II, i.e. by comparing the descriptors directly, and note the computation time. Geometrically verify these initial correspondences and count the number of inlier matches found.
195 | > * Compare the speed and number of inliers when using visual words vs raw SIFT descriptors by means of the function `matchWords`. Note, you should repeat the timing (by running the matching again) as the first time you run it there may be a delay as certain MATLAB components are loaded into memory.
196 | > * **Optional:** compare the speed and number of matches over another pair of images (from part I and II).
197 | 
198 | > **Questions:**
199 | 
200 | > * The size of the vocabulary (the number of clusters) is an important parameter in visual word algorithms. How does the size affect the number of inliers and the difficulty of computing the transformation?
201 | > * In the above procedure the time required to convert the descriptors into visual words was not accounted for. Why?
202 | > * What is the speedup in searching a large, fixed database of 10, 100, 1000 images?
203 | 
204 | **Skip to [Stage III.B](#stage3b) on fast track** 
205 | 
206 | Often multiple feature occurrences are mapped to the same visual word. In this case `matchWords` generates only one of the possible matches.
207 | 
208 | > **Tasks:**
209 | 
210 | > * Modify `matchWords` to generate more than one match for cases in which multiple features are mapped to the same visual word. This can be achieved by increasing the value of `maxNumMatches`.
211 | > * Most of these additional matches are incorrect. Filter them out by running `geometricVerification`.
212 | > * Compare the number of inliers obtained before and after this modification.
213 | 
214 | ### Stage III.B: Searching with an inverted index {#stage3b}
215 | 
216 | While matching with visual words is much faster than doing so by comparing feature descriptors directly, scoring images directly based on the number of geometrically verified matches still entails fitting a geometric model, a relatively slow operation. Rather than scoring all the images in the database in this way, we are going to use an approximation and count the number of visual words shared between two images.
217 | 
218 | To this end, one computes a histogram of the visual words in a query image and for each of  the database images. Then the number of visual words in common can be computed from the intersection of the two histograms.
219 | 
220 | The histogram intersection can be thought of as a similarity measure between two histograms. In practice, this measure can be refined in several ways:
221 | 
222 | * By reducing the importance of common visual words. This is similar to a stop-words list and can be implemented by weighting each word by the 'inverse document frequency' (the inverse of the frequency of occurrence of that visual word over the entire database of images).
223 | * By normalising the weighted histograms to unit vectors and using the cosine between them as similarity. This can be implemented easily as the inner product between normalised histograms.
224 | 
225 | Computing histogram similarities can be implemented extremely efficiently using an inverted file index. In this exercise, inner products between normalized histograms are computed quite efficiently using MATLAB's built-in sparse matrix engine.
226 | 
227 | We now apply this retrieval method to search using a query image within a 660 image subset of the Oxford 5k building image set.
228 | 
229 | > **Task:** How many erroneously matched images do you count in the top results?
230 | > **Question:** Why does the top image have a score of 1?
231 | 
232 | ### Stage III.C: Geometric rescoring {#stage3c}
233 | 
234 | Histogram-based retrieval results are good but far from perfect. Given a short list of top ranked images from the previous step, we are now going to re-score them based on the number of inlier matches after a geometric verification step.
235 | 
236 | > **Question:** Why is the top score much larger than 1 now?
237 | > **Question:** Are the retrieval results improved after geometric verification?
238 | 
239 | ### Stage III.D: Full system {#stage3d}
240 | 
241 | Now try the full system to retrieve matches to an unseen query image.
242 | 
243 | <img src="images/query.jpeg" alt="query" width=400px/>
244 | 
245 | ## Part IV: Large scale retrieval
246 | 
247 | **Skip and end here on fast track**
248 | 
249 | The images below are all details of paintings. The goal of this last part of the practical is to identify the paintings that they came from. For this we selected a set of 1734 images of paintings from Wikipedia.
250 | 
251 | <img src="images/paint1.jpeg" alt="paint1" height=200px/><img src="images/paint2.jpeg" alt="paint2" height=200px/><img src="images/paint3.jpeg" alt="paint3" height=200px/>
252 | 
253 | To identify the details you can either:
254 | 
255 | 1. use your knowledge of art
256 | 2. search through the 1734 Wikipedia images until you find matches
257 | 3. build a recognition system and match the details automatically
258 | 
259 | We follow route (3) here. Look through and run `exercise4.m`. This uses the techniques described in Part III in order to construct an index for 1734 Wikipedia images so that they may be searched quickly. Use the code to find which paintings these details come from.
260 | 
261 | Note, although the index is stored locally, the matching images are downloaded from Wikipedia and displayed. Click on the image to reach the Wikipedia page for that painting (and hence identify it).
262 | 
263 | > **Task:** Use the code to visually search Wikipedia for further paintings from Van Gogh downloaded from the Internet. 
264 | 
265 | **Note:** the code supports URLs in place of filenames. 
266 | 
267 | Take note of the code output.
268 | 
269 | > **Questions:**
270 | 
271 | > * How many features are there in the painting database?
272 | > * How much memory does the image database take?
273 | > * What are the stages of the search? And how long does each of the stages take for one of the query images?
274 | 
275 | That completes this practical.
276 | 
277 | ## Links and further work
278 | 
279 | * The code for this practical is written using the software package [VLFeat](http://www.vlfeat.org). This is a software library written in MATLAB and C, and is freely available as source code and binary.
280 | * The images for this practical are taken from the [Affine Covariant Features dataset](http://www.robots.ox.ac.uk/~vgg/research/affine/), and the [Oxford Buildings benchmark](http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/).
281 | * For a tutorial on large scale visual search and references to the literature, see the lectures by Josef Sivic and Cordelia Schmid [here](http://www.di.ens.fr/willow/events/cvml2012/materials/).
282 | * For recent developments in large scale search (compact image descriptors, compression with product quantization), see [these lectures](https://sites.google.com/site/lsvr13/) by Herve Jegou and Florent Perronnin.
283 | 
284 | ## Acknowledgements
285 | 
286 | * Guidance from Josef Sivic, Ivan Laptev and Cordelia Schmid
287 | * Mircea Cimpoi for scripts for downloading and linking to Wikipedia paintings 
288 | * Comments from Relja Arandjelovic, Karen Simonyan, Omkar Parkhi, Meelis Lootus, Hossein Azizpour, Max Jaderberg
289 | * Funding from ERC grant VisRec Grant No. 228180, and a PASCAL Harvest Grant.
290 | 
291 | <img src="images/erc.jpeg" alt="erc" height=100px/><img src="images/PASCAL2.png" alt="pascal2" height=100px/>
292 | 
293 | ## History
294 | 
295 | * Used in the Oxford AIMS CDT, 2014-18
296 | * Used at [ENS/INRIA Visual Recognition and Machine Learning Summer School, 2012](http://www.di.ens.fr/willow/events/cvml2012/).
297 | * Used at [JHU Summer School on Human Language Technology, 2012](http://www.clsp.jhu.edu/workshops/archive/ws-12/summer-school/).
298 | 


--------------------------------------------------------------------------------
/exercise1.m:
--------------------------------------------------------------------------------
  1 | % PART I: basic features
  2 | 
  3 | % setup MATLAB to use our software
  4 | setup ;
  5 | 
  6 | %% -------------------------------------------------------------------
  7 | %                                   Stage I.A: SIFT features detection
  8 | % --------------------------------------------------------------------
  9 | 
 10 | % Load an image
 11 | im1 = imread('data/oxbuild_lite/all_souls_000002.jpg') ;
 12 | 
 13 | % Let the second image be a rotated and scaled version of the first
 14 | im3 = imresize(imrotate(im1,35,'bilinear'),0.7) ;
 15 | 
 16 | % Display the images
 17 | figure(1) ;
 18 | set(gcf,'name', 'Part I.A: Original image and rotated and scaled version') ;
 19 | subplot(1,2,1) ; imagesc(im1) ; axis equal off ; hold on ;
 20 | subplot(1,2,2) ; imagesc(im3) ; axis equal off ;
 21 | 
 22 | % Compute SIFT features for each
 23 | [frames1, descrs1] = getFeatures(im1, 'peakThreshold', 0.01) ;
 24 | [frames3, descrs3] = getFeatures(im3, 'peakThreshold', 0.01) ;
 25 | 
 26 | figure(2) ;
 27 | set(gcf,'name', 'Part I.A: SIFT features detection - synthetic pair') ;
 28 | subplot(1,2,1) ; imagesc(im1) ; axis equal off ; hold on ;
 29 | vl_plotframe(frames1, 'linewidth', 2) ;
 30 | 
 31 | subplot(1,2,2) ; imagesc(im3) ; axis equal off ; hold on ;
 32 | vl_plotframe(frames3, 'linewidth', 2) ;
 33 | 
 34 | % Load a second image of the same scene
 35 | im2 = imread('data/oxbuild_lite/all_souls_000015.jpg') ;
 36 | 
 37 | % Display the images
 38 | figure(3) ;
 39 | set(gcf,'name', 'Part I.A: Original images - real pair') ;
 40 | subplot(1,2,1) ; imagesc(im1) ; axis equal off ; hold on ;
 41 | subplot(1,2,2) ; imagesc(im2) ; axis equal off ;
 42 | 
 43 | [frames2, descrs2] = getFeatures(im2, 'peakThreshold', 0.01) ;
 44 | 
 45 | figure(4) ;
 46 | set(gcf,'name', 'Part I.A: SIFT features detection - real pair') ;
 47 | subplot(1,2,1) ; imagesc(im1) ; axis equal off ; hold on ;
 48 | vl_plotframe(frames1, 'linewidth', 2) ;
 49 | 
 50 | subplot(1,2,2) ; imagesc(im2) ; axis equal off ; hold on ;
 51 | vl_plotframe(frames2, 'linewidth', 2) ;
 52 | 
 53 | %% -------------------------------------------------------------------
 54 | %     Stage I.B: SIFT features descriptors and matching between images
 55 | % --------------------------------------------------------------------
 56 | 
 57 | % Visualize SIFT descriptors (only a few)
 58 | figure(5) ; clf ;
 59 | set(gcf,'name', 'Part I.B: SIFT descriptors') ;
 60 | imagesc(im1) ; axis equal off ;
 61 | vl_plotsiftdescriptor(descrs1(:,1:50:end), ...
 62 |                       frames1(:,1:50:end)) ;
 63 | hold on ;
 64 | vl_plotframe(frames1(:,1:50:end)) ;
 65 | 
 66 | % Find for each descriptor in im1 the closest descriptor in im2
 67 | nn = findNeighbours(descrs1, descrs2) ;
 68 | 
 69 | % Construct a matrix of matches. Each column stores the indices of a
 70 | % pair of matching features in im1 and im2, respectively.
 71 | matches = [1:size(descrs1,2) ; nn(1,:)] ;
 72 | 
 73 | % Display the matches
 74 | figure(6) ; clf ;
 75 | set(gcf,'name', 'Part I.B: SIFT descriptors - matching') ;
 76 | plotMatches(im1,im2,frames1,frames2,matches) ;
 77 | title('Nearest neighbour matches') ;
 78 | 
 79 | %% -------------------------------------------------------------------
 80 | %   Stage I.C: Better matching w/ Lowe's second nearest neighbour test
 81 | % --------------------------------------------------------------------
 82 | 
 83 | % Find the top two neighbours as well as their distances
 84 | [nn, dist2] = findNeighbours(descrs1, descrs2, 2) ;
 85 | 
 86 | % Accept neighbours if their second best match is sufficiently far off
 87 | nnThreshold = 0.8 ;
 88 | ratio2 = dist2(1,:) ./ dist2(2,:) ;
 89 | ok = ratio2 <= nnThreshold^2 ;
 90 | 
 91 | % Construct a list of filtered matches
 92 | matches_2nn = [find(ok) ; nn(1, ok)] ;
 93 | 
 94 | % Alternatively, do not do the second nearest neighbourhood test.
 95 | % Instead, match each feature to its two closest neighbours and let
 96 | % the geometric verification step figure it out (in stage I.D below).
 97 | 
 98 | % matches_2nn = [1:size(nn,2), 1:size(nn,2) ; nn(1,:), nn(2,:)] ;
 99 | 
100 | % Display the matches
101 | figure(7) ; clf ;
102 | set(gcf,'name', 'Part I.C: SIFT descriptors - Lowe''s test') ;
103 | plotMatches(im1,im2,frames1,frames2,matches_2nn) ;
104 | title('Matches filtered by the second nearest neighbour test') ;
105 | 
106 | %% -------------------------------------------------------------------
107 | %               Stage I.D: Better matching w/ geometric transformation
108 | % --------------------------------------------------------------------
109 | 
110 | [inliers, H] = geometricVerification(frames1, frames2, matches_2nn, 'numRefinementIterations', 8) ;
111 | matches_geo = matches_2nn(:, inliers) ;
112 | 
113 | % Display the matches
114 | figure(8) ; clf ;
115 | set(gcf,'name', 'Part I.D: SIFT descriptors - geometric verification') ;
116 | plotMatches(im1,im2,frames1,frames2,matches_geo, 'homography', H) ;
117 | title('Matches filtered by geometric verification') ;
118 | 


--------------------------------------------------------------------------------
/exercise2.m:
--------------------------------------------------------------------------------
 1 | % PART II: Affine co-variant detectors
 2 | 
 3 | % setup MATLAB to use our software
 4 | setup ;
 5 | 
 6 | % choose which images to use in the evaluation
 7 | imgPaths = {'data/graf/img1.png',...
 8 |             'data/graf/img2.png',...
 9 |             'data/graf/img3.png',...
10 |             'data/graf/img4.png',...
11 |             'data/graf/img5.png',...
12 |             'data/graf/img6.png'} ;
13 | 
14 | figure(100) ; clf ; set(gcf, 'name', 'Part II: Affine co-variant detectors - summary') ;
15 | 
16 | for o = 1:2
17 |   % Get the features for the reference image
18 |   im1 = imread(imgPaths{1}) ;
19 |   [frames1,descrs1] = getFeatures(im1, 'affineAdaptation',o==2) ;
20 | 
21 |   for t = 2:numel(imgPaths)
22 |     % Get the feature for another image
23 |     im2 = imread(imgPaths{t}) ;
24 |     [frames2,descrs2] = getFeatures(im2, 'affineAdaptation',o==2) ;
25 | 
26 |     % Get the feature descriptor neighbours
27 |     [nn, dist2] = findNeighbours(descrs1, descrs2, 2) ;
28 | 
29 |     % Second nearest neighbour pre-filtering
30 |     nnThreshold = 0.8 ;
31 |     ratio2 = dist2(1,:) ./ dist2(2,:) ;
32 |     ok = ratio2 <= nnThreshold^2 ;
33 |     matches_2nn = [find(ok) ; nn(1,ok)] ;
34 | 
35 |     % Geometric verification
36 |     [inliers, H] = geometricVerification(frames1,frames2,matches_2nn,...
37 |                                          'numRefinementIterations', 6) ;
38 |     matches_geom = matches_2nn(:, inliers) ;
39 | 
40 |     % Count the number of inliers
41 |     numInliers(t,o) = size(matches_geom,2) ;
42 | 
43 |     % Visualize
44 |     n = (t-2)*2 + o ;
45 |     h = subplot(numel(imgPaths)-1, 2, n, 'parent', 100) ;
46 |     axes(h) ;
47 |     plotMatches(im1,im2,frames1,frames2,matches_geom) ;
48 |     switch o
49 |       case 1, type = 'similarity' ;
50 |       case 2, type = 'affinity' ;
51 |     end
52 |     title(sprintf('From 1 to %d with %s: num: %d', t, type, numInliers(t,o))) ;
53 | 
54 |     figure(n) ; clf ;
55 |     set(gcf, 'name', sprintf('Part II:  Affine co-variant detectors - from 1 to %d with %s', t,type)) ;
56 |     plotMatches(im1,im2,frames1,frames2,matches_geom,'homography',H) ;
57 |     %c = copyobj(h, gcf) ; set(c, 'position', [0 0 1 1]) ;
58 |     drawnow ;
59 |   end
60 | end
61 | 
62 | % Quantitative evaluation
63 | figure(101) ; clf ;
64 | set(gcf, 'name', sprintf('Part II: Affine co-variant detectors - quantitative comparison')) ;
65 | plot(2:size(numInliers,1),numInliers(2:end,:),'linewidth', 3) ;
66 | axis tight ; grid on ;
67 | legend('similarity co-variant', 'affine co-variant') ;
68 | xlabel('image pair') ;
69 | ylabel('num. verified feature matches') ;
70 | 


--------------------------------------------------------------------------------
/exercise3.m:
--------------------------------------------------------------------------------
  1 | % PART III: Towards large-scale retrieval
  2 | 
  3 | % setup MATLAB to use our software
  4 | setup ;
  5 | 
  6 | %% -------------------------------------------------------------------
  7 | %      Stage III.A: Accelerating descriptor matching with visual words
  8 | % --------------------------------------------------------------------
  9 | 
 10 | % Load a visual word vocabulary and the kd-tree used to query it
 11 | load('data/oxbuild_lite_imdb_100k_disc_dog.mat', 'vocab', 'kdtree') ;
 12 | 
 13 | % Load the two images
 14 | im1 = imread('data/oxbuild_lite/ashmolean_000007.jpg') ;
 15 | im2 = imread('data/oxbuild_lite/ashmolean_000028.jpg') ;
 16 | 
 17 | % Compute SIFT features for each
 18 | [frames1, descrs1] = getFeatures(im1, 'peakThreshold', 0.001, 'orientation', false) ;
 19 | [frames2, descrs2] = getFeatures(im2, 'peakThreshold', 0.001, 'orientation', false) ;
 20 | 
 21 | % Get the matches based on the raw descriptors (timed, with ratio test)
 22 | tic ;
 23 | [nn, dist2] = findNeighbours(descrs1, descrs2, 2) ;
 24 | nnThreshold = 0.85 ;
 25 | ratio2 = dist2(1,:) ./ dist2(2,:) ;
 26 | ok = ratio2 <= nnThreshold^2 ;
 27 | matches_raw = [find(ok) ; nn(1,ok)] ;
 28 | time_raw = toc ;
 29 | 
 30 | % Quantise the descriptors (this step is outside both timed sections)
 31 | words1 = vl_kdtreequery(kdtree, vocab, descrs1, 'maxNumComparisons', 1024) ;
 32 | words2 = vl_kdtreequery(kdtree, vocab, descrs2, 'maxNumComparisons', 1024) ;
 33 | 
 34 | % Get the matches based on the quantized descriptors (timed)
 35 | tic ;
 36 | matches_word = matchWords(words1,words2) ;
 37 | time_word = toc;
 38 | 
 39 | % Count inliers
 40 | [inliers_raw, H_raw] = geometricVerification(frames1,frames2,matches_raw,'numRefinementIterations', 3) ;
 41 | [inliers_word, H_word] = geometricVerification(frames1,frames2,matches_word,'numRefinementIterations', 3) ;
 42 | 
 43 | figure(1) ; clf ;
 44 | set(gcf,'name', 'III.A: Accelerating descriptor matching with visual words') ;
 45 | 
 46 | subplot(2,1,1) ; plotMatches(im1,im2,frames1,frames2,matches_raw(:,inliers_raw), 'homography', H_raw) ;
 47 | title(sprintf('Verified matches on raw descriptors (%d in %.3g s)',numel(inliers_raw),time_raw)) ;
 48 | 
 49 | subplot(2,1,2) ; plotMatches(im1,im2,frames1,frames2,matches_word(:,inliers_word), 'homography', H_word) ;
 50 | title(sprintf('Verified matches on visual words (%d in %.3g s)',numel(inliers_word),time_word)) ;
 51 | 
 52 | %% -------------------------------------------------------------------
 53 | %                        Stage III.B: Searching with an inverted index
 54 | % --------------------------------------------------------------------
 55 | 
 56 | % Load an image DB
 57 | imdb = loadIndex('data/oxbuild_lite_imdb_100k_ellipse_dog.mat') ;
 58 | 
 59 | % Compute a histogram for the query image
 60 | [h,frames,words] = getHistogramFromImage(imdb, im2) ;
 61 | 
 62 | % Score the other images by similarity to the query
 63 | tic ;
 64 | scores = h' * imdb.index ;
 65 | time_index = toc ;
 66 | 
 67 | % Plot results by decreasing score
 68 | figure(2) ; clf ;
 69 | plotRetrievedImages(imdb, scores, 'num', 25) ;
 70 | set(gcf,'name', 'III.B: Searching with an inverted index') ;
 71 | fprintf('Search time per database image: %.3g s\n', time_index / size(imdb.index,2)) ;
 72 | 
 73 | %% -------------------------------------------------------------------
 74 | %                                    Stage III.C: Geometric reranking
 75 | % --------------------------------------------------------------------
 76 | 
 77 | % Rescore the top 25 images (or all of them, if the database is
 78 | % smaller) based on the number of inlier matches.
 79 | 
 80 | [~, perm] = sort(scores, 'descend') ;
 81 | for rank = 1:min(25, numel(perm))
 82 |   matches = matchWords(words,imdb.images.words{perm(rank)}) ;
 83 |   inliers = geometricVerification(frames,imdb.images.frames{perm(rank)},...
 84 |                                   matches,'numRefinementIterations', 3) ;
 85 |   newScore = numel(inliers) ;
 86 |   scores(perm(rank)) = max(scores(perm(rank)), newScore) ;
 87 | end
 88 | 
 89 | % Plot results by decreasing score
 90 | figure(3) ; clf ;
 91 | plotRetrievedImages(imdb, scores, 'num', 25) ;
 92 | set(gcf,'name', 'III.C: Searching with an inverted index - verification') ;
 93 | 
 94 | %% -------------------------------------------------------------------
 95 | %                                             Stage III.D: Full system
 96 | % --------------------------------------------------------------------
 97 | 
 98 | % Load the database if not already in memory or if it is the one
 99 | % from exercise4 (exercise4 tells the two apart by the 'wikiName' field)
100 | if ~exist('imdb', 'var') || isfield(imdb.images, 'wikiName')
101 |   imdb = loadIndex('data/oxbuild_lite_imdb_100k_ellipse_dog.mat', ...
102 |                    'sqrtHistograms', true) ;
103 | end
104 | 
105 | % Search the database for a match to a given image. Note that URL
106 | % can be a path to a file or a URL pointing to an image in the
107 | % Internet.
108 | 
109 | url1 = 'data/queries/mistery-building1.jpg' ;
110 | res = search(imdb, url1, 'box', []) ;
111 | 
112 | % Display the results
113 | figure(4) ; clf ; set(gcf,'name', 'Part III.D: query image') ;
114 | plotQueryImage(imdb, res) ;
115 | 
116 | figure(5) ; clf ; set(gcf,'name', 'Part III.D: search results') ;
117 | plotRetrievedImages(imdb, res) ;
118 | 


--------------------------------------------------------------------------------
/exercise4.m:
--------------------------------------------------------------------------------
 1 | % PART IV: Large scale image retrieval
 2 | 
 3 | % setup MATLAB to use our software
 4 | setup ;
 5 | 
 6 | % Load the database if not already in memory, or if the one in
 7 | % memory is the buildings database left over from exercise3.
 8 | if ~exist('imdb', 'var') || ~isfield(imdb.images,'wikiName')
 9 |   imdb = loadIndex('data/paintings_imdb_100k_disc_dog.mat', ...
10 |                    'sqrtHistograms', true) ;
11 |   imdb.dir = '' ; % art images are not shipped with practical
12 | end
13 | 
14 | % Search the database for a match to a given image. Note that URL
15 | % can be a path to a file or a URL pointing to an image in the
16 | % Internet.
17 | 
18 | url1 = 'data/queries/mistery-painting1.jpg' ;
19 | url2 = 'data/queries/mistery-painting2.jpg' ;
20 | url3 = 'data/queries/mistery-painting3.jpg' ;
21 | res = search(imdb, url1, 'box', []) ;
22 | 
23 | % Display the results
24 | figure(1) ; clf ; set(gcf,'name', 'Part IV: query image') ;
25 | plotQueryImage(imdb, res) ;
26 | 
27 | figure(2) ; clf ; set(gcf,'name', 'Part IV: search results') ;
28 | plotRetrievedImages(imdb, res, 'num', 9) ;
29 | 


--------------------------------------------------------------------------------
/extra/Makefile:
--------------------------------------------------------------------------------
 1 | name ?= practical-instance-recognition
 2 | ver ?= 2018a
 3 | 
 4 | code=\
 5 | exercise1.m \
 6 | exercise2.m \
 7 | exercise3.m \
 8 | exercise4.m \
 9 | findNeighbours.m \
10 | geometricVerification.m \
11 | getFeatures.m \
12 | getHistogramFromImage.m \
13 | getHistogram.m \
14 | loadIndex.m \
15 | matchWords.m \
16 | plotMatches.m \
17 | plotQueryImage.m \
18 | plotRetrievedImages.m \
19 | search.m \
20 | setup.m \
21 | README.md \
22 | vlfeat
23 | 
24 | doc=\
25 | doc/images \
26 | doc/base.css \
27 | doc/instructions.html
28 | 
29 | data=\
30 | data/oxbuild_lite \
31 | data/oxbuild_lite_imdb.mat \
32 | data/oxbuild_lite_imdb_100k_ellipse_dog.mat \
33 | data/oxbuild_lite_imdb_100k_disc_dog.mat \
34 | data/paintings_imdb_100k_disc_dog.mat \
35 | data/queries \
36 | data/graf
37 | 
38 | include extra/practical/Makefile
39 | 


--------------------------------------------------------------------------------
/extra/annkmeans.m:
--------------------------------------------------------------------------------
  1 | function [centers, tree, en] = annkmeans(X, K, varargin)
  2 | % ANNKMEANS  Approximate Nearest Neighbors K-Means
  3 | %
  4 | %   Example:: To cluster the columns of the data X into K parts:
  5 | %     [CENTERS, TREE, EN] = ANNKMEANS(X, K) ;
  6 | %
  7 | %   Options are:
  8 | %
  9 | %   Seed:: 0
 10 | %     Random number generation seed (for initialization).
 11 | %
 12 | %   NumTrees:: 3
 13 | %     Number of trees in the kd-tree forest.
 14 | %
 15 | %   MaxNumComparisons:: 500
 16 | %     Maximum number of comparisons when querying the kd-tree.
 17 | %
 18 | %   MaxNumIterations:: 100
 19 | %     Maximum number of k-means iterations. Verbose (0) and Tolerance (0.001) are also accepted.
 20 | 
 21 | % Andrea Vedaldi
 22 | 
 23 | opts.seed = 0 ;
 24 | opts.numTrees = 3 ;
 25 | opts.maxNumComparisons = 500 ;
 26 | opts.maxNumIterations = 100 ;
 27 | opts.verbose = 0 ;
 28 | opts.tolerance = 0.001 ;
 29 | opts = vl_argparse(opts, varargin) ;
 30 | 
 31 | % get initial centers: K columns of X picked by vl_colsubset
 32 | rand('state',opts.seed) ;
 33 | centers = vl_colsubset(X, K) ;
 34 | 
 35 | if opts.verbose
 36 |   fprintf('%s: clustering %d vectors into %d parts\n', ...
 37 |           mfilename, size(X,2), K) ;
 38 |   fprintf('%s: random seed = %g\n', mfilename, opts.seed) ;
 39 |   fprintf('%s: tolerance = %g\n', mfilename, opts.tolerance) ;
 40 |   fprintf('%s: numTrees = %d\n', mfilename, opts.numTrees) ;
 41 |   fprintf('%s: maxNumComparisons = %d\n', mfilename, opts.maxNumComparisons) ;
 42 |   fprintf('%s: maxNumIterations = %d\n', mfilename, opts.maxNumIterations) ;
 43 | end
 44 | 
 45 | % chunk the data up: one chunk per matlabpool('size'), at least one
 46 | numData = size(X,2) ;
 47 | numChunks = max(matlabpool('size'), 1) ;
 48 | data = Composite() ;
 49 | dist = Composite() ;
 50 | assign = Composite() ;
 51 | 
 52 | for i = 1:numChunks
 53 |   chunk = i:numChunks:numData ; % interleaved column indices of chunk i
 54 |   data{i} = X(:, chunk) ;
 55 |   dist{i} = inf(1, numel(chunk), class(X)) ; % best distance so far
 56 |   assign{i} = zeros(1, numel(chunk)) ; % 0 = not assigned yet
 57 | end
 58 | %clear X ;
 59 | 
 60 | E = [] ; % energy recorded at each iteration
 61 | 
 62 | for t = 1:opts.maxNumIterations
 63 |   % compute kd-tree on the current centers
 64 |   tree = vl_kdtreebuild(centers, 'numTrees', opts.numTrees) ;
 65 | 
 66 |   % get the updated cluster assignments and partial centers
 67 |   spmd
 68 |     [centers_, mass_, en_, assign, dist] = update(opts, ...
 69 |                                                   data,K,centers,tree,...
 70 |                                                   assign,dist) ;
 71 |   end
 72 | 
 73 |   % compute the new cluster centers by summing the per-chunk partial results
 74 |   centers = zeros(size(centers),class(centers)) ;
 75 |   mass = zeros(1,K);
 76 |   en = 0 ;
 77 |   for i = 1:length(centers_)
 78 |     centers = centers + centers_{i} ;
 79 |     mass = mass + mass_{i} ;
 80 |     en = en + en_{i} ;
 81 |   end
 82 |   centers = bsxfun(@times, centers, 1./max(mass,eps)) ; % sum -> mean; eps guards empty clusters
 83 |   E(t) = en ;
 84 | 
 85 |   % re-initialize any center with no mass
 86 |   rei = find(mass == 0) ;
 87 |   centers(:, rei) = vl_colsubset(X, length(rei)) ;
 88 | 
 89 |   if opts.verbose
 90 |     figure(1) ; clf ;
 91 |     plot(E,'linewidth', 2) ;
 92 |     xlim([1 opts.maxNumIterations]) ;
 93 |     title(sprintf('%s: iteration %d', mfilename, t)) ;
 94 |     xlabel('iterations') ;
 95 |     ylabel('energy') ;
 96 |     grid on ; drawnow ;
 97 |     fprintf('%s: %d: energy = %g, reinitialized = %d\n', mfilename,t,E(t),length(rei)) ;
 98 |   end
 99 | 
100 |   if t > 1 && E(t) > (1 - opts.tolerance) * E(t-1), break ; end % relative decrease below tolerance
101 | end
102 | 
103 | % prepare final results
104 | en = E(end) ;
105 | 
106 | % --------------------------------------------------------------------
107 | function [centers, mass, en, assign, dist] = ...
108 |       update(opts,X,K,centers,tree,assign,dist)
109 | % --------------------------------------------------------------------
110 | % UPDATE  Reassign the columns of X and return unnormalised per-cluster sums.
111 | [assign_, dist_] = vl_kdtreequery(tree, centers, X, ...
112 |                                   'maxComparisons', opts.maxNumComparisons) ;
113 | ok = dist_ < dist ; % accept a new assignment only if it is strictly closer
114 | assign(ok) = assign_(ok) ;
115 | dist(ok) = dist_(ok) ;
116 | 
117 | for b = 1:K
118 |   centers(:, b) = sum(X(:, assign == b),2) ; % sum (not mean) of columns assigned to b
119 |   mass(b) = sum(assign == b) ; % number of columns assigned to b
120 | end
121 | en = sum(dist) ; % total of the stored distances
122 | 


--------------------------------------------------------------------------------
/extra/bootstrap-data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | mkdir -p data/archives
 4 | 
 5 | if test ! -d data/graf
 6 | then
 7 |     wget -c -nc \
 8 |         http://www.robots.ox.ac.uk/~vgg/research/affine/det_eval_files/graf.tar.gz \
 9 |         -O data/archives/graf.tgz
10 |     mkdir -p data/graf
11 |     (cd data/graf ; tar xzvf ../archives/graf.tgz)
12 |     (cd data/graf ; rm -f *.png ; mogrify -format png *.ppm)
13 |     (cd data/graf ; rm -f *.ppm)
14 | fi
15 | 
16 | cp -vr extra/queries data/
17 | 
18 | if test ! -d data/oxbuild_images
19 | then
20 |     wget -c -nc \
21 |         http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/oxbuild_images.tgz \
22 |         -O data/archives/oxbuild_images.tgz
23 |     mkdir -p data/oxbuild_images
24 |     (cd data/oxbuild_images ; tar xvf ../archives/oxbuild_images.tgz)
25 | fi
26 | 
27 | if test ! -d data/oxbuild_gt
28 | then
29 |     wget -c -nc \
30 |         http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/gt_files_170407.tgz \
31 |         -O data/archives/gt_files_170407.tgz
32 |     mkdir -p data/oxbuild_gt
33 |     (cd data/oxbuild_gt ; tar xvf ../archives/gt_files_170407.tgz)
34 | fi
35 | 
36 | if test ! -f data/oxbuild_compute_ap.cpp
37 | then  # the target is a regular file, so test with -f (-d was always false)
38 |     wget -c -nc \
39 |         http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/compute_ap.cpp \
40 |         -O data/archives/compute_ap.cpp
41 |     (cd data ; cp -vf archives/compute_ap.cpp oxbuild_compute_ap.cpp)
42 | fi
43 | 
44 | # Create a lite version: all good/ok entries plus the first 300 junk lines (no brace expansion: /bin/sh)
45 | if test ! -d data/oxbuild_lite
46 | then
47 |     mkdir -p data/oxbuild_lite
48 |     (
49 |         ls -1 data/oxbuild_gt/*_good.txt data/oxbuild_gt/*_ok.txt | sort | xargs cat
50 |         ls -1 data/oxbuild_gt/*_junk.txt | sort | xargs cat | head -n 300
51 |     ) | sort | uniq > data/oxbuild_lite.txt
52 |     cat data/oxbuild_lite.txt | sed "s/^\(.*\)$/data\/oxbuild_images\/\1.jpg/" | xargs -I % cp -v % data/oxbuild_lite
53 | fi
54 | 


--------------------------------------------------------------------------------
/extra/evaluate.m:
--------------------------------------------------------------------------------
 1 | function evaluate()
 2 | % EVALUATE  Load one pre-computed index, run all queries on it and log the output with DIARY.
 3 | 
 4 | % prepare index
 5 | switch 13 % hard-coded selector: edit this number to pick another index file
 6 |   case 1, imdbPath = 'data/oxbuild_imdb_100k_disc.mat' ;
 7 |   case 2, imdbPath = 'data/oxbuild_imdb_100k_odisc.mat' ;
 8 |   case 3, imdbPath = 'data/oxbuild_imdb_100k_ellipse.mat' ;
 9 |   case 4, imdbPath = 'data/oxbuild_imdb_100k_oellipse.mat' ;
10 |   case 11, imdbPath = 'data/oxbuild_imdb_100k_disc_dog2.mat' ;
11 |   case 12, imdbPath = 'data/oxbuild_imdb_100k_odisc_dog.mat' ;
12 |   case 13, imdbPath = 'data/oxbuild_imdb_100k_ellipse_dog2.mat' ;
13 |   case 14, imdbPath = 'data/oxbuild_imdb_100k_oellipse_dog.mat' ;
14 | end
15 | imdb = loadIndex(imdbPath, 'sqrtHistograms', false, 'shortListSize', 200) ;
16 | 
17 | % run evaluation; the log goes to a .txt file named after the index
18 | load('data/oxbuild_query.mat', 'query') ;
19 | diary([imdbPath(1:end-4) '.txt']) ;
20 | diary on ;
21 | fprintf('imdb:\n') ;
22 | disp(imdb) ;
23 | results = doEvaluation(imdb, query) ;
24 | diary off ;
25 | 
26 | % --------------------------------------------------------------------
27 | function results = doEvaluation(imdb, query)
28 | % --------------------------------------------------------------------
29 | % DOEVALUATION  Search IMDB with every query, score both rankings with VL_PR and print the averages.
30 | results = cell(1,numel(query)) ;
31 | for i = 1:numel(query)
32 |   k = find(imdb.images.id == query(i).imageId) ;
33 |   assert(~isempty(k)) ;
34 | 
35 |   % database labels for evaluation in retrieval: +1 for good/ok,
36 |   % 0 for junk and for the query image itself, -1 for everything else
37 |   y = - ones(1, numel(imdb.images.id)) ;
38 |   y(query(i).good) = 1 ;
39 |   y(query(i).ok) = 1 ;
40 |   y(query(i).junk) = 0 ;
41 |   y(k) = 0 ; % the query image gets the same label as junk
42 | 
43 |   results{i} = search(imdb, imdb.images.id(k), ...
44 |                       'box', query(i).box, ...
45 |                       'verbose', false) ;
46 | 
47 |   [rc,pr,info] = vl_pr(y, results{i}.index_scores) ;
48 |   results{i}.index_rc = rc ;
49 |   results{i}.index_pr = pr ;
50 |   results{i}.index_ap = info.ap ;
51 | 
52 |   [rc,pr,info] = vl_pr(y, results{i}.geom_scores) ;
53 |   results{i}.geom_rc = rc ;
54 |   results{i}.geom_pr = pr ;
55 |   results{i}.geom_ap = info.ap ;
56 | 
57 |   fprintf('query %03d: %-20s mAP:%5.2f   mAP+geom:%5.2f\n', i, ...
58 |           query(i).name, results{i}.index_ap, results{i}.geom_ap) ;
59 | 
60 |   if 0 % debugging plot, disabled; results is still a cell array here, hence {i}
61 |     figure(10) ; clf ; hold on ;
62 |     plot(results{i}.index_rc, results{i}.index_pr, 'color', 'b', 'linewidth', 3) ;
63 |     plot(results{i}.geom_rc, results{i}.geom_pr, 'color', 'g', 'linewidth', 2) ;
64 |     grid on ; axis equal ;
65 |     title(sprintf('%s', query(i).name), 'interpreter', 'none') ;
66 |     legend(sprintf('index: %.2f',results{i}.index_ap*100), ...
67 |            sprintf('index+geom: %.2f',results{i}.geom_ap*100)) ;
68 |     xlim([0 1]) ;ylim([0 1]);
69 |     drawnow ;
70 |   end
71 | end
72 | 
73 | results = cat(2, results{:}) ; % cell of structs -> struct array
74 | 
75 | fprintf('features: time %.2g\n', ...
76 |         mean([results.features_time])) ;
77 | fprintf('index: mAP: %g, time: %.2f\n', ...
78 |         mean([results.index_ap])*100, ...
79 |         mean([results.index_time])) ;
80 | fprintf('index+geom: mAP: %g, time: %.2f\n', ...
81 |         mean([results.geom_ap])*100, ...
82 |         mean([results.geom_time])) ;
83 | 


--------------------------------------------------------------------------------
/extra/experimental/crawler/crawl_all_paintings.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | 
  3 | import os
  4 | from selenium import webdriver
  5 | import time
  6 | import urllib
  7 | import sys
  8 | import string
  9 | import traceback
 10 | 
 11 | from selenium.common.exceptions import ElementNotVisibleException
 12 | 
 13 | 
 14 | def get_next_page_link(browser):
 15 |     pager_elements = browser.find_element_by_class_name("pager-items")
 16 | 
 17 |     pagelinks = pager_elements.find_elements_by_tag_name("a")
 18 | 
 19 |     for pagelink in pagelinks:
 20 |         if pagelink.text == "Next":
 21 |             return pagelink
 22 |     return None
 23 | 
 24 | def get_painters(browser):
 25 |     all_painter_divs = browser.find_elements_by_class_name("search-item")
 26 | 
 27 |     painter_names = [];
 28 |     painter_urls = [];
 29 | 
 30 |     print browser.current_url
 31 |     for painterdiv in all_painter_divs:
 32 |         ahref = painterdiv.find_element_by_tag_name("a");
 33 |         painter_name = ahref.text
 34 |         idx = painter_name.find('\n')
 35 |         painter_names.append(painter_name[:idx])
 36 |         painter_url = ahref.get_attribute("href");
 37 |         painter_urls.append(painter_url)
 38 | 
 39 |     return {'painter_names' : painter_names, 'painter_links' : painter_urls }
 40 | 
 41 | 
 42 | startAt = 0
 43 | 
 44 | DataDir = '/data/datasets/paintings/'
 45 | 
 46 | do_download = True
 47 | do_overwrite = False
 48 | 
 49 | if not os.path.exists(DataDir):
 50 |     os.mkdir(DataDir)
 51 | 
 52 | 
 53 | browser = webdriver.Firefox()
 54 | browser.implicitly_wait(60)
 55 | 
 56 | painter_list = []
 57 | painter_urls = []
 58 | for letter in string.uppercase[:26]:
 59 |     browser.get('http://www.wikipaintings.org/en/alphabet/' + letter)
 60 |     painter_elements = get_painters(browser)
 61 |     painter_list = painter_list + painter_elements['painter_names']
 62 |     #painter_list = painter_list + painter_elemens
 63 |     painter_urls = painter_urls + painter_elements['painter_links']
 64 | 
 65 | paintings_artpage_list = []
 66 | paintings_name_list = []
 67 | paintings_src_list = []
 68 | 
 69 | 
 70 | # write painters
 71 | with open(os.path.join(DataDir, 'all_painter_list.txt'), 'wb') as fp:
 72 |     for ii in range(0, len(painter_list)):
 73 |         fp.write((painter_urls[ii] + "\t" + painter_list[ii] + "\n").encode("UTF-8"))
 74 |     fp.closed
 75 | 
 76 | for ii in range(startAt, len(painter_urls)):
 77 |     idx_slash = painter_urls[ii].rfind('/') + 1
 78 |     painter_name = painter_urls[ii][idx_slash:]
 79 | 
 80 |     if os.path.exists(os.path.join(DataDir, 'lists', 'lst_' + painter_name + '_detailed.txt')):
 81 |         continue
 82 | 
 83 |     try:
 84 |         with open(os.path.join(DataDir, 'lists', 'lst_' + painter_name + '_detailed.txt'), 'wb') as fdet:
 85 |             with open(os.path.join(DataDir, 'lists', 'lst_' + painter_name + '_download.txt'), 'wb') as fdown:
 86 | 
 87 |                 print str(ii) + "  " + painter_list[ii] + " " + painter_urls[ii]
 88 |                 #hack to fit on the screen if painter portrait is larger
 89 |                 browser.get(painter_urls[ii] + '/mode/all-paintings/')
 90 |                 crt_painting_urls = []
 91 |                 crt_painting_names = []
 92 |                 crt_paintings_src = []
 93 |                 # click on the current / style link to show the jcarousel
 94 |                 try:
 95 |                     paintings_div = browser.find_element_by_id("paintings")
 96 | 
 97 |                     time.sleep(1)
 98 |                     # click on the first painting
 99 | 
100 |                     container_a = paintings_div.find_element_by_class_name("mr20")
101 |                     image_element = container_a.find_element_by_tag_name("img")
102 |                     image_element.click()
103 |                     time.sleep(1)
104 | 
105 |                     current_ul = browser.find_element_by_id("artistPaintings")
106 |                     container_a = current_ul.find_element_by_class_name("rimage")
107 |                     image_element = container_a.find_element_by_tag_name("img")
108 |                     image_element.click()
109 |                     print browser.current_url
110 |                 except ElementNotVisibleException:
111 |                     # click on the first painting
112 |                     paintings_div = browser.find_element_by_id("paintings")
113 | 
114 |                     time.sleep(1)
115 |                     # click on the first painting
116 | 
117 |                     container_a = paintings_div.find_element_by_class_name("mr20")
118 |                     image_element = container_a.find_element_by_tag_name("img")
119 |                     image_element.click()
120 |                     time.sleep(1)
121 | 
122 |                     current_ul = browser.find_element_by_id("artistPaintings")
123 |                     container_a = current_ul.find_element_by_class_name("rimage")
124 |                     image_element = container_a.find_element_by_tag_name("img")
125 |                     image_element.click()
126 | 
127 |                 time.sleep(1)
128 | 
129 |                 span_total = browser.find_element_by_class_name("totalslides")
130 |                 total_paintings_str = span_total.get_attribute("innerHTML")
131 |                 try:
132 |                     total_paintings = int(total_paintings_str)
133 |                 except:
134 |                     total_paintings_str = span_total.get_attribute("innerHTML")
135 |                     total_paintings = int(total_paintings_str)
136 | 
137 |                 for pp in range(0, total_paintings):
138 |                     time.sleep(1)
139 |                     active_slide = browser.find_element_by_class_name("activeslide")
140 |                     nextslide_link = browser.find_element_by_id("nextslide")
141 | 
142 |                     crt_url = browser.current_url
143 |                     idx_dash = crt_url.rfind('-') + 1
144 | 
145 |                     img_id = crt_url[idx_dash :]
146 | 
147 |                     galleryData = browser.find_element_by_id("galleryData")
148 |                     pelems = galleryData.find_elements_by_tag_name("p")
149 |                     pelem = pelems[0];
150 |                     ahref = pelem.find_element_by_tag_name("a")
151 |                     href_text = ahref.text
152 |                     artwork_link = pelem.find_element_by_tag_name("a").get_attribute("href")
153 | 
154 |                     genre = "N/A"
155 |                     style = "N/A"
156 |                     year = "N/A"
157 | 
158 |                     for pelem in pelems:
159 |                         if (pelem.text.lower().startswith("genre")):
160 |                             genre = pelem.find_element_by_tag_name("span").text
161 |                         if (pelem.text.lower().startswith("style")):
162 |                             style = pelem.find_element_by_tag_name("span").text
163 |                         if (pelem.text.lower().startswith("completion")):
164 |                             year = pelem.find_element_by_tag_name("span").text
165 | 
166 | 
167 |                     img_src = active_slide.find_element_by_tag_name("img").get_attribute("src")
168 |                     exclamation = img_src.find('!');
169 |                     if exclamation > -1:
170 |                         img_src = img_src[:exclamation]
171 |                     # Avoid copyright protected images
172 |                     if (img_src.lower() ==
173 |                         "http://cdnc.wikipaintings.org/zur/Global/Images/Global/FRAME-600x480.jpg".lower()):
174 |                         continue
175 |                     #there should be a smarter way to do this
176 |                     fdet.write((img_id + u".jpg\t" + img_src + '\t' +
177 |                                 artwork_link + '\t' + href_text + '\t' +
178 |                                 painter_list[ii] + '\t' + style + '\t' + genre +
179 |                                 '\t' + year + '\n').encode('UTF-8'))
180 |                     fdown.write(img_src + u'\n')
181 | 
182 |                     if (do_download):
183 |                         dst_dir = os.path.join(DataDir, 'images', img_id[0:4])
184 |                         dst_file = os.path.join(dst_dir, img_id + ".jpg")
185 |                         if (not os.path.exists(dst_file) or do_overwrite):
186 |                             if (not os.path.exists(dst_dir)):
187 |                                 os.mkdir(dst_dir)
188 |                             urllib.urlretrieve(img_src, dst_file)
189 | 
190 |                     nextslide_link.click()
191 | 
192 |             fdown.closed
193 |         fdet.closed
194 |     except:
195 |         print "Failed download for : " + painter_list[ii]
196 |         traceback.print_exc(file=sys.stderr)
197 | 
198 | browser.close()
199 | 
200 | 


--------------------------------------------------------------------------------
/extra/experimental/crawler/crawl_wikipaintings.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/
  2 | 
  3 | import os
  4 | from selenium import webdriver
  5 | import time
  6 | import urllib
  7 | import sys
  8 | import traceback
  9 | from selenium.common.exceptions import ElementNotVisibleException
 10 | 
 11 | 
 12 | def get_next_page_link(browser):
 13 |     pager_elements = browser.find_element_by_class_name("pager-items")
 14 | 
 15 |     pagelinks = pager_elements.find_elements_by_tag_name("a")
 16 | 
 17 |     for pagelink in pagelinks:
 18 |         if pagelink.text == "Next":
 19 |             return pagelink
 20 |     return None
 21 | 
 22 | def get_painters(browser):
 23 |     list_container = browser.find_element_by_id("listContainer")
 24 |     all_divs = list_container.find_elements_by_tag_name("div");
 25 |     painter_names = [];
 26 |     painter_urls = [];
 27 |     first_painting = [];
 28 |     print browser.current_url
 29 |     for painterdiv in all_divs:
 30 |         div_id = painterdiv.get_attribute("id");
 31 |         if div_id.startswith("a-") and not div_id.endswith("-slider"):
 32 |             print painterdiv.get_attribute("id")
 33 |             h2_elem = painterdiv.find_element_by_class_name("mr20")
 34 |             painter_name = h2_elem.text
 35 |             painter_names.append(painter_name)
 36 |             painter_url = h2_elem.find_element_by_tag_name("a").get_attribute("href");
 37 |             painter_urls.append(painter_url)
 38 | 
 39 |     return {'painter_names' : painter_names, 'painter_links' : painter_urls }
 40 | 
 41 | 
 42 | def parse_args():
 43 |     if len(sys.argv) == 1:
 44 |         return
 45 |     try:
 46 |         for arg in sys.argv:
 47 |             if (arg.startswith('-')):
 48 |                 tmp = arg[1:].lower()
 49 |                 parts = tmp.split('=')
 50 |                 if (parts[0] == 'datadir'):
 51 |                     _datadir = parts[1]
 52 |                 elif (parts[0] == 'style'):
 53 |                     _painting_style = parts[1]
 54 |         return {'datadir' : _datadir, 'style': _painting_style}
 55 |     except:
 56 |         print "Usage: -datadir=<path_to_data> -style=<impressionism|post-impressionism|> "
 57 |         print "Please use a painting style from wikipaintings.org"
 58 |         return
 59 | 
 60 | 
 61 | 
 62 | argv = parse_args()
 63 | 
 64 | startAt = 0
 65 | 
 66 | if (argv == None):
 67 |     painting_style = "impressionism"
 68 |     DataDir = '/data/datasets/paintings/'
 69 | else:
 70 |     painting_style = argv['style']
 71 |     DataDir = argv['datadir']
 72 | 
 73 | do_download = True
 74 | do_overwrite = False
 75 | 
 76 | if not os.path.exists(DataDir):
 77 |     os.mkdir(DataDir)
 78 | 
 79 | DataDir = os.path.join(DataDir, painting_style)
 80 | if not os.path.exists(DataDir):
 81 |     os.mkdir(DataDir)
 82 | 
 83 | 
 84 | browser = webdriver.Firefox()
 85 | browser.implicitly_wait(60)
 86 | browser.get("http://www.wikipaintings.org/en/paintings-by-style/" +
 87 |             painting_style + "/1")
 88 | 
 89 | page_url_list = []
 90 | page_url_list.append(browser.current_url)
 91 | nextpage = get_next_page_link(browser)
 92 | attr = nextpage.get_attribute("href")
 93 | 
 94 | while attr != None:
 95 |     nextpage.click()
 96 |     page_url_list.append(browser.current_url)
 97 |     nextpage = get_next_page_link(browser)
 98 |     attr = nextpage.get_attribute("href")
 99 | 
# Gather painter names and profile URLs from every listing page.
painter_list = []
painter_urls = []
for pgurl in page_url_list:
    time.sleep(2)
    browser.get(pgurl)
    painter_elements = get_painters(browser)
    painter_list.extend(painter_elements['painter_names'])
    painter_urls.extend(painter_elements['painter_links'])

paintings_artpage_list = []
paintings_name_list = []
paintings_src_list = []

painting_count = 0

# write painters: one UTF-8 encoded name per line
painter_list_path = os.path.join(DataDir, painting_style + '_painter_list.txt')
with open(painter_list_path, 'wb') as fp:
    for painter in painter_list:
        fp.write((painter + "\n").encode("UTF-8"))
121 | 
122 | 
def _open_first_painting(browser, painting_style):
    """Unfold the carousel of `painting_style` and click its first image."""
    # click on the current / style link to show the jcarousel
    current_list = browser.find_element_by_id("link-" + painting_style)
    current_list.click()
    time.sleep(1)
    # click on the first painting
    current_ul = browser.find_element_by_id("carousel-" + painting_style)
    container_a = current_ul.find_element_by_class_name("rimage")
    image_element = container_a.find_element_by_tag_name("img")
    image_element.click()


# Image shown instead of copyright protected paintings (compared lower-cased).
FRAME_PLACEHOLDER_URL = "http://cdnc.wikipaintings.org/zur/Global/Images/Global/FRAME-600x480.jpg".lower()

# For every painter: open the slideshow of the chosen style, walk through the
# slides, and record (and optionally download) each image. The browser is
# closed even when the crawl is interrupted.
try:
    for ii in range(startAt, len(painter_urls)):
        idx_slash = painter_urls[ii].rfind('/') + 1
        painter_name = painter_urls[ii][idx_slash:]
        list_prefix = os.path.join(
            DataDir, 'lst_' + str(ii).zfill(4) + '_' + painter_name)

        try:
            # *_detailed.txt: tab separated "file, image url, artwork page, title"
            # *_download.txt: bare image urls, one per line
            with open(list_prefix + '_detailed.txt', 'wb') as fdet:
                with open(list_prefix + '_download.txt', 'wb') as fdown:

                    print(str(ii) + "  " + painter_list[ii] + " " + painter_urls[ii])
                    browser.get(painter_urls[ii])
                    try:
                        _open_first_painting(browser, painting_style)
                    except ElementNotVisibleException:
                        # Retry once when an element was not visible yet.
                        _open_first_painting(browser, painting_style)
                    time.sleep(1)

                    span_total = browser.find_element_by_class_name("totalslides")
                    try:
                        total_paintings = int(span_total.get_attribute("innerHTML"))
                    except (TypeError, ValueError):
                        # Counter not parseable on the first read; read it once more.
                        total_paintings = int(span_total.get_attribute("innerHTML"))

                    for pp in range(0, total_paintings):
                        time.sleep(1)
                        active_slide = browser.find_element_by_class_name("activeslide")

                        # The image id is whatever follows the last '-' in the URL.
                        crt_url = browser.current_url
                        img_id = crt_url[crt_url.rfind('-') + 1:]

                        galleryData = browser.find_element_by_id("galleryData")
                        pelem = galleryData.find_element_by_tag_name("p")
                        ahref = pelem.find_element_by_tag_name("a")
                        href_text = ahref.text
                        artwork_link = ahref.get_attribute("href")

                        img_src = active_slide.find_element_by_tag_name("img").get_attribute("src")
                        # Drop everything from the first '!' on
                        # (presumably a size suffix -- TODO confirm).
                        exclamation = img_src.find('!')
                        if exclamation > -1:
                            img_src = img_src[:exclamation]
                        # Avoid copyright protected images
                        if img_src.lower() == FRAME_PLACEHOLDER_URL:
                            active_slide.click()
                            continue
                        fdet.write((img_id + u".jpg\t" + img_src + '\t' +
                                    artwork_link + '\t' + href_text +
                                    '\n').encode('UTF-8'))
                        # Encode explicitly, as for the detailed list, so a
                        # non-ASCII URL cannot trigger an implicit ASCII encode.
                        fdown.write((img_src + u'\n').encode('UTF-8'))

                        if do_download:
                            dst_path = os.path.join(DataDir, img_id + ".jpg")
                            if not os.path.exists(dst_path) or do_overwrite:
                                urllib.urlretrieve(img_src, dst_path)

                        # Clicking the slide is what moves on to the next one
                        # (presumably -- verify against the site).
                        active_slide.click()

        except Exception:
            # Keep going with the next painter, but say what went wrong.
            # KeyboardInterrupt is deliberately not caught so Ctrl-C stops the crawl.
            print("Failed download for : " + painter_list[ii])
            traceback.print_exc(file=sys.stderr)
finally:
    browser.close()
208 | 


--------------------------------------------------------------------------------
/extra/preprocess.m:
--------------------------------------------------------------------------------
  1 | function preprocess()
  2 | % PREPROCESS  Build vocabulary and compute histograms
  3 | %   PREPROCESS() download an image dataset into 'data/', VLFeat into
  4 | %   'vlfeat/', and precompute the histograms for the dataset.
  5 | 
  6 |   % --------------------------------------------------------------------
  7 |   %                                                      Download VLFeat
  8 |   % --------------------------------------------------------------------
  9 |   if ~exist('vlfeat', 'dir')
 10 |     from = 'http://www.vlfeat.org/download/vlfeat-0.9.21-bin.tar.gz' ;
 11 |     fprintf('Downloading vlfeat from %s\n', from) ;
 12 |     untar(from, 'data') ;
 13 |     movefile('data/vlfeat-0.9.21', 'vlfeat') ;
 14 |   end
 15 | 
 16 |   setup ;
 17 | 
 18 |   % --------------------------------------------------------------------
 19 |   %                                                      Setup Oxford 5k
 20 |   % --------------------------------------------------------------------
 21 |   %prefix = 'data/oxbuild' ;
 22 |   %imdb = setupOxford5kBase('data/oxbuild', 'data/oxbuild_images') ;
 23 |   prefix = 'data/oxbuild_lite' ;
 24 |   imdb = setupOxford5kBase(prefix, 'data/oxbuild_lite') ;
 25 |   for t = [1 3]
 26 |     switch t
 27 |       case 1
 28 |         suffix = '100k_disc_dog' ;
 29 |         numWords = 100e3 ;
 30 |         featureOpts = {'method', 'dog', 'affineAdaptation', false, 'orientation', false} ;
 31 |       case 2
 32 |         suffix = '100k_odisc_dog' ;
 33 |         numWords = 100e3 ;
 34 |         featureOpts = {'method', 'dog', 'affineAdaptation', false, 'orientation', true} ;
 35 |       case 3
 36 |         suffix = '100k_ellipse_dog' ;
 37 |         numWords = 100e3 ;
 38 |         featureOpts = {'method', 'dog', 'affineAdaptation', true, 'orientation', false} ;
 39 |       case 4
 40 |         suffix = '100k_oellipse_dog' ;
 41 |         numWords = 100e3 ;
 42 |         featureOpts = {'method', 'dog', 'affineAdaptation', true, 'orientation', true} ;
 43 |     end
 44 |     setupOxford5k(imdb, prefix, suffix, numWords, featureOpts) ;
 45 |   end
 46 | end
 47 | 
 48 | % --------------------------------------------------------------------
 49 | function imdb = setupOxford5kBase(prefix, imPath)
 50 | % --------------------------------------------------------------------
 51 |   imdbPath = [prefix '_imdb.mat'] ;
 52 |   queryPath = [prefix '_query.mat'] ;
 53 |   %if exist(imdbPath, 'file'), imdb = load(imdbPath) ; return ; end
 54 |   names = dir(fullfile(imPath, '*.jpg')) ;
 55 | 
 56 |   imdb.dir = imPath ;
 57 |   imdb.images.id = 1:numel(names) ;
 58 |   imdb.images.name = {names.name} ;
 59 |   save(imdbPath, '-STRUCT', 'imdb') ;
 60 | 
 61 |   for i = 1:numel(imdb.images.id)
 62 |     [~,postfixless{i}] = fileparts(imdb.images.name{i}) ;
 63 |   end
 64 |   function i = toindex(x)
 65 |     [ok,i] = ismember(x,postfixless) ;
 66 |     i = i(ok) ;
 67 |   end
 68 |   names = dir('data/oxbuild_gt/*_query.txt') ;
 69 |   names = {names.name} ;
 70 |   for i = 1:numel(names)
 71 |     base = names{i} ;
 72 |     [imageName,x0,y0,x1,y1] = textread(...
 73 |       fullfile('data/oxbuild_gt/', base), '%s %f %f %f %f') ;
 74 |     name = base ;
 75 |     name = name(1:end-10) ;
 76 |     imageName = cell2mat(imageName) ;
 77 |     imageName = imageName(6:end) ;
 78 |     query(i).name = name ;
 79 |     query(i).imageName = imageName ;
 80 |     query(i).imageId = toindex(imageName) ;
 81 |     query(i).box = [x0;y0;x1;y1] ;
 82 |     query(i).good = toindex(textread(...
 83 |       fullfile('data/oxbuild_gt/', sprintf('%s_good.txt',name)), '%s')) ;
 84 |     query(i).ok = toindex(textread(...
 85 |       fullfile('data/oxbuild_gt/', sprintf('%s_ok.txt',name)), '%s')) ;
 86 |     query(i).junk = toindex(textread(...
 87 |       fullfile('data/oxbuild_gt/', sprintf('%s_junk.txt',name)), '%s')) ;
 88 |   end
 89 | 
 90 |   % check for empty queries due to subsetting of the data
 91 |   ok = true(1,numel(query)) ;
 92 |   for i = 1:numel(query)
 93 |     ok(i) = ~isempty(query(i).imageId) & ...
 94 |             ~isempty(query(i).good) ;
 95 |   end
 96 |   query = query(ok) ;
 97 |   fprintf('%d of %d are covered by the selected database subset\n',sum(ok),numel(ok)) ;
 98 |   save(queryPath, 'query') ;
 99 | end
100 | 
101 | % --------------------------------------------------------------------
function setupOxford5k(imdb, prefix, suffix, numWords, featureOpts)
% --------------------------------------------------------------------
% SETUPOXFORD5K  Build a visual vocabulary and quantise all the images
%   SETUPOXFORD5K(IMDB, PREFIX, SUFFIX, NUMWORDS, FEATUREOPTS) extracts
%   features from every image in IMDB with GETFEATURES(..., FEATUREOPTS{:}),
%   clusters a subset of the descriptors into NUMWORDS visual words, and
%   assigns each feature of each image to a word. The augmented IMDB
%   (FEATUREOPTS, NUMWORDS, VOCAB, KDTREE, IMAGES.FRAMES, IMAGES.WORDS) is
%   saved to [PREFIX '_imdb_' SUFFIX '.mat'].
  outPath = [prefix '_imdb_' suffix '.mat'] ;
  %if exist(outPath, 'file'), return ; end
  imdb.featureOpts = featureOpts ;
  imdb.numWords = numWords ;

  % ------------------------------------------------------------------
  %                                      Compute the visual vocabulary
  % ------------------------------------------------------------------
  % Keep at most PERIMAGE descriptors per image, i.e. about ten
  % descriptors per visual word over the whole collection.
  numImages = numel(imdb.images.name) ;
  perImage = ceil(imdb.numWords * 10 / numImages) ;
  sampled = cell(1,numImages) ;
  parfor i = 1:numImages
    fprintf('get features from %i, %s\n', i, imdb.images.name{i}) ;
    im = imread(fullfile(imdb.dir, imdb.images.name{i})) ;
    [~,d] = getFeatures(im, imdb.featureOpts{:}) ;
    randn('state',i) ;
    sampled{i} = vl_colsubset(d,perImage) ;
  end

  pool = cat(2,sampled{:}) ;
  [imdb.vocab, imdb.kdtree] = annkmeans(pool, imdb.numWords, ...
                                        'numTrees', 4, ...
                                        'maxNumComparisons', 1024, ...
                                        'maxNumIterations', 30, ...
                                        'tolerance', 1e-3, ...
                                        'verbose', true, ...
                                        'seed', 2) ;

  % --------------------------------------------------------------------
  %                                                 Compute the features
  % --------------------------------------------------------------------
  % Second pass: this time every feature is kept and mapped to its
  % visual word through the KD-tree built on the vocabulary.
  clear frames words ;
  parfor i = 1:numImages
    fprintf('get features from %i, %s\n', i, imdb.images.name{i}) ;
    im = imread(fullfile(imdb.dir, imdb.images.name{i})) ;
    [frames{i},d] = getFeatures(im, imdb.featureOpts{:}) ;
    words{i} = vl_kdtreequery(imdb.kdtree, imdb.vocab, d, ...
                              'maxNumComparisons', 1024) ;
  end

  imdb.images.frames = frames ;
  imdb.images.words = words ;
  save(outPath, '-STRUCT', 'imdb') ;
end
148 | 


--------------------------------------------------------------------------------
/extra/preprocess_paintings.m:
--------------------------------------------------------------------------------
  1 | function preprocess_paintings()
  2 | % PREPROCESS_PAINTINGS
  3 | 
  4 | setup ;
  5 | 
  6 | dataDir = 'data' ;
  7 | listPath = 'extra/paintings.txt' ;
  8 | imdb = setupWikipediaPaintings(dataDir, listPath) ;
  9 | for t = 1
 10 |   switch t
 11 |     case 1
 12 |       suffix = '100k_disc_dog' ;
 13 |       numWords = 100e3 ;
 14 |       featureOpts = {'method', 'dog', 'affineAdaptation', false, 'orientation', false} ;
 15 |     case 2
 16 |       suffix = '100k_odisc_dog' ;
 17 |       numWords = 100e3 ;
 18 |       featureOpts = {'method', 'dog', 'affineAdaptation', false, 'orientation', true} ;
 19 |     case 3
 20 |       suffix = '100k_ellipse_hessian' ;
 21 |       numWords = 100e3 ;
 22 |       featureOpts = {'method', 'hessian', 'affineAdaptation', true, 'orientation', false} ;
 23 |     case 4
 24 |       suffix = '100k_oellipse_dog' ;
 25 |       numWords = 100e3 ;
 26 |       featureOpts = {'method', 'dog', 'affineAdaptation', true, 'orientation', true} ;
 27 |   end
 28 |   makeIndex(imdb, dataDir, suffix, numWords, featureOpts) ;
 29 | end
 30 | 
 31 | % --------------------------------------------------------------------
 32 | function [comment, descUrl, imgUrl] = getWikipediaImage(imgTitle)
 33 | % --------------------------------------------------------------------
 34 | 
 35 | url = ['https://en.wikipedia.org/w/api.php?' ...
 36 |   'action=query&prop=imageinfo&format=xml&iiprop=url' ...
 37 |   '|parsedcomment&iilimit=1&titles=' urlencode(imgTitle)] ;
 38 | 
 39 | content = urlread(url);
 40 | 
 41 | comment = [] ;
 42 | imgUrl = [] ;
 43 | descUrl = [] ;
 44 | 
 45 | m = regexp(content, 'parsedcomment="(?<x>[^"]*)"', 'names') ;
 46 | if numel(m) > 0
 47 |   comment = m.x ;
 48 | end
 49 | 
 50 | m = regexp(content, ' url="(?<x>[^"]*)"', 'names') ;
 51 | if numel(m) > 0
 52 |   imgUrl = m.x ;
 53 | end
 54 | 
 55 | m = regexp(content, 'descriptionurl="(?<x>[^"]*)"', 'names') ;
 56 | if numel(m) > 0
 57 |   descUrl = m.x ;
 58 | end
 59 | 
 60 | % --------------------------------------------------------------------
 61 | function imdb = setupWikipediaPaintings(dataDir, listPath)
 62 | % --------------------------------------------------------------------
 63 | 
 64 | mkdir(fullfile(dataDir, 'paintings')) ;
 65 | imdbPath = fullfile(dataDir, 'paintings_imdb.mat') ;
 66 | f=fopen(listPath,'r','n','UTF-8');
 67 | data = textscan(f, '%s%s', 'delimiter', '\t') ;
 68 | images = data{1} ;
 69 | titles = data{2} ;
 70 | fclose(f) ;
 71 | 
 72 | imdb.dir = fullfile(dataDir, 'paintings') ;
 73 | imdb.images.id = [] ;
 74 | imdb.images.name = {} ;
 75 | imdb.images.wikiName = {} ;
 76 | imdb.images.downloadUrl = {} ;
 77 | imdb.images.infoUrl = {} ;
 78 | 
 79 | for i=1:numel(images)
 80 |   imagePath = fullfile(imdb.dir, images{i}) ;
 81 |   matPath = [imagePath '.mat'] ;
 82 |   if ~exist(matPath)
 83 |     fprintf('Getting info for %s\n', titles{i}) ;
 84 |     [comment, descUrl, imgUrl] = getWikipediaImage(titles{i}) ;
 85 |     save(matPath, 'comment', 'descUrl', 'imgUrl') ;
 86 |   else
 87 |     load(matPath, 'comment', 'descUrl', 'imgUrl') ;
 88 |   end
 89 |   if isempty(imgUrl)
 90 |     warning('Could not find %s', titles{i}) ;
 91 |     continue ;
 92 |   end
 93 |   if ~exist(imagePath)
 94 |     fprintf('Getting image data for %s\n', titles{i}) ;
 95 |     im = imread(imgUrl) ;
 96 |     if size(im,1) > 1024
 97 |       im = imresize(im, [1024 NaN]) ;
 98 |     elseif size(im,2) > 1024
 99 |       im = imresize(im, [NaN 1024]) ;
100 |     end
101 |     imwrite(im, imagePath, 'quality', 95) ;
102 |   end
103 |   imdb.images.id(end+1) = numel(imdb.images.id)+1 ;
104 |   imdb.images.name{end+1} = images{i} ;
105 |   imdb.images.wikiName{end+1} = titles{i} ;
106 |   imdb.images.downloadUrl{end+1} = imgUrl ;
107 |   imdb.images.infoUrl{end+1} = descUrl ;
108 | end
109 | 
110 | save(imdbPath, '-STRUCT', 'imdb') ;
111 | 
112 | % --------------------------------------------------------------------
function makeIndex(imdb, dataDir, suffix, numWords, featureOpts)
% --------------------------------------------------------------------
% MAKEINDEX  Build a visual vocabulary and quantise all the paintings
%   MAKEINDEX(IMDB, DATADIR, SUFFIX, NUMWORDS, FEATUREOPTS) does nothing
%   if DATADIR/paintings_imdb_SUFFIX.mat already exists. Otherwise it
%   extracts features from every image in IMDB with
%   GETFEATURES(..., FEATUREOPTS{:}), clusters a subset of the
%   descriptors into NUMWORDS visual words, assigns each feature of each
%   image to a word, and saves the augmented IMDB (FEATUREOPTS,
%   NUMWORDS, VOCAB, KDTREE, IMAGES.FRAMES, IMAGES.WORDS) to that file.
indexPath = fullfile(dataDir, ['paintings_imdb_' suffix '.mat']) ;
if exist(indexPath, 'file'), return ; end
imdb.featureOpts = featureOpts ;
imdb.numWords = numWords ;

% ------------------------------------------------------------------
%                                      Compute the visual vocabulary
% ------------------------------------------------------------------
% Keep at most PERIMAGE descriptors per image, i.e. about ten
% descriptors per visual word over the whole collection.
numImages = numel(imdb.images.name) ;
sampled = cell(1,numImages) ;
perImage = ceil(imdb.numWords * 10 / numImages) ;
parfor i = 1:numImages
  fprintf('get features from %i, %s\n', i, imdb.images.name{i}) ;

  im = imread(fullfile(imdb.dir, imdb.images.name{i})) ;
  [~, d] = getFeatures(im, imdb.featureOpts{:}) ;
  randn('state',i) ;
  sampled{i} = vl_colsubset(d, perImage) ;
end

pool = cat(2,sampled{:}) ;
[imdb.vocab, imdb.kdtree] = annkmeans(pool, imdb.numWords, ...
  'numTrees', 4, ...
  'maxNumComparisons', 1024, ...
  'maxNumIterations', 30, ...
  'tolerance', 1e-3, ...
  'verbose', true, ...
  'seed', 2) ;

% --------------------------------------------------------------------
%                                                 Compute the features
% --------------------------------------------------------------------
% Second pass: this time every feature is kept and mapped to its visual
% word through the KD-tree built on the vocabulary.
clear frames words ;
parfor i = 1:numImages
  fprintf('get features from %i, %s\n', i, imdb.images.name{i}) ;
  im = imread(fullfile(imdb.dir, imdb.images.name{i})) ;
  [frames{i},d] = getFeatures(im, imdb.featureOpts{:}) ;
  words{i} = vl_kdtreequery(imdb.kdtree, imdb.vocab, d, ...
    'maxNumComparisons', 1024) ;
end

imdb.images.frames = frames ;
imdb.images.words = words ;
save(indexPath, '-STRUCT', 'imdb') ;
158 | 
159 | 


--------------------------------------------------------------------------------
/extra/queries/mistery-building1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/extra/queries/mistery-building1.jpg


--------------------------------------------------------------------------------
/extra/queries/mistery-painting1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/extra/queries/mistery-painting1.jpg


--------------------------------------------------------------------------------
/extra/queries/mistery-painting2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/extra/queries/mistery-painting2.jpg


--------------------------------------------------------------------------------
/extra/queries/mistery-painting3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vedaldi/practical-object-instance-recognition/0f459b76ce8afbfb09df788396d592cafc47d095/extra/queries/mistery-painting3.jpg


--------------------------------------------------------------------------------
/findNeighbours.m:
--------------------------------------------------------------------------------
 1 | function [nn, dist2] = findNeighbours(descrs1, descrs2, numNeighbours)
 2 | % FINDNEIGHBOURS  Find nearest neighbours
 3 | %   NN = FINDNEIGHBOURS(DESCRS1, DESCRS2) finds for each column of
 4 | %   DESCRS1 the closest column of DESRS2 (in Eulclidean distance)
 5 | %   storing the index to NN. The function uses a KDTree for
 6 | %   apporximate but fast matching.
 7 | %
 8 | %   NN = FINDNEIGHBOURS(DESCRS1, DESCRS2, NUMNEIGHBOURS) returns
 9 | %   NUMNEIGHBOUSRS neighbours by increasing distance, storing them
10 | %   as successive rows of NN.
11 | %
12 | %   [NN, DIST2] = FINDNEIGHBOURS(...) returns the corresponding
13 | %   matrix of distances DIST2 as well.
14 | 
15 | % Authors: Andrea Vedaldi
16 | 
17 | if nargin <= 2, numNeighbours = 1 ; end
18 | vl_twister('state',0) ;
19 | tree = vl_kdtreebuild(descrs2,'numTrees',2) ;
20 | [nn, dist2] = vl_kdtreequery(tree, descrs2, descrs1, ...
21 |                              'maxNumComparisons', 100, ...
22 |                              'numNeighbors', numNeighbours) ;
23 | 


--------------------------------------------------------------------------------
/geometricVerification.m:
--------------------------------------------------------------------------------
  1 | function [inliers, H] = geometricVerification(f1, f2, matches, varargin)
  2 | % GEOMETRICVERIFICATION  Verify feature matches based on geometry
  3 | %   OK = GEOMETRICVERIFICATION(F1, F2, MATCHES) check for geometric
  4 | %   consistency the matches MATCHES between feature frames F1 and F2
  5 | %   (see PLOTMATCHES() for the format). INLIERS is a list of indexes
  6 | %   of matches that are inliers to the geometric model.
  7 | 
  8 | % Author: Andrea Vedaldi
  9 | 
 10 |   opts.tolerance1 = 20 ;
 11 |   opts.tolerance2 = 15 ;
 12 |   opts.tolerance3 = 8 ;
 13 |   opts.minInliers = 6 ;
 14 |   opts.numRefinementIterations = 3 ;
 15 |   opts = vl_argparse(opts, varargin) ;
 16 | 
 17 |   numMatches = size(matches,2) ;
 18 |   inliers = cell(1, numMatches) ;
 19 |   H = cell(1, numMatches) ;
 20 | 
 21 |   x1 = double(f1(1:2, matches(1,:))) ;
 22 |   x2 = double(f2(1:2, matches(2,:))) ;
 23 | 
 24 |   x1hom = x1 ;
 25 |   x2hom = x2 ;
 26 |   x1hom(end+1,:) = 1 ;
 27 |   x2hom(end+1,:) = 1 ;
 28 | 
 29 |   % bad set of candidate inliers will produce a bad model, but
 30 |   % this will be discared
 31 |   warning('off','MATLAB:rankDeficientMatrix') ;
 32 | 
 33 |   for m = 1:numMatches
 34 |     for t = 1:opts.numRefinementIterations
 35 |       if t == 1
 36 |         A1 = toAffinity(f1(:,matches(1,m))) ;
 37 |         A2 = toAffinity(f2(:,matches(2,m))) ;
 38 |         H21 = A2 * inv(A1) ;
 39 |         x1p = H21(1:2,:) * x1hom ;
 40 |         tol = opts.tolerance1 ;
 41 |       elseif t <= 4
 42 |         % affinity
 43 |         H21 = x2(:,inliers{m}) / x1hom(:,inliers{m}) ;
 44 |         x1p = H21(1:2,:) * x1hom ;
 45 |         H21(3,:) = [0 0 1] ;
 46 |         tol = opts.tolerance2 ;
 47 |       else
 48 |         % homography
 49 |         x1in = x1hom(:,inliers{m}) ;
 50 |         x2in = x2hom(:,inliers{m}) ;
 51 | 
 52 |         % Sanity check
 53 |         %H = [.1 0 .4 ; 2 .3 .5 ; .1 .002 1] ;
 54 |         %x1in = [randn(2,100) ; ones(1,100)] ;
 55 |         %x2in = H*x1in ;
 56 |         %x2in = bsxfun(@times, x2in, 1./x2in(3,:)) ;
 57 | 
 58 |         S1 = centering(x1in) ;
 59 |         S2 = centering(x2in) ;
 60 |         x1c = S1 * x1in ;
 61 |         x2c = S2 * x2in ;
 62 | 
 63 |         M = [x1c, zeros(size(x1c)) ;
 64 |              zeros(size(x1c)), x1c ;
 65 |              bsxfun(@times, x1c,  -x2c(1,:)), bsxfun(@times, x1c,  -x2c(2,:))] ;
 66 |         [H21,D] = svd(M,'econ') ;
 67 |         H21 = reshape(H21(:,end),3,3)' ;
 68 |         H21 = inv(S2) * H21 * S1 ;
 69 |         H21 = H21 ./ H21(end) ;
 70 | 
 71 |         x1phom = H21 * x1hom ;
 72 |         x1p = [x1phom(1,:) ./ x1phom(3,:) ; x1phom(2,:) ./ x1phom(3,:)] ;
 73 |         tol = opts.tolerance3 ;
 74 |       end
 75 | 
 76 |       dist2 = sum((x2 - x1p).^2,1) ;
 77 |       inliers{m} = find(dist2 < tol^2) ;
 78 |       H{m} = H21 ;
 79 |       if numel(inliers{m}) < opts.minInliers, break ; end
 80 |       if numel(inliers{m}) > 0.7 * size(matches,2), break ; end % enough!
 81 |     end
 82 |   end
 83 |   scores = cellfun(@numel, inliers) ;
 84 |   [~, best] = max(scores) ;
 85 |   inliers = inliers{best} ;
 86 |   H = inv(H{best}) ;
 87 | end
 88 | 
 89 | % --------------------------------------------------------------------
 90 | function C = centering(x)
 91 | % --------------------------------------------------------------------
 92 |   T = [eye(2), - mean(x(1:2,:),2) ; 0 0 1] ;
 93 |   x = T * x ;
 94 |   std1 = std(x(1,:)) ;
 95 |   std2 = std(x(2,:)) ;
 96 | 
 97 |   % at least one pixel apart to avoid numerical problems
 98 |   std1 = max(std1, 1) ;
 99 |   std2 = max(std2, 1) ;
100 | 
101 |   S = [1/std1 0 0 ;
102 |        0 1/std2 0 ;
103 |        0 0      1] ;
104 |   C = S * T ;
105 | end
106 | 
107 | % --------------------------------------------------------------------
function A = toAffinity(f)
% --------------------------------------------------------------------
% Returns the 3x3 affine transformation A encoded by the feature frame
% F. The number of rows of F selects the frame type: 3 for discs
% (centre and scale), 4 for oriented discs (centre, scale and angle),
% 5 for ellipses (centre and stacked covariance) and 6 for oriented
% ellipses (centre and the two columns of the linear map).
  numRows = size(f,1) ;
  switch numRows
    case {3, 4} % discs and oriented discs
      if numRows == 3
        th = 0 ;
      else
        th = f(4) ;
      end
      s = f(3) ;
      linearPart = s*[cos(th) -sin(th) ; sin(th) cos(th)] ;
    case 5 % ellipses
      linearPart = mapFromS(f(3:5)) ;
    case 6 % oriented ellipses
      linearPart = [f(3:4), f(5:6)] ;
    otherwise
      assert(false) ;
  end
  centre = f(1:2) ;
  A = [linearPart, centre ; 0 0 1] ;
end
131 | 
132 | % --------------------------------------------------------------------
133 | function A = mapFromS(S)
134 | % --------------------------------------------------------------------
135 | % Returns the 2x2 upper-triangular matrix A that maps the unit circle
136 | % onto the ellipse satisfying the equation x' inv(S) x = 1. S is the
137 | % stacked covariance matrix of one ellipse, with elements S11, S12
138 | % and S22.
139 | 
140 |   root22 = sqrt(S(3,:)) + eps ;  % eps guards the divisions below
141 |   detS = S(1,:).*S(3,:) - S(2,:).^2 ;
142 |   A = [sqrt(detS) ./ root22,      S(2,:) ./ root22 ;
143 |        zeros(1,length(root22)),   root22] ;
144 | end


--------------------------------------------------------------------------------
/getFeatures.m:
--------------------------------------------------------------------------------
 1 | function [frames, descrs, im] = getFeatures(im, varargin)
 2 | % GETFEATURES  Extract feature frames (keypoints) and descriptors
 3 | %   [FRAMES, DESCRS] = GETFEATURES(IM) computes the SIFT features
 4 | %   from image IM by calling VL_COVDET. Images with more than one
 5 | %   channel are converted to grayscale first, and FRAMES is returned
 6 | %   in single precision.
 7 | %
 8 | %   Options:
 9 | %
10 | %   AffineAdaptation:: false
11 | %     Set to TRUE to turn on affine adaptation.
12 | %
13 | %   Orientation:: true
14 | %     Set to FALSE to turn off the detection of the feature
15 | %     orientation.
16 | %
17 | %   Method:: DoG
18 | %     Name of the detector, passed to VL_COVDET as its 'Method'
19 | %     option.
20 | %
21 | %   PeakThreshold:: 28 / 256^2
22 | %     Passed to VL_COVDET as its 'PeakThreshold' option.
23 | %
24 | %   MaxHeight:: +inf
25 | %     Images taller than this value are rescaled to this height.
26 | %     Use [~, ~, IM] = GETFEATURES(...) to obtain the rescaled image.
27 | 
28 | % Author: Andrea Vedaldi
29 | 
30 | opts = struct('method', 'dog', ...
31 |               'affineAdaptation', false, ...
32 |               'orientation', true, ...
33 |               'peakThreshold', 28 / 256^2, ...
34 |               'maxHeight', +inf) ;
35 | opts = vl_argparse(opts, varargin) ;
36 | 
37 | % work on a single-precision, single-channel image
38 | hasSeveralChannels = size(im,3) > 1 ;
39 | if hasSeveralChannels
40 |   im = rgb2gray(im) ;
41 | end
42 | im = im2single(im) ;
43 | 
44 | % shrink images that are too tall (NaN lets IMRESIZE pick the width)
45 | imHeight = size(im,1) ;
46 | if imHeight > opts.maxHeight
47 |   im = imresize(im, [opts.maxHeight, NaN]) ;
48 | end
49 | 
50 | detectorArgs = { ...
51 |   'EstimateAffineShape', opts.affineAdaptation, ...
52 |   'EstimateOrientation', opts.orientation, ...
53 |   'DoubleImage', false, ...
54 |   'Method', opts.method, ...
55 |   'PeakThreshold', opts.peakThreshold, ...
56 |   'Verbose'} ;
57 | [frames, descrs] = vl_covdet(im, detectorArgs{:}) ;
58 | frames = single(frames) ;
59 | 


--------------------------------------------------------------------------------
/getHistogram.m:
--------------------------------------------------------------------------------
 1 | function [h,frames,words] = getHistogram(imdb, frames, words, varargin)
 2 | % GETHISTOGRAM
 3 | %   H = GETHISTOGRAM(IMDB, FRAMES, WORDS) computes a visual word
 4 | %   histogram from the specified features. IMDB is the image database
 5 | %   structure; its fields NUMWORDS, IDF and SQRTHISTOGRAMS are used
 6 | %   here. FRAMES are the feature frames (keypoints) and WORDS the
 7 | %   quantized feature descriptors. H is a sparse column vector with a
 8 | %   dimension equal to the size of the visual words vocabulary
 9 | %   (IMDB.NUMWORDS). The word counts are weighted by IMDB.IDF,
10 | %   optionally square rooted, and finally l2 normalised.
11 | %
12 | %   [H, FRAMES, WORDS] = GETHISTOGRAM(...) also returns the frames and
13 | %   words that were actually used, i.e. the ones inside the box.
14 | %
15 | %   If no feature is left, or if the weighted histogram is zero, H is
16 | %   the all-zero vector (it cannot be normalised).
17 | %
18 | %   Options:
19 | %
20 | %   Box:: []
21 | %     Set to [xmin;ymin;xmax;ymax] to specify a bounding box in the image.
22 | 
23 | % Author: Andrea Vedaldi
24 | 
25 | opts.box = [] ;
26 | opts = vl_argparse(opts, varargin) ;
27 | 
28 | % keep only the features whose centre falls inside the box
29 | if ~isempty(opts.box)
30 |   ok = frames(1,:) >= opts.box(1) & ...
31 |        frames(1,:) <= opts.box(3) & ...
32 |        frames(2,:) >= opts.box(2) & ...
33 |        frames(2,:) <= opts.box(4) ;
34 |   frames = frames(:, ok) ;
35 |   words = words(ok) ;
36 | end
37 | 
38 | if isempty(words)
39 |   % nothing to count: return zeros rather than dividing 0 by 0 below
40 |   h = sparse(imdb.numWords, 1) ;
41 |   return ;
42 | end
43 | 
44 | h = sparse(double(words), 1, 1, imdb.numWords, 1) ;
45 | h = imdb.idf .* h ;
46 | if imdb.sqrtHistograms, h = sqrt(h) ; end
47 | 
48 | % l2 normalisation, skipped for a zero vector to avoid NaN entries
49 | mass = sqrt(full(sum(h.*h))) ;
50 | if mass > 0, h = h / mass ; end
51 | 


--------------------------------------------------------------------------------
/getHistogramFromImage.m:
--------------------------------------------------------------------------------
 1 | function [h, frames, words, descrs] = getHistogramFromImage(imdb, im, varargin)
 2 | % GETHISTOGRAMFROMIMAGE
 3 | %   [H, FRAMES, WORDS, DESCRS] = GETHISTOGRAMFROMIMAGE(IMDB, IM)
 4 | %   extracts the features of the image IM with GETFEATURES (using the
 5 | %   options in IMDB.FEATUREOPTS), quantizes the descriptors with the
 6 | %   KD-tree and vocabulary stored in IMDB, and computes the histogram
 7 | %   H with GETHISTOGRAM.
 8 | %
 9 | %   FRAMES and WORDS are the ones returned by GETHISTOGRAM, hence
10 | %   restricted to the box if one is given. DESCRS are the descriptors
11 | %   of all the extracted features (they are not filtered by the box).
12 | %
13 | %   Options:
14 | %
15 | %   Box:: []
16 | %     Passed to GETHISTOGRAM.
17 | %
18 | %   MaxNumComparisons:: 1024
19 | %     Passed to VL_KDTREEQUERY.
20 | 
21 | % Author: Andrea Vedaldi
22 | 
23 | opts.box = [] ;
24 | opts.maxNumComparisons = 1024 ;
25 | opts = vl_argparse(opts, varargin) ;
26 | 
27 | % extract the features
28 | [frames,descrs] = getFeatures(im,imdb.featureOpts{:}) ;
29 | 
30 | % quantize the features
31 | words = vl_kdtreequery(imdb.kdtree, imdb.vocab, descrs, ...
32 |                        'maxNumComparisons', opts.maxNumComparisons) ;
33 | 
34 | % get the histogram
35 | [h,frames,words] = getHistogram(imdb, frames, words, 'box', opts.box) ;
36 | 


--------------------------------------------------------------------------------
/loadIndex.m:
--------------------------------------------------------------------------------
 1 | function imdb = loadIndex(imdbPath, varargin)
 2 | % LOADINDEX  Load index from disk and apply options
 3 | %   IMDB = LOADINDEX(IMDBPATH) loads the image database IMDBPATH
 4 | %   and constructs the inverted index on the fly. The following
 5 | %   fields are added to the loaded structure:
 6 | %
 7 | %   IMDB.INDEX:: sparse matrix with one column per image, holding its
 8 | %     IDF-weighted (optionally square rooted) and l2-normalised
 9 | %     visual word histogram.
10 | %   IMDB.IDF:: the IDF weight of each visual word.
11 | %   IMDB.SHORTLISTSIZE, IMDB.SQRTHISTOGRAMS:: copies of the options.
12 | %
13 | %   Options:
14 | %
15 | %   SqrtHistograms:: false
16 | %     Set to TRUE to take the square root of the histograms.
17 | %
18 | %   ShortListSize:: 100
19 | %     Stored in IMDB.SHORTLISTSIZE.
20 | 
21 | % Author: Andrea Vedaldi
22 | 
23 | opts.sqrtHistograms = false ;
24 | opts.shortListSize = 100 ;
25 | opts = vl_argparse(opts, varargin) ;
26 | 
27 | imdb = load(imdbPath) ;
28 | imdb.shortListSize = opts.shortListSize ;
29 | imdb.sqrtHistograms = opts.sqrtHistograms ;
30 | 
31 | % --------------------------------------------------------------------
32 | %                                              Compute inverted index
33 | % --------------------------------------------------------------------
34 | 
35 | % for every word occurrence, the index of the image it belongs to
36 | numImages = numel(imdb.images.id) ;
37 | indexes = cell(1, numImages) ;
38 | for i = 1:numImages
39 |   indexes{i} = i * ones(1,numel(imdb.images.words{i})) ;
40 | end
41 | 
42 | % SPARSE accumulates repeated (word, image) pairs into counts
43 | imdb.index = sparse(double([imdb.images.words{:}]), ...
44 |                     [indexes{:}], ...
45 |                     1, ...
46 |                     imdb.numWords, ...
47 |                     numImages) ;
48 | 
49 | info = whos('imdb') ;
50 | fprintf('loadIndex: path: %s\n', imdbPath) ;
51 | fprintf('loadIndex: total number of features: %.2f M\n', full(sum(sum(imdb.index))) / 1e6) ;
52 | fprintf('loadIndex: number of indexed images: %.2f k\n', numImages / 1e3) ;
53 | fprintf('loadIndex: average num features per image: %.2f k\n', full(mean(sum(imdb.index))) / 1e3) ;
54 | fprintf('loadIndex: size in memory: %.1f MB\n', info.bytes / 1024^2) ;
55 | fprintf('loadIndex: short list size: %d\n',  imdb.shortListSize) ;
56 | fprintf('loadIndex: use sqrt: %d\n', imdb.sqrtHistograms) ;
57 | 
58 | % IDF weights: log(#images / #images containing the word); words that
59 | % never occur are treated as occurring once to avoid log(0)
60 | imdb.idf = log(numImages) - log(max(sum(imdb.index > 0, 2),1)) ;
61 | imdb.index = spdiags(imdb.idf, 0, imdb.numWords, imdb.numWords) * imdb.index ;
62 | 
63 | % square root
64 | if imdb.sqrtHistograms, imdb.index = sqrt(imdb.index) ; end
65 | 
66 | % final l2 normalisation; columns with zero mass (e.g. images without
67 | % features) are left at zero instead of being divided by zero
68 | mass = sqrt(full(sum(imdb.index.*imdb.index, 1)))' ;
69 | mass(mass == 0) = 1 ;
70 | imdb.index = imdb.index * spdiags(1./mass, 0, numImages, numImages) ;
71 | 


--------------------------------------------------------------------------------
/matchWords.m:
--------------------------------------------------------------------------------
 1 | function matches = matchWords(a, b)
 2 | % MATCHWORDS Matches sets of visual words
 3 | %   MATCHES = MATCHWORDS(A, B) finds occurrences in B of each element
 4 | %   of A. Each matched pair is stored as a column of the 2xN matrix
 5 | %   MATCHES, such that A(MATCHES(1,i)) == B(MATCHES(2,i)).
 6 | %
 7 | %   Words occurring more than five times in A (or in B) are ignored.
 8 | %   If an element of A matches more than one element of B, only one
 9 | %   of the possible matches is generated.
10 | 
11 | % Author: Andrea Vedaldi
12 | 
13 | a = single(a) ;
14 | b = single(b) ;
15 | 
16 | % NaN never equals anything, so it masks the words that are too common
17 | a(count(a) > 5) = NaN ;
18 | b(count(b) > 5) = NaN ;
19 | % match the remaining words; matched elements of B are masked in turn
20 | numRounds = 1 ;
21 | found = cell(1, numRounds) ;
22 | for r = 1:numRounds
23 |   [hit, where] = ismember(a, b) ;
24 |   found{r} = [find(hit) ; where(hit)] ;
25 |   b(where(hit)) = NaN ;
26 | end
27 | matches = cat(2, found{:}) ;
28 | 
29 | function c = count(a)
30 | % COUNT  Number of occurrences of each element
31 | %   C = COUNT(A) returns an array C of the same size as the vector A
32 | %   such that C(i) is the number of elements of A equal to A(i). An
33 | %   empty A gives an empty C.
34 | if isempty(a)
35 |   c = zeros(size(a)) ;
36 |   return ;
37 | end
38 | [~,~,map] = unique(a) ;
39 | % tally per distinct value; unlike HIST(A, VALUES) this stays correct
40 | % when A has a single distinct value (HIST would then read VALUES as
41 | % a number of bins)
42 | tally = accumarray(map(:), 1) ;
43 | c = reshape(tally(map), size(a)) ;
44 | 


--------------------------------------------------------------------------------
/plotMatches.m:
--------------------------------------------------------------------------------
 1 | function plotMatches(im1,im2,f1,f2,matches,varargin)
 2 | % PLOTMATCHES  Plot matching features between images
 3 | %   PLOTMATCHES(IM1, IM2, F1, F2, MATCHES) displays the images IM1 and
 4 | %   IM2 side by side, overlaying the feature frames F1 and F2 as well
 5 | %   as lines connecting them as specified by MATCHES. Each column of
 6 | %   MATCHES pairs the frame F1(:, MATCHES(1,i)) to the frame F2(:,
 7 | %   MATCHES(2,i)). MATCHES can also be a row vector, in which case
 8 | %   each non-zero MATCHES(i) pairs F1(:,i) to F2(:, MATCHES(i)).
 9 | %
10 | %   Options:
11 | %
12 | %   plotAllFrames:: false
13 | %     Set to true in order to plot all the frames, regardless of
14 | %     whether they are matched or not.
15 | %
16 | %   Homography:: []
17 | %     Set to the homography matrix mapping the second image to the
18 | %     first to display the homography mapping interactively.
19 | 
20 | % Author: Andrea Vedaldi
21 | 
22 | opts.plotAllFrames = false ;
23 | opts.homography = [];
24 | opts = vl_argparse(opts, varargin) ;
25 | 
26 | dh1 = max(size(im2,1)-size(im1,1),0) ;
27 | dh2 = max(size(im1,1)-size(im2,1),0) ;
28 | 
29 | if size(matches,1) == 1
30 |   i1 = find(matches) ;
31 |   i2 = matches(i1) ;
32 | else
33 |   i1 = matches(1,:) ;
34 |   i2 = matches(2,:) ;
35 | end
36 | 
37 | % frames of the second image, shifted right by the width of the first
38 | f2p = f2 ;
39 | f2p(1,:) = f2p(1,:) + size(im1,2) ;
40 | 
41 | hold on ;
42 | cla ; set(gca,'ydir', 'reverse') ;
43 | imagesc([padarray(im1,dh1,'post') padarray(im2,dh2,'post')]) ;
44 | axis image off ;
45 | set(gca,'xlimmode', 'manual', 'ylimmode', 'manual') ;
46 | if opts.plotAllFrames
47 |   vl_plotframe(f1,'linewidth',2) ;
48 |   vl_plotframe(f2p,'linewidth',2) ;
49 | else
50 |   vl_plotframe(f1(:,i1),'linewidth',2) ;
51 |   vl_plotframe(f2p(:,i2),'linewidth',2) ;
52 | end
53 | line([f1(1,i1);f2p(1,i2)], [f1(2,i1);f2p(2,i2)]) ;
54 | % count the pairs actually drawn (MATCHES may be in row-vector form)
55 | title(sprintf('number of matches: %d', numel(i1))) ;
56 | 
57 | if ~isempty(opts.homography)
58 |   s.axes = gca ;
59 |   s.cursor1 = [0;0] ; s.cursor2 = [0;0] ;
60 |   s.size1 = size(im1) ;
61 |   s.size2 = size(im2) ;
62 |   if verLessThan('matlab', '8.4.0')
63 |     s.point1 = plot(0,0,'g+','MarkerSize', 40, 'EraseMode','xor') ;
64 |     s.point2 = plot(0,0,'r+','MarkerSize', 40, 'EraseMode','xor') ;
65 |   else
66 |     s.point1 = plot(0,0,'g+','MarkerSize', 40) ;
67 |     s.point2 = plot(0,0,'r+','MarkerSize', 40) ;
68 |   end
69 |   s.H = inv(opts.homography) ; % maps the first image to the second
70 |   set(gcf, 'UserData', s)
71 |   set(gcf, 'WindowButtonMotionFcn', @mouseMove) ;
72 | end
73 | 
74 | function mouseMove(object, eventData)
75 | % MOUSEMOVE  Mouse-motion callback moving the two cursor markers
76 | %   Reads the state structure stored in the UserData of OBJECT. The
77 | %   pointer position is taken as a point of the image it hovers over
78 | %   and mapped to the other image with the matrix S.H (or its
79 | %   inverse); both markers are then moved accordingly.
80 | state = get(object, 'UserData') ;
81 | pointer = get(state.axes, 'CurrentPoint') ;
82 | width1 = state.size1(2) ;
83 | overFirstImage = pointer(1) <= width1 ;
84 | if overFirstImage
85 |   % pointer over the first image: map it forward with H
86 |   state.cursor1 = pointer(1, 1:2)' ;
87 |   mapped = state.H * [state.cursor1 ; 1] ;
88 |   state.cursor2 = mapped(1:2) / mapped(3) ;
89 | else
90 |   % pointer over the second image: undo the horizontal offset, map back
91 |   state.cursor2 = pointer(1, 1:2)' - [width1 ; 0] ;
92 |   mapped = inv(state.H) * [state.cursor2 ; 1] ;
93 |   state.cursor1 = mapped(1:2) / mapped(3) ;
94 | end
95 | set(state.point1, 'XData', state.cursor1(1), 'YData', state.cursor1(2)) ;
96 | set(state.point2, 'XData', state.cursor2(1) + width1, 'YData', state.cursor2(2)) ;
97 | if ~ verLessThan('matlab', '8.4.0'), drawnow expose ; end
98 | 


--------------------------------------------------------------------------------
/plotQueryImage.m:
--------------------------------------------------------------------------------
 1 | function plotQueryImage(imdb, res)
 2 | % PLOTQUERYIMAGE  Plot the query image from a set of search results
 3 | %   PLOTQUERYIMAGE(IMDB, RES) displays the query image for the set
 4 | %   of search results RES, with the query box (if any) drawn in blue.
 5 | %   If RES.QUERY.IMAGE is a single number, it is taken to be the id
 6 | %   of an image of IMDB, which is then read from the IMDB.DIR folder.
 7 | 
 8 | % Author: Andrea Vedaldi
 9 | 
10 | if numel(res.query.image) == 1
11 |   % the query is an image id: look the file up in the database
12 |   ii = vl_binsearch(imdb.images.id, res.query.image) ;
13 |   im = imread(fullfile(imdb.dir, imdb.images.name{ii})) ;
14 | else
15 |   im = res.query.image ;
16 | end
17 | 
18 | cla ;
19 | imagesc(im) ; hold on ;
20 | axis image off ;
21 | if ~isempty(res.query.box)
22 |   vl_plotbox(res.query.box, 'linewidth', 2, 'color', 'b') ;
23 | end
24 | title('Query image') ;
25 | 


--------------------------------------------------------------------------------
/plotRetrievedImages.m:
--------------------------------------------------------------------------------
  1 | function plotRetrievedImages(imdb, res, varargin)
  2 | % PLOTRETRIEVEDIMAGES  Displays search results
  3 | %   PLOTRETRIEVEDIMAGES(IMDB, RES) displays the images of the database
  4 | %   IMDB with the largest scores. RES is either a vector with one score
  5 | %   per image or a result structure, whose RES.GEOM.SCORES are used.
  6 | %   With a result structure, clicking an image shows its matches.
  7 | %
  8 | %   Options: Num (16) is the maximum number of images shown; Labels
  9 | %   ([]) holds one of 0, 1, -1 per image, selecting the caption colour.
 10 | 
 11 | % Author: Andrea Vedaldi and Mireca Cimpoi
 12 | 
 13 | opts.num = 16 ;
 14 | opts.labels = [] ;
 15 | opts = vl_argparse(opts, varargin) ;
 16 | 
 17 | scores = res ;
 18 | if isstruct(res), scores = res.geom.scores ; end
 19 | 
 20 | [scores, perm] = sort(scores, 'descend') ;
 21 | if isempty(opts.labels), opts.labels = zeros(1,numel(scores)) ; end
 22 | clf('reset') ;
 23 | 
 24 | % never index past the number of available images
 25 | for rank = 1:min(opts.num, numel(scores))
 26 |   vl_tightsubplot(opts.num, rank) ;
 27 |   ii = perm(rank) ;
 28 |   im0 = getImage(imdb, ii, true) ;
 29 |   data.h(rank) = imagesc(im0) ; axis image off ; hold on ;
 30 |   switch opts.labels(ii)
 31 |     case 1, cl = 'g' ;
 32 |     case -1, cl = 'r' ;
 33 |     otherwise, cl = 'y' ; % 0 and any unexpected label value
 34 |   end
 35 |   text(0,0,sprintf('%d: score:%.3g', rank, full(scores(rank))), ...
 36 |        'background', cl, ...
 37 |        'verticalalignment', 'top') ;
 38 |   set(data.h(rank), 'ButtonDownFcn', @zoomIn) ;
 39 | end
 40 | 
 41 | % for interactive plots
 42 | data.imdb = imdb ;
 43 | data.perm = perm ;
 44 | data.scores = scores ;
 45 | data.labels = opts.labels ;
 46 | data.res = res ;
 47 | guidata(gcf, data) ;
 48 | 
 49 | % --------------------------------------------------------------------
 50 | function im = getImage(imdb, ii, thumb)
 51 | % --------------------------------------------------------------------
 52 | % Loads image II of IMDB from disk or, failing that, from Wikipedia
 53 | % (the thumbnail if THUMB is true, else the image capped at 1024
 54 | % pixels per side); a checkerboard is returned if no image was found.
 55 | imPath = fullfile(imdb.dir, imdb.images.name{ii}) ;
 56 | im = [] ;
 57 | if exist(imPath, 'file'), im = imread(imPath) ; end
 58 | if isempty(im) && isfield(imdb.images, 'wikiName')
 59 |   name = imdb.images.wikiName{ii} ;
 60 |   [~,~,url,thumbUrl] = getWikiImageUrl(name) ;
 61 |   if thumb
 62 |     fprintf('Downloading thumbnail ''%s'' (%s)\n', thumbUrl, name) ;
 63 |     if ~isempty(thumbUrl), im = imread(thumbUrl) ; end
 64 |   else
 65 |     fprintf('Downloading image ''%s'' (%s)\n', url, name) ;
 66 |     if ~isempty(url)
 67 |       im = imread(url) ;
 68 |       % never enlarge; shrink so that no side exceeds 1024 pixels
 69 |       scale = min([1, 1024/size(im,1), 1024/size(im,2)]) ;
 70 |       im = imresize(im, scale) ;
 71 |     end
 72 |   end
 73 | end
 74 | 
 75 | if isempty(im)
 76 |   im = checkerboard(10,10) ;
 77 |   warning('Could not retrieve image ''%s''', imdb.images.name{ii}) ;
 78 | end
 79 | 
 80 | % --------------------------------------------------------------------
 81 | function zoomIn(h, event, data)
 82 | % --------------------------------------------------------------------
 83 | % Click callback: plots the matches between the query and the clicked
 84 | % image in figure 100 and, for Wikipedia images, opens a page using it.
 85 | data = guidata(h) ;
 86 | rank = find(h == data.h) ;
 87 | if ~isstruct(data.res), return ; end
 88 | 
 89 | % get query image
 90 | if numel(data.res.query.image) == 1
 91 |   ii = vl_binsearch(data.imdb.images.id, data.res.query.image) ;
 92 |   im1 = imread(fullfile(data.imdb.dir, data.imdb.images.name{ii})) ;
 93 | else
 94 |   im1 = data.res.query.image ;
 95 | end
 96 | 
 97 | % get retrieved image
 98 | ii = data.perm(rank) ;
 99 | im2 = getImage(data.imdb, ii, false) ;
100 | 
101 | % images that were not geometrically verified have no matches or H
102 | geom = data.res.geom ;
103 | m = geom.matches{ii} ;
104 | if isempty(m), m = zeros(2,0) ; end
105 | H = [] ;
106 | if isfield(geom, 'H') && numel(geom.H) >= ii, H = geom.H{ii} ; end
107 | figure(100) ; clf('reset') ;
108 | plotMatches(im1, im2, data.res.features.frames, ...
109 |             data.imdb.images.frames{ii}, m, 'homography', H) ;
110 | 
111 | % if we have a wikipedia page, try opening the URL too
112 | if isfield(data.imdb.images, 'wikiName')
113 |   name = data.imdb.images.wikiName{ii} ;
114 |   urls = getWikiPageContainingImage(name) ;
115 |   for i=1:numel(urls), fprintf('Found wikipedia page: %s\n', urls{i}) ; end
116 |   if ~isempty(urls)
117 |     fprintf('Opening first page %s\n', urls{1}) ;
118 |     web(urls{1}) ;
119 |   else
120 |     warning('Could not find an Wikipedia page containing %s', name) ;
121 |   end
122 | end
123 | 
124 | % --------------------------------------------------------------------
125 | function [comment, descUrl, imgUrl, thumbUrl] = getWikiImageUrl(imgTitle)
126 | % --------------------------------------------------------------------
127 | % Queries the Wikipedia API for the image IMGTITLE and returns its
128 | % parsed comment, description page URL, image URL and thumbnail URL
129 | % (requested with iiurlwidth=240). Attributes missing from the reply
130 | % are returned as ''.
131 | thumbSize = 'iiurlwidth=240' ;
132 | % every parameter, the thumbnail size included, is separated by '&'
133 | query = sprintf(['https://en.wikipedia.org/w/api.php?'...
134 |                  'action=query&prop=imageinfo&format=xml&iiprop=url|'...
135 |                  'parsedcomment&%s&iilimit=1&titles=%s'], ...
136 |                 thumbSize, urlencode(imgTitle)) ;
137 | content = urlread(query) ;
138 | 
139 | % take the first occurrence of each XML attribute in the reply
140 | names = {'parsedcomment', ' url', 'thumburl', 'descriptionurl'} ;
141 | values = cell(size(names)) ;
142 | for k = 1:numel(names)
143 |   tok = regexp(content, [names{k} '="([^"]*)"'], 'tokens', 'once') ;
144 |   if isempty(tok), values{k} = '' ; else, values{k} = tok{1} ; end
145 | end
146 | [comment, imgUrl, thumbUrl, descUrl] = values{:} ;
147 | 
148 | % -------------------------------------------------------------------
149 | function urlList = getWikiPageContainingImage(wikiTitle)
150 | % -------------------------------------------------------------------
151 | % Returns a cell array with the URL of each page that the Wikipedia
152 | % API reports as using the image WIKITITLE (the query sets iulimit=10).
153 | query = [...
154 |   'https://en.wikipedia.org/w/api.php?' ...
155 |   'action=query&list=imageusage&format=xml&iutitle=' ...
156 |   urlencode(wikiTitle) '&iunamespace=0&iulimit=10'] ;
157 | content = urlread(query) ;
158 | % keep only what lies between <imageusage> and </imageusage>
159 | [s, e] = regexp(content, '<imageusage>.*</imageusage>', 'start', 'end') ;
160 | iuTagsContent = content(s + 12:e - 13) ;
161 | 
162 | % extract the digits of every pageid="..." attribute
163 | ids = regexp(iuTagsContent, 'pageid="([0-9]*)"', 'tokens') ;
164 | urlList = cell(1, numel(ids)) ;
165 | for ii = 1:numel(ids)
166 |   urlList{ii} = getWikiUrlFromPageId(ids{ii}{1}) ;
167 | end
168 | 
169 | % -------------------------------------------------------------------
170 | function pageUrl = getWikiUrlFromPageId(pageid)
171 | % -------------------------------------------------------------------
172 | % Asks the Wikipedia API about the page whose id is the character
173 | % string PAGEID and returns the value of the fullurl attribute found
174 | % in the reply (empty if the attribute is not found).
175 | query = ['https://en.wikipedia.org/w/api.php?action=query&prop=info&format=xml&inprop=url&pageids=' pageid];
176 | reply = urlread(query) ;
177 | 
178 | % the URL lies between fullurl=" and " editurl, both 9 characters long
179 | [first, last] = regexp(reply, 'fullurl=".*" editurl', 'start', 'end') ;
180 | urlStart = first + 9 ;
181 | urlEnd = last - 9 ;
182 | pageUrl = reply(urlStart : urlEnd) ;
183 | 


--------------------------------------------------------------------------------
/search.m:
--------------------------------------------------------------------------------
 1 | function res = search(imdb, query, varargin)
 2 | % SEARCH  Search the image database
 3 | %   RES = SEARCH(IMDB, QUERY) searches the image database IMDB for the
 4 | %   query image QUERY returing a list of results RES.
 5 | 
 6 | % Author: Andrea Vedaldi
 7 | 
 8 | opts.box = [] ;
 9 | opts.verbose = true ;
10 | opts.skipGeometricVerification = false ;
11 | opts = vl_argparse(opts, varargin) ;
12 | 
13 | % --------------------------------------------------------------------
14 | %                                      Fetch an image and bounding box
15 | % --------------------------------------------------------------------
16 | 
17 | % fetch image
18 | if isnumeric(query)
19 |   if numel(query) == 1
20 |     % imageId
21 |     ii = vl_binsearch(imdb.images.id, query) ;
22 |     res.features.frames = imdb.images.frames{ii} ;
23 |     res.features.words = imdb.images.words{ii} ;
24 |   else
25 |     im = query ;
26 |   end
27 | elseif isstr(query)
28 |   im = imread(query) ;
29 | else
30 |   error('IM is neither an image, a path name, a URL, or an image id.') ;
31 | end
32 | 
33 | % ask for box
34 | if isnan(opts.box)
35 |   figure(1) ; clf ;
36 |   imshow(im) ;
37 |   title('Select a query box') ;
38 |   r = imrect ;
39 |   opts.box = r.getPosition ;
40 |   opts.box(3:4) = opts.box(3:4) + opts.box(1:2) ;
41 | end
42 | 
43 | res.query.image = im ;
44 | res.query.box = opts.box ;
45 | 
46 | % --------------------------------------------------------------------
47 | %                                                       Image features
48 | % --------------------------------------------------------------------
49 | res.features.time = tic ;
50 | if exist('im', 'var')
51 |   [res.features.histogram, res.features.frames, res.features.words] = ...
52 |       getHistogramFromImage(imdb, res.query.image, 'box', res.query.box) ;
53 | else
54 |   [hes.features.histogram, res.features.frames, res.features.words] = ...
55 |       getHistogram(imdb, res.features.frames, res.features.words, 'box', res.query.box) ;
56 | end
57 | res.features.time = toc(res.features.time) ;
58 | 
59 | % --------------------------------------------------------------------
60 | %                                                       Inverted index
61 | % --------------------------------------------------------------------
62 | res.index.time = tic ;
63 | res.index.scores = res.features.histogram' * imdb.index ;
64 | res.index.time = toc(res.index.time) ;
65 | [~, perm] = sort(res.index.scores, 'descend') ;
66 | 
67 | % --------------------------------------------------------------------
68 | %                                               Geometric verification
69 | % --------------------------------------------------------------------
70 | res.geom.time = tic ;
71 | res.geom.scores = res.index.scores ;
72 | res.geom.matches = cell(size(res.geom.scores)) ;
73 | for j = vl_colsubset(perm, imdb.shortListSize, 'beginning') ;
74 |   if opts.skipGeometricVerification, continue ; end
75 |   matches = matchWords(res.features.words, imdb.images.words{j}) ;
76 |   [inliers,H] = geometricVerification(res.features.frames, imdb.images.frames{j}, matches) ;
77 |   res.geom.matches{j} = matches(:, inliers) ;
78 |   res.geom.scores(j) = max(res.geom.scores(j), numel(inliers)) ;
79 |   res.geom.H{j} = H ;
80 | end
81 | res.geom.time = toc(res.geom.time) ;
82 | 
83 | fprintf('search: feature time: %.3f s\n', res.features.time) ;
84 | fprintf('search: index time: %.3f s\n', res.index.time) ;
85 | fprintf('search: geometric verification time: %.3f s\n', res.geom.time) ;
86 | 


--------------------------------------------------------------------------------
/setup.m:
--------------------------------------------------------------------------------
1 | % SETUP  Add the required search paths to MATLAB
2 | %   Runs the vl_setup script of the VLFeat copy stored next to this
3 | %   file, unless the vl_version MEX file is already on the path. The
4 | %   location is resolved from this file rather than from the current
5 | %   directory, so the script can be called from any folder.
6 | if exist('vl_version') ~= 3
7 |   run(fullfile(fileparts(mfilename('fullpath')), 'vlfeat', 'toolbox', 'vl_setup')) ;
8 | end
9 | 


--------------------------------------------------------------------------------