├── .gitignore
├── LICENSE
├── README.md
├── demo
    ├── ECBIR.jpg
    └── ECBIRdemo.png
├── offline.py
├── requirements.txt
├── server.py
├── static
    └── dataset
    │   └── Art-Photography
    │       ├── 0000001.jpg
    │       ├── 0000002.jpg
    │       ├── 0000003.jpg
    │       ├── 0000004.jpg
    │       ├── 0000005.jpg
    │       ├── 0000006.jpg
    │       ├── 0000007.jpg
    │       ├── 0000008.jpg
    │       ├── 0000009.jpg
    │       └── 0000010.jpg
├── templates
    └── index.html
└── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | *DS_Store
132 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 HANyangguang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ECBIR
 2 | Exact Content-based Image Retrieval System
 3 | <h3 align="center">Book Search Engine</h3>
 4 | <p align="center">
 5 |     An application that lets users search for their favorite book just by taking a picture of it.
 6 | </p>
 7 | 
 8 | <!-- TABLE OF CONTENTS -->
 9 | ## Table of Contents
10 | 
11 | * [About the Project](#about-the-project)
12 |   * [Built With](#built-with)
13 | * [Getting Started](#getting-started)
14 |   * [Prerequisites](#prerequisites)
15 |   * [Installation](#installation)
16 | * [Usage](#usage)
17 | * [License](#license)
18 | * [Acknowledgements](#acknowledgements)
19 | 
20 | <!-- ABOUT THE PROJECT -->
21 | ## About The Project
22 | 
23 | <p align="center">
24 | <img src="/demo/ECBIR.jpg"></img>
25 | </p>
26 | 
27 | There are many ways to search for a book that you are interested in -  searching it by name, author, ISBN, and any other relevant features. That is all great, but the process is becoming very long as we introduce more and more features in the search system. 
28 | 
29 | This project tries to find the book you want by leveraging the power of Deep Learning: it provides a system that lets an end user take a picture of a book's cover and search for that book in the database.
30 | 
31 | ### Built With
32 | * [Tensorflow](https://www.tensorflow.org/)
33 | * [Flask](https://www.palletsprojects.com/p/flask/)
34 | 
35 | 
36 | <!-- GETTING STARTED -->
37 | ## Getting Started
38 | 
39 | To get a local copy up and running follow these simple example steps.
40 | 
41 | ### Prerequisites
42 | 
43 | To run this project you'll need **Python 3.5 or later** and all dependencies listed in the **requirements.txt**. 
44 | 
45 | To install all dependencies listed in the requirements file:
46 | 
47 | ```sh
48 |  pip install -r requirements.txt 
49 | ```
50 | 
51 | ### Installation
52 | 
53 | 1. Clone the repo
54 | ```sh
55 | git clone https://github.com/HANyangguang/ECBIR.git
56 | ```
57 | 2. Create the **dataset** folder and other folders in the static folder
58 | ```sh
59 | mkdir static/dataset
60 | mkdir static/feature
61 | mkdir static/resized
62 | mkdir static/uploads
63 | ```
64 | 3. Download the book covers dataset from Kaggle and unpack it into the **dataset** folder
65 | 
66 | Link to the [dataset](https://www.kaggle.com/lukaanicin/book-covers-dataset)
67 | 
68 | 4. Run the script **offline.py** to index the database using DELF and HNSW
69 | ```sh
70 | python(3) offline.py
71 | ```
72 | 5. Start the Flask server with **server.py**
73 | ```sh
74 | python(3) server.py
75 | ```
76 | 
77 | <!-- USAGE EXAMPLES -->
78 | ## Usage examples
79 | 
80 | <p align="center"> 
81 |    <img src="/demo/ECBIRdemo.png" alt="Example Image" width="550">
82 | </p>
83 | 
84 | <!-- LICENSE -->
85 | ## License
86 | Distributed under the MIT License. See `LICENSE` for more information.
87 | 
88 | <!-- ACKNOWLEDGEMENTS -->
89 | ## Acknowledgements
90 | * [DEep Local Features (DELF) paper](https://arxiv.org/pdf/1612.06321.pdf)
91 | * [DELF Reference implementation](https://www.dlology.com/blog/easy-landmark-image-recognition-with-tensorflow-hub-delf-module/)
92 | * [Search Engine](https://github.com/lucko515/search-book-by-cover-server)
93 | * [Web HTML](https://github.com/matsui528/sis)


--------------------------------------------------------------------------------
/demo/ECBIR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/demo/ECBIR.jpg


--------------------------------------------------------------------------------
/demo/ECBIRdemo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/demo/ECBIRdemo.png


--------------------------------------------------------------------------------
/offline.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import pickle
 3 | import pandas as pd
 4 | from tqdm import tqdm
 5 | import nmslib
 6 | 
 7 | # For DELF loading
 8 | import tensorflow as tf
 9 | import tensorflow_hub as hub
10 | from utils import *
11 | 
12 | 
def generate_dataset_vectors(paths):
    '''
    Extract DELF locations and descriptors for every image in the dataset.

    A fresh default graph is created holding the DELF hub module, the images
    are pulled one by one through the loader built by paths_to_image_loader,
    and each decoded image is fed to the module.

    paths - sequence of image file paths (iterated once, in order)

    Returns a dict that maps each path to the [locations, descriptors] list
    fetched from the module for that image.
    '''
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.FATAL)

    delf = hub.Module('https://tfhub.dev/google/delf/1')

    image_placeholder = tf.placeholder(
        tf.float32, shape=(None, None, 3), name='input_image')

    outputs = delf(
        {
            'image': image_placeholder,
            'score_threshold': 100.0,
            'image_scales': [0.25, 0.3536, 0.5, 0.7071, 1.0, 1.4142, 2.0],
            'max_feature_num': 1000,
        },
        as_dict=True,
    )
    fetches = [outputs['locations'], outputs['descriptors']]

    # The loader is built with shuffle=False, so successive evaluations are
    # expected to follow the order of `paths`.
    next_image = paths_to_image_loader(list(paths))

    results_dict = {}
    with tf.train.MonitoredSession() as sess:
        for image_path in tqdm(paths):
            decoded = sess.run(next_image)
            results_dict[image_path] = sess.run(
                fetches, feed_dict={image_placeholder: decoded})
    return results_dict
43 | 
44 | 
# Collect every JPEG of the dataset (one sub-folder level below
# static/dataset) in a stable, sorted order and extract its DELF features.
paths = sorted(glob.glob('static/dataset/*/*.[Jj][Pp][Gg]'))
rea_dict = generate_dataset_vectors(paths)

# Flatten the per-image results into two aggregated arrays. The running sums
# of the per-image feature counts mark where each image's rows end, which is
# what lets a row of the aggregate be mapped back to its image later on.
paths = list(rea_dict.keys())
locations_agg = np.concatenate([rea_dict[img][0] for img in paths])
descriptors_agg = np.concatenate([rea_dict[img][1] for img in paths])
accumulated_indexes_boundaries = list(accumulate(
    [rea_dict[img][0].shape[0] for img in paths]))

# Space name
space_name = 'l2'
# Initialize the library, specify the space, the type of the vector and add data points
index = nmslib.init(method='hnsw', space=space_name,
                    data_type=nmslib.DataType.DENSE_VECTOR)
index.addDataPointBatch(descriptors_agg)


# Set index parameters
# These are the most important ones
M = 15
efC = 100
num_threads = 4
index_time_params = {'M': M, 'indexThreadQty': num_threads,
                     'efConstruction': efC, 'post': 0}

# Create the index and persist it together with the lookup tables the server
# needs. The files are opened in `with` blocks so every handle is flushed and
# closed even if a dump fails part-way.
index.createIndex(index_time_params)
index.saveIndex('static/feature/feature_set.bin')
with open('static/feature/locations_agg.pkl', 'wb') as f:
    pickle.dump(locations_agg, f)
with open('static/feature/accumulated_indexes_boundaries.pkl', 'wb') as f:
    pickle.dump(accumulated_indexes_boundaries, f)
with open('static/feature/paths.pkl', 'wb') as f:
    pickle.dump(paths, f)
78 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy
 2 | pandas
 3 | selenium
 4 | Flask
 5 | opencv_python
 6 | scipy
 7 | tensorflow_hub
 8 | scikit_image
 9 | matplotlib
10 | Werkzeug
11 | requests
12 | tqdm
13 | imutils
14 | tensorflow
15 | Pillow
16 | beautifulsoup4
17 | currency_converter
18 | nmslib
19 | 


--------------------------------------------------------------------------------
/server.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pickle
 3 | import werkzeug
 4 | import nmslib
 5 | from utils import find_close_books
 6 | 
 7 | # import Flask dependencies
 8 | from flask import Flask, request, render_template
 9 | 
# Set root dir
APP_ROOT = os.path.dirname(os.path.abspath(__file__))

# Artifacts written by offline.py. They are resolved against APP_ROOT (like
# the upload folders used by the request handler) so that the server can be
# started from any working directory.
# NOTE: pickle can execute arbitrary code while loading; only load files that
# were generated locally by offline.py.
FEATURE_DIR = os.path.join(APP_ROOT, "static", "feature")

with open(os.path.join(FEATURE_DIR, "locations_agg.pkl"), 'rb') as f:
    locations_agg = pickle.load(f)
with open(os.path.join(FEATURE_DIR, "accumulated_indexes_boundaries.pkl"), 'rb') as f:
    accumulated_indexes_boundaries = pickle.load(f)
with open(os.path.join(FEATURE_DIR, "paths.pkl"), 'rb') as f:
    paths = pickle.load(f)

space_name = 'l2'
# Re-initialize the library with the same method, space and vector type that
# were used when the index was built.
newIndex = nmslib.init(method='hnsw', space=space_name,
                       data_type=nmslib.DataType.DENSE_VECTOR)
# For an optimized L2 index, there's no need to re-load data points, but this would be required for
# non-optimized index or any other methods different from HNSW (other methods can save only meta indices)
# newIndex.addDataPointBatch(data_matrix)

# Re-load the index saved by offline.py
newIndex.loadIndex(os.path.join(FEATURE_DIR, 'feature_set.bin'))

# Setting query-time parameters
efS = 100
query_time_params = {'efSearch': efS}
newIndex.setQueryTimeParams(query_time_params)

# Define Flask app
app = Flask(__name__)
38 | 
39 | # Define apps home page
40 | 
41 | 
@app.route("/", methods=['GET', 'POST'])
def index():
    '''
    Home page: show the upload form and, on POST, the retrieval results.

    GET renders the empty form. POST stores the uploaded query image under
    static/uploads/, runs find_close_books on it (the resized copy is written
    to static/resized/) and renders the matching dataset images together with
    their scores. A POST that carries no usable file renders the empty form.
    '''
    if request.method != 'POST':
        return render_template("index.html")

    imagefile = request.files.get('query_img')
    # secure_filename strips path components; it yields '' when the form was
    # submitted without a file or the name has no safe characters left.
    filename = werkzeug.utils.secure_filename(
        imagefile.filename) if imagefile else ''
    if not filename:
        # Saving with an empty name would target the upload directory itself
        # and fail, so fall back to the plain form instead.
        return render_template("index.html")

    upload_dir = os.path.join(APP_ROOT, "static/uploads/")
    resized_dir = os.path.join(APP_ROOT, "static/resized/")
    os.makedirs(upload_dir, exist_ok=True)
    os.makedirs(resized_dir, exist_ok=True)

    upload_path = os.path.join(upload_dir, filename)
    imagefile.save(upload_path)

    # Perform the inference process on the uploaded image
    results = find_close_books(upload_path, os.path.join(resized_dir, filename),
                               locations_agg, accumulated_indexes_boundaries, newIndex)
    # Each result is an (image index, score) pair; translate the index into
    # the dataset path that the template uses as the image source.
    book_covers = [(paths[result[0]], result[1]) for result in results]
    return render_template('index.html', query_path='static/uploads/' + filename, cover_results=book_covers)
61 | 
62 | 
# Start the application
if __name__ == "__main__":
    # The server listens on all interfaces, and the interactive debugger that
    # comes with debug mode lets anyone who can reach it run code on this
    # machine. Debug mode is therefore opt-in: set FLASK_DEBUG=1 to enable it
    # during local development.
    debug_enabled = os.environ.get(
        "FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000001.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000002.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000003.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000004.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000005.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000005.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000006.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000006.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000007.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000007.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000008.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000008.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000009.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000009.jpg


--------------------------------------------------------------------------------
/static/dataset/Art-Photography/0000010.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HANyangguang/ECBIR/2e80a2f84523be69dd0383aa5c7e228e2014db39/static/dataset/Art-Photography/0000010.jpg


--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 |     <head><link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"></head>
 4 |     <body>
 5 |         <div class="container">
 6 | 	    <h2><b>Exact</b> Content Based Image Retrieval with Deep Learning</h2>
 7 | 	    <br />
 8 |             <form method="POST" enctype="multipart/form-data">
 9 |                 <input type="file" name="query_img"><br>
10 |                 <input type="submit">
11 |             </form>
12 |             <h3>Query:</h3>
13 |             {% if query_path %}
14 |             <img src="{{ query_path }}" width="250px">
15 |             {% endif %}
16 |             <h3>Results:</h3>
17 |             {% for cover in cover_results %}
18 |             <figure style="float: left; margin-right: 20px; margin-bottom: 20px;">
19 |                 <img src="{{ cover[0] }}" height="200px">
20 | 		<figcaption><p>{{ cover[1] }}</p></figcaption>
21 |             </figure>
22 |             {% endfor %}
23 |         </div>
24 |     </body>
25 | </html>
26 | 
27 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | # For DELF loading
  4 | import tensorflow as tf
  5 | import tensorflow_hub as hub
  6 | 
  7 | # Image dependencies
  8 | from PIL import Image, ImageOps
  9 | 
 10 | # Remove bad results with Random Sampling
 11 | from skimage.measure import ransac
 12 | from skimage.transform import AffineTransform
 13 | 
 14 | # For Inlines calculation
 15 | from itertools import accumulate
 16 | 
 17 | 
 18 | # Create the TensorFlow graph object
 19 | g = tf.Graph()
 20 | 
 21 | # Load the DELF model from the TF Hub
 22 | with g.as_default():
 23 |     model = hub.Module('https://tfhub.dev/google/delf/1')
 24 | 
 25 |     # Define image placeholder that supports RGB images of any sizes
 26 |     image_placeholder = tf.placeholder(
 27 |         tf.float32, shape=(None, None, 3), name='input_image')
 28 |     module_inputs = {
 29 |         'image': image_placeholder,
 30 |         'score_threshold': 100.0,
 31 |         # these scales are taken form DELF paper
 32 |         'image_scales': [0.25, 0.3536, 0.5, 0.7071, 1.0, 1.4142, 2.0],
 33 |         'max_feature_num': 1000,
 34 |     }
 35 |     module_outputs = model(module_inputs, as_dict=True)
 36 | 
 37 | 
 38 | def paths_to_image_loader(image_files):
 39 |     '''
 40 |     This functions loads one by one image from the folder.
 41 |     '''
 42 |     # Creates Queue from the list of paths
 43 |     filename_queue = tf.train.string_input_producer(image_files, shuffle=False)
 44 | 
 45 |     # Create file reader
 46 |     reader = tf.WholeFileReader()
 47 | 
 48 |     # Read queued files
 49 |     _, value = reader.read(filename_queue)
 50 | 
 51 |     # Conver loaded binary to the JPEG image in the RGB format
 52 |     image_tf = tf.image.decode_jpeg(value, channels=3)
 53 | 
 54 |     # Cast pixesl from int to float32 and return it
 55 |     return tf.image.convert_image_dtype(image_tf, tf.float32)
 56 | 
 57 | 
 58 | def resize_image(srcfile, destfile, new_width=128, new_height=128):
 59 |     '''
 60 |     Helper function that resizes an image to a specified new width and height.
 61 |     srcfile - String, path to the original image
 62 |     destfile - String, path where the resized image will be saved
 63 |     new_width - Integer
 64 |     new_height - Integer
 65 |     '''
 66 | 
 67 |     # Opens the original image
 68 |     pil_image = Image.open(srcfile)
 69 |     # resizes the original image and convers it back to RGB format
 70 |     pil_image = ImageOps.fit(
 71 |         pil_image, (new_width, new_height), Image.ANTIALIAS)
 72 |     pil_image_rgb = pil_image.convert('RGB')
 73 | 
 74 |     # Saves the resized image to the disk
 75 |     pil_image_rgb.save(destfile, format='JPEG', quality=90)
 76 |     return destfile
 77 | 
 78 | 
 79 | def query_image_features_generator(image_path):
 80 |     '''
 81 |     This function takes newly uploaded image (Query image) and generates features with the pre-trained model for that image.
 82 |     '''
 83 |     with tf.Session(graph=g) as s:
 84 |         # prepairs the image for the model
 85 |         image_tf = paths_to_image_loader([image_path])
 86 | 
 87 |         with tf.train.MonitoredSession() as sess:
 88 |             image = sess.run(image_tf)
 89 |             print('Extracting locations and descriptors from %s' % image_path)
 90 |             # Generate features
 91 |             return sess.run([module_outputs['locations'], module_outputs['descriptors']], feed_dict={image_placeholder: image})
 92 | 
 93 | 
 94 | def image_index_2_accumulated_indexes(index, accumulated_indexes_boundaries):
 95 |     '''
 96 |     Image index to accumulated/aggregated locations/descriptors pair indexes.
 97 |     '''
 98 |     if index > len(accumulated_indexes_boundaries) - 1:
 99 |         return None
100 |     accumulated_index_start = None
101 |     accumulated_index_end = None
102 |     if index == 0:
103 |         accumulated_index_start = 0
104 |         accumulated_index_end = accumulated_indexes_boundaries[index]
105 |     else:
106 |         accumulated_index_start = accumulated_indexes_boundaries[index-1]
107 |         accumulated_index_end = accumulated_indexes_boundaries[index]
108 |     return np.arange(accumulated_index_start, accumulated_index_end)
109 | 
110 | 
def get_locations_2_use(image_db_index,
                        k_nearest_indices,
                        accumulated_indexes_boundaries,
                        query_image_locations,
                        locations_agg):
    '''
    Pair up query locations with locations of one database image.

    image_db_index - Integer, index of the database image
    k_nearest_indices - per query feature, the aggregated indexes of its nearest neighbors
    accumulated_indexes_boundaries - running end offsets of each image in the aggregate
    query_image_locations - locations of the query features (same order as k_nearest_indices)
    locations_agg - aggregated locations of all database features

    For every query feature the first neighbor that belongs to the given
    image is used; query features without such a neighbor are skipped.

    Return: a tuple of 2 numpy arrays (query locations, database locations),
    both empty when nothing matches or the image index is out of range.
    '''
    image_accumulated_indexes = image_index_2_accumulated_indexes(
        image_db_index, accumulated_indexes_boundaries)
    # A set gives constant-time membership tests inside the double loop,
    # instead of scanning the whole index array for every neighbor.
    if image_accumulated_indexes is None:
        owned_rows = set()
    else:
        owned_rows = set(image_accumulated_indexes.tolist())

    locations_2_use_query = []
    locations_2_use_db = []
    for query_pos, neighbor_row in enumerate(k_nearest_indices):
        for acc_index in neighbor_row:
            if acc_index in owned_rows:
                locations_2_use_query.append(query_image_locations[query_pos])
                locations_2_use_db.append(locations_agg[acc_index])
                # Only the first hit per query feature is kept
                break

    return np.array(locations_2_use_query), np.array(locations_2_use_db)
132 | 
133 | 
def query_image_pipeline(image_path, save_path):
    '''
    Model inference wrapper: resize the query image and extract its features.

    image_path - String, path to the Query image
    save_path - String, path where the resized Query image is saved

    Returns a tuple (locations, descriptors, path of the resized image).
    '''
    resized_image = resize_image(image_path, save_path)
    locations, descriptors = query_image_features_generator(resized_image)
    return locations, descriptors, resized_image
142 | 
143 | 
def find_close_books(image_path,
                     save_path,
                     locations_agg,
                     accumulated_indexes_boundaries,
                     newIndex,
                     k_neighbors=10,
                     top_n=10):
    '''
    The inference function. Call this function to get the top n database
    images that match the query image.

    image_path - String, path to the Query image
    save_path - String, path where the resized Query image is saved
    locations_agg - Numpy array, aggregated feature locations of all database images
    accumulated_indexes_boundaries - running end offsets of each image inside the aggregate
    newIndex - nearest-neighbor index queried with the query descriptors
    k_neighbors - Integer, how many neighbors are requested per query descriptor
    top_n - Integer, how many images are returned at most

    Returns a list of (image index, inlier count) tuples, best match first.
    The list is empty when the query has no descriptors or no candidate
    passes the geometric verification.
    '''
    query_image_locations, query_image_descriptors, _ = query_image_pipeline(
        image_path, save_path)
    if len(query_image_descriptors) == 0:
        # Nothing to look up
        return []

    # Query: one (ids, distances) pair per query descriptor
    list_ids_distances = newIndex.knnQueryBatch(
        query_image_descriptors, k=k_neighbors, num_threads=4)
    # Kept as a list of rows so that rows of different length are harmless
    neighbor_rows = [item[0] for item in list_ids_distances]

    # Sorted unique aggregated indexes; drop anything past the aggregate
    unique_indices = np.unique(np.concatenate(neighbor_rows))
    unique_indices = unique_indices[unique_indices < locations_agg.shape[0]]
    if unique_indices.size == 0:
        return []

    # An aggregated index belongs to the first image whose boundary is
    # greater than it; searchsorted finds that position for all indexes at once.
    boundaries = np.asarray(accumulated_indexes_boundaries)
    unique_image_indexes = np.unique(
        np.searchsorted(boundaries, unique_indices, side='right'))
    unique_image_indexes = unique_image_indexes[
        unique_image_indexes < len(boundaries)]

    scored = []
    for index in unique_image_indexes:
        locations_2_use_query, locations_2_use_db = get_locations_2_use(
            index, neighbor_rows, accumulated_indexes_boundaries, query_image_locations, locations_agg)

        # Fewer correspondences than min_samples can never be verified
        if len(locations_2_use_db) < 3:
            continue

        # Perform geometric verification using RANSAC.
        try:
            _, inliers = ransac((locations_2_use_db, locations_2_use_query),  # source and destination coordinates
                                AffineTransform,
                                min_samples=3,
                                residual_threshold=20,
                                max_trials=1000)
        except Exception:
            # Best effort: a candidate that cannot be verified is skipped
            continue

        # If no inlier is found for a database candidate image, we continue on to the next one.
        if inliers is None or len(inliers) == 0:
            continue

        # the number of inliers as the score for retrieved images.
        scored.append((int(index), int(np.sum(inliers))))

    return sorted(scored, key=lambda pair: pair[1], reverse=True)[:top_n]
207 | 


--------------------------------------------------------------------------------