├── .dockerignore ├── .editorconfig ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── benchmark.py ├── docker-compose.yml ├── rc.yml ├── resources ├── logo.png └── logo.sketch ├── server.py └── wait-for-it.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | *.md 2 | *.pyc 3 | resources 4 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | end_of_line = lf 9 | charset = utf-8 10 | 11 | [*.{md,py}] 12 | max_line_length = 79 13 | 14 | # 4 space indentation 15 | [*.py] 16 | indent_style = space 17 | indent_size = 4 18 | trim_trailing_whitespace = true 19 | insert_final_newline = true 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Django stuff: 50 | *.log 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # PyBuilder 56 | target/ 57 | image_match/web/static/tmp/ 58 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | MAINTAINER Alex Kern 3 | 4 | RUN apt-get update && \ 5 | apt-get install -y libopenblas-dev gfortran && \ 6 | pip install numpy==1.12.1 && \ 7 | pip install scipy==0.19.0 && \ 8 | pip install gunicorn==19.7.1 && \ 9 | pip install flask==0.12.2 && \ 10 | pip install image-match==1.1.2 && \ 11 | pip install 'elasticsearch>=6.0.0,<7.0.0' && \ 12 | rm -rf /var/lib/apt/lists/* 13 | 14 | COPY server.py wait-for-it.sh / 15 | 16 | EXPOSE 80 17 | ENV PORT=80 \ 18 | WORKER_COUNT=4 \ 19 | ELASTICSEARCH_URL=elasticsearch:9200 \ 20 | ELASTICSEARCH_INDEX=images \ 21 | ELASTICSEARCH_DOC_TYPE=images \ 22 | ALL_ORIENTATIONS=true 23 | 24 | CMD gunicorn \ 25 | -t 60 \ 26 | --access-logfile - \ 27 | --access-logformat '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" - %(D)s' \ 28 | -b 0.0.0.0:${PORT} \ 29 | -w ${WORKER_COUNT} \ 30 | server:app 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Distributed Systems, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 13 | 14 | =============================================================================== 15 | image_match license 16 | 17 | # Code Licenses 18 | 19 | All code is licensed under the Apache License, Version 2.0, the full text of which can be found at [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0). 
20 | 21 | # Documentation Licenses 22 | 23 | The official image-match documentation, _except for the short code snippets embedded within it_, is licensed under a Creative Commons Attribution-ShareAlike 4.0 International license, the full text of which can be found at [http://creativecommons.org/licenses/by-sa/4.0/legalcode](http://creativecommons.org/licenses/by-sa/4.0/legalcode). 24 | 25 | =============================================================================== 26 | wait-for-it.sh license 27 | 28 | The MIT License (MIT) 29 | Copyright (c) 2016 Giles Hall 30 | 31 | Permission is hereby granted, free of charge, to any person obtaining a copy of 32 | this software and associated documentation files (the "Software"), to deal in 33 | the Software without restriction, including without limitation the rights to 34 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 35 | of the Software, and to permit persons to whom the Software is furnished to do 36 | so, subject to the following conditions: 37 | 38 | The above copyright notice and this permission notice shall be included in all 39 | copies or substantial portions of the Software. 40 | 41 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 42 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 43 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 44 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 45 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 46 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 47 | SOFTWARE. 
48 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all build push run dev 2 | 3 | DOCKER_TAG ?= dsys/match:latest 4 | 5 | export PORT ?= 8888 6 | export ELASTICSEARCH_URL ?= elasticsearch:9200 7 | export ELASTICSEARCH_INDEX ?= images 8 | export ELASTICSEARCH_DOC_TYPE ?= images 9 | 10 | all: run 11 | 12 | build: 13 | docker build -t $(DOCKER_TAG) . 14 | 15 | push: build 16 | docker push $(DOCKER_TAG) 17 | 18 | run: build 19 | docker run \ 20 | -e PORT \ 21 | -e ELASTICSEARCH_URL \ 22 | -e ELASTICSEARCH_INDEX \ 23 | -e ELASTICSEARCH_DOC_TYPE \ 24 | -p $(PORT):$(PORT) \ 25 | -it $(DOCKER_TAG) 26 | 27 | dev: build 28 | docker-compose up 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

logo

2 | 3 |

Scalable reverse image search
built on Kubernetes and Elasticsearch

4 | 5 |

GitHub stars Docker Pulls Kubernetes shield

6 | 7 | **Match** makes it easy to search for images that look similar to each other. Using a state-of-the-art perceptual hash, it is invariant to scaling and 90 degree rotations. Its HTTP API is quick to integrate and flexible for a number of reverse image search applications. Kubernetes and Elasticsearch allow Match to scale to billions of images with ease while giving you full control over where your data is stored. Match uses the awesome [ascribe/image-match](https://github.com/ascribe/image-match) under the hood for most of the image search legwork. 8 | 9 | 1. [Getting Started](#getting-started) 10 | 2. [API](#api) 11 | 3. [Development](#development) 12 | 4. [License and Acknowledgements](#license-and-acknowledgements) 13 | 14 | ## Getting Started 15 | 16 | If you already have ElasticSearch running: 17 | ``` 18 | $ docker run -e ELASTICSEARCH_URL=https://daisy.us-west-1.es.amazonaws.com -it dsys/match 19 | ``` 20 | 21 | If you want to run ElasticSearch locally as well, have [`docker-compose`](https://docs.docker.com/compose/) installed on your system, clone this repository and type: 22 | ``` 23 | $ make dev 24 | ``` 25 | 26 | Match is packaged as a Docker container ([dsys/match](https://hub.docker.com/r/dsys/match/) on Docker Hub), making it highly portable and scalable to billions of images. You can configure a few options using environment variables: 27 | 28 | * **WORKER_COUNT** *(default: `4`)* 29 | 30 | The number of gunicorn workers to spin up. 31 | 32 | * **ELASTICSEARCH_URL** *(default: `elasticsearch:9200`)* 33 | 34 | A URL pointing to the Elasticsearch database where image signatures are to be stored. If you don't want to host your own Elasticsearch cluster, consider using [AWS Elasticsearch Service](https://aws.amazon.com/elasticsearch-service/). That's what we use. 35 | 36 | * **ELASTICSEARCH_INDEX** *(default: images)* 37 | 38 | The index in the Elasticsearch database where image signatures are to be stored. 
39 | 40 | * **ELASTICSEARCH_DOC_TYPE** *(default: images)* 41 | 42 | The doc type used for storing image signatures. 43 | 44 | 45 | ### Using in your own Kubernetes cluster 46 | 47 | You can configure the service, replication controller, and secret like so: 48 | 49 | ```yaml 50 | # match-service.yml 51 | apiVersion: v1 52 | kind: Service 53 | metadata: 54 | name: match 55 | spec: 56 | ports: 57 | - name: http 58 | port: 80 59 | protocol: TCP 60 | selector: 61 | app: match 62 | ``` 63 | 64 | ```yaml 65 | # match-rc.yml 66 | apiVersion: v1 67 | kind: ReplicationController 68 | metadata: 69 | name: match 70 | spec: 71 | replicas: 1 72 | selector: 73 | app: match 74 | template: 75 | metadata: 76 | labels: 77 | app: match 78 | spec: 79 | containers: 80 | - name: match 81 | image: dsys/match:latest 82 | ports: 83 | - containerPort: 80 84 | env: 85 | - name: WORKER_COUNT 86 | value: "4" 87 | - name: ELASTICSEARCH_URL 88 | valueFrom: 89 | secretKeyRef: 90 | name: match 91 | key: elasticsearch.url 92 | - name: ELASTICSEARCH_INDEX 93 | valueFrom: 94 | secretKeyRef: 95 | name: match 96 | key: elasticsearch.index 97 | - name: ELASTICSEARCH_DOC_TYPE 98 | valueFrom: 99 | secretKeyRef: 100 | name: match 101 | key: elasticsearch.doc-type 102 | ``` 103 | 104 | ```yaml 105 | # match-secret.yml 106 | apiVersion: v1 107 | kind: Secret 108 | metadata: 109 | name: match 110 | data: 111 | # https://daisy.us-west-1.es.amazonaws.com (change me) 112 | elasticsearch.url: aHR0cHM6Ly9kYWlzeS51cy13ZXN0LTEuZXMuYW1hem9uYXdzLmNvbQ== 113 | 114 | # images 115 | elasticsearch.index: aW1hZ2Vz 116 | 117 | # images 118 | elasticsearch.doc-type: aW1hZ2Vz 119 | ``` 120 | 121 | ## API 122 | 123 | Match has a simple HTTP API. All request parameters are specified via `application/x-www-form-urlencoded` or `multipart/form-data`. 
124 | 125 | * [POST `/add`](#post-add) 126 | * [DELETE `/delete`](#delete-delete) 127 | * [POST `/search`](#post-search) 128 | * [POST `/compare`](#post-compare) 129 | * [GET `/count`](#get-count) 130 | * [GET `/list`](#get-list) 131 | * [GET `/ping`](#get-ping) 132 | 133 | --- 134 | 135 | ### POST `/add` 136 | 137 | Adds an image signature to the database. 138 | 139 | #### Parameters 140 | 141 | * **url** or **image** *(required)* 142 | 143 | The image to add to the database. It may be provided as a URL via `url` or as a `multipart/form-data` file upload via `image`. 144 | 145 | * **filepath** *(required)* 146 | 147 | The path to save the image to in the database. If another image already exists at the given path, it will be overwritten. 148 | 149 | * **metadata** *(default: None)* 150 | 151 | An arbitrary JSON object containing metadata to attach to the image. 152 | 153 | #### Example Response 154 | 155 | ```json 156 | { 157 | "status": "ok", 158 | "error": [], 159 | "method": "add", 160 | "result": [] 161 | } 162 | ``` 163 | 164 | --- 165 | 166 | ### DELETE `/delete` 167 | 168 | Deletes an image signature from the database. 169 | 170 | #### Parameters 171 | 172 | * **filepath** *(required)* 173 | 174 | The path of the image signature in the database. 175 | 176 | #### Example Response 177 | 178 | ```json 179 | { 180 | "status": "ok", 181 | "error": [], 182 | "method": "delete", 183 | "result": [] 184 | } 185 | ``` 186 | 187 | --- 188 | 189 | ### POST `/search` 190 | 191 | Searches for similar images in the database. Scores range from 0 to 100, with 100 being a perfect match. 192 | 193 | #### Parameters 194 | 195 | * **url** or **image** *(required)* 196 | 197 | The image to search for in the database. It may be provided as a URL via `url` or as a `multipart/form-data` file upload via `image`. 198 | 199 | * **all_orientations** *(default: true)* 200 | 201 | Whether or not to search for similar 90 degree rotations of the image. 
202 | 203 | #### Example Response 204 | 205 | ```json 206 | { 207 | "status": "ok", 208 | "error": [], 209 | "method": "search", 210 | "result": [ 211 | { 212 | "score": 99.0, 213 | "filepath": "http://static.wixstatic.com/media/0149b5_345c8f862e914a80bcfcc98fcd432e97.jpg_srz_614_709_85_22_0.50_1.20_0.00_jpg_srz" 214 | } 215 | ] 216 | } 217 | ``` 218 | 219 | --- 220 | 221 | ### POST `/compare` 222 | 223 | Compares two images, returning a score for their similarity. Scores range from 0 to 100, with 100 being a perfect match. 224 | 225 | #### Parameters 226 | 227 | * **url1** or **image1**, **url2** or **image2** *(required)* 228 | 229 | The images to compare. They may be provided as a URL via `url1`/`url2` or as a `multipart/form-data` file upload via `image1`/`image2`. 230 | 231 | #### Example Response 232 | 233 | ```json 234 | { 235 | "status": "ok", 236 | "error": [], 237 | "method": "compare", 238 | "result": [ 239 | { 240 | "score": 99.0 241 | } 242 | ] 243 | } 244 | ``` 245 | 246 | --- 247 | 248 | ### GET `/count` 249 | 250 | Counts the number of image signatures in the database. 251 | 252 | #### Example Response 253 | 254 | ```json 255 | { 256 | "status": "ok", 257 | "error": [], 258 | "method": "count", 259 | "result": [420] 260 | } 261 | ``` 262 | 263 | --- 264 | 265 | ### GET `/list` 266 | 267 | Lists the file paths for the image signatures in the database. 268 | 269 | #### Parameters 270 | 271 | * **offset** *(default: 0)* 272 | 273 | The location in the database to begin listing image paths. 274 | 275 | * **limit** *(default: 20)* 276 | 277 | The number of image paths to retrieve. 
278 | 279 | #### Example Response 280 | 281 | ```json 282 | { 283 | "status": "ok", 284 | "error": [], 285 | "method": "list", 286 | "result": [ 287 | "http://img.youtube.com/vi/iqPqylKy-bY/0.jpg", 288 | "https://i.ytimg.com/vi/zbjIwBggt2k/hqdefault.jpg", 289 | "https://s-media-cache-ak0.pinimg.com/736x/3d/67/6d/3d676d3f7f3031c9fd91c10b17d56afe.jpg" 290 | ] 291 | } 292 | ``` 293 | 294 | --- 295 | 296 | ### GET `/ping` 297 | 298 | Check for the health of the server. 299 | 300 | #### Example Response 301 | 302 | ```json 303 | { 304 | "status": "ok", 305 | "error": [], 306 | "method": "ping", 307 | "result": [] 308 | } 309 | ``` 310 | 311 | ## Development 312 | 313 | $ export ELASTICSEARCH_URL=https://daisy.us-west-1.es.amazonaws.com 314 | $ make build 315 | $ make run 316 | $ make push 317 | 318 | ## License and Acknowledgements 319 | 320 | Match is based on [ascribe/image-match](https://github.com/ascribe/image-match), which is in turn based on the paper [_An image signature for any kind of image_, Goldberg et al](http://www.cs.cmu.edu/~hcwong/Pdfs/icip02.ps). There is an existing [reference implementation](https://www.pureftpd.org/project/libpuzzle) which may be more suited to your needs. 321 | 322 | Match itself is released under the [BSD 3-Clause license](https://github.com/dsys/match/blob/master/LICENSE). `ascribe/image-match` is released under the Apache 2.0 license. 
# --------------------------------------------------------------------------------
# /benchmark.py:
# --------------------------------------------------------------------------------
"""Benchmark a running Match server.

Repeatedly uploads a randomly chosen image from IMAGES_DIR to ``/add`` under
a fresh random file path and then runs ``/search`` with the same image,
printing the elapsed time of every iteration and the overall average.
"""
from concurrent.futures import ThreadPoolExecutor
import argparse
import glob
import os
import random
import requests
import sys
import time
import uuid

parser = argparse.ArgumentParser(description='Benchmark Match.')
parser.add_argument('images_dir', metavar='IMAGES_DIR', type=str, help='directory with images to test')
parser.add_argument('-u', dest='url', default='http://localhost:8888', type=str, help='the URL of Match')
parser.add_argument('-i', dest='iterations', default=1000, type=int, help='number of iterations during the benchmark')
parser.add_argument('-c', dest='concurrency', default=10, type=int, help='concurrency of requests during the benchmark')
args = parser.parse_args()

images = glob.glob(args.images_dir + '/*')
print('benchmarking with {} images, {} iterations, {} concurrency'.format(len(images), args.iterations, args.concurrency))


def run(i):
    """Time one ``/add`` + ``/search`` round trip for a random image.

    ``i`` is the iteration index handed over by ``executor.map``; it is not
    used. The elapsed wall-clock time is printed, nothing is returned.
    """
    start = time.time()
    img = random.choice(images)
    filepath = str(uuid.uuid4())

    # Images are binary data: open in 'rb' (text mode fails to decode
    # arbitrary bytes under Python 3) and close the handle right away instead
    # of leaking two open files per iteration.
    with open(img, 'rb') as f:
        payload = f.read()
    # Keep sending the file's base name as the multipart filename.
    upload = (os.path.basename(img), payload)

    requests.post(args.url + '/add', files={'image': upload}, data={'filepath': filepath})
    requests.post(args.url + '/search', files={'image': upload})
    end = time.time()
    print('elapsed: {}'.format(end - start))


if __name__ == '__main__':
    # Without this check every worker would fail in random.choice([]).
    if not images:
        sys.exit('no images found in {}'.format(args.images_dir))

    total_start = time.time()
    executor = ThreadPoolExecutor(max_workers=args.concurrency)

    try:
        for _ in executor.map(run, range(args.iterations)):
            pass
    except KeyboardInterrupt:
        print('shutting down...')
    finally:
        executor.shutdown()

    total_end = time.time()
    total_elapsed = total_end - total_start
    # Guard the average against `-i 0`.
    avg_elapsed = total_elapsed / args.iterations if args.iterations > 0 else 0.0
    print('total elapsed: {}, avg elapsed: {}'.format(total_elapsed, avg_elapsed))
    sys.exit(0)
-------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | match: 4 | image: dsys/match:latest 5 | ports: 6 | - 127.0.0.1:8888:8888 7 | command: ["/wait-for-it.sh", "-t", "60", "elasticsearch:9200", "--", "gunicorn", "-b", "0.0.0.0:8888", "-w", "4", "server:app"] 8 | links: 9 | - elasticsearch 10 | elasticsearch: 11 | image: elasticsearch:6.4.2 12 | ports: 13 | - 127.0.0.1:9200:9200 14 | -------------------------------------------------------------------------------- /rc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ReplicationController 3 | metadata: 4 | name: match 5 | spec: 6 | replicas: 1 7 | selector: 8 | app: match 9 | template: 10 | metadata: 11 | labels: 12 | app: match 13 | spec: 14 | containers: 15 | - name: match 16 | image: dsys/match:latest 17 | ports: 18 | - containerPort: 80 19 | env: 20 | - name: WORKER_COUNT 21 | value: "4" 22 | - name: ELASTICSEARCH_URL 23 | valueFrom: 24 | secretKeyRef: 25 | name: match 26 | key: elasticsearch.url 27 | - name: ELASTICSEARCH_INDEX 28 | valueFrom: 29 | secretKeyRef: 30 | name: match 31 | key: elasticsearch.index 32 | - name: ELASTICSEARCH_DOC_TYPE 33 | valueFrom: 34 | secretKeyRef: 35 | name: match 36 | key: elasticsearch.doc-type 37 | -------------------------------------------------------------------------------- /resources/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsys/match/e66c3b5d215915533dad2fcc5b6fd7181fd05625/resources/logo.png -------------------------------------------------------------------------------- /resources/logo.sketch: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/dsys/match/e66c3b5d215915533dad2fcc5b6fd7181fd05625/resources/logo.sketch
# --------------------------------------------------------------------------------
# /server.py:
# --------------------------------------------------------------------------------
from elasticsearch import Elasticsearch
from flask import Flask, request
from image_match.elasticsearch_driver import SignatureES
from image_match.goldberg import ImageSignature
import json
import os
import sys

# =============================================================================
# Globals

# Configuration comes exclusively from the environment; a missing variable
# raises KeyError while the module is being imported.
es_url = os.environ['ELASTICSEARCH_URL']
es_index = os.environ['ELASTICSEARCH_INDEX']
es_doc_type = os.environ['ELASTICSEARCH_DOC_TYPE']
all_orientations = os.environ['ALL_ORIENTATIONS']

app = Flask(__name__)
es = Elasticsearch([es_url], verify_certs=True, timeout=60, max_retries=10, retry_on_timeout=True)
ses = SignatureES(es, index=es_index, doc_type=es_doc_type)
gis = ImageSignature()

# Try to create the index and ignore IndexAlreadyExistsException
# if the index already exists
es.indices.create(index=es_index, ignore=400)

# =============================================================================
# Helpers

def ids_with_path(path):
    """Return the ``_id`` of every search hit whose ``path`` field matches ``path``.

    The path is JSON-encoded so it reaches the query string as a quoted value.
    NOTE(review): no ``size`` is passed, so presumably only the first page of
    hits is returned -- confirm against the Elasticsearch search defaults.
    """
    query = 'path:' + json.dumps(path)
    response = es.search(index=es_index, _source='_id', q=query)
    found = []
    for hit in response['hits']['hits']:
        found.append(hit['_id'])
    return found

def paths_at_location(offset, limit):
    """Return the ``path`` of up to ``limit`` documents, starting at ``offset``."""
    response = es.search(index=es_index,
                         from_=offset,
                         size=limit,
                         _source='path')
    hits = response['hits']['hits']
    return [hit['_source']['path'] for hit in hits]

def count_images():
    """Return the number of documents in the configured index."""
    response = es.count(index=es_index)
    return response['count']

def delete_ids(ids):
    """Delete each document id in ``ids``; ids that no longer exist (404) are ignored."""
    for doc_id in ids:
        es.delete(index=es_index, doc_type=es_doc_type, id=doc_id, ignore=404)

def dist_to_percent(dist):
    """Map a distance to a score: 0 becomes 100, 1 becomes 0."""
    closeness = 1 - dist
    return closeness * 100
52 | def get_image(url_field, file_field): 53 | if url_field in request.form: 54 | return request.form[url_field], False 55 | else: 56 | return request.files[file_field].read(), True 57 | 58 | # ============================================================================= 59 | # Routes 60 | 61 | @app.route('/add', methods=['POST']) 62 | def add_handler(): 63 | path = request.form['filepath'] 64 | try: 65 | metadata = json.loads(request.form['metadata']) 66 | except KeyError: 67 | metadata = None 68 | img, bs = get_image('url', 'image') 69 | 70 | old_ids = ids_with_path(path) 71 | ses.add_image(path, img, bytestream=bs, metadata=metadata) 72 | delete_ids(old_ids) 73 | 74 | return json.dumps({ 75 | 'status': 'ok', 76 | 'error': [], 77 | 'method': 'add', 78 | 'result': [] 79 | }) 80 | 81 | @app.route('/delete', methods=['DELETE']) 82 | def delete_handler(): 83 | path = request.form['filepath'] 84 | ids = ids_with_path(path) 85 | delete_ids(ids) 86 | return json.dumps({ 87 | 'status': 'ok', 88 | 'error': [], 89 | 'method': 'delete', 90 | 'result': [] 91 | }) 92 | 93 | @app.route('/search', methods=['POST']) 94 | def search_handler(): 95 | img, bs = get_image('url', 'image') 96 | ao = request.form.get('all_orientations', all_orientations) == 'true' 97 | 98 | matches = ses.search_image( 99 | path=img, 100 | all_orientations=ao, 101 | bytestream=bs) 102 | 103 | return json.dumps({ 104 | 'status': 'ok', 105 | 'error': [], 106 | 'method': 'search', 107 | 'result': [{ 108 | 'score': dist_to_percent(m['dist']), 109 | 'filepath': m['path'], 110 | 'metadata': m['metadata'] 111 | } for m in matches] 112 | }) 113 | 114 | @app.route('/compare', methods=['POST']) 115 | def compare_handler(): 116 | img1, bs1 = get_image('url1', 'image1') 117 | img2, bs2 = get_image('url2', 'image2') 118 | img1_sig = gis.generate_signature(img1, bytestream=bs1) 119 | img2_sig = gis.generate_signature(img2, bytestream=bs2) 120 | score = dist_to_percent(gis.normalized_distance(img1_sig, img2_sig)) 121 
| 122 | return json.dumps({ 123 | 'status': 'ok', 124 | 'error': [], 125 | 'method': 'compare', 126 | 'result': [{ 'score': score }] 127 | }) 128 | 129 | @app.route('/count', methods=['GET', 'POST']) 130 | def count_handler(): 131 | count = count_images() 132 | return json.dumps({ 133 | 'status': 'ok', 134 | 'error': [], 135 | 'method': 'count', 136 | 'result': [count] 137 | }) 138 | 139 | @app.route('/list', methods=['GET', 'POST']) 140 | def list_handler(): 141 | if request.method == 'GET': 142 | offset = max(int(request.args.get('offset', 0)), 0) 143 | limit = max(int(request.args.get('limit', 20)), 0) 144 | else: 145 | offset = max(int(request.form.get('offset', 0)), 0) 146 | limit = max(int(request.form.get('limit', 20)), 0) 147 | paths = paths_at_location(offset, limit) 148 | 149 | return json.dumps({ 150 | 'status': 'ok', 151 | 'error': [], 152 | 'method': 'list', 153 | 'result': paths 154 | }) 155 | 156 | @app.route('/ping', methods=['GET', 'POST']) 157 | def ping_handler(): 158 | return json.dumps({ 159 | 'status': 'ok', 160 | 'error': [], 161 | 'method': 'ping', 162 | 'result': [] 163 | }) 164 | 165 | # ============================================================================= 166 | # Error Handling 167 | 168 | @app.errorhandler(400) 169 | def bad_request(e): 170 | return json.dumps({ 171 | 'status': 'fail', 172 | 'error': ['bad request'], 173 | 'method': '', 174 | 'result': [] 175 | }), 400 176 | 177 | @app.errorhandler(404) 178 | def page_not_found(e): 179 | return json.dumps({ 180 | 'status': 'fail', 181 | 'error': ['not found'], 182 | 'method': '', 183 | 'result': [] 184 | }), 404 185 | 186 | @app.errorhandler(405) 187 | def method_not_allowed(e): 188 | return json.dumps({ 189 | 'status': 'fail', 190 | 'error': ['method not allowed'], 191 | 'method': '', 192 | 'result': [] 193 | }), 405 194 | 195 | @app.errorhandler(500) 196 | def server_error(e): 197 | return json.dumps({ 198 | 'status': 'fail', 199 | 'error': [str(e)], 200 | 'method': '', 201 | 
'result': [] 202 | }), 500 203 | -------------------------------------------------------------------------------- /wait-for-it.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Use this script to test if a given TCP host/port are available 3 | 4 | # Thanks @visnubob! 5 | # 6 | # https://github.com/vishnubob/wait-for-it 7 | 8 | cmdname=$(basename $0) 9 | 10 | echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } 11 | 12 | usage() 13 | { 14 | cat << USAGE >&2 15 | Usage: 16 | $cmdname host:port [-s] [-t timeout] [-- command args] 17 | -h HOST | --host=HOST Host or IP under test 18 | -p PORT | --port=PORT TCP port under test 19 | Alternatively, you specify the host and port as host:port 20 | -s | --strict Only execute subcommand if the test succeeds 21 | -q | --quiet Don't output any status messages 22 | -t TIMEOUT | --timeout=TIMEOUT 23 | Timeout in seconds, zero for no timeout 24 | -- COMMAND ARGS Execute command with args after the test finishes 25 | USAGE 26 | exit 1 27 | } 28 | 29 | wait_for() 30 | { 31 | if [[ $TIMEOUT -gt 0 ]]; then 32 | echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT" 33 | else 34 | echoerr "$cmdname: waiting for $HOST:$PORT without a timeout" 35 | fi 36 | start_ts=$(date +%s) 37 | while : 38 | do 39 | if [[ $ISBUSY -eq 1 ]]; then 40 | nc -z $HOST $PORT 41 | result=$? 42 | else 43 | (echo > /dev/tcp/$HOST/$PORT) >/dev/null 2>&1 44 | result=$? 
45 | fi 46 | if [[ $result -eq 0 ]]; then 47 | end_ts=$(date +%s) 48 | echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds" 49 | break 50 | fi 51 | sleep 1 52 | done 53 | return $result 54 | } 55 | 56 | wait_for_wrapper() 57 | { 58 | # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 59 | if [[ $QUIET -eq 1 ]]; then 60 | timeout $BUSYTIMEFLAG $TIMEOUT $0 --quiet --child --host=$HOST --port=$PORT --timeout=$TIMEOUT & 61 | else 62 | timeout $BUSYTIMEFLAG $TIMEOUT $0 --child --host=$HOST --port=$PORT --timeout=$TIMEOUT & 63 | fi 64 | PID=$! 65 | trap "kill -INT -$PID" INT 66 | wait $PID 67 | RESULT=$? 68 | if [[ $RESULT -ne 0 ]]; then 69 | echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT" 70 | fi 71 | return $RESULT 72 | } 73 | 74 | # process arguments 75 | while [[ $# -gt 0 ]] 76 | do 77 | case "$1" in 78 | *:* ) 79 | hostport=(${1//:/ }) 80 | HOST=${hostport[0]} 81 | PORT=${hostport[1]} 82 | shift 1 83 | ;; 84 | --child) 85 | CHILD=1 86 | shift 1 87 | ;; 88 | -q | --quiet) 89 | QUIET=1 90 | shift 1 91 | ;; 92 | -s | --strict) 93 | STRICT=1 94 | shift 1 95 | ;; 96 | -h) 97 | HOST="$2" 98 | if [[ $HOST == "" ]]; then break; fi 99 | shift 2 100 | ;; 101 | --host=*) 102 | HOST="${1#*=}" 103 | shift 1 104 | ;; 105 | -p) 106 | PORT="$2" 107 | if [[ $PORT == "" ]]; then break; fi 108 | shift 2 109 | ;; 110 | --port=*) 111 | PORT="${1#*=}" 112 | shift 1 113 | ;; 114 | -t) 115 | TIMEOUT="$2" 116 | if [[ $TIMEOUT == "" ]]; then break; fi 117 | shift 2 118 | ;; 119 | --timeout=*) 120 | TIMEOUT="${1#*=}" 121 | shift 1 122 | ;; 123 | --) 124 | shift 125 | CLI="$@" 126 | break 127 | ;; 128 | --help) 129 | usage 130 | ;; 131 | *) 132 | echoerr "Unknown argument: $1" 133 | usage 134 | ;; 135 | esac 136 | done 137 | 138 | if [[ "$HOST" == "" || "$PORT" == "" ]]; then 139 | echoerr "Error: you need to provide a host and port to test." 
140 | usage 141 | fi 142 | 143 | TIMEOUT=${TIMEOUT:-15} 144 | STRICT=${STRICT:-0} 145 | CHILD=${CHILD:-0} 146 | QUIET=${QUIET:-0} 147 | 148 | # check to see if timeout is from busybox? 149 | # check to see if timeout is from busybox? 150 | TIMEOUT_PATH=$(realpath $(which timeout)) 151 | if [[ $TIMEOUT_PATH =~ "busybox" ]]; then 152 | ISBUSY=1 153 | BUSYTIMEFLAG="-t" 154 | else 155 | ISBUSY=0 156 | BUSYTIMEFLAG="" 157 | fi 158 | 159 | if [[ $CHILD -gt 0 ]]; then 160 | wait_for 161 | RESULT=$? 162 | exit $RESULT 163 | else 164 | if [[ $TIMEOUT -gt 0 ]]; then 165 | wait_for_wrapper 166 | RESULT=$? 167 | else 168 | wait_for 169 | RESULT=$? 170 | fi 171 | fi 172 | 173 | if [[ $CLI != "" ]]; then 174 | if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then 175 | echoerr "$cmdname: strict mode, refusing to execute subprocess" 176 | exit $RESULT 177 | fi 178 | exec $CLI 179 | else 180 | exit $RESULT 181 | fi 182 | --------------------------------------------------------------------------------