├── .gitignore
├── README.md
├── assets
│   ├── argmax.png
│   └── favicon.ico
├── data
│   └── product_images.parquet
├── notebooks
│   └── explanations.ipynb
├── requirements.txt
├── server.py
├── src
│   └── .gitkeep
└── templates
    ├── 404.html
    ├── index.html
    └── results.html
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | *.npy 132 | *.parquet 133 | *.csv 134 | data/*.json 135 | *:Zone.Identifier 136 | *.zip 137 | *.npy 138 | data/clip_emb/ 139 | upload/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # We are no longer accepting applications 3 | ## Submission Deadline was Dec 21st, 2022 4 | Thank you for spending the time to work on this task. 5 | 6 | We have received an overwhelming number of submissions, and reviewing all the submissions takes more time than expected. 7 | 8 | 9 | If you submitted the application form on time, you will be contacted shortly. 10 | 11 | In the follow-up interview you will be asked to demonstrate the working web-server and extend it. 12 | Make sure you understand all of the code presented in this repo (including Flask, HTMX, vecsim, etc.). 13 | 14 | Please watch [this video](https://argmax.ml/jr2) before your scheduled interview. 15 | 16 | Thank you so much for your time, and keep in touch. 
17 | 18 | ![Argmax](https://raw.githubusercontent.com/argmaxml/image-search/master/assets/argmax.png) 19 | 20 | Original text follows: 21 | # image-search task 22 | ## Who is this repo for? 23 | [Argmax](https://www.argmaxml.com) is hiring Junior Data Scientists. 24 | 25 | This repo is meant to be the first step in the process, and it will set the stage for the interview. 26 | 27 | ## About the position 28 | We are a boutique service company that specializes in recommendation systems. 29 | 30 | Building a recommender system requires understanding many aspects of user behaviour and item properties, and we utilize a variety of tools to do so (such as computer vision, natural language processing, time series, etc.). 31 | 32 | An ideal candidate would be someone who is **proficient in python**, **curious** and able to do **independent research** when necessary. 33 | 34 | Our offices are in Ramat-Gan, Jabotinsky st. 155, and we work from there one day a week; the rest of the week we work from home or from clients' premises. 35 | 36 | ## Some videos from past projects 37 | 38 | 1. [Benjamin Kempinski on offline metrics](https://www.youtube.com/watch?v=5OPa2RYL5VI) 39 | 1. [Daniel Hen & Uri Goren on pricing with contextual bandits](https://www.youtube.com/watch?v=IJtNBbINKbI) 40 | 1. [Ran Dan on column matching in databases](https://www.youtube.com/watch?v=bml3_U2RNKo) 41 | 1. [Uri's webinar on Contextual bandits](https://www.youtube.com/watch?v=7XDTYkUPN84&list=PLqkckaeDLF4IDdKltyBwx8jLaz5nwDPQU&index=16) 42 | 43 | ## Instructions notebook: 44 | Please read [this notebook](https://github.com/argmaxml/image-search/blob/master/notebooks/explanations.ipynb) for background, motivation and submission instructions. 
45 | -------------------------------------------------------------------------------- /assets/argmax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmaxml/image-search/0913f690b9ba597259496be79f7edc9155d7fb16/assets/argmax.png -------------------------------------------------------------------------------- /assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmaxml/image-search/0913f690b9ba597259496be79f7edc9155d7fb16/assets/favicon.ico -------------------------------------------------------------------------------- /data/product_images.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmaxml/image-search/0913f690b9ba597259496be79f7edc9155d7fb16/data/product_images.parquet -------------------------------------------------------------------------------- /notebooks/explanations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from pathlib import Path\n", 12 | "data_dir = Path(\"../data\").absolute()" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/html": [ 23 | "
\n", 24 | "\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
asintitleprimary_image
3996B0949GBY28Cincinnati Bengals NFL Mens Gradient Wordmark ...https://m.media-amazon.com/images/I/41pNkBE+zC...
78956B0829NLSX7USB C Coiled Cable for Car, Baseus Retractable...https://m.media-amazon.com/images/I/41uqUkIsvG...
52885B091D171NHMexican Slaps Lollipop Candy Green Apple Flavo...https://m.media-amazon.com/images/I/51cZMusUWh...
53735B08S721WDGMICPANG Knife Sharpener 3 Stage Knife Sharpeni...https://m.media-amazon.com/images/I/41fIlOCdMa...
54315B08LPMTSCNHONOR Band 6 Smart Watch Fitness Tracker Watch...https://m.media-amazon.com/images/I/41A98p+ro0...
34528B089Y75PN2Artistic Weavers Gaillard Modern Abstract Runn...https://m.media-amazon.com/images/I/61+UP0RkLY...
59390B09GV6HQBVBXYJDJ Men's Running Shoes Walking Trainers Sn...https://m.media-amazon.com/images/I/41wFtILOyF...
57204B09SWKGR8H50FT Expandable Garden Hose Water Hose with 10...https://m.media-amazon.com/images/I/61lEcqpuWL...
59688B09ZQXJSB8BTFBM Women Casual Long Sleeve Ruched Wrap Dre...https://m.media-amazon.com/images/I/410OOtbl+-...
47911B09JNJX39PUnder Armour Mens ArmourFleece Twist Hoodie , ...https://m.media-amazon.com/images/I/41U8hownKn...
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " asin title \\\n", 113 | "3996 B0949GBY28 Cincinnati Bengals NFL Mens Gradient Wordmark ... \n", 114 | "78956 B0829NLSX7 USB C Coiled Cable for Car, Baseus Retractable... \n", 115 | "52885 B091D171NH Mexican Slaps Lollipop Candy Green Apple Flavo... \n", 116 | "53735 B08S721WDG MICPANG Knife Sharpener 3 Stage Knife Sharpeni... \n", 117 | "54315 B08LPMTSCN HONOR Band 6 Smart Watch Fitness Tracker Watch... \n", 118 | "34528 B089Y75PN2 Artistic Weavers Gaillard Modern Abstract Runn... \n", 119 | "59390 B09GV6HQBV BXYJDJ Men's Running Shoes Walking Trainers Sn... \n", 120 | "57204 B09SWKGR8H 50FT Expandable Garden Hose Water Hose with 10... \n", 121 | "59688 B09ZQXJSB8 BTFBM Women Casual Long Sleeve Ruched Wrap Dre... \n", 122 | "47911 B09JNJX39P Under Armour Mens ArmourFleece Twist Hoodie , ... \n", 123 | "\n", 124 | " primary_image \n", 125 | "3996 https://m.media-amazon.com/images/I/41pNkBE+zC... \n", 126 | "78956 https://m.media-amazon.com/images/I/41uqUkIsvG... \n", 127 | "52885 https://m.media-amazon.com/images/I/51cZMusUWh... \n", 128 | "53735 https://m.media-amazon.com/images/I/41fIlOCdMa... \n", 129 | "54315 https://m.media-amazon.com/images/I/41A98p+ro0... \n", 130 | "34528 https://m.media-amazon.com/images/I/61+UP0RkLY... \n", 131 | "59390 https://m.media-amazon.com/images/I/41wFtILOyF... \n", 132 | "57204 https://m.media-amazon.com/images/I/61lEcqpuWL... \n", 133 | "59688 https://m.media-amazon.com/images/I/410OOtbl+-... \n", 134 | "47911 https://m.media-amazon.com/images/I/41U8hownKn... 
" 135 | ] 136 | }, 137 | "execution_count": 2, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "df = pd.read_parquet(data_dir / \"product_images.parquet\")\n", 144 | "df.sample(10)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "# Task description\n", 152 | "## The Data\n", 153 | "The dataframe contains the top 100k best-selling items on Amazon (as of November 2022) has 3 columns\n", 154 | "\n", 155 | "1. `asin` - The Amazon identifier.\n", 156 | "1. `title` - The product title, as listed on the Amazon store.\n", 157 | "1. `primary_image` - The image to be listed in search results.\n", 158 | "\n", 159 | "## Goal\n", 160 | "The goal of the task is be able to search products both by textual similarity, and by image similarity.\n", 161 | "\n", 162 | "For example, a customer walking down the street could take a picture of a red dress she likes and get similar items from Amazon.\n", 163 | "\n", 164 | "Altenatively, that same customer might open the Amazon website and search for \"red dress\" and find items that correspond to that query.\n", 165 | "\n", 166 | "## Implementation\n", 167 | "\n", 168 | "### Embedding\n", 169 | "We will use [CLIP](https://github.com/openai/CLIP) embedding for this task.\n", 170 | "\n", 171 | "\n", 172 | "CLIP allows us to link images with their description and map them to the same embedding space.\n", 173 | "\n", 174 | "### Similarity search\n", 175 | "\n", 176 | "Once the embedding is done, we need to run a nearest-neighbor search using the `cosine` similarity measure.\n", 177 | "\n", 178 | "The products that are closest to the query vector should (hopefully) be similar to the customer's intentions.\n", 179 | "\n", 180 | "The query vector could be a result of either `CLIP` image embedding or `CLIP` textual embedding.\n", 181 | "\n", 182 | "We will use the [vecsim](https://github.com/argmaxml/vecsim) module to do the similarity 
search.\n", 183 | "\n", 184 | "### Serving\n", 185 | "\n", 186 | "We used [Flask](https://flask.palletsprojects.com/en/2.2.x/) to implement the web-server, the code is at `server.py`.\n", 187 | "\n", 188 | "**Note**: The server code cotains several `TODO:` comments, you will need to implement. The server is currently functional and it outputs random results.\n", 189 | "\n", 190 | "# Submission\n", 191 | "\n", 192 | "\n", 193 | "1. Please clone this repo to a **private** repo on your github account.\n", 194 | "1. Implement the missing parts.\n", 195 | "1. Please fill in [this form](https://forms.gle/apMr8zPLbBf9pQY7A).\n", 196 | "1. Once done, please schdule an interview with Uri to review the code\n", 197 | "\n", 198 | "## Submission deadline:\n", 199 | "December 21st, 2022\n", 200 | "\n", 201 | "## Good luck !\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [] 210 | } 211 | ], 212 | "metadata": { 213 | "kernelspec": { 214 | "display_name": "Python 3", 215 | "language": "python", 216 | "name": "python3" 217 | }, 218 | "language_info": { 219 | "codemirror_mode": { 220 | "name": "ipython", 221 | "version": 3 222 | }, 223 | "file_extension": ".py", 224 | "mimetype": "text/x-python", 225 | "name": "python", 226 | "nbconvert_exporter": "python", 227 | "pygments_lexer": "ipython3", 228 | "version": "3.8.10" 229 | } 230 | }, 231 | "nbformat": 4, 232 | "nbformat_minor": 4 233 | } 234 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas>=1.4.1 2 | numpy>=1.22.2 3 | vecsim>=0.0.2 4 | git+https://github.com/openai/CLIP.git 5 | Flask>=2.0.1 6 | pyarrow==7.0.0 7 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | 
import sys, json, collections, random 2 | sys.path.append("src") 3 | from pathlib import Path 4 | from datetime import datetime, timedelta 5 | from dataclasses import dataclass 6 | from operator import itemgetter as at 7 | from operator import attrgetter as dot 8 | import pandas as pd 9 | import numpy as np 10 | from flask import Flask, request, send_from_directory, render_template, redirect, url_for, jsonify, Response, flash 11 | from werkzeug.utils import secure_filename 12 | from typing import List, Dict, Tuple, Optional 13 | from vecsim import SciKitIndex, RedisIndex 14 | 15 | __dir__ = Path(__file__).absolute().parent 16 | upload_dir = __dir__ / "upload" 17 | data_dir = __dir__ / "data" 18 | upload_dir.mkdir(exist_ok=True) 19 | NUMBER_OF_RESULTS = 12 20 | app = Flask(__name__) 21 | app.secret_key = "change-me"  # flash() requires a secret key; use a real secret in production 22 | @dataclass 23 | class Recommendation: 24 | id: int 25 | image: str 26 | title: str 27 | highlight: bool 28 | distance: float 29 | 30 | @dataclass 31 | class Item: 32 | id: int 33 | image: str 34 | title: str 35 | 36 | 37 | 38 | @app.route('/favicon.ico') 39 | def favicon(): 40 | return send_from_directory('assets', 'favicon.ico') 41 | 42 | @app.route('/assets/<path:path>') 43 | def serve_assets(path): 44 | return send_from_directory('assets', path) 45 | 46 | 47 | def allowed_file(filename): 48 | return '.' 
in filename and \ 49 | filename.rsplit('.', 1)[1].lower() in {'jpg', 'jpeg', 'png'} 50 | 51 | def embed_image(image_path): 52 | # placeholder: random 512-dim vector 53 | # TODO: implement with CLIP 54 | return np.random.rand(512) 55 | 56 | def embed_text(text): 57 | # placeholder: random 512-dim vector 58 | # TODO: implement with CLIP 59 | return np.random.rand(512) 60 | 61 | @app.route('/imgsearch', methods=['POST']) 62 | def imgsearch(): 63 | if 'file' not in request.files: 64 | flash('No file part') 65 | return redirect(url_for('index')) 66 | file = request.files['file'] 67 | if file.filename == '': 68 | flash('No selected file') 69 | return redirect(url_for('index')) 70 | if file and allowed_file(file.filename): 71 | filename = secure_filename(file.filename) 72 | file.save(upload_dir/filename) 73 | 74 | vec = embed_image(upload_dir/filename) 75 | (upload_dir/filename).unlink() 76 | dists, ids = sim.search(vec, NUMBER_OF_RESULTS) 77 | dist_by_id = dict(zip(ids, dists))  # keep each distance paired with its item id 78 | df_results = df[df["id"].isin(ids)] 79 | recs = sorted( 80 | (Recommendation(row["id"], row["primary_image"], row["title"], False, round(dist_by_id[row["id"]] * 100, 3)) 81 | for _, row in df_results.iterrows()), 82 | key=dot("distance")) 83 | return render_template('index.html', items=recs, recommendations=recs) 84 | else: 85 | return redirect(url_for('index')) 86 | 87 | @app.route('/') 88 | def index(): 89 | recs = [] 90 | 91 | return render_template('index.html', recommendations=recs) 92 | 93 | @app.route('/txtsearch', methods=['POST']) 94 | def txtsearch(): 95 | txt = str(request.form.get('txt', "")) 96 | vec = embed_text(txt) 97 | dists, ids = sim.search(vec, NUMBER_OF_RESULTS) 98 | dist_by_id = dict(zip(ids, dists))  # keep each distance paired with its item id 99 | df_results = df[df["id"].isin(ids)] 100 | recs = sorted( 101 | (Recommendation(row["id"], row["primary_image"], row["title"], False, round(dist_by_id[row["id"]] * 100, 3)) 102 | for _, row in df_results.iterrows()), 103 | key=dot("distance")) 104 | return render_template('results.html', recommendations=recs) 105 | 106 | 107 | @app.after_request 108 | def add_no_cache(response): 109 | if request.endpoint != "static": 110 | 
response.headers["Cache-Control"] = "no-cache" 111 | response.headers["Pragma"] = "no-cache" 112 | return response 113 | 114 | 115 | @app.errorhandler(404) 116 | def page_not_found(e): 117 | return render_template("404.html") 118 | 119 | 120 | 121 | if __name__ == "__main__": 122 | print("Loading data...") 123 | SAMPLE_SIZE = 2000 124 | # TODO: 125 | # with (data_dir/"clip_ids.json").open('r') as f: 126 | # embedding_ids = json.load(f) 127 | df = pd.read_parquet(data_dir/"product_images.parquet") 128 | df=df[df["primary_image"].str.endswith(".jpg")|df["primary_image"].str.endswith(".png")].rename(columns={"asin":"id"}) 129 | # TODO: remove this line and read the proper ids 130 | embedding_ids = list(df["id"].sample(SAMPLE_SIZE)) 131 | df["title"]=df["title"].fillna("") 132 | df["has_emb"]=df["id"].isin(embedding_ids) 133 | df=df[df["has_emb"]] 134 | 135 | print("Indexing...") 136 | sim = SciKitIndex("cosine",512) 137 | # TODO: 138 | # item_embedding = np.load(data_dir/"clip_emb.npy") 139 | item_embedding = np.random.random((SAMPLE_SIZE,512)) 140 | sim.add_items(item_embedding, embedding_ids) 141 | sim.init() 142 | 143 | print("Starting server...") 144 | app.run(port=8080, host='0.0.0.0', debug=True) -------------------------------------------------------------------------------- /src/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmaxml/image-search/0913f690b9ba597259496be79f7edc9155d7fb16/src/.gitkeep -------------------------------------------------------------------------------- /templates/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Not Found 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
16 |
17 |
18 | 19 |
20 |
21 |

Page not found

22 |
23 |
24 | 25 | 26 | -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Argmax Search Demo 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 24 | 25 | 26 |
27 |
28 |
29 | 30 |
31 |
32 |

Text+Image Search Demo

33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 | 41 |
42 |
43 |
44 | 45 |
46 |
47 |
48 |
49 | 50 |
51 | 52 |
53 |
54 |
55 | 58 |
59 |
60 |
61 | {% include 'results.html' %} 62 |
63 |
64 |
65 | 66 |
67 |
68 | 69 | 70 | -------------------------------------------------------------------------------- /templates/results.html: -------------------------------------------------------------------------------- 1 |
2 | {% for rec in recommendations %} 3 |
4 | {% if rec.highlight %} 5 |
6 | {% else %} 7 |
8 | {% endif %} 9 |
10 |

{{rec.title}}

11 |
12 |
13 | 14 |
15 |
16 |
17 | {% endfor %} 18 |
--------------------------------------------------------------------------------
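The pipeline the notebook describes — embed items, index their vectors, then answer queries by cosine-distance nearest-neighbor search — can be sketched end-to-end without the real model or index. The sketch below is a minimal stand-in: random vectors take the place of CLIP embeddings, a brute-force NumPy search takes the place of vecsim's `SciKitIndex`, and the `cosine_search` helper and `ASIN…`-style ids are illustrative, not part of the repo.

```python
import numpy as np

def cosine_search(index_vecs, ids, query, k):
    # Cosine distance = 1 - cosine similarity; smaller means more similar.
    normed = index_vecs / np.linalg.norm(index_vecs, axis=1, keepdims=True)
    dists = 1.0 - normed @ (query / np.linalg.norm(query))
    order = np.argsort(dists)[:k]          # k smallest distances, ascending
    return dists[order], [ids[i] for i in order]

rng = np.random.default_rng(0)
item_embeddings = rng.random((1000, 512))   # stand-in for CLIP item vectors
ids = [f"ASIN{i:05d}" for i in range(1000)] # stand-in for Amazon asins
query = item_embeddings[42]                 # query with a vector we indexed

dists, top_ids = cosine_search(item_embeddings, ids, query, k=5)
dist_by_id = dict(zip(top_ids, dists))      # keep distances paired with their ids
# A stored vector is its own nearest neighbor, at distance ~0.
```

Swapping the random vectors for real CLIP embeddings (and the helper for an index such as `SciKitIndex`) keeps the same shape of inputs and outputs: a (distances, ids) pair sorted by ascending distance, which is the shape `server.py` expects back from `sim.search`.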