├── .gitignore
├── .pre-commit-config.yaml
├── README.md
├── art
    ├── 1.png
    ├── 2.png
    ├── 3.png
    ├── 4.png
    ├── 5.png
    └── ChatGPT.png
├── notebooks
    ├── Cosine_vs_Dot.ipynb
    ├── CrossEncoder.ipynb
    ├── Document_Chunks.ipynb
    ├── Embedding_Size.ipynb
    ├── HNSW_Hyperparam_Search.ipynb
    ├── LSH.ipynb
    ├── Mistral_7b_rag.ipynb
    ├── Product_Quantization.ipynb
    ├── Scalar_and_Binary_Quantization.ipynb
    └── Semantic_Search_Basics.ipynb
├── postgres_vector_length
    ├── main.py
    ├── postgres.py
    └── requirements.txt
└── slides.pdf
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | notebooks/data/
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 | cover/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | .pybuilder/
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | # For a library or package, you might want to ignore these files since the code is
90 | # intended to run in multiple environments; otherwise, check them in:
91 | # .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # poetry
101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102 | # This is especially recommended for binary packages to ensure reproducibility, and is more
103 | # commonly ignored for libraries.
104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105 | #poetry.lock
106 |
107 | # pdm
108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109 | #pdm.lock
110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111 | # in version control.
112 | # https://pdm.fming.dev/#use-with-ide
113 | .pdm.toml
114 |
115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116 | __pypackages__/
117 |
118 | # Celery stuff
119 | celerybeat-schedule
120 | celerybeat.pid
121 |
122 | # SageMath parsed files
123 | *.sage.py
124 |
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 |
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 |
138 | # Rope project settings
139 | .ropeproject
140 |
141 | # mkdocs documentation
142 | /site
143 |
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 |
149 | # Pyre type checker
150 | .pyre/
151 |
152 | # pytype static type analyzer
153 | .pytype/
154 |
155 | # Cython debug symbols
156 | cython_debug/
157 |
158 | # PyCharm
159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161 | # and can be added to the global gitignore or merged into this file. For a more nuclear
162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163 | #.idea/
164 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_language_version:
2 | python: python3
3 |
4 | repos:
5 | #############################################################################
6 | # Misc
7 | #############################################################################
8 | - repo: https://github.com/pre-commit/pre-commit-hooks
9 | rev: v4.5.0
10 | hooks:
11 | - id: check-merge-conflict # Searches for merge conflict markers within files.
12 | - id: check-added-large-files # Blocks commits that add large files. The hook's default limit is 500kB;
13 | # it is raised to 10000kB here through the '--maxkb' argument in 'args'.
14 | # exclude: 'your_dir/.*'
15 | args: ["--maxkb=10000"]
16 | - id: check-case-conflict # Identifies potential case-insensitive file name conflicts.
17 | - id: check-ast # Validates the syntax of Python files.
18 | - id: check-symlinks # Detects broken symlinks.
19 | - id: trailing-whitespace # Removes any trailing whitespace at the end of lines.
20 | - id: end-of-file-fixer # Ensures files end with a single newline or are empty.
21 |
22 | - repo: https://github.com/pre-commit/pre-commit-hooks
23 | rev: v4.5.0
24 | hooks:
25 | - id: check-json # Validates JSON files to ensure they are properly formatted and syntactically correct.
26 | types: [json]
27 | - id: check-toml # Checks TOML files for errors and format issues to ensure valid syntax.
28 | types: [toml]
29 |
30 | - repo: https://github.com/astral-sh/ruff-pre-commit
31 | rev: v0.1.5
32 | hooks:
33 | # Run the linter.
34 | - id: ruff
35 | types_or: [python, pyi, jupyter]
36 | args: [--fix, --ignore, E402, --ignore, F821]
37 | # Run the formatter.
38 | - id: ruff-format
39 | types_or: [python, pyi, jupyter]
40 |
41 | - repo: https://github.com/pre-commit/mirrors-eslint
42 | rev: "v8.54.0"
43 | hooks:
44 | - id: eslint
45 | entry: bash -c 'cd web && eslint'
46 | files: \.[jt]sx?$ # *.js, *.jsx, *.ts, and *.tsx
47 | types: [file]
48 |
49 | - repo: https://github.com/pre-commit/mirrors-prettier
50 | rev: "v3.1.0"
51 | hooks:
52 | - id: prettier
53 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Semantic Search with LLMs
2 |
3 | ### Purpose
4 |
5 | This repository contains the accompanying code for [Shaan](https://www.shaankhosla.com)'s [3-hour live training](https://www.oreilly.com/live-events/semantic-search-with-llms/0790145045035/) offered on O'Reilly. The training took place on January 9th and July 11th.
6 |
7 | The "repo art" was generated by DALL-E, using the description of the class as the prompt. Each image is an artistic take on "Semantic Search with LLMs".
8 |
9 | | | |
10 | | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
11 | |  |  |
12 | |  |  |
13 |
--------------------------------------------------------------------------------
/art/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/1.png
--------------------------------------------------------------------------------
/art/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/2.png
--------------------------------------------------------------------------------
/art/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/3.png
--------------------------------------------------------------------------------
/art/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/4.png
--------------------------------------------------------------------------------
/art/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/5.png
--------------------------------------------------------------------------------
/art/ChatGPT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/ChatGPT.png
--------------------------------------------------------------------------------
/notebooks/CrossEncoder.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "\n",
9 | "\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 6,
15 | "metadata": {
16 | "id": "NLqmYKntd2vg"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%%capture\n",
21 | "\n",
22 | "%pip install sentence_transformers\n",
23 | "from sentence_transformers import CrossEncoder, SentenceTransformer\n",
24 | "import numpy as np"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 7,
30 | "metadata": {
31 | "id": "GGIrwN2pd2vh"
32 | },
33 | "outputs": [],
34 | "source": [
35 | "# Cosine similarity: dot product of v1 and v2 divided by the product of their norms\n",
36 | "\n",
37 | "\n",
38 | "def cosine_similarity(v1, v2):\n",
39 | " return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 11,
45 | "metadata": {
46 | "id": "km_lJoz9d2vh"
47 | },
48 | "outputs": [],
49 | "source": [
50 | "# Sample query\n",
51 | "query = \"How many people live in Berlin?\"\n",
52 | "\n",
53 | "# Sample answers\n",
54 | "answers = [\n",
55 | " \"Berlin has a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.\", # Correct answer\n",
56 | " \"In 2020, the population of Germany's capital city surpassed 3.5 million.\", # Correct answer\n",
57 | " \"How many people live in Berlin? No clue\", # Distraction\n",
58 | " \"I visited Berlin last year; it seemed very crowded. Lots of people\", # Distraction\n",
59 | " \"Berlin, the capital of Germany, is known for its cultural landmarks and modern architecture.\", # Distraction\n",
60 | "]"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 13,
66 | "metadata": {
67 | "id": "gP9HBnijeE73"
68 | },
69 | "outputs": [],
70 | "source": [
71 | "embedding_model = SentenceTransformer(\"all-MiniLM-L6-v2\") # Common bi-encoder"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 21,
77 | "metadata": {
78 | "colab": {
79 | "base_uri": "https://localhost:8080/",
80 | "height": 54
81 | },
82 | "id": "Gxlo_AJTd2vj",
83 | "outputId": "6a438a41-3ffc-4c24-9311-a5dcaa5e8e32"
84 | },
85 | "outputs": [
86 | {
87 | "name": "stdout",
88 | "output_type": "stream",
89 | "text": [
90 | "[0.76697516, 0.5882465, 0.96647555, 0.78439665, 0.50811416]\n"
91 | ]
92 | },
93 | {
94 | "data": {
95 | "application/vnd.google.colaboratory.intrinsic+json": {
96 | "type": "string"
97 | },
98 | "text/plain": [
99 | "'How many people live in Berlin? No clue'"
100 | ]
101 | },
102 | "execution_count": 21,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "answer_embeddings = embedding_model.encode(answers)\n",
109 | "query_embedding = embedding_model.encode([query])[0]\n",
110 | "scores = [cosine_similarity(query_embedding, e) for e in answer_embeddings]\n",
111 | "print(scores)\n",
112 | "\n",
113 | "answers[np.argmax(scores)] # Incorrect answer: the bi-encoder ranks the distraction that repeats the query highest"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 15,
119 | "metadata": {
120 | "colab": {
121 | "base_uri": "https://localhost:8080/",
122 | "height": 177,
123 | "referenced_widgets": [
124 | "dbf6be4c7ae8432b97472c573793c4fe",
125 | "e057fa5920eb4044b37c27aedf2ebc10",
126 | "a4b70d2d1f934264bc9602b9796021bf",
127 | "bba613ed1fa64880a68a427d0422edd9",
128 | "e5b12cf54e08497e8224b535951ad212",
129 | "ea99016cff8a4d83aa568eb492d95350",
130 | "f3438533609f46c0ac3090e8ed0eaa45",
131 | "27c0dd4a616a4f10b05b4feb9a147786",
132 | "306bf26960f5461591c07c142cd2e4b4",
133 | "177be2dd62954c02894d0d49b9e8625e",
134 | "278bd7aa89ed4a5e9020bbcafa7fba6c",
135 | "992b6f13c18e41a5b12c8d53c72e2794",
136 | "74ebcd701a2d41fd8f91cd4823e5410e",
137 | "f4bbd4ca186b4eecb43b06b5b80183ee",
138 | "e21beed8f64745a7830bb86a0be547a7",
139 | "5d2c500f1f404603911a8e17265de46a",
140 | "8bb689f214b74880be64a412dccbedda",
141 | "912ba07cdadb42f788bafb5ff2823bb7",
142 | "5033346b251342d38691077e6b7e6efc",
143 | "dc573e8f87f64292b3861bb98688c848",
144 | "82f02d6db6a74b0086beb9771eb45a81",
145 | "7ee0a510bf084edabd07fbb7a0af2cbc",
146 | "e3fae2ff299e42eab96475c1e7d03267",
147 | "5f5090841ceb42cbb743c61080dcef24",
148 | "905086bf6d5c4418916175d20d8a12b1",
149 | "6b9afb8789744ab0b0f1514fe43bbd1a",
150 | "d38d00434ce5496f901640976bd3e43a",
151 | "b16d2e5f754748cebcd3f12996d397a4",
152 | "871b26c9361744b7aee4805299613811",
153 | "ec3e7e4f488c41f2beb8e78e5adb75e1",
154 | "a8448a72f95c4ccaa03960dc68b246e1",
155 | "ce4ba9bf00954b91a35b8e9286fa09a1",
156 | "befbff68d4664ae0bb16b453b94036d7",
157 | "7d8f9fdf562c4dc4a9a76c5a2e65f772",
158 | "3c004375ffcd45e29c372b02af3a9987",
159 | "6dbc701e30544611b5da580af45670ca",
160 | "6065d722e9544446ad9859e8613de1c7",
161 | "98273030cd864920825a7648597cb686",
162 | "e8562c35e1974a1ba434698bc9fe74bf",
163 | "e6a12e9ec06448a989bfb555c29c45b9",
164 | "5081cf300a7b439a902037963228a825",
165 | "dcd1f01a5bee4dc4a872579856bfe1c1",
166 | "c61e39fc5a75454c96ba62ad0f972f51",
167 | "b09195e6e84447ad8573108ab2b6d332",
168 | "309d154853534fd1be8514e7778b3038",
169 | "0647eb4475c54596a2ba66b42236923a",
170 | "c046065b830d42ffb8fba239d2af4c7a",
171 | "8d9fe715090c47eab07831ee0bc34a20",
172 | "873e16965eb14210913979a57f7481b1",
173 | "59e6ace03fcf4043ae8092089b26144c",
174 | "f2836e2bc9fd4de590f7db1dfa7b62f2",
175 | "95a901cfb5114d86a4b7c30508d567a8",
176 | "2e04db8b476649a785e9b4d6601c16ec",
177 | "8af543001d8f4ad887dd8aeb0776a871",
178 | "fa30c5c766914ae2b158324bf790a239"
179 | ]
180 | },
181 | "id": "IgZ_fH77eKO6",
182 | "outputId": "f6ae48f9-7219-4db9-9f43-1227c2fccecf"
183 | },
184 | "outputs": [
185 | {
186 | "data": {
187 | "application/vnd.jupyter.widget-view+json": {
188 | "model_id": "dbf6be4c7ae8432b97472c573793c4fe",
189 | "version_major": 2,
190 | "version_minor": 0
191 | },
192 | "text/plain": [
193 | "config.json: 0%| | 0.00/791 [00:00, ?B/s]"
194 | ]
195 | },
196 | "metadata": {},
197 | "output_type": "display_data"
198 | },
199 | {
200 | "data": {
201 | "application/vnd.jupyter.widget-view+json": {
202 | "model_id": "992b6f13c18e41a5b12c8d53c72e2794",
203 | "version_major": 2,
204 | "version_minor": 0
205 | },
206 | "text/plain": [
207 | "pytorch_model.bin: 0%| | 0.00/134M [00:00, ?B/s]"
208 | ]
209 | },
210 | "metadata": {},
211 | "output_type": "display_data"
212 | },
213 | {
214 | "data": {
215 | "application/vnd.jupyter.widget-view+json": {
216 | "model_id": "e3fae2ff299e42eab96475c1e7d03267",
217 | "version_major": 2,
218 | "version_minor": 0
219 | },
220 | "text/plain": [
221 | "tokenizer_config.json: 0%| | 0.00/316 [00:00, ?B/s]"
222 | ]
223 | },
224 | "metadata": {},
225 | "output_type": "display_data"
226 | },
227 | {
228 | "data": {
229 | "application/vnd.jupyter.widget-view+json": {
230 | "model_id": "7d8f9fdf562c4dc4a9a76c5a2e65f772",
231 | "version_major": 2,
232 | "version_minor": 0
233 | },
234 | "text/plain": [
235 | "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
236 | ]
237 | },
238 | "metadata": {},
239 | "output_type": "display_data"
240 | },
241 | {
242 | "data": {
243 | "application/vnd.jupyter.widget-view+json": {
244 | "model_id": "309d154853534fd1be8514e7778b3038",
245 | "version_major": 2,
246 | "version_minor": 0
247 | },
248 | "text/plain": [
249 | "special_tokens_map.json: 0%| | 0.00/112 [00:00, ?B/s]"
250 | ]
251 | },
252 | "metadata": {},
253 | "output_type": "display_data"
254 | }
255 | ],
256 | "source": [
257 | "cross_model = CrossEncoder(\"cross-encoder/ms-marco-MiniLM-L-12-v2\") # Cross-encoder"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": 20,
263 | "metadata": {
264 | "colab": {
265 | "base_uri": "https://localhost:8080/",
266 | "height": 54
267 | },
268 | "id": "ujEswMcud2vj",
269 | "outputId": "7c68b236-8d74-4ade-f872-a64b37e54564"
270 | },
271 | "outputs": [
272 | {
273 | "name": "stdout",
274 | "output_type": "stream",
275 | "text": [
276 | "[ 9.450561 3.8642488 5.0489817 0.7831064 -2.8230996]\n"
277 | ]
278 | },
279 | {
280 | "data": {
281 | "application/vnd.google.colaboratory.intrinsic+json": {
282 | "type": "string"
283 | },
284 | "text/plain": [
285 | "'Berlin has a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.'"
286 | ]
287 | },
288 | "execution_count": 20,
289 | "metadata": {},
290 | "output_type": "execute_result"
291 | }
292 | ],
293 | "source": [
294 | "# The cross-encoder scores each (query, answer) pair\n",
295 | "\n",
296 | "scores = cross_model.predict(\n",
297 | " [\n",
298 | " (query, answers[0]),\n",
299 | " (query, answers[1]),\n",
300 | " (query, answers[2]),\n",
301 | " (query, answers[3]),\n",
302 | " (query, answers[4]),\n",
303 | " ]\n",
304 | ")\n",
305 | "print(scores)\n",
306 | "answers[np.argmax(scores)] # Correct answer"
307 | ]
308 | },
309 | {
310 | "cell_type": "code",
311 | "execution_count": null,
312 | "metadata": {
313 | "id": "lrnxFOxEd2vj"
314 | },
315 | "outputs": [],
316 | "source": []
317 | }
318 | ],
319 | "metadata": {
320 | "colab": {
321 | "provenance": []
322 | },
323 | "kernelspec": {
324 | "display_name": "Python 3",
325 | "language": "python",
326 | "name": "python3"
327 | },
328 | "language_info": {
329 | "codemirror_mode": {
330 | "name": "ipython",
331 | "version": 3
332 | },
333 | "file_extension": ".py",
334 | "mimetype": "text/x-python",
335 | "name": "python",
336 | "nbconvert_exporter": "python",
337 | "pygments_lexer": "ipython3",
338 | "version": "3.11.5"
339 | },
340 | "widgets": {
341 | "application/vnd.jupyter.widget-state+json": {
342 | "0647eb4475c54596a2ba66b42236923a": {
343 | "model_module": "@jupyter-widgets/controls",
344 | "model_module_version": "1.5.0",
345 | "model_name": "HTMLModel",
346 | "state": {
347 | "_dom_classes": [],
348 | "_model_module": "@jupyter-widgets/controls",
349 | "_model_module_version": "1.5.0",
350 | "_model_name": "HTMLModel",
351 | "_view_count": null,
352 | "_view_module": "@jupyter-widgets/controls",
353 | "_view_module_version": "1.5.0",
354 | "_view_name": "HTMLView",
355 | "description": "",
356 | "description_tooltip": null,
357 | "layout": "IPY_MODEL_59e6ace03fcf4043ae8092089b26144c",
358 | "placeholder": "",
359 | "style": "IPY_MODEL_f2836e2bc9fd4de590f7db1dfa7b62f2",
360 | "value": "special_tokens_map.json: 100%"
361 | }
362 | },
363 | "177be2dd62954c02894d0d49b9e8625e": {
364 | "model_module": "@jupyter-widgets/base",
365 | "model_module_version": "1.2.0",
366 | "model_name": "LayoutModel",
367 | "state": {
368 | "_model_module": "@jupyter-widgets/base",
369 | "_model_module_version": "1.2.0",
370 | "_model_name": "LayoutModel",
371 | "_view_count": null,
372 | "_view_module": "@jupyter-widgets/base",
373 | "_view_module_version": "1.2.0",
374 | "_view_name": "LayoutView",
375 | "align_content": null,
376 | "align_items": null,
377 | "align_self": null,
378 | "border": null,
379 | "bottom": null,
380 | "display": null,
381 | "flex": null,
382 | "flex_flow": null,
383 | "grid_area": null,
384 | "grid_auto_columns": null,
385 | "grid_auto_flow": null,
386 | "grid_auto_rows": null,
387 | "grid_column": null,
388 | "grid_gap": null,
389 | "grid_row": null,
390 | "grid_template_areas": null,
391 | "grid_template_columns": null,
392 | "grid_template_rows": null,
393 | "height": null,
394 | "justify_content": null,
395 | "justify_items": null,
396 | "left": null,
397 | "margin": null,
398 | "max_height": null,
399 | "max_width": null,
400 | "min_height": null,
401 | "min_width": null,
402 | "object_fit": null,
403 | "object_position": null,
404 | "order": null,
405 | "overflow": null,
406 | "overflow_x": null,
407 | "overflow_y": null,
408 | "padding": null,
409 | "right": null,
410 | "top": null,
411 | "visibility": null,
412 | "width": null
413 | }
414 | },
415 | "278bd7aa89ed4a5e9020bbcafa7fba6c": {
416 | "model_module": "@jupyter-widgets/controls",
417 | "model_module_version": "1.5.0",
418 | "model_name": "DescriptionStyleModel",
419 | "state": {
420 | "_model_module": "@jupyter-widgets/controls",
421 | "_model_module_version": "1.5.0",
422 | "_model_name": "DescriptionStyleModel",
423 | "_view_count": null,
424 | "_view_module": "@jupyter-widgets/base",
425 | "_view_module_version": "1.2.0",
426 | "_view_name": "StyleView",
427 | "description_width": ""
428 | }
429 | },
430 | "27c0dd4a616a4f10b05b4feb9a147786": {
431 | "model_module": "@jupyter-widgets/base",
432 | "model_module_version": "1.2.0",
433 | "model_name": "LayoutModel",
434 | "state": {
435 | "_model_module": "@jupyter-widgets/base",
436 | "_model_module_version": "1.2.0",
437 | "_model_name": "LayoutModel",
438 | "_view_count": null,
439 | "_view_module": "@jupyter-widgets/base",
440 | "_view_module_version": "1.2.0",
441 | "_view_name": "LayoutView",
442 | "align_content": null,
443 | "align_items": null,
444 | "align_self": null,
445 | "border": null,
446 | "bottom": null,
447 | "display": null,
448 | "flex": null,
449 | "flex_flow": null,
450 | "grid_area": null,
451 | "grid_auto_columns": null,
452 | "grid_auto_flow": null,
453 | "grid_auto_rows": null,
454 | "grid_column": null,
455 | "grid_gap": null,
456 | "grid_row": null,
457 | "grid_template_areas": null,
458 | "grid_template_columns": null,
459 | "grid_template_rows": null,
460 | "height": null,
461 | "justify_content": null,
462 | "justify_items": null,
463 | "left": null,
464 | "margin": null,
465 | "max_height": null,
466 | "max_width": null,
467 | "min_height": null,
468 | "min_width": null,
469 | "object_fit": null,
470 | "object_position": null,
471 | "order": null,
472 | "overflow": null,
473 | "overflow_x": null,
474 | "overflow_y": null,
475 | "padding": null,
476 | "right": null,
477 | "top": null,
478 | "visibility": null,
479 | "width": null
480 | }
481 | },
482 | "2e04db8b476649a785e9b4d6601c16ec": {
483 | "model_module": "@jupyter-widgets/controls",
484 | "model_module_version": "1.5.0",
485 | "model_name": "ProgressStyleModel",
486 | "state": {
487 | "_model_module": "@jupyter-widgets/controls",
488 | "_model_module_version": "1.5.0",
489 | "_model_name": "ProgressStyleModel",
490 | "_view_count": null,
491 | "_view_module": "@jupyter-widgets/base",
492 | "_view_module_version": "1.2.0",
493 | "_view_name": "StyleView",
494 | "bar_color": null,
495 | "description_width": ""
496 | }
497 | },
498 | "306bf26960f5461591c07c142cd2e4b4": {
499 | "model_module": "@jupyter-widgets/controls",
500 | "model_module_version": "1.5.0",
501 | "model_name": "ProgressStyleModel",
502 | "state": {
503 | "_model_module": "@jupyter-widgets/controls",
504 | "_model_module_version": "1.5.0",
505 | "_model_name": "ProgressStyleModel",
506 | "_view_count": null,
507 | "_view_module": "@jupyter-widgets/base",
508 | "_view_module_version": "1.2.0",
509 | "_view_name": "StyleView",
510 | "bar_color": null,
511 | "description_width": ""
512 | }
513 | },
514 | "309d154853534fd1be8514e7778b3038": {
515 | "model_module": "@jupyter-widgets/controls",
516 | "model_module_version": "1.5.0",
517 | "model_name": "HBoxModel",
518 | "state": {
519 | "_dom_classes": [],
520 | "_model_module": "@jupyter-widgets/controls",
521 | "_model_module_version": "1.5.0",
522 | "_model_name": "HBoxModel",
523 | "_view_count": null,
524 | "_view_module": "@jupyter-widgets/controls",
525 | "_view_module_version": "1.5.0",
526 | "_view_name": "HBoxView",
527 | "box_style": "",
528 | "children": [
529 | "IPY_MODEL_0647eb4475c54596a2ba66b42236923a",
530 | "IPY_MODEL_c046065b830d42ffb8fba239d2af4c7a",
531 | "IPY_MODEL_8d9fe715090c47eab07831ee0bc34a20"
532 | ],
533 | "layout": "IPY_MODEL_873e16965eb14210913979a57f7481b1"
534 | }
535 | },
536 | "3c004375ffcd45e29c372b02af3a9987": {
537 | "model_module": "@jupyter-widgets/controls",
538 | "model_module_version": "1.5.0",
539 | "model_name": "HTMLModel",
540 | "state": {
541 | "_dom_classes": [],
542 | "_model_module": "@jupyter-widgets/controls",
543 | "_model_module_version": "1.5.0",
544 | "_model_name": "HTMLModel",
545 | "_view_count": null,
546 | "_view_module": "@jupyter-widgets/controls",
547 | "_view_module_version": "1.5.0",
548 | "_view_name": "HTMLView",
549 | "description": "",
550 | "description_tooltip": null,
551 | "layout": "IPY_MODEL_e8562c35e1974a1ba434698bc9fe74bf",
552 | "placeholder": "",
553 | "style": "IPY_MODEL_e6a12e9ec06448a989bfb555c29c45b9",
554 | "value": "vocab.txt: 100%"
555 | }
556 | },
557 | "5033346b251342d38691077e6b7e6efc": {
558 | "model_module": "@jupyter-widgets/base",
559 | "model_module_version": "1.2.0",
560 | "model_name": "LayoutModel",
561 | "state": {
562 | "_model_module": "@jupyter-widgets/base",
563 | "_model_module_version": "1.2.0",
564 | "_model_name": "LayoutModel",
565 | "_view_count": null,
566 | "_view_module": "@jupyter-widgets/base",
567 | "_view_module_version": "1.2.0",
568 | "_view_name": "LayoutView",
569 | "align_content": null,
570 | "align_items": null,
571 | "align_self": null,
572 | "border": null,
573 | "bottom": null,
574 | "display": null,
575 | "flex": null,
576 | "flex_flow": null,
577 | "grid_area": null,
578 | "grid_auto_columns": null,
579 | "grid_auto_flow": null,
580 | "grid_auto_rows": null,
581 | "grid_column": null,
582 | "grid_gap": null,
583 | "grid_row": null,
584 | "grid_template_areas": null,
585 | "grid_template_columns": null,
586 | "grid_template_rows": null,
587 | "height": null,
588 | "justify_content": null,
589 | "justify_items": null,
590 | "left": null,
591 | "margin": null,
592 | "max_height": null,
593 | "max_width": null,
594 | "min_height": null,
595 | "min_width": null,
596 | "object_fit": null,
597 | "object_position": null,
598 | "order": null,
599 | "overflow": null,
600 | "overflow_x": null,
601 | "overflow_y": null,
602 | "padding": null,
603 | "right": null,
604 | "top": null,
605 | "visibility": null,
606 | "width": null
607 | }
608 | },
609 | "5081cf300a7b439a902037963228a825": {
610 | "model_module": "@jupyter-widgets/base",
611 | "model_module_version": "1.2.0",
612 | "model_name": "LayoutModel",
613 | "state": {
614 | "_model_module": "@jupyter-widgets/base",
615 | "_model_module_version": "1.2.0",
616 | "_model_name": "LayoutModel",
617 | "_view_count": null,
618 | "_view_module": "@jupyter-widgets/base",
619 | "_view_module_version": "1.2.0",
620 | "_view_name": "LayoutView",
621 | "align_content": null,
622 | "align_items": null,
623 | "align_self": null,
624 | "border": null,
625 | "bottom": null,
626 | "display": null,
627 | "flex": null,
628 | "flex_flow": null,
629 | "grid_area": null,
630 | "grid_auto_columns": null,
631 | "grid_auto_flow": null,
632 | "grid_auto_rows": null,
633 | "grid_column": null,
634 | "grid_gap": null,
635 | "grid_row": null,
636 | "grid_template_areas": null,
637 | "grid_template_columns": null,
638 | "grid_template_rows": null,
639 | "height": null,
640 | "justify_content": null,
641 | "justify_items": null,
642 | "left": null,
643 | "margin": null,
644 | "max_height": null,
645 | "max_width": null,
646 | "min_height": null,
647 | "min_width": null,
648 | "object_fit": null,
649 | "object_position": null,
650 | "order": null,
651 | "overflow": null,
652 | "overflow_x": null,
653 | "overflow_y": null,
654 | "padding": null,
655 | "right": null,
656 | "top": null,
657 | "visibility": null,
658 | "width": null
659 | }
660 | },
661 | "59e6ace03fcf4043ae8092089b26144c": {
662 | "model_module": "@jupyter-widgets/base",
663 | "model_module_version": "1.2.0",
664 | "model_name": "LayoutModel",
665 | "state": {
666 | "_model_module": "@jupyter-widgets/base",
667 | "_model_module_version": "1.2.0",
668 | "_model_name": "LayoutModel",
669 | "_view_count": null,
670 | "_view_module": "@jupyter-widgets/base",
671 | "_view_module_version": "1.2.0",
672 | "_view_name": "LayoutView",
673 | "align_content": null,
674 | "align_items": null,
675 | "align_self": null,
676 | "border": null,
677 | "bottom": null,
678 | "display": null,
679 | "flex": null,
680 | "flex_flow": null,
681 | "grid_area": null,
682 | "grid_auto_columns": null,
683 | "grid_auto_flow": null,
684 | "grid_auto_rows": null,
685 | "grid_column": null,
686 | "grid_gap": null,
687 | "grid_row": null,
688 | "grid_template_areas": null,
689 | "grid_template_columns": null,
690 | "grid_template_rows": null,
691 | "height": null,
692 | "justify_content": null,
693 | "justify_items": null,
694 | "left": null,
695 | "margin": null,
696 | "max_height": null,
697 | "max_width": null,
698 | "min_height": null,
699 | "min_width": null,
700 | "object_fit": null,
701 | "object_position": null,
702 | "order": null,
703 | "overflow": null,
704 | "overflow_x": null,
705 | "overflow_y": null,
706 | "padding": null,
707 | "right": null,
708 | "top": null,
709 | "visibility": null,
710 | "width": null
711 | }
712 | },
713 | "5d2c500f1f404603911a8e17265de46a": {
714 | "model_module": "@jupyter-widgets/base",
715 | "model_module_version": "1.2.0",
716 | "model_name": "LayoutModel",
717 | "state": {
718 | "_model_module": "@jupyter-widgets/base",
719 | "_model_module_version": "1.2.0",
720 | "_model_name": "LayoutModel",
721 | "_view_count": null,
722 | "_view_module": "@jupyter-widgets/base",
723 | "_view_module_version": "1.2.0",
724 | "_view_name": "LayoutView",
725 | "align_content": null,
726 | "align_items": null,
727 | "align_self": null,
728 | "border": null,
729 | "bottom": null,
730 | "display": null,
731 | "flex": null,
732 | "flex_flow": null,
733 | "grid_area": null,
734 | "grid_auto_columns": null,
735 | "grid_auto_flow": null,
736 | "grid_auto_rows": null,
737 | "grid_column": null,
738 | "grid_gap": null,
739 | "grid_row": null,
740 | "grid_template_areas": null,
741 | "grid_template_columns": null,
742 | "grid_template_rows": null,
743 | "height": null,
744 | "justify_content": null,
745 | "justify_items": null,
746 | "left": null,
747 | "margin": null,
748 | "max_height": null,
749 | "max_width": null,
750 | "min_height": null,
751 | "min_width": null,
752 | "object_fit": null,
753 | "object_position": null,
754 | "order": null,
755 | "overflow": null,
756 | "overflow_x": null,
757 | "overflow_y": null,
758 | "padding": null,
759 | "right": null,
760 | "top": null,
761 | "visibility": null,
762 | "width": null
763 | }
764 | },
765 | "5f5090841ceb42cbb743c61080dcef24": {
766 | "model_module": "@jupyter-widgets/controls",
767 | "model_module_version": "1.5.0",
768 | "model_name": "HTMLModel",
769 | "state": {
770 | "_dom_classes": [],
771 | "_model_module": "@jupyter-widgets/controls",
772 | "_model_module_version": "1.5.0",
773 | "_model_name": "HTMLModel",
774 | "_view_count": null,
775 | "_view_module": "@jupyter-widgets/controls",
776 | "_view_module_version": "1.5.0",
777 | "_view_name": "HTMLView",
778 | "description": "",
779 | "description_tooltip": null,
780 | "layout": "IPY_MODEL_b16d2e5f754748cebcd3f12996d397a4",
781 | "placeholder": "",
782 | "style": "IPY_MODEL_871b26c9361744b7aee4805299613811",
783 | "value": "tokenizer_config.json: 100%"
784 | }
785 | },
786 | "6065d722e9544446ad9859e8613de1c7": {
787 | "model_module": "@jupyter-widgets/controls",
788 | "model_module_version": "1.5.0",
789 | "model_name": "HTMLModel",
790 | "state": {
791 | "_dom_classes": [],
792 | "_model_module": "@jupyter-widgets/controls",
793 | "_model_module_version": "1.5.0",
794 | "_model_name": "HTMLModel",
795 | "_view_count": null,
796 | "_view_module": "@jupyter-widgets/controls",
797 | "_view_module_version": "1.5.0",
798 | "_view_name": "HTMLView",
799 | "description": "",
800 | "description_tooltip": null,
801 | "layout": "IPY_MODEL_c61e39fc5a75454c96ba62ad0f972f51",
802 | "placeholder": "",
803 | "style": "IPY_MODEL_b09195e6e84447ad8573108ab2b6d332",
804 | "value": " 232k/232k [00:00<00:00, 4.07MB/s]"
805 | }
806 | },
807 | "6b9afb8789744ab0b0f1514fe43bbd1a": {
808 | "model_module": "@jupyter-widgets/controls",
809 | "model_module_version": "1.5.0",
810 | "model_name": "HTMLModel",
811 | "state": {
812 | "_dom_classes": [],
813 | "_model_module": "@jupyter-widgets/controls",
814 | "_model_module_version": "1.5.0",
815 | "_model_name": "HTMLModel",
816 | "_view_count": null,
817 | "_view_module": "@jupyter-widgets/controls",
818 | "_view_module_version": "1.5.0",
819 | "_view_name": "HTMLView",
820 | "description": "",
821 | "description_tooltip": null,
822 | "layout": "IPY_MODEL_ce4ba9bf00954b91a35b8e9286fa09a1",
823 | "placeholder": "",
824 | "style": "IPY_MODEL_befbff68d4664ae0bb16b453b94036d7",
825 | "value": " 316/316 [00:00<00:00, 9.96kB/s]"
826 | }
827 | },
828 | "6dbc701e30544611b5da580af45670ca": {
829 | "model_module": "@jupyter-widgets/controls",
830 | "model_module_version": "1.5.0",
831 | "model_name": "FloatProgressModel",
832 | "state": {
833 | "_dom_classes": [],
834 | "_model_module": "@jupyter-widgets/controls",
835 | "_model_module_version": "1.5.0",
836 | "_model_name": "FloatProgressModel",
837 | "_view_count": null,
838 | "_view_module": "@jupyter-widgets/controls",
839 | "_view_module_version": "1.5.0",
840 | "_view_name": "ProgressView",
841 | "bar_style": "success",
842 | "description": "",
843 | "description_tooltip": null,
844 | "layout": "IPY_MODEL_5081cf300a7b439a902037963228a825",
845 | "max": 231508,
846 | "min": 0,
847 | "orientation": "horizontal",
848 | "style": "IPY_MODEL_dcd1f01a5bee4dc4a872579856bfe1c1",
849 | "value": 231508
850 | }
851 | },
852 | "74ebcd701a2d41fd8f91cd4823e5410e": {
853 | "model_module": "@jupyter-widgets/controls",
854 | "model_module_version": "1.5.0",
855 | "model_name": "HTMLModel",
856 | "state": {
857 | "_dom_classes": [],
858 | "_model_module": "@jupyter-widgets/controls",
859 | "_model_module_version": "1.5.0",
860 | "_model_name": "HTMLModel",
861 | "_view_count": null,
862 | "_view_module": "@jupyter-widgets/controls",
863 | "_view_module_version": "1.5.0",
864 | "_view_name": "HTMLView",
865 | "description": "",
866 | "description_tooltip": null,
867 | "layout": "IPY_MODEL_8bb689f214b74880be64a412dccbedda",
868 | "placeholder": "",
869 | "style": "IPY_MODEL_912ba07cdadb42f788bafb5ff2823bb7",
870 | "value": "pytorch_model.bin: 100%"
871 | }
872 | },
873 | "7d8f9fdf562c4dc4a9a76c5a2e65f772": {
874 | "model_module": "@jupyter-widgets/controls",
875 | "model_module_version": "1.5.0",
876 | "model_name": "HBoxModel",
877 | "state": {
878 | "_dom_classes": [],
879 | "_model_module": "@jupyter-widgets/controls",
880 | "_model_module_version": "1.5.0",
881 | "_model_name": "HBoxModel",
882 | "_view_count": null,
883 | "_view_module": "@jupyter-widgets/controls",
884 | "_view_module_version": "1.5.0",
885 | "_view_name": "HBoxView",
886 | "box_style": "",
887 | "children": [
888 | "IPY_MODEL_3c004375ffcd45e29c372b02af3a9987",
889 | "IPY_MODEL_6dbc701e30544611b5da580af45670ca",
890 | "IPY_MODEL_6065d722e9544446ad9859e8613de1c7"
891 | ],
892 | "layout": "IPY_MODEL_98273030cd864920825a7648597cb686"
893 | }
894 | },
895 | "7ee0a510bf084edabd07fbb7a0af2cbc": {
896 | "model_module": "@jupyter-widgets/controls",
897 | "model_module_version": "1.5.0",
898 | "model_name": "DescriptionStyleModel",
899 | "state": {
900 | "_model_module": "@jupyter-widgets/controls",
901 | "_model_module_version": "1.5.0",
902 | "_model_name": "DescriptionStyleModel",
903 | "_view_count": null,
904 | "_view_module": "@jupyter-widgets/base",
905 | "_view_module_version": "1.2.0",
906 | "_view_name": "StyleView",
907 | "description_width": ""
908 | }
909 | },
910 | "82f02d6db6a74b0086beb9771eb45a81": {
911 | "model_module": "@jupyter-widgets/base",
912 | "model_module_version": "1.2.0",
913 | "model_name": "LayoutModel",
914 | "state": {
915 | "_model_module": "@jupyter-widgets/base",
916 | "_model_module_version": "1.2.0",
917 | "_model_name": "LayoutModel",
918 | "_view_count": null,
919 | "_view_module": "@jupyter-widgets/base",
920 | "_view_module_version": "1.2.0",
921 | "_view_name": "LayoutView",
922 | "align_content": null,
923 | "align_items": null,
924 | "align_self": null,
925 | "border": null,
926 | "bottom": null,
927 | "display": null,
928 | "flex": null,
929 | "flex_flow": null,
930 | "grid_area": null,
931 | "grid_auto_columns": null,
932 | "grid_auto_flow": null,
933 | "grid_auto_rows": null,
934 | "grid_column": null,
935 | "grid_gap": null,
936 | "grid_row": null,
937 | "grid_template_areas": null,
938 | "grid_template_columns": null,
939 | "grid_template_rows": null,
940 | "height": null,
941 | "justify_content": null,
942 | "justify_items": null,
943 | "left": null,
944 | "margin": null,
945 | "max_height": null,
946 | "max_width": null,
947 | "min_height": null,
948 | "min_width": null,
949 | "object_fit": null,
950 | "object_position": null,
951 | "order": null,
952 | "overflow": null,
953 | "overflow_x": null,
954 | "overflow_y": null,
955 | "padding": null,
956 | "right": null,
957 | "top": null,
958 | "visibility": null,
959 | "width": null
960 | }
961 | },
962 | "871b26c9361744b7aee4805299613811": {
963 | "model_module": "@jupyter-widgets/controls",
964 | "model_module_version": "1.5.0",
965 | "model_name": "DescriptionStyleModel",
966 | "state": {
967 | "_model_module": "@jupyter-widgets/controls",
968 | "_model_module_version": "1.5.0",
969 | "_model_name": "DescriptionStyleModel",
970 | "_view_count": null,
971 | "_view_module": "@jupyter-widgets/base",
972 | "_view_module_version": "1.2.0",
973 | "_view_name": "StyleView",
974 | "description_width": ""
975 | }
976 | },
977 | "873e16965eb14210913979a57f7481b1": {
978 | "model_module": "@jupyter-widgets/base",
979 | "model_module_version": "1.2.0",
980 | "model_name": "LayoutModel",
981 | "state": {
982 | "_model_module": "@jupyter-widgets/base",
983 | "_model_module_version": "1.2.0",
984 | "_model_name": "LayoutModel",
985 | "_view_count": null,
986 | "_view_module": "@jupyter-widgets/base",
987 | "_view_module_version": "1.2.0",
988 | "_view_name": "LayoutView",
989 | "align_content": null,
990 | "align_items": null,
991 | "align_self": null,
992 | "border": null,
993 | "bottom": null,
994 | "display": null,
995 | "flex": null,
996 | "flex_flow": null,
997 | "grid_area": null,
998 | "grid_auto_columns": null,
999 | "grid_auto_flow": null,
1000 | "grid_auto_rows": null,
1001 | "grid_column": null,
1002 | "grid_gap": null,
1003 | "grid_row": null,
1004 | "grid_template_areas": null,
1005 | "grid_template_columns": null,
1006 | "grid_template_rows": null,
1007 | "height": null,
1008 | "justify_content": null,
1009 | "justify_items": null,
1010 | "left": null,
1011 | "margin": null,
1012 | "max_height": null,
1013 | "max_width": null,
1014 | "min_height": null,
1015 | "min_width": null,
1016 | "object_fit": null,
1017 | "object_position": null,
1018 | "order": null,
1019 | "overflow": null,
1020 | "overflow_x": null,
1021 | "overflow_y": null,
1022 | "padding": null,
1023 | "right": null,
1024 | "top": null,
1025 | "visibility": null,
1026 | "width": null
1027 | }
1028 | },
1029 | "8af543001d8f4ad887dd8aeb0776a871": {
1030 | "model_module": "@jupyter-widgets/base",
1031 | "model_module_version": "1.2.0",
1032 | "model_name": "LayoutModel",
1033 | "state": {
1034 | "_model_module": "@jupyter-widgets/base",
1035 | "_model_module_version": "1.2.0",
1036 | "_model_name": "LayoutModel",
1037 | "_view_count": null,
1038 | "_view_module": "@jupyter-widgets/base",
1039 | "_view_module_version": "1.2.0",
1040 | "_view_name": "LayoutView",
1041 | "align_content": null,
1042 | "align_items": null,
1043 | "align_self": null,
1044 | "border": null,
1045 | "bottom": null,
1046 | "display": null,
1047 | "flex": null,
1048 | "flex_flow": null,
1049 | "grid_area": null,
1050 | "grid_auto_columns": null,
1051 | "grid_auto_flow": null,
1052 | "grid_auto_rows": null,
1053 | "grid_column": null,
1054 | "grid_gap": null,
1055 | "grid_row": null,
1056 | "grid_template_areas": null,
1057 | "grid_template_columns": null,
1058 | "grid_template_rows": null,
1059 | "height": null,
1060 | "justify_content": null,
1061 | "justify_items": null,
1062 | "left": null,
1063 | "margin": null,
1064 | "max_height": null,
1065 | "max_width": null,
1066 | "min_height": null,
1067 | "min_width": null,
1068 | "object_fit": null,
1069 | "object_position": null,
1070 | "order": null,
1071 | "overflow": null,
1072 | "overflow_x": null,
1073 | "overflow_y": null,
1074 | "padding": null,
1075 | "right": null,
1076 | "top": null,
1077 | "visibility": null,
1078 | "width": null
1079 | }
1080 | },
1081 | "8bb689f214b74880be64a412dccbedda": {
1082 | "model_module": "@jupyter-widgets/base",
1083 | "model_module_version": "1.2.0",
1084 | "model_name": "LayoutModel",
1085 | "state": {
1086 | "_model_module": "@jupyter-widgets/base",
1087 | "_model_module_version": "1.2.0",
1088 | "_model_name": "LayoutModel",
1089 | "_view_count": null,
1090 | "_view_module": "@jupyter-widgets/base",
1091 | "_view_module_version": "1.2.0",
1092 | "_view_name": "LayoutView",
1093 | "align_content": null,
1094 | "align_items": null,
1095 | "align_self": null,
1096 | "border": null,
1097 | "bottom": null,
1098 | "display": null,
1099 | "flex": null,
1100 | "flex_flow": null,
1101 | "grid_area": null,
1102 | "grid_auto_columns": null,
1103 | "grid_auto_flow": null,
1104 | "grid_auto_rows": null,
1105 | "grid_column": null,
1106 | "grid_gap": null,
1107 | "grid_row": null,
1108 | "grid_template_areas": null,
1109 | "grid_template_columns": null,
1110 | "grid_template_rows": null,
1111 | "height": null,
1112 | "justify_content": null,
1113 | "justify_items": null,
1114 | "left": null,
1115 | "margin": null,
1116 | "max_height": null,
1117 | "max_width": null,
1118 | "min_height": null,
1119 | "min_width": null,
1120 | "object_fit": null,
1121 | "object_position": null,
1122 | "order": null,
1123 | "overflow": null,
1124 | "overflow_x": null,
1125 | "overflow_y": null,
1126 | "padding": null,
1127 | "right": null,
1128 | "top": null,
1129 | "visibility": null,
1130 | "width": null
1131 | }
1132 | },
1133 | "8d9fe715090c47eab07831ee0bc34a20": {
1134 | "model_module": "@jupyter-widgets/controls",
1135 | "model_module_version": "1.5.0",
1136 | "model_name": "HTMLModel",
1137 | "state": {
1138 | "_dom_classes": [],
1139 | "_model_module": "@jupyter-widgets/controls",
1140 | "_model_module_version": "1.5.0",
1141 | "_model_name": "HTMLModel",
1142 | "_view_count": null,
1143 | "_view_module": "@jupyter-widgets/controls",
1144 | "_view_module_version": "1.5.0",
1145 | "_view_name": "HTMLView",
1146 | "description": "",
1147 | "description_tooltip": null,
1148 | "layout": "IPY_MODEL_8af543001d8f4ad887dd8aeb0776a871",
1149 | "placeholder": "",
1150 | "style": "IPY_MODEL_fa30c5c766914ae2b158324bf790a239",
1151 | "value": " 112/112 [00:00<00:00, 4.15kB/s]"
1152 | }
1153 | },
1154 | "905086bf6d5c4418916175d20d8a12b1": {
1155 | "model_module": "@jupyter-widgets/controls",
1156 | "model_module_version": "1.5.0",
1157 | "model_name": "FloatProgressModel",
1158 | "state": {
1159 | "_dom_classes": [],
1160 | "_model_module": "@jupyter-widgets/controls",
1161 | "_model_module_version": "1.5.0",
1162 | "_model_name": "FloatProgressModel",
1163 | "_view_count": null,
1164 | "_view_module": "@jupyter-widgets/controls",
1165 | "_view_module_version": "1.5.0",
1166 | "_view_name": "ProgressView",
1167 | "bar_style": "success",
1168 | "description": "",
1169 | "description_tooltip": null,
1170 | "layout": "IPY_MODEL_ec3e7e4f488c41f2beb8e78e5adb75e1",
1171 | "max": 316,
1172 | "min": 0,
1173 | "orientation": "horizontal",
1174 | "style": "IPY_MODEL_a8448a72f95c4ccaa03960dc68b246e1",
1175 | "value": 316
1176 | }
1177 | },
1178 | "912ba07cdadb42f788bafb5ff2823bb7": {
1179 | "model_module": "@jupyter-widgets/controls",
1180 | "model_module_version": "1.5.0",
1181 | "model_name": "DescriptionStyleModel",
1182 | "state": {
1183 | "_model_module": "@jupyter-widgets/controls",
1184 | "_model_module_version": "1.5.0",
1185 | "_model_name": "DescriptionStyleModel",
1186 | "_view_count": null,
1187 | "_view_module": "@jupyter-widgets/base",
1188 | "_view_module_version": "1.2.0",
1189 | "_view_name": "StyleView",
1190 | "description_width": ""
1191 | }
1192 | },
1193 | "95a901cfb5114d86a4b7c30508d567a8": {
1194 | "model_module": "@jupyter-widgets/base",
1195 | "model_module_version": "1.2.0",
1196 | "model_name": "LayoutModel",
1197 | "state": {
1198 | "_model_module": "@jupyter-widgets/base",
1199 | "_model_module_version": "1.2.0",
1200 | "_model_name": "LayoutModel",
1201 | "_view_count": null,
1202 | "_view_module": "@jupyter-widgets/base",
1203 | "_view_module_version": "1.2.0",
1204 | "_view_name": "LayoutView",
1205 | "align_content": null,
1206 | "align_items": null,
1207 | "align_self": null,
1208 | "border": null,
1209 | "bottom": null,
1210 | "display": null,
1211 | "flex": null,
1212 | "flex_flow": null,
1213 | "grid_area": null,
1214 | "grid_auto_columns": null,
1215 | "grid_auto_flow": null,
1216 | "grid_auto_rows": null,
1217 | "grid_column": null,
1218 | "grid_gap": null,
1219 | "grid_row": null,
1220 | "grid_template_areas": null,
1221 | "grid_template_columns": null,
1222 | "grid_template_rows": null,
1223 | "height": null,
1224 | "justify_content": null,
1225 | "justify_items": null,
1226 | "left": null,
1227 | "margin": null,
1228 | "max_height": null,
1229 | "max_width": null,
1230 | "min_height": null,
1231 | "min_width": null,
1232 | "object_fit": null,
1233 | "object_position": null,
1234 | "order": null,
1235 | "overflow": null,
1236 | "overflow_x": null,
1237 | "overflow_y": null,
1238 | "padding": null,
1239 | "right": null,
1240 | "top": null,
1241 | "visibility": null,
1242 | "width": null
1243 | }
1244 | },
1245 | "98273030cd864920825a7648597cb686": {
1246 | "model_module": "@jupyter-widgets/base",
1247 | "model_module_version": "1.2.0",
1248 | "model_name": "LayoutModel",
1249 | "state": {
1250 | "_model_module": "@jupyter-widgets/base",
1251 | "_model_module_version": "1.2.0",
1252 | "_model_name": "LayoutModel",
1253 | "_view_count": null,
1254 | "_view_module": "@jupyter-widgets/base",
1255 | "_view_module_version": "1.2.0",
1256 | "_view_name": "LayoutView",
1257 | "align_content": null,
1258 | "align_items": null,
1259 | "align_self": null,
1260 | "border": null,
1261 | "bottom": null,
1262 | "display": null,
1263 | "flex": null,
1264 | "flex_flow": null,
1265 | "grid_area": null,
1266 | "grid_auto_columns": null,
1267 | "grid_auto_flow": null,
1268 | "grid_auto_rows": null,
1269 | "grid_column": null,
1270 | "grid_gap": null,
1271 | "grid_row": null,
1272 | "grid_template_areas": null,
1273 | "grid_template_columns": null,
1274 | "grid_template_rows": null,
1275 | "height": null,
1276 | "justify_content": null,
1277 | "justify_items": null,
1278 | "left": null,
1279 | "margin": null,
1280 | "max_height": null,
1281 | "max_width": null,
1282 | "min_height": null,
1283 | "min_width": null,
1284 | "object_fit": null,
1285 | "object_position": null,
1286 | "order": null,
1287 | "overflow": null,
1288 | "overflow_x": null,
1289 | "overflow_y": null,
1290 | "padding": null,
1291 | "right": null,
1292 | "top": null,
1293 | "visibility": null,
1294 | "width": null
1295 | }
1296 | },
1297 | "992b6f13c18e41a5b12c8d53c72e2794": {
1298 | "model_module": "@jupyter-widgets/controls",
1299 | "model_module_version": "1.5.0",
1300 | "model_name": "HBoxModel",
1301 | "state": {
1302 | "_dom_classes": [],
1303 | "_model_module": "@jupyter-widgets/controls",
1304 | "_model_module_version": "1.5.0",
1305 | "_model_name": "HBoxModel",
1306 | "_view_count": null,
1307 | "_view_module": "@jupyter-widgets/controls",
1308 | "_view_module_version": "1.5.0",
1309 | "_view_name": "HBoxView",
1310 | "box_style": "",
1311 | "children": [
1312 | "IPY_MODEL_74ebcd701a2d41fd8f91cd4823e5410e",
1313 | "IPY_MODEL_f4bbd4ca186b4eecb43b06b5b80183ee",
1314 | "IPY_MODEL_e21beed8f64745a7830bb86a0be547a7"
1315 | ],
1316 | "layout": "IPY_MODEL_5d2c500f1f404603911a8e17265de46a"
1317 | }
1318 | },
1319 | "a4b70d2d1f934264bc9602b9796021bf": {
1320 | "model_module": "@jupyter-widgets/controls",
1321 | "model_module_version": "1.5.0",
1322 | "model_name": "FloatProgressModel",
1323 | "state": {
1324 | "_dom_classes": [],
1325 | "_model_module": "@jupyter-widgets/controls",
1326 | "_model_module_version": "1.5.0",
1327 | "_model_name": "FloatProgressModel",
1328 | "_view_count": null,
1329 | "_view_module": "@jupyter-widgets/controls",
1330 | "_view_module_version": "1.5.0",
1331 | "_view_name": "ProgressView",
1332 | "bar_style": "success",
1333 | "description": "",
1334 | "description_tooltip": null,
1335 | "layout": "IPY_MODEL_27c0dd4a616a4f10b05b4feb9a147786",
1336 | "max": 791,
1337 | "min": 0,
1338 | "orientation": "horizontal",
1339 | "style": "IPY_MODEL_306bf26960f5461591c07c142cd2e4b4",
1340 | "value": 791
1341 | }
1342 | },
1343 | "a8448a72f95c4ccaa03960dc68b246e1": {
1344 | "model_module": "@jupyter-widgets/controls",
1345 | "model_module_version": "1.5.0",
1346 | "model_name": "ProgressStyleModel",
1347 | "state": {
1348 | "_model_module": "@jupyter-widgets/controls",
1349 | "_model_module_version": "1.5.0",
1350 | "_model_name": "ProgressStyleModel",
1351 | "_view_count": null,
1352 | "_view_module": "@jupyter-widgets/base",
1353 | "_view_module_version": "1.2.0",
1354 | "_view_name": "StyleView",
1355 | "bar_color": null,
1356 | "description_width": ""
1357 | }
1358 | },
1359 | "b09195e6e84447ad8573108ab2b6d332": {
1360 | "model_module": "@jupyter-widgets/controls",
1361 | "model_module_version": "1.5.0",
1362 | "model_name": "DescriptionStyleModel",
1363 | "state": {
1364 | "_model_module": "@jupyter-widgets/controls",
1365 | "_model_module_version": "1.5.0",
1366 | "_model_name": "DescriptionStyleModel",
1367 | "_view_count": null,
1368 | "_view_module": "@jupyter-widgets/base",
1369 | "_view_module_version": "1.2.0",
1370 | "_view_name": "StyleView",
1371 | "description_width": ""
1372 | }
1373 | },
1374 | "b16d2e5f754748cebcd3f12996d397a4": {
1375 | "model_module": "@jupyter-widgets/base",
1376 | "model_module_version": "1.2.0",
1377 | "model_name": "LayoutModel",
1378 | "state": {
1379 | "_model_module": "@jupyter-widgets/base",
1380 | "_model_module_version": "1.2.0",
1381 | "_model_name": "LayoutModel",
1382 | "_view_count": null,
1383 | "_view_module": "@jupyter-widgets/base",
1384 | "_view_module_version": "1.2.0",
1385 | "_view_name": "LayoutView",
1386 | "align_content": null,
1387 | "align_items": null,
1388 | "align_self": null,
1389 | "border": null,
1390 | "bottom": null,
1391 | "display": null,
1392 | "flex": null,
1393 | "flex_flow": null,
1394 | "grid_area": null,
1395 | "grid_auto_columns": null,
1396 | "grid_auto_flow": null,
1397 | "grid_auto_rows": null,
1398 | "grid_column": null,
1399 | "grid_gap": null,
1400 | "grid_row": null,
1401 | "grid_template_areas": null,
1402 | "grid_template_columns": null,
1403 | "grid_template_rows": null,
1404 | "height": null,
1405 | "justify_content": null,
1406 | "justify_items": null,
1407 | "left": null,
1408 | "margin": null,
1409 | "max_height": null,
1410 | "max_width": null,
1411 | "min_height": null,
1412 | "min_width": null,
1413 | "object_fit": null,
1414 | "object_position": null,
1415 | "order": null,
1416 | "overflow": null,
1417 | "overflow_x": null,
1418 | "overflow_y": null,
1419 | "padding": null,
1420 | "right": null,
1421 | "top": null,
1422 | "visibility": null,
1423 | "width": null
1424 | }
1425 | },
1426 | "bba613ed1fa64880a68a427d0422edd9": {
1427 | "model_module": "@jupyter-widgets/controls",
1428 | "model_module_version": "1.5.0",
1429 | "model_name": "HTMLModel",
1430 | "state": {
1431 | "_dom_classes": [],
1432 | "_model_module": "@jupyter-widgets/controls",
1433 | "_model_module_version": "1.5.0",
1434 | "_model_name": "HTMLModel",
1435 | "_view_count": null,
1436 | "_view_module": "@jupyter-widgets/controls",
1437 | "_view_module_version": "1.5.0",
1438 | "_view_name": "HTMLView",
1439 | "description": "",
1440 | "description_tooltip": null,
1441 | "layout": "IPY_MODEL_177be2dd62954c02894d0d49b9e8625e",
1442 | "placeholder": "",
1443 | "style": "IPY_MODEL_278bd7aa89ed4a5e9020bbcafa7fba6c",
1444 | "value": " 791/791 [00:00<00:00, 20.5kB/s]"
1445 | }
1446 | },
1447 | "befbff68d4664ae0bb16b453b94036d7": {
1448 | "model_module": "@jupyter-widgets/controls",
1449 | "model_module_version": "1.5.0",
1450 | "model_name": "DescriptionStyleModel",
1451 | "state": {
1452 | "_model_module": "@jupyter-widgets/controls",
1453 | "_model_module_version": "1.5.0",
1454 | "_model_name": "DescriptionStyleModel",
1455 | "_view_count": null,
1456 | "_view_module": "@jupyter-widgets/base",
1457 | "_view_module_version": "1.2.0",
1458 | "_view_name": "StyleView",
1459 | "description_width": ""
1460 | }
1461 | },
1462 | "c046065b830d42ffb8fba239d2af4c7a": {
1463 | "model_module": "@jupyter-widgets/controls",
1464 | "model_module_version": "1.5.0",
1465 | "model_name": "FloatProgressModel",
1466 | "state": {
1467 | "_dom_classes": [],
1468 | "_model_module": "@jupyter-widgets/controls",
1469 | "_model_module_version": "1.5.0",
1470 | "_model_name": "FloatProgressModel",
1471 | "_view_count": null,
1472 | "_view_module": "@jupyter-widgets/controls",
1473 | "_view_module_version": "1.5.0",
1474 | "_view_name": "ProgressView",
1475 | "bar_style": "success",
1476 | "description": "",
1477 | "description_tooltip": null,
1478 | "layout": "IPY_MODEL_95a901cfb5114d86a4b7c30508d567a8",
1479 | "max": 112,
1480 | "min": 0,
1481 | "orientation": "horizontal",
1482 | "style": "IPY_MODEL_2e04db8b476649a785e9b4d6601c16ec",
1483 | "value": 112
1484 | }
1485 | },
1486 | "c61e39fc5a75454c96ba62ad0f972f51": {
1487 | "model_module": "@jupyter-widgets/base",
1488 | "model_module_version": "1.2.0",
1489 | "model_name": "LayoutModel",
1490 | "state": {
1491 | "_model_module": "@jupyter-widgets/base",
1492 | "_model_module_version": "1.2.0",
1493 | "_model_name": "LayoutModel",
1494 | "_view_count": null,
1495 | "_view_module": "@jupyter-widgets/base",
1496 | "_view_module_version": "1.2.0",
1497 | "_view_name": "LayoutView",
1498 | "align_content": null,
1499 | "align_items": null,
1500 | "align_self": null,
1501 | "border": null,
1502 | "bottom": null,
1503 | "display": null,
1504 | "flex": null,
1505 | "flex_flow": null,
1506 | "grid_area": null,
1507 | "grid_auto_columns": null,
1508 | "grid_auto_flow": null,
1509 | "grid_auto_rows": null,
1510 | "grid_column": null,
1511 | "grid_gap": null,
1512 | "grid_row": null,
1513 | "grid_template_areas": null,
1514 | "grid_template_columns": null,
1515 | "grid_template_rows": null,
1516 | "height": null,
1517 | "justify_content": null,
1518 | "justify_items": null,
1519 | "left": null,
1520 | "margin": null,
1521 | "max_height": null,
1522 | "max_width": null,
1523 | "min_height": null,
1524 | "min_width": null,
1525 | "object_fit": null,
1526 | "object_position": null,
1527 | "order": null,
1528 | "overflow": null,
1529 | "overflow_x": null,
1530 | "overflow_y": null,
1531 | "padding": null,
1532 | "right": null,
1533 | "top": null,
1534 | "visibility": null,
1535 | "width": null
1536 | }
1537 | },
1538 | "ce4ba9bf00954b91a35b8e9286fa09a1": {
1539 | "model_module": "@jupyter-widgets/base",
1540 | "model_module_version": "1.2.0",
1541 | "model_name": "LayoutModel",
1542 | "state": {
1543 | "_model_module": "@jupyter-widgets/base",
1544 | "_model_module_version": "1.2.0",
1545 | "_model_name": "LayoutModel",
1546 | "_view_count": null,
1547 | "_view_module": "@jupyter-widgets/base",
1548 | "_view_module_version": "1.2.0",
1549 | "_view_name": "LayoutView",
1550 | "align_content": null,
1551 | "align_items": null,
1552 | "align_self": null,
1553 | "border": null,
1554 | "bottom": null,
1555 | "display": null,
1556 | "flex": null,
1557 | "flex_flow": null,
1558 | "grid_area": null,
1559 | "grid_auto_columns": null,
1560 | "grid_auto_flow": null,
1561 | "grid_auto_rows": null,
1562 | "grid_column": null,
1563 | "grid_gap": null,
1564 | "grid_row": null,
1565 | "grid_template_areas": null,
1566 | "grid_template_columns": null,
1567 | "grid_template_rows": null,
1568 | "height": null,
1569 | "justify_content": null,
1570 | "justify_items": null,
1571 | "left": null,
1572 | "margin": null,
1573 | "max_height": null,
1574 | "max_width": null,
1575 | "min_height": null,
1576 | "min_width": null,
1577 | "object_fit": null,
1578 | "object_position": null,
1579 | "order": null,
1580 | "overflow": null,
1581 | "overflow_x": null,
1582 | "overflow_y": null,
1583 | "padding": null,
1584 | "right": null,
1585 | "top": null,
1586 | "visibility": null,
1587 | "width": null
1588 | }
1589 | },
1590 | "d38d00434ce5496f901640976bd3e43a": {
1591 | "model_module": "@jupyter-widgets/base",
1592 | "model_module_version": "1.2.0",
1593 | "model_name": "LayoutModel",
1594 | "state": {
1595 | "_model_module": "@jupyter-widgets/base",
1596 | "_model_module_version": "1.2.0",
1597 | "_model_name": "LayoutModel",
1598 | "_view_count": null,
1599 | "_view_module": "@jupyter-widgets/base",
1600 | "_view_module_version": "1.2.0",
1601 | "_view_name": "LayoutView",
1602 | "align_content": null,
1603 | "align_items": null,
1604 | "align_self": null,
1605 | "border": null,
1606 | "bottom": null,
1607 | "display": null,
1608 | "flex": null,
1609 | "flex_flow": null,
1610 | "grid_area": null,
1611 | "grid_auto_columns": null,
1612 | "grid_auto_flow": null,
1613 | "grid_auto_rows": null,
1614 | "grid_column": null,
1615 | "grid_gap": null,
1616 | "grid_row": null,
1617 | "grid_template_areas": null,
1618 | "grid_template_columns": null,
1619 | "grid_template_rows": null,
1620 | "height": null,
1621 | "justify_content": null,
1622 | "justify_items": null,
1623 | "left": null,
1624 | "margin": null,
1625 | "max_height": null,
1626 | "max_width": null,
1627 | "min_height": null,
1628 | "min_width": null,
1629 | "object_fit": null,
1630 | "object_position": null,
1631 | "order": null,
1632 | "overflow": null,
1633 | "overflow_x": null,
1634 | "overflow_y": null,
1635 | "padding": null,
1636 | "right": null,
1637 | "top": null,
1638 | "visibility": null,
1639 | "width": null
1640 | }
1641 | },
1642 | "dbf6be4c7ae8432b97472c573793c4fe": {
1643 | "model_module": "@jupyter-widgets/controls",
1644 | "model_module_version": "1.5.0",
1645 | "model_name": "HBoxModel",
1646 | "state": {
1647 | "_dom_classes": [],
1648 | "_model_module": "@jupyter-widgets/controls",
1649 | "_model_module_version": "1.5.0",
1650 | "_model_name": "HBoxModel",
1651 | "_view_count": null,
1652 | "_view_module": "@jupyter-widgets/controls",
1653 | "_view_module_version": "1.5.0",
1654 | "_view_name": "HBoxView",
1655 | "box_style": "",
1656 | "children": [
1657 | "IPY_MODEL_e057fa5920eb4044b37c27aedf2ebc10",
1658 | "IPY_MODEL_a4b70d2d1f934264bc9602b9796021bf",
1659 | "IPY_MODEL_bba613ed1fa64880a68a427d0422edd9"
1660 | ],
1661 | "layout": "IPY_MODEL_e5b12cf54e08497e8224b535951ad212"
1662 | }
1663 | },
1664 | "dc573e8f87f64292b3861bb98688c848": {
1665 | "model_module": "@jupyter-widgets/controls",
1666 | "model_module_version": "1.5.0",
1667 | "model_name": "ProgressStyleModel",
1668 | "state": {
1669 | "_model_module": "@jupyter-widgets/controls",
1670 | "_model_module_version": "1.5.0",
1671 | "_model_name": "ProgressStyleModel",
1672 | "_view_count": null,
1673 | "_view_module": "@jupyter-widgets/base",
1674 | "_view_module_version": "1.2.0",
1675 | "_view_name": "StyleView",
1676 | "bar_color": null,
1677 | "description_width": ""
1678 | }
1679 | },
1680 | "dcd1f01a5bee4dc4a872579856bfe1c1": {
1681 | "model_module": "@jupyter-widgets/controls",
1682 | "model_module_version": "1.5.0",
1683 | "model_name": "ProgressStyleModel",
1684 | "state": {
1685 | "_model_module": "@jupyter-widgets/controls",
1686 | "_model_module_version": "1.5.0",
1687 | "_model_name": "ProgressStyleModel",
1688 | "_view_count": null,
1689 | "_view_module": "@jupyter-widgets/base",
1690 | "_view_module_version": "1.2.0",
1691 | "_view_name": "StyleView",
1692 | "bar_color": null,
1693 | "description_width": ""
1694 | }
1695 | },
1696 | "e057fa5920eb4044b37c27aedf2ebc10": {
1697 | "model_module": "@jupyter-widgets/controls",
1698 | "model_module_version": "1.5.0",
1699 | "model_name": "HTMLModel",
1700 | "state": {
1701 | "_dom_classes": [],
1702 | "_model_module": "@jupyter-widgets/controls",
1703 | "_model_module_version": "1.5.0",
1704 | "_model_name": "HTMLModel",
1705 | "_view_count": null,
1706 | "_view_module": "@jupyter-widgets/controls",
1707 | "_view_module_version": "1.5.0",
1708 | "_view_name": "HTMLView",
1709 | "description": "",
1710 | "description_tooltip": null,
1711 | "layout": "IPY_MODEL_ea99016cff8a4d83aa568eb492d95350",
1712 | "placeholder": "",
1713 | "style": "IPY_MODEL_f3438533609f46c0ac3090e8ed0eaa45",
1714 | "value": "config.json: 100%"
1715 | }
1716 | },
1717 | "e21beed8f64745a7830bb86a0be547a7": {
1718 | "model_module": "@jupyter-widgets/controls",
1719 | "model_module_version": "1.5.0",
1720 | "model_name": "HTMLModel",
1721 | "state": {
1722 | "_dom_classes": [],
1723 | "_model_module": "@jupyter-widgets/controls",
1724 | "_model_module_version": "1.5.0",
1725 | "_model_name": "HTMLModel",
1726 | "_view_count": null,
1727 | "_view_module": "@jupyter-widgets/controls",
1728 | "_view_module_version": "1.5.0",
1729 | "_view_name": "HTMLView",
1730 | "description": "",
1731 | "description_tooltip": null,
1732 | "layout": "IPY_MODEL_82f02d6db6a74b0086beb9771eb45a81",
1733 | "placeholder": "",
1734 | "style": "IPY_MODEL_7ee0a510bf084edabd07fbb7a0af2cbc",
1735 | "value": " 134M/134M [00:01<00:00, 168MB/s]"
1736 | }
1737 | },
1738 | "e3fae2ff299e42eab96475c1e7d03267": {
1739 | "model_module": "@jupyter-widgets/controls",
1740 | "model_module_version": "1.5.0",
1741 | "model_name": "HBoxModel",
1742 | "state": {
1743 | "_dom_classes": [],
1744 | "_model_module": "@jupyter-widgets/controls",
1745 | "_model_module_version": "1.5.0",
1746 | "_model_name": "HBoxModel",
1747 | "_view_count": null,
1748 | "_view_module": "@jupyter-widgets/controls",
1749 | "_view_module_version": "1.5.0",
1750 | "_view_name": "HBoxView",
1751 | "box_style": "",
1752 | "children": [
1753 | "IPY_MODEL_5f5090841ceb42cbb743c61080dcef24",
1754 | "IPY_MODEL_905086bf6d5c4418916175d20d8a12b1",
1755 | "IPY_MODEL_6b9afb8789744ab0b0f1514fe43bbd1a"
1756 | ],
1757 | "layout": "IPY_MODEL_d38d00434ce5496f901640976bd3e43a"
1758 | }
1759 | },
1760 | "e5b12cf54e08497e8224b535951ad212": {
1761 | "model_module": "@jupyter-widgets/base",
1762 | "model_module_version": "1.2.0",
1763 | "model_name": "LayoutModel",
1764 | "state": {
1765 | "_model_module": "@jupyter-widgets/base",
1766 | "_model_module_version": "1.2.0",
1767 | "_model_name": "LayoutModel",
1768 | "_view_count": null,
1769 | "_view_module": "@jupyter-widgets/base",
1770 | "_view_module_version": "1.2.0",
1771 | "_view_name": "LayoutView",
1772 | "align_content": null,
1773 | "align_items": null,
1774 | "align_self": null,
1775 | "border": null,
1776 | "bottom": null,
1777 | "display": null,
1778 | "flex": null,
1779 | "flex_flow": null,
1780 | "grid_area": null,
1781 | "grid_auto_columns": null,
1782 | "grid_auto_flow": null,
1783 | "grid_auto_rows": null,
1784 | "grid_column": null,
1785 | "grid_gap": null,
1786 | "grid_row": null,
1787 | "grid_template_areas": null,
1788 | "grid_template_columns": null,
1789 | "grid_template_rows": null,
1790 | "height": null,
1791 | "justify_content": null,
1792 | "justify_items": null,
1793 | "left": null,
1794 | "margin": null,
1795 | "max_height": null,
1796 | "max_width": null,
1797 | "min_height": null,
1798 | "min_width": null,
1799 | "object_fit": null,
1800 | "object_position": null,
1801 | "order": null,
1802 | "overflow": null,
1803 | "overflow_x": null,
1804 | "overflow_y": null,
1805 | "padding": null,
1806 | "right": null,
1807 | "top": null,
1808 | "visibility": null,
1809 | "width": null
1810 | }
1811 | },
1812 | "e6a12e9ec06448a989bfb555c29c45b9": {
1813 | "model_module": "@jupyter-widgets/controls",
1814 | "model_module_version": "1.5.0",
1815 | "model_name": "DescriptionStyleModel",
1816 | "state": {
1817 | "_model_module": "@jupyter-widgets/controls",
1818 | "_model_module_version": "1.5.0",
1819 | "_model_name": "DescriptionStyleModel",
1820 | "_view_count": null,
1821 | "_view_module": "@jupyter-widgets/base",
1822 | "_view_module_version": "1.2.0",
1823 | "_view_name": "StyleView",
1824 | "description_width": ""
1825 | }
1826 | },
1827 | "e8562c35e1974a1ba434698bc9fe74bf": {
1828 | "model_module": "@jupyter-widgets/base",
1829 | "model_module_version": "1.2.0",
1830 | "model_name": "LayoutModel",
1831 | "state": {
1832 | "_model_module": "@jupyter-widgets/base",
1833 | "_model_module_version": "1.2.0",
1834 | "_model_name": "LayoutModel",
1835 | "_view_count": null,
1836 | "_view_module": "@jupyter-widgets/base",
1837 | "_view_module_version": "1.2.0",
1838 | "_view_name": "LayoutView",
1839 | "align_content": null,
1840 | "align_items": null,
1841 | "align_self": null,
1842 | "border": null,
1843 | "bottom": null,
1844 | "display": null,
1845 | "flex": null,
1846 | "flex_flow": null,
1847 | "grid_area": null,
1848 | "grid_auto_columns": null,
1849 | "grid_auto_flow": null,
1850 | "grid_auto_rows": null,
1851 | "grid_column": null,
1852 | "grid_gap": null,
1853 | "grid_row": null,
1854 | "grid_template_areas": null,
1855 | "grid_template_columns": null,
1856 | "grid_template_rows": null,
1857 | "height": null,
1858 | "justify_content": null,
1859 | "justify_items": null,
1860 | "left": null,
1861 | "margin": null,
1862 | "max_height": null,
1863 | "max_width": null,
1864 | "min_height": null,
1865 | "min_width": null,
1866 | "object_fit": null,
1867 | "object_position": null,
1868 | "order": null,
1869 | "overflow": null,
1870 | "overflow_x": null,
1871 | "overflow_y": null,
1872 | "padding": null,
1873 | "right": null,
1874 | "top": null,
1875 | "visibility": null,
1876 | "width": null
1877 | }
1878 | },
1879 | "ea99016cff8a4d83aa568eb492d95350": {
1880 | "model_module": "@jupyter-widgets/base",
1881 | "model_module_version": "1.2.0",
1882 | "model_name": "LayoutModel",
1883 | "state": {
1884 | "_model_module": "@jupyter-widgets/base",
1885 | "_model_module_version": "1.2.0",
1886 | "_model_name": "LayoutModel",
1887 | "_view_count": null,
1888 | "_view_module": "@jupyter-widgets/base",
1889 | "_view_module_version": "1.2.0",
1890 | "_view_name": "LayoutView",
1891 | "align_content": null,
1892 | "align_items": null,
1893 | "align_self": null,
1894 | "border": null,
1895 | "bottom": null,
1896 | "display": null,
1897 | "flex": null,
1898 | "flex_flow": null,
1899 | "grid_area": null,
1900 | "grid_auto_columns": null,
1901 | "grid_auto_flow": null,
1902 | "grid_auto_rows": null,
1903 | "grid_column": null,
1904 | "grid_gap": null,
1905 | "grid_row": null,
1906 | "grid_template_areas": null,
1907 | "grid_template_columns": null,
1908 | "grid_template_rows": null,
1909 | "height": null,
1910 | "justify_content": null,
1911 | "justify_items": null,
1912 | "left": null,
1913 | "margin": null,
1914 | "max_height": null,
1915 | "max_width": null,
1916 | "min_height": null,
1917 | "min_width": null,
1918 | "object_fit": null,
1919 | "object_position": null,
1920 | "order": null,
1921 | "overflow": null,
1922 | "overflow_x": null,
1923 | "overflow_y": null,
1924 | "padding": null,
1925 | "right": null,
1926 | "top": null,
1927 | "visibility": null,
1928 | "width": null
1929 | }
1930 | },
1931 | "ec3e7e4f488c41f2beb8e78e5adb75e1": {
1932 | "model_module": "@jupyter-widgets/base",
1933 | "model_module_version": "1.2.0",
1934 | "model_name": "LayoutModel",
1935 | "state": {
1936 | "_model_module": "@jupyter-widgets/base",
1937 | "_model_module_version": "1.2.0",
1938 | "_model_name": "LayoutModel",
1939 | "_view_count": null,
1940 | "_view_module": "@jupyter-widgets/base",
1941 | "_view_module_version": "1.2.0",
1942 | "_view_name": "LayoutView",
1943 | "align_content": null,
1944 | "align_items": null,
1945 | "align_self": null,
1946 | "border": null,
1947 | "bottom": null,
1948 | "display": null,
1949 | "flex": null,
1950 | "flex_flow": null,
1951 | "grid_area": null,
1952 | "grid_auto_columns": null,
1953 | "grid_auto_flow": null,
1954 | "grid_auto_rows": null,
1955 | "grid_column": null,
1956 | "grid_gap": null,
1957 | "grid_row": null,
1958 | "grid_template_areas": null,
1959 | "grid_template_columns": null,
1960 | "grid_template_rows": null,
1961 | "height": null,
1962 | "justify_content": null,
1963 | "justify_items": null,
1964 | "left": null,
1965 | "margin": null,
1966 | "max_height": null,
1967 | "max_width": null,
1968 | "min_height": null,
1969 | "min_width": null,
1970 | "object_fit": null,
1971 | "object_position": null,
1972 | "order": null,
1973 | "overflow": null,
1974 | "overflow_x": null,
1975 | "overflow_y": null,
1976 | "padding": null,
1977 | "right": null,
1978 | "top": null,
1979 | "visibility": null,
1980 | "width": null
1981 | }
1982 | },
1983 | "f2836e2bc9fd4de590f7db1dfa7b62f2": {
1984 | "model_module": "@jupyter-widgets/controls",
1985 | "model_module_version": "1.5.0",
1986 | "model_name": "DescriptionStyleModel",
1987 | "state": {
1988 | "_model_module": "@jupyter-widgets/controls",
1989 | "_model_module_version": "1.5.0",
1990 | "_model_name": "DescriptionStyleModel",
1991 | "_view_count": null,
1992 | "_view_module": "@jupyter-widgets/base",
1993 | "_view_module_version": "1.2.0",
1994 | "_view_name": "StyleView",
1995 | "description_width": ""
1996 | }
1997 | },
1998 | "f3438533609f46c0ac3090e8ed0eaa45": {
1999 | "model_module": "@jupyter-widgets/controls",
2000 | "model_module_version": "1.5.0",
2001 | "model_name": "DescriptionStyleModel",
2002 | "state": {
2003 | "_model_module": "@jupyter-widgets/controls",
2004 | "_model_module_version": "1.5.0",
2005 | "_model_name": "DescriptionStyleModel",
2006 | "_view_count": null,
2007 | "_view_module": "@jupyter-widgets/base",
2008 | "_view_module_version": "1.2.0",
2009 | "_view_name": "StyleView",
2010 | "description_width": ""
2011 | }
2012 | },
2013 | "f4bbd4ca186b4eecb43b06b5b80183ee": {
2014 | "model_module": "@jupyter-widgets/controls",
2015 | "model_module_version": "1.5.0",
2016 | "model_name": "FloatProgressModel",
2017 | "state": {
2018 | "_dom_classes": [],
2019 | "_model_module": "@jupyter-widgets/controls",
2020 | "_model_module_version": "1.5.0",
2021 | "_model_name": "FloatProgressModel",
2022 | "_view_count": null,
2023 | "_view_module": "@jupyter-widgets/controls",
2024 | "_view_module_version": "1.5.0",
2025 | "_view_name": "ProgressView",
2026 | "bar_style": "success",
2027 | "description": "",
2028 | "description_tooltip": null,
2029 | "layout": "IPY_MODEL_5033346b251342d38691077e6b7e6efc",
2030 | "max": 133530889,
2031 | "min": 0,
2032 | "orientation": "horizontal",
2033 | "style": "IPY_MODEL_dc573e8f87f64292b3861bb98688c848",
2034 | "value": 133530889
2035 | }
2036 | },
2037 | "fa30c5c766914ae2b158324bf790a239": {
2038 | "model_module": "@jupyter-widgets/controls",
2039 | "model_module_version": "1.5.0",
2040 | "model_name": "DescriptionStyleModel",
2041 | "state": {
2042 | "_model_module": "@jupyter-widgets/controls",
2043 | "_model_module_version": "1.5.0",
2044 | "_model_name": "DescriptionStyleModel",
2045 | "_view_count": null,
2046 | "_view_module": "@jupyter-widgets/base",
2047 | "_view_module_version": "1.2.0",
2048 | "_view_name": "StyleView",
2049 | "description_width": ""
2050 | }
2051 | }
2052 | }
2053 | }
2054 | },
2055 | "nbformat": 4,
2056 | "nbformat_minor": 0
2057 | }
2058 |
--------------------------------------------------------------------------------
/notebooks/Document_Chunks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "
\n",
9 | "\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {
16 | "id": "L7ww6Lkqarb_"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%%capture\n",
21 | "\n",
22 | "%pip install langchain\n",
23 | "import requests\n",
24 | "from langchain.text_splitter import RecursiveCharacterTextSplitter"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 3,
30 | "metadata": {
31 | "id": "_pyQ4wZHazwj"
32 | },
33 | "outputs": [],
34 | "source": [
35 | "response = requests.get(\"https://www.gutenberg.org/cache/epub/72392/pg72392.txt\")\n",
36 | "book_text = response.text"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 4,
42 | "metadata": {
43 | "colab": {
44 | "base_uri": "https://localhost:8080/",
45 | "height": 54
46 | },
47 | "id": "Z3KLleqMa2ga",
48 | "outputId": "4998792c-c8d2-496e-8a45-5a4209f8d32e"
49 | },
50 | "outputs": [
51 | {
52 | "data": {
53 | "application/vnd.google.colaboratory.intrinsic+json": {
54 | "type": "string"
55 | },
56 | "text/plain": [
57 | "'\\ufeffThe Project Gutenberg eBook of The inverted pyramid\\r\\n \\r\\nThis ebook is for the use of anyone anywhere in the United States and\\r\\nmost other parts of the world at no cost and with almost no restricti'"
58 | ]
59 | },
60 | "execution_count": 4,
61 | "metadata": {},
62 | "output_type": "execute_result"
63 | }
64 | ],
65 | "source": [
66 | "# First 200 characters\n",
67 | "\n",
68 | "book_text[:200]"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 9,
74 | "metadata": {
75 | "colab": {
76 | "base_uri": "https://localhost:8080/"
77 | },
78 | "id": "dRcE-hK0a32w",
79 | "outputId": "7de14707-9253-4fe2-b28e-270b30b6a43e"
80 | },
81 | "outputs": [
82 | {
83 | "name": "stdout",
84 | "output_type": "stream",
85 | "text": [
86 | " _All rights reserved_\r\n",
87 | "------------------------------\n",
88 | "\r\n",
89 | "------------------------------\n",
90 | " Published January, 1924\r\n",
91 | "------------------------------\n",
92 | "\r\n",
93 | "------------------------------\n",
94 | "\r\n"
95 | ]
96 | }
97 | ],
98 | "source": [
99 | "# Naive document chunking\n",
100 | "\n",
101 | "newline_chunks = book_text.split(\"\\n\")\n",
102 | "print(newline_chunks[50])\n",
103 | "print(\"-\" * 30)\n",
104 | "print(newline_chunks[51])\n",
105 | "print(\"-\" * 30)\n",
106 | "print(newline_chunks[52])\n",
107 | "print(\"-\" * 30)\n",
108 | "print(newline_chunks[53])\n",
109 | "print(\"-\" * 30)\n",
110 | "print(newline_chunks[54])"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 10,
116 | "metadata": {
117 | "colab": {
118 | "base_uri": "https://localhost:8080/"
119 | },
120 | "id": "1G4W3wPebMjO",
121 | "outputId": "fcea4c47-cd74-428b-c072-6c581d3907c0"
122 | },
123 | "outputs": [
124 | {
125 | "name": "stdout",
126 | "output_type": "stream",
127 | "text": [
128 | "arched over the hurrying tide and the encircling mountains. Vast\r\n",
129 | "peaks, from the green-mantled cones near by, to distant pinnacles\r\n",
130 | "lifting far above timber line and capped with everlasting white.\r\n",
131 | "\r\n",
132 | "Rod did not consciously apply his intellect to considering his\r\n",
133 | "environment. He felt it. It satisfied him, filled him with an\n",
134 | "------------------------------\n",
135 | "environment. He felt it. It satisfied him, filled him with an\r\n",
136 | "indefinable sense of well-being. His people for a hundred years had\r\n",
137 | "filled their eyes with that and found it good. Against this\r\n",
138 | "background they had lived and loved and died. No matter. Rod,\r\n",
139 | "floating lazily in his canoe, was not looking backward,\r\n",
140 | "introspectively considering if he were the sum of five generations,\n",
141 | "------------------------------\n",
142 | "introspectively considering if he were the sum of five generations,\r\n",
143 | "each of which had contributed its quota to subduing a wild land to\r\n",
144 | "its use and need, to its ambition as well as to its necessity, and\r\n",
145 | "becoming one at last with that portion of the earth the first\r\n",
146 | "Roderick Norquay had made his own and handed to his sons.\r\n",
147 | "\r\n",
148 | "No, eighteen mercifully wears invisible blinkers, and Rod was no\n"
149 | ]
150 | }
151 | ],
152 | "source": [
153 | "# Recursive text splitting with chunk overlap\n",
154 | "\n",
155 | "text_splitter = RecursiveCharacterTextSplitter(\n",
156 | " chunk_size=400,\n",
157 | " chunk_overlap=100,\n",
158 | " length_function=len,\n",
159 | " is_separator_regex=False,\n",
160 | ")\n",
161 | "chunks = text_splitter.split_text(book_text)\n",
162 | "\n",
163 | "print(chunks[50])\n",
164 | "print(\"-\" * 30)\n",
165 | "print(chunks[51])\n",
166 | "print(\"-\" * 30)\n",
167 | "print(chunks[52])"
168 | ]
169 | }
170 | ],
171 | "metadata": {
172 | "colab": {
173 | "provenance": []
174 | },
175 | "kernelspec": {
176 | "display_name": "Python 3",
177 | "name": "python3"
178 | },
179 | "language_info": {
180 | "name": "python"
181 | }
182 | },
183 | "nbformat": 4,
184 | "nbformat_minor": 0
185 | }
186 |
--------------------------------------------------------------------------------
/notebooks/LSH.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "
\n",
9 | "\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {
16 | "id": "EkcnqMr7crU7"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "import numpy as np\n",
21 | "from sklearn.random_projection import SparseRandomProjection\n",
22 | "from sklearn.metrics.pairwise import cosine_similarity"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 2,
28 | "metadata": {
29 | "colab": {
30 | "base_uri": "https://localhost:8080/"
31 | },
32 | "id": "rVo11ZgscuIJ",
33 | "outputId": "0d5a458d-da4d-406a-c76e-411f0cca6025"
34 | },
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "Vector 0: [ 0.49671415 -0.1382643 0.64768854 1.52302986 -0.23415337]... Hash: 100\n",
41 | "Vector 1: [-1.41537074 -0.42064532 -0.34271452 -0.80227727 -0.16128571]... Hash: 001\n",
42 | "Vector 2: [ 0.35778736 0.56078453 1.08305124 1.05380205 -1.37766937]... Hash: 110\n",
43 | "Vector 3: [-0.82899501 -0.56018104 0.74729361 0.61037027 -0.02090159]... Hash: 111\n",
44 | "Vector 4: [-1.59442766 -0.59937502 0.0052437 0.04698059 -0.45006547]... Hash: 100\n",
45 | "Vector 5: [ 0.92617755 1.90941664 -1.39856757 0.56296924 -0.65064257]... Hash: 110\n",
46 | "Vector 6: [ 0.75698862 -0.92216532 0.86960592 1.35563786 0.4134349 ]... Hash: 001\n",
47 | "Vector 7: [-0.52272302 1.04900923 -0.70434369 -1.4084613 -1.55662917]... Hash: 111\n",
48 | "Vector 8: [ 0.93828381 -0.51604473 0.09612078 -0.46227529 -0.43449623]... Hash: 001\n",
49 | "Vector 9: [ 0.36867331 -0.39333881 0.02874482 1.27845186 0.19109907]... Hash: 001\n",
50 | "\n",
51 | "Example of similarity in hashes:\n",
52 | "Vector 0 hash: 100\n",
53 | "Vector 1 hash: 001\n"
54 | ]
55 | }
56 | ],
57 | "source": [
58 | "# Set parameters\n",
59 | "n_samples = 10\n",
60 | "n_features = 100\n",
61 | "n_components = 3 # lower dimension for LSH projection\n",
62 | "\n",
63 | "# Create random high-dimensional data\n",
64 | "np.random.seed(42)\n",
65 | "data = np.random.randn(n_samples, n_features)\n",
66 | "\n",
67 | "# Implement LSH using random projection\n",
68 | "lsh = SparseRandomProjection(n_components=n_components, random_state=42)\n",
69 | "lsh.fit(data)\n",
70 | "\n",
71 | "\n",
72 | "# Hash function: simply binarize the projected data\n",
73 | "def hash_vector(v):\n",
74 | " projection = lsh.transform(v.reshape(1, -1))\n",
75 | " binary_hash = (projection > 0).astype(int).flatten()\n",
76 | " return \"\".join(binary_hash.astype(str))\n",
77 | "\n",
78 | "\n",
79 | "# Hash each vector\n",
80 | "hashes = [hash_vector(d) for d in data]\n",
81 | "\n",
82 | "# Show the original vectors and their hashes\n",
83 | "for i, (vec, hsh) in enumerate(zip(data, hashes)):\n",
84 | " print(f\"Vector {i}: {vec[:5]}... Hash: {hsh}\")\n",
85 | "\n",
86 | "# Compare the hashes of the first two vectors (similar vectors would tend to share a hash)\n",
87 | "print(\"\\nExample of similarity in hashes:\")\n",
88 | "print(f\"Vector 0 hash: {hashes[0]}\")\n",
89 | "print(f\"Vector 1 hash: {hashes[1]}\") # The vectors are random, so these two hashes are not expected to match"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 4,
95 | "metadata": {
96 | "colab": {
97 | "base_uri": "https://localhost:8080/"
98 | },
99 | "id": "ivEMmN_1cvNS",
100 | "outputId": "93c59ac1-7b9a-4ee4-c9bf-a6c0b57f7879"
101 | },
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "Number of calculations for naive cosine similarity: 45\n",
108 | "Number of calculations for LSH cosine similarity: 9\n"
109 | ]
110 | }
111 | ],
112 | "source": [
113 | "# Compute cosine similarity for every pair of vectors and return how many comparisons were made\n",
114 | "def compute_all_cosine_similarities(data):\n",
115 | " n = len(data)\n",
116 | " count = 0\n",
117 | " for i in range(n):\n",
118 | " for j in range(i + 1, n):\n",
119 | " cosine_similarity(data[i].reshape(1, -1), data[j].reshape(1, -1))\n",
120 | " count += 1\n",
121 | " return count\n",
122 | "\n",
123 | "\n",
124 | "# Compute cosine similarity only between vectors that share an identical hash and return the comparison count\n",
125 | "def compute_lsh_cosine_similarities(data, hashes):\n",
126 | " unique_hashes = set(hashes)\n",
127 | " count = 0\n",
128 | " for h in unique_hashes:\n",
129 | " indices = [\n",
130 | " i for i, hash in enumerate(hashes) if hash == h\n",
131 | " ] # find subset with identical hash\n",
132 | " for i in range(len(indices)):\n",
133 | " for j in range(i + 1, len(indices)):\n",
134 | " cosine_similarity(\n",
135 | " data[indices[i]].reshape(1, -1), data[indices[j]].reshape(1, -1)\n",
136 | " )\n",
137 | " count += 1\n",
138 | " return count\n",
139 | "\n",
140 | "\n",
141 | "# Calculating cosine similarities for all pairs\n",
142 | "all_cosine_calculations = compute_all_cosine_similarities(data)\n",
143 | "\n",
144 | "# Calculating cosine similarities using LSH\n",
145 | "lsh_cosine_calculations = compute_lsh_cosine_similarities(data, hashes)\n",
146 | "\n",
147 | "print(\"Number of calculations for naive cosine similarity:\", all_cosine_calculations)\n",
148 | "print(\"Number of calculations for LSH cosine similarity:\", lsh_cosine_calculations)"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": 3,
154 | "metadata": {
155 | "id": "rhWorUQ8dbkt"
156 | },
157 | "outputs": [],
158 | "source": []
159 | }
160 | ],
161 | "metadata": {
162 | "colab": {
163 | "provenance": []
164 | },
165 | "kernelspec": {
166 | "display_name": "Python 3",
167 | "name": "python3"
168 | },
169 | "language_info": {
170 | "name": "python"
171 | }
172 | },
173 | "nbformat": 4,
174 | "nbformat_minor": 0
175 | }
176 |
--------------------------------------------------------------------------------
/notebooks/Product_Quantization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "
\n",
9 | "\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {
16 | "id": "7EpGFpF1XFJc"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "import numpy as np\n",
21 | "from sklearn.cluster import KMeans"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 4,
27 | "metadata": {
28 | "id": "FGj3o1ZYXIez"
29 | },
30 | "outputs": [],
31 | "source": [
32 | "class ProductQuantizer:\n",
33 | " def __init__(self, n_subvectors, n_clusters):\n",
34 | " \"\"\"\n",
35 | " Initialize the Product Quantizer.\n",
36 | " :param n_subvectors: Number of subvectors to divide each vector into.\n",
37 | " :param n_clusters: Number of clusters for quantization in each subvector.\n",
38 | " \"\"\"\n",
39 | " self.n_subvectors = n_subvectors\n",
40 | " self.n_clusters = n_clusters\n",
41 | " self.kmeans = [KMeans(n_clusters=n_clusters) for _ in range(n_subvectors)]\n",
42 | "\n",
43 | " def fit(self, data):\n",
44 | " \"\"\"\n",
45 | " Fit the model on the data.\n",
46 | " :param data: Array of shape (n_samples, n_features).\n",
47 | " \"\"\"\n",
48 | " subvector_len = data.shape[1] // self.n_subvectors\n",
49 | " for i in range(self.n_subvectors):\n",
50 | " # Extract subvectors and fit KMeans\n",
51 | " sub_data = data[:, i * subvector_len : (i + 1) * subvector_len]\n",
52 | " self.kmeans[i].fit(sub_data)\n",
53 | "\n",
54 | " def quantize(self, data):\n",
55 | " \"\"\"\n",
56 | " Quantize the data using the fitted model.\n",
57 | " :param data: Array of shape (n_samples, n_features).\n",
58 | " :return: Quantized data as indices of centroids.\n",
59 | " \"\"\"\n",
60 | " subvector_len = data.shape[1] // self.n_subvectors\n",
61 | " quantized_data = []\n",
62 | " for i in range(self.n_subvectors):\n",
63 | " # Extract subvectors and predict the closest centroid\n",
64 | " sub_data = data[:, i * subvector_len : (i + 1) * subvector_len]\n",
65 | " quantized_data.append(self.kmeans[i].predict(sub_data))\n",
66 | " return np.array(quantized_data).T\n",
67 | "\n",
68 | " def inverse_transform(self, quantized_data):\n",
69 | " \"\"\"\n",
70 | " Convert quantized data back to approximate vectors.\n",
71 | " :param quantized_data: Array of quantized data (indices of centroids).\n",
72 | " :return: Approximate original vectors.\n",
73 | " \"\"\"\n",
74 | " subvector_len = self.kmeans[0].cluster_centers_.shape[1]\n",
75 | " approx_data = np.zeros(\n",
76 | " (quantized_data.shape[0], subvector_len * self.n_subvectors)\n",
77 | " )\n",
78 | " for i in range(self.n_subvectors):\n",
79 | " centroids = self.kmeans[i].cluster_centers_[quantized_data[:, i]]\n",
80 | " approx_data[:, i * subvector_len : (i + 1) * subvector_len] = centroids\n",
81 | " return approx_data"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 5,
87 | "metadata": {
88 | "id": "RFMgzGnqXLFl"
89 | },
90 | "outputs": [],
91 | "source": [
92 | "# Generate sample data\n",
93 | "\n",
94 | "np.random.seed(0)\n",
95 | "data = np.random.rand(100, 64) # 100 samples, 64-dimensional vectors"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {
102 | "colab": {
103 | "base_uri": "https://localhost:8080/"
104 | },
105 | "id": "cJQUt2o-VBv8",
106 | "outputId": "fda0c20a-a1fa-4092-d4cc-cc3cce15eeb2"
107 | },
108 | "outputs": [
109 | {
110 | "name": "stderr",
111 | "output_type": "stream",
112 | "text": [
113 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
114 | " warnings.warn(\n",
115 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
116 | " warnings.warn(\n",
117 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
118 | " warnings.warn(\n",
119 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
120 | " warnings.warn(\n",
121 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
122 | " warnings.warn(\n",
123 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
124 | " warnings.warn(\n",
125 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
126 | " warnings.warn(\n",
127 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
128 | " warnings.warn(\n"
129 | ]
130 | },
131 | {
132 | "name": "stdout",
133 | "output_type": "stream",
134 | "text": [
135 | "Original data (first vector): [0.5488135 0.71518937 0.60276338 0.54488318 0.4236548 0.64589411\n",
136 | " 0.43758721 0.891773 0.96366276 0.38344152 0.79172504 0.52889492\n",
137 | " 0.56804456 0.92559664 0.07103606 0.0871293 0.0202184 0.83261985\n",
138 | " 0.77815675 0.87001215 0.97861834 0.79915856 0.46147936 0.78052918\n",
139 | " 0.11827443 0.63992102 0.14335329 0.94466892 0.52184832 0.41466194\n",
140 | " 0.26455561 0.77423369 0.45615033 0.56843395 0.0187898 0.6176355\n",
141 | " 0.61209572 0.616934 0.94374808 0.6818203 0.3595079 0.43703195\n",
142 | " 0.6976312 0.06022547 0.66676672 0.67063787 0.21038256 0.1289263\n",
143 | " 0.31542835 0.36371077 0.57019677 0.43860151 0.98837384 0.10204481\n",
144 | " 0.20887676 0.16130952 0.65310833 0.2532916 0.46631077 0.24442559\n",
145 | " 0.15896958 0.11037514 0.65632959 0.13818295]\n",
146 | "Compressed data (first vector): [0 6 6 4 2 2 3 8]\n",
147 | "Approximated data (first vector): [0.54019654 0.36485555 0.80442656 0.52945912 0.26097166 0.43403487\n",
148 | " 0.35285817 0.79455641 0.5989435 0.21066257 0.73069621 0.33785665\n",
149 | " 0.68675651 0.60267202 0.57986277 0.37160765 0.21162824 0.87089306\n",
150 | " 0.59306895 0.63841958 0.79780639 0.57421621 0.56062129 0.51786635\n",
151 | " 0.18948159 0.69750145 0.27819821 0.56544019 0.49665064 0.25799192\n",
152 | " 0.19297701 0.41691968 0.26339078 0.26005451 0.34358506 0.55870678\n",
153 | " 0.74224377 0.57784322 0.79144145 0.52381845 0.61077506 0.61437639\n",
154 | " 0.32514294 0.19577561 0.56556666 0.63606577 0.18373289 0.39832537\n",
155 | " 0.31965271 0.4305464 0.79324014 0.57142561 0.58212128 0.22830984\n",
156 | " 0.64554358 0.29054842 0.8200301 0.66880309 0.49622709 0.46668542\n",
157 | " 0.25961788 0.46590464 0.69995335 0.24137759]\n"
158 | ]
159 | }
160 | ],
161 | "source": [
162 | "pq = ProductQuantizer(\n",
163 | " n_subvectors=8, n_clusters=10\n",
164 | ") # Divide into 8 subvectors, 10 clusters each\n",
165 | "pq.fit(data)\n",
166 | "quantized_data = pq.quantize(data)\n",
167 | "approx_data = pq.inverse_transform(quantized_data)\n",
168 | "\n",
169 | "# Demonstrate the approximation\n",
170 | "print(\"Original data (first vector):\", data[0])\n",
171 | "print(\"Compressed data (first vector):\", quantized_data[0])\n",
172 | "print(\"Approximated data (first vector):\", approx_data[0])"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 7,
178 | "metadata": {
179 | "id": "5s3QAzyfXNSx"
180 | },
181 | "outputs": [],
182 | "source": [
183 | "# Nearest neighbor search over the de-quantized vectors (NOTE: reads the global approx_data; the quantized_data argument is unused)\n",
184 | "def nearest_neighbor(query, quantized_data, pq):\n",
185 | " approx_query = pq.inverse_transform(pq.quantize(query.reshape(1, -1)))[0]\n",
186 | " distances = np.linalg.norm(approx_data - approx_query, axis=1)\n",
187 | " return np.argmin(distances)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 8,
193 | "metadata": {
194 | "colab": {
195 | "base_uri": "https://localhost:8080/"
196 | },
197 | "id": "beowe90IXPRp",
198 | "outputId": "9c97645d-17f7-432d-ca08-9204c898ca15"
199 | },
200 | "outputs": [
201 | {
202 | "name": "stdout",
203 | "output_type": "stream",
204 | "text": [
205 | "Nearest neighbor index for the query: 58\n"
206 | ]
207 | }
208 | ],
209 | "source": [
210 | "# Example query\n",
211 | "query = np.random.rand(64)\n",
212 | "nn_index = nearest_neighbor(query, quantized_data, pq)\n",
213 | "print(f\"Nearest neighbor index for the query: {nn_index}\")"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {
220 | "id": "pfCo3tEgVRcG"
221 | },
222 | "outputs": [],
223 | "source": []
224 | }
225 | ],
226 | "metadata": {
227 | "colab": {
228 | "provenance": []
229 | },
230 | "kernelspec": {
231 | "display_name": "Python 3",
232 | "name": "python3"
233 | },
234 | "language_info": {
235 | "name": "python"
236 | }
237 | },
238 | "nbformat": 4,
239 | "nbformat_minor": 0
240 | }
241 |
--------------------------------------------------------------------------------
/notebooks/Scalar_and_Binary_Quantization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "
\n",
9 | "\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {
16 | "id": "JENXK-euYH9p"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%%capture\n",
21 | "\n",
22 | "%pip install qdrant-client==1.7.0\n",
23 | "from qdrant_client import QdrantClient\n",
24 | "from qdrant_client.http import models\n",
25 | "import numpy as np\n",
26 | "import random\n",
27 | "import time\n",
28 | "import os\n",
29 | "import shutil"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {
35 | "id": "N36PQREaccy0"
36 | },
37 | "source": [
38 | "# Sample Binary Quantization\n"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 2,
44 | "metadata": {
45 | "colab": {
46 | "base_uri": "https://localhost:8080/"
47 | },
48 | "id": "N_DImynUYgl9",
49 | "outputId": "d5bba92c-fcec-4e9e-e112-ca72f5916790"
50 | },
51 | "outputs": [
52 | {
53 | "name": "stdout",
54 | "output_type": "stream",
55 | "text": [
56 | "Original Data: [ 0.1 -0.5 0.9 -1.5 -2.1 2.5]\n",
57 | "Binary Quantized Data: [1 0 1 0 0 1]\n"
58 | ]
59 | }
60 | ],
61 | "source": [
62 | "def binary_quantization(input_data, threshold):\n",
63 | " return np.where(input_data >= threshold, 1, 0)\n",
64 | "\n",
65 | "\n",
66 | "data = np.array([0.1, -0.5, 0.9, -1.5, -2.1, 2.5])\n",
67 | "binary_data = binary_quantization(data, 0)\n",
68 | "print(\"Original Data:\", data)\n",
69 | "print(\"Binary Quantized Data:\", binary_data)"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {
75 | "id": "SnPuUZcscfAv"
76 | },
77 | "source": [
78 | "# With a real vector database\n"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 19,
84 | "metadata": {
85 | "colab": {
86 | "base_uri": "https://localhost:8080/"
87 | },
88 | "id": "fvb3KflGYH9q",
89 | "outputId": "43652690-d78a-440b-e713-4fe221845448"
90 | },
91 | "outputs": [
92 | {
93 | "data": {
94 | "text/plain": [
95 | "((2000,), numpy.float64)"
96 | ]
97 | },
98 | "execution_count": 19,
99 | "metadata": {},
100 | "output_type": "execute_result"
101 | }
102 | ],
103 | "source": [
104 | "# Generate fake data: n_samples random standard-normal vectors, each with n_features dimensions\n",
105 | "\n",
106 | "n_samples = 10_000\n",
107 | "n_features = 2_000\n",
108 | "\n",
109 | "data = np.random.randn(n_samples, n_features).astype(np.float64)\n",
110 | "data[0].shape, type(data[0][0])"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 4,
116 | "metadata": {
117 | "id": "jpJh9ixsYH9q"
118 | },
119 | "outputs": [],
120 | "source": [
121 | "# Recall: the fraction of the true ids (true_ids) that also appear in the search results (predicted_ids)\n",
122 | "\n",
123 | "\n",
124 | "def calculate_recall(true_ids, predicted_ids):\n",
125 | " true_positive = len(set(true_ids).intersection(predicted_ids))\n",
126 | " total_relevant = len(true_ids)\n",
127 | " recall = true_positive / total_relevant\n",
128 | " return recall"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 5,
134 | "metadata": {
135 | "colab": {
136 | "base_uri": "https://localhost:8080/"
137 | },
138 | "id": "ieFVnBbQYH9r",
139 | "outputId": "cff4a11e-9a4b-40ac-eede-acd5d8088a7a"
140 | },
141 | "outputs": [
142 | {
143 | "data": {
144 | "text/plain": [
145 | "PointStruct(id=0, vector=[0.8739856615714474, 1.807544501917782, -0.3867121777810109, 0.7475418615175148, -0.011220441841003254, 0.45257889965144166, -0.9196444213114707, -1.0290491059951938, 2.918030652711712, 1.0421167848695043, 0.9405130118965381, 0.44188452295746605, -0.7324968849739459, -0.4153768193514054, 0.615227713264581, -0.18773011892615266, -1.1561768781843726, -1.24777927355494, 1.5964806315895208, 0.8547161224294311, -0.3038598659231874, -0.6439279820971076, 0.14474043756222754, 0.3090291498030087, 0.5472755250768923, 0.31077892217254394, 1.2022719476274863, -0.5398650761004974, -2.356072668699791, 0.2853584938779716, -0.13018253455324919, 0.3848423701222905, -0.42812877678193306, -0.8459145927895287, -1.213090240744919, 0.22027091205841778, 0.7649809671645519, -0.3088909853391043, 0.19020701290005637, 2.016694675476221, 0.027413739399153304, 0.6275031142213888, -1.7072609245372927, -1.1967124653346288, 0.3951523566787642, 0.19832771617608677, -0.41679453021084445, -0.3768448912047085, -0.4962602989791896, 0.2315142520286191, -0.25861984484140965, -0.808090021673335, 0.8285571831499642, -0.7220531549669087, -0.43623165832236016, -0.6382243611857582, -0.8635919258221331, 0.5833635347496621, -0.230069144649749, -0.3854583649462815, -0.6955460336316391, -0.09492573281138832, -1.6990531678387115, -1.4660830493359958, 0.6486390532000192, 0.034826949297492964, 0.18034836207235858, -0.010644131981413039, 0.6595363520916322, -0.48117843161865004, 0.5290767142322196, 0.719583313085802, 2.386143510365132, 1.276251437321848, -0.6380617141367132, 0.8583989912441614, -0.9409609814028781, -0.21511106201618868, -0.9398731599209335, -0.16100057282409438, -0.2854686475794584, -1.6133854494884647, -1.4307808842360343, 0.9752110172694974, -1.9911485031552356, -0.16582613459528905, 1.1596486233410326, 1.136073305957058, -2.400104140959142, -0.329001108811521, -0.15085624545440568, 1.150725221164357, 1.0864416721385848, -0.7215929170403728, 0.45985436393249707, 
-0.33563481473277845, -0.8881279197171073, -0.41395925554482277, 0.03142954304392228, -0.6895579346815556, 0.04901128644124648, 0.1838176795259276, -0.26473473992428553, -0.14735570193841493, 0.5947595873656595, -0.22358889411559582, 0.3649702363749173, 1.8022066917804516, 2.125491371073192, 0.28719837772873563, -1.617971743543826, 0.35745798551809654, 1.1270802690285664, -0.8758826287194008, 0.4403067903323955, 0.9899777442095141, 0.0525397155470262, -0.41257796678974545, 0.7005358823811887, -0.8370916045844198, -0.1227547950827695, 0.2083554183485485, 0.08321635521287682, 0.5108650118740367, -2.2043351373398514, -2.041452472089077, -0.5231759152284405, -1.2759729339092014, 1.238153836353617, 1.3482773908905843, 0.7398743694383785, -0.44494643127003214, -1.6657557354284218, 0.517752641748811, -1.8128304088127494, 1.158709526296948, -1.0912555517506775, 0.09235465191115129, -0.9395915185223676, 1.3008908964256214, 0.20833219501576677, -0.43424546694005955, 0.21083941881732063, 0.6174048297804832, 0.35112752391656166, 0.6234551567393238, 1.6995287896026419, -0.38034253658177336, -1.039808884462498, 0.01759958284022382, 0.6723068935717113, -0.2862215852574517, 0.12151453543949745, -1.1627663574048555, -0.6634088272706828, 0.5583566844179129, -0.4569480278643297, -0.6845612940400703, -1.8698816481253624, -0.040268341721252494, -0.2802853914823629, 1.967385263251446, -0.2361987699031495, 0.691848467924697, 1.1900915904116465, 0.9785775100444764, 1.422389420373977, -1.2977348090388228, 0.0253636370217711, 2.2747695222939477, 0.12376313528818167, -0.10066628075743671, -0.5379180326700851, 0.6897162634340557, -1.3889893963121513, -1.7634151722378186, 1.142489649900556, -1.0208772684603826, -1.7795490337054842, -0.701555983575828, 2.3434991577452053, 0.22476198405194905, -0.7118166245799107, 0.35935705134013574, 0.5393159604188043, 1.1627008366958969, 0.2836798030885163, -1.5217342712206414, -0.30330881709755336, 0.6499417126844457, -0.3511844739192, -0.9397962613027147, 
-0.5383418373069065, 2.6647317773663426, -0.5103249546979434, -0.6122750440325111, 0.1312668055841947, -0.26680923547303426, -1.5168772770602579, 0.10458441272289655, -0.5227869923704701, -1.6198540858473058, -0.3411731368593385, 0.746924260285729, -0.4339038572070828, -1.3839905009520466, -0.091329017691049, 0.4973232027973987, -0.11025203445914149, -0.06090403051150205, -0.21944266028484075, -0.806265353669887, 0.6911456400892636, -0.8615821719673875, 1.2074944965322478, -0.6638667958004415, 0.8219157570608214, 0.6948093184648547, -0.6078384677232166, 0.10286889838824445, 1.3236669645763002, -0.1422018228298019, 0.5691698567734607, -0.3951960722432252, -1.5760136294157234, -0.8306443950301385, 0.8602028210744199, -0.08008793015775575, 0.05000659412182344, 0.12731301202795697, -0.3450169953468329, 1.2672098242206151, 0.4232348089521366, -0.09792690008081752, -0.5836883087375134, 2.2743662237791393, -0.9597274678636308, 0.16439368829292106, 0.22010005765039156, 1.856013953137201, -0.5113112676949015, -0.5060195792382188, 0.5209417775178454, -0.013043114989990431, -0.8142705320747015, -0.5345881730065491, -0.06532376180536889, -0.30421759301018964, 0.01155801253549417, -0.9471180289513473, -0.4523253004364728, -0.2968798013212954, -0.9870393520960766, 0.6338553159653827, 1.4086979659606147, 0.5385536577540905, 1.3696957147724431, 0.5782745360803876, 1.3627483077146723, -0.9222891244325496, 1.1156923186978807, 0.7115779734151073, -0.5585789720315313, -0.593904471038568, 0.7563622432415409, -0.1479647854256813, -0.8889758806638851, -0.9279070360143741, -0.25491732871065065, -0.8615546363250921, -0.9147019617876051, 1.5863635457637177, 0.46265577981391387, -0.40111737076132725, -0.4735682245852609, 0.9969083787992383, -0.7859839625687204, 0.23513176954890094, -0.5117732817954718, 0.9869130926998313, -1.108762401510336, -1.6186732478188164, -0.09593834085637555, 0.42601740904966245, -2.00000373144404, 1.4146236614116967, 0.09319121146538076, 1.2974012241714643, 
-0.6117753691376677, 1.8264933658306204, -0.24536811664587976, -1.0477817553274533, -1.222439118372166, 0.14605762505741743, -1.4267919721129254, -1.6939449664446207, 1.2055148101267297, -0.6975622012121399, 0.1917022984766768, -0.7932612322449053, 2.6539828301697708, -1.1296611645986088, 1.8183272130714463, 0.21279182299475446, 1.1618529310476586, -1.5610802948077132, -0.5660325593727671, -1.2965107320847942, 0.33042300339394853, -0.2101564107376278, 1.6448948784427275, -1.5697791987489094, 0.6507858884016715, -0.09562108636722068, 0.5238642910365366, -0.0413007333640945, 1.0445691957652035, -0.6483697762657821, 1.124314523780108, 1.9948072441487672, 0.11653603862324625, 0.5419128179238362, -0.3503036239655612, -0.2024903856027717, -0.09491919343337278, -1.0914360854908272, 0.2772878909442388, 0.7175277637662891, -1.5743065584411517, 1.4956791582756284, 0.20043571565987775, -0.8614897632398332, 0.3522666177125854, -2.5940112031305067, 0.21347140418514496, 0.23375985425551424, -0.943357942791554, 0.24828335916692856, 1.179097709978088, 1.3511228625422627, 1.3831233253658048, 0.41940588808318424, 0.3856837540149298, 0.24326630166915142, -0.5705790216968735, -0.9672322369884202, 1.3884503150317768, 0.6440946203483854, -1.0088236618111714, 0.2519299706358328, 0.5694040349936582, -0.685179016274018, -0.9063428605732866, 0.6957346508903758, -1.789554466679911, 0.28621898947568913, 0.973378763109292, -1.89635337844661, -0.4935073164744471, -0.06776890572538476, -0.2281936783588049, -1.0395561137454772, 0.5584880188045954, 1.2550940075987709, 0.7826414744562139, 0.2585542528136326, 0.0423124364494526, 0.9186610558825312, -0.8729887134747064, 1.5652272532160436, -0.7962725596422613, 1.1641012064695773, 0.540800748146648, 0.4195865221721042, 1.7563657840454439, -0.8709761473105903, -1.5437947408409367, -0.8646109167030699, 1.1020894958701655, -0.2734173424802635, 0.42307828106798917, -0.5298199311774375, 1.2373156561594065, -0.10600344009915738, 1.8461451644820346, 
0.09176159029724638, 0.016962559847412986, 0.608841495481799, 0.6690305558390355, -0.8738682189088378, -0.7563982319657698, -0.6591026027600448, -0.14322492650572452, 0.8982409267780486, -0.7443785798750505, -1.2541164095321906, -0.08898288303923992, -0.7091609106356238, -0.14483068947052186, 0.9698557001546162, 2.0319577097106207, -0.17363603899753358, -1.2288911546731394, -0.6814387008924646, -1.2009538107713744, 0.30604347948299765, -1.1083111776119365, -0.3261216033348528, 1.4890407414243467, -0.8145624542670111, -0.25847729286013477, 0.3301784575176401, 0.5597212600577293, 0.30697154558231465, -0.11019742371384686, -0.26373363481430456, 0.7821607874065782, -1.5521382960798558, 0.18520861197789681, -1.0882992759669683, -0.8427476691297193, 1.1022550578454484, 0.02174368370805144, -0.16371528410266964, 1.341626830727837, 2.5909642040214735, -0.06439142034827636, 0.34552834345586275, 1.37445007506629, 2.312675364325059, 0.15433563559712965, 1.3400601088191246, 0.5663979918190727, 0.591594145185851, -1.7005154568359095, -0.1030893396544319, -0.001962691648805766, -0.6317364950800864, -0.05256078270315826, -0.5229753162397426, -0.4916898807371568, 0.19185253764792606, 0.4821466929378671, 0.8210012253967427, -1.4457036969238104, -0.04570233638512601, 0.26942989615372753, -0.9088670413828271, -0.6222116252127856, -0.0905751269025448, 0.4654678075953441, -0.18251965295294595, -0.7487306500534452, 1.0399241836132367, 0.6796360368472725, -0.44880999597194676, -0.133252625925194, 0.5070153529183737, -0.430789392508267, -0.576720415777378, 0.7145822709360119, 0.37675843584950414, 0.5085571870152197, -0.34639540032609356, 0.30236465688579917, 0.8916354266478563, 0.2667154875835729, 1.4255076142617171, -0.4298954910139599, 0.5867259045872778, 0.4739705672256915, -0.12546180159178583, 1.0029621417338943, 0.149602983502331, 0.2783315642168051, -0.26480220745643623, -0.792357140850614, -0.20300326692286103, 1.171227308061891, -0.15614112648684497, 0.28628407680831974, 
0.7800020280852357, 0.22292211934804182, -0.06796674185411358, 0.3557041545962791, 0.6403168303744173, -0.49218540199910854, 0.9612063723909208, -0.5037290133459246, -0.26881581750361133, 0.5098589623382531, -1.0388184643896239, 0.4404997636162868, -0.7315674314713295, 0.8538460216501939, -0.512782139974541, -0.4115564803794845, 0.34203404550906935, -0.901208857632116, -2.8509649515440496, 0.9282687003429269, 0.6066895231553071, 1.4086558420268058, 1.402912717815279, 0.8520781546121061, 0.43570037174945336, -0.3377528828064129, 1.7703842662183449, 1.1601004200015603, 0.7461428107109984, 0.2828260994173198, 1.8440107880597996, 1.5353525849820548, -0.32209327214228684, 1.7952497415609365, 1.1110839675118958, 1.3786362119267037, -1.6770693148573748, 1.481782476296522, -1.4421581170562894, 1.907298314702263, -1.7683112700027237, 0.5605296534121207, -0.619737072938621, -0.11384715332292172, 0.40847993617099465, -1.5517175679279651, 1.1923696107431334, 2.251926768935434, 0.44729154492175077, -1.3138761228792646, -0.8579324054222918, -0.9735374025316328, 0.2751020638566914, 1.374966635707915, -1.2924459766589271, 0.40469769189097715, -0.7162521629584148, -1.0329657269388046, 0.044284921201708315, -0.6773117379815284, -0.9017003414440038, -0.3521475589960403, 0.09041363798320852, -0.3090307090314419, -1.4540066432469059, 0.4464847418234119, 2.293209879457769, -1.2751528775787986, 0.8646398455186836, -1.3863995375446259, -0.20070944571118346, -0.2170646939839637, -0.8995361790252235, -0.2596539824808564, -1.3383297962293443, 1.0868866808701878, -1.2287607791449957, -1.2266798423844727, 0.11872590939583381, -1.031207884386554, -0.6115247562526889, -0.917826412267748, -0.3013975358538834, 0.24481660240281997, -0.648690129848851, 0.21807220192668061, -0.180710350169749, 0.26508136169564595, 0.8036366986791662, -0.6782681865332831, -0.7318427110050485, 0.125203238646975, 0.25912388538870257, 0.23324860414600704, -0.6221446417329906, -1.2585199097799753, 1.5844009173039746, 
-2.6765281531324994, 1.6753660738622098, 0.5896206603267488, 1.2774031550245797, -0.26518513887124245, 0.7199468809657413, 1.5211453051964028, -0.8481037041658348, 1.8243747474743592, -0.32266294930421546, 0.12978097537001648, 2.074044236750424, -1.1268619643021274, -0.6875084756566904, 0.20563655869907416, -0.41320720158846747, 0.5927779569651063, -0.2760412458636729, 0.8101390602055154, -0.26175996482495706, -0.04172982022961745, -1.3084774960329497, -0.5745084653794275, -0.5966077559798149, 2.1495881698736365, 2.0383644763823585, -2.285805377991299, 0.13073050569230948, -1.021153941335237, -0.014387623207345808, -1.0678276564780964, 0.16423462836000627, -0.25641052887774785, -0.9097479438716664, -0.814785756307686, 0.5571478592935751, -0.36527568219161255, -0.7948832296484798, 1.8745363628553122, -1.172787791913729, -0.5364842177496785, -0.046393103549231714, -1.176556001134429, 0.6128295153128935, -0.9934367580835219, 0.32643108703309676, -0.4341907463006036, -1.3985831529205628, -0.958891140473352, -1.0195188370564536, 0.24744067034541672, 2.09523917102666, 1.7284858382232373, -1.862106134414064, -0.5337270182177258, 1.7258916800174842, -1.7292372784617458, -1.8795765421615347, 0.22542013267680042, 1.2041888901281408, -1.1767945910249378, 1.0379675565530386, 0.5269199312204694, -1.1699157665042226, -0.5054507536570928, 0.5496441741228413, 1.6865856434730069, 1.8194883715039307, -0.02943914339102388, 1.1763547204669835, -1.3006850852318372, 0.4360063199906842, 0.004675450941487546, 1.3828281086619696, -0.07522690805918332, -1.6848945847913057, 0.48540039904882887, -0.9334877460884445, -0.4678184108259975, 0.7595515328710757, 0.010590398516091733, 0.3591803576542262, 0.7237216073854897, -1.029898117829915, -1.2776445911649352, -0.40955370709763556, 0.18426336683416877, -1.1920651444985426, 0.37608663689271987, -0.1871381083434368, -1.0505213893015197, 1.1750636365559886, 0.7012917277506181, 0.25678097920743936, 0.820253316610241, -0.9429455439478871, 
1.1915202252096944, -1.2079066318049854, 1.7963727876161164, 0.7364335259164053, -0.7038589984336157, -2.2368946856900203, 1.0012803768709604, -0.46180908055036723, -0.3701761096681924, 0.7045649228970661, -0.7191446620479931, 0.20990946035138025, -0.9853972746897315, 0.39664451285599184, 0.7944071856745347, -0.9334915095910968, 1.702349497952416, -1.3588862845737135, 0.927932214995008, 0.4646310761957531, 0.7485931544471043, 0.9719803203873983, 0.2438659132119836, 0.2398269449865993, 0.39299602559074015, -1.3997646395532453, 0.22849422986525122, -1.0162979314703435, 1.3665780509426908, 0.5332656339590457, 1.380906723349161, -0.09545471305961607, -2.2399832372623774, 0.9518131850619272, -1.2006449869986293, 2.226985244662912, -1.538842113588818, -0.7770416764018844, 0.15927181886139652, -0.8215364800127253, -0.6533325204526949, -0.7384958074533362, 0.1674077172587117, 1.1762351685778192, -1.045571337964878, 1.3796188854594686, 0.6441594852733391, 0.775179108987916, 1.664029074122316, -0.5533689012952354, 1.3259685787349988, -2.0457209493206516, 1.3192010513213668, 1.218307654889701, 0.7388865736823325, -0.2346300532731865, 0.40730176552809283, 1.298024624753732, 0.7345944727747447, -0.8041433977606076, -0.05266184781591866, 1.625526005655891, 0.10810222092440826, -0.5051066955667007, 0.8676998473130714, -0.651071359001196, 1.4313609104449974, 1.3213006274401173, -0.8483912291599476, -0.39752870307259713, 1.7915348657006116, -0.3130866558147973, 2.479102654286555, -0.06948379445534886, 0.7528565998778501, -3.3683136967466067, 1.740169213120046, -0.908265095292864, -0.4620279040191046, 0.06267191117539996, -0.7990311054374428, 0.31957072464004244, -0.021614271590181637, -1.1481279033735188, -0.5001723458379502, 0.08653733563339588, 0.4439504405419998, -1.0627169644743926, -2.1918049225787772, -0.08871021574784907, 0.34827145430289963, -1.7185192498185944, 1.83923562296338, 1.552642784743935, 0.699264917232534, -0.49555371518962, 1.7303484129475954, 
0.1784951867649678, -2.163233794738634, 1.0988550350095907, 0.49775442712536966, 1.3648061872796378, 0.8290857082728699, -0.7675919740866475, 1.2736163418065445, -0.732445029498191, 0.5012538704324375, 0.5356438891148995, 0.8446420042914694, -1.3913594133036478, -1.420705315861281, -0.8853111470046598, -0.8269981493643119, -1.0030473949818957, -0.11706715590128203, 0.11350716186166226, -0.6347647354227907, -0.870092726178067, -0.2558633688077023, 0.7300414225845165, -0.40115484776060634, -0.6101605874927177, 0.9554016601828329, 0.7443882837663842, -0.7347282225713324, -1.162096263391531, 1.1607980242831453, 0.3023115941811925, 0.5509707567194665, -0.6118400335752746, -0.8120968701498054, -1.0378881892318972, 0.5416796343109278, -0.00848916488569995, 1.5550313283128996, -1.0654108671980436, -0.795447955846968, -0.23618513566087768, 0.7541268078648524, 0.6029863466782375, 0.49115739342526604, -0.6108140649533065, -0.7910785475011395, -0.24151527604738368, 0.13995100451394227, -1.4747496521800645, -1.5534696774503967, -0.31357545645890644, 0.8283664744651716, 1.520410829370659, -0.8652331899007152, 0.9687728286425631, 0.7469908835135253, -0.45596986075515444, 2.0736388147687386, -0.9128522375380899, -1.558565981532091, -0.08060394307324573, 0.4574923719288784, -0.5373928142805574, -0.041654634121064266, 1.2932069004599342, -0.34390245310852763, -0.04278343336999984, 1.1671441562100933, -1.2612384059145931, 2.028750659021486, -0.4054892415803786, 0.3062839079542207, -0.24312003953846442, -0.6102845345611102, 0.43935260922557745, 0.8641468378168895, 0.7259226330778745, 0.6479010385883012, -0.1099655043786616, -0.6682988738581779, -0.31561192979646097, 1.3716191312171473, 0.3195401238390076, -1.3241693707283024, -1.1879732280786257, -1.4204057253711764, -1.4701215209856455, 1.5765623501481323, 0.5490465558171254, 0.979047264397118, 0.21535354030081502, 1.036739511818943, 0.6330687872486253, 2.0407317994414913, -0.7956228255852679, 1.3082813972299503, -1.2437066903413307, 
-2.310835382344026, -0.47384411883076877, 0.6035868781242854, -0.41528558274938204, -0.14163385830017122, -0.8554070964609389, 0.18464073164914635, -0.928189567378024, 0.869620056379157, 0.36324155432517685, -0.8006062113654259, 0.23298419112173321, -0.08610130281563935, -1.0095210577747102, 1.78793873863253, 0.2065778981613746, 0.0679110842687391, -0.07773818879462864, -0.7214744622939873, 0.9767934377603481, -0.2663321073285069, -1.04665150287966, 0.1723612589533114, -1.6471530670568952, 0.5803674885293802, 0.4984451191026125, -1.8757255987779395, 1.0679930181837607, 0.6408873819115, -0.4041543858252602, 0.10740245132036325, -0.4398593865851238, 1.462347624489168, -0.12698548247924113, -0.3747525805429678, -1.4290117551801498, 0.922674571821963, 0.3003724316609889, 1.3148089324854138, -0.8110774923643627, 0.08520254101636562, 1.222188394351855, -0.8981222010023386, -1.1652642397763526, 1.7374027138504506, 0.20787477916717828, -1.5871657456669614, -0.6670147772828121, -0.2426320402288069, -0.22524849599564928, -0.9386844670885864, 0.12890927001205657, -0.09752178539525468, -1.0327245709301318, 0.1284253606810811, 1.2705312971631202, 0.9575945020347184, 1.542540566146314, -1.924561015807509, -0.9030011945472559, -0.42611690200552, -0.13298606270659197, -1.2287296209580554, -0.7148911171996385, 0.8762550066172035, 0.1794605194128548, 0.4535228803294127, 0.5688030864835869, -0.7868914104697946, -0.8972249184110536, -0.9609431209520729, -0.8758216036792366, 1.211042890635401, 1.471888745178047, -0.7164794710745208, 1.0948486209408481, -0.48091511214337396, 2.1451133504448565, -0.3798590212998682, 1.4054650327800748, -0.8921990626285892, 0.10108357932918514, 2.035125019400607, 0.8346287903151435, -0.1373202481762141, 0.20228533344385727, 0.20703949215112144, -1.3470023782213159, -0.47454535827979843, -0.2738005578843613, -2.50712107048958, -0.388249114765123, -1.8285486069843124, -0.7293737245620747, 0.7217725712913249, -0.5071893808605782, 0.3862334437394401, 
-1.2209337294967602, -0.34140792612944404, 0.7748068473671835, -1.0047111896987782, 0.9163057701533942, -1.4449053457444754, 0.18424269792305406, -1.3578713483387248, -0.002453164487439328, 0.8774031446956307, -0.7009272883826753, -0.5245219706902311, -0.7824193307375615, -0.49327347272493804, -2.62874064412338, 0.08188517343188163, -1.377478706369955, 0.4266685120165082, 0.5965965601950391, -1.243556804023239, -0.6178031577518529, -0.4706060575848744, 1.2964801230985301, -0.6467443647843247, 0.8147379506653111, -0.9573363243455577, -0.43708412192215895, 1.1023885258230504, 0.25278520558813206, 0.03172907688882857, -0.5692841437006686, 0.17629148018639246, -2.8922721108401177, -0.6108385214194457, -0.41874858095885065, -1.9154836370892923, -0.6063219643097044, 1.5412922256186465, 2.1204405956608716, -1.01065421435547, -0.5434931144945486, 0.03636076688009171, -0.30600675705986385, -1.7165408090624312, 0.17935341960400833, 0.5657132769139446, -0.5608558874304533, -0.0456633527031202, -0.7014123423584304, -0.736683405921145, 0.8514320592693096, 0.11882118043211998, -0.6296249366649763, -2.3975665590167576, -0.37525586630608987, -0.8579464022844322, 1.788801685120996, -0.2883002145950792, 1.7177284998521287, -0.5897571986231824, -2.4567058598985967, 0.8372323495194054, 0.8830425644942801, 0.5810874294206456, -0.2936548736352306, -0.7552662180709748, -1.5704048538874875, 0.4379303278250992, 1.2490477537969145, -2.00880504886997, 0.5240585160304959, -0.032422196725695435, -1.0502420618958743, 1.7500097282520064, 0.5044858197404513, 2.304628905671056, 0.22955359917762141, -0.6440089998160929, -0.28797701768219436, 0.7644598464683564, 0.6943282759938817, -2.369582362202677, -1.4319367756207277, 0.3039521132941967, 0.1723656184372106, 0.4513167171103222, -0.39488649319349295, 0.43757139160556524, 0.37748755194999695, -0.7242093322602084, 0.10070602100027537, 0.26888889010386036, -0.7436501468944465, -0.08738714228689815, 1.6536479696683963, -0.6761391390460322, 
0.481113905044138, -0.5188816047471286, -0.07274153776483046, -1.5472995528237108, 0.8963117251092272, 0.22016624412443994, -0.4230890155197589, 0.531969757471415, -1.5734928249718774, 0.8063943400479304, -0.5768953929548798, 1.2569939817090419, 0.9642707558927879, 0.8937273382059298, 1.4475909162089313, -0.0376595413300924, -0.6073779629085945, -0.3451365149668589, -0.21735007617218677, 0.2559544643890874, -1.0006784775890183, -0.7665406766866958, -0.04146350400864777, -0.2947201521729735, -1.8594389071209043, 0.41843939091765714, -0.9605291776150933, -1.543218383206259, -0.03099267935689953, 0.6238177866365001, -0.18150181929749726, 0.6018940317718751, 1.6510294273791368, -1.4763033006471469, 0.7791437053886265, -1.0830001801853093, 0.6383917974545249, 0.2009541389269373, 0.18500320589488467, 0.8868173239997531, -0.9216335093884569, -0.7593594773185075, 1.6746982084503759, -0.2647132147060413, -0.4010207243603685, 0.7175210772115517, -2.00259699951647, 0.9092292480229001, 2.02234200397613, 1.502800400508659, -0.19485087636402726, 0.10745640866036332, 0.8280192221876356, -0.719107604968126, -1.397655940834189, 0.1955354582179154, 0.2571307711663721, 0.7754174800334456, 1.603621080169364, -0.13830376668294816, 0.8882405993545711, 1.6006717497581193, -0.8211748290755523, 0.5976881774273867, 0.4234753134816082, -0.19140240792995564, 0.2558548388596074, -0.22931479636264043, 1.0995662637546046, -1.878368842307892, -1.515618917638002, -0.5606349773433865, -1.2753113165058512, 0.8360853373206024, -0.9774106205661572, -2.198394511529409, 1.6061198307950928, 2.1721383883624585, -0.8631435893771715, -0.3332146027388396, -1.6158442050784971, 1.8144457419386826, 1.4687084214117088, 0.17481667373342152, -1.59428577475825, 1.2395411376688399, 1.9052309774464542, 0.003965324496243773, 1.06125950334731, 1.619055701211538, 0.8115278989334432, -0.9975997177235506, 0.10784282877848587, -1.1482047866601663, -0.9613845943688877, -0.2307541106945451, 0.5126747561287869, 
-0.727828178317672, -0.5413829740438163, -0.8825814462611747, -0.1154354529637051, 0.16779110519118579, 1.5969333789296507, 1.8741520328395995, 0.6309831111156765, 0.47231808626577015, 0.7972812634414831, -1.2743842272154904, -0.7294671374756313, -0.07991456408893764, 0.9827271363960262, -1.7523241949517168, 0.03659540350553565, 0.11655283300851833, -0.5347759457505273, -0.7459575855790914, -1.0603635130368718, -0.09169796513572699, 1.0965103127379452, 1.6073404746067006, -0.05575638386870657, -2.0946259632805733, -0.06504506624805112, 0.03998337211250203, -1.3201404508864156, 0.21950424672561247, -0.06543832843128092, 0.05374255638541855, 0.5731083218420642, 0.023062132978781, -0.18331368584170438, -0.5430146242490301, -0.04928405909095008, 0.28679383912971035, 0.6548913037508406, -0.7403994260624559, 0.9610147147751577, 1.178947347878497, 0.7340450120237932, 0.052397887893558114, 0.03750792299285495, 1.041935207398616, 0.9817141926702226, -0.5679075925003171, -2.4314029894892464, 1.5396829559696479, -0.8952730129334676, 0.5166822147373515, -0.2684015459995828, 0.33334466759024706, 0.5600935306811604, 1.6891556955352678, 0.7969442240277408, 1.5594450044746215, 2.180294765534068, -1.371349271283305, -1.051276753603631, 0.621743318349236, 0.0018388288836425353, 0.3000608414619472, -0.4572507477557409, 0.2724655456707352, 1.0078462342359071, -0.1868627133852275, -0.6642347378905279, -1.2641259756133947, -0.7168051447489698, 0.06166118865276984, -0.409734823451057, -0.1550514548003163, -0.4392451880469997, 0.9360784713845819, -0.3973423327026237, -0.34980688682723143, 0.7064262848872751, 1.016936397771855, 0.16357378454973057, -1.2874885658968718, 0.749433326314927, -1.3853466691901746, -0.06456227380745626, 1.6629082331658713, 1.1013732324981031, 2.307831660716908, -1.8345100814312494, 2.393451924503449, -0.8088894178341166, 0.794849057204957, -1.0417851718860875, 0.36905489731557634, 1.1336595062541583, 0.8647396389358875, -0.2523754307178325, 1.2789929226110446, 
0.2128024337788296, 0.7563670907285992, 0.6279547182404533, -0.7504621947114014, 0.7547376608158064, -0.0015136660280026023, 1.1944993506474633, 0.34888962329631246, -0.549067173660412, 1.4964138582110182, -0.6689297301414805, 1.8312697518743997, 0.8736801037216783, -1.094181461448198, 1.1081478927283375, 0.7441506573270389, 1.3874074482077245, -0.1355772142674292, -0.7642313574231182, 2.1263179519558175, -0.3615450156425273, 1.0620002971448885, -0.1895530485920172, -0.93938648740444, 0.10477090142588719, -2.094907349563714, 0.20782710455910647, 1.0825552573037804, 0.06853185511114315, 0.3632396297475239, -1.080381347156782, -0.7587017927333454, -2.434020881475415, -1.5964029128332156, 0.011745481573955883, 0.6686120403761413, -1.2806388469212846, -0.7541106208484624, -1.1552592331799854, 0.3187316541772342, -0.6488408395434028, -0.19064638527975158, 0.5160119488250674, -0.4010679747780823, -0.07839695278473996, -1.6398103698567725, 0.18069782752881774, 1.8404440169120564, 1.8830623322659767, -1.3294581545246849, 1.729087442779956, -1.8685502940746783, -1.169160199286787, 0.6698512061612744, 0.28602877051228953, 1.2292619171889012, -1.7948912652948281, 0.17688530191563248, 1.764517843970352, -2.5280212055778564, -0.9013026372890343, -0.5045005863178184, 2.053727582847807, 1.4570998733963358, 0.4424916734251097, -1.3411002170410602, -0.12254100214990367, 1.0679512840746104, 2.1637843202051092, -0.028840189206087725, -0.6303198589668074, 1.5756118524738014, -0.7981948666922393, -2.3958725940739285, -0.530836262754282, -0.6743153377474026, 1.7266282329163525, 0.7874563249832119, -0.7979734847563118, -0.8962260306094234, -0.87208571528837, 0.5171258120317094, 0.9972960692116409, -1.0641285779346359, -0.9474526005008098, -0.31554566803599426, 0.01705599162350719, 0.03268686649027622, -0.6098905094404874, 0.3957140286934079, 0.6408495842543688, 0.443527888426491, 0.9461085932266705, 0.8868902063994905, 0.9756063800800938, -0.6416164040434029, -0.425159193292544, 
-0.058345342916044936, -0.7241467880934279, -1.9901734301432077, -2.7091786728698195, -0.2351987835299176, 1.2448176850038597, 0.9708682905214576, -2.369506517223338, -0.1477969880087413, 0.9211217589381155, -1.1434252542754881, 1.5854052995212375, 0.8832519866122185, -0.9503918561653486, -1.5044018174526805, -0.3482086395213158, -1.0984647013164037, 0.05863363876727027, -1.0565962688685016, 0.589839833429577, -0.4561236466451523, 0.34137035741896793, -0.9754523291564361, -0.00217427626582553, 0.6353955805715834, -0.011329110925421729, 1.386581233341892, 1.2830640783925962, -0.5484836722184543, 0.5512678728547291, 0.07157538462713359, -0.026535868095555804, -2.7423164628088035, -0.28780210512624294, 0.27002354826825453, 0.30663556561120076, -0.5088001128712263, -1.0638733835345313, -0.18426466244941747, 1.414214971614511, -0.3237317131095091, 1.905164617218405, 1.6211306183532468, 0.7691388849624123, 0.25788609085420305, 0.17339085096373957, 0.17068706355227645, -0.13941076941325625, -0.2517623822367549, -0.1791051708134397, 0.9623670172332042, 0.8789504708989458, 0.4318777122281319, 1.0302645168422706, 1.1961051628649697, 1.9692581688683017, 0.8211296191115937, -1.0608217314799375, 1.1798683643486112, 0.9319760995472469, -1.1674635137872673, -0.25783962035866076, 0.1661089113873339, 1.154397545844328, -1.0166454942738863, -0.4583490684755798, 2.5268051984638977, -0.041861847406327465, 1.8651125421675416, 0.6299042436493635, 0.44254255841650486, -1.2699199273830686, 0.9418652082328122, 0.8200175964210318, 0.37853389740620624, 0.23943634660451468, 2.030924235706882, -0.7165088754842898, 0.6779777835342544, -1.349914438618158, -0.3165131102582926, 0.08686146660433183, 2.213234163419212, -0.03570424743812925, -1.1722253718764744, 2.1773007795187453, 1.3837953850314133, -0.10710323458040147, -1.4548722812965549, -0.3970577176118276, -1.374043657210044, 0.7255462735430791, -0.7683268330568346, -1.107300560484893, 0.6536981535594798, -1.0456335063287074, 
0.671539211095781, 0.14177014236811075, -0.7720185432172093, 0.04628206694560977, -0.29144204822737435, 0.44111665435913106, -1.4468044237745188, -0.4948434031003428, 0.20973148846957482, -1.4705352743356102, 0.5509263805094416, 0.14836049703598717, 0.37142650110567194, 0.07671701390904923, 0.1832721674392343, 0.046938838073239716, 0.17670916933534012, -0.45804825701073837, 0.43023416286875543, 0.994531360232782, -0.046781434678865176, -0.6411281236926395, -0.8928000153166127, 0.5020270491657642, 0.36740075971319863, -0.4827108385908195, -0.23685866589629484, -0.8074519272656216, 1.5748268236440153, 1.4192853096349365, 0.09020547705776319, -0.6652511770649884, -1.9992167031321493, 0.4791188545948879, 0.6279108483861092, 1.0074790653617451, 0.7676312157522953, -0.11553684419657606, -0.4591865355248419, -1.639849492617964, 0.4130553130463872, 0.5390254097685058, -0.4349639740477211, -1.3192207739061903, 0.921095661636292, 0.11186833365594734, 0.3441768337737832, 1.4783039534492346, 2.263085347294753, -0.8349618339008534, -0.9049359600066962, 0.024162923476669818, 0.6066399333702527, -0.8417612166698258, -0.6961421526795921, 0.3202978824363036, -0.19130470797006155, -1.402607668854599, 0.28497046631102485, 0.4905213042336202, 0.19793455181657382, 2.6301564123361576, -0.16176932304370256, 0.44717956503954587, 0.8699840049513902, 1.234353436189499, -0.28804951892441494, 0.313974160529083, 1.1623300287931133, -0.48292707721980854, 0.1367494895765361, -0.20193916479192547, 2.256921332752218, 1.6374385645723855, -0.5461994538365549, -0.07554343845331998, 0.3692865266488121, -0.8084784558917282, 0.0765934842753864, -0.39039605912469244, -0.6517502322106101, -1.0999972515714094, 0.12141787168490469, 0.4223582871504148, -1.929776945067296, 0.1491648289319484, 0.668905036114273, 2.1273296694966035, -0.8341058352970173, -0.0038662459408752574, -0.4177766367908073, 0.25273869215538547, -0.4756713774592941, 0.0033228853696081555, -0.286753183458628, 0.32745535473866155, 
0.025329830457291404, -1.7197090697488417, 0.8757768384549623, -1.4248328845959488, 0.37552052373808614, 0.8693679749864636, 0.5551853589509166, 1.454617789271402, -0.26632041516147126, 0.0006770720257820141, 0.9314972324397336, 0.11626025120379672, -1.6743920644425254, 1.4928084495173761, 0.07055981871884472, 0.45336863171779396, -0.5160549595407239, 1.152662508852422, -1.7497056098282482, -0.924700089823277, -2.1717265120499243, 0.5958928427533126, 0.6730883408493645, -0.84805585288396, 0.4824066261132788, -0.052614435678208706, 0.825405208732148, -1.6333874171339615, 0.6277507337962961, 0.249401284233602, -0.4109176797778198, -2.347189584557617, 1.1845645724697493, -2.3521670455572896, -1.1316320091771654, 0.5992380911013792, -0.31902124169706325, -0.46964102762822674, 1.5156994673978605, -1.2946608083845053, -0.6020676714959547, 0.3249742001636929, 0.11711158728440947, -0.007321114964193679, -0.6181879560435876, -2.134744413791715, -0.7112983608070629, -0.11597423642561937, -1.4744999523005498, 0.8621810658863058, -2.168307957202582, 0.3918226497225292, -1.1368285620990515, 0.10458156928330019, -0.07406496018493203, -0.40015460187469537, 0.12235621760846344, 0.2984735850943185, -0.29143957929644393, 1.5249246290756702, 1.4597426694367543, 2.0259171623758534, -0.590817527789978, -0.2936227843522231, -2.350563685161422, 0.5204520037274206, -1.0666412352357653, -0.27697862639741316, -0.2148124943518122, 0.6734957785469392, -1.8053516365213391, -1.778141783937991, -1.2084775577449522, 0.9934495828121953, -0.24423917623197028, -1.4813474233808355, 0.33112243279131987, 0.700726302636986, -1.2690864394958483, 2.50077915159786, -1.3575567383953087, 0.011926046930440538, -1.3193374579057675, 0.4889304126129667, -0.9109550658531209, -0.7341971062366668, 1.3350911986632095, 0.1891570737674318, 0.40709280499254247, 0.3802019536004736, -0.28204633202557666, -1.5977055097307233, -0.02911650264897409, 0.6131157184149074, 0.25149177652210464, 0.03382356876055713, 
1.506606658949116, -1.7344902302873029, 0.7452744776446871, -1.9107272401785926, -1.582999530211274, 0.48222225973271177, -1.0384903144668758, 0.6725568062565108, -2.5046318569934747, 1.296511149503862, 0.853269156450691, 0.11943152619597955, -0.3450209857946171, 0.9312567740442399, -0.6707066875133985, 0.005142200140689065, -0.6622425728926742, 0.5590322499564966, -0.6705553220908641, 1.573364643401545, -0.9378192128795597, 0.25897411687301325, -0.523235177555831, -0.2853236646766811, 1.8425943763418455, -0.1207820601254575, 1.0012098031133287, -0.7256239293102813, -0.02271779763228539, 1.0036542321722706, -1.135909216473155, -1.189815381617683, -0.8493115552807492, -1.6888177375943607, -2.489616420576527, -0.41126522124777354, 0.9961668603829419, -0.7534275973480015, 1.849790093450653, 0.06921282422478346, -0.6074393733970507, 0.6619417712667524, -1.835004009108793, 0.3768387586158781, -1.1821904434754555, 0.014573407370971452, -0.7433444811234973, -0.329552484293008, 0.15018469760521694, -0.10973605653921424, 0.14420546653917488, 1.6304897807065826, -0.5968651613380547, -1.8023280044426582, -0.4305829392358626, 0.2557762607970183, 1.516870117861496, 0.5594967935525517, 0.0062557010311553056, 0.6255320341623587, 0.8339109137110668, -0.5635330527719713, 0.9218105797964087, 0.33145218259974435, 0.2889855465173865, -0.542335685178416, -0.2622889302699205, -0.8533813314567203, -0.449829299558278, -0.22917935684040222, 0.05743101469603045, -0.7296712922762247, -0.4814272083750075, 0.9271880724100623, -1.7158890136904523, -0.8842189210847783, 0.029357725437302862, 1.007095825780661, -0.6645393419822868, 0.4762254637014204, 2.135843118275408, -0.5131457945677272, 0.4341593057514976, 0.5547203107096045, 0.5582809372163969, 0.7326025512624773, -1.6004509435398273, 0.49390903300786004, -0.6273589411453684, 0.6204249584830497, -0.004749275153930262, 0.11525059305341144, 2.630624977560361, -0.26780218945171297, 1.094811068333461, 0.49860174981868177, -0.4975802795241047, 
0.4418211270563782, -0.2851503334755573, -0.09783270716273874, -0.8056869415410617, -0.021836720949549925, -0.14327850353537921, -0.6134897130218929, -1.0908833087276866, 2.1485408375506436, -0.8502572490323435, 1.566069133322983, 1.6547207754540483, -0.29716084382953856, 0.6442619975668572, -1.6712756215556708, -1.0418433414147792, 1.5914121576093476, -0.12999723328116145, 2.2625040473786706, -0.6887600687835639, -2.392935678267709, -0.42423043984737213, -0.6453502398908528, 1.780850939443096, -0.29512813182874487, 0.6602842350181513, 2.5242286604271165, -0.768971612627111, -1.2499225563579224, 0.9896456044096121, -1.4583348201998096, -0.30019832681301145, -0.2169341800223744, -0.27918194220679776, 0.3082918845836181, 1.0745604087452207, -0.25497733629641756, 0.7436548330690215, -0.01798750766965811, -0.16698097995261732, -0.7596463403247344, 0.527075586096896, 0.20281948715540102, 1.0242386912638126, 1.075572733880468, 0.8435022795285625, -2.2448561497518598, -0.6486272087020284, -1.169450318895309, 0.2542761305625886, -0.368379912493706, 0.859601777933363, -0.5994438547345085, -0.8183299307417696, -0.996197764519836, 2.2639040845536953, 2.305251085859, 0.2570216983623788, 0.8211962638252076, 1.4543884394473836, 0.5142693861834959, -0.45100900087126905, 0.6367337816492784, 1.0334005587201107, 0.14284388692304759, 0.052926333489835095, -0.062313352727305475, 0.14683480238413224, 0.45923063376648066, -0.7463819418222971, 0.45479021185749835, -0.612376544022461, 0.8942631625777544, 1.193834252970334, -0.737905635740084, -0.30818387153031457, 1.0611707827228634, 0.16623162987098344, 0.08677775643378552, -0.31598527003860044, -0.5952323581377298, -0.6653619945042116, 0.8407887262811935, -1.1700191067922059, 0.14603471865875844, 1.560707477919896, 0.1276677899675394, 0.40771943432022456, 1.1960962415191534, -0.0691138093161685, -0.25811271087202814, 0.34070318408510397, 1.232272274230928, 1.1332771507171804, 0.8829602335192459, 0.36161594829311156, 
-0.18639018358016712, -0.1864775727504895, 0.6175617309270467, -0.7432181218240466, 0.1162407894806796, 1.6089484442646291, -1.6270975060753263, 0.7181710999956983, 0.6390076062081216, -1.1993755576201084, -0.3678974047917413, 1.019391926209488, 0.937729905413773, -0.39967899996628253, -1.5449000725792745, 0.517751255230349, -1.1860949887196317, 1.0079294987623286, 1.3933158676972373, 0.4499285623607037, 0.7214366719331048, 1.2596632744367866, 0.8879743019440172, -1.0133136090410004, -0.18400518591537182, -0.37727270917671213, 1.0727716665848628, 1.1842758379989071, -0.8230847486245987, 1.2034983985466399, 0.5598595739059454, -0.5921883421170341, -0.9871646403746361, -0.28204630785494894, 1.7419580084671538, 1.8704781272401372, -0.5756785490118363, -0.4343222821728877, 1.3595057253975915, -0.2904675417217015, 1.6392851577613643, -0.04808706200817097, 0.45771640371664324, -0.7205734582684044, -0.1929179471261812, -0.061468415860124005, 0.6594784860274778, 1.915169232184621, 0.24198711630092642, -0.1409192986170815, 0.7498968987041962, 0.8384295699410929, -0.31884337723534034, 0.5201715107719677, 1.078093557213665, -1.4106821188239103, -0.11299875488293797, -1.101851098266356, -1.9827857759459875, 0.674173949337249, -0.07976904508068182, 0.6413575219516752, 0.5579521727455968, 1.5447125674436322, 0.00336279364571832, 0.7169165208000876, -0.5219379135690675, -0.5547365951353832, -1.6094973752281074, -1.4517964713532026, 0.7295543161803131, -1.0485600509751898, 0.25250528262102045, 0.4514365061371215, -0.47322716534713005, 1.4277492880271965, -0.8519575823610668, 0.25010636599252417, 0.04688072494592699, -0.9608827401798883, -0.3007725940571144, -0.046293007279283885, -1.5398474800443722, 0.013805590010736243, -0.30126119542423085, -0.083967414917391, -0.08851702953330913, -0.4049342754396672, 1.2798400904483298, -1.606790768742053, -1.267613618320827, -1.2503576362925535, -1.755204138388206, 0.5258670748348347, -1.0101394529781884, -0.014396056474962199, 
0.5821190499307055, -0.4043449949439321, 1.1531383771532613, 0.7048046900051976, 0.10660760828982706, 1.767840111284227, -0.3132350111393891, -0.28676674418183146, 0.5886217550716831, 0.0533267131349097, -0.7528340929518055, -0.724635525088517, 1.8254002239594138, -0.18841616203234243, -2.315751543437246, -1.377147264738426, -0.22869510121273423, 1.7199465071423408, -0.8227319868770513, 0.7895966627559355, -0.19896135497903228, -0.17774989865868365, 1.2547667767813169, -1.074639301257602, -1.206814829909523, -1.8396620602533253, -0.6168713486425301, 0.2859641314731942, 1.15767005918457, 0.6908763784567307, 0.6969509180467203, -0.028736647970287105, -0.3641425905163623, -0.06310377359862604, 0.5886394872584251, -0.029762415772008773, -0.4569277145966156, -1.4396622355478819, -0.2587529642573468, -0.41564597849878177, 0.6676475767176987, 1.0745653943626412, 0.6145686615783245, 0.5521167568378539, 0.5518405345213662, 1.839167073307593, 0.8564516958772312, 1.2078686147909117, 1.161310276413205, -0.5366273256337238, 0.21518084378203362, 0.47594089205626394, 0.7296065418707832, -0.13013036392341235, 0.5568552463995653, -0.07981669338795927, 0.9709477444901867, 2.3069915735666915, 1.1933313114605135, -1.6250290503525227, -0.42048813136286506, -1.0051606886955424, 0.5549248526402112, -0.29698308976860704, 1.612108853775127, -0.13013836416543, -0.41824203711094626, 0.5194426219405017, 0.05797598725927429, 1.3040653807403857, 0.8368386106798029], payload={'city': 'Berlin'})"
146 | ]
147 | },
148 | "execution_count": 5,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "# Format the data into objects that can be inserted into the DB\n",
155 | "\n",
156 | "data_to_insert = []\n",
157 | "for i, d in enumerate(data):\n",
158 | " point = models.PointStruct(\n",
159 | " id=i, vector=d.tolist(), payload={\"city\": random.choice([\"New York\", \"Berlin\"])}\n",
160 | " )\n",
161 | " data_to_insert.append(point)\n",
162 | "data_to_insert[0]"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 6,
168 | "metadata": {
169 | "id": "PoyxgmARYH9s"
170 | },
171 | "outputs": [],
172 | "source": [
173 | "# Remove any existing DB files\n",
174 | "\n",
175 | "if os.path.exists(\"data/qdrant_db\"):\n",
176 | " shutil.rmtree(\"data/qdrant_db\")\n",
177 | "\n",
178 | "client = QdrantClient(path=\"data/qdrant_db\")"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {
184 | "id": "MpMtMVTJYH9s"
185 | },
186 | "source": [
187 | "# Normal\n"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 7,
193 | "metadata": {
194 | "colab": {
195 | "base_uri": "https://localhost:8080/"
196 | },
197 | "id": "cL-4m5d-YH9u",
198 | "outputId": "67360ca2-6952-4642-b9eb-595317a5b515"
199 | },
200 | "outputs": [
201 | {
202 | "data": {
203 | "text/plain": [
204 | "True"
205 | ]
206 | },
207 | "execution_count": 7,
208 | "metadata": {},
209 | "output_type": "execute_result"
210 | }
211 | ],
212 | "source": [
213 | "client.create_collection(\n",
214 | " collection_name=\"semantic_search\",\n",
215 | " vectors_config=models.VectorParams(\n",
216 | " size=n_features, distance=models.Distance.COSINE\n",
217 | " ),\n",
218 | " quantization_config=None,\n",
219 | ")"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 8,
225 | "metadata": {
226 | "colab": {
227 | "base_uri": "https://localhost:8080/"
228 | },
229 | "id": "6mVgf2VTYH9u",
230 | "outputId": "767e1454-837e-4ab7-a928-e8282afb1bd3"
231 | },
232 | "outputs": [
233 | {
234 | "data": {
235 | "text/plain": [
236 | "UpdateResult(operation_id=0, status=)"
237 | ]
238 | },
239 | "execution_count": 8,
240 | "metadata": {},
241 | "output_type": "execute_result"
242 | }
243 | ],
244 | "source": [
245 | "client.upsert(\n",
246 | " collection_name=\"semantic_search\",\n",
247 | " points=data_to_insert,\n",
248 | ")"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 9,
254 | "metadata": {
255 | "colab": {
256 | "base_uri": "https://localhost:8080/"
257 | },
258 | "id": "WvFAoWr0YH9v",
259 | "outputId": "9d97c8a2-50c7-4808-d3a8-19787af80eb2"
260 | },
261 | "outputs": [
262 | {
263 | "name": "stdout",
264 | "output_type": "stream",
265 | "text": [
266 | "id=0 version=0 score=0.9999999992620814 payload={'city': 'Berlin'} vector=None shard_key=None\n",
267 | "id=367 version=0 score=0.0935900955869998 payload={'city': 'Berlin'} vector=None shard_key=None\n",
268 | "id=6777 version=0 score=0.09345209088487938 payload={'city': 'New York'} vector=None shard_key=None\n",
269 | "id=9674 version=0 score=0.07236113324158687 payload={'city': 'Berlin'} vector=None shard_key=None\n",
270 | "id=6268 version=0 score=0.07159429777598603 payload={'city': 'Berlin'} vector=None shard_key=None\n",
271 | "id=2973 version=0 score=0.06972900526967332 payload={'city': 'New York'} vector=None shard_key=None\n",
272 | "id=6340 version=0 score=0.06872670865599134 payload={'city': 'Berlin'} vector=None shard_key=None\n",
273 | "id=9953 version=0 score=0.06850956476044623 payload={'city': 'New York'} vector=None shard_key=None\n",
274 | "id=510 version=0 score=0.06589252393618895 payload={'city': 'New York'} vector=None shard_key=None\n",
275 | "id=1519 version=0 score=0.06586400388836113 payload={'city': 'Berlin'} vector=None shard_key=None\n"
276 | ]
277 | },
278 | {
279 | "data": {
280 | "text/plain": [
281 | "[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]"
282 | ]
283 | },
284 | "execution_count": 9,
285 | "metadata": {},
286 | "output_type": "execute_result"
287 | }
288 | ],
289 | "source": [
290 | "results = client.search(collection_name=\"semantic_search\", query_vector=data[0])\n",
291 | "for result in results:\n",
292 | " print(result)\n",
293 | "truth_ids = [result.id for result in results]\n",
294 | "truth_ids"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {
300 | "id": "w4ys6mjxYH9w"
301 | },
302 | "source": [
303 | "# Scalar Quantization\n"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 10,
309 | "metadata": {
310 | "colab": {
311 | "base_uri": "https://localhost:8080/"
312 | },
313 | "id": "9iRb7IJ4YH9w",
314 | "outputId": "e0a61238-6544-4795-a276-20a4e82f787f"
315 | },
316 | "outputs": [
317 | {
318 | "data": {
319 | "text/plain": [
320 | "True"
321 | ]
322 | },
323 | "execution_count": 10,
324 | "metadata": {},
325 | "output_type": "execute_result"
326 | }
327 | ],
328 | "source": [
329 | "client.create_collection(\n",
330 | " collection_name=\"scalar_semantic_search\",\n",
331 | " vectors_config=models.VectorParams(\n",
332 | " size=n_features, distance=models.Distance.COSINE\n",
333 | " ),\n",
334 | " quantization_config=models.ScalarQuantization(\n",
335 | " scalar=models.ScalarQuantizationConfig(\n",
336 | " type=models.ScalarType.INT8,\n",
337 | " quantile=0.99, # 1% of extreme values will be excluded from quantization\n",
338 | " always_ram=True,\n",
339 | " ),\n",
340 | " ),\n",
341 | ")"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 11,
347 | "metadata": {
348 | "colab": {
349 | "base_uri": "https://localhost:8080/"
350 | },
351 | "id": "o-Afj4mxYH9w",
352 | "outputId": "1feee59d-a2e7-4269-8051-23f54fc6c70d"
353 | },
354 | "outputs": [
355 | {
356 | "data": {
357 | "text/plain": [
358 | "UpdateResult(operation_id=0, status=)"
359 | ]
360 | },
361 | "execution_count": 11,
362 | "metadata": {},
363 | "output_type": "execute_result"
364 | }
365 | ],
366 | "source": [
367 | "client.upsert(\n",
368 | " collection_name=\"scalar_semantic_search\",\n",
369 | " points=data_to_insert,\n",
370 | ")"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": 12,
376 | "metadata": {
377 | "colab": {
378 | "base_uri": "https://localhost:8080/"
379 | },
380 | "id": "023CWpJ4YH9x",
381 | "outputId": "bc6a24a8-4574-4e40-96a3-03c5fda3cd99"
382 | },
383 | "outputs": [
384 | {
385 | "name": "stdout",
386 | "output_type": "stream",
387 | "text": [
388 | "[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]\n"
389 | ]
390 | },
391 | {
392 | "data": {
393 | "text/plain": [
394 | "1.0"
395 | ]
396 | },
397 | "execution_count": 12,
398 | "metadata": {},
399 | "output_type": "execute_result"
400 | }
401 | ],
402 | "source": [
403 | "results = client.search(collection_name=\"scalar_semantic_search\", query_vector=data[0])\n",
404 | "predicted_ids = [result.id for result in results]\n",
405 | "print(predicted_ids)\n",
406 | "calculate_recall(truth_ids, predicted_ids)"
407 | ]
408 | },
409 | {
410 | "cell_type": "markdown",
411 | "metadata": {
412 | "id": "MRKqStCNYH9x"
413 | },
414 | "source": [
415 | "# Binary Quantization\n"
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": 13,
421 | "metadata": {
422 | "colab": {
423 | "base_uri": "https://localhost:8080/"
424 | },
425 | "id": "iwscbaIRYH9y",
426 | "outputId": "58d27a2b-96ca-4cad-bc6b-d76a624fd504"
427 | },
428 | "outputs": [
429 | {
430 | "data": {
431 | "text/plain": [
432 | "True"
433 | ]
434 | },
435 | "execution_count": 13,
436 | "metadata": {},
437 | "output_type": "execute_result"
438 | }
439 | ],
440 | "source": [
441 | "client.create_collection(\n",
442 | " collection_name=\"binary_semantic_search\",\n",
443 | " vectors_config=models.VectorParams(\n",
444 | " size=n_features, distance=models.Distance.COSINE\n",
445 | " ),\n",
446 | " quantization_config=models.BinaryQuantization(\n",
447 | " binary=models.BinaryQuantizationConfig(\n",
448 | " always_ram=True,\n",
449 | " )\n",
450 | " ),\n",
451 | ")"
452 | ]
453 | },
454 | {
455 | "cell_type": "code",
456 | "execution_count": 14,
457 | "metadata": {
458 | "colab": {
459 | "base_uri": "https://localhost:8080/"
460 | },
461 | "id": "f1Zf2g1bYH9y",
462 | "outputId": "f86dc3d8-aead-4b6d-f6c2-7d11330569ae"
463 | },
464 | "outputs": [
465 | {
466 | "data": {
467 | "text/plain": [
468 | "UpdateResult(operation_id=0, status=)"
469 | ]
470 | },
471 | "execution_count": 14,
472 | "metadata": {},
473 | "output_type": "execute_result"
474 | }
475 | ],
476 | "source": [
477 | "client.upsert(\n",
478 | " collection_name=\"binary_semantic_search\",\n",
479 | " points=data_to_insert,\n",
480 | ")"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 15,
486 | "metadata": {
487 | "colab": {
488 | "base_uri": "https://localhost:8080/"
489 | },
490 | "id": "5-M1fNspYH9y",
491 | "outputId": "f72111a5-8496-40b9-969c-b1e0b5873264"
492 | },
493 | "outputs": [
494 | {
495 | "name": "stdout",
496 | "output_type": "stream",
497 | "text": [
498 | "[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]\n"
499 | ]
500 | },
501 | {
502 | "data": {
503 | "text/plain": [
504 | "1.0"
505 | ]
506 | },
507 | "execution_count": 15,
508 | "metadata": {},
509 | "output_type": "execute_result"
510 | }
511 | ],
512 | "source": [
513 | "results = client.search(collection_name=\"binary_semantic_search\", query_vector=data[0])\n",
514 | "predicted_ids = [result.id for result in results]\n",
515 | "print(predicted_ids)\n",
516 | "calculate_recall(truth_ids, predicted_ids)"
517 | ]
518 | },
519 | {
520 | "cell_type": "markdown",
521 | "metadata": {
522 | "id": "HbWIhZUMYH9y"
523 | },
524 | "source": [
525 | "# Time Metrics\n"
526 | ]
527 | },
528 | {
529 | "cell_type": "code",
530 | "execution_count": 38,
531 | "metadata": {
532 | "colab": {
533 | "base_uri": "https://localhost:8080/"
534 | },
535 | "id": "IxVu58AVftpb",
536 | "outputId": "cb0d5518-23a4-46f5-ad94-3cf73ebb9328"
537 | },
538 | "outputs": [
539 | {
540 | "name": "stdout",
541 | "output_type": "stream",
542 | "text": [
543 | "0.2501802444458008\n"
544 | ]
545 | }
546 | ],
547 | "source": [
548 | "# With quantized vectors\n",
549 | "\n",
550 | "n_reps = 15\n",
551 | "times = []\n",
552 | "for _ in range(n_reps):\n",
553 | " query_vector = np.random.randn(1, n_features)[0]\n",
554 | " st_time = time.time()\n",
555 | " client.search(\n",
556 | " collection_name=\"binary_semantic_search\",\n",
557 | " query_vector=query_vector,\n",
558 | " search_params=models.SearchParams(\n",
559 | " quantization=models.QuantizationSearchParams(\n",
560 | " ignore=False, rescore=False\n",
561 | " ) # use quantized\n",
562 | " ),\n",
563 | " limit=100,\n",
564 | " query_filter=models.Filter(\n",
565 | " must=[\n",
566 | " models.FieldCondition(\n",
567 | " key=\"city\", match=models.MatchValue(value=\"Berlin\")\n",
568 | " )\n",
569 | " ]\n",
570 | " ),\n",
571 | " )\n",
572 | " times.append(time.time() - st_time)\n",
573 | "print(np.median(times))"
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": 36,
579 | "metadata": {
580 | "colab": {
581 | "base_uri": "https://localhost:8080/"
582 | },
583 | "id": "0Z7DmnCTf984",
584 | "outputId": "91f223be-b372-48c6-ac1b-af499be56a2b"
585 | },
586 | "outputs": [
587 | {
588 | "name": "stdout",
589 | "output_type": "stream",
590 | "text": [
591 | "0.32148098945617676\n"
592 | ]
593 | }
594 | ],
595 | "source": [
596 | "# With normal vectors\n",
597 | "\n",
598 | "times = []\n",
599 | "for _ in range(n_reps):\n",
600 | " query_vector = np.random.randn(1, n_features)[0]\n",
601 | " st_time = time.time()\n",
602 | " client.search(\n",
603 | " collection_name=\"binary_semantic_search\",\n",
604 | " query_vector=query_vector,\n",
605 | " search_params=models.SearchParams(\n",
606 | " quantization=models.QuantizationSearchParams(\n",
607 | " ignore=True, rescore=False\n",
608 | " ) # ignore quantized\n",
609 | " ),\n",
610 | " limit=100,\n",
611 | " query_filter=models.Filter(\n",
612 | " must=[\n",
613 | " models.FieldCondition(\n",
614 | " key=\"city\", match=models.MatchValue(value=\"Berlin\")\n",
615 | " )\n",
616 | " ]\n",
617 | " ),\n",
618 | " )\n",
619 | " times.append(time.time() - st_time)\n",
620 | "print(np.median(times))"
621 | ]
622 | },
623 | {
624 | "cell_type": "code",
625 | "execution_count": 17,
626 | "metadata": {
627 | "id": "9vrZtPO_YH9y"
628 | },
629 | "outputs": [],
630 | "source": []
631 | },
632 | {
633 | "cell_type": "code",
634 | "execution_count": 17,
635 | "metadata": {
636 | "id": "u33t6OqJYH9y"
637 | },
638 | "outputs": [],
639 | "source": []
640 | },
641 | {
642 | "cell_type": "code",
643 | "execution_count": 17,
644 | "metadata": {
645 | "id": "b9pv1xODYH9y"
646 | },
647 | "outputs": [],
648 | "source": []
649 | }
650 | ],
651 | "metadata": {
652 | "colab": {
653 | "provenance": []
654 | },
655 | "kernelspec": {
656 | "display_name": "Python 3",
657 | "language": "python",
658 | "name": "python3"
659 | },
660 | "language_info": {
661 | "codemirror_mode": {
662 | "name": "ipython",
663 | "version": 3
664 | },
665 | "file_extension": ".py",
666 | "mimetype": "text/x-python",
667 | "name": "python",
668 | "nbconvert_exporter": "python",
669 | "pygments_lexer": "ipython3",
670 | "version": "3.11.5"
671 | }
672 | },
673 | "nbformat": 4,
674 | "nbformat_minor": 0
675 | }
676 |
--------------------------------------------------------------------------------
/postgres_vector_length/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import requests # type: ignore
5 | from dotenv import load_dotenv # type: ignore
6 | from langchain.text_splitter import RecursiveCharacterTextSplitter # type: ignore
7 | from openai import OpenAI # type: ignore
8 | from postgres import PostgresClient
9 | from sklearn.decomposition import PCA # type: ignore
10 |
# Demo script: download a book, chunk it, embed every chunk with OpenAI, reduce
# the embeddings with PCA, store both versions in Postgres (pgvector) and compare
# storage size and search latency of the large vs. the small vectors.

load_dotenv()

openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), max_retries=3)

EMBEDDING_MODEL = "text-embedding-ada-002"
LARGE_EMBEDDING_SIZE = 1536  # vector length stored in the large_embedding column
SMALL_EMBEDDING_SIZE = 50  # vector length after PCA reduction
MAX_CHUNKS_PER_TITLE = 1000  # cap per book so the embedding request stays small
DOWNLOAD_TIMEOUT_SECONDS = 30


# get book from gutenberg
title_url = {
    "inverted pyramid": "https://www.gutenberg.org/cache/epub/72392/pg72392.txt"
}
title_text_map = {}
for title, url in title_url.items():
    # A timeout prevents the script from hanging forever on a stalled
    # connection; raise_for_status stops us from embedding an HTTP error page.
    response = requests.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
    response.raise_for_status()
    title_text_map[title] = response.text


# use langchain to do document chunking
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=40,
    length_function=len,
    is_separator_regex=False,
)

# Parallel lists: entry i of each list describes the same chunk.
chunk_titles = []
chunk_texts = []
embeddings = []
for title, text in title_text_map.items():
    text_chunks = text_splitter.split_text(text)[:MAX_CHUNKS_PER_TITLE]

    embedding_data = openai_client.embeddings.create(
        input=text_chunks,
        model=EMBEDDING_MODEL,
    ).data

    chunk_titles.extend([title] * len(text_chunks))
    chunk_texts.extend(text_chunks)
    embeddings.extend(e.embedding for e in embedding_data)

# Fit ONE projection over all chunks. Fitting a separate PCA per title would
# put each book's small embeddings in a different space, and the query below
# could only be projected with one of them.
pca = PCA(n_components=SMALL_EMBEDDING_SIZE)
reduced_embeddings = pca.fit_transform(embeddings)

documents = [
    {
        "title": chunk_title,
        "text": chunk_text,
        "large_embedding": large_embedding,
        "small_embedding": small_embedding,
    }
    for chunk_title, chunk_text, large_embedding, small_embedding in zip(
        chunk_titles, chunk_texts, embeddings, reduced_embeddings
    )
]


# compare sizes
postgres_client = PostgresClient(
    large_embedding_size=LARGE_EMBEDDING_SIZE,
    small_embedding_size=SMALL_EMBEDDING_SIZE,
)
postgres_client.delete_tables()
postgres_client.create_tables()
postgres_client.add_data(data=documents)
size = postgres_client.get_vector_column_size()
for column, size_bytes in size.items():
    print(f"Column {column} has size {size_bytes/1048576} MB")
print("\n")

# search over table

quote = "Where was Rod Norquay sitting?"
quote_embedding = (
    openai_client.embeddings.create(
        input=[quote],
        model=EMBEDDING_MODEL,
    )
    .data[0]
    .embedding
)

# perf_counter is monotonic and high-resolution, which is what interval
# measurements need; time.time() can jump if the wall clock is adjusted.
st_time = time.perf_counter()
results = postgres_client.search_db(
    query_vec=quote_embedding,
    column="large_embedding",
)
print(f"Result using large embeddings took {time.perf_counter() - st_time:.2f}:\n")
print(results[0]["text"])
print("\n" * 5)

# The query must be projected with the same PCA that produced the stored
# small embeddings, otherwise the distances are meaningless.
reduced_quote_embedding = pca.transform([quote_embedding])[0]
st_time = time.perf_counter()
results = postgres_client.search_db(
    query_vec=reduced_quote_embedding,
    column="small_embedding",
)
print(f"Result using small embeddings took {time.perf_counter() - st_time:.2f}:\n")
print(results[0]["text"])
print("\n" * 5)
99 |
--------------------------------------------------------------------------------
/postgres_vector_length/postgres.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Literal
3 |
4 | import numpy as np
5 | import psycopg # type: ignore
6 | from dotenv import load_dotenv # type: ignore
7 | from pgvector.psycopg import register_vector # type: ignore
8 | from psycopg.rows import dict_row # type: ignore
9 |
10 | load_dotenv()
11 |
12 |
class PostgresClient:
    """Small helper around a psycopg connection to a pgvector-enabled database.

    It manages a single ``search_data`` table that stores each text chunk
    together with a large and a small embedding, and offers helpers to
    create/drop the table, insert rows, run a nearest-neighbour search and
    measure the storage used by each vector column.

    Credentials are read from the ``NEON_USERNAME`` and ``NEON_PASSWORD``
    environment variables.
    """

    # Only these column names may be interpolated into the search query.
    _VECTOR_COLUMNS = ("large_embedding", "small_embedding")

    def __init__(
        self,
        large_embedding_size: int,
        small_embedding_size: int,
        postgres_database: str = "semantic_search",
    ):
        """Store the vector sizes and build the connection URL.

        No connection is opened here; that happens lazily in
        ``init_postgres_client``.

        Args:
            large_embedding_size: Dimension of the ``large_embedding`` column.
            small_embedding_size: Dimension of the ``small_embedding`` column.
            postgres_database: Name of the database to connect to.
        """
        NEON_USERNAME = os.getenv("NEON_USERNAME")
        NEON_PASSWORD = os.getenv("NEON_PASSWORD")
        self.large_embedding_size = large_embedding_size
        self.small_embedding_size = small_embedding_size

        self.postgres_url = f"postgresql://{NEON_USERNAME}:{NEON_PASSWORD}@ep-still-hat-20912390.us-east-2.aws.neon.tech/{postgres_database}?sslmode=require"

    def create_postgres_connection(self):
        """Open a fresh connection, closing any previous one first."""
        if hasattr(self, "postgres_client"):
            self.postgres_client.close()
        self.postgres_client = psycopg.connect(
            conninfo=self.postgres_url,
            row_factory=dict_row,
        )
        # Teach psycopg how to send/receive pgvector values on this connection.
        register_vector(self.postgres_client)

    def init_postgres_client(self):
        """Make sure ``self.postgres_client`` is a usable connection.

        Connects if there is no connection yet or it is closed/broken, then
        sends a trivial query and reconnects once if that query fails.
        """
        if (
            not hasattr(self, "postgres_client")
            or self.postgres_client.closed
            or self.postgres_client.broken
        ):
            self.create_postgres_connection()
        try:
            with self.postgres_client.cursor() as cursor:
                cursor.execute("SELECT 1")
        except (psycopg.DatabaseError, psycopg.OperationalError):
            self.create_postgres_connection()

    def create_tables(self):
        """Create the ``search_data`` table and its HNSW indexes if missing."""
        self.init_postgres_client()
        # int() guarantees that only a number is interpolated into the DDL.
        large_size = int(self.large_embedding_size)
        small_size = int(self.small_embedding_size)
        # The indexes use vector_l2_ops because search_db orders by the L2
        # operator (<->); an index built with a different operator class
        # cannot be used for that ORDER BY.
        create_table_sql = f"""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT FROM pg_tables
                WHERE schemaname = 'public'
                AND tablename = 'search_data'
            ) THEN
                CREATE EXTENSION IF NOT EXISTS vector;
                CREATE TABLE search_data (
                    book_title VARCHAR(255) NOT NULL,
                    text TEXT NOT NULL,
                    large_embedding Vector({large_size}),
                    small_embedding Vector({small_size})
                );
                CREATE INDEX ON search_data USING hnsw (large_embedding vector_l2_ops);
                CREATE INDEX ON search_data USING hnsw (small_embedding vector_l2_ops);
            END IF;
        END
        $$;
        """

        with self.postgres_client.cursor() as cursor:
            cursor.execute(create_table_sql)
        self.postgres_client.commit()
        # Register again now that the extension is known to exist.
        register_vector(self.postgres_client)

    def delete_tables(self):
        """Drop the ``search_data`` table if it exists."""
        self.init_postgres_client()
        sql = """
        DO $$
        BEGIN
            IF EXISTS (
                SELECT FROM pg_tables
                WHERE schemaname = 'public'
                AND tablename = 'search_data'
            ) THEN
                DROP TABLE search_data;
            END IF;
        END
        $$;
        """
        with self.postgres_client.cursor() as cursor:
            cursor.execute(sql)
        self.postgres_client.commit()

    def add_data(self, data: list[dict]):
        """Insert rows into ``search_data``.

        Args:
            data: Dicts with the keys ``title``, ``text``, ``large_embedding``
                and ``small_embedding``.
        """
        if not data:
            return  # nothing to insert
        self.init_postgres_client()
        insert_query = """
        INSERT INTO search_data (book_title, text, large_embedding, small_embedding)
        VALUES (%s, %s, %s, %s)
        """
        # Both embeddings are sent as numpy arrays so that they go through the
        # same pgvector adapter.
        insert_data = [
            (
                d["title"],
                d["text"],
                np.asarray(d["large_embedding"]),
                np.asarray(d["small_embedding"]),
            )
            for d in data
        ]
        with self.postgres_client.cursor() as cursor:
            cursor.executemany(insert_query, insert_data)
        self.postgres_client.commit()

    def search_db(
        self,
        query_vec: list[float],
        column: Literal["large_embedding", "small_embedding"],
    ) -> list:
        """Return the row whose ``column`` vector is closest (L2) to ``query_vec``.

        Args:
            query_vec: Query embedding; its length must match the column.
            column: Which vector column to search.

        Returns:
            A list with at most one row containing ``book_title`` and ``text``.

        Raises:
            ValueError: If ``column`` is not one of the known vector columns.
        """
        # The column name is interpolated into the SQL text, so it must be
        # checked against the allow-list before anything is executed.
        if column not in self._VECTOR_COLUMNS:
            raise ValueError(
                f"column must be one of {self._VECTOR_COLUMNS}, got {column!r}"
            )
        self.init_postgres_client()
        query = f"""
        SELECT book_title, text
        FROM search_data
        ORDER BY {column} <-> %s
        LIMIT 1;
        """

        with self.postgres_client.cursor() as cursor:
            results = cursor.execute(
                query,
                (np.array(query_vec),),
            ).fetchall()

        return results

    def get_vector_column_size(self) -> dict[str, int]:
        """Return the total stored size in bytes of each vector column.

        Returns:
            A dict with the keys ``large_embedding_size`` and
            ``small_embedding_size``; both are 0 when the table is empty.
        """
        self.init_postgres_client()
        # SUM over zero rows yields NULL; COALESCE keeps the result numeric.
        sql = """
        SELECT COALESCE(SUM(pg_column_size(large_embedding)), 0) as large_embedding_size,
               COALESCE(SUM(pg_column_size(small_embedding)), 0) as small_embedding_size
        FROM search_data;
        """
        with self.postgres_client.cursor() as cursor:
            results = cursor.execute(sql).fetchone()

        return results
146 |
--------------------------------------------------------------------------------
/postgres_vector_length/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.0.341
2 | openai==1.3.5
3 | psycopg[binary]==3.1.14
4 | pgvector==0.2.4
5 | scikit-learn==1.3.0
6 |
--------------------------------------------------------------------------------
/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/slides.pdf
--------------------------------------------------------------------------------