├── .gitignore ├── .pre-commit-config.yaml ├── README.md ├── art ├── 1.png ├── 2.png ├── 3.png ├── 4.png ├── 5.png └── ChatGPT.png ├── notebooks ├── Cosine_vs_Dot.ipynb ├── CrossEncoder.ipynb ├── Document_Chunks.ipynb ├── Embedding_Size.ipynb ├── HNSW_Hyperparam_Search.ipynb ├── LSH.ipynb ├── Mistral_7b_rag.ipynb ├── Product_Quantization.ipynb ├── Scalar_and_Binary_Quantization.ipynb └── Semantic_Search_Basics.ipynb ├── postgres_vector_length ├── main.py ├── postgres.py └── requirements.txt └── slides.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | notebooks/data/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 163 | #.idea/ 164 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | repos: 5 | ############################################################################# 6 | # Misc 7 | ############################################################################# 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: v4.5.0 10 | hooks: 11 | - id: check-merge-conflict # Searches for merge conflict markers within files. 
12 | - id: check-added-large-files # Blocks commits that add large files. Default limit is 500kB. 13 | # Can be configured with args, e.g., '--maxkb=1000' to change the limit. 14 | # exclude: 'your_dir/.*' 15 | args: ["--maxkb=10000"] 16 | - id: check-case-conflict # Identifies potential case-insensitive file name conflicts. 17 | - id: check-ast # Validates the syntax of Python files. 18 | - id: check-symlinks # Detects broken symlinks. 19 | - id: trailing-whitespace # Removes any trailing whitespace at the end of lines. 20 | - id: end-of-file-fixer # Ensures files end with a single newline or are empty. 21 | 22 | - repo: https://github.com/pre-commit/pre-commit-hooks 23 | rev: v4.5.0 24 | hooks: 25 | - id: check-json # Validates JSON files to ensure they are properly formatted and syntactically correct. 26 | types: [json] 27 | - id: check-toml # Checks TOML files for errors and format issues to ensure valid syntax. 28 | types: [toml] 29 | 30 | - repo: https://github.com/astral-sh/ruff-pre-commit 31 | rev: v0.1.5 32 | hooks: 33 | # Run the linter. 34 | - id: ruff 35 | types_or: [python, pyi, jupyter] 36 | args: [--fix, --ignore, E402, --ignore, F821] 37 | # Run the formatter. 
38 | - id: ruff-format 39 | types_or: [python, pyi, jupyter] 40 | 41 | - repo: https://github.com/pre-commit/mirrors-eslint 42 | rev: "v8.54.0" 43 | hooks: 44 | - id: eslint 45 | entry: bash -c 'cd web && eslint' 46 | files: \.[jt]sx?$ # *.js, *.jsx, *.ts, and *.tsx 47 | types: [file] 48 | 49 | - repo: https://github.com/pre-commit/mirrors-prettier 50 | rev: "v3.1.0" 51 | hooks: 52 | - id: prettier 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Semantic Search with LLMs 2 | 3 | ### Purpose 4 | 5 | This repository contains the accompanying code for [Shaan](https://www.shaankhosla.com)'s [3-hour live training](https://www.oreilly.com/live-events/semantic-search-with-llms/0790145045035/) offered on O'Reilly. The training took place on January 9th and July 11th. 6 | 7 | The "repo art" was generated by DALL-E. The images are artistic takes on "Semantic Search with LLMs", produced using the class description as the prompt. 
8 | 9 | | | | 10 | | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | 11 | | ![Image 1](https://github.com/shaankhosla/semanticsearch/blob/main/art/1.png) | ![Image 2](https://github.com/shaankhosla/semanticsearch/blob/main/art/2.png) | 12 | | ![Image 3](https://github.com/shaankhosla/semanticsearch/blob/main/art/3.png) | ![Image 4](https://github.com/shaankhosla/semanticsearch/blob/main/art/4.png) | 13 | -------------------------------------------------------------------------------- /art/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/1.png -------------------------------------------------------------------------------- /art/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/2.png -------------------------------------------------------------------------------- /art/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/3.png -------------------------------------------------------------------------------- /art/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/4.png -------------------------------------------------------------------------------- /art/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/5.png 
-------------------------------------------------------------------------------- /art/ChatGPT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaankhosla/semanticsearch/16bfd0a79211f023b95c1920b9d3c75d44a00890/art/ChatGPT.png -------------------------------------------------------------------------------- /notebooks/CrossEncoder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | " \"Open\n", 9 | "\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 6, 15 | "metadata": { 16 | "id": "NLqmYKntd2vg" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "%%capture\n", 21 | "\n", 22 | "%pip install sentence_transformers\n", 23 | "from sentence_transformers import CrossEncoder, SentenceTransformer\n", 24 | "import numpy as np" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 7, 30 | "metadata": { 31 | "id": "GGIrwN2pd2vh" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "# Function to calculate cosine similarity\n", 36 | "\n", 37 | "\n", 38 | "def cosine_similarity(v1, v2):\n", 39 | " return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 11, 45 | "metadata": { 46 | "id": "km_lJoz9d2vh" 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# Sample query\n", 51 | "query = \"How many people live in Berlin?\"\n", 52 | "\n", 53 | "# Sample answers\n", 54 | "answers = [\n", 55 | " \"Berlin has a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.\", # Correct answer\n", 56 | " \"In 2020, the population of Germany's capital city surpassed 3.5 million.\", # Correct answer\n", 57 | " \"How many people live in Berlin? No clue\", # Distraction\n", 58 | " \"I visited Berlin last year; it seemed very crowded. 
Lots of people\", # Distraction\n", 59 | " \"Berlin, the capital of Germany, is known for its cultural landmarks and modern architecture.\", # Distraction\n", 60 | "]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 13, 66 | "metadata": { 67 | "id": "gP9HBnijeE73" 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "embedding_model = SentenceTransformer(\"all-MiniLM-L6-v2\") # Common bi-encoder" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 21, 77 | "metadata": { 78 | "colab": { 79 | "base_uri": "https://localhost:8080/", 80 | "height": 54 81 | }, 82 | "id": "Gxlo_AJTd2vj", 83 | "outputId": "6a438a41-3ffc-4c24-9311-a5dcaa5e8e32" 84 | }, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "[0.76697516, 0.5882465, 0.96647555, 0.78439665, 0.50811416]\n" 91 | ] 92 | }, 93 | { 94 | "data": { 95 | "application/vnd.google.colaboratory.intrinsic+json": { 96 | "type": "string" 97 | }, 98 | "text/plain": [ 99 | "'How many people live in Berlin? 
No clue'" 100 | ] 101 | }, 102 | "execution_count": 21, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "answer_embeddings = embedding_model.encode(answers)\n", 109 | "query_embedding = embedding_model.encode([query])[0]\n", 110 | "scores = [cosine_similarity(query_embedding, e) for e in answer_embeddings]\n", 111 | "print(scores)\n", 112 | "\n", 113 | "answers[np.argmax(scores)] # Incorrect answer" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 15, 119 | "metadata": { 120 | "colab": { 121 | "base_uri": "https://localhost:8080/", 122 | "height": 177, 123 | "referenced_widgets": [ 124 | "dbf6be4c7ae8432b97472c573793c4fe", 125 | "e057fa5920eb4044b37c27aedf2ebc10", 126 | "a4b70d2d1f934264bc9602b9796021bf", 127 | "bba613ed1fa64880a68a427d0422edd9", 128 | "e5b12cf54e08497e8224b535951ad212", 129 | "ea99016cff8a4d83aa568eb492d95350", 130 | "f3438533609f46c0ac3090e8ed0eaa45", 131 | "27c0dd4a616a4f10b05b4feb9a147786", 132 | "306bf26960f5461591c07c142cd2e4b4", 133 | "177be2dd62954c02894d0d49b9e8625e", 134 | "278bd7aa89ed4a5e9020bbcafa7fba6c", 135 | "992b6f13c18e41a5b12c8d53c72e2794", 136 | "74ebcd701a2d41fd8f91cd4823e5410e", 137 | "f4bbd4ca186b4eecb43b06b5b80183ee", 138 | "e21beed8f64745a7830bb86a0be547a7", 139 | "5d2c500f1f404603911a8e17265de46a", 140 | "8bb689f214b74880be64a412dccbedda", 141 | "912ba07cdadb42f788bafb5ff2823bb7", 142 | "5033346b251342d38691077e6b7e6efc", 143 | "dc573e8f87f64292b3861bb98688c848", 144 | "82f02d6db6a74b0086beb9771eb45a81", 145 | "7ee0a510bf084edabd07fbb7a0af2cbc", 146 | "e3fae2ff299e42eab96475c1e7d03267", 147 | "5f5090841ceb42cbb743c61080dcef24", 148 | "905086bf6d5c4418916175d20d8a12b1", 149 | "6b9afb8789744ab0b0f1514fe43bbd1a", 150 | "d38d00434ce5496f901640976bd3e43a", 151 | "b16d2e5f754748cebcd3f12996d397a4", 152 | "871b26c9361744b7aee4805299613811", 153 | "ec3e7e4f488c41f2beb8e78e5adb75e1", 154 | "a8448a72f95c4ccaa03960dc68b246e1", 155 | 
"ce4ba9bf00954b91a35b8e9286fa09a1", 156 | "befbff68d4664ae0bb16b453b94036d7", 157 | "7d8f9fdf562c4dc4a9a76c5a2e65f772", 158 | "3c004375ffcd45e29c372b02af3a9987", 159 | "6dbc701e30544611b5da580af45670ca", 160 | "6065d722e9544446ad9859e8613de1c7", 161 | "98273030cd864920825a7648597cb686", 162 | "e8562c35e1974a1ba434698bc9fe74bf", 163 | "e6a12e9ec06448a989bfb555c29c45b9", 164 | "5081cf300a7b439a902037963228a825", 165 | "dcd1f01a5bee4dc4a872579856bfe1c1", 166 | "c61e39fc5a75454c96ba62ad0f972f51", 167 | "b09195e6e84447ad8573108ab2b6d332", 168 | "309d154853534fd1be8514e7778b3038", 169 | "0647eb4475c54596a2ba66b42236923a", 170 | "c046065b830d42ffb8fba239d2af4c7a", 171 | "8d9fe715090c47eab07831ee0bc34a20", 172 | "873e16965eb14210913979a57f7481b1", 173 | "59e6ace03fcf4043ae8092089b26144c", 174 | "f2836e2bc9fd4de590f7db1dfa7b62f2", 175 | "95a901cfb5114d86a4b7c30508d567a8", 176 | "2e04db8b476649a785e9b4d6601c16ec", 177 | "8af543001d8f4ad887dd8aeb0776a871", 178 | "fa30c5c766914ae2b158324bf790a239" 179 | ] 180 | }, 181 | "id": "IgZ_fH77eKO6", 182 | "outputId": "f6ae48f9-7219-4db9-9f43-1227c2fccecf" 183 | }, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "application/vnd.jupyter.widget-view+json": { 188 | "model_id": "dbf6be4c7ae8432b97472c573793c4fe", 189 | "version_major": 2, 190 | "version_minor": 0 191 | }, 192 | "text/plain": [ 193 | "config.json: 0%| | 0.00/791 [00:00\n", 8 | " \"Open\n", 9 | "\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": { 16 | "id": "L7ww6Lkqarb_" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "%%capture\n", 21 | "\n", 22 | "%pip install langchain\n", 23 | "import requests\n", 24 | "from langchain.text_splitter import RecursiveCharacterTextSplitter" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": { 31 | "id": "_pyQ4wZHazwj" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "response = 
requests.get(\"https://www.gutenberg.org/cache/epub/72392/pg72392.txt\")\n", 36 | "book_text = response.text" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": { 43 | "colab": { 44 | "base_uri": "https://localhost:8080/", 45 | "height": 54 46 | }, 47 | "id": "Z3KLleqMa2ga", 48 | "outputId": "4998792c-c8d2-496e-8a45-5a4209f8d32e" 49 | }, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "application/vnd.google.colaboratory.intrinsic+json": { 54 | "type": "string" 55 | }, 56 | "text/plain": [ 57 | "'\\ufeffThe Project Gutenberg eBook of The inverted pyramid\\r\\n \\r\\nThis ebook is for the use of anyone anywhere in the United States and\\r\\nmost other parts of the world at no cost and with almost no restricti'" 58 | ] 59 | }, 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "# First 200 characters\n", 67 | "\n", 68 | "book_text[:200]" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 9, 74 | "metadata": { 75 | "colab": { 76 | "base_uri": "https://localhost:8080/" 77 | }, 78 | "id": "dRcE-hK0a32w", 79 | "outputId": "7de14707-9253-4fe2-b28e-270b30b6a43e" 80 | }, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | " _All rights reserved_\r\n", 87 | "------------------------------\n", 88 | "\r\n", 89 | "------------------------------\n", 90 | " Published January, 1924\r\n", 91 | "------------------------------\n", 92 | "\r\n", 93 | "------------------------------\n", 94 | "\r\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "# Naive document chunking\n", 100 | "\n", 101 | "newline_chunks = book_text.split(\"\\n\")\n", 102 | "print(newline_chunks[50])\n", 103 | "print(\"-\" * 30)\n", 104 | "print(newline_chunks[51])\n", 105 | "print(\"-\" * 30)\n", 106 | "print(newline_chunks[52])\n", 107 | "print(\"-\" * 30)\n", 108 | "print(newline_chunks[53])\n", 109 | "print(\"-\" * 30)\n", 110 | 
"print(newline_chunks[54])" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 10, 116 | "metadata": { 117 | "colab": { 118 | "base_uri": "https://localhost:8080/" 119 | }, 120 | "id": "1G4W3wPebMjO", 121 | "outputId": "fcea4c47-cd74-428b-c072-6c581d3907c0" 122 | }, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "arched over the hurrying tide and the encircling mountains. Vast\r\n", 129 | "peaks, from the green-mantled cones near by, to distant pinnacles\r\n", 130 | "lifting far above timber line and capped with everlasting white.\r\n", 131 | "\r\n", 132 | "Rod did not consciously apply his intellect to considering his\r\n", 133 | "environment. He felt it. It satisfied him, filled him with an\n", 134 | "------------------------------\n", 135 | "environment. He felt it. It satisfied him, filled him with an\r\n", 136 | "indefinable sense of well-being. His people for a hundred years had\r\n", 137 | "filled their eyes with that and found it good. Against this\r\n", 138 | "background they had lived and loved and died. No matter. 
Rod,\r\n", 139 | "floating lazily in his canoe, was not looking backward,\r\n", 140 | "introspectively considering if he were the sum of five generations,\n", 141 | "------------------------------\n", 142 | "introspectively considering if he were the sum of five generations,\r\n", 143 | "each of which had contributed its quota to subduing a wild land to\r\n", 144 | "its use and need, to its ambition as well as to its necessity, and\r\n", 145 | "becoming one at last with that portion of the earth the first\r\n", 146 | "Roderick Norquay had made his own and handed to his sons.\r\n", 147 | "\r\n", 148 | "No, eighteen mercifully wears invisible blinkers, and Rod was no\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "# Recursive text splitting with chunk overlap\n", 154 | "\n", 155 | "text_splitter = RecursiveCharacterTextSplitter(\n", 156 | " chunk_size=400,\n", 157 | " chunk_overlap=100,\n", 158 | " length_function=len,\n", 159 | " is_separator_regex=False,\n", 160 | ")\n", 161 | "chunks = text_splitter.split_text(book_text)\n", 162 | "\n", 163 | "print(chunks[50])\n", 164 | "print(\"-\" * 30)\n", 165 | "print(chunks[51])\n", 166 | "print(\"-\" * 30)\n", 167 | "print(chunks[52])" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "colab": { 173 | "provenance": [] 174 | }, 175 | "kernelspec": { 176 | "display_name": "Python 3", 177 | "name": "python3" 178 | }, 179 | "language_info": { 180 | "name": "python" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 0 185 | } 186 | -------------------------------------------------------------------------------- /notebooks/LSH.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | " \"Open\n", 9 | "\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "id": "EkcnqMr7crU7" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import 
numpy as np\n", 21 | "from sklearn.random_projection import SparseRandomProjection\n", 22 | "from sklearn.metrics.pairwise import cosine_similarity" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "colab": { 30 | "base_uri": "https://localhost:8080/" 31 | }, 32 | "id": "rVo11ZgscuIJ", 33 | "outputId": "0d5a458d-da4d-406a-c76e-411f0cca6025" 34 | }, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "Vector 0: [ 0.49671415 -0.1382643 0.64768854 1.52302986 -0.23415337]... Hash: 100\n", 41 | "Vector 1: [-1.41537074 -0.42064532 -0.34271452 -0.80227727 -0.16128571]... Hash: 001\n", 42 | "Vector 2: [ 0.35778736 0.56078453 1.08305124 1.05380205 -1.37766937]... Hash: 110\n", 43 | "Vector 3: [-0.82899501 -0.56018104 0.74729361 0.61037027 -0.02090159]... Hash: 111\n", 44 | "Vector 4: [-1.59442766 -0.59937502 0.0052437 0.04698059 -0.45006547]... Hash: 100\n", 45 | "Vector 5: [ 0.92617755 1.90941664 -1.39856757 0.56296924 -0.65064257]... Hash: 110\n", 46 | "Vector 6: [ 0.75698862 -0.92216532 0.86960592 1.35563786 0.4134349 ]... Hash: 001\n", 47 | "Vector 7: [-0.52272302 1.04900923 -0.70434369 -1.4084613 -1.55662917]... Hash: 111\n", 48 | "Vector 8: [ 0.93828381 -0.51604473 0.09612078 -0.46227529 -0.43449623]... Hash: 001\n", 49 | "Vector 9: [ 0.36867331 -0.39333881 0.02874482 1.27845186 0.19109907]... 
Hash: 001\n", 50 | "\n", 51 | "Example of similarity in hashes:\n", 52 | "Vector 0 hash: 100\n", 53 | "Vector 1 hash: 001\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "# Set parameters\n", 59 | "n_samples = 10\n", 60 | "n_features = 100\n", 61 | "n_components = 3 # lower dimension for LSH projection\n", 62 | "\n", 63 | "# Create random high-dimensional data\n", 64 | "np.random.seed(42)\n", 65 | "data = np.random.randn(n_samples, n_features)\n", 66 | "\n", 67 | "# Implement LSH using random projection\n", 68 | "lsh = SparseRandomProjection(n_components=n_components, random_state=42)\n", 69 | "lsh.fit(data)\n", 70 | "\n", 71 | "\n", 72 | "# Hash function: simply binarize the projected data\n", 73 | "def hash_vector(v):\n", 74 | " projection = lsh.transform(v.reshape(1, -1))\n", 75 | " binary_hash = (projection > 0).astype(int).flatten()\n", 76 | " return \"\".join(binary_hash.astype(str))\n", 77 | "\n", 78 | "\n", 79 | "# Hash each vector\n", 80 | "hashes = [hash_vector(d) for d in data]\n", 81 | "\n", 82 | "# Show the original vectors and their hashes\n", 83 | "for i, (vec, hsh) in enumerate(zip(data, hashes)):\n", 84 | " print(f\"Vector {i}: {vec[:5]}... 
Hash: {hsh}\")\n", 85 | "\n", 86 | "# Demonstrating similarity in hashes for similar vectors\n", 87 | "print(\"\\nExample of similarity in hashes:\")\n", 88 | "print(f\"Vector 0 hash: {hashes[0]}\")\n", 89 | "print(f\"Vector 1 hash: {hashes[1]}\") # Assuming Vector 1 is similar to Vector 0" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 4, 95 | "metadata": { 96 | "colab": { 97 | "base_uri": "https://localhost:8080/" 98 | }, 99 | "id": "ivEMmN_1cvNS", 100 | "outputId": "93c59ac1-7b9a-4ee4-c9bf-a6c0b57f7879" 101 | }, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Number of calculations for naive cosine similarity: 45\n", 108 | "Number of calculations for LSH cosine similarity: 9\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "# Function to calculate cosine similarity for all pairs\n", 114 | "def compute_all_cosine_similarities(data):\n", 115 | " n = len(data)\n", 116 | " count = 0\n", 117 | " for i in range(n):\n", 118 | " for j in range(i + 1, n):\n", 119 | " cosine_similarity(data[i].reshape(1, -1), data[j].reshape(1, -1))\n", 120 | " count += 1\n", 121 | " return count\n", 122 | "\n", 123 | "\n", 124 | "# Function to calculate cosine similarities using LSH\n", 125 | "def compute_lsh_cosine_similarities(data, hashes):\n", 126 | " unique_hashes = set(hashes)\n", 127 | " count = 0\n", 128 | " for h in unique_hashes:\n", 129 | " indices = [\n", 130 | " i for i, hash in enumerate(hashes) if hash == h\n", 131 | " ] # find subset with identical hash\n", 132 | " for i in range(len(indices)):\n", 133 | " for j in range(i + 1, len(indices)):\n", 134 | " cosine_similarity(\n", 135 | " data[indices[i]].reshape(1, -1), data[indices[j]].reshape(1, -1)\n", 136 | " )\n", 137 | " count += 1\n", 138 | " return count\n", 139 | "\n", 140 | "\n", 141 | "# Calculating cosine similarities for all pairs\n", 142 | "all_cosine_calculations = compute_all_cosine_similarities(data)\n", 143 | "\n", 144 | "# 
Calculating cosine similarities using LSH\n", 145 | "lsh_cosine_calculations = compute_lsh_cosine_similarities(data, hashes)\n", 146 | "\n", 147 | "print(\"Number of calculations for naive cosine similarity:\", all_cosine_calculations)\n", 148 | "print(\"Number of calculations for LSH cosine similarity:\", lsh_cosine_calculations)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 3, 154 | "metadata": { 155 | "id": "rhWorUQ8dbkt" 156 | }, 157 | "outputs": [], 158 | "source": [] 159 | } 160 | ], 161 | "metadata": { 162 | "colab": { 163 | "provenance": [] 164 | }, 165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "name": "python" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 0 175 | } 176 | -------------------------------------------------------------------------------- /notebooks/Product_Quantization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | " \"Open\n", 9 | "\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "id": "7EpGFpF1XFJc" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "from sklearn.cluster import KMeans" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 4, 27 | "metadata": { 28 | "id": "FGj3o1ZYXIez" 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "class ProductQuantizer:\n", 33 | " def __init__(self, n_subvectors, n_clusters):\n", 34 | " \"\"\"\n", 35 | " Initialize the Product Quantizer.\n", 36 | " :param n_subvectors: Number of subvectors to divide each vector.\n", 37 | " :param n_clusters: Number of clusters for quantization in each subvector.\n", 38 | " \"\"\"\n", 39 | " self.n_subvectors = n_subvectors\n", 40 | " self.n_clusters = n_clusters\n", 41 | " self.kmeans = 
[KMeans(n_clusters=n_clusters) for _ in range(n_subvectors)]\n", 42 | "\n", 43 | " def fit(self, data):\n", 44 | " \"\"\"\n", 45 | " Fit the model on the data.\n", 46 | " :param data: Array of shape (n_samples, n_features).\n", 47 | " \"\"\"\n", 48 | " subvector_len = data.shape[1] // self.n_subvectors\n", 49 | " for i in range(self.n_subvectors):\n", 50 | " # Extract subvectors and fit KMeans\n", 51 | " sub_data = data[:, i * subvector_len : (i + 1) * subvector_len]\n", 52 | " self.kmeans[i].fit(sub_data)\n", 53 | "\n", 54 | " def quantize(self, data):\n", 55 | " \"\"\"\n", 56 | " Quantize the data using the fitted model.\n", 57 | " :param data: Array of shape (n_samples, n_features).\n", 58 | " :return: Quantized data as indices of centroids.\n", 59 | " \"\"\"\n", 60 | " subvector_len = data.shape[1] // self.n_subvectors\n", 61 | " quantized_data = []\n", 62 | " for i in range(self.n_subvectors):\n", 63 | " # Extract subvectors and predict the closest centroid\n", 64 | " sub_data = data[:, i * subvector_len : (i + 1) * subvector_len]\n", 65 | " quantized_data.append(self.kmeans[i].predict(sub_data))\n", 66 | " return np.array(quantized_data).T\n", 67 | "\n", 68 | " def inverse_transform(self, quantized_data):\n", 69 | " \"\"\"\n", 70 | " Convert quantized data back to approximate vectors.\n", 71 | " :param quantized_data: Array of quantized data (indices of centroids).\n", 72 | " :return: Approximate original vectors.\n", 73 | " \"\"\"\n", 74 | " subvector_len = self.kmeans[0].cluster_centers_.shape[1]\n", 75 | " approx_data = np.zeros(\n", 76 | " (quantized_data.shape[0], subvector_len * self.n_subvectors)\n", 77 | " )\n", 78 | " for i in range(self.n_subvectors):\n", 79 | " centroids = self.kmeans[i].cluster_centers_[quantized_data[:, i]]\n", 80 | " approx_data[:, i * subvector_len : (i + 1) * subvector_len] = centroids\n", 81 | " return approx_data" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": { 88 | "id": 
"RFMgzGnqXLFl" 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "# Generate sample data\n", 93 | "\n", 94 | "np.random.seed(0)\n", 95 | "data = np.random.rand(100, 64) # 100 samples, 64-dimensional vectors" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | "metadata": { 102 | "colab": { 103 | "base_uri": "https://localhost:8080/" 104 | }, 105 | "id": "cJQUt2o-VBv8", 106 | "outputId": "fda0c20a-a1fa-4092-d4cc-cc3cce15eeb2" 107 | }, 108 | "outputs": [ 109 | { 110 | "name": "stderr", 111 | "output_type": "stream", 112 | "text": [ 113 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", 114 | " warnings.warn(\n", 115 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", 116 | " warnings.warn(\n", 117 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", 118 | " warnings.warn(\n", 119 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", 120 | " warnings.warn(\n", 121 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", 122 | " warnings.warn(\n", 123 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. 
Set the value of `n_init` explicitly to suppress the warning\n", 124 | " warnings.warn(\n", 125 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", 126 | " warnings.warn(\n", 127 | "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", 128 | " warnings.warn(\n" 129 | ] 130 | }, 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "Original data (first vector): [0.5488135 0.71518937 0.60276338 0.54488318 0.4236548 0.64589411\n", 136 | " 0.43758721 0.891773 0.96366276 0.38344152 0.79172504 0.52889492\n", 137 | " 0.56804456 0.92559664 0.07103606 0.0871293 0.0202184 0.83261985\n", 138 | " 0.77815675 0.87001215 0.97861834 0.79915856 0.46147936 0.78052918\n", 139 | " 0.11827443 0.63992102 0.14335329 0.94466892 0.52184832 0.41466194\n", 140 | " 0.26455561 0.77423369 0.45615033 0.56843395 0.0187898 0.6176355\n", 141 | " 0.61209572 0.616934 0.94374808 0.6818203 0.3595079 0.43703195\n", 142 | " 0.6976312 0.06022547 0.66676672 0.67063787 0.21038256 0.1289263\n", 143 | " 0.31542835 0.36371077 0.57019677 0.43860151 0.98837384 0.10204481\n", 144 | " 0.20887676 0.16130952 0.65310833 0.2532916 0.46631077 0.24442559\n", 145 | " 0.15896958 0.11037514 0.65632959 0.13818295]\n", 146 | "Compressed data (first vector): [0 6 6 4 2 2 3 8]\n", 147 | "Approximated data (first vector): [0.54019654 0.36485555 0.80442656 0.52945912 0.26097166 0.43403487\n", 148 | " 0.35285817 0.79455641 0.5989435 0.21066257 0.73069621 0.33785665\n", 149 | " 0.68675651 0.60267202 0.57986277 0.37160765 0.21162824 0.87089306\n", 150 | " 0.59306895 0.63841958 0.79780639 0.57421621 0.56062129 0.51786635\n", 151 | " 0.18948159 0.69750145 0.27819821 
0.56544019 0.49665064 0.25799192\n", 152 | " 0.19297701 0.41691968 0.26339078 0.26005451 0.34358506 0.55870678\n", 153 | " 0.74224377 0.57784322 0.79144145 0.52381845 0.61077506 0.61437639\n", 154 | " 0.32514294 0.19577561 0.56556666 0.63606577 0.18373289 0.39832537\n", 155 | " 0.31965271 0.4305464 0.79324014 0.57142561 0.58212128 0.22830984\n", 156 | " 0.64554358 0.29054842 0.8200301 0.66880309 0.49622709 0.46668542\n", 157 | " 0.25961788 0.46590464 0.69995335 0.24137759]\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "pq = ProductQuantizer(\n", 163 | " n_subvectors=8, n_clusters=10\n", 164 | ") # Divide into 8 subvectors, 10 clusters each\n", 165 | "pq.fit(data)\n", 166 | "quantized_data = pq.quantize(data)\n", 167 | "approx_data = pq.inverse_transform(quantized_data)\n", 168 | "\n", 169 | "# Demonstrate the approximation\n", 170 | "print(\"Original data (first vector):\", data[0])\n", 171 | "print(\"Compressed data (first vector):\", quantized_data[0])\n", 172 | "print(\"Approximated data (first vector):\", approx_data[0])" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": { 179 | "id": "5s3QAzyfXNSx" 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "# Nearest neighbor search using quantized vectors\n", 184 | "def nearest_neighbor(query, quantized_data, pq):\n", 185 | " approx_query = pq.inverse_transform(pq.quantize(query.reshape(1, -1)))[0]\n", 186 | " distances = np.linalg.norm(approx_data - approx_query, axis=1)\n", 187 | " return np.argmin(distances)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 8, 193 | "metadata": { 194 | "colab": { 195 | "base_uri": "https://localhost:8080/" 196 | }, 197 | "id": "beowe90IXPRp", 198 | "outputId": "9c97645d-17f7-432d-ca08-9204c898ca15" 199 | }, 200 | "outputs": [ 201 | { 202 | "name": "stdout", 203 | "output_type": "stream", 204 | "text": [ 205 | "Nearest neighbor index for the query: 58\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 
210 | "# Example query\n", 211 | "query = np.random.rand(64)\n", 212 | "nn_index = nearest_neighbor(query, quantized_data, pq)\n", 213 | "print(f\"Nearest neighbor index for the query: {nn_index}\")" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "id": "pfCo3tEgVRcG" 221 | }, 222 | "outputs": [], 223 | "source": [] 224 | } 225 | ], 226 | "metadata": { 227 | "colab": { 228 | "provenance": [] 229 | }, 230 | "kernelspec": { 231 | "display_name": "Python 3", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "name": "python" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 0 240 | } 241 | -------------------------------------------------------------------------------- /notebooks/Scalar_and_Binary_Quantization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | " \"Open\n", 9 | "\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "id": "JENXK-euYH9p" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "%%capture\n", 21 | "\n", 22 | "%pip install qdrant-client==1.7.0\n", 23 | "from qdrant_client import QdrantClient\n", 24 | "from qdrant_client.http import models\n", 25 | "import numpy as np\n", 26 | "import random\n", 27 | "import time\n", 28 | "import os\n", 29 | "import shutil" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "id": "N36PQREaccy0" 36 | }, 37 | "source": [ 38 | "# Sample Binary Quantization\n" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": { 45 | "colab": { 46 | "base_uri": "https://localhost:8080/" 47 | }, 48 | "id": "N_DImynUYgl9", 49 | "outputId": "d5bba92c-fcec-4e9e-e112-ca72f5916790" 50 | }, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "Original Data: [ 
0.1 -0.5 0.9 -1.5 -2.1 2.5]\n", 57 | "Binary Quantized Data: [1 0 1 0 0 1]\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "def binary_quantization(input_data, threshold):\n", 63 | " return np.where(input_data >= threshold, 1, 0)\n", 64 | "\n", 65 | "\n", 66 | "data = np.array([0.1, -0.5, 0.9, -1.5, -2.1, 2.5])\n", 67 | "binary_data = binary_quantization(data, 0)\n", 68 | "print(\"Original Data:\", data)\n", 69 | "print(\"Binary Quantized Data:\", binary_data)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": { 75 | "id": "SnPuUZcscfAv" 76 | }, 77 | "source": [ 78 | "# With a real vector database\n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 19, 84 | "metadata": { 85 | "colab": { 86 | "base_uri": "https://localhost:8080/" 87 | }, 88 | "id": "fvb3KflGYH9q", 89 | "outputId": "43652690-d78a-440b-e713-4fe221845448" 90 | }, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "((2000,), numpy.float64)" 96 | ] 97 | }, 98 | "execution_count": 19, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "# Generate fake data\n", 105 | "\n", 106 | "n_samples = 10_000\n", 107 | "n_features = 2_000\n", 108 | "\n", 109 | "data = np.random.randn(n_samples, n_features).astype(np.float64)\n", 110 | "data[0].shape, type(data[0][0])" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 4, 116 | "metadata": { 117 | "id": "jpJh9ixsYH9q" 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "# Function to calculate how accurate the semantic search is\n", 122 | "\n", 123 | "\n", 124 | "def calculate_recall(true_ids, predicted_ids):\n", 125 | " true_positive = len(set(true_ids).intersection(predicted_ids))\n", 126 | " total_relevant = len(true_ids)\n", 127 | " recall = true_positive / total_relevant\n", 128 | " return recall" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 5, 134 | "metadata": { 135 | "colab": { 136 | "base_uri": 
"https://localhost:8080/" 137 | }, 138 | "id": "ieFVnBbQYH9r", 139 | "outputId": "cff4a11e-9a4b-40ac-eede-acd5d8088a7a" 140 | }, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "PointStruct(id=0, vector=[0.8739856615714474, 1.807544501917782, -0.3867121777810109, 0.7475418615175148, -0.011220441841003254, 0.45257889965144166, -0.9196444213114707, -1.0290491059951938, 2.918030652711712, 1.0421167848695043, 0.9405130118965381, 0.44188452295746605, -0.7324968849739459, -0.4153768193514054, 0.615227713264581, -0.18773011892615266, -1.1561768781843726, -1.24777927355494, 1.5964806315895208, 0.8547161224294311, -0.3038598659231874, -0.6439279820971076, 0.14474043756222754, 0.3090291498030087, 0.5472755250768923, 0.31077892217254394, 1.2022719476274863, -0.5398650761004974, -2.356072668699791, 0.2853584938779716, -0.13018253455324919, 0.3848423701222905, -0.42812877678193306, -0.8459145927895287, -1.213090240744919, 0.22027091205841778, 0.7649809671645519, -0.3088909853391043, 0.19020701290005637, 2.016694675476221, 0.027413739399153304, 0.6275031142213888, -1.7072609245372927, -1.1967124653346288, 0.3951523566787642, 0.19832771617608677, -0.41679453021084445, -0.3768448912047085, -0.4962602989791896, 0.2315142520286191, -0.25861984484140965, -0.808090021673335, 0.8285571831499642, -0.7220531549669087, -0.43623165832236016, -0.6382243611857582, -0.8635919258221331, 0.5833635347496621, -0.230069144649749, -0.3854583649462815, -0.6955460336316391, -0.09492573281138832, -1.6990531678387115, -1.4660830493359958, 0.6486390532000192, 0.034826949297492964, 0.18034836207235858, -0.010644131981413039, 0.6595363520916322, -0.48117843161865004, 0.5290767142322196, 0.719583313085802, 2.386143510365132, 1.276251437321848, -0.6380617141367132, 0.8583989912441614, -0.9409609814028781, -0.21511106201618868, -0.9398731599209335, -0.16100057282409438, -0.2854686475794584, -1.6133854494884647, -1.4307808842360343, 0.9752110172694974, -1.9911485031552356, 
-0.16582613459528905, 1.1596486233410326, 1.136073305957058, -2.400104140959142, -0.329001108811521, -0.15085624545440568, 1.150725221164357, 1.0864416721385848, -0.7215929170403728, 0.45985436393249707, -0.33563481473277845, -0.8881279197171073, -0.41395925554482277, 0.03142954304392228, -0.6895579346815556, 0.04901128644124648, 0.1838176795259276, -0.26473473992428553, -0.14735570193841493, 0.5947595873656595, -0.22358889411559582, 0.3649702363749173, 1.8022066917804516, 2.125491371073192, 0.28719837772873563, -1.617971743543826, 0.35745798551809654, 1.1270802690285664, -0.8758826287194008, 0.4403067903323955, 0.9899777442095141, 0.0525397155470262, -0.41257796678974545, 0.7005358823811887, -0.8370916045844198, -0.1227547950827695, 0.2083554183485485, 0.08321635521287682, 0.5108650118740367, -2.2043351373398514, -2.041452472089077, -0.5231759152284405, -1.2759729339092014, 1.238153836353617, 1.3482773908905843, 0.7398743694383785, -0.44494643127003214, -1.6657557354284218, 0.517752641748811, -1.8128304088127494, 1.158709526296948, -1.0912555517506775, 0.09235465191115129, -0.9395915185223676, 1.3008908964256214, 0.20833219501576677, -0.43424546694005955, 0.21083941881732063, 0.6174048297804832, 0.35112752391656166, 0.6234551567393238, 1.6995287896026419, -0.38034253658177336, -1.039808884462498, 0.01759958284022382, 0.6723068935717113, -0.2862215852574517, 0.12151453543949745, -1.1627663574048555, -0.6634088272706828, 0.5583566844179129, -0.4569480278643297, -0.6845612940400703, -1.8698816481253624, -0.040268341721252494, -0.2802853914823629, 1.967385263251446, -0.2361987699031495, 0.691848467924697, 1.1900915904116465, 0.9785775100444764, 1.422389420373977, -1.2977348090388228, 0.0253636370217711, 2.2747695222939477, 0.12376313528818167, -0.10066628075743671, -0.5379180326700851, 0.6897162634340557, -1.3889893963121513, -1.7634151722378186, 1.142489649900556, -1.0208772684603826, -1.7795490337054842, -0.701555983575828, 2.3434991577452053, 0.22476198405194905, 
-0.7118166245799107, 0.35935705134013574, 0.5393159604188043, 1.1627008366958969, 0.2836798030885163, -1.5217342712206414, -0.30330881709755336, 0.6499417126844457, -0.3511844739192, -0.9397962613027147, -0.5383418373069065, 2.6647317773663426, -0.5103249546979434, -0.6122750440325111, 0.1312668055841947, -0.26680923547303426, -1.5168772770602579, 0.10458441272289655, -0.5227869923704701, -1.6198540858473058, -0.3411731368593385, 0.746924260285729, -0.4339038572070828, -1.3839905009520466, -0.091329017691049, 0.4973232027973987, -0.11025203445914149, -0.06090403051150205, -0.21944266028484075, -0.806265353669887, 0.6911456400892636, -0.8615821719673875, 1.2074944965322478, -0.6638667958004415, 0.8219157570608214, 0.6948093184648547, -0.6078384677232166, 0.10286889838824445, 1.3236669645763002, -0.1422018228298019, 0.5691698567734607, -0.3951960722432252, -1.5760136294157234, -0.8306443950301385, 0.8602028210744199, -0.08008793015775575, 0.05000659412182344, 0.12731301202795697, -0.3450169953468329, 1.2672098242206151, 0.4232348089521366, -0.09792690008081752, -0.5836883087375134, 2.2743662237791393, -0.9597274678636308, 0.16439368829292106, 0.22010005765039156, 1.856013953137201, -0.5113112676949015, -0.5060195792382188, 0.5209417775178454, -0.013043114989990431, -0.8142705320747015, -0.5345881730065491, -0.06532376180536889, -0.30421759301018964, 0.01155801253549417, -0.9471180289513473, -0.4523253004364728, -0.2968798013212954, -0.9870393520960766, 0.6338553159653827, 1.4086979659606147, 0.5385536577540905, 1.3696957147724431, 0.5782745360803876, 1.3627483077146723, -0.9222891244325496, 1.1156923186978807, 0.7115779734151073, -0.5585789720315313, -0.593904471038568, 0.7563622432415409, -0.1479647854256813, -0.8889758806638851, -0.9279070360143741, -0.25491732871065065, -0.8615546363250921, -0.9147019617876051, 1.5863635457637177, 0.46265577981391387, -0.40111737076132725, -0.4735682245852609, 0.9969083787992383, -0.7859839625687204, 0.23513176954890094, 
-0.5117732817954718, 0.9869130926998313, -1.108762401510336, -1.6186732478188164, -0.09593834085637555, 0.42601740904966245, -2.00000373144404, 1.4146236614116967, 0.09319121146538076, 1.2974012241714643, -0.6117753691376677, 1.8264933658306204, -0.24536811664587976, -1.0477817553274533, -1.222439118372166, 0.14605762505741743, -1.4267919721129254, -1.6939449664446207, 1.2055148101267297, -0.6975622012121399, 0.1917022984766768, -0.7932612322449053, 2.6539828301697708, -1.1296611645986088, 1.8183272130714463, 0.21279182299475446, 1.1618529310476586, -1.5610802948077132, -0.5660325593727671, -1.2965107320847942, 0.33042300339394853, -0.2101564107376278, 1.6448948784427275, -1.5697791987489094, 0.6507858884016715, -0.09562108636722068, 0.5238642910365366, -0.0413007333640945, 1.0445691957652035, -0.6483697762657821, 1.124314523780108, 1.9948072441487672, 0.11653603862324625, 0.5419128179238362, -0.3503036239655612, -0.2024903856027717, -0.09491919343337278, -1.0914360854908272, 0.2772878909442388, 0.7175277637662891, -1.5743065584411517, 1.4956791582756284, 0.20043571565987775, -0.8614897632398332, 0.3522666177125854, -2.5940112031305067, 0.21347140418514496, 0.23375985425551424, -0.943357942791554, 0.24828335916692856, 1.179097709978088, 1.3511228625422627, 1.3831233253658048, 0.41940588808318424, 0.3856837540149298, 0.24326630166915142, -0.5705790216968735, -0.9672322369884202, 1.3884503150317768, 0.6440946203483854, -1.0088236618111714, 0.2519299706358328, 0.5694040349936582, -0.685179016274018, -0.9063428605732866, 0.6957346508903758, -1.789554466679911, 0.28621898947568913, 0.973378763109292, -1.89635337844661, -0.4935073164744471, -0.06776890572538476, -0.2281936783588049, -1.0395561137454772, 0.5584880188045954, 1.2550940075987709, 0.7826414744562139, 0.2585542528136326, 0.0423124364494526, 0.9186610558825312, -0.8729887134747064, 1.5652272532160436, -0.7962725596422613, 1.1641012064695773, 0.540800748146648, 0.4195865221721042, 1.7563657840454439, 
-0.8709761473105903, -1.5437947408409367, -0.8646109167030699, 1.1020894958701655, -0.2734173424802635, 0.42307828106798917, -0.5298199311774375, 1.2373156561594065, -0.10600344009915738, 1.8461451644820346, 0.09176159029724638, 0.016962559847412986, 0.608841495481799, 0.6690305558390355, -0.8738682189088378, -0.7563982319657698, -0.6591026027600448, -0.14322492650572452, 0.8982409267780486, -0.7443785798750505, -1.2541164095321906, -0.08898288303923992, -0.7091609106356238, -0.14483068947052186, 0.9698557001546162, 2.0319577097106207, -0.17363603899753358, -1.2288911546731394, -0.6814387008924646, -1.2009538107713744, 0.30604347948299765, -1.1083111776119365, -0.3261216033348528, 1.4890407414243467, -0.8145624542670111, -0.25847729286013477, 0.3301784575176401, 0.5597212600577293, 0.30697154558231465, -0.11019742371384686, -0.26373363481430456, 0.7821607874065782, -1.5521382960798558, 0.18520861197789681, -1.0882992759669683, -0.8427476691297193, 1.1022550578454484, 0.02174368370805144, -0.16371528410266964, 1.341626830727837, 2.5909642040214735, -0.06439142034827636, 0.34552834345586275, 1.37445007506629, 2.312675364325059, 0.15433563559712965, 1.3400601088191246, 0.5663979918190727, 0.591594145185851, -1.7005154568359095, -0.1030893396544319, -0.001962691648805766, -0.6317364950800864, -0.05256078270315826, -0.5229753162397426, -0.4916898807371568, 0.19185253764792606, 0.4821466929378671, 0.8210012253967427, -1.4457036969238104, -0.04570233638512601, 0.26942989615372753, -0.9088670413828271, -0.6222116252127856, -0.0905751269025448, 0.4654678075953441, -0.18251965295294595, -0.7487306500534452, 1.0399241836132367, 0.6796360368472725, -0.44880999597194676, -0.133252625925194, 0.5070153529183737, -0.430789392508267, -0.576720415777378, 0.7145822709360119, 0.37675843584950414, 0.5085571870152197, -0.34639540032609356, 0.30236465688579917, 0.8916354266478563, 0.2667154875835729, 1.4255076142617171, -0.4298954910139599, 0.5867259045872778, 0.4739705672256915, 
-0.12546180159178583, 1.0029621417338943, 0.149602983502331, 0.2783315642168051, -0.26480220745643623, -0.792357140850614, -0.20300326692286103, 1.171227308061891, -0.15614112648684497, 0.28628407680831974, 0.7800020280852357, 0.22292211934804182, -0.06796674185411358, 0.3557041545962791, 0.6403168303744173, -0.49218540199910854, 0.9612063723909208, -0.5037290133459246, -0.26881581750361133, 0.5098589623382531, -1.0388184643896239, 0.4404997636162868, -0.7315674314713295, 0.8538460216501939, -0.512782139974541, -0.4115564803794845, 0.34203404550906935, -0.901208857632116, -2.8509649515440496, 0.9282687003429269, 0.6066895231553071, 1.4086558420268058, 1.402912717815279, 0.8520781546121061, 0.43570037174945336, -0.3377528828064129, 1.7703842662183449, 1.1601004200015603, 0.7461428107109984, 0.2828260994173198, 1.8440107880597996, 1.5353525849820548, -0.32209327214228684, 1.7952497415609365, 1.1110839675118958, 1.3786362119267037, -1.6770693148573748, 1.481782476296522, -1.4421581170562894, 1.907298314702263, -1.7683112700027237, 0.5605296534121207, -0.619737072938621, -0.11384715332292172, 0.40847993617099465, -1.5517175679279651, 1.1923696107431334, 2.251926768935434, 0.44729154492175077, -1.3138761228792646, -0.8579324054222918, -0.9735374025316328, 0.2751020638566914, 1.374966635707915, -1.2924459766589271, 0.40469769189097715, -0.7162521629584148, -1.0329657269388046, 0.044284921201708315, -0.6773117379815284, -0.9017003414440038, -0.3521475589960403, 0.09041363798320852, -0.3090307090314419, -1.4540066432469059, 0.4464847418234119, 2.293209879457769, -1.2751528775787986, 0.8646398455186836, -1.3863995375446259, -0.20070944571118346, -0.2170646939839637, -0.8995361790252235, -0.2596539824808564, -1.3383297962293443, 1.0868866808701878, -1.2287607791449957, -1.2266798423844727, 0.11872590939583381, -1.031207884386554, -0.6115247562526889, -0.917826412267748, -0.3013975358538834, 0.24481660240281997, -0.648690129848851, 0.21807220192668061, -0.180710350169749, 
0.26508136169564595, 0.8036366986791662, -0.6782681865332831, -0.7318427110050485, 0.125203238646975, 0.25912388538870257, 0.23324860414600704, -0.6221446417329906, -1.2585199097799753, 1.5844009173039746, -2.6765281531324994, 1.6753660738622098, 0.5896206603267488, 1.2774031550245797, -0.26518513887124245, 0.7199468809657413, 1.5211453051964028, -0.8481037041658348, 1.8243747474743592, -0.32266294930421546, 0.12978097537001648, 2.074044236750424, -1.1268619643021274, -0.6875084756566904, 0.20563655869907416, -0.41320720158846747, 0.5927779569651063, -0.2760412458636729, 0.8101390602055154, -0.26175996482495706, -0.04172982022961745, -1.3084774960329497, -0.5745084653794275, -0.5966077559798149, 2.1495881698736365, 2.0383644763823585, -2.285805377991299, 0.13073050569230948, -1.021153941335237, -0.014387623207345808, -1.0678276564780964, 0.16423462836000627, -0.25641052887774785, -0.9097479438716664, -0.814785756307686, 0.5571478592935751, -0.36527568219161255, -0.7948832296484798, 1.8745363628553122, -1.172787791913729, -0.5364842177496785, -0.046393103549231714, -1.176556001134429, 0.6128295153128935, -0.9934367580835219, 0.32643108703309676, -0.4341907463006036, -1.3985831529205628, -0.958891140473352, -1.0195188370564536, 0.24744067034541672, 2.09523917102666, 1.7284858382232373, -1.862106134414064, -0.5337270182177258, 1.7258916800174842, -1.7292372784617458, -1.8795765421615347, 0.22542013267680042, 1.2041888901281408, -1.1767945910249378, 1.0379675565530386, 0.5269199312204694, -1.1699157665042226, -0.5054507536570928, 0.5496441741228413, 1.6865856434730069, 1.8194883715039307, -0.02943914339102388, 1.1763547204669835, -1.3006850852318372, 0.4360063199906842, 0.004675450941487546, 1.3828281086619696, -0.07522690805918332, -1.6848945847913057, 0.48540039904882887, -0.9334877460884445, -0.4678184108259975, 0.7595515328710757, 0.010590398516091733, 0.3591803576542262, 0.7237216073854897, -1.029898117829915, -1.2776445911649352, -0.40955370709763556, 
0.18426336683416877, -1.1920651444985426, 0.37608663689271987, -0.1871381083434368, -1.0505213893015197, 1.1750636365559886, 0.7012917277506181, 0.25678097920743936, 0.820253316610241, -0.9429455439478871, 1.1915202252096944, -1.2079066318049854, 1.7963727876161164, 0.7364335259164053, -0.7038589984336157, -2.2368946856900203, 1.0012803768709604, -0.46180908055036723, -0.3701761096681924, 0.7045649228970661, -0.7191446620479931, 0.20990946035138025, -0.9853972746897315, 0.39664451285599184, 0.7944071856745347, -0.9334915095910968, 1.702349497952416, -1.3588862845737135, 0.927932214995008, 0.4646310761957531, 0.7485931544471043, 0.9719803203873983, 0.2438659132119836, 0.2398269449865993, 0.39299602559074015, -1.3997646395532453, 0.22849422986525122, -1.0162979314703435, 1.3665780509426908, 0.5332656339590457, 1.380906723349161, -0.09545471305961607, -2.2399832372623774, 0.9518131850619272, -1.2006449869986293, 2.226985244662912, -1.538842113588818, -0.7770416764018844, 0.15927181886139652, -0.8215364800127253, -0.6533325204526949, -0.7384958074533362, 0.1674077172587117, 1.1762351685778192, -1.045571337964878, 1.3796188854594686, 0.6441594852733391, 0.775179108987916, 1.664029074122316, -0.5533689012952354, 1.3259685787349988, -2.0457209493206516, 1.3192010513213668, 1.218307654889701, 0.7388865736823325, -0.2346300532731865, 0.40730176552809283, 1.298024624753732, 0.7345944727747447, -0.8041433977606076, -0.05266184781591866, 1.625526005655891, 0.10810222092440826, -0.5051066955667007, 0.8676998473130714, -0.651071359001196, 1.4313609104449974, 1.3213006274401173, -0.8483912291599476, -0.39752870307259713, 1.7915348657006116, -0.3130866558147973, 2.479102654286555, -0.06948379445534886, 0.7528565998778501, -3.3683136967466067, 1.740169213120046, -0.908265095292864, -0.4620279040191046, 0.06267191117539996, -0.7990311054374428, 0.31957072464004244, -0.021614271590181637, -1.1481279033735188, -0.5001723458379502, 0.08653733563339588, 0.4439504405419998, 
-1.0627169644743926, -2.1918049225787772, -0.08871021574784907, 0.34827145430289963, -1.7185192498185944, 1.83923562296338, 1.552642784743935, 0.699264917232534, -0.49555371518962, 1.7303484129475954, 0.1784951867649678, -2.163233794738634, 1.0988550350095907, 0.49775442712536966, 1.3648061872796378, 0.8290857082728699, -0.7675919740866475, 1.2736163418065445, -0.732445029498191, 0.5012538704324375, 0.5356438891148995, 0.8446420042914694, -1.3913594133036478, -1.420705315861281, -0.8853111470046598, -0.8269981493643119, -1.0030473949818957, -0.11706715590128203, 0.11350716186166226, -0.6347647354227907, -0.870092726178067, -0.2558633688077023, 0.7300414225845165, -0.40115484776060634, -0.6101605874927177, 0.9554016601828329, 0.7443882837663842, -0.7347282225713324, -1.162096263391531, 1.1607980242831453, 0.3023115941811925, 0.5509707567194665, -0.6118400335752746, -0.8120968701498054, -1.0378881892318972, 0.5416796343109278, -0.00848916488569995, 1.5550313283128996, -1.0654108671980436, -0.795447955846968, -0.23618513566087768, 0.7541268078648524, 0.6029863466782375, 0.49115739342526604, -0.6108140649533065, -0.7910785475011395, -0.24151527604738368, 0.13995100451394227, -1.4747496521800645, -1.5534696774503967, -0.31357545645890644, 0.8283664744651716, 1.520410829370659, -0.8652331899007152, 0.9687728286425631, 0.7469908835135253, -0.45596986075515444, 2.0736388147687386, -0.9128522375380899, -1.558565981532091, -0.08060394307324573, 0.4574923719288784, -0.5373928142805574, -0.041654634121064266, 1.2932069004599342, -0.34390245310852763, -0.04278343336999984, 1.1671441562100933, -1.2612384059145931, 2.028750659021486, -0.4054892415803786, 0.3062839079542207, -0.24312003953846442, -0.6102845345611102, 0.43935260922557745, 0.8641468378168895, 0.7259226330778745, 0.6479010385883012, -0.1099655043786616, -0.6682988738581779, -0.31561192979646097, 1.3716191312171473, 0.3195401238390076, -1.3241693707283024, -1.1879732280786257, -1.4204057253711764, -1.4701215209856455, 
1.5765623501481323, 0.5490465558171254, 0.979047264397118, 0.21535354030081502, 1.036739511818943, 0.6330687872486253, 2.0407317994414913, -0.7956228255852679, 1.3082813972299503, -1.2437066903413307, -2.310835382344026, -0.47384411883076877, 0.6035868781242854, -0.41528558274938204, -0.14163385830017122, -0.8554070964609389, 0.18464073164914635, -0.928189567378024, 0.869620056379157, 0.36324155432517685, -0.8006062113654259, 0.23298419112173321, -0.08610130281563935, -1.0095210577747102, 1.78793873863253, 0.2065778981613746, 0.0679110842687391, -0.07773818879462864, -0.7214744622939873, 0.9767934377603481, -0.2663321073285069, -1.04665150287966, 0.1723612589533114, -1.6471530670568952, 0.5803674885293802, 0.4984451191026125, -1.8757255987779395, 1.0679930181837607, 0.6408873819115, -0.4041543858252602, 0.10740245132036325, -0.4398593865851238, 1.462347624489168, -0.12698548247924113, -0.3747525805429678, -1.4290117551801498, 0.922674571821963, 0.3003724316609889, 1.3148089324854138, -0.8110774923643627, 0.08520254101636562, 1.222188394351855, -0.8981222010023386, -1.1652642397763526, 1.7374027138504506, 0.20787477916717828, -1.5871657456669614, -0.6670147772828121, -0.2426320402288069, -0.22524849599564928, -0.9386844670885864, 0.12890927001205657, -0.09752178539525468, -1.0327245709301318, 0.1284253606810811, 1.2705312971631202, 0.9575945020347184, 1.542540566146314, -1.924561015807509, -0.9030011945472559, -0.42611690200552, -0.13298606270659197, -1.2287296209580554, -0.7148911171996385, 0.8762550066172035, 0.1794605194128548, 0.4535228803294127, 0.5688030864835869, -0.7868914104697946, -0.8972249184110536, -0.9609431209520729, -0.8758216036792366, 1.211042890635401, 1.471888745178047, -0.7164794710745208, 1.0948486209408481, -0.48091511214337396, 2.1451133504448565, -0.3798590212998682, 1.4054650327800748, -0.8921990626285892, 0.10108357932918514, 2.035125019400607, 0.8346287903151435, -0.1373202481762141, 0.20228533344385727, 0.20703949215112144, 
-1.3470023782213159, -0.47454535827979843, -0.2738005578843613, -2.50712107048958, -0.388249114765123, -1.8285486069843124, -0.7293737245620747, 0.7217725712913249, -0.5071893808605782, 0.3862334437394401, -1.2209337294967602, -0.34140792612944404, 0.7748068473671835, -1.0047111896987782, 0.9163057701533942, -1.4449053457444754, 0.18424269792305406, -1.3578713483387248, -0.002453164487439328, 0.8774031446956307, -0.7009272883826753, -0.5245219706902311, -0.7824193307375615, -0.49327347272493804, -2.62874064412338, 0.08188517343188163, -1.377478706369955, 0.4266685120165082, 0.5965965601950391, -1.243556804023239, -0.6178031577518529, -0.4706060575848744, 1.2964801230985301, -0.6467443647843247, 0.8147379506653111, -0.9573363243455577, -0.43708412192215895, 1.1023885258230504, 0.25278520558813206, 0.03172907688882857, -0.5692841437006686, 0.17629148018639246, -2.8922721108401177, -0.6108385214194457, -0.41874858095885065, -1.9154836370892923, -0.6063219643097044, 1.5412922256186465, 2.1204405956608716, -1.01065421435547, -0.5434931144945486, 0.03636076688009171, -0.30600675705986385, -1.7165408090624312, 0.17935341960400833, 0.5657132769139446, -0.5608558874304533, -0.0456633527031202, -0.7014123423584304, -0.736683405921145, 0.8514320592693096, 0.11882118043211998, -0.6296249366649763, -2.3975665590167576, -0.37525586630608987, -0.8579464022844322, 1.788801685120996, -0.2883002145950792, 1.7177284998521287, -0.5897571986231824, -2.4567058598985967, 0.8372323495194054, 0.8830425644942801, 0.5810874294206456, -0.2936548736352306, -0.7552662180709748, -1.5704048538874875, 0.4379303278250992, 1.2490477537969145, -2.00880504886997, 0.5240585160304959, -0.032422196725695435, -1.0502420618958743, 1.7500097282520064, 0.5044858197404513, 2.304628905671056, 0.22955359917762141, -0.6440089998160929, -0.28797701768219436, 0.7644598464683564, 0.6943282759938817, -2.369582362202677, -1.4319367756207277, 0.3039521132941967, 0.1723656184372106, 0.4513167171103222, 
-0.39488649319349295, 0.43757139160556524, 0.37748755194999695, -0.7242093322602084, 0.10070602100027537, 0.26888889010386036, -0.7436501468944465, -0.08738714228689815, 1.6536479696683963, -0.6761391390460322, 0.481113905044138, -0.5188816047471286, -0.07274153776483046, -1.5472995528237108, 0.8963117251092272, 0.22016624412443994, -0.4230890155197589, 0.531969757471415, -1.5734928249718774, 0.8063943400479304, -0.5768953929548798, 1.2569939817090419, 0.9642707558927879, 0.8937273382059298, 1.4475909162089313, -0.0376595413300924, -0.6073779629085945, -0.3451365149668589, -0.21735007617218677, 0.2559544643890874, -1.0006784775890183, -0.7665406766866958, -0.04146350400864777, -0.2947201521729735, -1.8594389071209043, 0.41843939091765714, -0.9605291776150933, -1.543218383206259, -0.03099267935689953, 0.6238177866365001, -0.18150181929749726, 0.6018940317718751, 1.6510294273791368, -1.4763033006471469, 0.7791437053886265, -1.0830001801853093, 0.6383917974545249, 0.2009541389269373, 0.18500320589488467, 0.8868173239997531, -0.9216335093884569, -0.7593594773185075, 1.6746982084503759, -0.2647132147060413, -0.4010207243603685, 0.7175210772115517, -2.00259699951647, 0.9092292480229001, 2.02234200397613, 1.502800400508659, -0.19485087636402726, 0.10745640866036332, 0.8280192221876356, -0.719107604968126, -1.397655940834189, 0.1955354582179154, 0.2571307711663721, 0.7754174800334456, 1.603621080169364, -0.13830376668294816, 0.8882405993545711, 1.6006717497581193, -0.8211748290755523, 0.5976881774273867, 0.4234753134816082, -0.19140240792995564, 0.2558548388596074, -0.22931479636264043, 1.0995662637546046, -1.878368842307892, -1.515618917638002, -0.5606349773433865, -1.2753113165058512, 0.8360853373206024, -0.9774106205661572, -2.198394511529409, 1.6061198307950928, 2.1721383883624585, -0.8631435893771715, -0.3332146027388396, -1.6158442050784971, 1.8144457419386826, 1.4687084214117088, 0.17481667373342152, -1.59428577475825, 1.2395411376688399, 1.9052309774464542, 
0.003965324496243773, 1.06125950334731, 1.619055701211538, 0.8115278989334432, -0.9975997177235506, 0.10784282877848587, -1.1482047866601663, -0.9613845943688877, -0.2307541106945451, 0.5126747561287869, -0.727828178317672, -0.5413829740438163, -0.8825814462611747, -0.1154354529637051, 0.16779110519118579, 1.5969333789296507, 1.8741520328395995, 0.6309831111156765, 0.47231808626577015, 0.7972812634414831, -1.2743842272154904, -0.7294671374756313, -0.07991456408893764, 0.9827271363960262, -1.7523241949517168, 0.03659540350553565, 0.11655283300851833, -0.5347759457505273, -0.7459575855790914, -1.0603635130368718, -0.09169796513572699, 1.0965103127379452, 1.6073404746067006, -0.05575638386870657, -2.0946259632805733, -0.06504506624805112, 0.03998337211250203, -1.3201404508864156, 0.21950424672561247, -0.06543832843128092, 0.05374255638541855, 0.5731083218420642, 0.023062132978781, -0.18331368584170438, -0.5430146242490301, -0.04928405909095008, 0.28679383912971035, 0.6548913037508406, -0.7403994260624559, 0.9610147147751577, 1.178947347878497, 0.7340450120237932, 0.052397887893558114, 0.03750792299285495, 1.041935207398616, 0.9817141926702226, -0.5679075925003171, -2.4314029894892464, 1.5396829559696479, -0.8952730129334676, 0.5166822147373515, -0.2684015459995828, 0.33334466759024706, 0.5600935306811604, 1.6891556955352678, 0.7969442240277408, 1.5594450044746215, 2.180294765534068, -1.371349271283305, -1.051276753603631, 0.621743318349236, 0.0018388288836425353, 0.3000608414619472, -0.4572507477557409, 0.2724655456707352, 1.0078462342359071, -0.1868627133852275, -0.6642347378905279, -1.2641259756133947, -0.7168051447489698, 0.06166118865276984, -0.409734823451057, -0.1550514548003163, -0.4392451880469997, 0.9360784713845819, -0.3973423327026237, -0.34980688682723143, 0.7064262848872751, 1.016936397771855, 0.16357378454973057, -1.2874885658968718, 0.749433326314927, -1.3853466691901746, -0.06456227380745626, 1.6629082331658713, 1.1013732324981031, 2.307831660716908, 
-1.8345100814312494, 2.393451924503449, -0.8088894178341166, 0.794849057204957, -1.0417851718860875, 0.36905489731557634, 1.1336595062541583, 0.8647396389358875, -0.2523754307178325, 1.2789929226110446, 0.2128024337788296, 0.7563670907285992, 0.6279547182404533, -0.7504621947114014, 0.7547376608158064, -0.0015136660280026023, 1.1944993506474633, 0.34888962329631246, -0.549067173660412, 1.4964138582110182, -0.6689297301414805, 1.8312697518743997, 0.8736801037216783, -1.094181461448198, 1.1081478927283375, 0.7441506573270389, 1.3874074482077245, -0.1355772142674292, -0.7642313574231182, 2.1263179519558175, -0.3615450156425273, 1.0620002971448885, -0.1895530485920172, -0.93938648740444, 0.10477090142588719, -2.094907349563714, 0.20782710455910647, 1.0825552573037804, 0.06853185511114315, 0.3632396297475239, -1.080381347156782, -0.7587017927333454, -2.434020881475415, -1.5964029128332156, 0.011745481573955883, 0.6686120403761413, -1.2806388469212846, -0.7541106208484624, -1.1552592331799854, 0.3187316541772342, -0.6488408395434028, -0.19064638527975158, 0.5160119488250674, -0.4010679747780823, -0.07839695278473996, -1.6398103698567725, 0.18069782752881774, 1.8404440169120564, 1.8830623322659767, -1.3294581545246849, 1.729087442779956, -1.8685502940746783, -1.169160199286787, 0.6698512061612744, 0.28602877051228953, 1.2292619171889012, -1.7948912652948281, 0.17688530191563248, 1.764517843970352, -2.5280212055778564, -0.9013026372890343, -0.5045005863178184, 2.053727582847807, 1.4570998733963358, 0.4424916734251097, -1.3411002170410602, -0.12254100214990367, 1.0679512840746104, 2.1637843202051092, -0.028840189206087725, -0.6303198589668074, 1.5756118524738014, -0.7981948666922393, -2.3958725940739285, -0.530836262754282, -0.6743153377474026, 1.7266282329163525, 0.7874563249832119, -0.7979734847563118, -0.8962260306094234, -0.87208571528837, 0.5171258120317094, 0.9972960692116409, -1.0641285779346359, -0.9474526005008098, -0.31554566803599426, 0.01705599162350719, 
0.03268686649027622, -0.6098905094404874, 0.3957140286934079, 0.6408495842543688, 0.443527888426491, 0.9461085932266705, 0.8868902063994905, 0.9756063800800938, -0.6416164040434029, -0.425159193292544, -0.058345342916044936, -0.7241467880934279, -1.9901734301432077, -2.7091786728698195, -0.2351987835299176, 1.2448176850038597, 0.9708682905214576, -2.369506517223338, -0.1477969880087413, 0.9211217589381155, -1.1434252542754881, 1.5854052995212375, 0.8832519866122185, -0.9503918561653486, -1.5044018174526805, -0.3482086395213158, -1.0984647013164037, 0.05863363876727027, -1.0565962688685016, 0.589839833429577, -0.4561236466451523, 0.34137035741896793, -0.9754523291564361, -0.00217427626582553, 0.6353955805715834, -0.011329110925421729, 1.386581233341892, 1.2830640783925962, -0.5484836722184543, 0.5512678728547291, 0.07157538462713359, -0.026535868095555804, -2.7423164628088035, -0.28780210512624294, 0.27002354826825453, 0.30663556561120076, -0.5088001128712263, -1.0638733835345313, -0.18426466244941747, 1.414214971614511, -0.3237317131095091, 1.905164617218405, 1.6211306183532468, 0.7691388849624123, 0.25788609085420305, 0.17339085096373957, 0.17068706355227645, -0.13941076941325625, -0.2517623822367549, -0.1791051708134397, 0.9623670172332042, 0.8789504708989458, 0.4318777122281319, 1.0302645168422706, 1.1961051628649697, 1.9692581688683017, 0.8211296191115937, -1.0608217314799375, 1.1798683643486112, 0.9319760995472469, -1.1674635137872673, -0.25783962035866076, 0.1661089113873339, 1.154397545844328, -1.0166454942738863, -0.4583490684755798, 2.5268051984638977, -0.041861847406327465, 1.8651125421675416, 0.6299042436493635, 0.44254255841650486, -1.2699199273830686, 0.9418652082328122, 0.8200175964210318, 0.37853389740620624, 0.23943634660451468, 2.030924235706882, -0.7165088754842898, 0.6779777835342544, -1.349914438618158, -0.3165131102582926, 0.08686146660433183, 2.213234163419212, -0.03570424743812925, -1.1722253718764744, 2.1773007795187453, 1.3837953850314133, 
-0.10710323458040147, -1.4548722812965549, -0.3970577176118276, -1.374043657210044, 0.7255462735430791, -0.7683268330568346, -1.107300560484893, 0.6536981535594798, -1.0456335063287074, 0.671539211095781, 0.14177014236811075, -0.7720185432172093, 0.04628206694560977, -0.29144204822737435, 0.44111665435913106, -1.4468044237745188, -0.4948434031003428, 0.20973148846957482, -1.4705352743356102, 0.5509263805094416, 0.14836049703598717, 0.37142650110567194, 0.07671701390904923, 0.1832721674392343, 0.046938838073239716, 0.17670916933534012, -0.45804825701073837, 0.43023416286875543, 0.994531360232782, -0.046781434678865176, -0.6411281236926395, -0.8928000153166127, 0.5020270491657642, 0.36740075971319863, -0.4827108385908195, -0.23685866589629484, -0.8074519272656216, 1.5748268236440153, 1.4192853096349365, 0.09020547705776319, -0.6652511770649884, -1.9992167031321493, 0.4791188545948879, 0.6279108483861092, 1.0074790653617451, 0.7676312157522953, -0.11553684419657606, -0.4591865355248419, -1.639849492617964, 0.4130553130463872, 0.5390254097685058, -0.4349639740477211, -1.3192207739061903, 0.921095661636292, 0.11186833365594734, 0.3441768337737832, 1.4783039534492346, 2.263085347294753, -0.8349618339008534, -0.9049359600066962, 0.024162923476669818, 0.6066399333702527, -0.8417612166698258, -0.6961421526795921, 0.3202978824363036, -0.19130470797006155, -1.402607668854599, 0.28497046631102485, 0.4905213042336202, 0.19793455181657382, 2.6301564123361576, -0.16176932304370256, 0.44717956503954587, 0.8699840049513902, 1.234353436189499, -0.28804951892441494, 0.313974160529083, 1.1623300287931133, -0.48292707721980854, 0.1367494895765361, -0.20193916479192547, 2.256921332752218, 1.6374385645723855, -0.5461994538365549, -0.07554343845331998, 0.3692865266488121, -0.8084784558917282, 0.0765934842753864, -0.39039605912469244, -0.6517502322106101, -1.0999972515714094, 0.12141787168490469, 0.4223582871504148, -1.929776945067296, 0.1491648289319484, 0.668905036114273, 
2.1273296694966035, -0.8341058352970173, -0.0038662459408752574, -0.4177766367908073, 0.25273869215538547, -0.4756713774592941, 0.0033228853696081555, -0.286753183458628, 0.32745535473866155, 0.025329830457291404, -1.7197090697488417, 0.8757768384549623, -1.4248328845959488, 0.37552052373808614, 0.8693679749864636, 0.5551853589509166, 1.454617789271402, -0.26632041516147126, 0.0006770720257820141, 0.9314972324397336, 0.11626025120379672, -1.6743920644425254, 1.4928084495173761, 0.07055981871884472, 0.45336863171779396, -0.5160549595407239, 1.152662508852422, -1.7497056098282482, -0.924700089823277, -2.1717265120499243, 0.5958928427533126, 0.6730883408493645, -0.84805585288396, 0.4824066261132788, -0.052614435678208706, 0.825405208732148, -1.6333874171339615, 0.6277507337962961, 0.249401284233602, -0.4109176797778198, -2.347189584557617, 1.1845645724697493, -2.3521670455572896, -1.1316320091771654, 0.5992380911013792, -0.31902124169706325, -0.46964102762822674, 1.5156994673978605, -1.2946608083845053, -0.6020676714959547, 0.3249742001636929, 0.11711158728440947, -0.007321114964193679, -0.6181879560435876, -2.134744413791715, -0.7112983608070629, -0.11597423642561937, -1.4744999523005498, 0.8621810658863058, -2.168307957202582, 0.3918226497225292, -1.1368285620990515, 0.10458156928330019, -0.07406496018493203, -0.40015460187469537, 0.12235621760846344, 0.2984735850943185, -0.29143957929644393, 1.5249246290756702, 1.4597426694367543, 2.0259171623758534, -0.590817527789978, -0.2936227843522231, -2.350563685161422, 0.5204520037274206, -1.0666412352357653, -0.27697862639741316, -0.2148124943518122, 0.6734957785469392, -1.8053516365213391, -1.778141783937991, -1.2084775577449522, 0.9934495828121953, -0.24423917623197028, -1.4813474233808355, 0.33112243279131987, 0.700726302636986, -1.2690864394958483, 2.50077915159786, -1.3575567383953087, 0.011926046930440538, -1.3193374579057675, 0.4889304126129667, -0.9109550658531209, -0.7341971062366668, 1.3350911986632095, 
0.1891570737674318, 0.40709280499254247, 0.3802019536004736, -0.28204633202557666, -1.5977055097307233, -0.02911650264897409, 0.6131157184149074, 0.25149177652210464, 0.03382356876055713, 1.506606658949116, -1.7344902302873029, 0.7452744776446871, -1.9107272401785926, -1.582999530211274, 0.48222225973271177, -1.0384903144668758, 0.6725568062565108, -2.5046318569934747, 1.296511149503862, 0.853269156450691, 0.11943152619597955, -0.3450209857946171, 0.9312567740442399, -0.6707066875133985, 0.005142200140689065, -0.6622425728926742, 0.5590322499564966, -0.6705553220908641, 1.573364643401545, -0.9378192128795597, 0.25897411687301325, -0.523235177555831, -0.2853236646766811, 1.8425943763418455, -0.1207820601254575, 1.0012098031133287, -0.7256239293102813, -0.02271779763228539, 1.0036542321722706, -1.135909216473155, -1.189815381617683, -0.8493115552807492, -1.6888177375943607, -2.489616420576527, -0.41126522124777354, 0.9961668603829419, -0.7534275973480015, 1.849790093450653, 0.06921282422478346, -0.6074393733970507, 0.6619417712667524, -1.835004009108793, 0.3768387586158781, -1.1821904434754555, 0.014573407370971452, -0.7433444811234973, -0.329552484293008, 0.15018469760521694, -0.10973605653921424, 0.14420546653917488, 1.6304897807065826, -0.5968651613380547, -1.8023280044426582, -0.4305829392358626, 0.2557762607970183, 1.516870117861496, 0.5594967935525517, 0.0062557010311553056, 0.6255320341623587, 0.8339109137110668, -0.5635330527719713, 0.9218105797964087, 0.33145218259974435, 0.2889855465173865, -0.542335685178416, -0.2622889302699205, -0.8533813314567203, -0.449829299558278, -0.22917935684040222, 0.05743101469603045, -0.7296712922762247, -0.4814272083750075, 0.9271880724100623, -1.7158890136904523, -0.8842189210847783, 0.029357725437302862, 1.007095825780661, -0.6645393419822868, 0.4762254637014204, 2.135843118275408, -0.5131457945677272, 0.4341593057514976, 0.5547203107096045, 0.5582809372163969, 0.7326025512624773, -1.6004509435398273, 0.49390903300786004, 
-0.6273589411453684, 0.6204249584830497, -0.004749275153930262, 0.11525059305341144, 2.630624977560361, -0.26780218945171297, 1.094811068333461, 0.49860174981868177, -0.4975802795241047, 0.4418211270563782, -0.2851503334755573, -0.09783270716273874, -0.8056869415410617, -0.021836720949549925, -0.14327850353537921, -0.6134897130218929, -1.0908833087276866, 2.1485408375506436, -0.8502572490323435, 1.566069133322983, 1.6547207754540483, -0.29716084382953856, 0.6442619975668572, -1.6712756215556708, -1.0418433414147792, 1.5914121576093476, -0.12999723328116145, 2.2625040473786706, -0.6887600687835639, -2.392935678267709, -0.42423043984737213, -0.6453502398908528, 1.780850939443096, -0.29512813182874487, 0.6602842350181513, 2.5242286604271165, -0.768971612627111, -1.2499225563579224, 0.9896456044096121, -1.4583348201998096, -0.30019832681301145, -0.2169341800223744, -0.27918194220679776, 0.3082918845836181, 1.0745604087452207, -0.25497733629641756, 0.7436548330690215, -0.01798750766965811, -0.16698097995261732, -0.7596463403247344, 0.527075586096896, 0.20281948715540102, 1.0242386912638126, 1.075572733880468, 0.8435022795285625, -2.2448561497518598, -0.6486272087020284, -1.169450318895309, 0.2542761305625886, -0.368379912493706, 0.859601777933363, -0.5994438547345085, -0.8183299307417696, -0.996197764519836, 2.2639040845536953, 2.305251085859, 0.2570216983623788, 0.8211962638252076, 1.4543884394473836, 0.5142693861834959, -0.45100900087126905, 0.6367337816492784, 1.0334005587201107, 0.14284388692304759, 0.052926333489835095, -0.062313352727305475, 0.14683480238413224, 0.45923063376648066, -0.7463819418222971, 0.45479021185749835, -0.612376544022461, 0.8942631625777544, 1.193834252970334, -0.737905635740084, -0.30818387153031457, 1.0611707827228634, 0.16623162987098344, 0.08677775643378552, -0.31598527003860044, -0.5952323581377298, -0.6653619945042116, 0.8407887262811935, -1.1700191067922059, 0.14603471865875844, 1.560707477919896, 0.1276677899675394, 
0.40771943432022456, 1.1960962415191534, -0.0691138093161685, -0.25811271087202814, 0.34070318408510397, 1.232272274230928, 1.1332771507171804, 0.8829602335192459, 0.36161594829311156, -0.18639018358016712, -0.1864775727504895, 0.6175617309270467, -0.7432181218240466, 0.1162407894806796, 1.6089484442646291, -1.6270975060753263, 0.7181710999956983, 0.6390076062081216, -1.1993755576201084, -0.3678974047917413, 1.019391926209488, 0.937729905413773, -0.39967899996628253, -1.5449000725792745, 0.517751255230349, -1.1860949887196317, 1.0079294987623286, 1.3933158676972373, 0.4499285623607037, 0.7214366719331048, 1.2596632744367866, 0.8879743019440172, -1.0133136090410004, -0.18400518591537182, -0.37727270917671213, 1.0727716665848628, 1.1842758379989071, -0.8230847486245987, 1.2034983985466399, 0.5598595739059454, -0.5921883421170341, -0.9871646403746361, -0.28204630785494894, 1.7419580084671538, 1.8704781272401372, -0.5756785490118363, -0.4343222821728877, 1.3595057253975915, -0.2904675417217015, 1.6392851577613643, -0.04808706200817097, 0.45771640371664324, -0.7205734582684044, -0.1929179471261812, -0.061468415860124005, 0.6594784860274778, 1.915169232184621, 0.24198711630092642, -0.1409192986170815, 0.7498968987041962, 0.8384295699410929, -0.31884337723534034, 0.5201715107719677, 1.078093557213665, -1.4106821188239103, -0.11299875488293797, -1.101851098266356, -1.9827857759459875, 0.674173949337249, -0.07976904508068182, 0.6413575219516752, 0.5579521727455968, 1.5447125674436322, 0.00336279364571832, 0.7169165208000876, -0.5219379135690675, -0.5547365951353832, -1.6094973752281074, -1.4517964713532026, 0.7295543161803131, -1.0485600509751898, 0.25250528262102045, 0.4514365061371215, -0.47322716534713005, 1.4277492880271965, -0.8519575823610668, 0.25010636599252417, 0.04688072494592699, -0.9608827401798883, -0.3007725940571144, -0.046293007279283885, -1.5398474800443722, 0.013805590010736243, -0.30126119542423085, -0.083967414917391, -0.08851702953330913, 
-0.4049342754396672, 1.2798400904483298, -1.606790768742053, -1.267613618320827, -1.2503576362925535, -1.755204138388206, 0.5258670748348347, -1.0101394529781884, -0.014396056474962199, 0.5821190499307055, -0.4043449949439321, 1.1531383771532613, 0.7048046900051976, 0.10660760828982706, 1.767840111284227, -0.3132350111393891, -0.28676674418183146, 0.5886217550716831, 0.0533267131349097, -0.7528340929518055, -0.724635525088517, 1.8254002239594138, -0.18841616203234243, -2.315751543437246, -1.377147264738426, -0.22869510121273423, 1.7199465071423408, -0.8227319868770513, 0.7895966627559355, -0.19896135497903228, -0.17774989865868365, 1.2547667767813169, -1.074639301257602, -1.206814829909523, -1.8396620602533253, -0.6168713486425301, 0.2859641314731942, 1.15767005918457, 0.6908763784567307, 0.6969509180467203, -0.028736647970287105, -0.3641425905163623, -0.06310377359862604, 0.5886394872584251, -0.029762415772008773, -0.4569277145966156, -1.4396622355478819, -0.2587529642573468, -0.41564597849878177, 0.6676475767176987, 1.0745653943626412, 0.6145686615783245, 0.5521167568378539, 0.5518405345213662, 1.839167073307593, 0.8564516958772312, 1.2078686147909117, 1.161310276413205, -0.5366273256337238, 0.21518084378203362, 0.47594089205626394, 0.7296065418707832, -0.13013036392341235, 0.5568552463995653, -0.07981669338795927, 0.9709477444901867, 2.3069915735666915, 1.1933313114605135, -1.6250290503525227, -0.42048813136286506, -1.0051606886955424, 0.5549248526402112, -0.29698308976860704, 1.612108853775127, -0.13013836416543, -0.41824203711094626, 0.5194426219405017, 0.05797598725927429, 1.3040653807403857, 0.8368386106798029], payload={'city': 'Berlin'})" 146 | ] 147 | }, 148 | "execution_count": 5, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "# Format the data into objects that can be inserted into the DB\n", 155 | "\n", 156 | "data_to_insert = []\n", 157 | "for i, d in enumerate(data):\n", 158 | " point = 
models.PointStruct(\n", 159 | " id=i, vector=d.tolist(), payload={\"city\": random.choice([\"New York\", \"Berlin\"])}\n", 160 | " )\n", 161 | " data_to_insert.append(point)\n", 162 | "data_to_insert[0]" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 6, 168 | "metadata": { 169 | "id": "PoyxgmARYH9s" 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "# Remove any existing DB files\n", 174 | "\n", 175 | "if os.path.exists(\"data/qdrant_db\"):\n", 176 | " shutil.rmtree(\"data/qdrant_db\")\n", 177 | "\n", 178 | "client = QdrantClient(path=\"data/qdrant_db\")" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "MpMtMVTJYH9s" 185 | }, 186 | "source": [ 187 | "# Normal\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 7, 193 | "metadata": { 194 | "colab": { 195 | "base_uri": "https://localhost:8080/" 196 | }, 197 | "id": "cL-4m5d-YH9u", 198 | "outputId": "67360ca2-6952-4642-b9eb-595317a5b515" 199 | }, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "True" 205 | ] 206 | }, 207 | "execution_count": 7, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "client.create_collection(\n", 214 | " collection_name=\"semantic_search\",\n", 215 | " vectors_config=models.VectorParams(\n", 216 | " size=n_features, distance=models.Distance.COSINE\n", 217 | " ),\n", 218 | " quantization_config=None,\n", 219 | ")" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 8, 225 | "metadata": { 226 | "colab": { 227 | "base_uri": "https://localhost:8080/" 228 | }, 229 | "id": "6mVgf2VTYH9u", 230 | "outputId": "767e1454-837e-4ab7-a928-e8282afb1bd3" 231 | }, 232 | "outputs": [ 233 | { 234 | "data": { 235 | "text/plain": [ 236 | "UpdateResult(operation_id=0, status=)" 237 | ] 238 | }, 239 | "execution_count": 8, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 
245 | "client.upsert(\n", 246 | " collection_name=\"semantic_search\",\n", 247 | " points=data_to_insert,\n", 248 | ")" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 9, 254 | "metadata": { 255 | "colab": { 256 | "base_uri": "https://localhost:8080/" 257 | }, 258 | "id": "WvFAoWr0YH9v", 259 | "outputId": "9d97c8a2-50c7-4808-d3a8-19787af80eb2" 260 | }, 261 | "outputs": [ 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "id=0 version=0 score=0.9999999992620814 payload={'city': 'Berlin'} vector=None shard_key=None\n", 267 | "id=367 version=0 score=0.0935900955869998 payload={'city': 'Berlin'} vector=None shard_key=None\n", 268 | "id=6777 version=0 score=0.09345209088487938 payload={'city': 'New York'} vector=None shard_key=None\n", 269 | "id=9674 version=0 score=0.07236113324158687 payload={'city': 'Berlin'} vector=None shard_key=None\n", 270 | "id=6268 version=0 score=0.07159429777598603 payload={'city': 'Berlin'} vector=None shard_key=None\n", 271 | "id=2973 version=0 score=0.06972900526967332 payload={'city': 'New York'} vector=None shard_key=None\n", 272 | "id=6340 version=0 score=0.06872670865599134 payload={'city': 'Berlin'} vector=None shard_key=None\n", 273 | "id=9953 version=0 score=0.06850956476044623 payload={'city': 'New York'} vector=None shard_key=None\n", 274 | "id=510 version=0 score=0.06589252393618895 payload={'city': 'New York'} vector=None shard_key=None\n", 275 | "id=1519 version=0 score=0.06586400388836113 payload={'city': 'Berlin'} vector=None shard_key=None\n" 276 | ] 277 | }, 278 | { 279 | "data": { 280 | "text/plain": [ 281 | "[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]" 282 | ] 283 | }, 284 | "execution_count": 9, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "results = client.search(collection_name=\"semantic_search\", query_vector=data[0])\n", 291 | "for result in results:\n", 292 | " print(result)\n", 293 | 
"truth_ids = [result.id for result in results]\n", 294 | "truth_ids" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": { 300 | "id": "w4ys6mjxYH9w" 301 | }, 302 | "source": [ 303 | "# Scalar Quantization\n" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 10, 309 | "metadata": { 310 | "colab": { 311 | "base_uri": "https://localhost:8080/" 312 | }, 313 | "id": "9iRb7IJ4YH9w", 314 | "outputId": "e0a61238-6544-4795-a276-20a4e82f787f" 315 | }, 316 | "outputs": [ 317 | { 318 | "data": { 319 | "text/plain": [ 320 | "True" 321 | ] 322 | }, 323 | "execution_count": 10, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "client.create_collection(\n", 330 | " collection_name=\"scalar_semantic_search\",\n", 331 | " vectors_config=models.VectorParams(\n", 332 | " size=n_features, distance=models.Distance.COSINE\n", 333 | " ),\n", 334 | " quantization_config=models.ScalarQuantization(\n", 335 | " scalar=models.ScalarQuantizationConfig(\n", 336 | " type=models.ScalarType.INT8,\n", 337 | " quantile=0.99, # 1% of extreme values will be excluded from quantization\n", 338 | " always_ram=True,\n", 339 | " ),\n", 340 | " ),\n", 341 | ")" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 11, 347 | "metadata": { 348 | "colab": { 349 | "base_uri": "https://localhost:8080/" 350 | }, 351 | "id": "o-Afj4mxYH9w", 352 | "outputId": "1feee59d-a2e7-4269-8051-23f54fc6c70d" 353 | }, 354 | "outputs": [ 355 | { 356 | "data": { 357 | "text/plain": [ 358 | "UpdateResult(operation_id=0, status=)" 359 | ] 360 | }, 361 | "execution_count": 11, 362 | "metadata": {}, 363 | "output_type": "execute_result" 364 | } 365 | ], 366 | "source": [ 367 | "client.upsert(\n", 368 | " collection_name=\"scalar_semantic_search\",\n", 369 | " points=data_to_insert,\n", 370 | ")" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 12, 376 | "metadata": { 377 | "colab": { 
378 | "base_uri": "https://localhost:8080/" 379 | }, 380 | "id": "023CWpJ4YH9x", 381 | "outputId": "bc6a24a8-4574-4e40-96a3-03c5fda3cd99" 382 | }, 383 | "outputs": [ 384 | { 385 | "name": "stdout", 386 | "output_type": "stream", 387 | "text": [ 388 | "[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]\n" 389 | ] 390 | }, 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "1.0" 395 | ] 396 | }, 397 | "execution_count": 12, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "results = client.search(collection_name=\"scalar_semantic_search\", query_vector=data[0])\n", 404 | "predicted_ids = [result.id for result in results]\n", 405 | "print(predicted_ids)\n", 406 | "calculate_recall(truth_ids, predicted_ids)" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": { 412 | "id": "MRKqStCNYH9x" 413 | }, 414 | "source": [ 415 | "# Binary Quantization\n" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 13, 421 | "metadata": { 422 | "colab": { 423 | "base_uri": "https://localhost:8080/" 424 | }, 425 | "id": "iwscbaIRYH9y", 426 | "outputId": "58d27a2b-96ca-4cad-bc6b-d76a624fd504" 427 | }, 428 | "outputs": [ 429 | { 430 | "data": { 431 | "text/plain": [ 432 | "True" 433 | ] 434 | }, 435 | "execution_count": 13, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "client.create_collection(\n", 442 | " collection_name=\"binary_semantic_search\",\n", 443 | " vectors_config=models.VectorParams(\n", 444 | " size=n_features, distance=models.Distance.COSINE\n", 445 | " ),\n", 446 | " quantization_config=models.BinaryQuantization(\n", 447 | " binary=models.BinaryQuantizationConfig(\n", 448 | " always_ram=True,\n", 449 | " )\n", 450 | " ),\n", 451 | ")" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 14, 457 | "metadata": { 458 | "colab": { 459 | "base_uri": "https://localhost:8080/" 460 | }, 461 | "id": 
"f1Zf2g1bYH9y", 462 | "outputId": "f86dc3d8-aead-4b6d-f6c2-7d11330569ae" 463 | }, 464 | "outputs": [ 465 | { 466 | "data": { 467 | "text/plain": [ 468 | "UpdateResult(operation_id=0, status=)" 469 | ] 470 | }, 471 | "execution_count": 14, 472 | "metadata": {}, 473 | "output_type": "execute_result" 474 | } 475 | ], 476 | "source": [ 477 | "client.upsert(\n", 478 | " collection_name=\"binary_semantic_search\",\n", 479 | " points=data_to_insert,\n", 480 | ")" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 15, 486 | "metadata": { 487 | "colab": { 488 | "base_uri": "https://localhost:8080/" 489 | }, 490 | "id": "5-M1fNspYH9y", 491 | "outputId": "f72111a5-8496-40b9-969c-b1e0b5873264" 492 | }, 493 | "outputs": [ 494 | { 495 | "name": "stdout", 496 | "output_type": "stream", 497 | "text": [ 498 | "[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]\n" 499 | ] 500 | }, 501 | { 502 | "data": { 503 | "text/plain": [ 504 | "1.0" 505 | ] 506 | }, 507 | "execution_count": 15, 508 | "metadata": {}, 509 | "output_type": "execute_result" 510 | } 511 | ], 512 | "source": [ 513 | "results = client.search(collection_name=\"binary_semantic_search\", query_vector=data[0])\n", 514 | "predicted_ids = [result.id for result in results]\n", 515 | "print(predicted_ids)\n", 516 | "calculate_recall(truth_ids, predicted_ids)" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": { 522 | "id": "HbWIhZUMYH9y" 523 | }, 524 | "source": [ 525 | "# Time Metrics\n" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 38, 531 | "metadata": { 532 | "colab": { 533 | "base_uri": "https://localhost:8080/" 534 | }, 535 | "id": "IxVu58AVftpb", 536 | "outputId": "cb0d5518-23a4-46f5-ad94-3cf73ebb9328" 537 | }, 538 | "outputs": [ 539 | { 540 | "name": "stdout", 541 | "output_type": "stream", 542 | "text": [ 543 | "0.2501802444458008\n" 544 | ] 545 | } 546 | ], 547 | "source": [ 548 | "# With quantized vectors\n", 549 | "\n", 550 | 
"n_reps = 15\n", 551 | "times = []\n", 552 | "for _ in range(n_reps):\n", 553 | " query_vector = np.random.randn(1, n_features)[0]\n", 554 | " st_time = time.time()\n", 555 | " client.search(\n", 556 | " collection_name=\"binary_semantic_search\",\n", 557 | " query_vector=query_vector,\n", 558 | " search_params=models.SearchParams(\n", 559 | " quantization=models.QuantizationSearchParams(\n", 560 | " ignore=False, rescore=False\n", 561 | " ) # use quantized\n", 562 | " ),\n", 563 | " limit=100,\n", 564 | " query_filter=models.Filter(\n", 565 | " must=[\n", 566 | " models.FieldCondition(\n", 567 | " key=\"city\", match=models.MatchValue(value=\"Berlin\")\n", 568 | " )\n", 569 | " ]\n", 570 | " ),\n", 571 | " )\n", 572 | " times.append(time.time() - st_time)\n", 573 | "print(np.median(times))" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 36, 579 | "metadata": { 580 | "colab": { 581 | "base_uri": "https://localhost:8080/" 582 | }, 583 | "id": "0Z7DmnCTf984", 584 | "outputId": "91f223be-b372-48c6-ac1b-af499be56a2b" 585 | }, 586 | "outputs": [ 587 | { 588 | "name": "stdout", 589 | "output_type": "stream", 590 | "text": [ 591 | "0.32148098945617676\n" 592 | ] 593 | } 594 | ], 595 | "source": [ 596 | "# With normal vectors\n", 597 | "\n", 598 | "times = []\n", 599 | "for _ in range(n_reps):\n", 600 | " query_vector = np.random.randn(1, n_features)[0]\n", 601 | " st_time = time.time()\n", 602 | " client.search(\n", 603 | " collection_name=\"binary_semantic_search\",\n", 604 | " query_vector=query_vector,\n", 605 | " search_params=models.SearchParams(\n", 606 | " quantization=models.QuantizationSearchParams(\n", 607 | " ignore=True, rescore=False\n", 608 | " ) # ignore quantized\n", 609 | " ),\n", 610 | " limit=100,\n", 611 | " query_filter=models.Filter(\n", 612 | " must=[\n", 613 | " models.FieldCondition(\n", 614 | " key=\"city\", match=models.MatchValue(value=\"Berlin\")\n", 615 | " )\n", 616 | " ]\n", 617 | " ),\n", 618 | " )\n", 619 | " 
times.append(time.time() - st_time)\n", 620 | "print(np.median(times))" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 17, 626 | "metadata": { 627 | "id": "9vrZtPO_YH9y" 628 | }, 629 | "outputs": [], 630 | "source": [] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 17, 635 | "metadata": { 636 | "id": "u33t6OqJYH9y" 637 | }, 638 | "outputs": [], 639 | "source": [] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": 17, 644 | "metadata": { 645 | "id": "b9pv1xODYH9y" 646 | }, 647 | "outputs": [], 648 | "source": [] 649 | } 650 | ], 651 | "metadata": { 652 | "colab": { 653 | "provenance": [] 654 | }, 655 | "kernelspec": { 656 | "display_name": "Python 3", 657 | "language": "python", 658 | "name": "python3" 659 | }, 660 | "language_info": { 661 | "codemirror_mode": { 662 | "name": "ipython", 663 | "version": 3 664 | }, 665 | "file_extension": ".py", 666 | "mimetype": "text/x-python", 667 | "name": "python", 668 | "nbconvert_exporter": "python", 669 | "pygments_lexer": "ipython3", 670 | "version": "3.11.5" 671 | } 672 | }, 673 | "nbformat": 4, 674 | "nbformat_minor": 0 675 | } 676 | -------------------------------------------------------------------------------- /postgres_vector_length/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import requests # type: ignore 5 | from dotenv import load_dotenv # type: ignore 6 | from langchain.text_splitter import RecursiveCharacterTextSplitter # type: ignore 7 | from openai import OpenAI # type: ignore 8 | from postgres import PostgresClient 9 | from sklearn.decomposition import PCA # type: ignore 10 | 11 | load_dotenv() 12 | 13 | openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), max_retries=3) 14 | 15 | 16 | # get book from gutenberg 17 | title_url = { 18 | "inverted pyramid": "https://www.gutenberg.org/cache/epub/72392/pg72392.txt" 19 | } 20 | title_text_map = {} 21 | for 
title, url in title_url.items(): 22 | response = requests.get(url) 23 | book_text = response.text 24 | 25 | title_text_map[title] = book_text 26 | 27 | 28 | # use langchain to do document chunking 29 | text_splitter = RecursiveCharacterTextSplitter( 30 | chunk_size=500, 31 | chunk_overlap=40, 32 | length_function=len, 33 | is_separator_regex=False, 34 | ) 35 | documents = [] 36 | for title, text in title_text_map.items(): 37 | text_chunks = text_splitter.split_text(text)[:1000] 38 | 39 | embeddings = openai_client.embeddings.create( 40 | input=text_chunks, 41 | model="text-embedding-ada-002", 42 | ).data 43 | embeddings = [e.embedding for e in embeddings] 44 | 45 | pca = PCA(n_components=50) 46 | reduced_embeddings = pca.fit_transform(embeddings) 47 | 48 | for i, text_chunk in enumerate(text_chunks): 49 | documents.append( 50 | { 51 | "title": title, 52 | "text": text_chunk, 53 | "large_embedding": embeddings[i], 54 | "small_embedding": reduced_embeddings[i], 55 | } 56 | ) 57 | 58 | 59 | # compare sizes 60 | postgres_client = PostgresClient(large_embedding_size=1536, small_embedding_size=50) 61 | postgres_client.delete_tables() 62 | postgres_client.create_tables() 63 | postgres_client.add_data(data=documents) 64 | size = postgres_client.get_vector_column_size() 65 | for column, size_bytes in size.items(): 66 | print(f"Column {column} has size {size_bytes/1048576} MB") 67 | print("\n") 68 | 69 | # search over table 70 | 71 | quote = "Where was Rod Norquay sitting?" 
class PostgresClient:
    """Small helper around one psycopg connection to a pgvector-backed table.

    The client owns a single table, ``search_data``, with a book title, a
    text chunk and two vector columns of different dimensionality
    (``large_embedding`` and ``small_embedding``).  It can create/drop the
    table, bulk-insert rows, run a nearest-neighbour query against either
    vector column, and report the stored size of each vector column.

    Credentials are read from the ``NEON_USERNAME`` / ``NEON_PASSWORD``
    environment variables when the client is constructed; no connection is
    opened until a method that needs one is called.
    """

    # The only identifiers ``search_db`` is allowed to splice into SQL text.
    _VECTOR_COLUMNS = ("large_embedding", "small_embedding")

    def __init__(
        self,
        large_embedding_size: int,
        small_embedding_size: int,
        postgres_database: str = "semantic_search",
        postgres_host: str = "ep-still-hat-20912390.us-east-2.aws.neon.tech",
    ):
        """Store the vector dimensions and build the connection URL.

        Args:
            large_embedding_size: Dimension of the ``large_embedding`` column.
            small_embedding_size: Dimension of the ``small_embedding`` column.
            postgres_database: Database name placed in the connection URL.
            postgres_host: Server host name placed in the connection URL;
                defaults to the host that used to be hard-coded here.
        """
        username = os.getenv("NEON_USERNAME")
        password = os.getenv("NEON_PASSWORD")
        self.large_embedding_size = large_embedding_size
        self.small_embedding_size = small_embedding_size

        self.postgres_url = (
            f"postgresql://{username}:{password}@{postgres_host}"
            f"/{postgres_database}?sslmode=require"
        )

    def create_postgres_connection(self):
        """Open a fresh connection, closing any connection held so far."""
        if hasattr(self, "postgres_client"):
            self.postgres_client.close()
        self.postgres_client = psycopg.connect(
            conninfo=self.postgres_url,
            row_factory=dict_row,  # rows come back as dicts keyed by column
        )
        # NOTE(review): presumably requires the `vector` extension to exist
        # already; confirm behaviour against a brand-new database.
        register_vector(self.postgres_client)

    def init_postgres_client(self):
        """Make sure ``self.postgres_client`` is a live connection.

        Connects when there is no connection yet or the current one reports
        itself closed/broken, then pings the server with ``SELECT 1`` and
        reconnects once if the ping fails.
        """
        conn = getattr(self, "postgres_client", None)
        # `closed` / `broken` are read from the connection object itself;
        # the previous `.connection.closed` lookup raised AttributeError.
        if conn is None or conn.closed or conn.broken:
            self.create_postgres_connection()
        try:
            with self.postgres_client.cursor() as cursor:
                cursor.execute("SELECT 1")
        except (psycopg.DatabaseError, psycopg.OperationalError):
            self.create_postgres_connection()

    def create_tables(self):
        """Create ``search_data`` and its HNSW indexes if the table is missing."""
        self.init_postgres_client()
        # int() guarantees only a number can reach the DDL text below.
        large_dim = int(self.large_embedding_size)
        small_dim = int(self.small_embedding_size)
        # The indexes use vector_l2_ops so they match the `<->` (L2 distance)
        # operator that search_db orders by; an inner-product opclass cannot
        # serve that query.
        create_table_sql = f"""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT FROM pg_tables
                WHERE schemaname = 'public'
                AND tablename = 'search_data'
            ) THEN
                CREATE EXTENSION IF NOT EXISTS vector;
                CREATE TABLE search_data (
                    book_title VARCHAR(255) NOT NULL,
                    text TEXT NOT NULL,
                    large_embedding Vector({large_dim}),
                    small_embedding Vector({small_dim})
                );
                CREATE INDEX ON search_data USING hnsw (large_embedding vector_l2_ops);
                CREATE INDEX ON search_data USING hnsw (small_embedding vector_l2_ops);
            END IF;
        END
        $$;
        """

        with self.postgres_client.cursor() as cursor:
            cursor.execute(create_table_sql)
        self.postgres_client.commit()
        # Register again now that the extension is known to exist.
        register_vector(self.postgres_client)

    def delete_tables(self):
        """Drop ``search_data`` if it exists in the ``public`` schema."""
        self.init_postgres_client()
        sql = """
        DO $$
        BEGIN
            IF EXISTS (
                SELECT FROM pg_tables
                WHERE schemaname = 'public'
                AND tablename = 'search_data'
            ) THEN
                DROP TABLE search_data;
            END IF;
        END
        $$;
        """
        with self.postgres_client.cursor() as cursor:
            cursor.execute(sql)
        self.postgres_client.commit()

    def add_data(self, data: list[dict]):
        """Insert rows into ``search_data`` and commit.

        Args:
            data: One dict per row with the keys ``title``, ``text``,
                ``large_embedding`` and ``small_embedding``.  An empty list
                is a no-op and does not touch the database.
        """
        if not data:
            return
        # Same connection guard as the other public methods, so this method
        # also works when it is the first call on a new client.
        self.init_postgres_client()
        insert_query = """
        INSERT INTO search_data (book_title, text, large_embedding, small_embedding)
        VALUES (%s, %s, %s, %s)
        """
        insert_data = [
            (
                d["title"],
                d["text"],
                d["large_embedding"],
                np.array(d["small_embedding"]),
            )
            for d in data
        ]
        with self.postgres_client.cursor() as cursor:
            cursor.executemany(insert_query, insert_data)
        self.postgres_client.commit()

    def search_db(
        self,
        query_vec: list[float],
        column: Literal["large_embedding", "small_embedding"],
        limit: int = 1,
    ) -> list:
        """Return the rows whose ``column`` vector is closest to ``query_vec``.

        Args:
            query_vec: Query embedding; converted with ``np.array`` before
                being bound as a query parameter.
            column: Vector column to order by.
            limit: Maximum number of rows to return (default 1, the value
                that used to be fixed in the query).

        Returns:
            A list of rows with ``book_title`` and ``text``.

        Raises:
            ValueError: If ``column`` is not one of the two vector columns.
        """
        # The column name is formatted into the SQL text (identifiers cannot
        # be bound as parameters), so reject anything outside the whitelist
        # before a connection is even opened.
        if column not in self._VECTOR_COLUMNS:
            raise ValueError(
                f"column must be one of {self._VECTOR_COLUMNS}, got {column!r}"
            )
        self.init_postgres_client()
        query = f"""
        SELECT book_title, text
        FROM search_data
        ORDER BY {column} <-> %s
        LIMIT %s;
        """

        with self.postgres_client.cursor() as cursor:
            results = cursor.execute(
                query,
                (np.array(query_vec), limit),
            ).fetchall()

        return results

    def get_vector_column_size(self) -> dict[str, int]:
        """Return the summed ``pg_column_size`` of each vector column.

        Returns:
            A row with the keys ``large_embedding_size`` and
            ``small_embedding_size``.  Both are 0 for an empty table
            (``SUM`` alone would yield NULL).
        """
        self.init_postgres_client()
        sql = """
        SELECT COALESCE(SUM(pg_column_size(large_embedding)), 0) as large_embedding_size,
               COALESCE(SUM(pg_column_size(small_embedding)), 0) as small_embedding_size
        FROM search_data;
        """
        with self.postgres_client.cursor() as cursor:
            results = cursor.execute(sql).fetchone()

        return results