├── 10_fine_tuning_CLIP_models.ipynb ├── 1_intro_to_vector_representations.ipynb ├── 2_foundations_of_embedding_models.ipynb ├── 4_building_vector_search_applications ├── movie_search.py └── multimodal_search.py ├── 5_introduction_to_sentence_transformers.ipynb ├── 6_fine_tune_sentence_transformers.ipynb ├── 8_fine_tune_vision_transformers.ipynb ├── 9_introduction_to_CLIP_and_multimodal_models.ipynb └── README.md /1_intro_to_vector_representations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1vQElM_SrIghzkvOf0HgFnahnPGHwjkOw?usp=sharing)\n" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "id": "yOHM4VQ1VcLy" 14 | }, 15 | "source": [ 16 | "## Introduction to Vector Representations" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "LR0SOiGRWV0k" 23 | }, 24 | "source": [ 25 | "We first begin by installing `sentence_transformers` so that we can use their cosine similarity function, `cos_sim`." 
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "colab": { 33 | "base_uri": "https://localhost:8080/" 34 | }, 35 | "collapsed": true, 36 | "id": "xoKirZMGU1Dl", 37 | "outputId": "7b983432-b9bb-40f0-b201-d0332c018794" 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Collecting sentence-transformers\n", 45 | " Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n", 46 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 47 | "\u001b[?25hRequirement already satisfied: transformers<5.0.0,>=4.34.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.40.2)\n", 48 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.66.4)\n", 49 | "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (2.2.1+cu121)\n", 50 | "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.25.2)\n", 51 | "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.2.2)\n", 52 | "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.11.4)\n", 53 | "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.20.3)\n", 54 | "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (9.4.0)\n", 55 | "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (3.14.0)\n", 56 | "Requirement already satisfied: fsspec>=2023.5.0 in 
/usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (2023.6.0)\n", 57 | "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (2.31.0)\n", 58 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (6.0.1)\n", 59 | "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (4.11.0)\n", 60 | "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (24.0)\n", 61 | "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (1.12)\n", 62 | "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.3)\n", 63 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.4)\n", 64 | "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", 65 | " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", 66 | "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", 67 | " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", 68 | "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", 69 | " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", 70 | "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence-transformers)\n", 71 | " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", 72 | "Collecting 
nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transformers)\n", 73 | " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", 74 | "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11.0->sentence-transformers)\n", 75 | " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", 76 | "Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.11.0->sentence-transformers)\n", 77 | " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", 78 | "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.11.0->sentence-transformers)\n", 79 | " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", 80 | "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.11.0->sentence-transformers)\n", 81 | " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", 82 | "Collecting nvidia-nccl-cu12==2.19.3 (from torch>=1.11.0->sentence-transformers)\n", 83 | " Using cached nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n", 84 | "Collecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", 85 | " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", 86 | "Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (2.2.0)\n", 87 | "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers)\n", 88 | " Using cached nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", 89 | "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers) (2023.12.25)\n", 90 | "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from 
transformers<5.0.0,>=4.34.0->sentence-transformers) (0.19.1)\n", 91 | "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers) (0.4.3)\n", 92 | "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (1.4.2)\n", 93 | "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.5.0)\n", 94 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers) (2.1.5)\n", 95 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (3.3.2)\n", 96 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (3.7)\n", 97 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (2.0.7)\n", 98 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (2024.2.2)\n", 99 | "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence-transformers) (1.3.0)\n", 100 | "Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, sentence-transformers\n", 101 | "Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 
nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.1.105 sentence-transformers-2.7.0\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "!pip install sentence-transformers" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "id": "6zEtfUleWenG" 113 | }, 114 | "source": [ 115 | "We can now import `cos_sim`, create two vectors and generate their cosine similarity." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "colab": { 123 | "base_uri": "https://localhost:8080/" 124 | }, 125 | "id": "6V3W7IgIVWZ9", 126 | "outputId": "a2e169f9-f2a4-4ce8-c768-f80de221a76c" 127 | }, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "tensor([[0.9883]])" 133 | ] 134 | }, 135 | "execution_count": 3, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "from sentence_transformers.util import cos_sim\n", 142 | "\n", 143 | "vector1 = [0.1, 0.4, 0.5, 0.0]\n", 144 | "vector2 = [0.1, 0.4, 0.5, 0.1]\n", 145 | "\n", 146 | "cosine_similarity = cos_sim(vector1, vector2)\n", 147 | "\n", 148 | "cosine_similarity" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": { 154 | "id": "2AodaSZKWlOn" 155 | }, 156 | "source": [ 157 | "Notice that the two vectors are identical except for the last entry, which differs by only 0.1. Because the vectors point in almost the same direction, their cosine similarity is close to 1." 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": { 163 | "id": "mpsLlRTDWvpG" 164 | }, 165 | "source": [ 166 | "Let's try the same but for very different vectors." 
167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "colab": { 174 | "base_uri": "https://localhost:8080/" 175 | }, 176 | "id": "hP6k7CazWyGN", 177 | "outputId": "ccca66ca-1b6f-4238-ef4f-7455a0547015" 178 | }, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "tensor([[-0.7173]])" 184 | ] 185 | }, 186 | "execution_count": 4, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "vector3 = [0.9, 0.0, -0.5, 0.0]\n", 193 | "vector4 = [-0.9, 0.8, 0.5, 0.6]\n", 194 | "\n", 195 | "cosine_similarity = cos_sim(vector3, vector4)\n", 196 | "\n", 197 | "cosine_similarity" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": { 203 | "id": "m-0g6MJ0Wx3m" 204 | }, 205 | "source": [ 206 | "Notice that the cosine similarity is now negative. A negative score means the two vectors point in largely opposite directions, so they are dissimilar." 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "id": "CwkoOKBDW-Xg" 213 | }, 214 | "source": [ 215 | "Why don't you try out a few different vectors and see what results you get! You can also vary the number of elements, as long as both vectors have the same length (otherwise `cos_sim` raises an error)." 
216 | ] 217 | } 218 | ], 219 | "metadata": { 220 | "colab": { 221 | "provenance": [] 222 | }, 223 | "kernelspec": { 224 | "display_name": "Python 3", 225 | "name": "python3" 226 | }, 227 | "language_info": { 228 | "name": "python" 229 | } 230 | }, 231 | "nbformat": 4, 232 | "nbformat_minor": 0 233 | } 234 | -------------------------------------------------------------------------------- /4_building_vector_search_applications/movie_search.py: -------------------------------------------------------------------------------- 1 | import marqo 2 | 3 | # Create a Marqo client 4 | mq = marqo.Client(url="http://localhost:8882") 5 | 6 | # Delete the movie index if it already exists 7 | try: 8 | mq.index("movies-index").delete() 9 | except: 10 | pass 11 | 12 | # Create the movie index 13 | mq.create_index("movies-index", model="hf/e5-base-v2") 14 | 15 | # Add documents (movie descriptions) to the index 16 | mq.index("movies-index").add_documents( 17 | [ 18 | { 19 | "Title": "Inception", 20 | "Description": "A mind-bending thriller about dream invasion and manipulation.", 21 | }, 22 | { 23 | "Title": "Shrek", 24 | "Description": "An ogre's peaceful life is disrupted by a horde of fairy tale characters who need his help.", 25 | }, 26 | { 27 | "Title": "Interstellar", 28 | "Description": "A team of explorers travel through a wormhole in space to ensure humanity's survival.", 29 | }, 30 | { 31 | "Title": "The Martian", 32 | "Description": "An astronaut becomes stranded on Mars and must find a way to survive.", 33 | }, 34 | ], 35 | tensor_fields=["Description"], 36 | ) 37 | 38 | # Perform a search query on the index 39 | results = mq.index("movies-index").search( 40 | q="Which movie is about space exploration?" 41 | ) 42 | 43 | # Print the search results 44 | for result in results['hits']: 45 | print(f"Title: {result['Title']}, Description: {result['Description']}. 
Score: {result['_score']}") -------------------------------------------------------------------------------- /4_building_vector_search_applications/multimodal_search.py: -------------------------------------------------------------------------------- 1 | import marqo 2 | 3 | # Create a Marqo client with the specified URL 4 | mq = marqo.Client(url="http://localhost:8882") 5 | 6 | # Delete the movie index if it already exists 7 | try: 8 | mq.index("my-multimodal-index").delete() 9 | except: 10 | pass 11 | 12 | # Settings for the index creation, enabling image indexing and specifying the model to use. 13 | settings = { 14 | "treat_urls_and_pointers_as_images": True, # allows us to treat URLs as images and index them 15 | "model": "open_clip/ViT-B-32/laion2b_s34b_b79k", # model used for indexing 16 | } 17 | 18 | # Create the index with the specified settings 19 | response = mq.create_index("my-multimodal-index", **settings) 20 | 21 | # Add documents to the created index, including an image and its description 22 | response = mq.index("my-multimodal-index").add_documents( 23 | [ 24 | { 25 | "My_Image": "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b3/Hipop%C3%B3tamo_%28Hippopotamus_amphibius%29%2C_parque_nacional_de_Chobe%2C_Botsuana%2C_2018-07-28%2C_DD_82.jpg/640px-Hipop%C3%B3tamo_%28Hippopotamus_amphibius%29%2C_parque_nacional_de_Chobe%2C_Botsuana%2C_2018-07-28%2C_DD_82.jpg", 26 | "Description": "The hippopotamus, also called the common hippopotamus or river hippopotamus, is a large semiaquatic mammal native to sub-Saharan Africa", 27 | "_id": "hippo-facts", # unique identifier for the document 28 | } 29 | ], 30 | tensor_fields=["My_Image"], # specify that "My_Image" should be treated as a tensor field 31 | ) 32 | 33 | # Search the index for the term "animal" 34 | results = mq.index("my-multimodal-index").search("animal") 35 | 36 | # Print the search results 37 | import pprint 38 | pprint.pprint(results) 
-------------------------------------------------------------------------------- /5_introduction_to_sentence_transformers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1FI5iO_RAZXsyzA1OjinDzu59Y9q5ZZ-b?usp=drive_link)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "id": "l8LyIe7GNK7Z" 14 | }, 15 | "source": [ 16 | "## Introduction to Sentence Transformers" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "RCTtp6opNNNc" 23 | }, 24 | "source": [ 25 | "First, we install the `sentence-transformers` library:" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "colab": { 33 | "base_uri": "https://localhost:8080/" 34 | }, 35 | "collapsed": true, 36 | "id": "2B1cBPtwLscs", 37 | "outputId": "376abe4c-eed1-47dc-9e90-d76147a3fb8a" 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Collecting sentence-transformers\n", 45 | " Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n", 46 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 47 | "\u001b[?25hRequirement already satisfied: transformers<5.0.0,>=4.34.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.41.2)\n", 48 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.66.4)\n", 49 | "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (2.3.0+cu121)\n", 50 | "Requirement already satisfied: numpy in 
/usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.25.2)\n", 51 | "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.2.2)\n", 52 | "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.11.4)\n", 53 | "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.23.4)\n", 54 | "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (9.4.0)\n", 55 | "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (3.15.3)\n", 56 | "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (2023.6.0)\n", 57 | "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (24.1)\n", 58 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (6.0.1)\n", 59 | "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (2.31.0)\n", 60 | "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (4.12.2)\n", 61 | "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (1.12.1)\n", 62 | "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.3)\n", 63 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from 
torch>=1.11.0->sentence-transformers) (3.1.4)\n", 64 | "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", 65 | " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", 66 | "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", 67 | " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", 68 | "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", 69 | " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", 70 | "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence-transformers)\n", 71 | " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", 72 | "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transformers)\n", 73 | " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", 74 | "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11.0->sentence-transformers)\n", 75 | " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", 76 | "Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.11.0->sentence-transformers)\n", 77 | " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", 78 | "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.11.0->sentence-transformers)\n", 79 | " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", 80 | "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.11.0->sentence-transformers)\n", 81 | " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", 82 | "Collecting nvidia-nccl-cu12==2.20.5 (from torch>=1.11.0->sentence-transformers)\n", 83 | " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n", 84 | "Collecting 
nvidia-nvtx-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", 85 | " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", 86 | "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (2.3.0)\n", 87 | "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers)\n", 88 | " Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n", 89 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 90 | "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers) (2024.5.15)\n", 91 | "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers) (0.19.1)\n", 92 | "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers) (0.4.3)\n", 93 | "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (1.4.2)\n", 94 | "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.5.0)\n", 95 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers) (2.1.5)\n", 96 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (3.3.2)\n", 97 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from 
requests->huggingface-hub>=0.15.1->sentence-transformers) (3.7)\n", 98 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (2.0.7)\n", 99 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (2024.6.2)\n", 100 | "Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence-transformers) (1.3.0)\n", 101 | "Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, sentence-transformers\n", 102 | "Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 sentence-transformers-3.0.1\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "!pip install sentence-transformers" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": { 113 | "id": "d_cDJOZXNRHX" 114 | }, 115 | "source": [ 116 | "Next, we create our Sentence Transformer model. 
We will use the original SBERT model `bert-base-nli-mean-tokens`:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 2, 122 | "metadata": { 123 | "colab": { 124 | "base_uri": "https://localhost:8080/", 125 | "height": 689, 126 | "referenced_widgets": [ 127 | "360b954585c84f2a86779847a259603c", 128 | "fb4ea5855dcf4c5db027c3faa10db3d2", 129 | "86cf20f36cdd417c96065a5d4898130d", 130 | "424efd371c144d6a8642a3f8950a86e1", 131 | "2692ee7db6ef41c8958c38408500fa8f", 132 | "fd584199c71641e4a9bec5b36ec56201", 133 | "36d5965ff6f0481b9167af771e4e0df7", 134 | "0f582051d27c4569abc1a01df7e2963b", 135 | "2dad5440236942afba85e07b687a5a7c", 136 | "6ec837048af840e1a82f96a84ad655c2", 137 | "c1419d28271043a99e478b090a2ccd4b", 138 | "ee310c8d459d40fdb76cc7ef4bc2ac11", 139 | "30b42e04d0a7459aae73d5e5b2dcb6df", 140 | "3d84b97a245f4ba4847d8f1fd30e4642", 141 | "853d36943a744d4f94a6a20e8d947ab8", 142 | "bd47f5fe5d054bdda29989cd20b88f5a", 143 | "3b17c152bf1e4cb4939478cab9295425", 144 | "221e05cf46a94f9092ba2d4ef0125693", 145 | "e142c6264d3945a0ba424927c6fb81a8", 146 | "ae31f39c929a4301ab2c790758f90023", 147 | "146f4d816c4b43d3b27cd9eddf111b38", 148 | "5a13346e7e4643c7a5fbf5c4b64b7c5f", 149 | "aa08720bb09143ee8b80ef06975e5e24", 150 | "7342dc77170d4cdcaf9c4bf66fa375c2", 151 | "6348fcfdd5b147a48941e79250200134", 152 | "197d0bbf985943beb0e58e548b6c47e0", 153 | "718ab7c31c4e4801a1f35fa52920c5fb", 154 | "4b1646623a584d769cc29ffd1c7642a7", 155 | "bc564a1a32e642208fea29b1ea683c8a", 156 | "0a8aab50dc894d5b8cc0396c204ea484", 157 | "1a5e2a13bde547588e90df2b254f5e5b", 158 | "5849dcdf477c49b38988f7e0f10c85ea", 159 | "d34b262b7eb1407d87adc44af95f2ef8", 160 | "e44d0e452f054debaf1bf96b6254c1d7", 161 | "762b576dee6047a6b0237b8e05393f4a", 162 | "fba3de7cc88b4eec9abd9296a7e9f09f", 163 | "dec2ae82e65846b9947a50a3c125c4ea", 164 | "77f8f5f2681b4ecaa5568579fd9bd557", 165 | "c3364639dde6421ba75efbc88ba94cbd", 166 | "2c4d2f6c62724d538575216d55e53561", 167 | 
"0515d0d4fca04c94820cac6c9298f00d", 168 | "38e08f79a0fb4b7dac97a643bc3f7d33", 169 | "476f3a98d0f64826855b7fcce615cb6d", 170 | "2b1b80750b124745b4ffa672dab26a74", 171 | "af091750a9b34129a2324ba1e01117b4", 172 | "59f781b84b9041c28fdf027d0e9b2533", 173 | "02fbbf167d6a4d0293319265037e6686", 174 | "0ff61dd665c1469eafede3ec2299d03e", 175 | "e518cf3b82e54e9cbb0dce7b99140e22", 176 | "82eb053972774b75a8597f9a0863266d", 177 | "202f03a34746444086121d5c40dc2292", 178 | "3b5a6535e75d4397b797b72517e4eefc", 179 | "f8dfa2bb33f94d03977ac66efa8d2eae", 180 | "fc59a261b0d9406d8599fa65402ef50e", 181 | "0d2bb251996a45bcabbffec0f4a36c52", 182 | "e3bee91bfb8248eab62834743864d437", 183 | "453b02f180bb4eb292375d1094a99f92", 184 | "862bf0b020784c008dc23127af2a705f", 185 | "3e092121914d4594b27fd1dd18c70104", 186 | "beafaae81f304b418349fb4eefa5bcf0", 187 | "bbbe7b3f12b849a4a2cb32a24112cc8d", 188 | "a62f3e3ff3d34ccc823ebb01734fea60", 189 | "fb2ef3c8aff3461ba5bc3de90da5b5a6", 190 | "f55ee1eb137f4a71abbb801922ec492f", 191 | "702b50e7b84a4ea6943fc426ef2cf4cc", 192 | "dc91fdfdf4fc49858a4932c7eaa06bca", 193 | "2eb30b05df9a46bcb6e1dd10760e91b3", 194 | "467929a958334c0c8faa4400bb3bdeb6", 195 | "6591a726ed864af785746993cf55fd78", 196 | "fbdcae0db91f4e7094bda0fb18a9af8f", 197 | "c5fcc32399ed437496ab0fd6c9023517", 198 | "f1055bd3bdc94af7bf55c3ca5b36a72b", 199 | "1518499740274c379a4e58bd145f0f49", 200 | "0a032e1b45704fd3a6c9f38a59d32cba", 201 | "0d93b507d9564442b6dbb55974d5e65b", 202 | "f231b57194e34adb9352bb83b2194126", 203 | "d47a32d005d9446c8c5e103be14588d0", 204 | "ad7263c24b4146f4ba21d14a98e5d015", 205 | "f4c964eac66f4cf082db538989090184", 206 | "694560dd0c6b408e886af5adf1d1aefa", 207 | "f5d6db967883474dae96456d98aaf766", 208 | "2608f69f374c440e80331de40cf62ff4", 209 | "14f0c2bd5c434d2ea489fd38585733b4", 210 | "ad2a338e366a4df5b579c83c41045d97", 211 | "1baae174fc074117a1a790d664d86ebc", 212 | "a81dae6df07145f2ac695344ecf27ea7", 213 | "ccaeefbb10e94d0f94ba5cb31e425440", 214 | 
"e6e3b688126346f8a7bbe5e96cdbbd30", 215 | "3940c508f7d74fa79152a41db6c1c8cb", 216 | "7f01ad82eb2c4761b3cf722fddc2b5bb", 217 | "89069958e5f641359c62d930ff17c952", 218 | "eef306c8c544455694a2b0916fcadb57", 219 | "d7aebad597e2435cbc3e549851837607", 220 | "2784f56b55fa4d939c91a6fa1bcb60f4", 221 | "0d1653b53a86417f94ff8fd9a5c1fd56", 222 | "bf191b56d9f2437386c3d5296a712e6d", 223 | "a479c7fcd78c4028a649cf793b74f04f", 224 | "0e227d1507844e119aa38912fa28ebe7", 225 | "4d8f1f67dea54ae195fbc1a3e73dbcbe", 226 | "3b63e231303148e3a0ad88bb0f76a3c8", 227 | "a01364624917492e869340d3553197ee", 228 | "5e81150c5e354841818c1d5e2ecfec13", 229 | "485c849a93be4b9aa2622bf3a0b141a2", 230 | "050c86324c934ae68f82f7fcffec9dca", 231 | "fe034e16cfc641e5825cca81485b61ed", 232 | "295824acee454d068b80b1a3da504934", 233 | "235c47625de5442c83a7eb3c88e22b87", 234 | "d70801e685a247a5957a003e96ebd51c", 235 | "098b2b6708ed4dcbb1763145ed85b337", 236 | "125dac24516b4ecdb01c7671cece467c", 237 | "511386c8e27a44bb89808d86de7a72c0", 238 | "8b1a84d4050e401eb9e748ff2b2d7c2f", 239 | "a10ed8623ef943b58a64fb83d3b46258", 240 | "42e7324e002e473a9525377cbd167303", 241 | "f089c055c090448f8783014c296ee8b4", 242 | "b459b5dc43a24129b01300849bc08cd4", 243 | "9b14ad21ebe84047ad3ed51dc8995845", 244 | "d8608d0402c04cfeba56c55e3812caee", 245 | "0610894ca6c24ab3b734d858f10a96a1", 246 | "189de58ef5d443f8bbc022300b314804", 247 | "153117f9c04b4c65997280105c558b61", 248 | "8acc6f5ec63f4d3b8b92ee00895f9a26", 249 | "e94ad71dc8c246769718cd6b35435bb8", 250 | "66d6a28b6dff427dad914069c1204100", 251 | "ee2e54c4a182470fb6a709bb503640d4", 252 | "8d6f580e2c11490b99b99397012e1a57", 253 | "366a540e568f4cb48e834641fc3baa75", 254 | "705db19cf0194cd89124f8cfe0a6d90f", 255 | "127e9347cbda48909237465134692735", 256 | "8cf074d1a50548709543e8bf368057c5", 257 | "aab79b118a0b4b0288575a9d698f92f2", 258 | "edc8d0479807488781069e227ea65749" 259 | ] 260 | }, 261 | "id": "DbEBUTTmMGcK", 262 | "outputId": "00f4a8dd-eccd-4eeb-bc89-2d49f1a0f762" 263 | }, 264 | 
"outputs": [ 265 | { 266 | "name": "stderr", 267 | "output_type": "stream", 268 | "text": [ 269 | "/usr/local/lib/python3.10/dist-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", 270 | " from tqdm.autonotebook import tqdm, trange\n", 271 | "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", 272 | "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", 273 | "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", 274 | "You will be able to reuse this secret in all of your notebooks.\n", 275 | "Please note that authentication is recommended but still optional to access public models or datasets.\n", 276 | " warnings.warn(\n" 277 | ] 278 | }, 279 | { 280 | "data": { 281 | "application/vnd.jupyter.widget-view+json": { 282 | "model_id": "360b954585c84f2a86779847a259603c", 283 | "version_major": 2, 284 | "version_minor": 0 285 | }, 286 | "text/plain": [ 287 | "modules.json: 0%| | 0.00/229 [00:00