├── .gitignore ├── .idea ├── .gitignore ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── intro-workshop.iml ├── misc.xml ├── modules.xml └── vcs.xml ├── README.md ├── data ├── explore_data.ipynb ├── jeopardy_1k.json ├── movies.csv ├── tmdb_data_1950_2024.json └── winemag_tiny.json ├── images ├── hybrid_search_1.png ├── hybrid_search_2.png ├── hybrid_search_3.png ├── hybrid_search_4.png ├── keyword_search_1.png ├── keyword_search_2.png ├── keyword_search_3.png ├── object_import_process_byov.png ├── object_import_process_vectorizer.png ├── vector_search_1.png ├── vector_search_2.png └── vector_search_3.png ├── requirements.txt ├── workshop_finished.ipynb └── z_optional_explain_vectors.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | temp.ipynb 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # poetry 100 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 101 | # This is especially recommended for binary packages to ensure reproducibility, and is more 102 | # commonly ignored for libraries. 103 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 104 | #poetry.lock 105 | 106 | # pdm 107 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
108 | #pdm.lock 109 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 110 | # in version control. 111 | # https://pdm.fming.dev/#use-with-ide 112 | .pdm.toml 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/ 163 | 164 | # General 165 | .DS_Store 166 | .AppleDouble 167 | .LSOverride 168 | 169 | # Icon must end with two \r 170 | Icon 171 | 172 | 173 | # Thumbnails 174 | ._* 175 | 176 | # Files that might appear in the root of a volume 177 | .DocumentRevisions-V100 178 | .fseventsd 179 | .Spotlight-V100 180 | .TemporaryItems 181 | .Trashes 182 | .VolumeIcon.icns 183 | .com.apple.timemachine.donotpresent 184 | 185 | # Directories potentially created on remote AFP share 186 | .AppleDB 187 | .AppleDesktop 188 | Network Trash Folder 189 | Temporary Items 190 | .apdisk 191 | 192 | # Logs 193 | logs 194 | *.log 195 | npm-debug.log* 196 | yarn-debug.log* 197 | yarn-error.log* 198 | lerna-debug.log* 199 | .pnpm-debug.log* 200 | 201 | # Diagnostic reports (https://nodejs.org/api/report.html) 202 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 203 | 204 | # Runtime data 205 | pids 206 | *.pid 207 | *.seed 208 | *.pid.lock 209 | 210 | # Directory for instrumented libs generated by jscoverage/JSCover 211 | lib-cov 212 | 213 | # Coverage directory used by tools like istanbul 214 | coverage 215 | *.lcov 216 | 217 | # nyc test coverage 218 | .nyc_output 219 | 220 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 221 | .grunt 222 | 223 | # Bower dependency directory (https://bower.io/) 224 | bower_components 225 | 226 | # node-waf configuration 227 | .lock-wscript 228 | 229 | # Compiled binary addons (https://nodejs.org/api/addons.html) 230 | build/Release 231 | 232 | # Dependency directories 233 | node_modules/ 234 | jspm_packages/ 235 | 236 | # Snowpack dependency directory (https://snowpack.dev/) 237 | web_modules/ 238 | 239 | # TypeScript cache 240 | *.tsbuildinfo 241 | 242 | # Optional npm cache directory 243 | .npm 244 | 245 | # Optional eslint cache 246 | .eslintcache 247 | 248 | # Optional stylelint cache 249 | .stylelintcache 250 | 251 | # Microbundle cache 252 | .rpt2_cache/ 253 | .rts2_cache_cjs/ 254 | .rts2_cache_es/ 255 | 
.rts2_cache_umd/ 256 | 257 | # Optional REPL history 258 | .node_repl_history 259 | 260 | # Output of 'npm pack' 261 | *.tgz 262 | 263 | # Yarn Integrity file 264 | .yarn-integrity 265 | 266 | # dotenv environment variable files 267 | .env 268 | .env.development.local 269 | .env.test.local 270 | .env.production.local 271 | .env.local 272 | 273 | # parcel-bundler cache (https://parceljs.org/) 274 | .cache 275 | .parcel-cache 276 | 277 | # Next.js build output 278 | .next 279 | out 280 | 281 | # Nuxt.js build / generate output 282 | .nuxt 283 | dist 284 | 285 | # Gatsby files 286 | .cache/ 287 | # Comment in the public line in if your project uses Gatsby and not Next.js 288 | # https://nextjs.org/blog/next-9-1#public-directory-support 289 | # public 290 | 291 | # vuepress build output 292 | .vuepress/dist 293 | 294 | # vuepress v2.x temp and cache directory 295 | .temp 296 | .cache 297 | 298 | # Docusaurus cache and generated files 299 | .docusaurus 300 | 301 | # Serverless directories 302 | .serverless/ 303 | 304 | # FuseBox cache 305 | .fusebox/ 306 | 307 | # DynamoDB Local files 308 | .dynamodb/ 309 | 310 | # TernJS port file 311 | .tern-port 312 | 313 | # Stores VSCode versions used for testing VSCode extensions 314 | .vscode-test 315 | 316 | # yarn v2 317 | .yarn/cache 318 | .yarn/unplugged 319 | .yarn/build-state.yml 320 | .yarn/install-state.gz 321 | .pnp.* 322 | 323 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 324 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 325 | 326 | # User-specific stuff 327 | .idea/**/workspace.xml 328 | .idea/**/tasks.xml 329 | .idea/**/usage.statistics.xml 330 | .idea/**/dictionaries 331 | .idea/**/shelf 332 | 333 | # AWS User-specific 334 | .idea/**/aws.xml 335 | 336 | # Generated files 337 | .idea/**/contentModel.xml 338 | 339 | # Sensitive or high-churn files 340 | .idea/**/dataSources/ 341 | .idea/**/dataSources.ids 342 | 
.idea/**/dataSources.local.xml 343 | .idea/**/sqlDataSources.xml 344 | .idea/**/dynamic.xml 345 | .idea/**/uiDesigner.xml 346 | .idea/**/dbnavigator.xml 347 | 348 | # Gradle 349 | .idea/**/gradle.xml 350 | .idea/**/libraries 351 | 352 | # Gradle and Maven with auto-import 353 | # When using Gradle or Maven with auto-import, you should exclude module files, 354 | # since they will be recreated, and may cause churn. Uncomment if using 355 | # auto-import. 356 | # .idea/artifacts 357 | # .idea/compiler.xml 358 | # .idea/jarRepositories.xml 359 | # .idea/modules.xml 360 | # .idea/*.iml 361 | # .idea/modules 362 | # *.iml 363 | # *.ipr 364 | 365 | # CMake 366 | cmake-build-*/ 367 | 368 | # Mongo Explorer plugin 369 | .idea/**/mongoSettings.xml 370 | 371 | # File-based project format 372 | *.iws 373 | 374 | # IntelliJ 375 | out/ 376 | 377 | # mpeltonen/sbt-idea plugin 378 | .idea_modules/ 379 | 380 | # JIRA plugin 381 | atlassian-ide-plugin.xml 382 | 383 | # Cursive Clojure plugin 384 | .idea/replstate.xml 385 | 386 | # SonarLint plugin 387 | .idea/sonarlint/ 388 | 389 | # Crashlytics plugin (for Android Studio and IntelliJ) 390 | com_crashlytics_export_strings.xml 391 | crashlytics.properties 392 | crashlytics-build.properties 393 | fabric.properties 394 | 395 | # Editor-based Rest Client 396 | .idea/httpRequests 397 | 398 | # Android studio 3.1+ serialized cache file 399 | .idea/caches/build_file_checksums.ser 400 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/intro-workshop.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Introduction to Weaviate - Online Workshop 2 | 3 | This repository contains files related to the "Introduction to Weaviate" workshops, which we run periodically for an enthusiastic audience. 4 | 5 | The repository is live, and will be updated with new content as we run more workshops and iterate based on your feedback. 6 | 7 | You can run the code using an online environment such as Google Colab, or locally. 8 | 9 | ## Notebooks explained 10 | 11 | The notebooks are listed below in the order in which they are used in the workshop. 
The notebooks are as follows: 12 | 13 | - `workshop_clean.ipynb`: A blank notebook for the audience to follow along with the instructor 14 | - `workshop_finished.ipynb`: A pre-filled version of the notebook above, with the code already written 15 | - `z_optional_explain_vectors.ipynb`: An optional notebook for explaining how vectors represent meaning 16 | 17 | ### Weaviate Cloud Services 18 | 19 | Some of the workshop code and examples are written around Weaviate Cloud Services (WCS). 20 | You can sign up to WCS and create a free sandbox instance at `https://console.weaviate.cloud`. 21 | -------------------------------------------------------------------------------- /data/explore_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from pathlib import Path\n", 11 | "import json\n", 12 | "\n", 13 | "data_file = Path(\"tmdb_data_1950_2024.json\")\n", 14 | "data = json.loads(data_file.read_text())" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "year_dfs = list()\n", 24 | "for k, v in data.items():\n", 25 | " tmp_df = pd.DataFrame(v)\n", 26 | " tmp_df[\"year\"] = k\n", 27 | " year_dfs.append(tmp_df)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "df = pd.concat(year_dfs)\n", 37 | "df = df[df[\"vote_count\"] > 2000]" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "assert len(df[df[\"adult\"] == True]) == 0" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 5, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "df.drop(columns=[\"adult\"], inplace=True)" 56 | ] 57 | }, 58 
| { 59 | "cell_type": "code", 60 | "execution_count": 6, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/html": [ 66 | "
\n", 67 | "\n", 80 | "\n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | "
backdrop_pathgenre_idsidoriginal_languageoriginal_titleoverviewpopularityposter_pathrelease_datetitlevideovote_averagevote_countyear
0/rH0DPF7pB35jxLxKb3JRUgCrrnp.jpg[10751, 14, 16, 10749]11224enCinderellaCinderella has faith her dreams of a better li...100.819/avz6S9HYWs4O8Oe4PenBFNX4uDi.jpg1950-02-22CinderellaFalse7.04465231950
1/p47ihFj4A7EpBjmPHdTj4ipyq1S.jpg[18]599enSunset BoulevardA hack screenwriter writes a screenplay for a ...57.740/sC4Dpmn87oz9AuxZ15Lmip0Ftgr.jpg1950-08-10Sunset BoulevardFalse8.31224851950
11/zyO6j74DKMWfp5snWg6Hwo0T3Mz.jpg[80, 18, 9648]548ja羅生門Brimming with action while incisively examinin...21.011/vL7Xw04nFMHwnvXRFCmYYAzMUvY.jpg1950-08-26RashomonFalse8.09121211950
0/b4yiLlIFuiULuuLTxT0Pt1QyT6J.jpg[16, 10751, 14, 12]12092enAlice in WonderlandOn a golden afternoon, young Alice follows a W...75.465/20cvfwfaFqNbe9Fc3VEHJuPRxmn.jpg1951-07-28Alice in WonderlandFalse7.20056971951
0/mxf8hJJkHTCqZP3m4o8E1TtwHHs.jpg[35, 10749]872enSingin' in the RainIn 1927 Hollywood, a silent film production co...31.407/w03EiJVHP8Un77boQeE7hg9DVdU.jpg1952-04-09Singin' in the RainFalse8.20030361952
\n", 188 | "
" 189 | ], 190 | "text/plain": [ 191 | " backdrop_path genre_ids id \\\n", 192 | "0 /rH0DPF7pB35jxLxKb3JRUgCrrnp.jpg [10751, 14, 16, 10749] 11224 \n", 193 | "1 /p47ihFj4A7EpBjmPHdTj4ipyq1S.jpg [18] 599 \n", 194 | "11 /zyO6j74DKMWfp5snWg6Hwo0T3Mz.jpg [80, 18, 9648] 548 \n", 195 | "0 /b4yiLlIFuiULuuLTxT0Pt1QyT6J.jpg [16, 10751, 14, 12] 12092 \n", 196 | "0 /mxf8hJJkHTCqZP3m4o8E1TtwHHs.jpg [35, 10749] 872 \n", 197 | "\n", 198 | " original_language original_title \\\n", 199 | "0 en Cinderella \n", 200 | "1 en Sunset Boulevard \n", 201 | "11 ja 羅生門 \n", 202 | "0 en Alice in Wonderland \n", 203 | "0 en Singin' in the Rain \n", 204 | "\n", 205 | " overview popularity \\\n", 206 | "0 Cinderella has faith her dreams of a better li... 100.819 \n", 207 | "1 A hack screenwriter writes a screenplay for a ... 57.740 \n", 208 | "11 Brimming with action while incisively examinin... 21.011 \n", 209 | "0 On a golden afternoon, young Alice follows a W... 75.465 \n", 210 | "0 In 1927 Hollywood, a silent film production co... 
31.407 \n", 211 | "\n", 212 | " poster_path release_date title video \\\n", 213 | "0 /avz6S9HYWs4O8Oe4PenBFNX4uDi.jpg 1950-02-22 Cinderella False \n", 214 | "1 /sC4Dpmn87oz9AuxZ15Lmip0Ftgr.jpg 1950-08-10 Sunset Boulevard False \n", 215 | "11 /vL7Xw04nFMHwnvXRFCmYYAzMUvY.jpg 1950-08-26 Rashomon False \n", 216 | "0 /20cvfwfaFqNbe9Fc3VEHJuPRxmn.jpg 1951-07-28 Alice in Wonderland False \n", 217 | "0 /w03EiJVHP8Un77boQeE7hg9DVdU.jpg 1952-04-09 Singin' in the Rain False \n", 218 | "\n", 219 | " vote_average vote_count year \n", 220 | "0 7.044 6523 1950 \n", 221 | "1 8.312 2485 1950 \n", 222 | "11 8.091 2121 1950 \n", 223 | "0 7.200 5697 1951 \n", 224 | "0 8.200 3036 1952 " 225 | ] 226 | }, 227 | "execution_count": 6, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "df.head()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 7, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "df.to_csv(\"movies.csv\", index=False)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 8, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/html": [ 253 | "
\n", 254 | "\n", 267 | "\n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " 
\n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | "
backdrop_pathgenre_idsidoriginal_languageoriginal_titleoverviewpopularityposter_pathrelease_datetitlevideovote_averagevote_countyear
1/8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg[28, 878, 12]27205enInceptionCobb, a skilled thief who commits corporate es...235.098/oYuLEt3zVCKq57qu2F8dT7NIa6f.jpg2010-07-15InceptionFalse8.368359182010
0/xJHokMbljvjADYdit5fK5VQsXEG.jpg[12, 18, 878]157336enInterstellarThe adventures of a group of explorers who mak...283.624/gEU2QniE6E77NI6lCU6MxlNBvIx.jpg2014-11-05InterstellarFalse8.436345942014
3/dqK9Hag1054tghRQSqLSfrkvQnA.jpg[18, 28, 80, 53]155enThe Dark KnightBatman raises the stakes in his war on crime. ...112.129/qJ2tW6WMUDux911r6m7haRef0WH.jpg2008-07-16The Dark KnightFalse8.516321202008
3/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg[28, 12, 14, 878]19995enAvatarIn the 22nd century, a paraplegic Marine is di...128.898/kyeqWdyUXW608qlYkRqosgbbJyK.jpg2009-12-15AvatarFalse7.582309792009
4/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg[878, 28, 12]24428enThe AvengersWhen an unexpected enemy emerges and threatens...172.761/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg2012-04-25The AvengersFalse7.714301622012
1/en971MEXui9diirXlogOrPKmsEn.jpg[28, 12, 35]293660enDeadpoolThe origin story of former Special Forces oper...299.745/3E53WEZJqP6aM84D8CckXx4pIHw.jpg2016-02-09DeadpoolFalse7.614300572016
4/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg[12, 28, 878]299536enAvengers: Infinity WarAs the Avengers and their allies have continue...240.754/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg2018-04-25Avengers: Infinity WarFalse8.246290392018
1/hZkgoQYus5vegHoetLkCJzb17zJ.jpg[18]550enFight ClubA ticking-time-bomb insomniac and a slippery s...135.882/pB8BM7pdSp6B6Ih7QZ4DrQ3PmJK.jpg1999-10-15Fight ClubFalse8.440287031999
0/suaEOtk1N1sgg2MTM7oZd2cfVp3.jpg[53, 80]680enPulp FictionA burger-loving hit man, his philosophical par...261.651/d5iIlFn5s0ImszYzBPb8JPIfbXD.jpg1994-09-10Pulp FictionFalse8.488272731994
2/mzfx54nfDPTUXZOG48u4LaEheDy.jpg[35, 18, 10749]13enForrest GumpA man with a low IQ has accomplished great thi...159.372/arw2vcBveWOVZr6pxd9XTd1TdQa.jpg1994-06-23Forrest GumpFalse8.475267901994
\n", 460 | "
" 461 | ], 462 | "text/plain": [ 463 | " backdrop_path genre_ids id \\\n", 464 | "1 /8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg [28, 878, 12] 27205 \n", 465 | "0 /xJHokMbljvjADYdit5fK5VQsXEG.jpg [12, 18, 878] 157336 \n", 466 | "3 /dqK9Hag1054tghRQSqLSfrkvQnA.jpg [18, 28, 80, 53] 155 \n", 467 | "3 /vL5LR6WdxWPjLPFRLe133jXWsh5.jpg [28, 12, 14, 878] 19995 \n", 468 | "4 /9BBTo63ANSmhC4e6r62OJFuK2GL.jpg [878, 28, 12] 24428 \n", 469 | "1 /en971MEXui9diirXlogOrPKmsEn.jpg [28, 12, 35] 293660 \n", 470 | "4 /mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg [12, 28, 878] 299536 \n", 471 | "1 /hZkgoQYus5vegHoetLkCJzb17zJ.jpg [18] 550 \n", 472 | "0 /suaEOtk1N1sgg2MTM7oZd2cfVp3.jpg [53, 80] 680 \n", 473 | "2 /mzfx54nfDPTUXZOG48u4LaEheDy.jpg [35, 18, 10749] 13 \n", 474 | "\n", 475 | " original_language original_title \\\n", 476 | "1 en Inception \n", 477 | "0 en Interstellar \n", 478 | "3 en The Dark Knight \n", 479 | "3 en Avatar \n", 480 | "4 en The Avengers \n", 481 | "1 en Deadpool \n", 482 | "4 en Avengers: Infinity War \n", 483 | "1 en Fight Club \n", 484 | "0 en Pulp Fiction \n", 485 | "2 en Forrest Gump \n", 486 | "\n", 487 | " overview popularity \\\n", 488 | "1 Cobb, a skilled thief who commits corporate es... 235.098 \n", 489 | "0 The adventures of a group of explorers who mak... 283.624 \n", 490 | "3 Batman raises the stakes in his war on crime. ... 112.129 \n", 491 | "3 In the 22nd century, a paraplegic Marine is di... 128.898 \n", 492 | "4 When an unexpected enemy emerges and threatens... 172.761 \n", 493 | "1 The origin story of former Special Forces oper... 299.745 \n", 494 | "4 As the Avengers and their allies have continue... 240.754 \n", 495 | "1 A ticking-time-bomb insomniac and a slippery s... 135.882 \n", 496 | "0 A burger-loving hit man, his philosophical par... 261.651 \n", 497 | "2 A man with a low IQ has accomplished great thi... 
159.372 \n", 498 | "\n", 499 | " poster_path release_date title \\\n", 500 | "1 /oYuLEt3zVCKq57qu2F8dT7NIa6f.jpg 2010-07-15 Inception \n", 501 | "0 /gEU2QniE6E77NI6lCU6MxlNBvIx.jpg 2014-11-05 Interstellar \n", 502 | "3 /qJ2tW6WMUDux911r6m7haRef0WH.jpg 2008-07-16 The Dark Knight \n", 503 | "3 /kyeqWdyUXW608qlYkRqosgbbJyK.jpg 2009-12-15 Avatar \n", 504 | "4 /RYMX2wcKCBAr24UyPD7xwmjaTn.jpg 2012-04-25 The Avengers \n", 505 | "1 /3E53WEZJqP6aM84D8CckXx4pIHw.jpg 2016-02-09 Deadpool \n", 506 | "4 /7WsyChQLEftFiDOVTGkv3hFpyyt.jpg 2018-04-25 Avengers: Infinity War \n", 507 | "1 /pB8BM7pdSp6B6Ih7QZ4DrQ3PmJK.jpg 1999-10-15 Fight Club \n", 508 | "0 /d5iIlFn5s0ImszYzBPb8JPIfbXD.jpg 1994-09-10 Pulp Fiction \n", 509 | "2 /arw2vcBveWOVZr6pxd9XTd1TdQa.jpg 1994-06-23 Forrest Gump \n", 510 | "\n", 511 | " video vote_average vote_count year \n", 512 | "1 False 8.368 35918 2010 \n", 513 | "0 False 8.436 34594 2014 \n", 514 | "3 False 8.516 32120 2008 \n", 515 | "3 False 7.582 30979 2009 \n", 516 | "4 False 7.714 30162 2012 \n", 517 | "1 False 7.614 30057 2016 \n", 518 | "4 False 8.246 29039 2018 \n", 519 | "1 False 8.440 28703 1999 \n", 520 | "0 False 8.488 27273 1994 \n", 521 | "2 False 8.475 26790 1994 " 522 | ] 523 | }, 524 | "execution_count": 8, 525 | "metadata": {}, 526 | "output_type": "execute_result" 527 | } 528 | ], 529 | "source": [ 530 | "df.sort_values(\"vote_count\", ascending=False).head(10)" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [] 539 | } 540 | ], 541 | "metadata": { 542 | "kernelspec": { 543 | "display_name": "venv", 544 | "language": "python", 545 | "name": "python3" 546 | }, 547 | "language_info": { 548 | "codemirror_mode": { 549 | "name": "ipython", 550 | "version": 3 551 | }, 552 | "file_extension": ".py", 553 | "mimetype": "text/x-python", 554 | "name": "python", 555 | "nbconvert_exporter": "python", 556 | "pygments_lexer": "ipython3", 557 | "version": 
"3.10.12" 558 | } 559 | }, 560 | "nbformat": 4, 561 | "nbformat_minor": 2 562 | } 563 | -------------------------------------------------------------------------------- /data/winemag_tiny.json: -------------------------------------------------------------------------------- 1 | [{"country":"Argentina","description":"Vegetal-leaning, stewy aromas of plum and jammy berry are saucy. This feels sticky and low in acidity. Grassy, herbal plum and berry flavors are a bit green on the finish.","designation":"Misterio","points":83,"price":10.0,"province":"Mendoza Province","region_1":"Mendoza","region_2":"0","taster_name":"Michael Schachner","taster_twitter_handle":"@wineschach","title":"Finca Flichman 2015 Misterio Malbec (Mendoza)","variety":"Malbec","winery":"Finca Flichman","desc_len":171},{"country":"US","description":"Dark-fruit flavors reach a high level of ripeness in this full-bodied wine while a firm texture of supportive acidity and moderate tannins keep it balanced. It's tight and concentrated now, so best to drink after 2019.","designation":"0","points":91,"price":49.0,"province":"California","region_1":"Anderson Valley","region_2":"0","taster_name":"Jim Gordon","taster_twitter_handle":"@gordone_cellars","title":"Bink 2014 Pinot Noir (Anderson Valley)","variety":"Pinot Noir","winery":"Bink","desc_len":218},{"country":"Austria","description":"With notions of cherry and cinnamon on the nose and just slight fizz, this is a refreshing, fruit-driven sparkling ros\u00e9 that's full of strawberry and cherry notes\u2014it might just be the very definition of easy summer wine. 
It ends dry, yet refreshing.","designation":"Frizzante Ros\u00e9","points":85,"price":21.0,"province":"\u00d6sterreichischer Perlwein","region_1":"0","region_2":"0","taster_name":"Anne Krebiehl\u00a0MW","taster_twitter_handle":"@AnneInVino","title":"Gebeshuber 2013 Frizzante Ros\u00e9 Pinot Noir (\u00d6sterreichischer Perlwein)","variety":"Pinot Noir","winery":"Gebeshuber","desc_len":249},{"country":"US","description":"The unique flavors of this special block match pretty red-berry fruit to aromatic suggestions of potpourri and bouquet garni. It's detailed and nicely knitted together, with a pleasing hint of coconut carrying through the finish.","designation":"Southeast Block","points":93,"price":56.0,"province":"Oregon","region_1":"Eola-Amity Hills","region_2":"Willamette Valley","taster_name":"Paul Gregutt","taster_twitter_handle":"@paulgwine\u00a0","title":"Bethel Heights 2014 Southeast Block Pinot Noir (Eola-Amity Hills)","variety":"Pinot Noir","winery":"Bethel Heights","desc_len":229},{"country":"US","description":"This rich and round, Rh\u00f4ne Ranger red is loaded with fruit. The flavors pile on, one after another, beginning with citrus, banana and orange, and moving into berry and cherry and currant. It's underscored with moderate acidity and hints of pepper and tea.","designation":"Red Roan Red Wine","points":88,"price":30.0,"province":"Washington","region_1":"Columbia Valley (WA)","region_2":"Columbia Valley","taster_name":"Paul Gregutt","taster_twitter_handle":"@paulgwine\u00a0","title":"Tagaris 2005 Red Roan Red Wine Red (Columbia Valley (WA))","variety":"Red Blend","winery":"Tagaris","desc_len":255},{"country":"Italy","description":"Made mostly with Sangiovese and topped up with a small amount of Canaiolo and Malvasia Nera, this offers aromas that recall red berry, dark spice, menthol and a whiff of moist earth. The easygoing, tangy palate offers wild cherry, orange zest and a hint of clove alongside zesty acidity. 
Enjoy through 2016.","designation":"0","points":86,"price":0.0,"province":"Tuscany","region_1":"Chianti Classico","region_2":"0","taster_name":"Kerin O\u2019Keefe","taster_twitter_handle":"@kerinokeefe","title":"Casa Emma 2013 Chianti Classico","variety":"Red Blend","winery":"Casa Emma","desc_len":307},{"country":"Italy","description":"Aromas of toasted nut and dried stone fruit lead the nose on this structured white. On the palate, dried herb and a mineral note accent the core of yellow apple and citrus. Fresh acidity generates a clean finish.","designation":"0","points":88,"price":32.0,"province":"Northeastern Italy","region_1":"Friuli Isonzo","region_2":"0","taster_name":"Kerin O\u2019Keefe","taster_twitter_handle":"@kerinokeefe","title":"Borgo San Daniele 2013 Friulano (Friuli Isonzo)","variety":"Friulano","winery":"Borgo San Daniele","desc_len":212},{"country":"US","description":"Rich in leather and oak, with swirls of ripe red raspberry and blackberry cobbler, this is a big-boy Zinfandel with hair on its chest, a taste of vanilla on the finish.","designation":"Old Vine","points":85,"price":16.0,"province":"California","region_1":"Lodi","region_2":"Central Valley","taster_name":"Virginie Boone","taster_twitter_handle":"@vboone","title":"Seven Deadly Zins 2011 Old Vine Zinfandel (Lodi)","variety":"Zinfandel","winery":"Seven Deadly Zins","desc_len":168},{"country":"Italy","description":"Ripe and full bodied, this has aromas of lightly toasted oak, vanilla, red berry and coffee. On the palate, notes of mocha, anisette and grilled herb accent crushed wild cherry while bracing tannins provide structure. 
It closes on a licorice note.","designation":"Archineri Rosso","points":90,"price":35.0,"province":"Sicily & Sardinia","region_1":"Etna","region_2":"0","taster_name":"Kerin O\u2019Keefe","taster_twitter_handle":"@kerinokeefe","title":"Pietradolce 2012 Archineri Rosso (Etna)","variety":"Nerello Mascalese","winery":"Pietradolce","desc_len":247},{"country":"Argentina","description":"This warm-climate Chardonnay displays aromas of baked apple, toast and spice. It's resiny and heavy in the mouth, but there's enough acidity to maintain balance. Flavors of honeyed peach, baked apple, cinnamon and white pepper finish in tropical fashion, where banana is the lasting fruit note.","designation":"Bramare Marchiori Vineyard","points":88,"price":46.0,"province":"Mendoza Province","region_1":"Perdriel","region_2":"0","taster_name":"Michael Schachner","taster_twitter_handle":"@wineschach","title":"Vi\u00f1a Cobos 2012 Bramare Marchiori Vineyard Chardonnay (Perdriel)","variety":"Chardonnay","winery":"Vi\u00f1a Cobos","desc_len":294},{"country":"US","description":"Full bodied and softly tannic, this wine's fruit is front and center, wrapped in a subtle jacket of sweet oak. It dazzles with blackberries, cherries, raspberries, currants and chocolate flavors. Drink now for its sheer lusciousness.","designation":"0","points":90,"price":32.0,"province":"California","region_1":"Napa Valley","region_2":"Napa","taster_name":"0","taster_twitter_handle":"0","title":"White Oak 2009 Cabernet Sauvignon (Napa Valley)","variety":"Cabernet Sauvignon","winery":"White Oak","desc_len":233},{"country":"France","description":"This wine comes from a vineyard, created in 1894, in which the vines are planted against walls to get the maximum reflected heat. It is a majestic wine\u2014a smooth wave, opulent and rich with intense acidity and just a hint of spice. 
The wine is just ready to drink, although it will be even more impressive from 2018.","designation":"Clos d'Entre les Murs","points":95,"price":120.0,"province":"Loire Valley","region_1":"Saumur","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Ch\u00e2teau de Parnay 2011 Clos d'Entre les Murs (Saumur)","variety":"Chenin Blanc","winery":"Ch\u00e2teau de Parnay","desc_len":315},{"country":"France","description":"This firmly structured, beautifully crafted wine has a solid backdrop of tannins and plenty of concentration. Beyond that, it is already well balanced, bringing together its dark texture and blackberry fruit flavors with generous acidity. It needs to age, drink from 2020.","designation":"0","points":94,"price":0.0,"province":"Bordeaux","region_1":"Margaux","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Ch\u00e2teau Rauzan-S\u00e9gla 2012 Margaux","variety":"Bordeaux-style Red Blend","winery":"Ch\u00e2teau Rauzan-S\u00e9gla","desc_len":272},{"country":"France","description":"This wine has some richness, with ripe black currants and balanced acidity. The wood-aging just shows but, again, is balanced. With its fresh fruitiness, the wine will age quickly, so drink from 2017.","designation":"0","points":88,"price":45.0,"province":"Bordeaux","region_1":"Haut-M\u00e9doc","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Ch\u00e2teau Balac 2013 Haut-M\u00e9doc","variety":"Bordeaux-style Red Blend","winery":"Ch\u00e2teau Balac","desc_len":200},{"country":"France","description":"Firm tannins make this wine solid and dry. It has banana and bright cherry fruits well integrated with the acidity. 
The aftertaste suggests the wine could benefit from a few more months before drinking, so wait until 2017.","designation":"0","points":85,"price":15.0,"province":"Beaujolais","region_1":"Beaujolais-Villages","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Domaine Foretal 2015 Beaujolais-Villages","variety":"Gamay","winery":"Domaine Foretal","desc_len":222},{"country":"US","description":"There is a delicious, citrusy kick to this wine, lifting the bright berry fruit flavors. The flavors pile on, with peppery spice and chocolate, and a complex midpalate that tastes like cherry cola.","designation":"Guadalupe Vineyard","points":92,"price":40.0,"province":"Oregon","region_1":"Willamette Valley","region_2":"0","taster_name":"Paul Gregutt","taster_twitter_handle":"@paulgwine\u00a0","title":"Sol\u00e9na 2009 Guadalupe Vineyard Pinot Noir","variety":"Pinot Noir","winery":"Sol\u00e9na","desc_len":197},{"country":"France","description":"This ripe wine with its red fruits and well-balanced texture is beautifully ready to drink, with just the right crisp acidity to balance the soft richness. There is no sign yet of maturity, the fruit from this great vintage showing at its perfumed best. Drink the wine now.","designation":"Ros\u00e9 Brut","points":94,"price":130.0,"province":"Champagne","region_1":"Champagne","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Pol Roger 2008 Ros\u00e9 Brut (Champagne)","variety":"Champagne Blend","winery":"Pol Roger","desc_len":273},{"country":"US","description":"More than three quarters Syrah, this is made from young vines planted in the estate vineyard in the extreme western Yakima Valley. With such new vines it is difficult to point to specific varietal character. 
It's tart and lightly fruity, with a pleasant spicy edge.","designation":"Estate Syrah-Mourv\u00e8dre","points":85,"price":29.0,"province":"Washington","region_1":"Columbia Valley (WA)","region_2":"Columbia Valley","taster_name":"Paul Gregutt","taster_twitter_handle":"@paulgwine\u00a0","title":"Wilridge 2009 Estate Syrah-Mourv\u00e8dre Syrah-Mourv\u00e8dre (Columbia Valley (WA))","variety":"Syrah-Mourv\u00e8dre","winery":"Wilridge","desc_len":265},{"country":"Spain","description":"This is a bit clipped on the nose, with notes of saline, seashell, metal, leather and earthy blackberry. The palate feels wide and a touch wayward, while leathery berry and plum flavors include a note of mushroom. Lemony oak and herbal notes rise up on the finish.","designation":"Embocadero","points":89,"price":22.0,"province":"Northern Spain","region_1":"Ribera del Duero","region_2":"0","taster_name":"Michael Schachner","taster_twitter_handle":"@wineschach","title":"Bodega San Pedro Regalado 2011 Embocadero (Ribera del Duero)","variety":"Tempranillo","winery":"Bodega San Pedro Regalado","desc_len":264},{"country":"US","description":"Labeled a \u201cSuper Tuscan-Style Blend\u201d this is 36% Syrah, 45% Sangiovese and 18% Cabernet Sauvignon, all from Red Mountain vineyards. It opens gracefully with pretty raspberry and cherry fruit, then grounds itself in a base of mineral and earth, enlivened with natural acidity.","designation":"Flying Colors","points":89,"price":29.0,"province":"Washington","region_1":"Red Mountain","region_2":"Columbia Valley","taster_name":"Paul Gregutt","taster_twitter_handle":"@paulgwine\u00a0","title":"Genoa 2011 Flying Colors Red (Red Mountain)","variety":"Red Blend","winery":"Genoa","desc_len":275},{"country":"Hungary","description":"This dry Furmint is brilliant gold in color, with notes of Bartlett pear, Granny Smith apple, and lime zest in the complex bouquet. 
These aromas transfer seamlessly onto the palate as vibrant fruit flavors, with round body and bracing minerality. Delightful with food or by itself as an ap\u00e9ritif.","designation":"Szent Tam\u00e1s Betsek Vineyards","points":93,"price":25.0,"province":"Tokaji","region_1":"0","region_2":"0","taster_name":"Jeff Jenssen","taster_twitter_handle":"@worldwineguys","title":"Dobog\u00f3 2011 Szent Tam\u00e1s Betsek Vineyards (Tokaji)","variety":"Furmint","winery":"Dobog\u00f3","desc_len":296},{"country":"US","description":"Not at all a typical Sauvignon Blanc, this smells like apricot and honeysuckle and tastes like marmalade. It is dry, yet tastes like a late-harvest dessert wine. Expect a little taste adventure here.","designation":"0","points":85,"price":22.0,"province":"California","region_1":"Sierra Foothills","region_2":"Sierra Foothills","taster_name":"Jim Gordon","taster_twitter_handle":"@gordone_cellars","title":"Schmitz 24 Brix 2012 Sauvignon Blanc (Sierra Foothills)","variety":"Sauvignon Blanc","winery":"Schmitz 24 Brix","desc_len":199},{"country":"Spain","description":"This wine's classic Albari\u00f1o aromas of white flowers and stone fruit are simple and nice. It feels round and true but a touch flat. Thompson grape, melon and ripe apple flavors hold steady on the finish. Drink now.","designation":"0","points":88,"price":14.0,"province":"Galicia","region_1":"R\u00edas Baixas","region_2":"0","taster_name":"Michael Schachner","taster_twitter_handle":"@wineschach","title":"Condes de Albarei 2016 Albari\u00f1o (R\u00edas Baixas)","variety":"Albari\u00f1o","winery":"Condes de Albarei","desc_len":214},{"country":"France","description":"The Layon valley vineyards produce deliciously honeyed wines like this one. There is botrytis here that gives a light texture to the wine, along with apricot fruit. With this richness, there is also refreshing crisp acidity. 
Worth aging 4\u20135 years but already delicious.","designation":"Le Savetier","points":90,"price":25.0,"province":"Loire Valley","region_1":"Coteaux du Layon","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Ch\u00e2teau la Vari\u00e8re 2011 Le Savetier Chenin Blanc (Coteaux du Layon)","variety":"Chenin Blanc","winery":"Ch\u00e2teau la Vari\u00e8re","desc_len":269},{"country":"US","description":"Roasted coffee bean, ember and spice aromas take the lead on this blend. The flavors display the same, showing a surprising amount of restraint for the vintage.","designation":"Abracadabra","points":88,"price":22.0,"province":"Washington","region_1":"Columbia Valley (WA)","region_2":"Columbia Valley","taster_name":"Sean P. Sullivan","taster_twitter_handle":"@wawinereport","title":"Brian Carter Cellars 2013 Abracadabra Red (Columbia Valley (WA))","variety":"Red Blend","winery":"Brian Carter Cellars","desc_len":160},{"country":"Italy","description":"Here's a very attractive and seductive Barolo with chiseled mineral tones that really help the wine stand out in a crowd. Layers of graphite and slate give purity, linearity and clarity to this austere and elegant expression. Earthy tones, as well as forest berry flavors, appear on the long finish.","designation":"Costa di Bussia","points":92,"price":50.0,"province":"Piedmont","region_1":"Barolo","region_2":"0","taster_name":"0","taster_twitter_handle":"0","title":"Tenuta Arnulfo 2006 Costa di Bussia (Barolo)","variety":"Nebbiolo","winery":"Tenuta Arnulfo","desc_len":299},{"country":"US","description":"It would be a pity to open this wine anytime before, say, 2010, because it needs age. Now, and for a while, it will taste closed and tannic and raw. 
But such are the tannins and acids and overall balance, and the core of blackberries and cherries is so rich, that it should effortlessly negotiate the next decade.","designation":"0","points":92,"price":90.0,"province":"California","region_1":"Spring Mountain District","region_2":"Napa","taster_name":"0","taster_twitter_handle":"0","title":"Vineyard 7&8 2005 Cabernet Sauvignon (Spring Mountain District)","variety":"Cabernet Sauvignon","winery":"Vineyard 7&8","desc_len":313},{"country":"Hungary","description":"This wine is made from 100% Furmint and has an intriguing bouquet of jasmine and lemon blossom. It is crisp and acidic on the palate with pleasant flavors of lemon zest and lime juice.","designation":"0","points":89,"price":25.0,"province":"Tokaj","region_1":"0","region_2":"0","taster_name":"Jeff Jenssen","taster_twitter_handle":"@worldwineguys","title":"Kikelet 2015 Furmint (Tokaj)","variety":"Furmint","winery":"Kikelet","desc_len":184},{"country":"US","description":"This is a good wine, fancy and dry, with plenty of acidity. The citrus, peach and gooseberry flavors have an edge of new French oak. Hall bought this vineyard from Iron Horse, who used to blend some Viognier into the wine, with excellent results.","designation":"T Bar T Ranch","points":87,"price":30.0,"province":"California","region_1":"Alexander Valley","region_2":"Sonoma","taster_name":"0","taster_twitter_handle":"0","title":"Hall 2012 T Bar T Ranch Sauvignon Blanc (Alexander Valley)","variety":"Sauvignon Blanc","winery":"Hall","desc_len":246},{"country":"Italy","description":"This gorgeous Barolo has a penetrating, dark color and generous aromas of black cherry, tar, leather, tobacco, spice, black truffle and Spanish cedar. The intensity and power is impressive, and the mouthfeel is packed tight with ripe fruit flavors and a bold structure. 
Hold for 10\u201320 years to let those tannins evolve.","designation":"Le Vigne","points":94,"price":154.0,"province":"Piedmont","region_1":"Barolo","region_2":"0","taster_name":"0","taster_twitter_handle":"0","title":"Luciano Sandrone 2007 Le Vigne (Barolo)","variety":"Nebbiolo","winery":"Luciano Sandrone","desc_len":319},{"country":"US","description":"Although the blend includes 20% Primitivo and 5% Petite Sirah, which might be expected to add some tannic muscle to the wine, this is a simple red quaffer, with light raspberry fruit and a hint of milk chocolate.","designation":"Kubli Bench","points":84,"price":24.0,"province":"Oregon","region_1":"Applegate Valley","region_2":"Southern Oregon","taster_name":"Paul Gregutt","taster_twitter_handle":"@paulgwine\u00a0","title":"Troon 2009 Kubli Bench Zinfandel (Applegate Valley)","variety":"Zinfandel","winery":"Troon","desc_len":212},{"country":"US","description":"A chocolaty, fragrant wine \u201474% Merlot, 15% Cab Franc, 6% Malbec, 5% Petit Verdot\u2014with interesting spice highlights throughout the nose and mouth. The fruit is a mix of Bing cherry and strawberry, washed in a lush barrel-induced mix of chocolate, mocha, espresso and clove. Finishes a little hot, with alcohol listed at 14.6%.","designation":"DC3","points":88,"price":24.0,"province":"Washington","region_1":"Walla Walla Valley (WA)","region_2":"Columbia Valley","taster_name":"Paul Gregutt","taster_twitter_handle":"@paulgwine\u00a0","title":"Dynasty Cellars 2007 DC3 Meritage (Walla Walla Valley (WA))","variety":"Meritage","winery":"Dynasty Cellars","desc_len":326},{"country":"US","description":"Easton's estate Zin is big and lusty in blackberry, currant, licorice and spice flavors that are wrapped into sturdy tannins. It's a little on the superextracted, sweetly jammy side that emphasizes the fruit and pushes the wine's structural elements to the sideline. 
Could be an ager.","designation":"Estate Bottled","points":87,"price":30.0,"province":"California","region_1":"Shenandoah Valley (CA)","region_2":"Sierra Foothills","taster_name":"0","taster_twitter_handle":"0","title":"Easton 2005 Estate Bottled Zinfandel (Shenandoah Valley (CA))","variety":"Zinfandel","winery":"Easton","desc_len":284},{"country":"Portugal","description":"This is a spicy earthy wine with its high percentage of Alicante Bouschet giving rich tannins and a dark dense texture. The wine is packed with dense fruit and power, perhaps erring on the side of too much, with a pepper edge to the concentrated black fruit. Drink now.","designation":"Sem Barrica Unoaked","points":88,"price":22.0,"province":"Alentejano","region_1":"0","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Herdade das Servas 2015 Sem Barrica Unoaked Red (Alentejano)","variety":"Portuguese Red","winery":"Herdade das Servas","desc_len":269},{"country":"Argentina","description":"Far less ripe and appealing than the 2004 version. The '05 comes out of the chute smelling malty and lactic; airing reveals strawberry milk notes, while the flavors struggle to express themselves. Sticky, oily and as if someone hit the mute button along the way. Where's the ripe, lush quality we've come to love about Argentinean Malbec?","designation":"D.O.C Single Vineyard","points":84,"price":22.0,"province":"Mendoza Province","region_1":"Luj\u00e1n de Cuyo","region_2":"0","taster_name":"Michael Schachner","taster_twitter_handle":"@wineschach","title":"Luigi Bosca 2005 D.O.C Single Vineyard Malbec (Luj\u00e1n de Cuyo)","variety":"Malbec","winery":"Luigi Bosca","desc_len":338},{"country":"US","description":"Austere on the nose, and light on the palate, this vineyard-designated expression of this Rh\u00f4ne variety works in its full embrace of succulent pear, peach and tart citrus. 
Given time in neutral oak, it has enough body to remain meaningful in the glass around dry, focused acidity.","designation":"Haug Vineyard","points":91,"price":35.0,"province":"California","region_1":"Rutherford","region_2":"Napa","taster_name":"Virginie Boone","taster_twitter_handle":"@vboone","title":"Muddy Arch 2015 Haug Vineyard Roussanne (Rutherford)","variety":"Roussanne","winery":"Muddy Arch","desc_len":280},{"country":"Italy","description":"This Amarone has forward aromas of ripe black fruit and spice. The palate delivers rich black cherry flavors layered with chocolate, black pepper and hints of espresso. It's robust, with great depth and a velvety texture.","designation":"Ca' Florian","points":92,"price":0.0,"province":"Veneto","region_1":"Amarone della Valpolicella Classico","region_2":"0","taster_name":"Kerin O\u2019Keefe","taster_twitter_handle":"@kerinokeefe","title":"Tommasi 2007 Ca' Florian (Amarone della Valpolicella Classico)","variety":"Red Blend","winery":"Tommasi","desc_len":221},{"country":"Austria","description":"Beautifully perfumed, with acidity, white fruits and a mineral context. The wine is layered with citrus and lime, hints of fresh pineapple acidity. Screw cap.","designation":"Steinterrassen","points":89,"price":27.0,"province":"Kremstal","region_1":"0","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Stadt Krems 2009 Steinterrassen Riesling (Kremstal)","variety":"Riesling","winery":"Stadt Krems","desc_len":158},{"country":"US","description":"Whiffs of tea leaves and tobacco lend earthiness to this slightly savory but exotic appealing ros\u00e9. There's plenty of red-cherry and raspberry flavor, but it's slightly muted, embraced in shrouds of Cabernet Franc's brambly leafy character. 
It's a unique, full-bodied ros\u00e9 to try over dinner with poultry or other white meats.","designation":"Island Sunset Cabernet Franc","points":88,"price":16.0,"province":"New York","region_1":"New York","region_2":"New York Other","taster_name":"Anna Lee C. Iijima","taster_twitter_handle":"0","title":"Adirondack Winery 2015 Island Sunset Cabernet Franc Ros\u00e9 (New York)","variety":"Ros\u00e9","winery":"Adirondack Winery","desc_len":326},{"country":"Italy","description":"Surprisingly bright for the vintage and displaying Gaja's trademark elegance, this offers aromas of blue flower, perfumed berry, cake spices and a whiff of leather. The ripe palate delivers mature black cherry layered with white pepper and cinnamon alongside supple tannins. Drink 2016\u20132026.","designation":"Costa Russi","points":93,"price":500.0,"province":"Piedmont","region_1":"Langhe","region_2":"0","taster_name":"Kerin O\u2019Keefe","taster_twitter_handle":"@kerinokeefe","title":"Gaja 2011 Costa Russi Red (Langhe)","variety":"Red Blend","winery":"Gaja","desc_len":291},{"country":"US","description":"Dry, smooth and classy, with rich chocolate, blackberry and cherry pie, anise and oak flavors. A bit too soft and sweet for extended aging, but will provide pleasant drinking over the next six years.","designation":"Whitestone Vineyard","points":88,"price":40.0,"province":"California","region_1":"Central Coast","region_2":"Central Coast","taster_name":"0","taster_twitter_handle":"0","title":"Clos La Chance 2008 Whitestone Vineyard Cabernet Sauvignon (Central Coast)","variety":"Cabernet Sauvignon","winery":"Clos La Chance","desc_len":199},{"country":"Argentina","description":"Fruity on the nose, with a friendly mix of pineapple, apple, melon and powdered sugar aromas. Feels smooth and round but not heavy, with good acidity and likable, simple flavors of melon, banana and apple. 
There's a light note of toast on the finish.","designation":"0","points":87,"price":13.0,"province":"Mendoza Province","region_1":"Mendoza","region_2":"0","taster_name":"Michael Schachner","taster_twitter_handle":"@wineschach","title":"Alamos 2010 Chardonnay (Mendoza)","variety":"Chardonnay","winery":"Alamos","desc_len":250},{"country":"France","description":"Definite juiciness here, although the wine has a light and fresh character. The acidity and tannins are there to give the berry fruit a boost.","designation":"0","points":86,"price":13.0,"province":"Southwest France","region_1":"Cahors","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Ch\u00e2teau Saint-Sernin 2007 Malbec (Cahors)","variety":"Malbec","winery":"Ch\u00e2teau Saint-Sernin","desc_len":142},{"country":"Argentina","description":"Black plum, cola and chocolate define this textbook Malbec. The wine has body, heft and lots of fruit, but also some complexity and character. Flavors of black cherry, plum and cassis are layered and fairly boisterous, while the mouthfeel is firm and just slightly tannic. Just right for red meats and pastas.","designation":"Premium","points":89,"price":12.0,"province":"Mendoza Province","region_1":"Luj\u00e1n de Cuyo","region_2":"0","taster_name":"Michael Schachner","taster_twitter_handle":"@wineschach","title":"Piattelli 2005 Premium Malbec (Luj\u00e1n de Cuyo)","variety":"Malbec","winery":"Piattelli","desc_len":309},{"country":"Argentina","description":"Generous, sizable aromas of cassis, blackberry and violet are inviting. This is juicy, with slightly wiry acidity creating a racy mouthfeel. 
Flavors of berry and cassis are loamy on their own but ride comfortably on a wave of tartaric acidity, while the finish is lifted and lively, with black-fruit flavors.","designation":"0","points":91,"price":27.0,"province":"Mendoza Province","region_1":"Mendoza","region_2":"0","taster_name":"Michael Schachner","taster_twitter_handle":"@wineschach","title":"Do\u00f1a Silvina 2011 Malbec (Mendoza)","variety":"Malbec","winery":"Do\u00f1a Silvina","desc_len":308},{"country":"US","description":"In Spain, Albari\u00f1o is usually a steely, bone dry, sea-inflected white wine. Here it is made in an off-dry style, with 10 g\/L residual sugar. The sweetness masks any apparent varietal specificity, resulting in a pleasant, but generic white wine.","designation":"0","points":86,"price":22.0,"province":"Washington","region_1":"Columbia Valley (WA)","region_2":"Columbia Valley","taster_name":"Paul Gregutt","taster_twitter_handle":"@paulgwine\u00a0","title":"Castillo De Feliciana 2012 Albari\u00f1o (Columbia Valley (WA))","variety":"Albari\u00f1o","winery":"Castillo De Feliciana","desc_len":244},{"country":"US","description":"Sheer on the nose and palate with flavors of lemon and lime juice accented by hints of salted nuts and brine. These flavors are savory and clean, but could use a boost of concentration and depth.","designation":"0","points":83,"price":13.0,"province":"New York","region_1":"New York","region_2":"New York Other","taster_name":"Anna Lee C. Iijima","taster_twitter_handle":"0","title":"Brotherhood 2010 Chardonnay (New York)","variety":"Chardonnay","winery":"Brotherhood","desc_len":195},{"country":"Portugal","description":"In a medium sweet style, the wine has a light prickle and fine citrus acidity to balance the soft freshness. 
It is a refreshing wine, totally drinkable immediately.","designation":"Santola","points":85,"price":8.0,"province":"Vinho Verde","region_1":"0","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Messias 2015 Santola White (Vinho Verde)","variety":"Portuguese White","winery":"Messias","desc_len":164},{"country":"France","description":"Classic Viognier apricot flavors are prominent in this ripe, full wine. It's as poised as it is rich\u2014full of fruit, with a warm, rounded aftertaste.","designation":"0","points":87,"price":10.0,"province":"France Other","region_1":"Vin de France","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"Maison de la Villette 2016 Viognier (Vin de France)","variety":"Viognier","winery":"Maison de la Villette","desc_len":148},{"country":"Portugal","description":"A soft, creamy wine full of apple and pear fruits that are edged with crisp acidity. It is ripe, but still young; drink this attractive wine from 2017.","designation":"Grand'Arte","points":87,"price":13.0,"province":"Lisboa","region_1":"0","region_2":"0","taster_name":"Roger Voss","taster_twitter_handle":"@vossroger","title":"DFJ Vinhos 2015 Grand'Arte Alvarinho (Lisboa)","variety":"Alvarinho","winery":"DFJ Vinhos","desc_len":151}] -------------------------------------------------------------------------------- /images/hybrid_search_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/hybrid_search_1.png -------------------------------------------------------------------------------- /images/hybrid_search_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/hybrid_search_2.png 
-------------------------------------------------------------------------------- /images/hybrid_search_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/hybrid_search_3.png -------------------------------------------------------------------------------- /images/hybrid_search_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/hybrid_search_4.png -------------------------------------------------------------------------------- /images/keyword_search_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/keyword_search_1.png -------------------------------------------------------------------------------- /images/keyword_search_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/keyword_search_2.png -------------------------------------------------------------------------------- /images/keyword_search_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/keyword_search_3.png -------------------------------------------------------------------------------- /images/object_import_process_byov.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/object_import_process_byov.png 
-------------------------------------------------------------------------------- /images/object_import_process_vectorizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/object_import_process_vectorizer.png -------------------------------------------------------------------------------- /images/vector_search_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/vector_search_1.png -------------------------------------------------------------------------------- /images/vector_search_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/vector_search_2.png -------------------------------------------------------------------------------- /images/vector_search_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/9b9c8c96c60e3f562099c87728856d0661921a0f/images/vector_search_3.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.8.0 3 | appnope==0.1.4 4 | argon2-cffi==23.1.0 5 | argon2-cffi-bindings==21.2.0 6 | arrow==1.3.0 7 | asttokens==3.0.0 8 | async-lru==2.0.4 9 | attrs==24.3.0 10 | Authlib==1.3.1 11 | babel==2.16.0 12 | beautifulsoup4==4.12.3 13 | bleach==6.2.0 14 | certifi==2024.12.14 15 | cffi==1.17.1 16 | charset-normalizer==3.4.1 17 | comm==0.2.2 18 | cryptography==44.0.0 19 | debugpy==1.8.12 20 | decorator==5.1.1 21 | defusedxml==0.7.1 22 | 
deprecation==2.1.0 23 | executing==2.1.0 24 | fastjsonschema==2.21.1 25 | fqdn==1.5.1 26 | grpcio==1.69.0 27 | grpcio-health-checking==1.69.0 28 | grpcio-tools==1.69.0 29 | h11==0.14.0 30 | httpcore==1.0.7 31 | httpx==0.28.1 32 | idna==3.10 33 | ipykernel==6.29.5 34 | ipython==8.31.0 35 | isoduration==20.11.0 36 | jedi==0.19.2 37 | Jinja2==3.1.5 38 | json5==0.10.0 39 | jsonpointer==3.0.0 40 | jsonschema==4.23.0 41 | jsonschema-specifications==2024.10.1 42 | jupyter-events==0.11.0 43 | jupyter-lsp==2.2.5 44 | jupyter_client==8.6.3 45 | jupyter_core==5.7.2 46 | jupyter_server==2.15.0 47 | jupyter_server_terminals==0.5.3 48 | jupyterlab==4.3.4 49 | jupyterlab_pygments==0.3.0 50 | jupyterlab_server==2.27.3 51 | markdown-it-py==3.0.0 52 | MarkupSafe==3.0.2 53 | matplotlib-inline==0.1.7 54 | mdurl==0.1.2 55 | mistune==3.1.0 56 | nbclient==0.10.2 57 | nbconvert==7.16.5 58 | nbformat==5.10.4 59 | nest-asyncio==1.6.0 60 | notebook_shim==0.2.4 61 | numpy==2.2.2 62 | overrides==7.7.0 63 | packaging==24.2 64 | pandas==2.2.3 65 | pandocfilters==1.5.1 66 | parso==0.8.4 67 | pexpect==4.9.0 68 | platformdirs==4.3.6 69 | prometheus_client==0.21.1 70 | prompt_toolkit==3.0.50 71 | protobuf==5.29.3 72 | psutil==6.1.1 73 | ptyprocess==0.7.0 74 | pure_eval==0.2.3 75 | pycparser==2.22 76 | pydantic==2.10.5 77 | pydantic_core==2.27.2 78 | Pygments==2.19.1 79 | python-dateutil==2.9.0.post0 80 | python-json-logger==3.2.1 81 | pytz==2024.2 82 | PyYAML==6.0.2 83 | pyzmq==26.2.0 84 | referencing==0.36.1 85 | requests==2.32.3 86 | rfc3339-validator==0.1.4 87 | rfc3986-validator==0.1.1 88 | rich==14.0.0 89 | rpds-py==0.22.3 90 | Send2Trash==1.8.3 91 | six==1.17.0 92 | sniffio==1.3.1 93 | soupsieve==2.6 94 | stack-data==0.6.3 95 | terminado==0.18.1 96 | tinycss2==1.4.0 97 | tornado==6.4.2 98 | traitlets==5.14.3 99 | types-python-dateutil==2.9.0.20241206 100 | typing_extensions==4.12.2 101 | tzdata==2024.2 102 | uri-template==1.3.0 103 | urllib3==2.3.0 104 | validators==0.34.0 105 | 
wcwidth==0.2.13
106 | weaviate-agents==0.8.2
107 | weaviate-client==4.15.0
108 | webcolors==24.11.1
109 | webencodings==0.5.1
110 | websocket-client==1.8.0
111 | 
--------------------------------------------------------------------------------
/z_optional_explain_vectors.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "attachments": {},
5 |    "cell_type": "markdown",
6 |    "id": "9468501f",
7 |    "metadata": {},
8 |    "source": [
9 |     "\n",
10 |     " \"Open\n",
11 |     ""
12 |    ]
13 |   },
14 |   {
15 |    "cell_type": "code",
16 |    "execution_count": null,
17 |    "id": "ccd5cf0e",
18 |    "metadata": {
19 |     "slideshow": {
20 |      "slide_type": "skip"
21 |     }
22 |    },
23 |    "outputs": [],
24 |    "source": [
25 |     "# Extras for this optional notebook: openai, plotly, scikit-learn, seaborn and\n",
26 |     "# matplotlib may need to be installed separately if your environment lacks them.\n",
27 |     "import openai, os\n",
28 |     "import numpy as np\n",
29 |     "import seaborn as sns\n",
30 |     "import matplotlib.pyplot as plt\n",
31 |     "import plotly.express as px\n",
32 |     "from sklearn.decomposition import PCA\n",
33 |     "import pandas as pd\n",
34 |     "\n",
35 |     "# The key is read from the OPENAI_APIKEY environment variable (KeyError if unset).\n",
36 |     "openai_key = os.environ[\"OPENAI_APIKEY\"]\n",
37 |     "openai.api_key = openai_key"
38 |    ]
39 |   },
40 |   {
41 |    "cell_type": "code",
42 |    "execution_count": null,
43 |    "id": "d5117b44",
44 |    "metadata": {
45 |     "slideshow": {
46 |      "slide_type": "skip"
47 |     }
48 |    },
49 |    "outputs": [],
50 |    "source": [
51 |     "def get_emb(sent_inputs, model=\"text-embedding-ada-002\"):\n",
52 |     "    \"\"\"Request embeddings from the OpenAI embeddings API.\n",
53 |     "\n",
54 |     "    Args:\n",
55 |     "        sent_inputs: Text(s) to embed; forwarded unchanged as the API's `input`.\n",
56 |     "        model: Name of the embedding model to use.\n",
57 |     "\n",
58 |     "    Returns:\n",
59 |     "        A mapping whose \"data\" entries each carry an \"embedding\" value.\n",
60 |     "    \"\"\"\n",
61 |     "    if hasattr(openai, \"OpenAI\"):\n",
62 |     "        # openai>=1.0 removed `openai.Embedding`; go through a client instead and\n",
63 |     "        # convert the typed response to a dict so callers can keep indexing it.\n",
64 |     "        client = openai.OpenAI(api_key=openai.api_key)\n",
65 |     "        return client.embeddings.create(input=sent_inputs, model=model).model_dump()\n",
66 |     "    # Legacy openai<1.0 interface.\n",
67 |     "    return openai.Embedding.create(input=sent_inputs, model=model)"
68 |    ]
69 |   },
70 |   {
71 |    "cell_type": "code",
72 |    "execution_count": null,
73 |    "id": "ff5c81eb",
74 |    "metadata": {
75 |     "slideshow": {
76 |      "slide_type": "skip"
77 |     }
78 |    },
79 |    "outputs": [],
80 |    "source": [
81 |     "def plot_embs(df_in):\n",
82 |     "    \"\"\"Build a Plotly scatter plot from the 2-D points in `df_in`.\n",
83 |     "\n",
84 |     "    `df_in` must provide the columns \"PC1\", \"PC2\", \"category\" and \"sentence\".\n",
85 |     "    Points are coloured by category and show their sentence on hover.\n",
86 |     "    \"\"\"\n",
87 |     "    fig = px.scatter(\n",
88 |     "        df_in,\n",
89 |     "        template=\"ggplot2\",\n",
90 |     "        x=\"PC1\",\n",
91 |     "        y=\"PC2\",\n",
92 |     "        color=\"category\",\n",
93 |     "        # Pass a list: some older Plotly Express releases reject a bare string here.\n",
94 |     "        hover_data=[\"sentence\"],\n",
95 |     "    )\n",
96 |     "    fig.update_layout(margin=dict(l=20, r=20, b=20, t=20, pad=4))\n",
97 |     "    fig.update_traces(marker_size=20)\n",
98 |     "    return fig"
99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": null,
104 |    "id": "a9874eb0",
105 |    "metadata": {
106 |     "slideshow": {
107 |      "slide_type": "-"
108 |     }
109 |    },
110 |    "outputs": [],
111 |    "source": [
112 |     "def plot_vectors(arr_in, sentences=None):\n",
113 |     "    \"\"\"Project embeddings to 2-D with PCA and plot them.\n",
114 |     "\n",
115 |     "    Args:\n",
116 |     "        arr_in: 2-D array with one embedding per row.\n",
117 |     "        sentences: Sentences matching the rows of `arr_in`. Defaults to the\n",
118 |     "            notebook-level `sent_inputs` list for backward compatibility.\n",
119 |     "\n",
120 |     "    Returns:\n",
121 |     "        The Plotly figure produced by `plot_embs`.\n",
122 |     "    \"\"\"\n",
123 |     "    if sentences is None:\n",
124 |     "        sentences = sent_inputs\n",
125 |     "    if len(sentences) != len(arr_in):\n",
126 |     "        raise ValueError(\n",
127 |     "            f\"Got {len(arr_in)} embeddings but {len(sentences)} sentences.\"\n",
128 |     "        )\n",
129 |     "\n",
130 |     "    pca = PCA(n_components=2)\n",
131 |     "    embeddings_pca = pca.fit_transform(arr_in)\n",
132 |     "\n",
133 |     "    df = pd.DataFrame(embeddings_pca, columns=[\"PC1\", \"PC2\"])\n",
134 |     "    df[\"sentence\"] = list(sentences)\n",
135 |     "    # The demo data starts with five cat sentences followed by five dog sentences;\n",
136 |     "    # everything appended later is labelled \"other\". `.loc` slices are inclusive.\n",
137 |     "    df[\"category\"] = \"other\"\n",
138 |     "    df.loc[:4, \"category\"] = \"cats\"\n",
139 |     "    df.loc[5:9, \"category\"] = \"dogs\"\n",
140 |     "    return plot_embs(df)"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": null,
146 |    "id": "af386f2b",
147 |    "metadata": {
148 |     "slideshow": {
149 |      "slide_type": "-"
150 |     }
151 |    },
152 |    "outputs": [],
153 |    "source": [
154 |     "def add_new_emb(sents_in, arr_in, sent_inputs_in):\n",
155 |     "    \"\"\"Embed new sentences and append them to the existing data.\n",
156 |     "\n",
157 |     "    Args:\n",
158 |     "        sents_in: A sentence or list of sentences to embed.\n",
159 |     "        arr_in: Existing embeddings, one per row.\n",
160 |     "        sent_inputs_in: Sentences matching the rows of `arr_in`.\n",
161 |     "\n",
162 |     "    Returns:\n",
163 |     "        A tuple `(embeddings, sentences)` extended with the new entries; the\n",
164 |     "        inputs themselves are not modified.\n",
165 |     "    \"\"\"\n",
166 |     "    if isinstance(sents_in, str):\n",
167 |     "        # A bare string would otherwise fail in the list concatenation below.\n",
168 |     "        sents_in = [sents_in]\n",
169 |     "    if not sents_in:\n",
170 |     "        # Nothing to embed: skip the API call.\n",
171 |     "        return arr_in, sent_inputs_in\n",
172 |     "\n",
173 |     "    resp = get_emb(sents_in)\n",
174 |     "    # Stack all new rows in one call instead of re-copying the array per sentence.\n",
175 |     "    new_rows = [d[\"embedding\"] for d in resp[\"data\"]]\n",
176 |     "    arr_out = np.vstack([arr_in, *new_rows])\n",
177 |     "    return arr_out, list(sent_inputs_in) + list(sents_in)"
178 |    ]
179 |   },
180 |   {
181 |    "attachments": {},
182 |    "cell_type": "markdown",
183 |    "id": "f6f12e8b-9f4e-45b1-9221-f708e712f124",
184 |    "metadata": {
185 |     "slideshow": {
186 |      "slide_type": "slide"
187 |     }
188 |    },
189 |    "source": [
190 |     "## Visual demo - vector embeddings"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": null,
196 |    "id": "cbda975d",
197 |    "metadata": {
198 |     "slideshow": {
199 |      "slide_type": "slide"
200 |     }
201 |    },
202 |    "outputs": [],
203 |    "source": [
204 |     "sent_inputs = [\n",
205 |     "    # Cat-related sentences\n",
206 |     "    \"The Bengal showed off its striking coat pattern.\",\n",
207 |     "    \"A lion's powerful roar echoed through the plains.\",\n",
208 |     "    \"A leopard's spots provided perfect camouflage in the dappled light.\",\n",
209 |     "    \"A cheetah's unmatched speed allowed it to outrun its prey.\",\n",
210 |     "    \"The Sphynx basked in the warmth of its owner's lap.\",\n",
211 |     "    # Dog-related sentences\n",
212 |     "    \"The golden retriever chased after the frisbee.\",\n",
213 |     "    \"The playful puppy rolled in the grass.\",\n",
214 |     "    \"A loyal companion is always by your side.\",\n",
215 |     "    \"The Labrador retriever enjoyed playing in the water.\",\n",
216 |     "    \"The family adopted a furry friend from the shelter.\"\n",
217 |     "]\n",
218 |     "\n",
219 |     "resp = get_emb(sent_inputs)\n",
220 |     "arr = np.array([i[\"embedding\"] for i in resp[\"data\"]])"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": null,
226 |    "id": "e0396eb6",
227 |    "metadata": {
228 |     "slideshow": {
229 |      "slide_type": "slide"
230 |     }
231 |    },
232 |    "outputs": [],
233 |    "source": [
234 |     "fig = plot_vectors(arr, sent_inputs)\n",
235 |     "fig.show()"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": null,
241 |    "id": "acb69a06",
242 |    "metadata": {
243 |     "slideshow": {
244 |      "slide_type": "slide"
245 |     }
246 |    },
247 |    "outputs": [],
248 |    "source": [
249 |     "arr, sent_inputs = add_new_emb(\n",
250 |     "    [\"Who doesn't love going to the Alps in the summer?\"],\n",
251 |     "    arr,\n",
252 |     "    sent_inputs\n",
253 |     ")"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": null,
259 |    "id": "56e5934f",
260 |    "metadata": {
261 |     "slideshow": {
262 |      "slide_type": "subslide"
263 |     }
264 |    },
265 |    "outputs": [],
266 |    "source": [
267 |     "fig = plot_vectors(arr, sent_inputs)\n",
268 |     "fig.show()"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": null,
274 |    "id": "7d501f4f",
275 |    "metadata": {
276 |     "slideshow": {
277 |      "slide_type": "slide"
278 |     }
279 |    },
280 |    "outputs": [],
281 |    "source": [
282 |     "new_sents = [\n",
283 |     "    \"The chef prepared a delicious vegetable stir-fry for dinner.\",\n",
284 |     "    \"The astronomer gazed at the distant stars, searching for undiscovered galaxies.\",\n",
285 |     "    \"The young artist skillfully painted a beautiful landscape on the canvas.\",\n",
286 |     "    \"John is loving taking his new puppy out for walks in the morning.\"\n",
287 |     "]"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": null,
293 |    "id": "6b9410f0",
294 |    "metadata": {
295 |     "slideshow": {
296 |      "slide_type": "slide"
297 |     }
298 |    },
299 |    "outputs": [],
300 |    "source": [
301 |     "arr, sent_inputs = add_new_emb(new_sents, arr, sent_inputs)\n",
302 |     "fig = plot_vectors(arr, sent_inputs)\n",
303 |     "fig.show()"
304 |    ]
305 |   }
306 |  ],
307 |  "metadata": {
308 |   "celltoolbar": "Slideshow",
309 |   "kernelspec": {
310 |    "display_name": "Python 3 (ipykernel)",
311 |    "language": "python",
312 |    "name": "python3"
313 |   },
314 |   "language_info": {
315 |    "codemirror_mode": {
316 |     "name": "ipython",
317 |     "version": 3
318 |    },
319 |    "file_extension": ".py",
320 |    "mimetype": "text/x-python",
321 |    "name": "python",
322 |    "nbconvert_exporter": "python",
323 |    "pygments_lexer": "ipython3",
324 |    "version": "3.10.8"
325 |   },
326 |   "varInspector": {
327 |    "cols": {
328 |     "lenName": 16,
329 |     "lenType": 16,
330 |     "lenVar": 40
331 |    },
332 |    "kernels_config": {
333 |     "python": {
334 |      "delete_cmd_postfix": "",
335 |      "delete_cmd_prefix": "del ",
336 |      "library": "var_list.py",
337 |      "varRefreshCmd": "print(var_dic_list())"
338 |     },
339 |     "r": {
340 |      "delete_cmd_postfix": ") ",
341 |      "delete_cmd_prefix": "rm(",
342 |      "library": "var_list.r",
343 |      "varRefreshCmd": "cat(var_dic_list()) "
344 |     }
345 |    },
346 |    "types_to_exclude": [
347 |     "module",
348 |     "function",
349 |     "builtin_function_or_method",
350 |     "instance",
351 |     "_Feature"
352 |    ],
353 |    "window_display": false
354 |   }
355 |  },
356 |  "nbformat": 4,
357 |  "nbformat_minor": 5
358 | }
359 | 
--------------------------------------------------------------------------------