├── .dockerignore ├── .gitignore ├── archive ├── docker_milvus │ ├── Dockerfile │ ├── install.sh │ ├── readme.md │ └── supervisord.conf ├── notebooks │ ├── 01_bm25.ipynb │ ├── 02_dense_retriever_milvus.ipynb │ └── 04_ann-elastic.ipynb └── notebooks_stackoverflow │ ├── 00_data_fetch_bq.ipynb │ ├── 00_data_fetch_spark.ipynb │ ├── 01_b_setup.ipynb │ ├── 01_data_cleanup.ipynb │ ├── 01_data_subset.ipynb │ ├── 01_workshop_data_preview.ipynb │ ├── 02_retrieval_dense_milvus.ipynb │ ├── 02_retrieval_sparse.ipynb │ ├── 03_comparision.ipynb │ ├── ann_benchmark_recall.ipynb │ ├── metrics_utils.py │ ├── other__retrieve_rerank_simple_wikipedia.ipynb │ ├── test_setup.ipynb │ └── workshop_setup.ipynb ├── assets ├── all_assets.sw ├── slides_odsc2022.pdf ├── slides_pydatanyc2022.pdf └── slides_pydataseattle2023.pdf ├── docker-compose.yaml ├── docs ├── internal_notes.md └── slide_notes.md ├── environment.yaml ├── notebooks ├── 00_a_setup_dataset.ipynb ├── 00_b_setup_stats.ipynb ├── 00_c_sample_images.ipynb ├── 01_bm25_elastic.ipynb ├── 02_dense_retriever.ipynb ├── 03_clip_embed.ipynb ├── 04_ann.ipynb └── workshop_setup.ipynb ├── readme.md ├── requirements.txt └── workshop_infra ├── Dockerfile ├── cert └── .gitkeep ├── config.enc.yaml ├── config_public.yaml ├── scripts ├── build_setup_root.sh ├── build_setup_user.sh └── container_startup.sh └── setup.md /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/.dockerignore -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/.gitignore -------------------------------------------------------------------------------- /archive/docker_milvus/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/docker_milvus/Dockerfile -------------------------------------------------------------------------------- /archive/docker_milvus/install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/docker_milvus/install.sh -------------------------------------------------------------------------------- /archive/docker_milvus/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/docker_milvus/readme.md -------------------------------------------------------------------------------- /archive/docker_milvus/supervisord.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/docker_milvus/supervisord.conf -------------------------------------------------------------------------------- /archive/notebooks/01_bm25.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks/01_bm25.ipynb -------------------------------------------------------------------------------- /archive/notebooks/02_dense_retriever_milvus.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks/02_dense_retriever_milvus.ipynb -------------------------------------------------------------------------------- /archive/notebooks/04_ann-elastic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks/04_ann-elastic.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/00_data_fetch_bq.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/00_data_fetch_bq.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/00_data_fetch_spark.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/00_data_fetch_spark.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/01_b_setup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/01_b_setup.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/01_data_cleanup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/01_data_cleanup.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/01_data_subset.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/01_data_subset.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/01_workshop_data_preview.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/01_workshop_data_preview.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/02_retrieval_dense_milvus.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/02_retrieval_dense_milvus.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/02_retrieval_sparse.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/02_retrieval_sparse.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/03_comparision.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/03_comparision.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/ann_benchmark_recall.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/ann_benchmark_recall.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/metrics_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/metrics_utils.py -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/other__retrieve_rerank_simple_wikipedia.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/other__retrieve_rerank_simple_wikipedia.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/test_setup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/test_setup.ipynb -------------------------------------------------------------------------------- /archive/notebooks_stackoverflow/workshop_setup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/archive/notebooks_stackoverflow/workshop_setup.ipynb -------------------------------------------------------------------------------- /assets/all_assets.sw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/assets/all_assets.sw -------------------------------------------------------------------------------- /assets/slides_odsc2022.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/assets/slides_odsc2022.pdf -------------------------------------------------------------------------------- /assets/slides_pydatanyc2022.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/assets/slides_pydatanyc2022.pdf -------------------------------------------------------------------------------- /assets/slides_pydataseattle2023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/assets/slides_pydataseattle2023.pdf -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/docker-compose.yaml -------------------------------------------------------------------------------- /docs/internal_notes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/docs/internal_notes.md -------------------------------------------------------------------------------- /docs/slide_notes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/docs/slide_notes.md -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/environment.yaml -------------------------------------------------------------------------------- /notebooks/00_a_setup_dataset.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/notebooks/00_a_setup_dataset.ipynb -------------------------------------------------------------------------------- /notebooks/00_b_setup_stats.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/notebooks/00_b_setup_stats.ipynb -------------------------------------------------------------------------------- /notebooks/00_c_sample_images.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/notebooks/00_c_sample_images.ipynb -------------------------------------------------------------------------------- /notebooks/01_bm25_elastic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/notebooks/01_bm25_elastic.ipynb -------------------------------------------------------------------------------- /notebooks/02_dense_retriever.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/notebooks/02_dense_retriever.ipynb -------------------------------------------------------------------------------- /notebooks/03_clip_embed.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/notebooks/03_clip_embed.ipynb -------------------------------------------------------------------------------- /notebooks/04_ann.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/notebooks/04_ann.ipynb -------------------------------------------------------------------------------- /notebooks/workshop_setup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/notebooks/workshop_setup.ipynb -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/readme.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/requirements.txt -------------------------------------------------------------------------------- /workshop_infra/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/workshop_infra/Dockerfile -------------------------------------------------------------------------------- /workshop_infra/cert/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /workshop_infra/config.enc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/workshop_infra/config.enc.yaml -------------------------------------------------------------------------------- /workshop_infra/config_public.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/workshop_infra/config_public.yaml -------------------------------------------------------------------------------- /workshop_infra/scripts/build_setup_root.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/workshop_infra/scripts/build_setup_root.sh -------------------------------------------------------------------------------- /workshop_infra/scripts/build_setup_user.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/workshop_infra/scripts/build_setup_user.sh -------------------------------------------------------------------------------- /workshop_infra/scripts/container_startup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/workshop_infra/scripts/container_startup.sh -------------------------------------------------------------------------------- /workshop_infra/setup.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npatta01/search-engine-workshop/HEAD/workshop_infra/setup.md --------------------------------------------------------------------------------