├── .chainlit ├── config.toml └── translations │ ├── bn.json │ ├── de.json │ ├── en-US.json │ ├── es-ES.json │ ├── fa-IR.json │ ├── fr-FR.json │ ├── gu.json │ ├── he-IL.json │ ├── hi.json │ ├── it-IT.json │ ├── ja-JP.json │ ├── kn.json │ ├── ml.json │ ├── mr.json │ ├── pt-BR.json │ ├── pt-PT.json │ ├── ru-RU.json │ ├── ta.json │ ├── te.json │ └── zh-CN.json ├── .gitignore ├── LICENSE ├── README.md ├── backend_server.py ├── benchmark ├── prompts │ ├── detect_language.prompt │ ├── feedback_gpt4.prompt │ ├── scale_factuality.prompt │ ├── split_claim_for_eval.prompt │ ├── translate.prompt │ ├── user_multihop.prompt │ ├── user_with_passage.prompt │ └── user_with_topic.prompt ├── scripts │ ├── evaluate_distillation.py │ └── get_wikipedia_articles_for_benchmark.py ├── topics │ ├── head_articles_de.json │ ├── head_articles_en.json │ ├── head_articles_es.json │ ├── head_articles_fa.json │ ├── head_articles_fr.json │ ├── head_articles_it.json │ ├── head_articles_ja.json │ ├── head_articles_pt.json │ ├── head_articles_ru.json │ ├── head_articles_zh.json │ ├── recent_articles_de.json │ ├── recent_articles_en.json │ ├── recent_articles_es.json │ ├── recent_articles_fa.json │ ├── recent_articles_fr.json │ ├── recent_articles_it.json │ ├── recent_articles_ja.json │ ├── recent_articles_pt.json │ ├── recent_articles_ru.json │ ├── recent_articles_zh.json │ ├── tail_articles_de.json │ ├── tail_articles_en.json │ ├── tail_articles_es.json │ ├── tail_articles_fa.json │ ├── tail_articles_fr.json │ ├── tail_articles_it.json │ ├── tail_articles_ja.json │ ├── tail_articles_pt.json │ ├── tail_articles_ru.json │ ├── tail_articles_zh.json │ ├── wizard_of_internet_dev.txt │ └── wizard_of_internet_train.txt └── user_simulator.py ├── chainlit.md ├── chainlit_callback_handler.py ├── command_line_chatbot.py ├── conda_env.yaml ├── corpora.py ├── database.py ├── llm_config.yaml ├── pipelines ├── chatbot.py ├── dialogue_state.py ├── pipeline_arguments.py ├── prompts │ ├── draft_w_citation.prompt │ ├── filter_irrelevant_info.prompt │ ├── generate_split_claims.prompt │ ├── query.prompt │ ├── refine.prompt │ ├── refine_w_feedback.prompt │ ├── rerank_listwise.prompt │ └── rerank_pointwise.prompt └── utils.py ├── pixi.lock ├── pixi.toml ├── preprocessing ├── __init__.py ├── block.py ├── convert_jsonl_to_parquet.py ├── convert_old_collection_to_new_format.py ├── custom_docling.py ├── entity_translation.py ├── get_all_wiki_sizes.py ├── get_common_english_words.py ├── inspect_collection.py ├── preprocess_semantic_scholar_dump.py ├── preprocess_wikipedia_html_dump.py ├── upload_collections_to_hf_hub.py ├── utils.py ├── wikipedia_disambiguation.py └── word_list.txt ├── public ├── css │ ├── search.css │ └── wikichat.css ├── favicon.png ├── img │ ├── empty.png │ ├── general_history_of_africa.png │ ├── logo_dark.png │ ├── logo_light.png │ ├── pipeline.svg │ ├── s2_logo.png │ ├── stanford.png │ ├── the_african_times.jpg │ ├── user_avatar.png │ └── wikipedia.png ├── js │ ├── search.js │ ├── upload.js │ └── wikichat.js └── templates │ ├── base.jinja2 │ ├── partials │ ├── _loading_spinner.jinja2 │ ├── _no_results_alert.jinja2 │ └── _search_form.jinja2 │ ├── search.jinja2 │ ├── search_api.jinja2 │ └── upload.jinja2 ├── retrieval ├── create_index.py ├── embedding_model_info.py ├── llm_reranker.py ├── qdrant_index.py ├── retrieval_commons.py ├── retriever_api.py ├── retriever_server.py ├── scripts │ ├── add_payload_index.py │ ├── benchmark_embedding_latency.py │ ├── compare_rerankers.py │ ├── qdrant_snapshot.py │ └── upload_folder_to_hf_hub.py ├── search_query.py ├── search_result_block.py ├── server_utils.py └── upload_collection.py ├── tasks ├── __init__.py ├── benchmark.py ├── defaults.py ├── docker_tasks.py ├── main.py ├── preprocessing.py ├── retrieval.py └── setup.py ├── tests ├── pytest.ini ├── test_collection.jsonl ├── test_collection_malformed.jsonl ├── test_custom_docling.py ├── test_index.py ├── test_pipelines.py ├── test_search_query.py └── test_wikipedia_preprocessing.py └── utils ├── __init__.py ├── cache.py ├── docker_utils.py └── logging.py /.chainlit/config.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/config.toml -------------------------------------------------------------------------------- /.chainlit/translations/bn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/bn.json -------------------------------------------------------------------------------- /.chainlit/translations/de.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/de.json -------------------------------------------------------------------------------- /.chainlit/translations/en-US.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/en-US.json -------------------------------------------------------------------------------- /.chainlit/translations/es-ES.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/es-ES.json -------------------------------------------------------------------------------- /.chainlit/translations/fa-IR.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/fa-IR.json -------------------------------------------------------------------------------- /.chainlit/translations/fr-FR.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/fr-FR.json -------------------------------------------------------------------------------- /.chainlit/translations/gu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/gu.json -------------------------------------------------------------------------------- /.chainlit/translations/he-IL.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/he-IL.json -------------------------------------------------------------------------------- /.chainlit/translations/hi.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/hi.json -------------------------------------------------------------------------------- /.chainlit/translations/it-IT.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/it-IT.json -------------------------------------------------------------------------------- /.chainlit/translations/ja-JP.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/ja-JP.json -------------------------------------------------------------------------------- /.chainlit/translations/kn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/kn.json -------------------------------------------------------------------------------- /.chainlit/translations/ml.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/ml.json -------------------------------------------------------------------------------- /.chainlit/translations/mr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/mr.json -------------------------------------------------------------------------------- /.chainlit/translations/pt-BR.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/pt-BR.json -------------------------------------------------------------------------------- /.chainlit/translations/pt-PT.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/pt-PT.json -------------------------------------------------------------------------------- /.chainlit/translations/ru-RU.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/ru-RU.json -------------------------------------------------------------------------------- /.chainlit/translations/ta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/ta.json -------------------------------------------------------------------------------- /.chainlit/translations/te.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/te.json -------------------------------------------------------------------------------- /.chainlit/translations/zh-CN.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.chainlit/translations/zh-CN.json -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/README.md -------------------------------------------------------------------------------- /backend_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/backend_server.py -------------------------------------------------------------------------------- /benchmark/prompts/detect_language.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/prompts/detect_language.prompt -------------------------------------------------------------------------------- /benchmark/prompts/feedback_gpt4.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/prompts/feedback_gpt4.prompt -------------------------------------------------------------------------------- /benchmark/prompts/scale_factuality.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/prompts/scale_factuality.prompt -------------------------------------------------------------------------------- /benchmark/prompts/split_claim_for_eval.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/prompts/split_claim_for_eval.prompt -------------------------------------------------------------------------------- /benchmark/prompts/translate.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/prompts/translate.prompt -------------------------------------------------------------------------------- /benchmark/prompts/user_multihop.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/prompts/user_multihop.prompt -------------------------------------------------------------------------------- /benchmark/prompts/user_with_passage.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/prompts/user_with_passage.prompt -------------------------------------------------------------------------------- /benchmark/prompts/user_with_topic.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/prompts/user_with_topic.prompt -------------------------------------------------------------------------------- /benchmark/scripts/evaluate_distillation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/scripts/evaluate_distillation.py -------------------------------------------------------------------------------- /benchmark/scripts/get_wikipedia_articles_for_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/scripts/get_wikipedia_articles_for_benchmark.py -------------------------------------------------------------------------------- /benchmark/topics/head_articles_de.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_de.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_en.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_en.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_es.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_es.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_fa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_fa.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_fr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_fr.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_it.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_it.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_ja.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_ja.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_pt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_pt.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_ru.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_ru.json -------------------------------------------------------------------------------- /benchmark/topics/head_articles_zh.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/head_articles_zh.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_de.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_de.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_en.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_en.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_es.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_es.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_fa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_fa.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_fr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_fr.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_it.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_it.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_ja.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_ja.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_pt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_pt.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_ru.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_ru.json -------------------------------------------------------------------------------- /benchmark/topics/recent_articles_zh.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/recent_articles_zh.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_de.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_de.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_en.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_en.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_es.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_es.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_fa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_fa.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_fr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_fr.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_it.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_it.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_ja.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_ja.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_pt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_pt.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_ru.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_ru.json -------------------------------------------------------------------------------- /benchmark/topics/tail_articles_zh.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/tail_articles_zh.json -------------------------------------------------------------------------------- /benchmark/topics/wizard_of_internet_dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/wizard_of_internet_dev.txt -------------------------------------------------------------------------------- /benchmark/topics/wizard_of_internet_train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/topics/wizard_of_internet_train.txt -------------------------------------------------------------------------------- /benchmark/user_simulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/benchmark/user_simulator.py -------------------------------------------------------------------------------- /chainlit.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/chainlit.md -------------------------------------------------------------------------------- /chainlit_callback_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/chainlit_callback_handler.py -------------------------------------------------------------------------------- /command_line_chatbot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/command_line_chatbot.py -------------------------------------------------------------------------------- /conda_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/conda_env.yaml -------------------------------------------------------------------------------- /corpora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/corpora.py -------------------------------------------------------------------------------- /database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/database.py -------------------------------------------------------------------------------- /llm_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/llm_config.yaml -------------------------------------------------------------------------------- /pipelines/chatbot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/chatbot.py -------------------------------------------------------------------------------- /pipelines/dialogue_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/dialogue_state.py -------------------------------------------------------------------------------- /pipelines/pipeline_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/pipeline_arguments.py -------------------------------------------------------------------------------- /pipelines/prompts/draft_w_citation.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/prompts/draft_w_citation.prompt -------------------------------------------------------------------------------- /pipelines/prompts/filter_irrelevant_info.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/prompts/filter_irrelevant_info.prompt -------------------------------------------------------------------------------- /pipelines/prompts/generate_split_claims.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/prompts/generate_split_claims.prompt -------------------------------------------------------------------------------- /pipelines/prompts/query.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/prompts/query.prompt -------------------------------------------------------------------------------- /pipelines/prompts/refine.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/prompts/refine.prompt -------------------------------------------------------------------------------- /pipelines/prompts/refine_w_feedback.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/prompts/refine_w_feedback.prompt -------------------------------------------------------------------------------- /pipelines/prompts/rerank_listwise.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/prompts/rerank_listwise.prompt -------------------------------------------------------------------------------- /pipelines/prompts/rerank_pointwise.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/prompts/rerank_pointwise.prompt -------------------------------------------------------------------------------- /pipelines/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pipelines/utils.py -------------------------------------------------------------------------------- /pixi.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pixi.lock -------------------------------------------------------------------------------- /pixi.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/pixi.toml -------------------------------------------------------------------------------- /preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preprocessing/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/block.py -------------------------------------------------------------------------------- /preprocessing/convert_jsonl_to_parquet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/convert_jsonl_to_parquet.py -------------------------------------------------------------------------------- /preprocessing/convert_old_collection_to_new_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/convert_old_collection_to_new_format.py -------------------------------------------------------------------------------- /preprocessing/custom_docling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/custom_docling.py -------------------------------------------------------------------------------- /preprocessing/entity_translation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/entity_translation.py -------------------------------------------------------------------------------- /preprocessing/get_all_wiki_sizes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/get_all_wiki_sizes.py -------------------------------------------------------------------------------- /preprocessing/get_common_english_words.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/get_common_english_words.py -------------------------------------------------------------------------------- /preprocessing/inspect_collection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/inspect_collection.py -------------------------------------------------------------------------------- /preprocessing/preprocess_semantic_scholar_dump.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/preprocess_semantic_scholar_dump.py -------------------------------------------------------------------------------- /preprocessing/preprocess_wikipedia_html_dump.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/preprocess_wikipedia_html_dump.py -------------------------------------------------------------------------------- /preprocessing/upload_collections_to_hf_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/upload_collections_to_hf_hub.py -------------------------------------------------------------------------------- /preprocessing/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/utils.py -------------------------------------------------------------------------------- /preprocessing/wikipedia_disambiguation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/wikipedia_disambiguation.py -------------------------------------------------------------------------------- /preprocessing/word_list.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/preprocessing/word_list.txt -------------------------------------------------------------------------------- /public/css/search.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/css/search.css -------------------------------------------------------------------------------- /public/css/wikichat.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/css/wikichat.css -------------------------------------------------------------------------------- /public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/favicon.png -------------------------------------------------------------------------------- /public/img/empty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/empty.png -------------------------------------------------------------------------------- /public/img/general_history_of_africa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/general_history_of_africa.png -------------------------------------------------------------------------------- /public/img/logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/logo_dark.png -------------------------------------------------------------------------------- /public/img/logo_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/logo_light.png -------------------------------------------------------------------------------- /public/img/pipeline.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/pipeline.svg -------------------------------------------------------------------------------- /public/img/s2_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/s2_logo.png -------------------------------------------------------------------------------- /public/img/stanford.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/stanford.png -------------------------------------------------------------------------------- /public/img/the_african_times.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/the_african_times.jpg -------------------------------------------------------------------------------- /public/img/user_avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/user_avatar.png -------------------------------------------------------------------------------- /public/img/wikipedia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/img/wikipedia.png -------------------------------------------------------------------------------- /public/js/search.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/js/search.js -------------------------------------------------------------------------------- /public/js/upload.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/js/upload.js -------------------------------------------------------------------------------- /public/js/wikichat.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/js/wikichat.js -------------------------------------------------------------------------------- /public/templates/base.jinja2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/templates/base.jinja2 -------------------------------------------------------------------------------- /public/templates/partials/_loading_spinner.jinja2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/templates/partials/_loading_spinner.jinja2 -------------------------------------------------------------------------------- /public/templates/partials/_no_results_alert.jinja2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/templates/partials/_no_results_alert.jinja2 -------------------------------------------------------------------------------- /public/templates/partials/_search_form.jinja2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/templates/partials/_search_form.jinja2 -------------------------------------------------------------------------------- /public/templates/search.jinja2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/templates/search.jinja2 -------------------------------------------------------------------------------- /public/templates/search_api.jinja2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/templates/search_api.jinja2 -------------------------------------------------------------------------------- /public/templates/upload.jinja2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/public/templates/upload.jinja2 -------------------------------------------------------------------------------- /retrieval/create_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/create_index.py -------------------------------------------------------------------------------- /retrieval/embedding_model_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/embedding_model_info.py -------------------------------------------------------------------------------- /retrieval/llm_reranker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/llm_reranker.py -------------------------------------------------------------------------------- /retrieval/qdrant_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/qdrant_index.py -------------------------------------------------------------------------------- /retrieval/retrieval_commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/retrieval_commons.py -------------------------------------------------------------------------------- /retrieval/retriever_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/retriever_api.py -------------------------------------------------------------------------------- /retrieval/retriever_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/retriever_server.py -------------------------------------------------------------------------------- /retrieval/scripts/add_payload_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/scripts/add_payload_index.py -------------------------------------------------------------------------------- /retrieval/scripts/benchmark_embedding_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/scripts/benchmark_embedding_latency.py -------------------------------------------------------------------------------- /retrieval/scripts/compare_rerankers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/scripts/compare_rerankers.py -------------------------------------------------------------------------------- /retrieval/scripts/qdrant_snapshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/scripts/qdrant_snapshot.py -------------------------------------------------------------------------------- /retrieval/scripts/upload_folder_to_hf_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/scripts/upload_folder_to_hf_hub.py -------------------------------------------------------------------------------- /retrieval/search_query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/search_query.py -------------------------------------------------------------------------------- /retrieval/search_result_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/search_result_block.py -------------------------------------------------------------------------------- /retrieval/server_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/server_utils.py -------------------------------------------------------------------------------- /retrieval/upload_collection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/retrieval/upload_collection.py -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tasks/__init__.py -------------------------------------------------------------------------------- /tasks/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tasks/benchmark.py -------------------------------------------------------------------------------- /tasks/defaults.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tasks/defaults.py -------------------------------------------------------------------------------- /tasks/docker_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tasks/docker_tasks.py -------------------------------------------------------------------------------- /tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tasks/main.py -------------------------------------------------------------------------------- /tasks/preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tasks/preprocessing.py -------------------------------------------------------------------------------- /tasks/retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tasks/retrieval.py -------------------------------------------------------------------------------- /tasks/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tasks/setup.py -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_default_fixture_loop_scope = session -------------------------------------------------------------------------------- /tests/test_collection.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tests/test_collection.jsonl -------------------------------------------------------------------------------- /tests/test_collection_malformed.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tests/test_collection_malformed.jsonl -------------------------------------------------------------------------------- /tests/test_custom_docling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tests/test_custom_docling.py -------------------------------------------------------------------------------- /tests/test_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tests/test_index.py -------------------------------------------------------------------------------- /tests/test_pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tests/test_pipelines.py -------------------------------------------------------------------------------- /tests/test_search_query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tests/test_search_query.py -------------------------------------------------------------------------------- /tests/test_wikipedia_preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/tests/test_wikipedia_preprocessing.py -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/utils/cache.py -------------------------------------------------------------------------------- /utils/docker_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/utils/docker_utils.py -------------------------------------------------------------------------------- /utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-oval/WikiChat/HEAD/utils/logging.py --------------------------------------------------------------------------------