├── .devcontainer ├── Dockerfile ├── devcontainer.json └── setupEnv.sh ├── .dockerignore ├── .env.sample ├── .flake8 ├── .gitattributes ├── .github ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── subtask.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── bicep-audit.yml │ ├── build-docker-images.yml │ ├── build-docker.yml │ ├── ci.yml │ ├── comment_coverage.yml │ ├── create-release.yml │ ├── pr-title-checker.yml │ ├── scheduled-Dependabot-PRs-Auto-Merge.yml │ ├── stale-bot.yml │ ├── test-automation.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode ├── extensions.json ├── launch.json ├── settings.json └── tasks.json ├── CDLA-Permissive-2.md ├── CONTRIBUTING.md ├── LICENSE.md ├── Makefile ├── README.md ├── SUPPORT.md ├── azure.yaml ├── code ├── app.py ├── backend │ ├── Admin.py │ ├── api │ │ ├── __init__.py │ │ └── chat_history.py │ ├── batch │ │ ├── .funcignore │ │ ├── .gitignore │ │ ├── .vscode │ │ │ └── extensions.json │ │ ├── __init__.py │ │ ├── add_url_embeddings.py │ │ ├── batch_push_results.py │ │ ├── batch_start_processing.py │ │ ├── function_app.py │ │ ├── get_conversation_response.py │ │ ├── host.json │ │ ├── local.settings.json.sample │ │ └── utilities │ │ │ ├── __init__.py │ │ │ ├── chat_history │ │ │ ├── auth_utils.py │ │ │ ├── cosmosdb.py │ │ │ ├── database_client_base.py │ │ │ ├── database_factory.py │ │ │ ├── postgresdbservice.py │ │ │ └── sample_user.py │ │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── answer.py │ │ │ └── source_document.py │ │ │ ├── document_chunking │ │ │ ├── __init__.py │ │ │ ├── chunking_strategy.py │ │ │ ├── document_chunking_base.py │ │ │ ├── fixed_size_overlap.py │ │ │ ├── json.py │ │ │ ├── layout.py │ │ │ ├── page.py │ │ │ ├── paragraph.py │ │ │ └── strategies.py │ │ │ ├── document_loading │ │ │ ├── __init__.py │ │ │ ├── document_loading_base.py │ │ │ ├── layout.py │ │ │ ├── read.py │ │ │ ├── strategies.py │ │ │ ├── web.py │ │ │ └── word_document.py │ │ │ ├── helpers │ │ │ ├── __init__.py │ │ │ ├── azure_blob_storage_client.py │ │ │ ├── azure_computer_vision_client.py │ │ │ ├── azure_form_recognizer_helper.py │ │ │ ├── azure_postgres_helper.py │ │ │ ├── azure_search_helper.py │ │ │ ├── config │ │ │ │ ├── assistant_strategy.py │ │ │ │ ├── config_helper.py │ │ │ │ ├── conversation_flow.py │ │ │ │ ├── database_type.py │ │ │ │ ├── default.json │ │ │ │ ├── default_contract_assistant_prompt.txt │ │ │ │ ├── default_employee_assistant_prompt.txt │ │ │ │ └── embedding_config.py │ │ │ ├── document_chunking_helper.py │ │ │ ├── document_loading_helper.py │ │ │ ├── embedders │ │ │ │ ├── embedder_base.py │ │ │ │ ├── embedder_factory.py │ │ │ │ ├── integrated_vectorization_embedder.py │ │ │ │ ├── postgres_embedder.py │ │ │ │ └── push_embedder.py │ │ │ ├── env_helper.py │ │ │ ├── llm_helper.py │ │ │ └── orchestrator_helper.py │ │ │ ├── integrated_vectorization │ │ │ ├── azure_search_datasource.py │ │ │ ├── azure_search_index.py │ │ │ ├── azure_search_indexer.py │ │ │ └── azure_search_skillset.py │ │ │ ├── loggers │ │ │ └── conversation_logger.py │ │ │ ├── orchestrator │ │ │ ├── __init__.py │ │ │ ├── lang_chain_agent.py │ │ │ ├── open_ai_functions.py │ │ │ ├── orchestration_strategy.py │ │ │ ├── orchestrator_base.py │ │ │ ├── prompt_flow.py │ │ │ ├── semantic_kernel.py │ │ │ └── strategies.py │ │ │ ├── parser │ │ │ ├── __init__.py │ │ │ ├── output_parser_tool.py │ │ │ └── parser_base.py │ │ │ ├── plugins │ │ │ ├── chat_plugin.py │ │ │ └── post_answering_plugin.py │ 
│ │ ├── search │ │ │ ├── azure_search_handler.py │ │ │ ├── integrated_vectorization_search_handler.py │ │ │ ├── postgres_search_handler.py │ │ │ ├── search.py │ │ │ └── search_handler_base.py │ │ │ └── tools │ │ │ ├── __init__.py │ │ │ ├── answer_processing_base.py │ │ │ ├── answering_tool_base.py │ │ │ ├── content_safety_checker.py │ │ │ ├── post_prompt_tool.py │ │ │ ├── question_answer_tool.py │ │ │ └── text_processing_tool.py │ ├── images │ │ ├── favicon.ico │ │ └── logo.png │ └── pages │ │ ├── 01_Ingest_Data.py │ │ ├── 02_Explore_Data.py │ │ ├── 03_Delete_Data.py │ │ ├── 04_Configuration.py │ │ └── common.css ├── create_app.py ├── frontend │ ├── __mocks__ │ │ ├── SampleData.ts │ │ ├── fileMock.js │ │ └── styleMock.js │ ├── index.html │ ├── jest.config.ts │ ├── jest.polyfills.js │ ├── package-lock.json │ ├── package.json │ ├── public │ │ └── favicon.ico │ ├── setupTests.ts │ ├── src │ │ ├── api │ │ │ ├── api.ts │ │ │ ├── index.ts │ │ │ └── models.ts │ │ ├── assets │ │ │ ├── Azure.svg │ │ │ ├── Interact with data.svg │ │ │ ├── Quick source reference.svg │ │ │ ├── Send.svg │ │ │ ├── Summarize contracts.svg │ │ │ ├── mic-outline.svg │ │ │ ├── pauseIcon.svg │ │ │ └── speakerIcon.svg │ │ ├── components │ │ │ ├── Answer │ │ │ │ ├── Answer.module.css │ │ │ │ ├── Answer.test.tsx │ │ │ │ ├── Answer.tsx │ │ │ │ ├── AnswerParser.tsx │ │ │ │ └── index.ts │ │ │ ├── AssistantTypeSection │ │ │ │ ├── AssistantTypeSection.module.css │ │ │ │ ├── AssistantTypeSection.test.tsx │ │ │ │ └── AssistantTypeSection.tsx │ │ │ ├── ChatHistoryListItemCell │ │ │ │ ├── ChatHistoryListItemCell.module.css │ │ │ │ ├── ChatHistoryListItemCell.test.tsx │ │ │ │ └── ChatHistoryListItemCell.tsx │ │ │ ├── ChatHistoryListItemGroups │ │ │ │ ├── ChatHistoryListItemGroups.module.css │ │ │ │ ├── ChatHistoryListItemGroups.test.tsx │ │ │ │ └── ChatHistoryListItemGroups.tsx │ │ │ ├── ChatHistoryPanel │ │ │ │ ├── ChatHistoryPanel.module.css │ │ │ │ ├── ChatHistoryPanel.test.tsx │ │ │ │ └── ChatHistoryPanel.tsx │ │ │ ├── ChatMessageContainer │ │ │ │ ├── ChatMessageContainer.module.css │ │ │ │ ├── ChatMessageContainer.test.tsx │ │ │ │ └── ChatMessageContainer.tsx │ │ │ ├── CitationPanel │ │ │ │ ├── CitationPanel.module.css │ │ │ │ ├── CitationPanel.test.tsx │ │ │ │ └── CitationPanel.tsx │ │ │ ├── HistoryButton │ │ │ │ ├── HistoryButton.module.css │ │ │ │ ├── HistoryButton.test.tsx │ │ │ │ └── HistoryButton.tsx │ │ │ ├── QuestionInput │ │ │ │ ├── QuestionInput.module.css │ │ │ │ ├── QuestionInput.test.tsx │ │ │ │ ├── QuestionInput.tsx │ │ │ │ └── index.ts │ │ │ ├── Spinner │ │ │ │ ├── Spinner.module.css │ │ │ │ ├── Spinner.test.tsx │ │ │ │ └── Spinner.tsx │ │ │ └── Utils │ │ │ │ └── utils.tsx │ │ ├── index.css │ │ ├── index.tsx │ │ ├── pages │ │ │ ├── NoPage.test.tsx │ │ │ ├── NoPage.tsx │ │ │ ├── chat │ │ │ │ ├── Cards_contract │ │ │ │ │ ├── Cards.module.css │ │ │ │ │ ├── Cards.test.tsx │ │ │ │ │ └── Cards.tsx │ │ │ │ ├── Chat.module.css │ │ │ │ ├── Chat.test.tsx │ │ │ │ └── Chat.tsx │ │ │ └── layout │ │ │ │ ├── Layout.module.css │ │ │ │ ├── Layout.test.tsx │ │ │ │ └── Layout.tsx │ │ ├── util │ │ │ ├── SpeechToText.test.ts │ │ │ └── SpeechToText.ts │ │ └── vite-env.d.ts │ ├── tsconfig.json │ ├── tsconfig.node.json │ └── vite.config.ts └── tests │ ├── chat_history │ ├── test_cosmosdb.py │ ├── test_database_factory.py │ └── test_postgresdbservice.py │ ├── common │ └── test_source_document.py │ ├── conftest.py │ ├── constants.py │ ├── functional │ ├── README.md │ ├── app_config.py │ ├── conftest.py │ ├── resources │ │ ├── 
9b5ad71b2ce5302211f9c61530b329a4922fc6a4 │ │ └── README.md │ └── tests │ │ ├── backend_api │ │ ├── README.md │ │ ├── common.py │ │ ├── default │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_advanced_image_processing.py │ │ │ ├── test_conversation.py │ │ │ ├── test_health.py │ │ │ ├── test_post_prompt_tool.py │ │ │ └── test_speech_token.py │ │ ├── integrated_vectorization_custom_conversation │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ └── test_iv_question_answer_tool.py │ │ ├── sk_orchestrator │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_response_with_search_documents_tool.py │ │ │ ├── test_response_with_text_processing_tool.py │ │ │ └── test_response_without_tool_call.py │ │ ├── with_byod │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ └── test_conversation_flow.py │ │ └── without_data │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ └── test_azure_byod_without_data.py │ │ └── functions │ │ ├── README.md │ │ ├── advanced_image_processing │ │ ├── __init__.py │ │ ├── conftest.py │ │ └── test_advanced_image_processing.py │ │ └── integrated_vectorization │ │ ├── __init__.py │ │ ├── conftest.py │ │ └── test_integrated_vectorization_resource_creation.py │ ├── request_matching.py │ ├── search_utilities │ ├── test_azure_search_handler.py │ ├── test_integrated_vectorization_search_handler.py │ ├── test_postgres_search_handler.py │ └── test_search.py │ ├── test_add_url_embeddings.py │ ├── test_app.py │ ├── test_azure_blob_storage.py │ ├── test_batch_push_results.py │ ├── test_batch_start_processing.py │ ├── test_chat_history.py │ ├── test_document_processor.py │ ├── test_get_conversation_response.py │ ├── test_output_parser_tool.py │ └── utilities │ ├── helpers │ ├── processors │ │ └── test_integrated_vectorization_embedder.py │ ├── test_azure_computer_vision_client.py │ ├── test_azure_postgres_helper.py │ ├── test_azure_search_helper.py │ ├── test_config_helper.py │ ├── test_document_chunking_helper.py │ ├── test_document_loading_helper.py │ ├── test_env_helper.py │ ├── test_llm_helper.py │ ├── test_postgress_embedder.py │ ├── test_push_embedder.py │ └── test_secret_helper.py │ ├── integrated_vectorization │ ├── test_azure_search_datasource.py │ ├── test_azure_search_index.py │ ├── test_azure_search_indexer.py │ └── test_azure_search_skillset.py │ ├── orchestrator │ ├── test_lang_chain_agent.py │ ├── test_open_ai_functions.py │ ├── test_orchestrator.py │ ├── test_orchestrator_base.py │ ├── test_prompt_flow.py │ └── test_semantic_kernel.py │ ├── plugins │ ├── test_chat_plugin.py │ └── test_post_answering_plugin.py │ ├── test_azure_blob_storage_client.py │ └── tools │ ├── test_content_safety_checker.py │ ├── test_post_prompt_tool.py │ └── test_question_answer_tool.py ├── data ├── Benefit_Options.pdf ├── MSFT_FY23Q4_10K.docx ├── Northwind_Health_Plus_Benefits_Details.pdf ├── Northwind_Standard_Benefits_Details.pdf ├── PerksPlus.pdf ├── PressReleaseFY23Q4.docx ├── Woodgrove - Cyber Risk Insurance Policy_Commercial Insurance.pdf ├── Woodgrove - Cyber Risk Insurance Policy_Commercial Insurance_Important Prompts For_Claims Handlers.pdf ├── Woodgrove - Insurance Underwriting_Key Prompts for Underwriters when evaluating Financial Results.pdf ├── Woodgrove - Insurance_Summary Plan Description_Employee Benefits.pdf ├── Woodgrove - Insurance_Summary Plan Description_Employee Benefits_Important Prompts For_Employees.pdf ├── Woodgrove - Insurance_Summary Plan Description_Employee Benefits_Important Prompts For_Insurance Agents.pdf ├── Woodgrove - Mortgage Product Manual - 
1.0.pdf ├── Woodgrove Asset Management - Prospective of Asset Management Funds.pdf ├── contract_data │ ├── 1628215729_Kyndryl_-_Master_Agreement__executed_utah.pdf │ ├── Final_MA_999_200000000170_3_MA_FORM_ADV_PDF wireless.PDF │ ├── Final_MA_999_200000000325_3_MA_FORM_ADV_PDF.PDF │ ├── Initial_MA_2023_V1 - servers.pdf │ ├── Legal contract_20240411112609.pdf │ ├── Master_Agreement_OEM_Filters_ALDOT_V1 - OEM filters.pdf │ ├── Master_Agreement_OEM_Filters_V1.pdf │ ├── Master_Agreement_Renewed_V2 - copiers.pdf │ ├── Master_Agreement_V1 (1).pdf │ ├── Master_Agreement_V1 - July.pdf │ ├── Master_Agreement_V1 - May.pdf │ ├── Master_Agreement_V1 - propane.pdf │ ├── Master_agreement_2024_V1 products_services.pdf │ ├── NASPO_Participating_Addendum - insight public sector.pdf │ ├── NASPO_VP_SVAR_Insight_AL_PA.pdf │ ├── Server_Storage_Solutions_Technical_Services_ITB_v1.2 - OEM Terms.pdf │ ├── State_of_Alabama_NASPO_Cloud_Services_PA_032224_.docx 1.pdf │ ├── State_of_Alabama_NASPO_Cloud_Services_PA_032224_.docx.pdf │ └── Statewide_Truck_Chassis_19_000_GVWR_and_Greater-Southland_V1.pdf ├── employee_handbook.pdf ├── init.md └── role_library.pdf ├── docker ├── Admin.Dockerfile ├── Backend.Dockerfile ├── Frontend.Dockerfile ├── docker-compose.yml └── function-host.json ├── docs ├── LOCAL_DEPLOYMENT.md ├── NON_DEVCONTAINER_SETUP.md ├── RELEASE_GUIDELINES.md ├── TEAMS_LOCAL_DEPLOYMENT.md ├── advanced_image_processing.md ├── azure_app_service_auth_setup.md ├── best_practices.md ├── chat_history.md ├── container_registry_migration.md ├── contract_assistance.md ├── conversation_flow_options.md ├── create_new_app_registration.md ├── customer_truth.md ├── design │ └── adrs │ │ ├── 2024-03-15-use-pytest-httpserver-as-a-mock-server-for-functional-testing.md │ │ ├── 2024-03-18-use-cypres-as-a-framework-for-e2e-testing.md │ │ ├── 2024-04-08-use-pull-throughout-for-ingestion.md │ │ ├── 2024-04-23-implement-prompflow-orchestration.md │ │ ├── 2024-04-30-remove-langchain-from-tools.md │ │ ├── 2024-05-02-enable-project-versioning.md │ │ ├── README.md │ │ └── template.md ├── employee_assistance.md ├── images │ ├── AddDetails.png │ ├── AddPlatform.png │ ├── AddRedirectURL.png │ ├── AppAuthIdentityProvider.png │ ├── AppAuthIdentityProviderAdd.png │ ├── AppAuthIdentityProviderAdded.png │ ├── AppAuthentication.png │ ├── AppAuthenticationIdentity.png │ ├── Appregistrations.png │ ├── MicrosoftEntraID.png │ ├── NewRegistration.png │ ├── Web.png │ ├── WebAppURL.png │ ├── admin-ingest.png │ ├── admin-site.png │ ├── architecture_cdb.png │ ├── architecture_pg.png │ ├── azure-search-use-iv.png │ ├── chat-app.png │ ├── customerTruth.png │ ├── cwyd_admin_contract_selected.png │ ├── cwyd_admin_employe_selected.png │ ├── cwyd_admin_legal_unselected.png │ ├── db_selection.png │ ├── delete-search-datasource.png │ ├── delete-search-index.png │ ├── delete-search-indexer.png │ ├── delete-search-skillset.png │ ├── deployment_center.png │ ├── enable_advanced_image_processing.png │ ├── oneClickDeploy.png │ ├── prompt-flow-download.png │ ├── prompt-flow-error.png │ ├── resource_menu.png │ ├── supportingDocuments.png │ ├── teams-1.png │ ├── teams-10.png │ ├── teams-11.png │ ├── teams-12.png │ ├── teams-13.png │ ├── teams-14.png │ ├── teams-15.png │ ├── teams-16.png │ ├── teams-17.png │ ├── teams-18.png │ ├── teams-19.png │ ├── teams-2.png │ ├── teams-20.png │ ├── teams-21.png │ ├── teams-22.png │ ├── teams-3.png │ ├── teams-4.png │ ├── teams-5.png │ ├── teams-6.png │ ├── teams-7.png │ ├── teams-8.png │ ├── teams-9.png │ ├── teams-cwyd.png │ ├── 
teams-deploy-env.png │ ├── teams-local-1.png │ ├── teams-local-2.png │ ├── teams-local-3.png │ ├── teams-ux-1.png │ ├── teams-ux-2.png │ ├── teams-ux-3.png │ ├── teams.png │ ├── userStory.png │ ├── web-app-authentication.png │ ├── web-nlu.png │ ├── web-speech-to-text.png │ ├── web-unstructureddata.png │ ├── with_advanced_image_processing.png │ ├── without_advanced_image_processing.png │ ├── workbook-advanced-editor.png │ ├── workbook-edit.png │ ├── workbook-json.png │ ├── workbook-resource-parameters.png │ └── workbook-tabs.png ├── integrated_vectorization.md ├── model_configuration.md ├── postgreSQL.md ├── prompt_flow.md ├── speech_to_text.md ├── spikes │ └── using-image-data │ │ ├── ai-vision.ipynb │ │ └── azure-services.png ├── supported_file_types.md ├── teams_extension.md ├── transparency_faq.md ├── web-apps.md └── workbook.md ├── extensions └── teams │ ├── .gitignore │ ├── .vscode │ ├── extensions.json │ ├── launch.json │ ├── settings.json │ └── tasks.json │ ├── .webappignore │ ├── README.md │ ├── appPackage │ ├── CogSearchColor.png │ ├── CogSearchOutline.png │ ├── color.png │ ├── manifest.json │ └── outline.png │ ├── cards │ └── cardBuilder.ts │ ├── config.ts │ ├── env │ ├── .env.dev │ ├── .env.test │ └── .env.testtool │ ├── index.ts │ ├── infra │ ├── azure.bicep │ ├── azure.parameters.json │ └── botRegistration │ │ ├── azurebot.bicep │ │ └── readme.md │ ├── model.ts │ ├── package-lock.json │ ├── package.json │ ├── teamsBot.ts │ ├── teamsapp.local.yml │ ├── teamsapp.testtool.yml │ ├── teamsapp.yml │ ├── tsconfig.json │ └── web.config ├── infra ├── abbreviations.json ├── app │ ├── adminweb.bicep │ ├── eventgrid.bicep │ ├── function.bicep │ ├── machinelearning.bicep │ ├── storekeys.bicep │ ├── web.bicep │ └── workbook.bicep ├── core │ ├── ai │ │ └── cognitiveservices.bicep │ ├── database │ │ ├── cosmos-sql-role-assign.bicep │ │ ├── cosmosdb.bicep │ │ ├── deploy_create_table_script.bicep │ │ └── postgresdb.bicep │ ├── host │ │ ├── appservice-appsettings.bicep │ │ ├── appservice.bicep │ │ ├── appserviceplan.bicep │ │ └── functions.bicep │ ├── monitor │ │ ├── applicationinsights-dashboard.bicep │ │ ├── applicationinsights.bicep │ │ ├── loganalytics.bicep │ │ ├── monitoring.bicep │ │ └── workbook.bicep │ ├── search │ │ └── search-services.bicep │ ├── security │ │ ├── keyvault-access.bicep │ │ ├── keyvault-secret.bicep │ │ ├── keyvault.bicep │ │ ├── managed-identity.bicep │ │ ├── registry-access.bicep │ │ └── role.bicep │ └── storage │ │ └── storage-account.bicep ├── main.bicep ├── main.bicepparam ├── main.json ├── prompt-flow │ ├── create-prompt-flow.sh │ ├── cwyd │ │ ├── Prompt_variants.jinja2 │ │ ├── chat_with_context.jinja2 │ │ ├── flow.dag.template.yaml │ │ ├── flow.meta.yaml │ │ ├── generate_prompt_context.py │ │ ├── requirements.txt │ │ └── samples.json │ ├── deployment.yaml │ ├── endpoint.yaml │ └── model.yaml └── workbooks │ └── workbook.json ├── package-lock.json ├── package.json ├── poetry.lock ├── pyproject.toml ├── pytest.ini ├── scripts ├── checkquota.sh ├── data_scripts │ ├── create_postgres_tables.py │ └── requirements.txt ├── generate_arm_templates.sh ├── package_frontend.ps1 ├── package_frontend.sh ├── parse_env.ps1 ├── parse_env.sh └── run_create_table_script.sh └── tests ├── e2e-test ├── .gitignore ├── README.md ├── base │ ├── __init__.py │ └── base.py ├── config │ └── constants.py ├── pages │ ├── __init__.py │ ├── adminPage.py │ ├── loginPage.py │ └── webUserPage.py ├── pytest.ini ├── requirements.txt ├── sample_dotenv_file.txt ├── testdata │ └── golden_path_data.json 
└── tests │ ├── __init__.py │ ├── conftest.py │ └── test_poc_chat_with_your_data.py ├── integration └── ui │ ├── cypress.config.ts │ ├── cypress │ ├── README.md │ └── e2e │ │ ├── admin.spec.cy.ts │ │ └── chat.spec.cy.ts │ ├── package-lock.json │ ├── package.json │ └── tsconfig.json └── llm-evaluator ├── .env.sample ├── api_evaluation.py ├── cwyd_conversation_client.py ├── data ├── dataset.jsonl ├── evaluation_results.xlsx └── input_questions.json ├── dataset_generation.py ├── readme.md └── requirements.txt /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/devcontainers/python:3.11 2 | 3 | # install git 4 | RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 5 | && apt-get -y install --no-install-recommends git libgtk2.0-0 libgtk-3-0 libgbm-dev libnotify-dev libnss3 libxss1 libasound2 libxtst6 xauth xvfb 6 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Chat with your data", 3 | "build": { 4 | "dockerfile": "Dockerfile" 5 | }, 6 | "features": { 7 | "ghcr.io/devcontainers/features/azure-cli:1": { 8 | "extensions": "ml" 9 | }, 10 | "ghcr.io/devcontainers/features/docker-outside-of-docker:1": {}, 11 | "ghcr.io/devcontainers/features/node:1": {}, 12 | "ghcr.io/jlaundry/devcontainer-features/azure-functions-core-tools:1": {}, 13 | "ghcr.io/azure/azure-dev/azd:latest": {}, 14 | "ghcr.io/rchaganti/vsc-devcontainer-features/azurebicep:1.0.5": {} 15 | }, 16 | 17 | "postCreateCommand": "./.devcontainer/setupEnv.sh", 18 | 19 | "customizations": { 20 | "vscode": { 21 | "extensions": [ 22 | "github.vscode-pull-request-github", 23 | "github.vscode-github-actions", 24 | "ms-azuretools.azure-dev", 25 | "ms-azuretools.vscode-azurefunctions", 26 | "ms-azuretools.vscode-bicep", 27 | "ms-azuretools.vscode-docker", 28 | "ms-python.python", 29 | "ms-python.black-formatter", 30 | "ms-python.vscode-pylance", 31 | "ms-python.pylint", 32 | "ms-toolsai.jupyter", 33 | "ms-vscode.vscode-node-azure-pack", 34 | "TeamsDevApp.ms-teams-vscode-extension", 35 | "zeshuaro.vscode-python-poetry", 36 | "prompt-flow.prompt-flow" 37 | ], 38 | "settings": { 39 | "python.defaultInterpreterPath": "/usr/local/bin/python", 40 | "python.pythonPath": "/usr/local/bin/python", 41 | "remote.autoForwardPortsFallback": 0, 42 | "remote.autoForwardPortsSource": "process" 43 | } 44 | } 45 | }, 46 | 47 | "remoteEnv": { 48 | // the original host directory which is needed for volume mount commands from inside the container (Docker in Docker) 49 | "HOST_DOCKER_FOLDER": "${localWorkspaceFolder}/docker" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /.devcontainer/setupEnv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install --upgrade pip 4 | 5 | pip install poetry 6 | 7 | # https://pypi.org/project/poetry-plugin-export/ 8 | pip install poetry-plugin-export 9 | 10 | poetry env use python3.11 11 | 12 | poetry config warnings.export false 13 | 14 | poetry install --with dev 15 | 16 | poetry run pre-commit install 17 | 18 | (cd ./code/frontend; npm install) 19 | 20 | (cd ./tests/integration/ui; npm install) 21 | -------------------------------------------------------------------------------- /.flake8: 
-------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | extend-ignore = E501 4 | exclude = .venv 5 | ignore = E203, W503 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | *.{cmd,[cC][mM][dD]} text eol=crlf 3 | *.{bat,[bB][aA][tT]} text eol=crlf 4 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Lines starting with '#' are comments. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # These owners will be the default owners for everything in the repo. 5 | * @Avijit-Microsoft @Roopan-Microsoft @Prajwal-Microsoft @Fr4nc3 @Vinay-Microsoft @aniaroramsft 6 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Describe the bug 11 | A clear and concise description of what the bug is. 12 | 13 | # Expected behavior 14 | A clear and concise description of what you expected to happen. 15 | 16 | # How does this bug make you feel? 17 | _Share a gif from [giphy](https://giphy.com/) to tells us how you'd feel_ 18 | 19 | --- 20 | 21 | # Debugging information 22 | 23 | ## Steps to reproduce 24 | Steps to reproduce the behavior: 25 | 1. Go to '...' 26 | 2. Click on '....' 27 | 3. Scroll down to '....' 28 | 4. See error 29 | 30 | ## Screenshots 31 | If applicable, add screenshots to help explain your problem. 32 | 33 | ## Logs 34 | 35 | If applicable, add logs to help the engineer debug the problem. 36 | 37 | --- 38 | 39 | # Tasks 40 | 41 | _To be filled in by the engineer picking up the issue_ 42 | 43 | - [ ] Task 1 44 | - [ ] Task 2 45 | - [ ] ... 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Motivation 11 | 12 | A clear and concise description of why this feature would be useful and the value it would bring. 13 | Explain any alternatives considered and why they are not sufficient. 14 | 15 | # How would you feel if this feature request was implemented? 16 | 17 | _Share a gif from [giphy](https://giphy.com/) to tells us how you'd feel. 
Format: ![alt_text](https://media.giphy.com/media/xxx/giphy.gif)_ 18 | 19 | # Requirements 20 | 21 | A list of requirements to consider this feature delivered 22 | - Requirement 1 23 | - Requirement 2 24 | - ... 25 | 26 | # Tasks 27 | 28 | _To be filled in by the engineer picking up the issue_ 29 | 30 | - [ ] Task 1 31 | - [ ] Task 2 32 | - [ ] ... 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/subtask.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Sub task 3 | about: A sub task 4 | title: '' 5 | labels: subtask 6 | assignees: '' 7 | 8 | --- 9 | 10 | Required by 11 | 12 | # Description 13 | 14 | A clear and concise description of what this subtask is. 15 | 16 | # Tasks 17 | 18 | _To be filled in by the engineer picking up the subtask 19 | 20 | - [ ] Task 1 21 | - [ ] Task 2 22 | - [ ] ... 23 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Purpose 2 | 3 | * ... 4 | 5 | ## Does this introduce a breaking change? 6 | 7 | 8 | - [ ] Yes 9 | - [ ] No 10 | 11 | 25 | 26 | ## How to Test 27 | * Get the code 28 | 29 | ``` 30 | git clone [repo-address] 31 | cd [repo-name] 32 | git checkout [branch-name] 33 | npm install 34 | ``` 35 | 36 | * Test the code 37 | 38 | ``` 39 | ``` 40 | 41 | ## What to Check 42 | Verify that the following are valid 43 | * ... 44 | 45 | ## Other Information 46 | 47 | -------------------------------------------------------------------------------- /.github/workflows/bicep-audit.yml: -------------------------------------------------------------------------------- 1 | name: Validate bicep templates 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths: 7 | - "**/*.bicep" 8 | pull_request: 9 | branches: 10 | - main 11 | paths: 12 | - "**/*.bicep" 13 | workflow_dispatch: 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | permissions: 19 | security-events: write 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v4 23 | 24 | - name: Run Microsoft Security DevOps Analysis 25 | uses: microsoft/security-devops-action@preview 26 | id: msdo 27 | continue-on-error: true 28 | with: 29 | tools: templateanalyzer 30 | 31 | - name: Upload alerts to Security tab 32 | uses: github/codeql-action/upload-sarif@v3 33 | if: github.repository_owner == 'Azure-Samples' 34 | with: 35 | sarif_file: ${{ steps.msdo.outputs.sarifFile }} 36 | -------------------------------------------------------------------------------- /.github/workflows/build-docker-images.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker and Optional Push 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - dev 8 | - demo 9 | pull_request: 10 | branches: 11 | - main 12 | - dev 13 | - demo 14 | types: 15 | - opened 16 | - ready_for_review 17 | - reopened 18 | - synchronize 19 | merge_group: 20 | workflow_dispatch: 21 | 22 | jobs: 23 | docker-build: 24 | strategy: 25 | matrix: 26 | include: 27 | - app_name: rag-adminwebapp 28 | dockerfile: docker/Admin.Dockerfile 29 | - app_name: rag-backend 30 | dockerfile: docker/Backend.Dockerfile 31 | - app_name: rag-webapp 32 | dockerfile: docker/Frontend.Dockerfile 33 | uses: ./.github/workflows/build-docker.yml 34 | with: 35 | old_registry: ${{ github.ref_name == 'main' && 'fruoccopublic.azurecr.io' }} 36 | new_registry: 
'cwydcontainerreg.azurecr.io' 37 | old_username: ${{ github.ref_name == 'main' && 'fruoccopublic' }} 38 | new_username: 'cwydcontainerreg' 39 | app_name: ${{ matrix.app_name }} 40 | dockerfile: ${{ matrix.dockerfile }} 41 | push: ${{ github.ref_name == 'main' || github.ref_name == 'dev' || github.ref_name == 'demo'|| github.ref_name == 'dependabotchanges' }} 42 | secrets: inherit 43 | -------------------------------------------------------------------------------- /.github/workflows/comment_coverage.yml: -------------------------------------------------------------------------------- 1 | name: Comment coverage 2 | 3 | on: 4 | workflow_run: 5 | workflows: [Tests] 6 | types: 7 | - completed 8 | 9 | permissions: 10 | pull-requests: write 11 | 12 | jobs: 13 | comment: 14 | name: Comment coverage 15 | runs-on: ubuntu-latest 16 | if: > 17 | github.event.workflow_run.event == 'pull_request' && 18 | github.event.workflow_run.conclusion != 'cancelled' 19 | steps: 20 | - name: Download artifact 21 | uses: actions/download-artifact@v4 22 | with: 23 | name: coverage 24 | github-token: ${{ secrets.GITHUB_TOKEN }} 25 | run-id: ${{ github.event.workflow_run.id }} 26 | - name: Find associated pull request 27 | id: pr 28 | uses: actions/github-script@v7 29 | if: ${{ github.event.workflow_run.pull_requests[0].number == null }} 30 | with: 31 | script: | 32 | const response = await github.rest.pulls.list({ 33 | owner: "${{ github.repository_owner }}", 34 | repo: context.payload.workflow_run.repository.name, 35 | state: "open", 36 | head: `${context.payload.workflow_run.head_repository.owner.login}:${context.payload.workflow_run.head_branch}`, 37 | }); 38 | 39 | return response.data[0]?.number ?? ""; 40 | retries: 3 41 | - name: Comment coverage 42 | uses: MishaKav/pytest-coverage-comment@81882822c5b22af01f91bd3eacb1cefb6ad73dc2 43 | with: 44 | pytest-xml-coverage-path: coverage.xml 45 | junitxml-path: coverage-junit.xml 46 | issue-number: ${{ github.event.workflow_run.pull_requests[0].number || steps.pr.outputs.result }} 47 | report-only-changed-files: true 48 | -------------------------------------------------------------------------------- /.github/workflows/pr-title-checker.yml: -------------------------------------------------------------------------------- 1 | name: "PR Title Checker" 2 | 3 | on: 4 | pull_request_target: 5 | types: 6 | - opened 7 | - edited 8 | - synchronize 9 | merge_group: 10 | 11 | permissions: 12 | pull-requests: read 13 | 14 | jobs: 15 | main: 16 | name: Validate PR title 17 | runs-on: ubuntu-latest 18 | if: ${{ github.event_name != 'merge_group' }} 19 | steps: 20 | - uses: amannn/action-semantic-pull-request@v5 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 24.2.0 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | 8 | - repo: https://github.com/pycqa/flake8 9 | rev: 7.0.0 10 | hooks: 11 | - id: flake8 12 | args: [--extend-ignore=E501] 13 | 14 | - repo: local 15 | hooks: 16 | - id: bicep 17 | name: bicep 18 | description: Lint and build Bicep files 19 | entry: ./scripts/generate_arm_templates.sh 20 | language: script 21 | files: \.bicep$ 22 | require_serial: true 23 | args: # Bicep files that we want to generate ARM templates from 24 | - -f=./infra/main.bicep 25 | 
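Editor's note: the pr-title-checker workflow above delegates validation to amannn/action-semantic-pull-request, which expects pull request titles in the Conventional Commits form `type(scope): description`. Purely as a hypothetical sketch of the rule being enforced (the accepted type list below is the common Conventional Commits default set, not something taken from this repository's configuration), a title check could look like:

```python
import re

# Hypothetical illustration of a Conventional Commits title check, similar in
# spirit to what the PR title checker workflow enforces. The type list is an
# assumption (the usual defaults), not this repository's configuration.
CONVENTIONAL_TITLE = re.compile(
    r"^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)"
    r"(\([\w\-./]+\))?!?: .+"
)


def is_valid_pr_title(title: str) -> bool:
    """Return True if the title matches the type(scope): description pattern."""
    return bool(CONVENTIONAL_TITLE.match(title))


if __name__ == "__main__":
    print(is_valid_pr_title("feat(frontend): add chat history panel"))  # True
    print(is_valid_pr_title("Update readme"))                           # False
```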
-------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "github.vscode-pull-request-github", 4 | "github.vscode-github-actions", 5 | "ms-azuretools.azure-dev", 6 | "ms-azuretools.vscode-azurefunctions", 7 | "ms-azuretools.vscode-bicep", 8 | "ms-azuretools.vscode-docker", 9 | "ms-python.python", 10 | "ms-python.black-formatter", 11 | "ms-python.vscode-pylance", 12 | "ms-python.pylint", 13 | "ms-toolsai.jupyter", 14 | "ms-vscode.vscode-node-azure-pack", 15 | "TeamsDevApp.ms-teams-vscode-extension", 16 | "zeshuaro.vscode-python-poetry" 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "azureFunctions.deploySubpath": "code/backend/batch", 3 | "azureFunctions.projectSubpath": "code/backend/batch", 4 | "azureFunctions.scmDoBuildDuringDeployment": true, 5 | "azureFunctions.projectLanguage": "Python", 6 | "azureFunctions.projectRuntime": "~4", 7 | "debug.internalConsoleOptions": "neverOpen", 8 | "azureFunctions.projectLanguageModel": 2, 9 | "files.insertFinalNewline": true, 10 | "files.trimFinalNewlines": true, 11 | "files.trimTrailingWhitespace": true, 12 | "githubPullRequests.ignoredPullRequestBranches": [ 13 | "main" 14 | ], 15 | "python.testing.pytestArgs": [ 16 | ".", 17 | // Until we reconfigure these tests 18 | "-m", 19 | "not azure", 20 | ], 21 | "python.testing.cwd": "${workspaceFolder}/code", 22 | "python.testing.unittestEnabled": false, 23 | "python.testing.pytestEnabled": true, 24 | "pylint.cwd": "${workspaceFolder}/code", 25 | } 26 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "type": "func", 6 | "label": "func: host start", 7 | "command": "host start", 8 | "problemMatcher": "$func-python-watch", 9 | "isBackground": true, 10 | "dependsOn": "poetry install", 11 | "options": { 12 | "cwd": "${workspaceFolder}/code/backend/batch" 13 | } 14 | }, 15 | { 16 | "label": "poetry install", 17 | "type": "shell", 18 | "command": "poetry install", 19 | "problemMatcher": [], 20 | "options": { 21 | "cwd": "${workspaceFolder}" 22 | } 23 | }, 24 | { 25 | "label": "npm install", 26 | "type": "shell", 27 | "command": "npm install", 28 | "problemMatcher": [], 29 | "options": { 30 | "cwd": "${workspaceFolder}/code/frontend" 31 | } 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Getting support 2 | 3 | This document explains how to get support if you're facing any issues with the Chat with your Data Solution Accelerator. Please read through the following guidelines. 4 | 5 | 6 | ## Raise an issue 7 | If you've found a bug in Chat with your data, please [raise an issue](CONTRIBUTING.md#submit-issue) with us to get support. 8 | 9 | ## Troubleshoot a problem 10 | You could also troubleshoot the issue yourself by accessing the Azure Workbook that gets deployed when you deploy Chat With Your Data to your Azure subscription. The Workbook contains logs and metrics from various components of the application (e.g. latency, request failure count, tokens processed by OpenAI service etc). 11 | 12 | To access the workbook, go to your resource group on the Azure portal and click on the Azure Workbook resource. 13 | For more details on how to access your Azure Workbook, please refer to this [guide](https://learn.microsoft.com/en-us/azure/azure-monitor/visualize/workbooks-overview#accessing-azure-workbooks). 14 | -------------------------------------------------------------------------------- /azure.yaml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json 2 | 3 | name: chat-with-your-data-solution-accelerator 4 | metadata: 5 | template: chat-with-your-data-solution-accelerator@1.7.0 6 | hooks: 7 | postprovision: 8 | # run: ./infra/prompt-flow/create-prompt-flow.sh 9 | posix: 10 | shell: sh 11 | run: chmod +x ./scripts/parse_env.sh && ./scripts/parse_env.sh 12 | windows: 13 | shell: pwsh 14 | run: ./scripts/parse_env.ps1 15 | services: 16 | web: 17 | project: ./code 18 | language: py 19 | host: appservice 20 | dist: ./dist 21 | hooks: 22 | prepackage: 23 | windows: 24 | shell: pwsh 25 | run: ../scripts/package_frontend.ps1 26 | interactive: true 27 | continueOnError: false 28 | posix: 29 | shell: sh 30 | run: ../scripts/package_frontend.sh 31 | interactive: true 32 | continueOnError: false 33 | 34 | adminweb: 35 | project: ./code/backend 36 | language: py 37 | host: appservice 38 | hooks: 39 | prepackage: 40 | windows: 41 | shell: pwsh 42 | run: poetry install; poetry export -o requirements.txt 43 | posix: 44 | shell: sh 45 | run: poetry install; poetry export -o requirements.txt 46 | 47 | function: 48 | project: ./code/backend/batch 49 | language: py 50 | host: function 51 | hooks: 52 | prepackage: 53 | windows: 54 | shell: pwsh 55 | run: poetry export -o requirements.txt; pip install -r requirements.txt 56 | posix: 57 | shell: sh 58 | run: poetry export -o requirements.txt; pip install -r requirements.txt 59 | -------------------------------------------------------------------------------- /code/app.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | This module contains the entry point for the application. 3 | """ 4 | 5 | import os 6 | import logging 7 | from azure.monitor.opentelemetry import configure_azure_monitor 8 | from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor 9 | 10 | logging.captureWarnings(True) 11 | logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper()) 12 | # Raising the azure log level to WARN as it is too verbose - 13 | # https://github.com/Azure/azure-sdk-for-python/issues/9422 14 | logging.getLogger("azure").setLevel(os.environ.get("LOGLEVEL_AZURE", "WARN").upper()) 15 | # We cannot use EnvHelper here as Application Insights should be configured first 16 | # for instrumentation to work correctly 17 | if os.getenv("APPLICATIONINSIGHTS_ENABLED", "false").lower() == "true": 18 | configure_azure_monitor() 19 | HTTPXClientInstrumentor().instrument() # httpx is used by openai 20 | 21 | # pylint: disable=wrong-import-position 22 | from create_app import create_app # noqa: E402 23 | 24 | app = create_app() 25 | 26 | if __name__ == "__main__": 27 | app.run() 28 | -------------------------------------------------------------------------------- /code/backend/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/backend/api/__init__.py -------------------------------------------------------------------------------- /code/backend/batch/.funcignore: -------------------------------------------------------------------------------- 1 | .git* 2 | .vscode 3 | __azurite_db*__.json 4 | __blobstorage__ 5 | __queuestorage__ 6 | local.settings.json 7 | test 8 | .venv -------------------------------------------------------------------------------- /code/backend/batch/.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-azuretools.vscode-azurefunctions" 4 | ] 5 | } -------------------------------------------------------------------------------- /code/backend/batch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/backend/batch/__init__.py -------------------------------------------------------------------------------- /code/backend/batch/batch_start_processing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | import azure.functions as func 5 | from utilities.helpers.embedders.integrated_vectorization_embedder import ( 6 | IntegratedVectorizationEmbedder, 7 | ) 8 | from utilities.helpers.env_helper import EnvHelper 9 | from utilities.helpers.azure_blob_storage_client import ( 10 | AzureBlobStorageClient, 11 | create_queue_client, 12 | ) 13 | 14 | bp_batch_start_processing = func.Blueprint() 15 | logger = logging.getLogger(__name__) 16 | logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) 17 | 18 | 19 | @bp_batch_start_processing.route(route="BatchStartProcessing") 20 | def batch_start_processing(req: func.HttpRequest) -> func.HttpResponse: 21 | logger.info("Requested to start processing all documents received") 22 | env_helper: EnvHelper = EnvHelper() 23 | # Set up Blob 
Storage Client 24 | azure_blob_storage_client = AzureBlobStorageClient() 25 | # Get all files from Blob Storage 26 | files_data = azure_blob_storage_client.get_all_files() 27 | 28 | files_data = list(map(lambda x: {"filename": x["filename"]}, files_data)) 29 | 30 | if env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: 31 | reprocess_integrated_vectorization(env_helper) 32 | else: 33 | # Send a message to the queue for each file 34 | queue_client = create_queue_client() 35 | for fd in files_data: 36 | queue_client.send_message(json.dumps(fd).encode("utf-8")) 37 | 38 | return func.HttpResponse( 39 | f"Conversion started successfully for {len(files_data)} documents.", 40 | status_code=200, 41 | ) 42 | 43 | 44 | def reprocess_integrated_vectorization(env_helper: EnvHelper): 45 | indexer_embedder = IntegratedVectorizationEmbedder(env_helper) 46 | indexer_embedder.reprocess_all() 47 | -------------------------------------------------------------------------------- /code/backend/batch/function_app.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import azure.functions as func 4 | from add_url_embeddings import bp_add_url_embeddings 5 | from batch_push_results import bp_batch_push_results 6 | from batch_start_processing import bp_batch_start_processing 7 | from get_conversation_response import bp_get_conversation_response 8 | from azure.monitor.opentelemetry import configure_azure_monitor 9 | 10 | logging.captureWarnings(True) 11 | # Raising the azure log level to WARN as it is too verbose - https://github.com/Azure/azure-sdk-for-python/issues/9422 12 | logging.getLogger("azure").setLevel(os.environ.get("LOGLEVEL_AZURE", "WARN").upper()) 13 | if os.getenv("APPLICATIONINSIGHTS_ENABLED", "false").lower() == "true": 14 | configure_azure_monitor() 15 | 16 | app = func.FunctionApp( 17 | http_auth_level=func.AuthLevel.FUNCTION 18 | ) # change to ANONYMOUS for local debugging 19 | app.register_functions(bp_add_url_embeddings) 20 | app.register_functions(bp_batch_push_results) 21 | app.register_functions(bp_batch_start_processing) 22 | app.register_functions(bp_get_conversation_response) 23 | -------------------------------------------------------------------------------- /code/backend/batch/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | }, 11 | "extensionBundle": { 12 | "id": "Microsoft.Azure.Functions.ExtensionBundle", 13 | "version": "[3.*, 4.0.0)" 14 | } 15 | } -------------------------------------------------------------------------------- /code/backend/batch/local.settings.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "IsEncrypted": false, 3 | "Values": { 4 | "FUNCTIONS_WORKER_RUNTIME": "python", 5 | "AzureWebJobsStorage": "", 6 | "MyBindingConnection": "", 7 | "AzureWebJobs.HttpExample.Disabled": "true" 8 | }, 9 | "Host": { 10 | "LocalHttpPort": 7071, 11 | "CORS": "*", 12 | "CORSCredentials": false 13 | } 14 | } -------------------------------------------------------------------------------- /code/backend/batch/utilities/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/backend/batch/utilities/__init__.py -------------------------------------------------------------------------------- /code/backend/batch/utilities/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/backend/batch/utilities/common/__init__.py -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | import os.path 4 | import pkgutil 5 | 6 | 7 | # Get a list of all the classes defined in the module 8 | def get_all_classes() -> List[str]: 9 | return [name for _, name, _ in pkgutil.iter_modules([os.path.dirname(__file__)])] 10 | 11 | 12 | __all__ = get_all_classes() 13 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/chunking_strategy.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class ChunkingStrategy(Enum): 5 | LAYOUT = "layout" 6 | PAGE = "page" 7 | FIXED_SIZE_OVERLAP = "fixed_size_overlap" 8 | PARAGRAPH = "paragraph" 9 | JSON = "json" 10 | 11 | 12 | class ChunkingSettings: 13 | def __init__(self, chunking: dict): 14 | self.chunking_strategy = ChunkingStrategy(chunking["strategy"]) 15 | self.chunk_size = chunking["size"] 16 | self.chunk_overlap = chunking["overlap"] 17 | 18 | def __eq__(self, other: object) -> bool: 19 | if isinstance(self, other.__class__): 20 | return ( 21 | self.chunking_strategy == other.chunking_strategy 22 | and self.chunk_size == other.chunk_size 23 | and self.chunk_overlap == other.chunk_overlap 24 | ) 25 | else: 26 | return False 27 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/document_chunking_base.py: -------------------------------------------------------------------------------- 1 | # Create an abstract class for document loading 2 | from typing import List 3 | from abc import ABC, abstractmethod 4 | from ..common.source_document import SourceDocument 5 | from .chunking_strategy import ChunkingSettings 6 | 7 | 8 | class DocumentChunkingBase(ABC): 9 | def __init__(self) -> None: 10 | pass 11 | 12 | @abstractmethod 13 | def chunk( 14 | self, documents: List[SourceDocument], chunking: ChunkingSettings 15 | ) -> List[SourceDocument]: 16 | pass 17 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/fixed_size_overlap.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .document_chunking_base import DocumentChunkingBase 3 | from langchain.text_splitter import TokenTextSplitter 4 | from .chunking_strategy import ChunkingSettings 5 | from ..common.source_document import SourceDocument 6 | 7 | 8 | class FixedSizeOverlapDocumentChunking(DocumentChunkingBase): 9 | def __init__(self) -> None: 10 | pass 11 | 12 | def chunk( 13 | self, documents: List[SourceDocument], chunking: ChunkingSettings 14 | ) -> List[SourceDocument]: 15 | full_document_content = "".join( 16 | 
list(map(lambda document: document.content, documents)) 17 | ) 18 | document_url = documents[0].source 19 | splitter = TokenTextSplitter.from_tiktoken_encoder( 20 | chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap 21 | ) 22 | chunked_content_list = splitter.split_text(full_document_content) 23 | # Create document for each chunk 24 | documents = [] 25 | chunk_offset = 0 26 | for idx, chunked_content in enumerate(chunked_content_list): 27 | documents.append( 28 | SourceDocument.from_metadata( 29 | content=chunked_content, 30 | document_url=document_url, 31 | metadata={"offset": chunk_offset}, 32 | idx=idx, 33 | ) 34 | ) 35 | chunk_offset += len(chunked_content) 36 | return documents 37 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/json.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import List 3 | from .document_chunking_base import DocumentChunkingBase 4 | from langchain.text_splitter import RecursiveJsonSplitter 5 | from .chunking_strategy import ChunkingSettings 6 | from ..common.source_document import SourceDocument 7 | 8 | 9 | class JSONDocumentChunking(DocumentChunkingBase): 10 | def __init__(self) -> None: 11 | pass 12 | 13 | def chunk( 14 | self, documents: List[SourceDocument], chunking: ChunkingSettings 15 | ) -> List[SourceDocument]: 16 | full_document_content = "".join( 17 | list(map(lambda document: str(document.content), documents)) 18 | ) 19 | document_url = documents[0].source 20 | json_data = json.loads(full_document_content) 21 | splitter = RecursiveJsonSplitter(max_chunk_size=chunking.chunk_size) 22 | chunked_content_list = splitter.split_json(json_data) 23 | # Create document for each chunk 24 | documents = [] 25 | chunk_offset = 0 26 | for idx, chunked_content in enumerate(chunked_content_list): 27 | documents.append( 28 | SourceDocument.from_metadata( 29 | content=str(chunked_content), 30 | document_url=document_url, 31 | metadata={"offset": chunk_offset}, 32 | idx=idx, 33 | ) 34 | ) 35 | 36 | chunk_offset += len(chunked_content) 37 | return documents 38 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/layout.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .document_chunking_base import DocumentChunkingBase 3 | from langchain.text_splitter import MarkdownTextSplitter 4 | from .chunking_strategy import ChunkingSettings 5 | from ..common.source_document import SourceDocument 6 | 7 | 8 | class LayoutDocumentChunking(DocumentChunkingBase): 9 | def __init__(self) -> None: 10 | pass 11 | 12 | def chunk( 13 | self, documents: List[SourceDocument], chunking: ChunkingSettings 14 | ) -> List[SourceDocument]: 15 | full_document_content = "".join( 16 | list(map(lambda document: document.content, documents)) 17 | ) 18 | document_url = documents[0].source 19 | splitter = MarkdownTextSplitter.from_tiktoken_encoder( 20 | chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap 21 | ) 22 | chunked_content_list = splitter.split_text(full_document_content) 23 | # Create document for each chunk 24 | documents = [] 25 | chunk_offset = 0 26 | for idx, chunked_content in enumerate(chunked_content_list): 27 | documents.append( 28 | SourceDocument.from_metadata( 29 | content=chunked_content, 30 | document_url=document_url, 31 | metadata={"offset": 
chunk_offset}, 32 | idx=idx, 33 | ) 34 | ) 35 | 36 | chunk_offset += len(chunked_content) 37 | return documents 38 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/page.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .document_chunking_base import DocumentChunkingBase 3 | from langchain.text_splitter import MarkdownTextSplitter 4 | from .chunking_strategy import ChunkingSettings 5 | from ..common.source_document import SourceDocument 6 | 7 | 8 | class PageDocumentChunking(DocumentChunkingBase): 9 | def __init__(self) -> None: 10 | pass 11 | 12 | def chunk( 13 | self, documents: List[SourceDocument], chunking: ChunkingSettings 14 | ) -> List[SourceDocument]: 15 | document_url = documents[0].source 16 | splitter = MarkdownTextSplitter.from_tiktoken_encoder( 17 | chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap 18 | ) 19 | documents_chunked = [] 20 | for idx, document in enumerate(documents): 21 | chunked_content_list = splitter.split_text(document.content) 22 | for chunked_content in chunked_content_list: 23 | documents_chunked.append( 24 | SourceDocument.from_metadata( 25 | content=chunked_content, 26 | document_url=document_url, 27 | metadata={ 28 | "offset": document.offset, 29 | "page_number": document.page_number, 30 | }, 31 | idx=idx, 32 | ) 33 | ) 34 | return documents_chunked 35 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/paragraph.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .document_chunking_base import DocumentChunkingBase 3 | from .chunking_strategy import ChunkingSettings 4 | from ..common.source_document import SourceDocument 5 | 6 | 7 | class ParagraphDocumentChunking(DocumentChunkingBase): 8 | def __init__(self) -> None: 9 | pass 10 | 11 | # TO DO: Implement the following chunking strategies 12 | def chunk( 13 | self, documents: List[SourceDocument], chunking: ChunkingSettings 14 | ) -> List[SourceDocument]: 15 | raise NotImplementedError("Paragraph chunking is not implemented yet") 16 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_chunking/strategies.py: -------------------------------------------------------------------------------- 1 | from .chunking_strategy import ChunkingStrategy 2 | from .layout import LayoutDocumentChunking 3 | from .page import PageDocumentChunking 4 | from .fixed_size_overlap import FixedSizeOverlapDocumentChunking 5 | from .paragraph import ParagraphDocumentChunking 6 | from .json import JSONDocumentChunking 7 | 8 | 9 | def get_document_chunker(chunking_strategy: str): 10 | if chunking_strategy == ChunkingStrategy.LAYOUT.value: 11 | return LayoutDocumentChunking() 12 | elif chunking_strategy == ChunkingStrategy.PAGE.value: 13 | return PageDocumentChunking() 14 | elif chunking_strategy == ChunkingStrategy.FIXED_SIZE_OVERLAP.value: 15 | return FixedSizeOverlapDocumentChunking() 16 | elif chunking_strategy == ChunkingStrategy.PARAGRAPH.value: 17 | return ParagraphDocumentChunking() 18 | elif chunking_strategy == ChunkingStrategy.JSON.value: 19 | return JSONDocumentChunking() 20 | else: 21 | raise Exception(f"Unknown chunking strategy: {chunking_strategy}") 22 | -------------------------------------------------------------------------------- 
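Editor's note: get_document_chunker above is the entry point into the chunking package. A minimal usage sketch, assuming it runs from code/backend/batch with the project's Poetry dependencies (langchain, tiktoken) installed; the sample text and URL are placeholders:

```python
from utilities.common.source_document import SourceDocument
from utilities.document_chunking.chunking_strategy import ChunkingSettings
from utilities.document_chunking.strategies import get_document_chunker

# ChunkingSettings expects a dict with the strategy name, chunk size and overlap.
settings = ChunkingSettings(
    {"strategy": "fixed_size_overlap", "size": 500, "overlap": 100}
)

# The chunkers read .content and .source from each SourceDocument.
documents = [
    SourceDocument(
        content="Some long document text that should be split into chunks ...",
        source="https://example.com/sample.txt",  # placeholder URL
    )
]

chunker = get_document_chunker(settings.chunking_strategy.value)
chunks = chunker.chunk(documents, settings)
for chunk in chunks:
    print(chunk.content)
```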
/code/backend/batch/utilities/document_loading/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | import os.path 4 | import pkgutil 5 | from .strategies import LoadingStrategy 6 | 7 | 8 | class LoadingSettings: 9 | def __init__(self, loading): 10 | self.loading_strategy = LoadingStrategy(loading["strategy"]) 11 | 12 | def __eq__(self, other: object) -> bool: 13 | if isinstance(self, other.__class__): 14 | return self.loading_strategy == other.loading_strategy 15 | else: 16 | return False 17 | 18 | 19 | # Get a list of all the classes defined in the module 20 | def get_all_classes() -> List[str]: 21 | return [name for _, name, _ in pkgutil.iter_modules([os.path.dirname(__file__)])] 22 | 23 | 24 | __all__ = get_all_classes() 25 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_loading/document_loading_base.py: -------------------------------------------------------------------------------- 1 | # Create an abstract class for document loading 2 | from typing import List 3 | from abc import ABC, abstractmethod 4 | from ..common.source_document import SourceDocument 5 | 6 | 7 | class DocumentLoadingBase(ABC): 8 | def __init__(self) -> None: 9 | pass 10 | 11 | @abstractmethod 12 | def load(self, document_url: str) -> List[SourceDocument]: 13 | pass 14 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_loading/layout.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .document_loading_base import DocumentLoadingBase 3 | from ..helpers.azure_form_recognizer_helper import AzureFormRecognizerClient 4 | from ..common.source_document import SourceDocument 5 | 6 | 7 | class LayoutDocumentLoading(DocumentLoadingBase): 8 | def __init__(self) -> None: 9 | super().__init__() 10 | 11 | def load(self, document_url: str) -> List[SourceDocument]: 12 | azure_form_recognizer_client = AzureFormRecognizerClient() 13 | pages_content = azure_form_recognizer_client.begin_analyze_document_from_url( 14 | document_url, use_layout=True 15 | ) 16 | documents = [ 17 | SourceDocument( 18 | content=page["page_text"], 19 | source=document_url, 20 | offset=page["offset"], 21 | page_number=page["page_number"], 22 | ) 23 | for page in pages_content 24 | ] 25 | return documents 26 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_loading/read.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .document_loading_base import DocumentLoadingBase 3 | from ..helpers.azure_form_recognizer_helper import AzureFormRecognizerClient 4 | from ..common.source_document import SourceDocument 5 | 6 | 7 | class ReadDocumentLoading(DocumentLoadingBase): 8 | def __init__(self) -> None: 9 | super().__init__() 10 | 11 | def load(self, document_url: str) -> List[SourceDocument]: 12 | azure_form_recognizer_client = AzureFormRecognizerClient() 13 | pages_content = azure_form_recognizer_client.begin_analyze_document_from_url( 14 | document_url, use_layout=False 15 | ) 16 | documents = [ 17 | SourceDocument( 18 | content=page["page_text"], 19 | source=document_url, 20 | page_number=page["page_number"], 21 | offset=page["offset"], 22 | ) 23 | for page in pages_content 24 | ] 25 | return documents 26 | 
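Editor's note: the layout and read loaders above differ only in the use_layout flag passed to the Form Recognizer helper; every loader subclasses DocumentLoadingBase and returns a list of SourceDocument objects. Purely as a hypothetical illustration of that contract (this class does not exist in the repository), a plain-text loader could be written as:

```python
from typing import List

import requests  # already a dependency of this package (see word_document.py)

from utilities.common.source_document import SourceDocument
from utilities.document_loading.document_loading_base import DocumentLoadingBase


class PlainTextDocumentLoading(DocumentLoadingBase):
    """Hypothetical loader: fetches a text file over HTTP and wraps it in a
    single SourceDocument, mirroring the shape returned by the loaders above."""

    def load(self, document_url: str) -> List[SourceDocument]:
        response = requests.get(document_url, timeout=30)
        response.raise_for_status()
        return [
            SourceDocument(
                content=response.text,
                source=document_url,
                offset=0,
                page_number=1,
            )
        ]
```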
-------------------------------------------------------------------------------- /code/backend/batch/utilities/document_loading/strategies.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from .layout import LayoutDocumentLoading 3 | from .read import ReadDocumentLoading 4 | from .web import WebDocumentLoading 5 | from .word_document import WordDocumentLoading 6 | 7 | 8 | class LoadingStrategy(Enum): 9 | LAYOUT = "layout" 10 | READ = "read" 11 | WEB = "web" 12 | DOCX = "docx" 13 | 14 | 15 | def get_document_loader(loader_strategy: str): 16 | if loader_strategy == LoadingStrategy.LAYOUT.value: 17 | return LayoutDocumentLoading() 18 | elif loader_strategy == LoadingStrategy.READ.value: 19 | return ReadDocumentLoading() 20 | elif loader_strategy == LoadingStrategy.WEB.value: 21 | return WebDocumentLoading() 22 | elif loader_strategy == LoadingStrategy.DOCX.value: 23 | return WordDocumentLoading() 24 | else: 25 | raise Exception(f"Unknown loader strategy: {loader_strategy}") 26 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_loading/web.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import re 3 | from langchain_community.document_loaders import WebBaseLoader 4 | from .document_loading_base import DocumentLoadingBase 5 | from ..common.source_document import SourceDocument 6 | 7 | 8 | class WebDocumentLoading(DocumentLoadingBase): 9 | def __init__(self) -> None: 10 | super().__init__() 11 | 12 | def load(self, document_url: str) -> List[SourceDocument]: 13 | documents = WebBaseLoader(document_url).load() 14 | for document in documents: 15 | document.page_content = re.sub("\n{3,}", "\n\n", document.page_content) 16 | # Remove half non-ascii character from start/end of doc content 17 | pattern = re.compile( 18 | r"[\x00-\x1f\x7f\u0080-\u00a0\u2000-\u3000\ufff0-\uffff]" 19 | ) 20 | document.page_content = re.sub(pattern, "", document.page_content) 21 | if document.page_content == "": 22 | documents.remove(document) 23 | source_documents: List[SourceDocument] = [ 24 | SourceDocument( 25 | content=document.page_content, 26 | source=document.metadata["source"], 27 | ) 28 | for document in documents 29 | ] 30 | return source_documents 31 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/document_loading/word_document.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from io import BytesIO 3 | from docx import Document 4 | import requests 5 | from .document_loading_base import DocumentLoadingBase 6 | from ..common.source_document import SourceDocument 7 | 8 | 9 | class WordDocumentLoading(DocumentLoadingBase): 10 | def __init__(self) -> None: 11 | super().__init__() 12 | self.doc_headings_to_markdown_tags = { 13 | "Heading 1": "h1", 14 | "Heading 2": "h2", 15 | "Heading 3": "h3", 16 | "Heading 4": "h4", 17 | "Heading 5": "h5", 18 | "Heading 6": "h6", 19 | } 20 | 21 | def _download_document(self, document_url: str) -> BytesIO: 22 | response = requests.get(document_url) 23 | file = BytesIO(response.content) 24 | return file 25 | 26 | def _get_opening_tag(self, heading_level: int) -> str: 27 | return f"<{self.doc_headings_to_markdown_tags.get(f'{heading_level}', 'p')}>" 28 | 29 | def _get_closing_tag(self, heading_level: int) -> str: 30 | return f"" 31 | 32 | def 
load(self, document_url: str) -> List[SourceDocument]: 33 | output = "" 34 | document = Document(self._download_document(document_url)) 35 | for paragraph in document.paragraphs: 36 | output += f"{self._get_opening_tag(paragraph.style.name)}{paragraph.text}{self._get_closing_tag(paragraph.style.name)}\n" 37 | documents = [ 38 | SourceDocument( 39 | content=output, 40 | source=document_url, 41 | offset=0, 42 | page_number=0, 43 | ) 44 | ] 45 | return documents 46 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/backend/batch/utilities/helpers/__init__.py -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/config/assistant_strategy.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AssistantStrategy(Enum): 5 | DEFAULT = "default" 6 | CONTRACT_ASSISTANT = "contract assistant" 7 | EMPLOYEE_ASSISTANT = "employee assistant" 8 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/config/conversation_flow.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class ConversationFlow(Enum): 5 | CUSTOM = "custom" 6 | BYOD = "byod" 7 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/config/database_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class DatabaseType(Enum): 5 | COSMOSDB = "CosmosDB" 6 | POSTGRESQL = "PostgreSQL" 7 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/config/embedding_config.py: -------------------------------------------------------------------------------- 1 | from ..document_loading_helper import LoadingSettings 2 | from ..document_chunking_helper import ChunkingSettings 3 | 4 | 5 | class EmbeddingConfig(ChunkingSettings, LoadingSettings): 6 | def __init__( 7 | self, 8 | document_type: str, 9 | chunking: ChunkingSettings | None, 10 | loading: LoadingSettings | None, 11 | use_advanced_image_processing: bool, 12 | ): 13 | self.document_type = document_type 14 | self.chunking = chunking 15 | self.loading = loading 16 | self.use_advanced_image_processing = use_advanced_image_processing 17 | 18 | def __eq__(self, other): 19 | if isinstance(self, other.__class__): 20 | return ( 21 | self.document_type == other.document_type 22 | and self.chunking == other.chunking 23 | and self.loading == other.loading 24 | and self.use_advanced_image_processing 25 | == other.use_advanced_image_processing 26 | ) 27 | return False 28 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/document_chunking_helper.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ..common.source_document import SourceDocument 4 | from ..document_chunking.chunking_strategy import ChunkingSettings, ChunkingStrategy 5 | from ..document_chunking.strategies import get_document_chunker 6 | 7 | 
__all__ = ["ChunkingStrategy"] 8 | 9 | 10 | class DocumentChunking: 11 | def __init__(self) -> None: 12 | pass 13 | 14 | def chunk( 15 | self, documents: List[SourceDocument], chunking: ChunkingSettings 16 | ) -> List[SourceDocument]: 17 | chunker = get_document_chunker(chunking.chunking_strategy.value) 18 | if chunker is None: 19 | raise Exception( 20 | f"Unknown chunking strategy: {chunking.chunking_strategy.value}" 21 | ) 22 | return chunker.chunk(documents, chunking) 23 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/document_loading_helper.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ..common.source_document import SourceDocument 4 | from ..document_loading import LoadingSettings 5 | from ..document_loading.strategies import get_document_loader 6 | 7 | 8 | class DocumentLoading: 9 | def __init__(self) -> None: 10 | pass 11 | 12 | def load(self, document_url: str, loading: LoadingSettings) -> List[SourceDocument]: 13 | loader = get_document_loader(loading.loading_strategy.value) 14 | if loader is None: 15 | raise Exception( 16 | f"Unknown loader strategy: {loading.loading_strategy.value}" 17 | ) 18 | return loader.load(document_url) 19 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/embedders/embedder_base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class EmbedderBase(ABC): 5 | @abstractmethod 6 | def embed_file(self, source_url: str, file_name: str = None): 7 | pass 8 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/embedders/embedder_factory.py: -------------------------------------------------------------------------------- 1 | from ..env_helper import EnvHelper 2 | from ..config.database_type import DatabaseType 3 | from ..azure_blob_storage_client import AzureBlobStorageClient 4 | from .push_embedder import PushEmbedder 5 | from .postgres_embedder import PostgresEmbedder 6 | from .integrated_vectorization_embedder import ( 7 | IntegratedVectorizationEmbedder, 8 | ) 9 | 10 | 11 | class EmbedderFactory: 12 | @staticmethod 13 | def create(env_helper: EnvHelper): 14 | if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value: 15 | return PostgresEmbedder(AzureBlobStorageClient(), env_helper) 16 | else: 17 | if env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: 18 | return IntegratedVectorizationEmbedder(env_helper) 19 | else: 20 | return PushEmbedder(AzureBlobStorageClient(), env_helper) 21 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/helpers/orchestrator_helper.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ..orchestrator.orchestration_strategy import OrchestrationStrategy 4 | from ..orchestrator import OrchestrationSettings 5 | from ..orchestrator.strategies import get_orchestrator 6 | 7 | __all__ = ["OrchestrationStrategy"] 8 | 9 | 10 | class Orchestrator: 11 | def __init__(self) -> None: 12 | pass 13 | 14 | async def handle_message( 15 | self, 16 | user_message: str, 17 | chat_history: List[dict], 18 | conversation_id: str, 19 | orchestrator: OrchestrationSettings, 20 | **kwargs: dict, 21 | ) -> dict: 22 | orchestrator = 
get_orchestrator(orchestrator.strategy.value) 23 | if orchestrator is None: 24 | raise Exception( 25 | f"Unknown orchestration strategy: {orchestrator.strategy.value}" 26 | ) 27 | return await orchestrator.handle_message( 28 | user_message, chat_history, conversation_id 29 | ) 30 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/orchestrator/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | import os.path 4 | import pkgutil 5 | from .orchestration_strategy import OrchestrationStrategy 6 | 7 | 8 | class OrchestrationSettings: 9 | def __init__(self, orchestration: dict): 10 | self.strategy = OrchestrationStrategy(orchestration["strategy"]) 11 | 12 | 13 | # Get a list of all the classes defined in the module 14 | def get_all_classes() -> List[str]: 15 | return [name for _, name, _ in pkgutil.iter_modules([os.path.dirname(__file__)])] 16 | 17 | 18 | __all__ = get_all_classes() 19 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/orchestrator/orchestration_strategy.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class OrchestrationStrategy(Enum): 5 | OPENAI_FUNCTION = "openai_function" 6 | LANGCHAIN = "langchain" 7 | SEMANTIC_KERNEL = "semantic_kernel" 8 | PROMPT_FLOW = "prompt_flow" 9 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/orchestrator/strategies.py: -------------------------------------------------------------------------------- 1 | from .orchestration_strategy import OrchestrationStrategy 2 | from .open_ai_functions import OpenAIFunctionsOrchestrator 3 | from .lang_chain_agent import LangChainAgent 4 | from .semantic_kernel import SemanticKernelOrchestrator 5 | from .prompt_flow import PromptFlowOrchestrator 6 | 7 | 8 | def get_orchestrator(orchestration_strategy: str): 9 | if orchestration_strategy == OrchestrationStrategy.OPENAI_FUNCTION.value: 10 | return OpenAIFunctionsOrchestrator() 11 | elif orchestration_strategy == OrchestrationStrategy.LANGCHAIN.value: 12 | return LangChainAgent() 13 | elif orchestration_strategy == OrchestrationStrategy.SEMANTIC_KERNEL.value: 14 | return SemanticKernelOrchestrator() 15 | elif orchestration_strategy == OrchestrationStrategy.PROMPT_FLOW.value: 16 | return PromptFlowOrchestrator() 17 | else: 18 | raise Exception(f"Unknown orchestration strategy: {orchestration_strategy}") 19 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/parser/__init__.py: -------------------------------------------------------------------------------- 1 | # Create an abstract class for parser 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class ParserBase(ABC): 6 | def __init__(self) -> None: 7 | pass 8 | 9 | @abstractmethod 10 | def parse(self, input: dict, **kwargs: dict) -> dict: 11 | pass 12 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/parser/parser_base.py: -------------------------------------------------------------------------------- 1 | # Create an abstract class for parser 2 | from abc import ABC, abstractmethod 3 | from typing import List 4 | from ..common.source_document import SourceDocument 5 | 6 | 7 | class ParserBase(ABC): 8 | def __init__(self) -> None: 9 | pass 10 | 11 | 
@abstractmethod 12 | def parse( 13 | self, 14 | question: str, 15 | answer: str, 16 | source_documents: List[SourceDocument], 17 | **kwargs: dict 18 | ) -> List[dict]: 19 | pass 20 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/plugins/chat_plugin.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | from semantic_kernel.functions import kernel_function 4 | 5 | from ..common.answer import Answer 6 | from ..tools.question_answer_tool import QuestionAnswerTool 7 | from ..tools.text_processing_tool import TextProcessingTool 8 | 9 | 10 | class ChatPlugin: 11 | def __init__(self, question: str, chat_history: list[dict]) -> None: 12 | self.question = question 13 | self.chat_history = chat_history 14 | 15 | @kernel_function( 16 | description="Provide answers to any fact question coming from users." 17 | ) 18 | def search_documents( 19 | self, 20 | question: Annotated[ 21 | str, "A standalone question, converted from the chat history" 22 | ], 23 | ) -> Answer: 24 | return QuestionAnswerTool().answer_question( 25 | question=question, chat_history=self.chat_history 26 | ) 27 | 28 | @kernel_function( 29 | description="Useful when you want to apply a transformation on the text, like translate, summarize, rephrase and so on." 30 | ) 31 | def text_processing( 32 | self, 33 | text: Annotated[str, "The text to be processed"], 34 | operation: Annotated[ 35 | str, 36 | "The operation to be performed on the text. Like Translate to Italian, Summarize, Paraphrase, etc. If a language is specified, return that as part of the operation. Preserve the operation name in the user language.", 37 | ], 38 | ) -> Answer: 39 | return TextProcessingTool().answer_question( 40 | question=self.question, 41 | chat_history=self.chat_history, 42 | text=text, 43 | operation=operation, 44 | ) 45 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/plugins/post_answering_plugin.py: -------------------------------------------------------------------------------- 1 | from semantic_kernel.functions import kernel_function 2 | from semantic_kernel.functions.kernel_arguments import KernelArguments 3 | 4 | from ..common.answer import Answer 5 | from ..tools.post_prompt_tool import PostPromptTool 6 | 7 | 8 | class PostAnsweringPlugin: 9 | @kernel_function(description="Run post answering prompt to validate the answer.") 10 | def validate_answer(self, arguments: KernelArguments) -> Answer: 11 | return PostPromptTool().validate_answer(arguments["answer"]) 12 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/search/search.py: -------------------------------------------------------------------------------- 1 | from ..search.postgres_search_handler import AzurePostgresHandler 2 | from ..helpers.config.database_type import DatabaseType 3 | from ..search.azure_search_handler import AzureSearchHandler 4 | from ..search.integrated_vectorization_search_handler import ( 5 | IntegratedVectorizationSearchHandler, 6 | ) 7 | from ..search.search_handler_base import SearchHandlerBase 8 | from ..common.source_document import SourceDocument 9 | from ..helpers.env_helper import EnvHelper 10 | 11 | 12 | class Search: 13 | @staticmethod 14 | def get_search_handler(env_helper: EnvHelper) -> SearchHandlerBase: 15 | # TODO Since the full workflow for PostgreSQL indexing is not yet complete, you can comment 
out env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value. 16 | if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value: 17 | return AzurePostgresHandler(env_helper) 18 | else: 19 | if env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: 20 | return IntegratedVectorizationSearchHandler(env_helper) 21 | else: 22 | return AzureSearchHandler(env_helper) 23 | 24 | @staticmethod 25 | def get_source_documents( 26 | search_handler: SearchHandlerBase, question: str 27 | ) -> list[SourceDocument]: 28 | return search_handler.query_search(question) 29 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/backend/batch/utilities/tools/__init__.py -------------------------------------------------------------------------------- /code/backend/batch/utilities/tools/answer_processing_base.py: -------------------------------------------------------------------------------- 1 | # Create an abstract class for tool 2 | from abc import ABC, abstractmethod 3 | from ..common.answer import Answer 4 | 5 | 6 | class AnswerProcessingBase(ABC): 7 | def __init__(self) -> None: 8 | pass 9 | 10 | @abstractmethod 11 | def process_answer(self, answer: Answer, **kwargs: dict) -> Answer: 12 | pass 13 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/tools/answering_tool_base.py: -------------------------------------------------------------------------------- 1 | # Create an abstract class for tool 2 | from abc import ABC, abstractmethod 3 | from typing import List 4 | from ..common.answer import Answer 5 | 6 | 7 | class AnsweringToolBase(ABC): 8 | def __init__(self) -> None: 9 | pass 10 | 11 | @abstractmethod 12 | def answer_question( 13 | self, question: str, chat_history: List[dict], **kwargs 14 | ) -> Answer: 15 | pass 16 | -------------------------------------------------------------------------------- /code/backend/batch/utilities/tools/text_processing_tool.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from ..helpers.llm_helper import LLMHelper 3 | from .answering_tool_base import AnsweringToolBase 4 | from ..common.answer import Answer 5 | 6 | 7 | class TextProcessingTool(AnsweringToolBase): 8 | def __init__(self) -> None: 9 | self.name = "TextProcessing" 10 | 11 | def answer_question(self, question: str, chat_history: List[dict] = [], **kwargs): 12 | llm_helper = LLMHelper() 13 | text = kwargs.get("text") 14 | operation = kwargs.get("operation") 15 | user_content = ( 16 | f"{operation} the following TEXT: {text}" 17 | if (text and operation) 18 | else question 19 | ) 20 | 21 | system_message = """You are an AI assistant for the user.""" 22 | 23 | result = llm_helper.get_chat_completion( 24 | [ 25 | {"role": "system", "content": system_message}, 26 | {"role": "user", "content": user_content}, 27 | ] 28 | ) 29 | 30 | answer = Answer( 31 | question=question, 32 | answer=result.choices[0].message.content, 33 | source_documents=[], 34 | prompt_tokens=result.usage.prompt_tokens, 35 | completion_tokens=result.usage.completion_tokens, 36 | ) 37 | return answer 38 | -------------------------------------------------------------------------------- /code/backend/images/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/backend/images/favicon.ico -------------------------------------------------------------------------------- /code/backend/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/backend/images/logo.png -------------------------------------------------------------------------------- /code/backend/pages/common.css: -------------------------------------------------------------------------------- 1 | #MainMenu {visibility: hidden;} 2 | footer {visibility: hidden;} 3 | header {visibility: hidden;} 4 | [data-testid="baseButton-secondary"] svg{display: none;} 5 | [data-testid="stPopoverBody"] button{float: right;} 6 | [data-testid="stSidebar"] {z-index: 999;} 7 | [data-testid="stPopoverBody"] .stTooltipIcon [data-testid="baseButton-secondary"] p{ 8 | font-weight: bold; 9 | } 10 | [data-testid="stPopoverBody"] .stTooltipIcon { 11 | position: absolute; 12 | right: 0; 13 | margin-top: 0.3rem; 14 | } 15 | [data-testid="stPopoverBody"] [data-testid="stVerticalBlock"]{ 16 | width: 520px; 17 | } 18 | [data-testid="stPopoverBody"] .stTooltipIcon [data-testid="baseButton-secondary"]{ 19 | border: none; 20 | z-index: 1; 21 | } 22 | 23 | .stTextArea{width: 100% !important;} 24 | @media screen and (-ms-high-contrast: active), (forced-colors: active) { 25 | section, .st-emotion-cache-ch5dnh{ 26 | border: 2px solid WindowText;padding: 10px; 27 | background-color: Window; 28 | color: WindowText; 29 | } 30 | } 31 | @media screen and (max-width: 1280px) { 32 | .st-emotion-cache-1kyxreq{ 33 | max-width: 200px !important; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /code/frontend/__mocks__/fileMock.js: -------------------------------------------------------------------------------- 1 | module.exports = 'test-file-stub'; 2 | -------------------------------------------------------------------------------- /code/frontend/__mocks__/styleMock.js: -------------------------------------------------------------------------------- 1 | module.exports = {}; 2 | -------------------------------------------------------------------------------- /code/frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Chat with your data 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /code/frontend/jest.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from '@jest/types' 2 | 3 | const config: Config.InitialOptions = { 4 | preset: 'ts-jest', 5 | testEnvironment: 'jest-environment-jsdom', 6 | moduleNameMapper: { 7 | '\\.(css|less|sass|scss)$': 'identity-obj-proxy', 8 | '\\.(jpg|jpeg|png|gif|svg)$': '/__mocks__/fileMock.js', 9 | '^lodash-es$': 'lodash', 10 | }, 11 | setupFilesAfterEnv: ['/setupTests.ts'], 12 | testMatch: ['**/?(*.)+(spec|test).[jt]s?(x)'], 13 | transform: { 14 | '^.+\\.[tj]sx?$': 'ts-jest', 15 | }, 16 | transformIgnorePatterns: [ 17 | '/node_modules/(?!react-markdown|vfile|unist-util-stringify-position|unist-util-visit|bail|is-plain-obj)', 18 | ], 19 | collectCoverageFrom: ['src/**/*.{ts,tsx,js,jsx}'], 20 | coveragePathIgnorePatterns: [ 21 | '/node_modules/', // Ignore node_modules 22 | '/__mocks__/', // Ignore mocks 23 | '/src/api/', 24 | '/src/mocks/', 25 | '/src/test/', 26 | '/src/index.tsx', 27 | '/src/vite-env.d.ts', 28 | '/src/components/QuestionInput/index.ts', 29 | '/src/components/Answer/index.ts', 30 | '/src/components/Utils/utils.tsx', 31 | ], 32 | }; 33 | 34 | export default config; 35 | -------------------------------------------------------------------------------- /code/frontend/jest.polyfills.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @note The block below contains polyfills for Node.js globals 3 | * required for Jest to function when running JSDOM tests. 4 | 5 | * These HAVE to be require's and HAVE to be in this exact 6 | * order, since "undici" depends on the "TextEncoder" global API. 7 | * 8 | * Consider migrating to a more modern test runner if 9 | * you don't want to deal with this. 
10 | */ 11 | const { TextDecoder, TextEncoder } = require('node:util') 12 | 13 | Object.defineProperties(globalThis, { 14 | TextDecoder: { value: TextDecoder }, 15 | TextEncoder: { value: TextEncoder }, 16 | 17 | }) 18 | 19 | const { Blob } = require('node:buffer') 20 | const { fetch, Headers, FormData, Request, Response } = require('undici') 21 | 22 | Object.defineProperties(globalThis, { 23 | 24 | fetch: { value: fetch, writable: true }, 25 | Blob: { value: Blob }, 26 | Headers: { value: Headers }, 27 | FormData: { value: FormData }, 28 | Request: { value: Request }, 29 | Response: { value: Response }, 30 | }) 31 | -------------------------------------------------------------------------------- /code/frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/frontend/public/favicon.ico -------------------------------------------------------------------------------- /code/frontend/setupTests.ts: -------------------------------------------------------------------------------- 1 | import "@testing-library/jest-dom"; 2 | const { TextDecoder, TextEncoder } = require('node:util') 3 | 4 | import { initializeIcons } from "@fluentui/react/lib/Icons"; 5 | initializeIcons(); 6 | global.TextEncoder = TextEncoder; 7 | global.TextDecoder = TextDecoder; 8 | 9 | // Mock IntersectionObserver 10 | class IntersectionObserverMock { 11 | callback: IntersectionObserverCallback; 12 | options: IntersectionObserverInit; 13 | 14 | root: Element | null = null; // Required property 15 | rootMargin: string = '0px'; // Required property 16 | thresholds: number[] = [0]; // Required property 17 | 18 | constructor(callback: IntersectionObserverCallback, options: IntersectionObserverInit) { 19 | this.callback = callback; 20 | this.options = options; 21 | } 22 | 23 | observe = jest.fn((target: Element) => { 24 | // Simulate intersection with an observer instance 25 | this.callback([{ isIntersecting: true }] as IntersectionObserverEntry[], this as IntersectionObserver); 26 | }); 27 | 28 | unobserve = jest.fn(); 29 | disconnect = jest.fn(); // Required method 30 | takeRecords = jest.fn(); // Required method 31 | } 32 | 33 | // Store the original IntersectionObserver 34 | const originalIntersectionObserver = window.IntersectionObserver; 35 | 36 | beforeAll(() => { 37 | window.IntersectionObserver = IntersectionObserverMock as any; 38 | }); 39 | 40 | afterAll(() => { 41 | // Restore the original IntersectionObserver 42 | window.IntersectionObserver = originalIntersectionObserver; 43 | }); 44 | 45 | -------------------------------------------------------------------------------- /code/frontend/src/api/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./api"; 2 | export * from "./models"; 3 | -------------------------------------------------------------------------------- /code/frontend/src/assets/Quick source reference.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /code/frontend/src/assets/Send.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /code/frontend/src/assets/Summarize 
contracts.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /code/frontend/src/assets/mic-outline.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /code/frontend/src/assets/pauseIcon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /code/frontend/src/assets/speakerIcon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /code/frontend/src/components/Answer/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./Answer"; 2 | -------------------------------------------------------------------------------- /code/frontend/src/components/AssistantTypeSection/AssistantTypeSection.module.css: -------------------------------------------------------------------------------- 1 | .chatEmptyState { 2 | flex-grow: 1; 3 | display: flex; 4 | flex-direction: column; 5 | justify-content: center; 6 | align-items: center; 7 | } 8 | 9 | .chatIcon { 10 | height: 62px; 11 | width: 62px; 12 | } 13 | 14 | .chatEmptyStateTitle { 15 | font-family: "Segoe UI"; 16 | font-style: normal; 17 | font-weight: 700; 18 | font-size: 36px; 19 | display: flex; 20 | align-items: flex-end; 21 | text-align: center; 22 | margin-top: 24px; 23 | margin-bottom: 0px; 24 | } 25 | 26 | .chatEmptyStateSubtitle { 27 | margin-top: 16px; 28 | font-family: "Segoe UI"; 29 | font-style: normal; 30 | font-weight: 600; 31 | font-size: 16px; 32 | line-height: 150%; 33 | display: flex; 34 | align-items: flex-end; 35 | text-align: center; 36 | letter-spacing: -0.01em; 37 | color: #616161; 38 | } 39 | 40 | .dataText { 41 | background: linear-gradient(90deg, #464FEB 10.42%, #8330E9 100%); 42 | color: transparent; 43 | background-clip: text; 44 | } 45 | 46 | .loadingContainer { 47 | display: flex; 48 | flex-direction: column; 49 | align-items: center; 50 | justify-content: center; 51 | height: 100vh; /* Full viewport height */ 52 | } 53 | 54 | .loadingIcon { 55 | border: 8px solid #f3f3f3; /* Light grey */ 56 | border-top: 8px solid #3498db; /* Blue */ 57 | border-radius: 50%; 58 | width: 50px; 59 | height: 50px; 60 | animation: spin 1s linear infinite; 61 | } 62 | -------------------------------------------------------------------------------- /code/frontend/src/components/ChatHistoryListItemCell/ChatHistoryListItemCell.module.css: -------------------------------------------------------------------------------- 1 | .container { 2 | max-height: calc(100vh - 100px); 3 | width: 300px; 4 | } 5 | 6 | .itemCell { 7 | min-height: 32px; 8 | cursor: pointer; 9 | padding-left: 12px; 10 | padding-right: 12px; 11 | padding-top: 5px; 12 | padding-bottom: 5px; 13 | box-sizing: border-box; 14 | border-radius: 5px; 15 | display: flex; 16 | } 17 | 18 | .itemCell:hover { 19 | background: #e6e6e6; 20 | } 21 | 22 | .itemButton { 23 | display: flex; 24 | justify-content: center; 25 | align-items: center; 26 | width: 28px; 27 | height: 28px; 28 | border: 1px solid #d1d1d1; 29 | border-radius: 5px; 30 | background-color: white; 31 | margin: auto 2.5px; 32 | cursor: pointer; 33 | } 34 | 35 | 
.itemButton:hover { 36 | background-color: #e6e6e6; 37 | } 38 | 39 | .chatTitle { 40 | width: 80%; 41 | overflow: hidden; 42 | white-space: nowrap; 43 | text-overflow: ellipsis; 44 | } 45 | 46 | @media (max-width: 480px) { 47 | .container { 48 | width: 100%; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /code/frontend/src/components/ChatHistoryListItemGroups/ChatHistoryListItemGroups.module.css: -------------------------------------------------------------------------------- 1 | .listContainer { 2 | height: 100%; 3 | overflow: hidden auto; 4 | max-height: 80vh; 5 | } 6 | 7 | .chatGroup { 8 | margin: auto 5px; 9 | width: 100%; 10 | } 11 | 12 | .chatMonth { 13 | font-size: 14px; 14 | font-weight: 600; 15 | margin-bottom: 5px; 16 | padding-left: 15px; 17 | } 18 | 19 | .chatList { 20 | width: 100%; 21 | } 22 | 23 | .spinnerContainer { 24 | display: flex; 25 | justify-content: center; 26 | align-items: center; 27 | height: 22px; 28 | margin-top: -8px; 29 | } 30 | -------------------------------------------------------------------------------- /code/frontend/src/components/ChatHistoryPanel/ChatHistoryPanel.module.css: -------------------------------------------------------------------------------- 1 | .historyContainer { 2 | width: 20vw; 3 | background: radial-gradient(108.78% 108.78% at 50.02% 19.78%, #FFFFFF 57.29%, #EEF6FE 100%); 4 | border-radius: 8px; 5 | max-height: calc(100vh - 88px); 6 | box-shadow: 0px 2px 4px rgba(0, 0, 0, 0.14), 0px 0px 2px rgba(0, 0, 0, 0.12); 7 | overflow-y: hidden; 8 | } 9 | 10 | .historyPanelTopRightButtons { 11 | height: 48px; 12 | } 13 | 14 | .chatHistoryListContainer { 15 | height: 100%; 16 | } 17 | -------------------------------------------------------------------------------- /code/frontend/src/components/ChatMessageContainer/ChatMessageContainer.module.css: -------------------------------------------------------------------------------- 1 | .fetchMessagesSpinner { 2 | margin-top: 30vh; 3 | } 4 | 5 | .chatMessageUser { 6 | display: flex; 7 | justify-content: flex-end; 8 | margin-bottom: 12px; 9 | } 10 | 11 | .chatMessageUserMessage { 12 | padding: 20px; 13 | background: #edf5fd; 14 | border-radius: 8px; 15 | box-shadow: 16 | 0px 2px 4px rgba(0, 0, 0, 0.14), 17 | 0px 0px 2px rgba(0, 0, 0, 0.12); 18 | font-family: "Segoe UI"; 19 | font-style: normal; 20 | font-weight: 400; 21 | font-size: 14px; 22 | line-height: 22px; 23 | color: #242424; 24 | flex: none; 25 | order: 0; 26 | flex-grow: 0; 27 | white-space: pre-wrap; 28 | word-wrap: break-word; 29 | max-width: 800px; 30 | } 31 | 32 | .chatMessageGpt { 33 | margin-bottom: 12px; 34 | max-width: 80%; 35 | display: flex; 36 | } 37 | 38 | /* High contrast mode specific styles */ 39 | @media screen and (-ms-high-contrast: active), (forced-colors: active) { 40 | .chatMessageUserMessage { 41 | border: 2px solid WindowText; 42 | padding: 10px; 43 | background-color: Window; 44 | color: WindowText; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /code/frontend/src/components/HistoryButton/HistoryButton.module.css: -------------------------------------------------------------------------------- 1 | .historyButtonRoot { 2 | border: 1px solid #d1d1d1; 3 | } 4 | 5 | .historyButtonRoot:hover { 6 | border: 1px solid #d1d1d1; 7 | } 8 | 9 | .historyButtonRoot:active { 10 | border: 1px solid #d1d1d1; 11 | } 12 | 13 | @media (max-width: 480px) { 14 | .shareButtonRoot { 15 | width: auto; 16 | padding: 5px 8px; 17 | } 18 | 19 | 
.historyButtonRoot { 20 | width: auto; 21 | padding: 0 8px; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /code/frontend/src/components/HistoryButton/HistoryButton.tsx: -------------------------------------------------------------------------------- 1 | import {DefaultButton, IButtonProps } from "@fluentui/react"; 2 | import styles from "./HistoryButton.module.css"; 3 | 4 | interface ButtonProps extends IButtonProps { 5 | onClick: () => void; 6 | text: string | undefined; 7 | } 8 | 9 | export const HistoryButton: React.FC = ({ onClick, text }) => { 10 | return ( 11 | 17 | ); 18 | }; 19 | -------------------------------------------------------------------------------- /code/frontend/src/components/QuestionInput/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./QuestionInput"; 2 | -------------------------------------------------------------------------------- /code/frontend/src/components/Spinner/Spinner.module.css: -------------------------------------------------------------------------------- 1 | /* Full-screen overlay */ 2 | .overlay { 3 | position: fixed; 4 | top: 0; 5 | left: 0; 6 | width: 100%; 7 | height: 100%; 8 | background-color: rgba(0, 0, 0, 0.5); /* semi-transparent black background */ 9 | display: flex; 10 | align-items: center; 11 | justify-content: center; 12 | z-index: 999999999; /* Ensure it is above other content */ 13 | } 14 | 15 | -------------------------------------------------------------------------------- /code/frontend/src/components/Spinner/Spinner.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from 'react'; 2 | import { Spinner, SpinnerSize,ISpinnerStyles } from '@fluentui/react'; 3 | import styles from './Spinner.module.css'; 4 | 5 | interface SpinnerComponentProps { 6 | loading: boolean; 7 | label?: string; // Label is optional 8 | } 9 | 10 | const spinnerStyles: ISpinnerStyles = { 11 | label: { 12 | fontSize: '20px', // Increase font size to 20px 13 | color: 'rgb(91 184 255)', 14 | fontWeight: 600 15 | }, 16 | }; 17 | 18 | 19 | const SpinnerComponent: React.FC = ({ loading, label }) => { 20 | if (!loading) return null; 21 | 22 | return ( 23 |
<div className={styles.overlay}> 24 | <Spinner size={SpinnerSize.large} label={label} styles={spinnerStyles} /> 25 | </div>
26 | ); 27 | }; 28 | 29 | export default SpinnerComponent; 30 | -------------------------------------------------------------------------------- /code/frontend/src/index.css: -------------------------------------------------------------------------------- 1 | * { 2 | box-sizing: border-box; 3 | } 4 | 5 | html, 6 | body { 7 | height: 100%; 8 | margin: 0; 9 | padding: 0; 10 | } 11 | 12 | html { 13 | background: #f2f2f2; 14 | font-family: "Segoe UI", "Segoe UI Web (West European)", "Segoe UI", -apple-system, BlinkMacSystemFont, Roboto, "Helvetica Neue", sans-serif; 15 | -webkit-font-smoothing: antialiased; 16 | -moz-osx-font-smoothing: grayscale; 17 | } 18 | 19 | #root { 20 | height: 100%; 21 | } 22 | .mt-8 { 23 | margin-top: 8px; 24 | } 25 | -------------------------------------------------------------------------------- /code/frontend/src/index.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ReactDOM from "react-dom/client"; 3 | import { HashRouter, Routes, Route } from "react-router-dom"; 4 | import { initializeIcons } from "@fluentui/react"; 5 | 6 | import "./index.css"; 7 | 8 | import NoPage from "./pages/NoPage"; 9 | import Chat from "./pages/chat/Chat"; 10 | 11 | initializeIcons(); 12 | 13 | export default function App() { 14 | return ( 15 | 16 | 17 | 18 | } /> 19 | } /> 20 | 21 | 22 | 23 | ); 24 | } 25 | 26 | ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render( 27 | 28 | 29 | 30 | ); 31 | -------------------------------------------------------------------------------- /code/frontend/src/pages/NoPage.test.tsx: -------------------------------------------------------------------------------- 1 | import "@testing-library/jest-dom"; 2 | import { render, screen } from "@testing-library/react"; 3 | import NoPage from "./NoPage"; 4 | 5 | describe("NoPage.tsx", () => { 6 | beforeEach(() => { 7 | jest.clearAllMocks(); 8 | }); 9 | 10 | test("renders NoPage Component heading", () => { 11 | render(); 12 | const headingElement = screen.getByRole("heading"); 13 | expect(headingElement).toBeInTheDocument(); 14 | }); 15 | 16 | test("renders NoPage Component with Correct Text", () => { 17 | render(); 18 | const ErrorElement = screen.getByText("404"); 19 | expect(ErrorElement.textContent).toEqual("404"); 20 | }); 21 | 22 | test("renders NoPage Component heading Level 1", () => { 23 | render(); 24 | const headingElement = screen.getByRole("heading", { level: 1 }); 25 | expect(headingElement.tagName).toEqual("H1"); 26 | }); 27 | }); 28 | -------------------------------------------------------------------------------- /code/frontend/src/pages/NoPage.tsx: -------------------------------------------------------------------------------- 1 | const NoPage = () => { 2 | return

<h1>404</h1>

; 3 | }; 4 | 5 | export default NoPage; 6 | -------------------------------------------------------------------------------- /code/frontend/src/pages/chat/Cards_contract/Cards.module.css: -------------------------------------------------------------------------------- 1 | 2 | /* Homepage.module.css */ 3 | .container { 4 | display: flex; 5 | flex-direction: column; 6 | align-items: center; 7 | padding: 16px; 8 | /* background-color: #FAF9F8; */ 9 | } 10 | 11 | .mainContent { 12 | width: 100%; 13 | display: flex; 14 | justify-content: center; 15 | } 16 | 17 | .cards { 18 | display: flex; 19 | flex-wrap: wrap; 20 | gap: 16px; /* Adjust gap between cards as needed */ 21 | justify-content: center; /* Center align cards in the row */ 22 | } 23 | 24 | .featureCard { 25 | width: 260px; /* Set a fixed width for the cards or adjust as needed */ 26 | display: inline; 27 | flex-direction: column; 28 | justify-content: space-between; 29 | text-align: start; 30 | background-color: #FAF9F8; 31 | border-radius: 8px; /* Rounded corners */ 32 | border: 1px solid #EDEBE9; /* Border around the card */ 33 | box-shadow: 0px 4px 8px 0px #00000024; 34 | padding: 18px; /* Add padding inside the card */ 35 | } 36 | 37 | 38 | .cardTitle { 39 | font-size: 16px; /* Adjust font size as needed */ 40 | margin-top: -30px; 41 | margin-left: 30px; 42 | font-weight: 600; 43 | color: #3d3d3d; 44 | 45 | } 46 | .iconTitleContainer{ 47 | display: flex; 48 | align-items: center; 49 | } 50 | .cardDescription { 51 | font-size: 14px; /* Adjust font size as needed */ 52 | color: #3d3d3d; 53 | margin-bottom: -2px; 54 | } 55 | .icon{ 56 | margin-top: 10px; 57 | } 58 | -------------------------------------------------------------------------------- /code/frontend/src/pages/chat/Cards_contract/Cards.test.tsx: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import Cards from './Cards'; 3 | 4 | describe('Cards Component', () => { 5 | beforeEach(() => { 6 | render(); 7 | }); 8 | 9 | test('renders the main container', () => { 10 | expect(screen.getByRole('main')).toBeInTheDocument(); 11 | }); 12 | 13 | test('renders "Interact with Data" card correctly', () => { 14 | expect(screen.getByText(/Interact with Data/i)).toBeInTheDocument(); 15 | expect(screen.getByText(/Intuitive and conversational search experience/i)).toBeInTheDocument(); 16 | expect(screen.getByAltText(/Intract with Data/i)).toBeInTheDocument(); 17 | }); 18 | 19 | test('renders "Summarize Contracts" card correctly', () => { 20 | expect(screen.getByText(/Summarize Contracts/i)).toBeInTheDocument(); 21 | expect(screen.getByText(/Quickly review and summarize lengthy documents/i)).toBeInTheDocument(); 22 | expect(screen.getByAltText(/Summarize Contracts/i)).toBeInTheDocument(); 23 | }); 24 | 25 | test('renders "Quick Source Reference" card correctly', () => { 26 | expect(screen.getByText(/Quick Source Reference/i)).toBeInTheDocument(); 27 | expect(screen.getByText(/Effortlessly retrieve and reference original documents/i)).toBeInTheDocument(); 28 | expect(screen.getByAltText(/Source Reference/i)).toBeInTheDocument(); 29 | }); 30 | }); 31 | -------------------------------------------------------------------------------- /code/frontend/src/util/SpeechToText.ts: -------------------------------------------------------------------------------- 1 | import { 2 | SpeechConfig, 3 | AudioConfig, 4 | SpeechRecognizer, 5 | AutoDetectSourceLanguageConfig, 6 | } from 
"microsoft-cognitiveservices-speech-sdk"; 7 | 8 | const fetchSpeechConfig = async (): Promise<{ token: string, region: string, languages: string[]; }> => { 9 | try { 10 | const response = await fetch("/api/speech"); 11 | 12 | if (!response.ok) { 13 | throw new Error("Network response was not ok"); 14 | } 15 | return response.json(); 16 | } catch (error) { 17 | throw error; 18 | } 19 | }; 20 | 21 | export const multiLingualSpeechRecognizer = async () => { 22 | const { token, region, languages } = await fetchSpeechConfig(); 23 | 24 | const speechConfig = SpeechConfig.fromAuthorizationToken( 25 | token, 26 | region 27 | ); 28 | 29 | const audioConfig = AudioConfig.fromDefaultMicrophoneInput(); 30 | 31 | try { 32 | const autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.fromLanguages(languages); 33 | return SpeechRecognizer.FromConfig(speechConfig, autoDetectSourceLanguageConfig, audioConfig); 34 | } catch (error) { 35 | return new SpeechRecognizer(speechConfig, audioConfig); 36 | } 37 | }; 38 | -------------------------------------------------------------------------------- /code/frontend/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /code/frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ESNext", 4 | "useDefineForClassFields": true, 5 | "lib": [ 6 | "DOM", 7 | "DOM.Iterable", 8 | "ESNext" 9 | ], 10 | "allowJs": false, 11 | "skipLibCheck": true, 12 | "esModuleInterop": true, 13 | "allowSyntheticDefaultImports": true, 14 | "strict": true, 15 | "forceConsistentCasingInFileNames": true, 16 | "module": "ESNext", 17 | "moduleResolution": "Node", 18 | "resolveJsonModule": true, 19 | "isolatedModules": true, 20 | "noEmit": true, 21 | "jsx": "react-jsx", 22 | "typeRoots": ["node_modules/@types"], 23 | "types": [ 24 | "vite/client", "jest", "node", 25 | ], 26 | }, 27 | "exclude": ["node_modules"], 28 | "include": [ 29 | "src", "setupTests.ts" 30 | ], 31 | "references": [ 32 | { 33 | "path": "./tsconfig.node.json" 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /code/frontend/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "module": "ESNext", 5 | "moduleResolution": "Node", 6 | "allowSyntheticDefaultImports": true 7 | }, 8 | "include": ["vite.config.ts"] 9 | } 10 | -------------------------------------------------------------------------------- /code/frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from "vite"; 2 | import react from "@vitejs/plugin-react"; 3 | 4 | // https://vitejs.dev/config/ 5 | export default defineConfig({ 6 | plugins: [react()], 7 | build: { 8 | outDir: "../dist/static", 9 | emptyOutDir: true, 10 | sourcemap: true 11 | }, 12 | server: { 13 | host: true, 14 | proxy: { 15 | "/api": { 16 | target: "http://127.0.0.1:5050", 17 | changeOrigin: true, 18 | secure: false 19 | } 20 | } 21 | } 22 | }); 23 | -------------------------------------------------------------------------------- /code/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import ssl 2 | 3 | import pytest 4 | import trustme 5 | 6 | 7 | 
@pytest.fixture(scope="session") 8 | def ca(): 9 | """ 10 | This fixture is required to run the http mock server with SSL. 11 | https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server 12 | """ 13 | return trustme.CA() 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def httpserver_ssl_context(ca): 18 | """ 19 | This fixture is required to run the http mock server with SSL. 20 | https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server 21 | """ 22 | context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) 23 | localhost_cert = ca.issue_cert("localhost") 24 | localhost_cert.configure_cert(context) 25 | return context 26 | 27 | 28 | @pytest.fixture(scope="session") 29 | def httpclient_ssl_context(ca): 30 | """ 31 | This fixture is required to run the http mock server with SSL. 32 | https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server 33 | """ 34 | with ca.cert_pem.tempfile() as ca_temp_path: 35 | return ssl.create_default_context(cafile=ca_temp_path) 36 | -------------------------------------------------------------------------------- /code/tests/constants.py: -------------------------------------------------------------------------------- 1 | AZURE_STORAGE_CONFIG_CONTAINER_NAME = "config" 2 | AZURE_STORAGE_CONFIG_FILE_NAME = "active.json" 3 | 4 | COMPUTER_VISION_VECTORIZE_IMAGE_PATH = "/computervision/retrieval:vectorizeImage" 5 | COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD = "POST" 6 | COMPUTER_VISION_VECTORIZE_TEXT_PATH = "/computervision/retrieval:vectorizeText" 7 | COMPUTER_VISION_VECTORIZE_TEXT_REQUEST_METHOD = "POST" 8 | -------------------------------------------------------------------------------- /code/tests/functional/resources/README.md: -------------------------------------------------------------------------------- 1 | # Why is this here? 2 | 3 | The file [9b5ad71b2ce5302211f9c61530b329a4922fc6a4](9b5ad71b2ce5302211f9c61530b329a4922fc6a4) is required to stop the 4 | tiktoken library from making a call out to the internet to retrieve the required encoder. 5 | 6 | You can see where this happens in the code here https://github.com/openai/tiktoken/blob/1b9faf2779855124f05174adf1383e53689ed94b/tiktoken/load.py#L25, 7 | which calls out to https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken. 8 | 9 | There is an open issue against the library to resolve this problem https://github.com/openai/tiktoken/issues/232. 10 | 11 | The stored file is a copy of this remote file. 
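For context, a hedged sketch of how a vendored encoding file like this is typically picked up: tiktoken honours the `TIKTOKEN_CACHE_DIR` environment variable and looks for a cache file named after the SHA-1 hash of the encoding URL, which is what the filename above appears to be. Whether this test suite sets the variable exactly as below is an assumption:

```py
import os

import tiktoken

# Point tiktoken's cache at the vendored directory before the first encoding
# lookup so no network call is made. The directory value is an assumption.
os.environ["TIKTOKEN_CACHE_DIR"] = "code/tests/functional/resources"

encoding = tiktoken.get_encoding("cl100k_base")  # resolved from the local cache file
print(len(encoding.encode("offline encoder lookup")))
```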
-------------------------------------------------------------------------------- /code/tests/functional/tests/backend_api/common.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import socket 3 | import threading 4 | import time 5 | import requests 6 | from threading import Thread 7 | from create_app import create_app 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def start_app(app_port: int) -> Thread: 13 | logger.info(f"Starting application on port {app_port}") 14 | app = create_app() 15 | app_process = threading.Thread(target=lambda: app.run(port=app_port), daemon=True) 16 | app_process.start() 17 | wait_for_app(app_port) 18 | logger.info("Application started") 19 | return app_process 20 | 21 | 22 | def wait_for_app(port: int, initial_check_delay: int = 2): 23 | attempts = 0 24 | time.sleep(initial_check_delay) 25 | while attempts < 10: 26 | try: 27 | response = requests.get(f"http://localhost:{port}/api/health") 28 | if response.status_code == 200: 29 | return 30 | except Exception: 31 | pass 32 | 33 | attempts += 1 34 | time.sleep(1) 35 | 36 | raise Exception("App failed to start") 37 | 38 | 39 | def get_free_port() -> int: 40 | s = socket.socket(socket.AF_INET, type=socket.SOCK_STREAM) 41 | s.bind(("localhost", 0)) 42 | _, port = s.getsockname() 43 | s.close() 44 | return port 45 | -------------------------------------------------------------------------------- /code/tests/functional/tests/backend_api/default/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/tests/functional/tests/backend_api/default/__init__.py -------------------------------------------------------------------------------- /code/tests/functional/tests/backend_api/default/test_health.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import requests 3 | 4 | from tests.functional.app_config import AppConfig 5 | 6 | pytestmark = pytest.mark.functional 7 | 8 | 9 | def test_health(app_url: str, app_config: AppConfig): 10 | # when 11 | response = requests.get(f"{app_url}/api/health") 12 | 13 | # then 14 | assert response.status_code == 200 15 | assert response.text == "OK" 16 | -------------------------------------------------------------------------------- /code/tests/functional/tests/backend_api/integrated_vectorization_custom_conversation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/tests/functional/tests/backend_api/integrated_vectorization_custom_conversation/__init__.py -------------------------------------------------------------------------------- /code/tests/functional/tests/backend_api/sk_orchestrator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/tests/functional/tests/backend_api/sk_orchestrator/__init__.py -------------------------------------------------------------------------------- /code/tests/functional/tests/backend_api/with_byod/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/tests/functional/tests/backend_api/with_byod/__init__.py -------------------------------------------------------------------------------- /code/tests/functional/tests/backend_api/without_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/tests/functional/tests/backend_api/without_data/__init__.py -------------------------------------------------------------------------------- /code/tests/functional/tests/functions/README.md: -------------------------------------------------------------------------------- 1 | # Azure Functions Tests 2 | 3 | The functional tests for Azure Functions do not run the Azure functions locally, but instead invoke the entrypoints of the Python functions directly within each test. 4 | 5 | For example, consider the following: 6 | 7 | ```py 8 | import azure.functions as func 9 | 10 | app = func.FunctionApp() 11 | 12 | @app.function_name(name="HttpTrigger1") 13 | @app.route(route="req") 14 | def main(req): 15 | user = req.params.get("user") 16 | return f"Hello, {user}!" 17 | ``` 18 | 19 | Instead of making an HTTP request to `/api/req` from within a test, import the function directly and call the function with a payload similar to what would be 20 | expected when running in Azure. 21 | 22 | 23 | ```py 24 | import azure.functions as func 25 | 26 | def test_main(): 27 | # given 28 | req = func.HttpRequest( 29 | method="GET", 30 | url="http://localhost:7071/api/req", 31 | body=b"", 32 | params={ 33 | "user": "world", 34 | }, 35 | ) 36 | 37 | # when 38 | res = main.build().get_user_function()(req) 39 | 40 | # then 41 | assert res == "Hello, world!" 42 | ``` 43 | 44 | Downstream dependcies are mocked using [pytest-httpserver](https://pytest-httpserver.readthedocs.io/). 
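As a minimal, hypothetical sketch of the pytest-httpserver side (the route, parameter names and assertion are invented for illustration), a downstream HTTP dependency can be stubbed while the entrypoint is invoked directly, as in the example above:

```py
import azure.functions as func


def test_main_with_mocked_downstream(httpserver):
    # given: stub the downstream service the function under test would call
    httpserver.expect_request("/users/world").respond_with_json({"greeting": "Hello, world!"})

    req = func.HttpRequest(
        method="GET",
        url="http://localhost:7071/api/req",
        body=b"",
        params={
            "user": "world",
            "backend_url": httpserver.url_for("/users/world"),  # injected stub URL
        },
    )

    # when: invoke the entrypoint directly, as shown above
    res = main.build().get_user_function()(req)

    # then
    assert "world" in res
```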
45 | -------------------------------------------------------------------------------- /code/tests/functional/tests/functions/advanced_image_processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/tests/functional/tests/functions/advanced_image_processing/__init__.py -------------------------------------------------------------------------------- /code/tests/functional/tests/functions/integrated_vectorization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/code/tests/functional/tests/functions/integrated_vectorization/__init__.py -------------------------------------------------------------------------------- /code/tests/test_azure_blob_storage.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from backend.batch.utilities.helpers.azure_blob_storage_client import ( 3 | AzureBlobStorageClient, 4 | ) 5 | 6 | 7 | @pytest.fixture 8 | def blob_client(): 9 | return AzureBlobStorageClient() 10 | 11 | 12 | @pytest.mark.azure("This test requires Azure Blob Storage") 13 | def test_upload_and_download_file(blob_client): 14 | # Upload a file 15 | file_name = "test_file.txt" 16 | file_contents = b"Hello, world!" 17 | blob_client.upload_file(file_contents, file_name) 18 | # Download the file 19 | downloaded_contents = blob_client.download_file(file_name) 20 | # Check that the downloaded contents match the original contents 21 | assert downloaded_contents == file_contents 22 | # Delete the file 23 | blob_client.delete_file(file_name) 24 | -------------------------------------------------------------------------------- /code/tests/utilities/helpers/test_secret_helper.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock, patch 2 | from pytest import MonkeyPatch 3 | from backend.batch.utilities.helpers.env_helper import SecretHelper 4 | 5 | 6 | def test_get_secret_returns_value_from_environment_variables(monkeypatch: MonkeyPatch): 7 | # given 8 | secret_name = "MY_SECRET" 9 | expected_value = "my_secret_value" 10 | monkeypatch.setenv(secret_name, expected_value) 11 | secret_helper = SecretHelper() 12 | 13 | # when 14 | actual_value = secret_helper.get_secret(secret_name) 15 | 16 | # then 17 | assert actual_value == expected_value 18 | 19 | 20 | @patch("backend.batch.utilities.helpers.env_helper.SecretClient") 21 | def test_get_secret_returns_value_from_secret_client_when_use_key_vault_is_true( 22 | secret_client: MagicMock, monkeypatch: MonkeyPatch 23 | ): 24 | # given 25 | secret_name = "MY_SECRET" 26 | expected_value = "" 27 | monkeypatch.setenv("USE_KEY_VAULT", "true") 28 | secret_client.return_value.get_secret.return_value.value = expected_value 29 | secret_helper = SecretHelper() 30 | 31 | # when 32 | actual_value = secret_helper.get_secret(secret_name) 33 | 34 | # then 35 | assert actual_value == expected_value 36 | 37 | 38 | def test_get_secret_returns_empty_string_when_secret_name_is_empty(): 39 | # given 40 | secret_name = "" 41 | expected_value = "" 42 | secret_helper = SecretHelper() 43 | 44 | # when 45 | actual_value = secret_helper.get_secret(secret_name) 46 | 47 | # then 48 | assert actual_value == expected_value 49 
| -------------------------------------------------------------------------------- /code/tests/utilities/orchestrator/test_orchestrator.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from backend.batch.utilities.helpers.orchestrator_helper import ( 3 | Orchestrator, 4 | OrchestrationSettings, 5 | ) 6 | 7 | 8 | @pytest.mark.azure("This test requires Azure Open AI configured") 9 | @pytest.mark.asyncio 10 | async def test_orchestrator_openai_function(): 11 | message_orchestrator = Orchestrator() 12 | strategy = "openai_function" 13 | messages = await message_orchestrator.handle_message( 14 | user_message="What's Azure AI Search?", 15 | chat_history=[], 16 | conversation_id="test_openai_function", 17 | orchestrator=OrchestrationSettings({"strategy": strategy}), 18 | ) 19 | assert messages[-1]["role"] == "assistant" 20 | assert messages[-1]["content"] != "" 21 | 22 | 23 | @pytest.mark.azure("This test requires Azure Open AI configured") 24 | @pytest.mark.asyncio 25 | async def test_orchestrator_langchain(): 26 | message_orchestrator = Orchestrator() 27 | strategy = "langchain" 28 | messages = await message_orchestrator.handle_message( 29 | user_message="What's Azure AI Search?", 30 | chat_history=[], 31 | conversation_id="test_langchain", 32 | orchestrator=OrchestrationSettings({"strategy": strategy}), 33 | ) 34 | assert messages[-1]["role"] == "assistant" 35 | assert messages[-1]["content"] != "" 36 | -------------------------------------------------------------------------------- /code/tests/utilities/plugins/test_post_answering_plugin.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch, MagicMock 2 | 3 | import pytest 4 | from backend.batch.utilities.common.answer import Answer 5 | from backend.batch.utilities.plugins.post_answering_plugin import PostAnsweringPlugin 6 | from semantic_kernel import Kernel 7 | 8 | 9 | @patch("backend.batch.utilities.plugins.post_answering_plugin.PostPromptTool") 10 | @pytest.mark.asyncio 11 | async def test_validate_answer(PostPromptToolMock: MagicMock): 12 | # given 13 | kernel = Kernel() 14 | 15 | plugin = kernel.add_plugin( 16 | plugin=PostAnsweringPlugin(), 17 | plugin_name="PostAnswering", 18 | ) 19 | answer = Answer(question="question", answer="answer") 20 | mock_answer = Answer(question="question", answer="mock-answer") 21 | 22 | PostPromptToolMock.return_value.validate_answer.return_value = mock_answer 23 | 24 | # when 25 | response = await kernel.invoke(plugin["validate_answer"], answer=answer) 26 | 27 | # then 28 | assert response is not None 29 | assert response.value == mock_answer 30 | 31 | PostPromptToolMock.return_value.validate_answer.assert_called_once_with(answer) 32 | -------------------------------------------------------------------------------- /code/tests/utilities/tools/test_content_safety_checker.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from backend.batch.utilities.tools.content_safety_checker import ContentSafetyChecker 3 | 4 | 5 | @pytest.mark.azure("This test requires Azure Content Safety configured") 6 | def test_document_chunking_layout(): 7 | cut = ContentSafetyChecker() 8 | 9 | safe_input = "This is a test" 10 | unsafe_input = "I hate short people, they are dumb" 11 | 12 | assert cut.validate_input_and_replace_if_harmful(safe_input) == safe_input 13 | assert cut.validate_output_and_replace_if_harmful(safe_input) == safe_input 14 | assert 
cut.validate_input_and_replace_if_harmful(unsafe_input) != unsafe_input 15 | assert cut.validate_output_and_replace_if_harmful(unsafe_input) != unsafe_input 16 | -------------------------------------------------------------------------------- /data/Benefit_Options.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Benefit_Options.pdf -------------------------------------------------------------------------------- /data/MSFT_FY23Q4_10K.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/MSFT_FY23Q4_10K.docx -------------------------------------------------------------------------------- /data/Northwind_Health_Plus_Benefits_Details.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Northwind_Health_Plus_Benefits_Details.pdf -------------------------------------------------------------------------------- /data/Northwind_Standard_Benefits_Details.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Northwind_Standard_Benefits_Details.pdf -------------------------------------------------------------------------------- /data/PerksPlus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/PerksPlus.pdf -------------------------------------------------------------------------------- /data/PressReleaseFY23Q4.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/PressReleaseFY23Q4.docx -------------------------------------------------------------------------------- /data/Woodgrove - Cyber Risk Insurance Policy_Commercial Insurance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Woodgrove - Cyber Risk Insurance Policy_Commercial Insurance.pdf -------------------------------------------------------------------------------- /data/Woodgrove - Cyber Risk Insurance Policy_Commercial Insurance_Important Prompts For_Claims Handlers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Woodgrove - Cyber Risk Insurance Policy_Commercial Insurance_Important Prompts For_Claims Handlers.pdf -------------------------------------------------------------------------------- /data/Woodgrove - Insurance Underwriting_Key Prompts for Underwriters when evaluating Financial Results.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Woodgrove - Insurance Underwriting_Key Prompts for Underwriters when evaluating Financial Results.pdf -------------------------------------------------------------------------------- /data/Woodgrove - Insurance_Summary Plan Description_Employee Benefits.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Woodgrove - Insurance_Summary Plan Description_Employee Benefits.pdf -------------------------------------------------------------------------------- /data/Woodgrove - Insurance_Summary Plan Description_Employee Benefits_Important Prompts For_Employees.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Woodgrove - Insurance_Summary Plan Description_Employee Benefits_Important Prompts For_Employees.pdf -------------------------------------------------------------------------------- /data/Woodgrove - Insurance_Summary Plan Description_Employee Benefits_Important Prompts For_Insurance Agents.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Woodgrove - Insurance_Summary Plan Description_Employee Benefits_Important Prompts For_Insurance Agents.pdf -------------------------------------------------------------------------------- /data/Woodgrove - Mortgage Product Manual - 1.0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Woodgrove - Mortgage Product Manual - 1.0.pdf -------------------------------------------------------------------------------- /data/Woodgrove Asset Management - Prospective of Asset Management Funds.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/Woodgrove Asset Management - Prospective of Asset Management Funds.pdf -------------------------------------------------------------------------------- /data/contract_data/1628215729_Kyndryl_-_Master_Agreement__executed_utah.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/1628215729_Kyndryl_-_Master_Agreement__executed_utah.pdf -------------------------------------------------------------------------------- /data/contract_data/Final_MA_999_200000000170_3_MA_FORM_ADV_PDF wireless.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Final_MA_999_200000000170_3_MA_FORM_ADV_PDF wireless.PDF -------------------------------------------------------------------------------- 
/data/contract_data/Final_MA_999_200000000325_3_MA_FORM_ADV_PDF.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Final_MA_999_200000000325_3_MA_FORM_ADV_PDF.PDF -------------------------------------------------------------------------------- /data/contract_data/Initial_MA_2023_V1 - servers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Initial_MA_2023_V1 - servers.pdf -------------------------------------------------------------------------------- /data/contract_data/Legal contract_20240411112609.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Legal contract_20240411112609.pdf -------------------------------------------------------------------------------- /data/contract_data/Master_Agreement_OEM_Filters_ALDOT_V1 - OEM filters.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Master_Agreement_OEM_Filters_ALDOT_V1 - OEM filters.pdf -------------------------------------------------------------------------------- /data/contract_data/Master_Agreement_OEM_Filters_V1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Master_Agreement_OEM_Filters_V1.pdf -------------------------------------------------------------------------------- /data/contract_data/Master_Agreement_Renewed_V2 - copiers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Master_Agreement_Renewed_V2 - copiers.pdf -------------------------------------------------------------------------------- /data/contract_data/Master_Agreement_V1 (1).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Master_Agreement_V1 (1).pdf -------------------------------------------------------------------------------- /data/contract_data/Master_Agreement_V1 - July.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Master_Agreement_V1 - July.pdf -------------------------------------------------------------------------------- /data/contract_data/Master_Agreement_V1 - May.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Master_Agreement_V1 - May.pdf -------------------------------------------------------------------------------- /data/contract_data/Master_Agreement_V1 - propane.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Master_Agreement_V1 - propane.pdf -------------------------------------------------------------------------------- /data/contract_data/Master_agreement_2024_V1 products_services.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Master_agreement_2024_V1 products_services.pdf -------------------------------------------------------------------------------- /data/contract_data/NASPO_Participating_Addendum - insight public sector.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/NASPO_Participating_Addendum - insight public sector.pdf -------------------------------------------------------------------------------- /data/contract_data/NASPO_VP_SVAR_Insight_AL_PA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/NASPO_VP_SVAR_Insight_AL_PA.pdf -------------------------------------------------------------------------------- /data/contract_data/Server_Storage_Solutions_Technical_Services_ITB_v1.2 - OEM Terms.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Server_Storage_Solutions_Technical_Services_ITB_v1.2 - OEM Terms.pdf -------------------------------------------------------------------------------- /data/contract_data/State_of_Alabama_NASPO_Cloud_Services_PA_032224_.docx 1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/State_of_Alabama_NASPO_Cloud_Services_PA_032224_.docx 1.pdf -------------------------------------------------------------------------------- /data/contract_data/State_of_Alabama_NASPO_Cloud_Services_PA_032224_.docx.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/State_of_Alabama_NASPO_Cloud_Services_PA_032224_.docx.pdf -------------------------------------------------------------------------------- /data/contract_data/Statewide_Truck_Chassis_19_000_GVWR_and_Greater-Southland_V1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/contract_data/Statewide_Truck_Chassis_19_000_GVWR_and_Greater-Southland_V1.pdf -------------------------------------------------------------------------------- /data/employee_handbook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/employee_handbook.pdf -------------------------------------------------------------------------------- /data/init.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /data/role_library.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/data/role_library.pdf -------------------------------------------------------------------------------- /docker/Admin.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.7-bookworm 2 | RUN apt-get update && apt-get install python3-tk tk-dev -y 3 | COPY pyproject.toml /usr/local/src/myscripts/pyproject.toml 4 | COPY poetry.lock /usr/local/src/myscripts/poetry.lock 5 | WORKDIR /usr/local/src/myscripts/ 6 | RUN pip install --upgrade pip && pip install poetry && poetry self add poetry-plugin-export && poetry export -o requirements.txt && pip install -r requirements.txt 7 | COPY ./code/backend /usr/local/src/myscripts/admin 8 | COPY ./code/backend/batch/utilities /usr/local/src/myscripts/utilities 9 | WORKDIR /usr/local/src/myscripts/admin 10 | # https://github.com/docker/buildx/issues/2751 11 | ENV PYTHONPATH="${PYTHONPATH}:/usr/local/src/myscripts/" 12 | EXPOSE 80 13 | CMD ["streamlit", "run", "Admin.py", "--server.port", "80", "--server.enableXsrfProtection", "false"] 14 | -------------------------------------------------------------------------------- /docker/Backend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azure-functions/python:4-python3.11 2 | 3 | ENV AzureWebJobsScriptRoot=/home/site/wwwroot \ 4 | AzureFunctionsJobHost__Logging__Console__IsEnabled=true \ 5 | AzureWebJobsFeatureFlags=EnableWorkerIndexing 6 | 7 | COPY pyproject.toml / 8 | COPY poetry.lock / 9 | RUN pip install --upgrade pip && pip install poetry && poetry self add poetry-plugin-export && poetry export -o requirements.txt && pip install -r requirements.txt 10 | 11 | COPY ./code/backend/batch/utilities /home/site/wwwroot/utilities 12 | COPY ./code/backend/batch /home/site/wwwroot 13 | -------------------------------------------------------------------------------- /docker/Frontend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine AS frontend 2 | RUN mkdir -p /home/node/app/node_modules && chown -R node:node /home/node/app 3 | WORKDIR /home/node/app 4 | COPY ./code/frontend/package*.json ./ 5 | USER node 6 | # RUN npm install --force 7 | RUN npm ci 8 | COPY --chown=node:node ./code/frontend ./frontend 9 | WORKDIR /home/node/app/frontend 10 | RUN npm install --save-dev @types/node @types/jest 11 | RUN npm run build 12 | 13 | FROM python:3.11.7-bookworm 14 | RUN apt-get update && 
apt-get install python3-tk tk-dev -y 15 | 16 | COPY pyproject.toml /usr/src/app/pyproject.toml 17 | COPY poetry.lock /usr/src/app/poetry.lock 18 | WORKDIR /usr/src/app 19 | RUN pip install --upgrade pip && pip install poetry uwsgi && poetry self add poetry-plugin-export && poetry export -o requirements.txt && pip install -r requirements.txt 20 | 21 | COPY ./code/*.py /usr/src/app/ 22 | COPY ./code/backend /usr/src/app/backend 23 | COPY --from=frontend /home/node/app/dist/static /usr/src/app/static/ 24 | # https://github.com/docker/buildx/issues/2751 25 | ENV PYTHONPATH="${PYTHONPATH}:/usr/src/app" 26 | EXPOSE 80 27 | CMD ["uwsgi", "--http", ":80", "--wsgi-file", "app.py", "--callable", "app", "-b", "32768", "--http-timeout", "230"] 28 | -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | name: cwydsa 2 | services: 3 | web: 4 | image: fruoccopublic.azurecr.io/rag-webapp 5 | ports: 6 | - "8080:80" 7 | env_file: 8 | - path: ../.env 9 | required: false 10 | - path: $AZD_ENV_FILE 11 | required: false 12 | admin: 13 | image: fruoccopublic.azurecr.io/rag-adminwebapp 14 | ports: 15 | - "8081:80" 16 | environment: 17 | - BACKEND_URL=http://backend 18 | - FUNCTION_KEY=some-test-key 19 | env_file: 20 | - path: ../.env 21 | required: false 22 | - path: $AZD_ENV_FILE 23 | required: false 24 | backend: 25 | image: fruoccopublic.azurecr.io/rag-backend 26 | ports: 27 | - "8082:80" 28 | environment: 29 | - AzureWebJobsSecretStorageType=files 30 | env_file: 31 | - path: ../.env 32 | required: false 33 | - path: $AZD_ENV_FILE 34 | required: false 35 | volumes: 36 | - ${HOST_DOCKER_FOLDER:-.}/function-host.json:/azure-functions-host/Secrets/host.json 37 | -------------------------------------------------------------------------------- /docker/function-host.json: -------------------------------------------------------------------------------- 1 | { 2 | "masterKey": { 3 | "name": "master", 4 | "value": "some-test-key", 5 | "encrypted": false 6 | }, 7 | "functionKeys": [ 8 | { 9 | "name": "default", 10 | "value": "some-test-key" 11 | }, 12 | { 13 | "name": "clientKey", 14 | "value": "some-test-key" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /docs/azure_app_service_auth_setup.md: -------------------------------------------------------------------------------- 1 | # Set Up Authentication in Azure App Service 2 | 3 | This document provides step-by-step instructions to configure Azure App Registrations for a front-end application. 4 | 5 | ## Prerequisites 6 | 7 | - Access to **Microsoft Entra ID** 8 | - Necessary permissions to create and manage **App Registrations** 9 | 10 | ## Step 1: Add Authentication in Azure App Service configuration 11 | 12 | 1. Click on `Authentication` in the left menu. 13 | 14 | ![Authentication](images/AppAuthentication.png) 15 | 16 | 2. Click on `+ Add identity provider` to open the Add an identity provider page. 17 | 18 | ![Authentication Identity](images/AppAuthenticationIdentity.png) 19 | 20 | 3. Click on the `Identity Provider` dropdown to see the list of identity providers. 21 | 22 | ![Add Provider](images/AppAuthIdentityProvider.png) 23 | 24 | 4. Select the first option, `Microsoft Entra ID`, from the drop-down list and select `client secret expiration` under App registration.
25 | > NOTE: If `Create new app registration` is disabled, then go to [Create new app registration](/docs/create_new_app_registration.md) and come back to this step to complete the app authentication. 26 | 27 | ![Add Provider](images/AppAuthIdentityProviderAdd.png) 28 | 29 | 5. Accept the default values and click the `Add` button to go back to the previous page with the identity provider added. 30 | 31 | ![Add Provider](images/AppAuthIdentityProviderAdded.png) 32 | 33 | 6. You have successfully added app authentication, and you are now required to log in to access the application. 34 | -------------------------------------------------------------------------------- /docs/conversation_flow_options.md: -------------------------------------------------------------------------------- 1 | # Conversation Flow Options 2 | 3 | The backend service for 'Chat With Your Data' supports both 'custom' and 'On Your Data' conversation flows. 4 | 5 | ## Configuration 6 | 7 | To switch between the two conversation flows, you can set the `CONVERSATION_FLOW` environment variable to either `custom` or `byod`. 8 | 9 | When running locally, you can set the environment variable in the `.env` file. 10 | 11 | ## Options 12 | 13 | ### Custom 14 | 15 | ```env 16 | CONVERSATION_FLOW=custom 17 | ``` 18 | 19 | Provides the option to use a custom orchestrator to handle the conversation flow. 'Chat With Your Data' provides support for the following orchestrators: 20 | 21 | - [Semantic Kernel](https://learn.microsoft.com/en-us/semantic-kernel/) 22 | - [Langchain](https://python.langchain.com/v0.2/docs/introduction/) 23 | - [OpenAI Function](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/function-calling) 24 | 25 | ### 'On Your Data' 26 | 27 | ```env 28 | CONVERSATION_FLOW=byod 29 | ``` 30 | 31 | With `CONVERSATION_FLOW` set to "byod", the backend service will mimic the [On Your Data](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/use-your-data) flow. 32 | 33 | 'On Your Data' enables you to run advanced AI models such as GPT-35-Turbo and GPT-4 on your own enterprise data without needing to train or fine-tune models. You can chat on top of and analyze your data with greater accuracy. You can specify sources to support the responses based on the latest information available in your designated data sources. 34 | -------------------------------------------------------------------------------- /docs/create_new_app_registration.md: -------------------------------------------------------------------------------- 1 | # Creating a new App Registration 2 | 3 | 1. Click on `Home` and select `Microsoft Entra ID`. 4 | 5 | ![Microsoft Entra ID](images/MicrosoftEntraID.png) 6 | 7 | 2. Click on `App registrations`. 8 | 9 | ![App registrations](images/Appregistrations.png) 10 | 11 | 3. Click on `+ New registration`. 12 | 13 | ![New Registrations](images/NewRegistration.png) 14 | 15 | 4. Provide the `Name`, select supported account types as `Accounts in this organizational directory only (Contoso only - Single tenant)`, select platform as `Web`, enter/select the `URL` and register. 16 | 17 | ![Add Details](images/AddDetails.png) 18 | 19 | 5. After the application is created successfully, click on `Add a Redirect URL`. 20 | 21 | ![Redirect URL](images/AddRedirectURL.png) 22 | 23 | 6. Click on `+ Add a platform`. 24 | 25 | ![+ Add platform](images/AddPlatform.png) 26 | 27 | 7. Click on `Web`. 28 | 29 | ![Web](images/Web.png) 30 | 31 | 8.
Enter the `web app URL` (Provide the app service name in place of XXXX) and Save. Then go back to [Set Up Authentication in Azure App Service](/docs/azure_app_service_auth_setup.md) Step 1 and, continuing from _Point 4_, choose `Pick an existing app registration in this directory` on the Add an Identity Provider page and provide the newly registered App Name. 32 | 33 | E.g. `https://XXXX.azurewebsites.net/.auth/login/aad/callback` 34 | 35 | ![Add Details](images/WebAppURL.png) 36 | -------------------------------------------------------------------------------- /docs/customer_truth.md: -------------------------------------------------------------------------------- 1 | [Back to *Chat with your data* README](../README.md) 2 | 3 | ![Customer truth](images/customerTruth.png) 4 | # Customer truth 5 | Customer stories coming soon. For early access, contact: fabrizio.ruocco@microsoft.com 6 | -------------------------------------------------------------------------------- /docs/design/adrs/README.md: -------------------------------------------------------------------------------- 1 | # What Are ADRs? 2 | 3 | Architecture Decision Records (ADRs) are a structured way to document significant decisions made during the design and 4 | evolution of a software system or an architectural component. These records capture the context, rationale, and 5 | consequences of each decision. Here’s what you need to know: 6 | 7 | ADRs serve as a historical record, allowing engineers to understand why certain architectural choices were made. By 8 | documenting decisions, ADRs prevent recurring debates and provide clarity. 9 | 10 | ## Benefits of ADRs 11 | 12 | - **Transparency:** ADRs make architectural decisions visible and accessible. 13 | - **Traceability:** Teams can trace back to the reasoning behind a specific choice. 14 | - **Consistency:** Encourages consistent decision-making across projects. 15 | - **Learning:** ADRs help new engineers understand the system’s evolution. 16 | 17 | ## How to add an ADR 18 | 19 | Create a new file in the format `yyyy-mm-dd-<title>.md` in this directory. Copy in the template from `template.md` and 20 | fill in the relevant details. Once complete, raise a PR and discuss your proposal with other engineers involved in the 21 | project. 22 | 23 | Unless in a draft state, once merged, ADRs should be immutable *except* for the status. If an ADR needs to be changed, 24 | create a new ADR with the reasoning and change the existing ADR status to `superseded - <link to new ADR>`.
-------------------------------------------------------------------------------- /docs/images/AddDetails.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/AddDetails.png -------------------------------------------------------------------------------- /docs/images/AddPlatform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/AddPlatform.png -------------------------------------------------------------------------------- /docs/images/AddRedirectURL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/AddRedirectURL.png -------------------------------------------------------------------------------- /docs/images/AppAuthIdentityProvider.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/AppAuthIdentityProvider.png -------------------------------------------------------------------------------- /docs/images/AppAuthIdentityProviderAdd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/AppAuthIdentityProviderAdd.png -------------------------------------------------------------------------------- /docs/images/AppAuthIdentityProviderAdded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/AppAuthIdentityProviderAdded.png -------------------------------------------------------------------------------- /docs/images/AppAuthentication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/AppAuthentication.png -------------------------------------------------------------------------------- /docs/images/AppAuthenticationIdentity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/AppAuthenticationIdentity.png -------------------------------------------------------------------------------- /docs/images/Appregistrations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/Appregistrations.png -------------------------------------------------------------------------------- /docs/images/MicrosoftEntraID.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/MicrosoftEntraID.png -------------------------------------------------------------------------------- /docs/images/NewRegistration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/NewRegistration.png -------------------------------------------------------------------------------- /docs/images/Web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/Web.png -------------------------------------------------------------------------------- /docs/images/WebAppURL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/WebAppURL.png -------------------------------------------------------------------------------- /docs/images/admin-ingest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/admin-ingest.png -------------------------------------------------------------------------------- /docs/images/admin-site.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/admin-site.png -------------------------------------------------------------------------------- /docs/images/architecture_cdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/architecture_cdb.png -------------------------------------------------------------------------------- /docs/images/architecture_pg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/architecture_pg.png -------------------------------------------------------------------------------- /docs/images/azure-search-use-iv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/azure-search-use-iv.png -------------------------------------------------------------------------------- /docs/images/chat-app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/chat-app.png -------------------------------------------------------------------------------- /docs/images/customerTruth.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/customerTruth.png -------------------------------------------------------------------------------- /docs/images/cwyd_admin_contract_selected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/cwyd_admin_contract_selected.png -------------------------------------------------------------------------------- /docs/images/cwyd_admin_employe_selected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/cwyd_admin_employe_selected.png -------------------------------------------------------------------------------- /docs/images/cwyd_admin_legal_unselected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/cwyd_admin_legal_unselected.png -------------------------------------------------------------------------------- /docs/images/db_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/db_selection.png -------------------------------------------------------------------------------- /docs/images/delete-search-datasource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/delete-search-datasource.png -------------------------------------------------------------------------------- /docs/images/delete-search-index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/delete-search-index.png -------------------------------------------------------------------------------- /docs/images/delete-search-indexer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/delete-search-indexer.png -------------------------------------------------------------------------------- /docs/images/delete-search-skillset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/delete-search-skillset.png -------------------------------------------------------------------------------- /docs/images/deployment_center.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/deployment_center.png 
-------------------------------------------------------------------------------- /docs/images/enable_advanced_image_processing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/enable_advanced_image_processing.png -------------------------------------------------------------------------------- /docs/images/oneClickDeploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/oneClickDeploy.png -------------------------------------------------------------------------------- /docs/images/prompt-flow-download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/prompt-flow-download.png -------------------------------------------------------------------------------- /docs/images/prompt-flow-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/prompt-flow-error.png -------------------------------------------------------------------------------- /docs/images/resource_menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/resource_menu.png -------------------------------------------------------------------------------- /docs/images/supportingDocuments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/supportingDocuments.png -------------------------------------------------------------------------------- /docs/images/teams-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-1.png -------------------------------------------------------------------------------- /docs/images/teams-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-10.png -------------------------------------------------------------------------------- /docs/images/teams-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-11.png -------------------------------------------------------------------------------- /docs/images/teams-12.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-12.png -------------------------------------------------------------------------------- /docs/images/teams-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-13.png -------------------------------------------------------------------------------- /docs/images/teams-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-14.png -------------------------------------------------------------------------------- /docs/images/teams-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-15.png -------------------------------------------------------------------------------- /docs/images/teams-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-16.png -------------------------------------------------------------------------------- /docs/images/teams-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-17.png -------------------------------------------------------------------------------- /docs/images/teams-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-18.png -------------------------------------------------------------------------------- /docs/images/teams-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-19.png -------------------------------------------------------------------------------- /docs/images/teams-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-2.png -------------------------------------------------------------------------------- /docs/images/teams-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-20.png -------------------------------------------------------------------------------- /docs/images/teams-21.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-21.png -------------------------------------------------------------------------------- /docs/images/teams-22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-22.png -------------------------------------------------------------------------------- /docs/images/teams-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-3.png -------------------------------------------------------------------------------- /docs/images/teams-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-4.png -------------------------------------------------------------------------------- /docs/images/teams-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-5.png -------------------------------------------------------------------------------- /docs/images/teams-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-6.png -------------------------------------------------------------------------------- /docs/images/teams-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-7.png -------------------------------------------------------------------------------- /docs/images/teams-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-8.png -------------------------------------------------------------------------------- /docs/images/teams-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-9.png -------------------------------------------------------------------------------- /docs/images/teams-cwyd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-cwyd.png -------------------------------------------------------------------------------- /docs/images/teams-deploy-env.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-deploy-env.png -------------------------------------------------------------------------------- /docs/images/teams-local-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-local-1.png -------------------------------------------------------------------------------- /docs/images/teams-local-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-local-2.png -------------------------------------------------------------------------------- /docs/images/teams-local-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-local-3.png -------------------------------------------------------------------------------- /docs/images/teams-ux-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-ux-1.png -------------------------------------------------------------------------------- /docs/images/teams-ux-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-ux-2.png -------------------------------------------------------------------------------- /docs/images/teams-ux-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams-ux-3.png -------------------------------------------------------------------------------- /docs/images/teams.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/teams.png -------------------------------------------------------------------------------- /docs/images/userStory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/userStory.png -------------------------------------------------------------------------------- /docs/images/web-app-authentication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/web-app-authentication.png -------------------------------------------------------------------------------- /docs/images/web-nlu.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/web-nlu.png -------------------------------------------------------------------------------- /docs/images/web-speech-to-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/web-speech-to-text.png -------------------------------------------------------------------------------- /docs/images/web-unstructureddata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/web-unstructureddata.png -------------------------------------------------------------------------------- /docs/images/with_advanced_image_processing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/with_advanced_image_processing.png -------------------------------------------------------------------------------- /docs/images/without_advanced_image_processing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/without_advanced_image_processing.png -------------------------------------------------------------------------------- /docs/images/workbook-advanced-editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/workbook-advanced-editor.png -------------------------------------------------------------------------------- /docs/images/workbook-edit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/workbook-edit.png -------------------------------------------------------------------------------- /docs/images/workbook-json.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/workbook-json.png -------------------------------------------------------------------------------- /docs/images/workbook-resource-parameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/workbook-resource-parameters.png -------------------------------------------------------------------------------- /docs/images/workbook-tabs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/images/workbook-tabs.png 
-------------------------------------------------------------------------------- /docs/speech_to_text.md: -------------------------------------------------------------------------------- 1 | [Back to *Chat with your data* README](../README.md) 2 | 3 | ![User Story](images/userStory.png) 4 | # Speech-to-text functionality 5 | Many users are used to the convenience of speech-to-text functionality in their consumer products. With hybrid work increasing, speech-to-text supports a more flexible way for users to chat with their data, whether they’re at their computer or on the go with their mobile device. The speech-to-text capability is combined with NLP capabilities to extract intent and context from spoken language, allowing the chatbot to understand and respond to user requests more intelligently. 6 | 7 | ![Web - Chat with unstructured data](images/web-unstructureddata.png)Chat with your unstructured data 8 | 9 | ![Web - Get responses using natural language](images/web-nlu.png)Get responses using natural language 10 | -------------------------------------------------------------------------------- /docs/spikes/using-image-data/azure-services.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/docs/spikes/using-image-data/azure-services.png -------------------------------------------------------------------------------- /docs/supported_file_types.md: -------------------------------------------------------------------------------- 1 | [Back to *Chat with your data* README](../README.md) 2 | 3 | ![User Story](images/userStory.png) 4 | # Supported file types 5 | 6 | Out-of-the-box, you can upload the following file types: 7 | * PDF 8 | * JPEG 9 | * JPG 10 | * PNG 11 | * TXT 12 | * HTML 13 | * MD (Markdown) 14 | * DOCX 15 | * JSON 16 | -------------------------------------------------------------------------------- /extensions/teams/.gitignore: -------------------------------------------------------------------------------- 1 | # TeamsFx files 2 | env/.env.*.user 3 | env/.env.local 4 | .localConfigs.testTool 5 | .localConfigs 6 | .notification.localstore.json 7 | .notification.testtoolstore.json 8 | appPackage/build 9 | 10 | # dependencies 11 | node_modules/ 12 | 13 | # misc 14 | .env 15 | .deployment 16 | .DS_Store 17 | 18 | # build 19 | lib/ 20 | -------------------------------------------------------------------------------- /extensions/teams/.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "TeamsDevApp.ms-teams-vscode-extension" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /extensions/teams/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "debug.onTaskErrors": "abort", 3 | "json.schemas": [ 4 | { 5 | "fileMatch": [ 6 | "/aad.*.json" 7 | ], 8 | "schema": {} 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /extensions/teams/.webappignore: -------------------------------------------------------------------------------- 1 | .webappignore 2 | .fx 3 | .deployment 4 | .localConfigs.testTool 5 | .localConfigs 6 | .notification.localstore.json 7 | .notification.testtoolstore.json 8 | .vscode 9 | *.js.map 10 | *.ts.map 11 | *.ts 12 | .git* 13 | .tsbuildinfo 14 | CHANGELOG.md 15 | 
readme.md 16 | local.settings.json 17 | test 18 | tsconfig.json 19 | .DS_Store 20 | teamsapp.yml 21 | teamsapp.*.yml 22 | /env/ 23 | /node_modules/.bin 24 | /node_modules/ts-node 25 | /node_modules/typescript 26 | /appPackage/ 27 | /infra/ 28 | -------------------------------------------------------------------------------- /extensions/teams/appPackage/CogSearchColor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/extensions/teams/appPackage/CogSearchColor.png -------------------------------------------------------------------------------- /extensions/teams/appPackage/CogSearchOutline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/extensions/teams/appPackage/CogSearchOutline.png -------------------------------------------------------------------------------- /extensions/teams/appPackage/color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/extensions/teams/appPackage/color.png -------------------------------------------------------------------------------- /extensions/teams/appPackage/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://developer.microsoft.com/en-us/json-schemas/teams/v1.16/MicrosoftTeams.schema.json", 3 | "manifestVersion": "1.16", 4 | "version": "3.0.1", 5 | "id": "${{TEAMS_APP_ID}}", 6 | "packageName": "com.microsoft.teams.extension", 7 | "developer": { 8 | "name": "James Hunter", 9 | "websiteUrl": "https://www.microsoft.com", 10 | "privacyUrl": "https://www.microsoft.com/en-us/privacystatement", 11 | "termsOfUseUrl": "https://www.microsoft.com/en-us/servicesagreement" 12 | }, 13 | "icons": { 14 | "color": "CogSearchColor.png", 15 | "outline": "CogSearchOutline.png" 16 | }, 17 | "name": { 18 | "short": "Chat with Data", 19 | "full": "Chat with your own Data" 20 | }, 21 | "description": { 22 | "short": "A tool that combines the capabilities of Azure AI Search and LLMs.", 23 | "full": "The 'Chat with your data' Solution Accelerator is a powerful tool that combines the capabilities of Azure Cognitive Search and Large Language Models (LLMs) to create a conversational search experience. This solution accelerator uses an Azure OpenAI GPT model and an Azure Cognitive Search index generated from your data, which is integrated into a web application to provide a natural language interface for search queries." 
24 | }, 25 | "accentColor": "#FFFFFF", 26 | "bots": [ 27 | { 28 | "botId": "${{BOT_ID}}", 29 | "scopes": [ 30 | "personal" 31 | ], 32 | "supportsFiles": false, 33 | "isNotificationOnly": false 34 | } 35 | ], 36 | "composeExtensions": [], 37 | "configurableTabs": [], 38 | "staticTabs": [], 39 | "permissions": [ 40 | "identity", 41 | "messageTeamMembers" 42 | ], 43 | "validDomains": [] 44 | } -------------------------------------------------------------------------------- /extensions/teams/appPackage/outline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/extensions/teams/appPackage/outline.png -------------------------------------------------------------------------------- /extensions/teams/config.ts: -------------------------------------------------------------------------------- 1 | const config = { 2 | botId: process.env.BOT_ID, 3 | botPassword: process.env.BOT_PASSWORD, 4 | azureFunctionUrl: process.env.AZURE_FUNCTION_URL, 5 | }; 6 | 7 | export default config; 8 | -------------------------------------------------------------------------------- /extensions/teams/env/.env.dev: -------------------------------------------------------------------------------- 1 | # This file includes environment variables that will be committed to git by default. 2 | 3 | # Built-in environment variables 4 | TEAMSFX_ENV=dev 5 | APP_NAME_SUFFIX=dev 6 | 7 | # Updating AZURE_SUBSCRIPTION_ID or AZURE_RESOURCE_GROUP_NAME after provision may also require an update to RESOURCE_SUFFIX, because some services require a globally unique name across subscriptions/resource groups. 8 | AZURE_SUBSCRIPTION_ID= 9 | AZURE_RESOURCE_GROUP_NAME= 10 | RESOURCE_SUFFIX= 11 | 12 | # Generated during provision, you can also add your own variables. 13 | BOT_ID= 14 | TEAMS_APP_ID= 15 | BOT_AZURE_APP_SERVICE_RESOURCE_ID= 16 | BOT_DOMAIN= 17 | AZURE_FUNCTION_URL=https://backend-<RESOURCE_TOKEN>.azurewebsites.net/api/GetConversationResponse?code=<FUNCTION_APP_CLIENT_KEY>&clientId=clientKey 18 | -------------------------------------------------------------------------------- /extensions/teams/env/.env.test: -------------------------------------------------------------------------------- 1 | # This file includes environment variables that will be committed to git by default. 2 | 3 | # Built-in environment variables 4 | TEAMSFX_ENV=test 5 | APP_NAME_SUFFIX=test 6 | 7 | # Updating AZURE_SUBSCRIPTION_ID or AZURE_RESOURCE_GROUP_NAME after provision may also require an update to RESOURCE_SUFFIX, because some services require a globally unique name across subscriptions/resource groups. 8 | AZURE_SUBSCRIPTION_ID= 9 | AZURE_RESOURCE_GROUP_NAME= 10 | RESOURCE_SUFFIX= 11 | 12 | # Generated during provision, you can also add your own variables. 13 | BOT_ID= 14 | TEAMS_APP_ID= 15 | BOT_AZURE_APP_SERVICE_RESOURCE_ID= 16 | BOT_DOMAIN= 17 | AZURE_FUNCTION_URL=https://backend-<RESOURCE_TOKEN>.azurewebsites.net/api/GetConversationResponse?code=<FUNCTION_APP_CLIENT_KEY>&clientId=clientKey 18 | TEAMS_APP_TENANT_ID= 19 | TEAMS_APP_PUBLISHED_APP_ID= 20 | -------------------------------------------------------------------------------- /extensions/teams/env/.env.testtool: -------------------------------------------------------------------------------- 1 | # This file includes environment variables that can be committed to git. 
It's gitignored by default because it represents your local development environment. 2 | 3 | # Built-in environment variables 4 | TEAMSFX_ENV=testtool 5 | 6 | # Environment variables used by test tool 7 | TEAMSAPPTESTER_PORT=56150 8 | TEAMSFX_NOTIFICATION_STORE_FILENAME=.notification.testtoolstore.json 9 | AZURE_FUNCTION_URL=https://backend-<RESOURCE_TOKEN>.azurewebsites.net/api/GetConversationResponse?code=<FUNCTION_APP_CLIENT_KEY>&clientId=clientKey 10 | -------------------------------------------------------------------------------- /extensions/teams/infra/azure.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "resourceBaseName": { 6 | "value": "bot${{RESOURCE_SUFFIX}}" 7 | }, 8 | "botAadAppClientId": { 9 | "value": "${{BOT_ID}}" 10 | }, 11 | "botAadAppClientSecret": { 12 | "value": "${{SECRET_BOT_PASSWORD}}" 13 | }, 14 | "azureFunctionURL": { 15 | "value": "${{AZURE_FUNCTION_URL}}" 16 | }, 17 | "webAppSKU": { 18 | "value": "B1" 19 | }, 20 | "botDisplayName": { 21 | "value": "teams-bot-toolkit" 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /extensions/teams/infra/botRegistration/azurebot.bicep: -------------------------------------------------------------------------------- 1 | @maxLength(20) 2 | @minLength(4) 3 | @description('Used to generate names for all resources in this file') 4 | param resourceBaseName string 5 | 6 | @maxLength(42) 7 | param botDisplayName string 8 | 9 | param botServiceName string = resourceBaseName 10 | param botServiceSku string = 'F0' 11 | param botAadAppClientId string 12 | param botAppDomain string 13 | 14 | // Register your web service as a bot with the Bot Framework 15 | resource botService 'Microsoft.BotService/botServices@2021-03-01' = { 16 | kind: 'azurebot' 17 | location: 'global' 18 | name: botServiceName 19 | properties: { 20 | displayName: botDisplayName 21 | endpoint: 'https://${botAppDomain}/api/messages' 22 | msaAppId: botAadAppClientId 23 | } 24 | sku: { 25 | name: botServiceSku 26 | } 27 | } 28 | 29 | // Connect the bot service to Microsoft Teams 30 | resource botServiceMsTeamsChannel 'Microsoft.BotService/botServices/channels@2021-03-01' = { 31 | parent: botService 32 | location: 'global' 33 | name: 'MsTeamsChannel' 34 | properties: { 35 | channelName: 'MsTeamsChannel' 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /extensions/teams/infra/botRegistration/readme.md: -------------------------------------------------------------------------------- 1 | The `azurebot.bicep` module is provided to help you create an Azure Bot service when you don't use Azure to host your app. If you use Azure as the infrastructure for your app, `azure.bicep` under the infra folder already leverages this module to create the Azure Bot service for you. You don't need to deploy `azurebot.bicep` again.
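2 | 
3 | The sketch below is an editorial illustration rather than a file shipped with the accelerator; it shows roughly how a calling template such as `azure.bicep` might consume this module. The parameter names on the left come from `azurebot.bicep` above, while the module path and the right-hand values (`resourceBaseName`, `botAadAppClientId`, `botDomain`, `botDisplayName`) are assumptions you would replace with parameters or variables from your own template.
4 | 
5 | ```bicep
6 | // Hypothetical caller, e.g. a main template in extensions/teams/infra (assumed path and names)
7 | module azureBotRegistration './botRegistration/azurebot.bicep' = {
8 |   name: 'Azure-Bot-registration'
9 |   params: {
10 |     resourceBaseName: resourceBaseName   // 4-20 characters, used to derive resource names
11 |     botAadAppClientId: botAadAppClientId // client ID of the bot's Microsoft Entra app
12 |     botAppDomain: botDomain              // domain that serves the /api/messages endpoint
13 |     botDisplayName: botDisplayName       // up to 42 characters
14 |   }
15 | }
16 | ```
17 | 
18 | `botServiceName` and `botServiceSku` are omitted here because the module defaults them to `resourceBaseName` and `'F0'`.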
-------------------------------------------------------------------------------- /extensions/teams/model.ts: -------------------------------------------------------------------------------- 1 | export type AskResponse = { 2 | answer: string; 3 | citations: Citation[]; 4 | error?: string; 5 | }; 6 | 7 | export type Citation = { 8 | content: string; 9 | id: string; 10 | title: string | null; 11 | filepath: string | null; 12 | url: string | null; 13 | metadata: string | null; 14 | chunk_id: string | null; 15 | reindex_id: string | null; 16 | } 17 | 18 | export enum CardType { 19 | OpenUrl = "Action.OpenUrl", 20 | ShowCard = "Action.ShowCard", 21 | AdaptiveCard = "AdaptiveCard", 22 | TextBlock = "TextBlock" 23 | } 24 | 25 | export type ToolMessageContent = { 26 | citations: Citation[]; 27 | intent: string; 28 | } 29 | 30 | export type ChatMessage = { 31 | role: string; 32 | content: string; 33 | end_turn?: boolean; 34 | }; 35 | 36 | export enum ChatCompletionType { 37 | ChatCompletion = "chat.completion", 38 | ChatCompletionChunk = "chat.completion.chunk" 39 | } 40 | 41 | export type ChatResponseChoice = { 42 | messages: ChatMessage[]; 43 | } 44 | 45 | export type ChatResponse = { 46 | id: string; 47 | model: string; 48 | created: number; 49 | object: ChatCompletionType; 50 | choices: ChatResponseChoice[]; 51 | error: string; 52 | } 53 | 54 | export type ConversationRequest = { 55 | id?: string; 56 | messages: ChatMessage[]; 57 | }; 58 | -------------------------------------------------------------------------------- /extensions/teams/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "teams-bot-toolkit", 3 | "version": "1.0.0", 4 | "description": "Microsoft Teams Toolkit hello world Bot sample", 5 | "engines": { 6 | "node": "16 || 18" 7 | }, 8 | "author": "Microsoft", 9 | "license": "MIT", 10 | "main": "./lib/index.js", 11 | "scripts": { 12 | "dev:teamsfx": "env-cmd --silent -f .localConfigs npm run dev", 13 | "dev:teamsfx:testtool": "env-cmd --silent -f .localConfigs.testTool npm run dev", 14 | "dev:teamsfx:launch-testtool": "env-cmd --silent -f env/.env.testtool teamsapptester start", 15 | "dev": "nodemon --exec node --inspect=9239 --signal SIGINT -r ts-node/register ./index.ts", 16 | "build": "tsc --build", 17 | "start": "node ./lib/index.js", 18 | "watch": "nodemon --exec \"npm run start\"", 19 | "test": "echo \"Error: no test specified\" && exit 1" 20 | }, 21 | "repository": { 22 | "type": "git", 23 | "url": "https://github.com" 24 | }, 25 | "dependencies": { 26 | "botbuilder": "^4.23.0", 27 | "restify": "^10.0.0" 28 | }, 29 | "devDependencies": { 30 | "@types/node": "^18.19.31", 31 | "@types/restify": "^8.5.5", 32 | "env-cmd": "^10.1.0", 33 | "nodemon": "^3.1.0", 34 | "shx": "^0.3.3", 35 | "ts-node": "^10.4.0", 36 | "typescript": "^4.4.4" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /extensions/teams/teamsapp.testtool.yml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://aka.ms/teams-toolkit/v1.3/yaml.schema.json 2 | # Visit https://aka.ms/teamsfx-v5.0-guide for details on this file 3 | # Visit https://aka.ms/teamsfx-actions for details on actions 4 | version: v1.3 5 | 6 | deploy: 7 | # Install development tool(s) 8 | - uses: devTool/install 9 | with: 10 | testTool: 11 | version: ~0.1.0-beta 12 | symlinkDir: ./devTools/teamsapptester 13 | 14 | # Run npm command 15 | - uses: cli/runNpmCommand 16 | 
with: 17 | args: install --no-audit 18 | 19 | # Generate runtime environment variables 20 | - uses: file/createOrUpdateEnvironmentFile 21 | with: 22 | target: ./.localConfigs.testTool 23 | envs: 24 | TEAMSFX_NOTIFICATION_STORE_FILENAME: ${{TEAMSFX_NOTIFICATION_STORE_FILENAME}} 25 | AZURE_FUNCTION_URL: ${{AZURE_FUNCTION_URL}} -------------------------------------------------------------------------------- /extensions/teams/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "declaration": true, 4 | "target": "es2021", 5 | "module": "commonjs", 6 | "outDir": "./lib", 7 | "rootDir": "./", 8 | "sourceMap": true, 9 | "incremental": true, 10 | "tsBuildInfoFile": "./lib/.tsbuildinfo", 11 | "resolveJsonModule": true, 12 | "esModuleInterop": true, 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /infra/app/eventgrid.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string 3 | param storageAccountId string 4 | param queueName string 5 | param blobContainerName string 6 | 7 | resource eventGridSystemTopic 'Microsoft.EventGrid/systemTopics@2021-12-01' = { 8 | name: name 9 | location: location 10 | properties: { 11 | source: storageAccountId 12 | topicType: 'Microsoft.Storage.StorageAccounts' 13 | } 14 | } 15 | 16 | resource eventGridSystemTopicNameBlobEvents 'Microsoft.EventGrid/systemTopics/eventSubscriptions@2021-12-01' = { 17 | parent: eventGridSystemTopic 18 | name: 'BlobEvents' 19 | properties: { 20 | destination: { 21 | endpointType: 'StorageQueue' 22 | properties: { 23 | queueMessageTimeToLiveInSeconds: -1 24 | queueName: queueName 25 | resourceId: storageAccountId 26 | } 27 | } 28 | filter: { 29 | includedEventTypes: [ 30 | 'Microsoft.Storage.BlobCreated' 31 | 'Microsoft.Storage.BlobDeleted' 32 | ] 33 | enableAdvancedFilteringOnArrays: true 34 | subjectBeginsWith: '/blobServices/default/containers/${blobContainerName}/blobs/' 35 | } 36 | labels: [] 37 | eventDeliverySchema: 'EventGridSchema' 38 | retryPolicy: { 39 | maxDeliveryAttempts: 30 40 | eventTimeToLiveInMinutes: 1440 41 | } 42 | } 43 | } 44 | 45 | output name string = eventGridSystemTopic.name 46 | -------------------------------------------------------------------------------- /infra/core/database/cosmos-sql-role-assign.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates a SQL role assignment under an Azure Cosmos DB account.' 
2 | param accountName string 3 | 4 | param roleDefinitionId string 5 | param principalId string = '' 6 | 7 | resource role 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2022-05-15' = { 8 | parent: cosmos 9 | name: guid(roleDefinitionId, principalId, cosmos.id) 10 | properties: { 11 | principalId: principalId 12 | roleDefinitionId: roleDefinitionId 13 | scope: cosmos.id 14 | } 15 | } 16 | 17 | resource cosmos 'Microsoft.DocumentDB/databaseAccounts@2022-08-15' existing = { 18 | name: accountName 19 | } 20 | -------------------------------------------------------------------------------- /infra/core/database/deploy_create_table_script.bicep: -------------------------------------------------------------------------------- 1 | @description('Specifies the location for resources.') 2 | param solutionLocation string 3 | 4 | param baseUrl string 5 | param keyVaultName string 6 | param identity string 7 | param postgresSqlServerName string 8 | param webAppPrincipalName string 9 | param adminAppPrincipalName string 10 | param managedIdentityName string 11 | param functionAppPrincipalName string 12 | 13 | resource create_index 'Microsoft.Resources/deploymentScripts@2020-10-01' = { 14 | kind:'AzureCLI' 15 | name: 'create_postgres_table' 16 | location: solutionLocation // Replace with your desired location 17 | identity: { 18 | type: 'UserAssigned' 19 | userAssignedIdentities: { 20 | '${identity}' : {} 21 | } 22 | } 23 | properties: { 24 | azCliVersion: '2.52.0' 25 | primaryScriptUri: '${baseUrl}scripts/run_create_table_script.sh' 26 | arguments: '${baseUrl} ${keyVaultName} ${resourceGroup().name} ${postgresSqlServerName} ${webAppPrincipalName} ${adminAppPrincipalName} ${functionAppPrincipalName} ${managedIdentityName}' // Specify any arguments for the script 27 | timeout: 'PT1H' // Specify the desired timeout duration 28 | retentionInterval: 'PT1H' // Specify the desired retention interval 29 | cleanupPreference:'OnSuccess' 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /infra/core/host/appservice-appsettings.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Updates app settings for an Azure App Service.' 2 | @description('The name of the app service resource within the current resource group scope') 3 | param name string 4 | 5 | @description('The app settings to be applied to the app service') 6 | @secure() 7 | param appSettings object 8 | 9 | resource appService 'Microsoft.Web/sites@2022-03-01' existing = { 10 | name: name 11 | } 12 | 13 | resource settings 'Microsoft.Web/sites/config@2022-03-01' = { 14 | name: 'appsettings' 15 | parent: appService 16 | properties: appSettings 17 | } 18 | -------------------------------------------------------------------------------- /infra/core/host/appserviceplan.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure App Service plan.' 
2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | param kind string = '' 7 | param reserved bool = true 8 | param sku object 9 | 10 | resource appServicePlan 'Microsoft.Web/serverfarms@2022-03-01' = { 11 | name: name 12 | location: location 13 | tags: tags 14 | sku: sku 15 | kind: kind 16 | properties: { 17 | reserved: reserved 18 | } 19 | } 20 | 21 | output id string = appServicePlan.id 22 | output name string = appServicePlan.name 23 | -------------------------------------------------------------------------------- /infra/core/monitor/applicationinsights.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Application Insights instance based on an existing Log Analytics workspace.' 2 | param name string 3 | param dashboardName string = '' 4 | param location string = resourceGroup().location 5 | param tags object = {} 6 | param logAnalyticsWorkspaceId string 7 | 8 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { 9 | name: name 10 | location: location 11 | tags: tags 12 | kind: 'web' 13 | properties: { 14 | Application_Type: 'web' 15 | WorkspaceResourceId: logAnalyticsWorkspaceId 16 | } 17 | } 18 | 19 | module applicationInsightsDashboard 'applicationinsights-dashboard.bicep' = if (!empty(dashboardName)) { 20 | name: 'application-insights-dashboard' 21 | params: { 22 | name: dashboardName 23 | location: location 24 | applicationInsightsName: applicationInsights.name 25 | } 26 | } 27 | 28 | output connectionString string = applicationInsights.properties.ConnectionString 29 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey 30 | output name string = applicationInsights.name 31 | output id string = applicationInsights.id -------------------------------------------------------------------------------- /infra/core/monitor/loganalytics.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates a Log Analytics workspace.' 2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = { 7 | name: name 8 | location: location 9 | tags: tags 10 | properties: any({ 11 | retentionInDays: 30 12 | features: { 13 | searchVersion: 1 14 | } 15 | sku: { 16 | name: 'PerGB2018' 17 | } 18 | }) 19 | } 20 | 21 | output id string = logAnalytics.id 22 | output name string = logAnalytics.name 23 | -------------------------------------------------------------------------------- /infra/core/monitor/monitoring.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Application Insights instance and a Log Analytics workspace.' 
2 | param logAnalyticsName string 3 | param applicationInsightsName string 4 | param applicationInsightsDashboardName string = '' 5 | param location string = resourceGroup().location 6 | param tags object = {} 7 | 8 | module logAnalytics 'loganalytics.bicep' = { 9 | name: 'loganalytics' 10 | params: { 11 | name: logAnalyticsName 12 | location: location 13 | tags: tags 14 | } 15 | } 16 | 17 | module applicationInsights 'applicationinsights.bicep' = { 18 | name: 'applicationinsights' 19 | params: { 20 | name: applicationInsightsName 21 | location: location 22 | tags: tags 23 | dashboardName: applicationInsightsDashboardName 24 | logAnalyticsWorkspaceId: logAnalytics.outputs.id 25 | } 26 | } 27 | 28 | output applicationInsightsConnectionString string = applicationInsights.outputs.connectionString 29 | output applicationInsightsInstrumentationKey string = applicationInsights.outputs.instrumentationKey 30 | output applicationInsightsName string = applicationInsights.outputs.name 31 | output applicationInsightsId string = applicationInsights.outputs.id 32 | output logAnalyticsWorkspaceId string = logAnalytics.outputs.id 33 | output logAnalyticsWorkspaceName string = logAnalytics.outputs.name -------------------------------------------------------------------------------- /infra/core/monitor/workbook.bicep: -------------------------------------------------------------------------------- 1 | @description('The friendly name for the workbook. This name must be unique within a resource group.') 2 | param workbookDisplayName string 3 | 4 | @description('The gallery that the workbook will be shown under. Supported values include workbook, tsg, etc. Usually, this is \'workbook\'') 5 | param workbookType string = 'workbook' 6 | 7 | @description('The id of the resource instance to which the workbook will be associated') 8 | param workbookSourceId string = 'azure monitor' 9 | 10 | @description('The unique guid for this workbook instance') 11 | param workbookId string 12 | 13 | @description('The json content of the workbook') 14 | param workbookContents string 15 | 16 | param location string = resourceGroup().location 17 | 18 | resource workbook_resource 'microsoft.insights/workbooks@2023-06-01' = { 19 | name: workbookId 20 | location: location 21 | kind: 'shared' 22 | properties: { 23 | displayName: workbookDisplayName 24 | serializedData: workbookContents 25 | version: '1.0' 26 | sourceId: workbookSourceId 27 | category: workbookType 28 | } 29 | } 30 | 31 | output workbookId string = workbook_resource.id 32 | -------------------------------------------------------------------------------- /infra/core/search/search-services.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure AI Search instance.'
2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | param sku object = { 7 | name: 'standard' 8 | } 9 | 10 | param authOptions object = {} 11 | param disableLocalAuth bool = false 12 | param disabledDataExfiltrationOptions array = [] 13 | param encryptionWithCmk object = { 14 | enforcement: 'Unspecified' 15 | } 16 | @allowed([ 17 | 'default' 18 | 'highDensity' 19 | ]) 20 | param hostingMode string = 'default' 21 | param networkRuleSet object = { 22 | bypass: 'None' 23 | ipRules: [] 24 | } 25 | param partitionCount int = 1 26 | @allowed([ 27 | 'enabled' 28 | 'disabled' 29 | ]) 30 | param publicNetworkAccess string = 'enabled' 31 | param replicaCount int = 1 32 | @allowed([ 33 | 'disabled' 34 | 'free' 35 | 'standard' 36 | ]) 37 | param semanticSearch string = 'disabled' 38 | 39 | resource search 'Microsoft.Search/searchServices@2021-04-01-preview' = { 40 | name: name 41 | location: location 42 | tags: tags 43 | identity: { 44 | type: 'SystemAssigned' 45 | } 46 | properties: { 47 | authOptions: authOptions 48 | disableLocalAuth: disableLocalAuth 49 | disabledDataExfiltrationOptions: disabledDataExfiltrationOptions 50 | encryptionWithCmk: encryptionWithCmk 51 | hostingMode: hostingMode 52 | networkRuleSet: networkRuleSet 53 | partitionCount: partitionCount 54 | publicNetworkAccess: publicNetworkAccess 55 | replicaCount: replicaCount 56 | semanticSearch: semanticSearch 57 | } 58 | sku: sku 59 | } 60 | 61 | output id string = search.id 62 | output endpoint string = 'https://${name}.search.windows.net/' 63 | output name string = search.name 64 | output identityPrincipalId string = search.identity.principalId 65 | -------------------------------------------------------------------------------- /infra/core/security/keyvault-access.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Assigns an Azure Key Vault access policy.' 2 | param name string = 'add' 3 | 4 | param keyVaultName string 5 | param permissions object = { secrets: ['get', 'list'] } 6 | param principalId string 7 | 8 | resource keyVaultAccessPolicies 'Microsoft.KeyVault/vaults/accessPolicies@2022-07-01' = { 9 | parent: keyVault 10 | name: name 11 | properties: { 12 | accessPolicies: [ 13 | { 14 | objectId: principalId 15 | tenantId: subscription().tenantId 16 | permissions: permissions 17 | } 18 | ] 19 | } 20 | } 21 | 22 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = { 23 | name: keyVaultName 24 | } 25 | -------------------------------------------------------------------------------- /infra/core/security/keyvault-secret.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates or updates a secret in an Azure Key Vault.' 2 | param name string 3 | param tags object = {} 4 | param keyVaultName string 5 | param contentType string = 'string' 6 | @description('The value of the secret. 
Provide only derived values like blob storage access, but do not hard code any secrets in your templates') 7 | @secure() 8 | param secretValue string 9 | 10 | param enabled bool = true 11 | param exp int = 0 12 | param nbf int = 0 13 | 14 | resource keyVaultSecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { 15 | name: name 16 | tags: tags 17 | parent: keyVault 18 | properties: { 19 | attributes: { 20 | enabled: enabled 21 | exp: exp 22 | nbf: nbf 23 | } 24 | contentType: contentType 25 | value: secretValue 26 | } 27 | } 28 | 29 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = { 30 | name: keyVaultName 31 | } 32 | -------------------------------------------------------------------------------- /infra/core/security/managed-identity.bicep: -------------------------------------------------------------------------------- 1 | // ========== Managed Identity ========== // 2 | targetScope = 'resourceGroup' 3 | 4 | @minLength(3) 5 | @maxLength(15) 6 | @description('Solution Name') 7 | param solutionName string 8 | 9 | @description('Solution Location') 10 | param solutionLocation string 11 | 12 | @description('Name') 13 | param miName string 14 | 15 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' = { 16 | name: miName 17 | location: solutionLocation 18 | tags: { 19 | app: solutionName 20 | location: solutionLocation 21 | } 22 | } 23 | 24 | @description('This is the built-in owner role. See https://docs.microsoft.com/azure/role-based-access-control/built-in-roles#owner') 25 | resource ownerRoleDefinition 'Microsoft.Authorization/roleDefinitions@2018-01-01-preview' existing = { 26 | scope: resourceGroup() 27 | name: '8e3af657-a8ff-443c-a75c-2fe8c4bcb635' 28 | } 29 | 30 | resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 31 | name: guid(resourceGroup().id, managedIdentity.id, ownerRoleDefinition.id) 32 | properties: { 33 | principalId: managedIdentity.properties.principalId 34 | roleDefinitionId: ownerRoleDefinition.id 35 | principalType: 'ServicePrincipal' 36 | } 37 | } 38 | 39 | output managedIdentityOutput object = { 40 | id: managedIdentity.id 41 | objectId: managedIdentity.properties.principalId 42 | name: miName 43 | } 44 | -------------------------------------------------------------------------------- /infra/core/security/registry-access.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Assigns ACR Pull permissions to access an Azure Container Registry.' 
2 | param containerRegistryName string 3 | param principalId string 4 | 5 | var acrPullRole = subscriptionResourceId( 6 | 'Microsoft.Authorization/roleDefinitions', 7 | '7f951dda-4ed3-4680-a7ca-43fe172d538d' 8 | ) 9 | 10 | resource aksAcrPull 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 11 | scope: containerRegistry // Use when specifying a scope that is different than the deployment scope 12 | name: guid(subscription().id, resourceGroup().id, principalId, acrPullRole) 13 | properties: { 14 | roleDefinitionId: acrPullRole 15 | principalType: 'ServicePrincipal' 16 | principalId: principalId 17 | } 18 | } 19 | 20 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' existing = { 21 | name: containerRegistryName 22 | } 23 | -------------------------------------------------------------------------------- /infra/core/security/role.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates a role assignment for a service principal.' 2 | param principalId string 3 | 4 | @allowed([ 5 | 'Device' 6 | 'ForeignGroup' 7 | 'Group' 8 | 'ServicePrincipal' 9 | 'User' 10 | ]) 11 | param principalType string = 'ServicePrincipal' 12 | param roleDefinitionId string 13 | 14 | resource role 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 15 | name: guid(subscription().id, resourceGroup().id, principalId, roleDefinitionId) 16 | properties: { 17 | principalId: principalId 18 | principalType: principalType 19 | roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleDefinitionId) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /infra/prompt-flow/cwyd/chat_with_context.jinja2: -------------------------------------------------------------------------------- 1 | {{prompt_text}} 2 | 3 | -------------------------------------------------------------------------------- /infra/prompt-flow/cwyd/flow.meta.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/flow.schema.json 2 | name: bring_your_own_data_chat_qna 3 | display_name: Multi-Round Q&A on Your Data 4 | type: chat 5 | path: ./flow.dag.yaml 6 | description: Create a chatbot that uses LLM and data from your own indexed files to ground multi-round question and answering capabilities in enterprise chat scenarios. 
7 | properties: 8 | promptflow.stage: prod 9 | promptflow.details.type: markdown 10 | promptflow.details.source: README.md 11 | promptflow.batch_inputs: samples.json 12 | -------------------------------------------------------------------------------- /infra/prompt-flow/cwyd/generate_prompt_context.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from promptflow import tool 3 | from promptflow_vectordb.core.contracts import SearchResultEntity 4 | 5 | 6 | @tool 7 | def generate_prompt_context(search_result: List[dict]) -> str: 8 | retrieved_docs = {} 9 | for index, item in enumerate(search_result): 10 | 11 | entity = SearchResultEntity.from_dict(item) 12 | content = entity.text or "" 13 | additional_fields = entity.additional_fields 14 | filepath = additional_fields.get("source") 15 | chunk_id = additional_fields.get("chunk_id", additional_fields.get("chunk", "")) 16 | 17 | retrieved_docs[f"[doc{index+1}]"] = { 18 | "content": content, 19 | "filepath": filepath, 20 | "chunk_id": chunk_id, 21 | } 22 | 23 | return retrieved_docs 24 | -------------------------------------------------------------------------------- /infra/prompt-flow/cwyd/requirements.txt: -------------------------------------------------------------------------------- 1 | promptflow_vectordb[azure] 2 | -------------------------------------------------------------------------------- /infra/prompt-flow/cwyd/samples.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "chat_input": "What benefits are included in the Northwind Standard plan?", 4 | "chat_history": [ 5 | { 6 | "inputs": { 7 | "chat_input": "Hi" 8 | }, 9 | "outputs": { 10 | "chat_output": "Hello! How can I assist you today?" 11 | } 12 | } 13 | ] 14 | } 15 | ] 16 | -------------------------------------------------------------------------------- /infra/prompt-flow/deployment.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json 2 | name: cwyd-deployment 3 | endpoint_name: cwyd-endpoint 4 | model: azureml:cwyd-model:1 5 | # You can also specify model files path inline 6 | # path: examples/flows/chat/basic-chat 7 | environment: 8 | image: mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest 9 | inference_config: 10 | liveness_route: 11 | path: /health 12 | port: 8080 13 | readiness_route: 14 | path: /health 15 | port: 8080 16 | scoring_route: 17 | path: /score 18 | port: 8080 19 | instance_type: Standard_DS3_v2 20 | instance_count: 1 21 | request_settings: 22 | request_timeout_ms: 120000 # 2 minutes 23 | environment_variables: 24 | 25 | # "compute" mode is the default mode, if you want to deploy to serving mode, you need to set this env variable to "serving" 26 | PROMPTFLOW_RUN_MODE: serving 27 | 28 | # for pulling connections from workspace 29 | PRT_CONFIG_OVERRIDE: deployment.subscription_id=<subscription_id>,deployment.resource_group=<resource_group>,deployment.workspace_name=<workspace_name>,deployment.endpoint_name=<endpoint_name>,deployment.deployment_name=<deployment_name> 30 | 31 | # (Optional) When there are multiple fields in the response, using this env variable will filter the fields to expose in the response. 32 | # For example, if there are 2 flow outputs: "answer", "context", and I only want to have "answer" in the endpoint response, I can set this env variable to '["answer"]'. 
33 | # If you don't set this environment, by default all flow outputs will be included in the endpoint response. 34 | # PROMPTFLOW_RESPONSE_INCLUDED_FIELDS: '["category", "evidence"]' 35 | -------------------------------------------------------------------------------- /infra/prompt-flow/endpoint.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json 2 | name: cwyd-endpoint 3 | auth_mode: key 4 | properties: 5 | enforce_access_to_default_secret_stores: enabled 6 | -------------------------------------------------------------------------------- /infra/prompt-flow/model.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json 2 | name: cwyd-model 3 | path: cwyd/ 4 | description: register cwyd flow folder as a custom model 5 | properties: 6 | # In AuzreML studio UI, endpoint detail UI Test tab needs this property to know it's from prompt flow 7 | azureml.promptflow.source_flow_id: cwyd 8 | 9 | # Following are properties only for classification flow 10 | # endpoint detail UI Test tab needs this property to know it's a classification flow 11 | # azureml.promptflow.mode: classification 12 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chat-with-your-data", 3 | "version": "1.1.0", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "chat-with-your-data", 9 | "version": "1.1.0" 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chat-with-your-data", 3 | "version": "1.1.0", 4 | "private": true 5 | } 6 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | unittest: Unit Tests (relatively fast) 4 | functional: Functional Tests (tests that require a running server, with stubbed downstreams) 5 | azure: marks tests as extended (run less frequently, relatively slow) 6 | pythonpath = ./code 7 | log_level=debug 8 | -------------------------------------------------------------------------------- /scripts/data_scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2-binary==2.9.10 2 | azure-identity==1.19.0 3 | azure-keyvault-secrets==4.9.0 4 | -------------------------------------------------------------------------------- /scripts/generate_arm_templates.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | az bicep version 2>/dev/null || az bicep install 6 | 7 | TEMPLATES=() 8 | FILES=() 9 | 10 | for ARG in $@; do 11 | # If the argument is supplied with "-f", then it is a template file that needs to be built 12 | if [[ $ARG == -f=* ]]; then 13 | TEMPLATES+=(${ARG#-f=}) 14 | else 15 | # Otherwise, it is a file that has been edited 16 | az bicep format -f $ARG & 17 | FILES+=($ARG) 18 | fi 19 | done 20 | 21 | wait 22 | 23 | git add ${FILES[@]} 24 | 25 | # Build the templates 26 | for TEMPLATE in ${TEMPLATES[@]}; do 27 | az bicep build -f 
$TEMPLATE 28 | git add "${TEMPLATE%.bicep}.json" # Change the extension from .bicep to .json 29 | done 30 | -------------------------------------------------------------------------------- /scripts/package_frontend.ps1: -------------------------------------------------------------------------------- 1 | mkdir dist -Force 2 | rm dist/* -r -Force 3 | 4 | # Python 5 | poetry install 6 | poetry export -o dist/requirements.txt 7 | cp *.py dist -Force 8 | cp backend dist -r -Force 9 | 10 | # Node 11 | cd frontend 12 | npm install 13 | npm run build 14 | -------------------------------------------------------------------------------- /scripts/package_frontend.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eou pipefail 3 | 4 | mkdir -p dist 5 | rm -rf dist/* 6 | poetry install 7 | poetry export -o dist/requirements.txt 8 | cp *.py dist 9 | cp -r backend dist 10 | 11 | cd frontend 12 | npm install 13 | npm run build 14 | -------------------------------------------------------------------------------- /scripts/run_create_table_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "started the script" 3 | 4 | # Variables 5 | baseUrl="$1" 6 | keyvaultName="$2" 7 | requirementFile="requirements.txt" 8 | requirementFileUrl=${baseUrl}"scripts/data_scripts/requirements.txt" 9 | resourceGroup="$3" 10 | serverName="$4" 11 | webAppPrincipalName="$5" 12 | adminAppPrincipalName="$6" 13 | functionAppPrincipalName="$7" 14 | managedIdentityName="$8" 15 | 16 | echo "Script Started" 17 | 18 | # Get the public IP address of the machine running the script 19 | publicIp=$(curl -s https://api.ipify.org) 20 | 21 | # Use Azure CLI to add the public IP to the PostgreSQL firewall rule 22 | az postgres flexible-server firewall-rule create --resource-group $resourceGroup --name $serverName --rule-name "AllowScriptIp" --start-ip-address "$publicIp" --end-ip-address "$publicIp" 23 | 24 | # Download the create table python file 25 | curl --output "create_postgres_tables.py" ${baseUrl}"scripts/data_scripts/create_postgres_tables.py" 26 | 27 | # Download the requirement file 28 | curl --output "$requirementFile" "$requirementFileUrl" 29 | 30 | echo "Download completed" 31 | 32 | #Replace key vault name 33 | sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "create_postgres_tables.py" 34 | sed -i "s/webAppPrincipalName/${webAppPrincipalName}/g" "create_postgres_tables.py" 35 | sed -i "s/adminAppPrincipalName/${adminAppPrincipalName}/g" "create_postgres_tables.py" 36 | sed -i "s/managedIdentityName/${managedIdentityName}/g" "create_postgres_tables.py" 37 | sed -i "s/functionAppPrincipalName/${functionAppPrincipalName}/g" "create_postgres_tables.py" 38 | sed -i "s/serverName/${serverName}/g" "create_postgres_tables.py" 39 | 40 | pip install -r requirements.txt 41 | 42 | python create_postgres_tables.py 43 | -------------------------------------------------------------------------------- /tests/e2e-test/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/tests/e2e-test/base/__init__.py -------------------------------------------------------------------------------- /tests/e2e-test/base/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import uuid 4 | 5 | from 
dotenv import load_dotenv 6 | 7 | 8 | class BasePage: 9 | 10 | def __init__(self, page): 11 | self.page = page 12 | 13 | def scroll_into_view(self, locator): 14 | reference_list = locator 15 | locator.nth(reference_list.count() - 1).scroll_into_view_if_needed() 16 | 17 | def select_an_element(self, locator, text): 18 | elements = locator.all() 19 | for element in elements: 20 | clientele = element.text_content() 21 | if clientele == text: 22 | element.click() 23 | break 24 | 25 | def is_visible(self, locator): 26 | locator.is_visible() 27 | 28 | def validate_response_status(self, questions): 29 | load_dotenv() 30 | WEB_URL = os.getenv("web_url") 31 | 32 | url = f"{WEB_URL}/api/conversation" 33 | 34 | user_message_id = str(uuid.uuid4()) 35 | assistant_message_id = str(uuid.uuid4()) 36 | conversation_id = str(uuid.uuid4()) 37 | 38 | payload = { 39 | "messages": [{"role": "user", "content": questions, "id": user_message_id}], 40 | "conversation_id": conversation_id, 41 | } 42 | # Serialize the payload to JSON 43 | payload_json = json.dumps(payload) 44 | headers = {"Content-Type": "application/json", "Accept": "*/*"} 45 | response = self.page.request.post(url, headers=headers, data=payload_json) 46 | # Check the response status code 47 | assert response.status == 200, ( 48 | "response code is " + str(response.status) + " " + str(response.json()) 49 | ) 50 | 51 | def wait_for_load(self, wait_time): 52 | self.page.wait_for_timeout(wait_time) 53 | -------------------------------------------------------------------------------- /tests/e2e-test/config/constants.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | WEB_URL = os.getenv("web_url") 8 | if WEB_URL.endswith("/"): 9 | WEB_URL = WEB_URL[:-1] 10 | 11 | ADMIN_URL = os.getenv("admin_url") 12 | if ADMIN_URL.endswith("/"): 13 | ADMIN_URL = ADMIN_URL[:-1] 14 | 15 | # Get the absolute path to the repository root 16 | repo_root = os.getenv("GITHUB_WORKSPACE", os.getcwd()) 17 | 18 | # Construct the absolute path to the JSON file 19 | # note: may have to remove 'tests/e2e-test' from below when running locally 20 | json_file_path = os.path.join( 21 | repo_root, "tests/e2e-test", "testdata", "golden_path_data.json" 22 | ) 23 | 24 | # Load questions from JSON file 25 | with open(json_file_path, "r") as file: 26 | data = json.load(file) 27 | questions = data["questions"] 28 | 29 | 30 | # Admin Page input data 31 | admin_page_title = "Chat with your data Solution Accelerator" 32 | upload_file_success_message = "Embeddings computation in progress." 33 | upload_page_url = "https://plasticsmartcities.org/public-awareness/" 34 | upload_url_success_message = "Embeddings added successfully for" 35 | unsupported_file_message = "application/json files are not allowed." 36 | no_files_to_delete_message = "No files to delete" 37 | 38 | # Web User Page input data 39 | user_page_title = "Azure AI" 40 | 41 | 42 | invalid_response = "The requested information is not available in the retrieved data. 
Please try another query or topic.AI-generated content may be incorrect" 43 | -------------------------------------------------------------------------------- /tests/e2e-test/pages/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/tests/e2e-test/pages/__init__.py -------------------------------------------------------------------------------- /tests/e2e-test/pages/adminPage.py: -------------------------------------------------------------------------------- 1 | from base.base import BasePage 2 | 3 | 4 | class AdminPage(BasePage): 5 | ADMIN_PAGE_TITLE = "//h1[text()='Chat with your data Solution Accelerator']" 6 | INGEST_DATA_TAB = "//span[text()='Ingest Data']" 7 | EXPLORE_DATA_TAB = "//span[text()='Explore Data']" 8 | DELETE_DATA_TAB = "//span[text()='Delete Data']" 9 | CONFIGURATION_TAB = "//span[text()='Configuration']" 10 | BROWSE_FILES_BUTTON = "//button[normalize-space()='Browse files']" 11 | UPLOAD_SUCCESS_MESSAGE = "//div[@data-testid='stAlertContentSuccess']//p" 12 | REPROCESS_ALL_DOCUMENTS_BUTTON = "//p[contains(text(),'Reprocess all documents')]" 13 | ADD_URLS_TEXT_AREA = "//textarea[contains(@aria-label,'Add URLs ')]" 14 | PROCESS_INGEST_WEB_PAGES_BUTTON = "//p[text()='Process and ingest web pages']" 15 | SELECT_YOUR_FILE_DROP_DOWN = "//div[@data-baseweb='select']" 16 | DROP_DOWN_OPTION = "//div[@data-testid='stTooltipHoverTarget']/div/div" 17 | DELETE_CHECK_BOXES = "//label[@data-baseweb='checkbox']/span" 18 | DELETE_BUTTON = "//p[text()='Delete']" 19 | UNSUPPORTED_FILE_ERROR_MESSAGE = ( 20 | "//span[@data-testid='stFileUploaderFileErrorMessage']" 21 | ) 22 | REMOVE_ICON = "//button[@data-testid='stBaseButton-minimal']" 23 | NO_FILES_TO_DELETE_MESSAGE = "//div[@data-testid='stAlertContentInfo']//p" 24 | 25 | def __init__(self, page): 26 | self.page = page 27 | 28 | def click_delete_data_tab(self): 29 | self.page.locator(self.DELETE_DATA_TAB).click() 30 | self.page.wait_for_timeout(5000) 31 | -------------------------------------------------------------------------------- /tests/e2e-test/pages/loginPage.py: -------------------------------------------------------------------------------- 1 | from base.base import BasePage 2 | from playwright.sync_api import TimeoutError as PlaywightTimeoutError 3 | 4 | 5 | class LoginPage(BasePage): 6 | 7 | EMAIL_TEXT_BOX = "//input[@type='email']" 8 | NEXT_BUTTON = "//input[@type='submit']" 9 | PASSWORD_TEXT_BOX = "//input[@type='password']" 10 | SIGNIN_BUTTON = "//input[@id='idSIButton9']" 11 | YES_BUTTON = "//input[@id='idSIButton9']" 12 | PERMISSION_ACCEPT_BUTTON = "//input[@type='submit']" 13 | 14 | def __init__(self, page): 15 | self.page = page 16 | 17 | def authenticate(self, username, password): 18 | # login with username and password in web url 19 | self.page.locator(self.EMAIL_TEXT_BOX).fill(username) 20 | self.page.locator(self.NEXT_BUTTON).click() 21 | 22 | # Wait for the password input field to be available and fill it 23 | self.page.wait_for_load_state("networkidle") 24 | # Enter password 25 | self.page.locator(self.PASSWORD_TEXT_BOX).fill(password) 26 | # Click on SignIn button 27 | self.page.locator(self.SIGNIN_BUTTON).click() 28 | try: 29 | self.page.locator(self.YES_BUTTON).wait_for(state="visible", timeout=30000) 30 | # Click on YES button 31 | self.page.locator(self.YES_BUTTON).click() 32 | except PlaywightTimeoutError: 33 | pass 34 | try: 35 | 
self.page.locator(self.PERMISSION_ACCEPT_BUTTON).wait_for( 36 | state="visible", timeout=10000 37 | ) 38 | # Click on Permissions ACCEPT button 39 | self.page.locator(self.PERMISSION_ACCEPT_BUTTON).click() 40 | self.page.wait_for_load_state("networkidle") 41 | except PlaywightTimeoutError: 42 | pass 43 | -------------------------------------------------------------------------------- /tests/e2e-test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_cli = true 3 | log_cli_level = INFO 4 | log_file = logs/tests.log 5 | log_file_level = INFO 6 | addopts = -p no:warnings 7 | -------------------------------------------------------------------------------- /tests/e2e-test/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest-playwright 2 | pytest-reporter-html1 3 | python-dotenv 4 | pytest-check 5 | pytest-html 6 | py -------------------------------------------------------------------------------- /tests/e2e-test/sample_dotenv_file.txt: -------------------------------------------------------------------------------- 1 | web_url = 'web app url' 2 | admin_url = 'admin url' 3 | user_name = 'user name' 4 | pass_word = 'pass word' 5 | client_id = 'client id' 6 | client_secret = 'client secret' 7 | tenant_id = 'tenant id' -------------------------------------------------------------------------------- /tests/e2e-test/testdata/golden_path_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "questions": [ 3 | "How do I enroll in health benefits a new employee?", 4 | "What options are available to me in terms of health coverage?", 5 | "What providers are available under each option?", 6 | "Can I access my current provider?", 7 | "What benefits are available to employees (besides health coverage)?", 8 | "How do I enroll in employee benefits?", 9 | "How much does health coverage cost?", 10 | "Can I extend my benefits to cover my spouse or dependents?" 
11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /tests/e2e-test/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/tests/e2e-test/tests/__init__.py -------------------------------------------------------------------------------- /tests/e2e-test/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from config.constants import * 5 | from playwright.sync_api import sync_playwright 6 | from py.xml import html # type: ignore 7 | 8 | 9 | @pytest.fixture(scope="session") 10 | def login_logout(): 11 | # perform login and browser close once in a session 12 | with sync_playwright() as p: 13 | browser = p.chromium.launch(headless=False, args=["--start-maximized"]) 14 | context = browser.new_context(no_viewport=True) 15 | context.set_default_timeout(80000) 16 | page = context.new_page() 17 | # Navigate to the login URL 18 | page.goto(WEB_URL) 19 | # Wait for the login form to appear 20 | page.wait_for_load_state("networkidle") 21 | page.wait_for_timeout(5000) 22 | # login to web url with username and password 23 | # login_page = LoginPage(page) 24 | # load_dotenv() 25 | # login_page.authenticate(os.getenv('user_name'), os.getenv('pass_word')) 26 | yield page 27 | browser.close() 28 | 29 | 30 | @pytest.hookimpl(tryfirst=True) 31 | def pytest_html_report_title(report): 32 | report.title = "Test_Automation_Chat_with_your_Data" 33 | 34 | 35 | # Add a column for descriptions 36 | def pytest_html_results_table_header(cells): 37 | cells.insert(1, html.th("Description")) 38 | 39 | 40 | def pytest_html_results_table_row(report, cells): 41 | cells.insert( 42 | 1, html.td(report.description if hasattr(report, "description") else "") 43 | ) 44 | 45 | 46 | # Add logs and docstring to report 47 | @pytest.hookimpl(hookwrapper=True) 48 | def pytest_runtest_makereport(item, call): 49 | outcome = yield 50 | report = outcome.get_result() 51 | report.description = str(item.function.__doc__) 52 | os.makedirs("logs", exist_ok=True) 53 | extra = getattr(report, "extra", []) 54 | report.extra = extra 55 | -------------------------------------------------------------------------------- /tests/integration/ui/cypress.config.ts: -------------------------------------------------------------------------------- 1 | const { defineConfig } = require("cypress"); 2 | 3 | module.exports = defineConfig({ 4 | e2e: { 5 | setupNodeEvents(on, config) { 6 | // implement node event listeners here 7 | }, 8 | supportFile: false 9 | }, 10 | }); 11 | -------------------------------------------------------------------------------- /tests/integration/ui/cypress/README.md: -------------------------------------------------------------------------------- 1 | # Running UI Tests 2 | 3 | Run the command for cypress tests (to run in headless mode) 4 | 5 | ``` 6 | npx cypress run --env ADMIN_WEBSITE_NAME=https://example-admin.com,FRONTEND_WEBSITE_NAME=https://example.com 7 | ``` 8 | 9 | If you want to run the tests in interactive mode (in a browser) 10 | 11 | ``` 12 | npx cypress open --env ADMIN_WEBSITE_NAME=https://example-admin.com,FRONTEND_WEBSITE_NAME=https://example.com 13 | ``` 14 | 15 | Then follow the instructions on the opened electron browser -------------------------------------------------------------------------------- 
--------------------------------------------------------------------------------
/tests/integration/ui/cypress.config.ts:
--------------------------------------------------------------------------------
1 | const { defineConfig } = require("cypress");
2 | 
3 | module.exports = defineConfig({
4 |   e2e: {
5 |     setupNodeEvents(on, config) {
6 |       // implement node event listeners here
7 |     },
8 |     supportFile: false
9 |   },
10 | });
11 | 
--------------------------------------------------------------------------------
/tests/integration/ui/cypress/README.md:
--------------------------------------------------------------------------------
1 | # Running UI Tests
2 | 
3 | Run the following command to execute the Cypress tests in headless mode:
4 | 
5 | ```
6 | npx cypress run --env ADMIN_WEBSITE_NAME=https://example-admin.com,FRONTEND_WEBSITE_NAME=https://example.com
7 | ```
8 | 
9 | To run the tests in interactive mode (in a browser), run:
10 | 
11 | ```
12 | npx cypress open --env ADMIN_WEBSITE_NAME=https://example-admin.com,FRONTEND_WEBSITE_NAME=https://example.com
13 | ```
14 | 
15 | Then follow the instructions in the Electron browser window that opens.
--------------------------------------------------------------------------------
/tests/integration/ui/cypress/e2e/admin.spec.cy.ts:
--------------------------------------------------------------------------------
1 | Cypress.config('baseUrl', Cypress.env('ADMIN_WEBSITE_NAME'))
2 | 
3 | describe('the cwyd admin website', () => {
4 |   before(() => {
5 |     cy.visit('/Ingest_Data');
6 |   });
7 | 
8 |   it('allows file upload', () => {
9 |     cy.get('input[type=file]', { timeout: 30000 }).selectFile('../../../data/PerksPlus.pdf', { force: true });
10 |     cy.get('div[data-testid*="stNotificationContentSuccess"]', { timeout: 30000 }).then(($div) => {
11 |       expect($div.text()).to.contain('1 documents uploaded');
12 |     });
13 |   });
14 | });
15 | 
--------------------------------------------------------------------------------
/tests/integration/ui/cypress/e2e/chat.spec.cy.ts:
--------------------------------------------------------------------------------
1 | Cypress.config('baseUrl', Cypress.env('FRONTEND_WEBSITE_NAME'))
2 | 
3 | describe('the cwyd user website', () => {
4 |   before(() => {
5 |     cy.visit('/');
6 |   });
7 | 
8 |   it('answers user chat', () => {
9 |     cy.get('textarea').type('Hello{enter}');
10 | 
11 |     cy.get('div[class*="chatMessageUserMessage"]').then(($div) => {
12 |       cy.log('Text from user chat:', $div.text());
13 |       expect($div).to.exist;
14 |       expect($div.text()).to.contain('Hello');
15 |     });
16 | 
17 |     cy.get('div[class*="answerText"]', { timeout: 30000 }).then(($div) => {
18 |       cy.log('Text from AI:', $div.text());
19 |       expect($div).to.exist;
20 |     });
21 |   });
22 | });
23 | 
--------------------------------------------------------------------------------
/tests/integration/ui/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "ui",
3 |   "private": true,
4 |   "version": "0.0.0",
5 |   "description": "ui tests",
6 |   "scripts": {
7 |     "cypress:open": "cypress open"
8 |   },
9 |   "devDependencies": {
10 |     "cypress": "^14.2.1",
11 |     "typescript": "^5.8.2"
12 |   }
13 | }
14 | 
--------------------------------------------------------------------------------
/tests/integration/ui/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "ESNext",
4 |     "useDefineForClassFields": true,
5 |     "lib": [
6 |       "ESNext"
7 |     ],
8 |     "allowJs": false,
9 |     "skipLibCheck": true,
10 |     "esModuleInterop": false,
11 |     "allowSyntheticDefaultImports": true,
12 |     "strict": true,
13 |     "forceConsistentCasingInFileNames": true,
14 |     "module": "ESNext",
15 |     "moduleResolution": "Node",
16 |     "resolveJsonModule": true,
17 |     "isolatedModules": true,
18 |     "noEmit": true
19 |   },
20 |   "include": [
21 |     "cypress"
22 |   ]
23 | }
--------------------------------------------------------------------------------
/tests/llm-evaluator/.env.sample:
--------------------------------------------------------------------------------
1 | # Azure Credentials
2 | CLIENT_ID=
3 | CLIENT_SECRET=
4 | TENANT_ID=
5 | 
6 | # Azure AI Project Details
7 | SUBSCRIPTION_ID=
8 | RESOURCE_GROUP_NAME=
9 | # PROJECT_NAME=
10 | 
11 | # Model Configuration (cwyd rg)
12 | AZURE_ENDPOINT=
13 | AZURE_DEPLOYMENT=
14 | BASE_URL=
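For illustration only (not repository code): one plausible way the variables in the `.env.sample` above could be wired into an Entra ID credential and an Azure OpenAI client. It assumes the values have already been loaded into the process environment, and the API version string is likewise an assumption.

```python
# Illustrative wiring (assumption): turn the .env values above into an
# Entra ID credential and a token-based Azure OpenAI client.
import os

from azure.identity import ClientSecretCredential, get_bearer_token_provider
from openai import AzureOpenAI

# Assumes a .env file created from the sample has been exported into the
# environment (e.g. by the shell or a dotenv loader).
credential = ClientSecretCredential(
    tenant_id=os.environ["TENANT_ID"],
    client_id=os.environ["CLIENT_ID"],
    client_secret=os.environ["CLIENT_SECRET"],
)
token_provider = get_bearer_token_provider(
    credential, "https://cognitiveservices.azure.com/.default"
)
client = AzureOpenAI(
    azure_endpoint=os.environ["AZURE_ENDPOINT"],
    azure_ad_token_provider=token_provider,
    api_version="2024-02-01",  # assumption: use whichever version your deployment supports
)
response = client.chat.completions.create(
    model=os.environ["AZURE_DEPLOYMENT"],
    messages=[{"role": "user", "content": "How do I enroll in employee benefits?"}],
)
print(response.choices[0].message.content)
```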
--------------------------------------------------------------------------------
/tests/llm-evaluator/data/evaluation_results.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/6b8128c4112eab0a731a5ddef6ead5adca2be54a/tests/llm-evaluator/data/evaluation_results.xlsx
--------------------------------------------------------------------------------
/tests/llm-evaluator/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-ai-evaluation
2 | azure-ai-projects
3 | azure-identity
4 | openai
5 | openpyxl
6 | pandas
7 | pathlib
8 | typing-extensions
9 | 
--------------------------------------------------------------------------------
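For illustration only (not repository code): `pandas` and `openpyxl` appear in the requirements above because evaluation output ends up in the `data/evaluation_results.xlsx` workbook referenced earlier. A minimal sketch of that last step follows; the rows, column names, and scores are entirely made up.

```python
# Illustrative final step (assumption): persist evaluation rows to the Excel
# workbook referenced above. pandas uses openpyxl as its .xlsx engine.
import pandas as pd

# Placeholder rows; a real run would hold model responses and evaluator scores.
results = [
    {"question": "How do I enroll in employee benefits?", "answer": "...", "relevance": 4},
    {"question": "How much does health coverage cost?", "answer": "...", "relevance": 5},
]

df = pd.DataFrame(results)
df.to_excel("data/evaluation_results.xlsx", index=False)

# Read the workbook back for a quick summary of the collected columns.
print(pd.read_excel("data/evaluation_results.xlsx").describe(include="all"))
```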