├── .chainlit ├── config.toml └── translations │ ├── de.json │ ├── en-US.json │ └── pt-BR.json ├── .devcontainer ├── Dockerfile ├── devcontainer.json └── noop.txt ├── .env.sample ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README-Azure-Monitor.md ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── chainlit.md ├── environment.yml ├── images ├── ade-1.png ├── ade-2.png ├── ade-3.png ├── ai-studio-traces.png ├── app-insights-1.png ├── app-insights-2.png ├── app-insights-3.png ├── assistant-ai-studio.png ├── dashboard-0.png ├── dashboard-1.png ├── dashboard-2.png ├── dashboard-3.png ├── dashboard-4.png ├── dashboard-5.png ├── grafana-1.png ├── log-analytics-1.png ├── log-analytics-2.png └── sad-puppy.png ├── scripts ├── post_create.sh ├── start_chainlit.sh ├── test_assistant_ui.sh └── test_sales_insights_ui.sh └── src ├── .chainlit ├── config.toml └── translations │ └── en-US.json ├── .gitignore ├── app.py ├── assistant_flow ├── .gitignore ├── chat.py ├── core.py ├── requirements.txt └── setup.py ├── chainlit.md ├── custom_evaluators ├── execution_time.py ├── in_domain_evaluator.prompty ├── sql_similarity.prompty ├── sql_similarity_brief.prompty ├── test.ipynb └── user_vote.prompty ├── evaluate ├── azure_monitor │ ├── call_promptflow.kql │ └── sales_data_insights.kql ├── eval_azure_monitor.py ├── evaluate.py ├── response.json ├── sequence.sh └── test.ipynb ├── evaluation_readme.md ├── finetune └── finetune.py ├── generate_data ├── batch_generate_sql.py ├── generate.py ├── product_categories.csv ├── sqllite.ipynb ├── test_set_large.jsonl ├── test_set_mini.jsonl ├── test_set_small.csv ├── test_set_small.jsonl ├── test_set_xxl.csv ├── test_set_xxl.jsonl └── train_set_xxl.jsonl └── sales_data_insights ├── data └── order_data.db ├── main.py └── system_message.py /.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = false 4 | 5 | # List of environment variables to be provided by each user to use the app. 6 | user_env = [] 7 | 8 | # Duration (in seconds) during which the session is saved when the connection is lost 9 | session_timeout = 3600 10 | 11 | # Enable third parties caching (e.g LangChain cache) 12 | cache = false 13 | 14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 15 | # follow_symlink = false 16 | 17 | [features] 18 | # Show the prompt playground 19 | prompt_playground = true 20 | 21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) 22 | unsafe_allow_html = false 23 | 24 | # Process and display mathematical expressions. This can clash with "$" characters in messages. 25 | latex = false 26 | 27 | # Authorize users to upload files with messages 28 | [features.multi_modal] 29 | enabled = false 30 | accept = ["*/*"] 31 | max_files = 20 32 | max_size_mb = 500 33 | 34 | # Allows user to use speech to text 35 | [features.speech_to_text] 36 | enabled = false 37 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 38 | # language = "en-US" 39 | 40 | [UI] 41 | # Name of the app and chatbot. 42 | name = "Chatbot" 43 | 44 | # Show the readme while the thread is empty. 
45 | show_readme_as_default = true 46 | 47 | # Description of the app and chatbot. This is used for HTML tags. 48 | # description = "" 49 | 50 | # Large size content are by default collapsed for a cleaner ui 51 | default_collapse_content = false 52 | 53 | # The default value for the expand messages settings. 54 | default_expand_messages = false 55 | 56 | # Hide the chain of thought details from the user in the UI. 57 | hide_cot = false 58 | 59 | # Link to your github repo. This will add a github button in the UI's header. 60 | # github = "" 61 | 62 | # Specify a CSS file that can be used to customize the user interface. 63 | # The CSS file can be served from the public directory or via an external link. 64 | # custom_css = "/public/test.css" 65 | 66 | # Override default MUI light theme. (Check theme.ts) 67 | [UI.theme.light] 68 | #background = "#FAFAFA" 69 | #paper = "#FFFFFF" 70 | 71 | [UI.theme.light.primary] 72 | #main = "#F80061" 73 | #dark = "#980039" 74 | #light = "#FFE7EB" 75 | 76 | # Override default MUI dark theme. (Check theme.ts) 77 | [UI.theme.dark] 78 | #background = "#FAFAFA" 79 | #paper = "#FFFFFF" 80 | 81 | [UI.theme.dark.primary] 82 | #main = "#F80061" 83 | #dark = "#980039" 84 | #light = "#FFE7EB" 85 | 86 | 87 | [meta] 88 | generated_by = "1.0.101" 89 | -------------------------------------------------------------------------------- /.chainlit/translations/de.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Einstellungen", 8 | "settingsKey": "S", 9 | "APIKeys": "API-Schl\u00fcssel", 10 | "logout": "Abmelden" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "Neuer Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Aufgabenliste", 22 | "loading": "L\u00e4dt...", 23 | "error": "Ein Fehler ist aufgetreten" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Upload abbrechen", 28 | "removeAttachment": "Anhang entfernen" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Neuen Chat erstellen?", 32 | "clearChat": "Dies wird die aktuellen Nachrichten l\u00f6schen und einen neuen Chat starten.", 33 | "cancel": "Abbrechen", 34 | "confirm": "Best\u00e4tigen" 35 | }, 36 | "settingsModal": { 37 | "settings": "Einstellungen", 38 | "expandMessages": "Nachrichten ausklappen", 39 | "hideChainOfThought": "Zwischenschritte verbergen", 40 | "darkMode": "Dunkelmodus" 41 | } 42 | }, 43 | "organisms": { 44 | "chat": { 45 | "history": { 46 | "index": { 47 | "showHistory": "Zeige Chatverlauf", 48 | "lastInputs": "Letzte Eingaben", 49 | "noInputs": "Leer...", 50 | "loading": "L\u00e4dt..." 51 | } 52 | }, 53 | "inputBox": { 54 | "input": { 55 | "placeholder": "Nachricht eingeben..." 
56 | }, 57 | "speechButton": { 58 | "start": "Aufnahme starten", 59 | "stop": "Aufnahme stoppen" 60 | }, 61 | "SubmitButton": { 62 | "sendMessage": "Nachricht senden", 63 | "stopTask": "Aufgabe stoppen" 64 | }, 65 | "UploadButton": { 66 | "attachFiles": "Dateien anh\u00e4ngen" 67 | }, 68 | "waterMark": { 69 | "text": "Erstellt mit" 70 | } 71 | }, 72 | "Messages": { 73 | "index": { 74 | "running": "L\u00e4uft", 75 | "executedSuccessfully": "erfolgreich ausgef\u00fchrt", 76 | "failed": "fehlgeschlagen", 77 | "feedbackUpdated": "Feedback aktualisiert", 78 | "updating": "Aktualisiert" 79 | } 80 | }, 81 | "dropScreen": { 82 | "dropYourFilesHere": "Ziehe deine Dateien hierher" 83 | }, 84 | "index": { 85 | "failedToUpload": "Upload fehlgeschlagen", 86 | "cancelledUploadOf": "Upload abgebrochen von", 87 | "couldNotReachServer": "Konnte den Server nicht erreichen", 88 | "continuingChat": "Vorherigen Chat fortsetzen" 89 | }, 90 | "settings": { 91 | "settingsPanel": "Einstellungsfenster", 92 | "reset": "Zur\u00fccksetzen", 93 | "cancel": "Abbrechen", 94 | "confirm": "Best\u00e4tigen" 95 | } 96 | }, 97 | "threadHistory": { 98 | "sidebar": { 99 | "filters": { 100 | "FeedbackSelect": { 101 | "feedbackAll": "Feedback: Alle", 102 | "feedbackPositive": "Feedback: Positiv", 103 | "feedbackNegative": "Feedback: Negativ" 104 | }, 105 | "SearchBar": { 106 | "search": "Suche" 107 | } 108 | }, 109 | "DeleteThreadButton": { 110 | "confirmMessage": "Dies wird den Thread sowie seine Nachrichten und Elemente l\u00f6schen.", 111 | "cancel": "Abbrechen", 112 | "confirm": "Best\u00e4tigen", 113 | "deletingChat": "Chat wird gel\u00f6scht", 114 | "chatDeleted": "Chat gel\u00f6scht" 115 | }, 116 | "index": { 117 | "pastChats": "Vergangene Chats" 118 | }, 119 | "ThreadList": { 120 | "empty": "Leer...", 121 | "today": "Heute", 122 | "yesterday": "Gestern", 123 | "previous7days": "Vor 7 Tagen", 124 | "previous30days": "Vor 30 Tagen" 125 | }, 126 | "TriggerButton": { 127 | "closeSidebar": "Seitenleiste schlie\u00dfen", 128 | "openSidebar": "Seitenleiste \u00f6ffnen" 129 | } 130 | }, 131 | "Thread": { 132 | "backToChat": "Zur\u00fcck zum Chat", 133 | "chatCreatedOn": "Dieser Chat wurde erstellt am" 134 | } 135 | }, 136 | "header": { 137 | "chat": "Chat", 138 | "readme": "Liesmich" 139 | } 140 | } 141 | }, 142 | "hooks": { 143 | "useLLMProviders": { 144 | "failedToFetchProviders": "Anbieter konnten nicht geladen werden:" 145 | } 146 | }, 147 | "pages": { 148 | "Design": {}, 149 | "Env": { 150 | "savedSuccessfully": "Erfolgreich gespeichert", 151 | "requiredApiKeys": "Ben\u00f6tigte API-Schl\u00fcssel", 152 | "requiredApiKeysInfo": "Um diese App zu nutzen, werden die folgenden API-Schl\u00fcssel ben\u00f6tigt. Die Schl\u00fcssel werden im lokalen Speicher Ihres Ger\u00e4ts gespeichert." 153 | }, 154 | "Page": { 155 | "notPartOfProject": "Sie sind nicht Teil dieses Projekts." 
156 | }, 157 | "ResumeButton": { 158 | "resumeChat": "Chat fortsetzen" 159 | } 160 | } 161 | } -------------------------------------------------------------------------------- /.chainlit/translations/en-US.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Settings", 8 | "settingsKey": "S", 9 | "APIKeys": "API Keys", 10 | "logout": "Logout" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "New Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Task List", 22 | "loading": "Loading...", 23 | "error": "An error occured" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancel upload", 28 | "removeAttachment": "Remove attachment" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Create new chat?", 32 | "clearChat": "This will clear the current messages and start a new chat.", 33 | "cancel": "Cancel", 34 | "confirm": "Confirm" 35 | }, 36 | "settingsModal": { 37 | "settings": "Settings", 38 | "expandMessages": "Expand Messages", 39 | "hideChainOfThought": "Hide Chain of Thought", 40 | "darkMode": "Dark Mode" 41 | }, 42 | "detailsButton": { 43 | "using": "Using", 44 | "running": "Running", 45 | "took_one": "Took {{count}} step", 46 | "took_other": "Took {{count}} steps" 47 | }, 48 | "auth": { 49 | "authLogin": { 50 | "title": "Login to access the app.", 51 | "form": { 52 | "email": "Email address", 53 | "password": "Password", 54 | "noAccount": "Don't have an account?", 55 | "alreadyHaveAccount": "Already have an account?", 56 | "signup": "Sign Up", 57 | "signin": "Sign In", 58 | "or": "OR", 59 | "continue": "Continue", 60 | "forgotPassword": "Forgot password?", 61 | "passwordMustContain": "Your password must contain:", 62 | "emailRequired": "email is a required field", 63 | "passwordRequired": "password is a required field" 64 | }, 65 | "error": { 66 | "default": "Unable to sign in.", 67 | "signin": "Try signing in with a different account.", 68 | "oauthsignin": "Try signing in with a different account.", 69 | "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.", 70 | "oauthcallbackerror": "Try signing in with a different account.", 71 | "oauthcreateaccount": "Try signing in with a different account.", 72 | "emailcreateaccount": "Try signing in with a different account.", 73 | "callback": "Try signing in with a different account.", 74 | "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.", 75 | "emailsignin": "The e-mail could not be sent.", 76 | "emailverify": "Please verify your email, a new email has been sent.", 77 | "credentialssignin": "Sign in failed. Check the details you provided are correct.", 78 | "sessionrequired": "Please sign in to access this page." 79 | } 80 | }, 81 | "authVerifyEmail": { 82 | "almostThere": "You're almost there! 
We've sent an email to ", 83 | "verifyEmailLink": "Please click on the link in that email to complete your signup.", 84 | "didNotReceive": "Can't find the email?", 85 | "resendEmail": "Resend email", 86 | "goBack": "Go Back", 87 | "emailSent": "Email sent successfully.", 88 | "verifyEmail": "Verify your email address" 89 | }, 90 | "providerButton": { 91 | "continue": "Continue with {{provider}}", 92 | "signup": "Sign up with {{provider}}" 93 | }, 94 | "authResetPassword": { 95 | "newPasswordRequired": "New password is a required field", 96 | "passwordsMustMatch": "Passwords must match", 97 | "confirmPasswordRequired": "Confirm password is a required field", 98 | "newPassword": "New password", 99 | "confirmPassword": "Confirm password", 100 | "resetPassword": "Reset Password" 101 | }, 102 | "authForgotPassword": { 103 | "email": "Email address", 104 | "emailRequired": "email is a required field", 105 | "emailSent": "Please check the email address {{email}} for instructions to reset your password.", 106 | "enterEmail": "Enter your email address and we will send you instructions to reset your password.", 107 | "resendEmail": "Resend email", 108 | "continue": "Continue", 109 | "goBack": "Go Back" 110 | } 111 | } 112 | }, 113 | "organisms": { 114 | "chat": { 115 | "history": { 116 | "index": { 117 | "showHistory": "Show history", 118 | "lastInputs": "Last Inputs", 119 | "noInputs": "Such empty...", 120 | "loading": "Loading..." 121 | } 122 | }, 123 | "inputBox": { 124 | "input": { 125 | "placeholder": "Type your message here..." 126 | }, 127 | "speechButton": { 128 | "start": "Start recording", 129 | "stop": "Stop recording" 130 | }, 131 | "SubmitButton": { 132 | "sendMessage": "Send message", 133 | "stopTask": "Stop Task" 134 | }, 135 | "UploadButton": { 136 | "attachFiles": "Attach files" 137 | }, 138 | "waterMark": { 139 | "text": "Built with" 140 | } 141 | }, 142 | "Messages": { 143 | "index": { 144 | "running": "Running", 145 | "executedSuccessfully": "executed successfully", 146 | "failed": "failed", 147 | "feedbackUpdated": "Feedback updated", 148 | "updating": "Updating" 149 | } 150 | }, 151 | "dropScreen": { 152 | "dropYourFilesHere": "Drop your files here" 153 | }, 154 | "index": { 155 | "failedToUpload": "Failed to upload", 156 | "cancelledUploadOf": "Cancelled upload of", 157 | "couldNotReachServer": "Could not reach the server", 158 | "continuingChat": "Continuing previous chat" 159 | }, 160 | "settings": { 161 | "settingsPanel": "Settings panel", 162 | "reset": "Reset", 163 | "cancel": "Cancel", 164 | "confirm": "Confirm" 165 | } 166 | }, 167 | "threadHistory": { 168 | "sidebar": { 169 | "filters": { 170 | "FeedbackSelect": { 171 | "feedbackAll": "Feedback: All", 172 | "feedbackPositive": "Feedback: Positive", 173 | "feedbackNegative": "Feedback: Negative" 174 | }, 175 | "SearchBar": { 176 | "search": "Search" 177 | } 178 | }, 179 | "DeleteThreadButton": { 180 | "confirmMessage": "This will delete the thread as well as it's messages and elements.", 181 | "cancel": "Cancel", 182 | "confirm": "Confirm", 183 | "deletingChat": "Deleting chat", 184 | "chatDeleted": "Chat deleted" 185 | }, 186 | "index": { 187 | "pastChats": "Past Chats" 188 | }, 189 | "ThreadList": { 190 | "empty": "Empty...", 191 | "today": "Today", 192 | "yesterday": "Yesterday", 193 | "previous7days": "Previous 7 days", 194 | "previous30days": "Previous 30 days" 195 | }, 196 | "TriggerButton": { 197 | "closeSidebar": "Close sidebar", 198 | "openSidebar": "Open sidebar" 199 | } 200 | }, 201 | "Thread": { 202 | 
"backToChat": "Go back to chat", 203 | "chatCreatedOn": "This chat was created on" 204 | } 205 | }, 206 | "header": { 207 | "chat": "Chat", 208 | "readme": "Readme" 209 | } 210 | } 211 | }, 212 | "hooks": { 213 | "useLLMProviders": { 214 | "failedToFetchProviders": "Failed to fetch providers:" 215 | } 216 | }, 217 | "pages": { 218 | "Design": {}, 219 | "Env": { 220 | "savedSuccessfully": "Saved successfully", 221 | "requiredApiKeys": "Required API Keys", 222 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage." 223 | }, 224 | "Page": { 225 | "notPartOfProject": "You are not part of this project." 226 | }, 227 | "ResumeButton": { 228 | "resumeChat": "Resume Chat" 229 | } 230 | } 231 | } -------------------------------------------------------------------------------- /.chainlit/translations/pt-BR.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Configura\u00e7\u00f5es", 8 | "settingsKey": "S", 9 | "APIKeys": "Chaves de API", 10 | "logout": "Sair" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "Nova Conversa" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas", 22 | "loading": "Carregando...", 23 | "error": "Ocorreu um erro" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancelar envio", 28 | "removeAttachment": "Remover anexo" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Criar novo chat?", 32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.", 33 | "cancel": "Cancelar", 34 | "confirm": "Confirmar" 35 | }, 36 | "settingsModal": { 37 | "settings": "Configura\u00e7\u00f5es", 38 | "expandMessages": "Expandir Mensagens", 39 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento", 40 | "darkMode": "Modo Escuro" 41 | } 42 | }, 43 | "organisms": { 44 | "chat": { 45 | "history": { 46 | "index": { 47 | "showHistory": "Mostrar hist\u00f3rico", 48 | "lastInputs": "\u00daltimas Entradas", 49 | "noInputs": "Vazio...", 50 | "loading": "Carregando..." 51 | } 52 | }, 53 | "inputBox": { 54 | "input": { 55 | "placeholder": "Digite sua mensagem aqui..." 
56 | }, 57 | "speechButton": { 58 | "start": "Iniciar grava\u00e7\u00e3o", 59 | "stop": "Parar grava\u00e7\u00e3o" 60 | }, 61 | "SubmitButton": { 62 | "sendMessage": "Enviar mensagem", 63 | "stopTask": "Parar Tarefa" 64 | }, 65 | "UploadButton": { 66 | "attachFiles": "Anexar arquivos" 67 | }, 68 | "waterMark": { 69 | "text": "Constru\u00eddo com" 70 | } 71 | }, 72 | "Messages": { 73 | "index": { 74 | "running": "Executando", 75 | "executedSuccessfully": "executado com sucesso", 76 | "failed": "falhou", 77 | "feedbackUpdated": "Feedback atualizado", 78 | "updating": "Atualizando" 79 | } 80 | }, 81 | "dropScreen": { 82 | "dropYourFilesHere": "Solte seus arquivos aqui" 83 | }, 84 | "index": { 85 | "failedToUpload": "Falha ao enviar", 86 | "cancelledUploadOf": "Envio cancelado de", 87 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor", 88 | "continuingChat": "Continuando o chat anterior" 89 | }, 90 | "settings": { 91 | "settingsPanel": "Painel de Configura\u00e7\u00f5es", 92 | "reset": "Redefinir", 93 | "cancel": "Cancelar", 94 | "confirm": "Confirmar" 95 | } 96 | }, 97 | "threadHistory": { 98 | "sidebar": { 99 | "filters": { 100 | "FeedbackSelect": { 101 | "feedbackAll": "Feedback: Todos", 102 | "feedbackPositive": "Feedback: Positivo", 103 | "feedbackNegative": "Feedback: Negativo" 104 | }, 105 | "SearchBar": { 106 | "search": "Buscar" 107 | } 108 | }, 109 | "DeleteThreadButton": { 110 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.", 111 | "cancel": "Cancelar", 112 | "confirm": "Confirmar", 113 | "deletingChat": "Deletando conversa", 114 | "chatDeleted": "Conversa deletada" 115 | }, 116 | "index": { 117 | "pastChats": "Conversas Anteriores" 118 | }, 119 | "ThreadList": { 120 | "empty": "Vazio..." 121 | }, 122 | "TriggerButton": { 123 | "closeSidebar": "Fechar barra lateral", 124 | "openSidebar": "Abrir barra lateral" 125 | } 126 | }, 127 | "Thread": { 128 | "backToChat": "Voltar para a conversa", 129 | "chatCreatedOn": "Esta conversa foi criada em" 130 | } 131 | }, 132 | "header": { 133 | "chat": "Conversa", 134 | "readme": "Leia-me" 135 | } 136 | }, 137 | "hooks": { 138 | "useLLMProviders": { 139 | "failedToFetchProviders": "Falha ao buscar provedores:" 140 | } 141 | }, 142 | "pages": { 143 | "Design": {}, 144 | "Env": { 145 | "savedSuccessfully": "Salvo com sucesso", 146 | "requiredApiKeys": "Chaves de API necess\u00e1rias", 147 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo." 148 | }, 149 | "Page": { 150 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto." 151 | }, 152 | "ResumeButton": { 153 | "resumeChat": "Continuar Conversa" 154 | } 155 | } 156 | } 157 | } -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/devcontainers/miniconda:0-3 2 | 3 | # Copy environment.yml (if found) to a temp location so we update the environment. Also 4 | # copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists. 
5 | COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/ 6 | RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bin/conda env create -f /tmp/conda-tmp/environment.yml; fi \ 7 | && rm -rf /tmp/conda-tmp 8 | 9 | # [Optional] Uncomment this section to install additional OS packages. 10 | # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 11 | # && apt-get -y install --no-install-recommends 12 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/miniconda 3 | { 4 | "name": "Azure AI Studio and code first demo", 5 | "build": { 6 | "context": "..", 7 | "dockerfile": "Dockerfile" 8 | }, 9 | // "hostRequirements": { 10 | // "cpus": 8, 11 | // "memory": "4gb", 12 | // "storage": "64gb" 13 | // }, 14 | "customizations": { 15 | "vscode": { 16 | "extensions": [ 17 | "ms-python.python", 18 | "ms-toolsai.jupyter", 19 | "prompt-flow.prompt-flow" 20 | ], 21 | "terminal.integrated.shellIntegration.history": 5000, 22 | "terminal.integrated.defaultProfile.linux": "zsh" 23 | } 24 | }, 25 | "features": { 26 | "ghcr.io/devcontainers/features/azure-cli:1": {} 27 | }, 28 | 29 | // Features to add to the dev container. More info: https://containers.dev/features. 30 | // "features": {}, 31 | 32 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 33 | // "forwardPorts": [], 34 | 35 | // Use 'postCreateCommand' to run commands after the container is created. 36 | "postCreateCommand": "echo $ZSH_CUSTOM && export ZSH_CUSTOM && bash scripts/post_create.sh" 37 | // Configure tool-specific properties. 38 | // "customizations": {}, 39 | 40 | // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 41 | // "remoteUser": "root" 42 | } 43 | -------------------------------------------------------------------------------- /.devcontainer/noop.txt: -------------------------------------------------------------------------------- 1 | This file is copied into the container along with environment.yml* from the 2 | parent folder. This is done to prevent the Dockerfile COPY instruction from 3 | failing if no environment.yml is found.
-------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | OPENAI_API_TYPE="azure" 2 | OPENAI_API_VERSION="2024-02-15-preview" 3 | OPENAI_API_BASE="https://********.openai.azure.com/" 4 | OPENAI_API_KEY="**********" 5 | OPENAI_ASSISTANT_MODEL="gpt-35-turbo-1106" 6 | OPENAI_ANALYST_CHAT_MODEL="gpt-4-turbo" 7 | OPENAI_EVAL_MODEL="gpt-4-turbo" 8 | OPENAI_ASSISTANT_ID="asst_PMApxNOyiRLA4mrTWNfuvq5n" 9 | APPLICATIONINSIGHTS_CONNECTION_STRING="InstrumentationKey=***********;IngestionEndpoint=https://southcentralus-3.in.applicationinsights.azure.com/;LiveEndpoint=https://southcentralus.livediagnostics.monitor.azure.com/;ApplicationId=**********" 10 | 11 | AZUREAI_COHERE_CHAT_URL="https://cohere-cmdR-plus-gyahe-serverless.eastus2.inference.ai.azure.com" 12 | AZUREAI_COHERE_EMBEDDING_URL="https://cohere-embed-v3-us-yuefg-serverless.eastus2.inference.ai.azure.com" 13 | AZUREAI_MISTRAL_SMALL_URL="https://mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com" 14 | AZUREAI_MISTRAL_LARGE_URL="https://mistral-large-ajmih-serverless.eastus2.inference.ai.azure.com" 15 | AZUREAI_LLAMA3_URL="https://llama-3-70b-instruct-qgxvd-serverless.eastus2.inference.ai.azure.com" 16 | AZUREAI_PHI3_MINI_URL="https://phi-3-mini-4k-qwerty.eastus2.inference.ml.azure.com" 17 | AZUREAI_PHI3_MEDIUM_URL="https://do-not-delete-build-demo.westus3.inference.ml.azure.com" 18 | AZUREAI_COHERE_CHAT_KEY="*****" 19 | AZUREAI_COHERE_EMBEDDING_KEY="******" 20 | AZUREAI_MISTRAL_SMALL_KEY="******" 21 | AZUREAI_MISTRAL_LARGE_KEY="******" 22 | AZUREAI_LLAMA3_KEY="*******" 23 | AZUREAI_PHI3_MINI_KEY="*******" 24 | AZUREAI_PHI3_MEDIUM_KEY="******" 25 | 26 | FT_SUBSCRIPTION="*******" 27 | FT_RESOURCE_GROUP="***" 28 | FT_RESOURCE_NAME="***" 29 | FT_OPENAI_API_BASE="https://****.openai.azure.com/" 30 | FT_OPENAI_API_KEY="***" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) 
will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | !.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | .env 400 | spans.json 401 | .files/ 402 | **/.promptflow 403 | src/generate_data/*_batch_*.jsonl 404 | src/evaluate/azure_monitor/*time_stamp.txt 405 | test.ipynb 406 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python Debugger: Current File", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal" 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "jupyter.debugJustMyCode": false, 3 | "python.testing.unittestEnabled": false, 4 | "python.testing.pytestEnabled": true, 5 | "python.envFile": "${workspaceFolder}/.env", 6 | "terminal.integrated.env.osx": { 7 | "PYTHONPATH": "${workspaceFolder}/src", 8 | }, 9 | "terminal.integrated.env.linux": { 10 | "PYTHONPATH": "${workspaceFolder}/src" 11 | }, 12 | "terminal.integrated.env.windows": { 13 | "PYTHONPATH": "${workspaceFolder}/src" 14 | }, 15 | "python.terminal.activateEnvironment": true, 16 | "notebook.output.wordWrap": true, 17 | "python.testing.pytestArgs": [ 18 | "test" 19 | ], 20 | "debugpy.debugJustMyCode": false, 21 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README-Azure-Monitor.md: -------------------------------------------------------------------------------- 1 | ## Application Insights / Azure Monitor 2 | 3 | This demo is set up to view your telemetry in Application Insights / Azure Monitor, capturing both the Open Telemetry traces and evaluations of those traces (by humans and by automated evaluation). 4 | 5 | > Note: The way that LLM executions are captured in Open Telemetry, aka the semantic conventions, is still in development by the Open Telemetry community (see [here for the project that is tracking this](https://github.com/open-telemetry/community/blob/main/projects/llm-semconv.md)). The property names and structure used in this demo are based on the current best practices and are guaranteed to change as the OpenTelemetry Semantic Conventions for LLMs are finalized. 6 | 7 | ### View traces in Application Insights 8 | 9 | In addition to the Promptflow Tracing UI, you can also view the traces in Application Insights. You already set the environment variable `APPLICATIONINSIGHTS_CONNECTION_STRING` in the `.env` file. The value should be the **connection string** of the Application Insights instance you want to use. 10 | ![](images/app-insights-1.png) 11 | 12 | To see the traces here, you can for instance use the End-to-End transaction details view in Application Insights. To get there, follow the clicks as shown in the image below: 13 | 14 | ![](images/app-insights-2.png) 15 | 16 | This will give you a view like this: 17 | 18 | ![](images/app-insights-3.png) 19 | 20 | ### Building a Dashboard with Promptflow Telemetry 21 | 22 | > Note: You can access the telemetry data both through the Application Insights instance and the associated Log Analytics Workspace, albeit using different table and column names (see [here for details](https://learn.microsoft.com/en-us/azure/azure-monitor/app/convert-classic-resource#table-structure)). We are choosing access through the Log Analytics Workspace since it is the recommended way for new projects and it allows Python access to the data, which we will use to create automated evaluations. 23 | 24 | To have the telemetry from Promptflow and your app show up in an Azure Portal Dashboard or a Grafana Dashboard, you can follow these steps: 25 | 26 | 1. Navigate to the Log Analytics Workspace: In App Insights, go to the Overview page and click on the Workspace link: 27 | ![](images/dashboard-0.png) 28 | 29 | 2. Under **Maximize your Log Analytics experience**, click on **View logs**: 30 | ![](images/dashboard-1.png) 31 | 32 | 3. In the Logs view, create a new query. Make sure to edit your query in KQL Mode so you have access to the different tables.
33 | 34 | ![](images/dashboard-2.png) 35 | 36 | Here is an example of a query that shows the tokens used over time: 37 | 38 | ```kql 39 | AppDependencies 40 | | extend 41 | total_tokens = toint(Properties["llm.usage.total_tokens"]), 42 | prompt_tokens = toint(Properties["llm.usage.prompt_tokens"]), 43 | completion_tokens = toint(Properties["llm.usage.completion_tokens"]) 44 | | summarize sum(total_tokens), sum(prompt_tokens), sum(completion_tokens) by bin(TimeGenerated, 5m) 45 | | render timechart 46 | ``` 47 | 48 | 49 | 4. Save the query and pin it to a dashboard. You can create a new dashboard or add it to an existing one. 50 | 51 | ![](images/dashboard-3.png) 52 | 53 | 5. Once pinned to the Dashboard, you can edit the title and move/resize the chart as you see fit. 54 | 55 | ![](images/dashboard-4.png) 56 | 57 | To manage your Azure Portal Dashboards, go to the Dashboard hub in the Azure Portal: 58 | ![](images/dashboard-5.png) 59 | 60 | Here are a few KQL queries that you can use to get started with building your own dashboards: 61 | 62 | > Note: As mentioned above, the property names and structure used in this demo are based on the current best practices and are guaranteed to change as the OpenTelemetry Semantic Conventions for LLMs are finalized. 63 | 64 | - Average duration of OpenAI Chat calls by model and time: 65 | ```kql 66 | AppDependencies 67 | | where Name in ("openai_chat_async", "Iterated(openai_chat)", "openai_chat") 68 | | extend model = substring(tostring(Properties["llm.response.model"]), 0, 30), 69 | duration_sec = DurationMs / 1000 70 | | summarize avg(duration_sec) by bin(TimeGenerated, 1h), model 71 | | order by TimeGenerated asc 72 | | render timechart 73 | ``` 74 | 75 | - Average duration of OpenAI Chat calls by model: 76 | ```kql 77 | AppDependencies 78 | | where Name in ("openai_chat_async", "Iterated(openai_chat)", "openai_chat") 79 | | extend model = substring(tostring(Properties["llm.response.model"]), 0, 30) 80 | | project duration_sec = DurationMs / 1000, model 81 | | summarize avg(duration_sec) by model 82 | | render columnchart 83 | ``` 84 | 85 | - Average duration of Assistant Runs by model: 86 | ```kql 87 | AppDependencies 88 | | where Name in ("AssistantAPI.run") 89 | | extend model = substring(tostring(Properties["llm.response.model"]), 0, 30) 90 | | where model != "" 91 | | project duration_sec = DurationMs / 1000, model 92 | | summarize avg(duration_sec) by model 93 | | render columnchart 94 | ``` 95 | 96 | - Tokens used by model over time: 97 | ```kql 98 | AppDependencies 99 | | where Name in ("openai_chat_async", "Iterated(openai_chat)", "openai_chat", "AssistantAPI.run") 100 | | extend 101 | total_tokens = toint(Properties["llm.usage.total_tokens"]), 102 | prompt_tokens = toint(Properties["llm.usage.prompt_tokens"]), 103 | completion_tokens = toint(Properties["llm.usage.completion_tokens"]), 104 | model = substring(tostring(Properties["llm.response.model"]), 0, 22) 105 | | where model != "" 106 | | summarize prompt = sum(prompt_tokens), completion = sum(completion_tokens) by model 107 | | render columnchart 108 | ``` 109 | 110 | - Total tokens used by model/deployment: 111 | ```kql 112 | AppDependencies 113 | | where Name in ("openai_chat_async", "Iterated(openai_chat)", "openai_chat", "AssistantAPI.run") 114 | | extend 115 | total_tokens = toint(Properties["llm.usage.total_tokens"]), 116 | prompt_tokens = toint(Properties["llm.usage.prompt_tokens"]), 117 | completion_tokens = toint(Properties["llm.usage.completion_tokens"]), 118 | model = 
substring(tostring(Properties["llm.response.model"]), 0, 30) 119 | | summarize prompt = sum(prompt_tokens), completion = sum(completion_tokens) by model 120 | | render columnchart 121 | ``` 122 | 123 | 124 | - User votes over time: 125 | ```kql 126 | AppTraces 127 | | where Properties["event.name"] == "gen_ai.evaluation.user_vote" 128 | | extend vote = toint(Properties["gen_ai.evaluation.vote"]) 129 | | project vote, OperationId, ParentId 130 | | join kind=innerunique AppDependencies on $left.OperationId == $right.OperationId and $left.ParentId == $right.Id 131 | | summarize down = countif(vote == 0), up = countif(vote == 1), up_percent = 100*avg(vote) by bin(TimeGenerated, 1d) 132 | | render timechart 133 | ``` 134 | 135 | - Question Quality over time: 136 | ```kql 137 | AppTraces 138 | | where Properties["event.name"] == "gen_ai.evaluation.InDomainQuestion" 139 | | extend score = toint(Properties["gen_ai.evaluation.score"]) 140 | | project score, OperationId, ParentId 141 | | join kind=innerunique AppDependencies on $left.OperationId == $right.OperationId and $left.ParentId == $right.Id 142 | | summarize low_score_count = countif(score <= 3), high_score_count = countif(score > 3) by bin(TimeGenerated, 1d) 143 | | render timechart 144 | ``` 145 | 146 | Here is an example of a Grafana dashboard with the above queries: 147 | 148 | ![](images/grafana-1.png) 149 | 150 | ### Query the data in Azure Data Explorer 151 | In addition to the Azure UX for the Azure Log Analytics workspace, you can query the data from Azure Data Explorer (ADE) by following these steps: 152 | 153 | 1. Go to https://dataexplorer.azure.com/ and add a connection: 154 | 155 | ![](images/ade-1.png) 156 | 157 | Then add the URL for your App Insights instance like so: 158 | 159 | ![](images/ade-2.png) 160 | 161 | Following this format: 162 | `https://ade.loganalytics.io/subscriptions/<subscription-id>/resourcegroups/<resource-group>/providers/microsoft.operationalinsights/workspaces/<workspace-name>` 163 | 164 | This will allow you to execute the same queries as above but in a more developer-friendly frontend. You can then take them back to Azure Monitor or Grafana and pin them to a Dashboard. 165 | 166 | In addition, you can pull data from App Insights to build datasets for validation and fine tuning. For instance, in our example the sub-flow that provides the sales data insights is called `SalesDataInsights`. That means that you will find traces with that name from which you can retrieve the input and output parameters with a query like this: 167 | 168 | ```kql 169 | AppDependencies 170 | | where Name == "SalesDataInsights" 171 | | extend inputs = parse_json(tostring(Properties.inputs)), 172 | output = parse_json(tostring(Properties.output)) 173 | | project question = inputs.question, query = output.query, error = output.error 174 | ``` 175 | 176 | This will allow you to export the data to a CSV and then use it for tasks like human evaluation or fine tuning. 177 | ![](images/ade-3.png) 178 | 179 | 180 | ### Programmatic access to telemetry data 181 | 182 | Azure Monitor / Log Analytics also allows programmatic access to your telemetry data by executing KQL queries in Python (and other languages). 
To access the data you will need the GUID of the Log Analytics Workspace, which you can get from the Overview page: 183 | 184 | ![](images/log-analytics-1.png) 185 | 186 | 187 | Here is an example of how to execute a query on a Log Analytics Workspace using that `workspace_id`: 188 | 189 | ```python 190 | import os 191 | import pandas as pd 192 | from datetime import datetime, timezone, timedelta 193 | from azure.monitor.query import LogsQueryClient, LogsQueryStatus 194 | from azure.identity import DefaultAzureCredential 195 | from azure.core.exceptions import HttpResponseError 196 | 197 | credential = DefaultAzureCredential() 198 | client = LogsQueryClient(credential) 199 | workspace_id="********-****-****-****-**********" 200 | 201 | query = f""" 202 | AppDependencies 203 | | where Name == "SalesDataInsights" 204 | | extend inputs = parse_json(tostring(Properties.inputs)), 205 | output = parse_json(tostring(Properties.output)) 206 | | project question = inputs.question, query = output.query, error = output.error 207 | """ 208 | 209 | end_time=datetime.now(timezone.utc) 210 | start_time=end_time - timedelta(days=1) 211 | df = pd.DataFrame()  # initialize so the final print(df) works even if the query fails 212 | try: 213 | response = client.query_workspace( 214 | workspace_id=workspace_id, 215 | query=query, 216 | timespan=(start_time, end_time) 217 | ) 218 | if response.status == LogsQueryStatus.PARTIAL: 219 | error = response.partial_error 220 | data = response.partial_data 221 | print(error) 222 | elif response.status == LogsQueryStatus.SUCCESS: 223 | data = response.tables 224 | for table in data: 225 | df = pd.DataFrame(data=table.rows, columns=table.columns) 226 | 227 | except HttpResponseError as err: 228 | print("something fatal happened") 229 | print(err) 230 | 231 | print(df) 232 | ``` 233 | 234 | ### Automatic Evaluations 235 | 236 | This demo provides a way to run evaluations against the telemetry collected. The result of the evaluation is then written back to the Application Insights instance as an Open Telemetry event (i.e. to the AppTraces table), attached to the relevant span, allowing you to query it through KQL and visualize it in your dashboards (see the sketch at the end of this section for one way such an event can be emitted). 237 | 238 | The script requires the following environment variables to be set (e.g. in `.env`): 239 | - `OPENAI_API_BASE`, `OPENAI_API_KEY`, `OPENAI_API_VERSION`, `OPENAI_EVAL_MODEL`: The model configuration to be used to execute the evaluation prompty.
240 | - `LOG_ANALYTICS_WORKSPACE_ID`: The GUID of your log analytics workspace (to read the telemetry from) -- see above 241 | - `APPLICATIONINSIGHTS_CONNECTION_STRING`: The App Insights connection string (to write the evaluation events back) -- see above 242 | 243 | To execute the script with defaults just run: 244 | ```bash 245 | python src/evaluate/azure_monitor/eval_azure_monitor.py 246 | ``` 247 | 248 | By default, the command will run the [in_domain_evaluator.prompty](src/custom_evaluators/in_domain_evaluator.prompty) over the output of this KQL query (saved in [sales_data_insights.kql](src/evaluate/azure_monitor/sales_data_insights.kql)): 249 | 250 | ```kql 251 | AppDependencies 252 | | where Name == "SalesDataInsights" 253 | | extend inputs = parse_json(tostring(Properties.inputs)), 254 | output = parse_json(tostring(Properties.output)), 255 | hash = hash(OperationId, 2) // select 1 in 2 traces 256 | | where hash==0 257 | | project question = inputs.question, query = output.query, error = output.error, trace_id = OperationId, span_id = Id, time_stamp = TimeGenerated 258 | | order by time_stamp asc 259 | ``` 260 | 261 | The above query will return the input and output of spans of 50% of the traces (1 out of every 2) for the `SalesDataInsights` sub-flow along with the `trace_id`, `span_id` and `time_stamp` fields. The `trace_id` and `span_id` are used to write the evaluation results back to the App Insights instance as events under the respective span. The `time_stamp` is used to keep track of the last timestamp processed by the script, so subsequent executions won't process the same spans again. 262 | 263 | To run with a different evaluator, you can pass the path to the prompty file as an argument to the script. **As you do that, make sure to also change the timestamp file to a new one to start from scratch.** Here is the usage of the script: 264 | 265 | ```bash 266 | usage: eval_azure_monitor.py [-h] [--kql-file KQL_FILE] [--timestamp-file TIMESTAMP_FILE] [--evaluator-path EVALUATOR_PATH] [--dry-run] 267 | 268 | Evaluate Azure Monitor data 269 | 270 | options: 271 | -h, --help show this help message and exit 272 | --kql-file KQL_FILE KQL query file. Default is sales_data_insights.kql 273 | --timestamp-file TIMESTAMP_FILE 274 | Timestamp file. Default is in_domain_evaluator_time_stamp.txt 275 | --evaluator-path EVALUATOR_PATH 276 | Evaluator path. Currently only prompty is supported. Default is in_domain_evaluator.prompty 277 | --dry-run When set, the script will not write to App Insights. Default is False. 278 | ``` 279 | 280 | To view the evaluation results in a dashboard, you can use the following query: 281 | 282 | ```kql 283 | AppTraces 284 | | where Properties["event.name"] == "gen_ai.evaluation.InDomainQuestion" 285 | | extend score = toint(Properties["gen_ai.evaluation.score"]) 286 | | project score, OperationId, ParentId 287 | | join kind=innerunique AppDependencies on $left.OperationId == $right.OperationId and $left.ParentId == $right.Id 288 | | summarize low_score_count = countif(score <= 3), high_score_count = countif(score > 3) by bin(TimeGenerated, 1d) 289 | | render columnchart 290 | ``` 291 | 292 | ![](images/log-analytics-2.png) 293 | (you might need to set the visualization to stacked column chart under "Chart Formatting" to get this view) 294 | 
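295 | If you want to emit evaluation events like these yourself, here is a rough sketch of how a score could be written back as an OpenTelemetry log record attached to an existing span -- this is NOT the eval_azure_monitor.py script's actual code, the helper name `write_evaluation_event` is made up for illustration, and the OpenTelemetry logs API for Python is still private (`opentelemetry.sdk._logs`) so details may differ between SDK versions: 296 | 297 | ```python 298 | # Rough sketch -- NOT the repo script's actual code -- of how an evaluation 299 | # score could be written back as an OpenTelemetry log record attached to an 300 | # existing span, so it lands in AppTraces with the right OperationId/ParentId. 301 | import os 302 | import time 303 | 304 | from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter 305 | from opentelemetry._logs import SeverityNumber 306 | from opentelemetry.sdk._logs import LoggerProvider, LogRecord 307 | from opentelemetry.sdk._logs.export import BatchLogRecordProcessor 308 | from opentelemetry.sdk.resources import Resource 309 | from opentelemetry.trace import TraceFlags 310 | 311 | resource = Resource.create({"service.name": "promptflow"}) 312 | provider = LoggerProvider(resource=resource) 313 | provider.add_log_record_processor(BatchLogRecordProcessor( 314 |     AzureMonitorLogExporter.from_connection_string( 315 |         os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"]))) 316 | logger = provider.get_logger("evaluation") 317 | 318 | def write_evaluation_event(trace_id: str, span_id: str, name: str, score: int): 319 |     # trace_id/span_id are the OperationId/Id hex strings from the KQL query; 320 |     # they become OperationId/ParentId on the resulting AppTraces row. 321 |     logger.emit(LogRecord( 322 |         timestamp=time.time_ns(), 323 |         trace_id=int(trace_id, 16), 324 |         span_id=int(span_id, 16), 325 |         trace_flags=TraceFlags(0x01), 326 |         severity_text="INFORMATION", 327 |         severity_number=SeverityNumber.INFO, 328 |         body=name, 329 |         resource=resource, 330 |         attributes={"event.name": name, "gen_ai.evaluation.score": score}, 331 |     )) 332 | 333 | write_evaluation_event("<trace-id>", "<span-id>", "gen_ai.evaluation.InDomainQuestion", 4) 334 | provider.shutdown()  # flush pending log records before exiting 335 | ``` 336 | 337 | Enjoy exploring your Promptflow telemetry!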
338 | 339 | ![](images/sad-puppy.png) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project 2 | 3 | ## Tracing function calls 4 | 5 | In this sample, we will show how to use [Azure OpenAI Assistants](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/assistant) with [Prompt Flow](https://microsoft.github.io/promptflow/). 6 | 7 | ### Prerequisites: 8 | 9 | - Python 3.11 10 | - Conda 11 | - Azure CLI 12 | - AI Studio Hub & Project with an Azure OpenAI endpoint (**OPENAI_API_BASE**, **OPENAI_API_KEY**) in a [region that supports assistants](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#assistants-preview) with an 1106 model or better -- I recommend creating it in Sweden central. 13 | - Deployments of OpenAI models: 14 | - deployment of `gpt-4-1106-preview`/`gpt-35-turbo-1106` or later for use by the **OpenAI assistant**. Both work, but `gpt-35-turbo-1106` is faster and `gpt-4-1106-preview` is more accurate. (**OPENAI_ASSISTANT_MODEL**) 15 | - deployment of `gpt-35-turbo-1106` or later for use by the **Data Analyst** to perform some limited NL to SQL. (**OPENAI_ANALYST_CHAT_MODEL**) 16 | - Azure Application Insights (**APPLICATIONINSIGHTS_CONNECTION_STRING**) 17 | 18 | Copy `.env.sample` to `.env` and fill in the values: 19 | 20 | ```bash 21 | OPENAI_API_TYPE="azure" 22 | OPENAI_API_VERSION="2024-02-15-preview" 23 | OPENAI_API_BASE="https://***.openai.azure.com/" 24 | OPENAI_API_KEY="******************" 25 | OPENAI_ASSISTANT_MODEL="gpt-35-turbo-1106" 26 | OPENAI_ANALYST_CHAT_MODEL="gpt-35-turbo-1106" 27 | OPENAI_ASSISTANT_ID="asst_0leWabwuOmzsNVG5Kst1CpeV" <-- you will create this further down 28 | APPLICATIONINSIGHTS_CONNECTION_STRING="InstrumentationKey=***;IngestionEndpoint=https://****.in.applicationinsights.azure.com/;LiveEndpoint=https://****" 29 | ``` 30 | 31 | ### Install dependencies 32 | 33 | ```bash 34 | conda env create -f environment.yml 35 | conda activate assistant 36 | ``` 37 | 38 | Then install the pre-release version of azure-monitor-opentelemetry-exporter: 39 | ```bash 40 | pip install azure-monitor-opentelemetry-exporter --pre 41 | ``` 42 | 43 | ### Create an OpenAI assistant 44 | 45 | Create an OpenAI assistant using the setup.py script: 46 | 47 | ```bash 48 | python src/assistant_flow/setup.py 49 | ``` 50 | 51 | This should output something like this: 52 | 53 | ```log 54 | OPENAI_API_KEY ****** 55 | OPENAI_API_BASE https://******.openai.azure.com 56 | OPENAI_API_VERSION 2024-02-15-preview 57 | {'id': 'asst_wgEXCRBQ7E4BfznSkGgJy41k', 'created_at': 1714610540, 'description': None, 'instructions': "\nYou are a helpful assistant that helps the user potentially with the help of some functions.\n\nIf you are using multiple tools to solve a user's task, make sure to communicate \ninformation learned from one tool to the next tool.\nFirst, make a plan of how you will use the tools to solve the user's task and communicated\nthat plan to the user with the first response. Then execute the plan making sure to communicate\nthe required information between tools since tools only see the information passed to them;\nThey do not have access to the chat history.\nIf you think that tool use can be parallelized (e.g. to get weather data for multiple cities) \nmake sure to use the multi_tool_use.parallel function to execute.\n\nOnly use a tool when it is necessary to solve the user's task. 
\nDon't use a tool if you can answer the user's question directly.\nOnly use the tools provided in the tools list -- don't make up tools!!\n\nAnything that would benefit from a tabular presentation should be returned as markup table.\n", 'metadata': {}, 'model': 'gpt-35-turbo-1106', 'name': 'Contoso Assistant', 'object': 'assistant', 'tools': [{'type': 'code_interpreter'}, {'function': {'name': 'sales_data_insights', 'description': '\n get some data insights about the contoso sales data. This tool has information about total sales, return return rates, discounts given, etc., by date, product category, etc.\n you can ask questions like:\n - query for the month with the strongest revenue\n - which day of the week has the least sales in january\n - query the average value of orders by month\n - what is the average sale value for Tuesdays\n If you are unsure of the data available, you can ask for a list of categories, days, etc.\n - query for all the values for the main_category\n If a query cannot be answered, the tool will return a message saying that the query is not supported. otherwise the data will be returned.\n ', 'parameters': {'type': 'object', 'properties': {'question': {'type': 'string', 'description': "The question you want to ask the tool in plain English. e.g. 'what is the average sale value for Tuesdays'"}}, 'required': ['question']}}, 'type': 'function'}], 'response_format': None, 'temperature': None, 'tool_resources': None, 'top_p': None, 'file_ids': []} 58 | Assistant created with id asst_wgEXCRBQ7E4BfznSkGgJy41k 59 | add the following to your .env file 60 | OPENAI_ASSISTANT_ID="asst_wgEXCRBQ7E4BfznSkGgJy41k" 61 | ``` 62 | 63 | You should go to your Azure AI Studio project and check that the assistant was actually created -- it should look like this: 64 | ![](images/assistant-ai-studio.png) 65 | 66 | Then do as suggested on the console by adding the provided line `OPENAI_ASSISTANT_ID="asst_*****"` to the `.env` file. 67 | 68 | ### Test the Assistant Flow 69 | 70 | You can test that the flow is working correctly by running the following command: 71 | 72 | ```bash 73 | pf flow test --flow assistant_flow.chat:chat_completion --ui 74 | ``` 75 | 76 | This will start the prompt flow service (pfs) and provide a URL to the chat UI. The console output will be similar to this: 77 | 78 | ```log 79 | Prompt flow service has started... 80 | You can begin chat flow on http://127.0.0.1:23333/v1.0/ui/chat?flow=L1VzZXJzL2RhbmllbHNjL2dpdC9hc3Npc3RhbnQtcGYtZGVtby9zcmMvYXNzaXN0YW50X2Zsb3cvZmxvdy5kYWcueWFtbA%3D%3D 81 | ``` 82 | 83 | You can ask the assistant questions like: 84 | > get the order numbers by month for the last year and plot it in a line chart using matplotlib. Make sure to use the month names in the plot. 85 | 86 | ### Run the Chainlit sample app 87 | 88 | The sample app uses [chainlit](https://docs.chainlit.io/get-started/overview) to build a simple chat UI that is capable of displaying images. The app is started like so: 89 | 90 | ```bash 91 | python src/app.py 92 | ``` 93 | 94 | The console output will be similar to this (port numbers might differ): 95 | 96 | ```bash 97 | 2024-05-01 20:49:19 - Loaded .env file 98 | 2024-05-01 20:49:21 - collection: assistant-test 99 | 2024-05-01 20:49:21 - resource attributes: {'service.name': 'promptflow', 'collection': 'assistant-test'} 100 | 2024-05-01 20:49:21 - tracer provider is set with resource attributes: {'service.name': 'promptflow', 'collection': 'assistant-test'} 101 | Starting prompt flow service... 
102 | Start prompt flow service on port 23334, version: 1.10.0.
103 | You can stop the prompt flow service with the following command:'pf service stop'.
104 | Alternatively, if no requests are made within 1 hours, it will automatically stop.
105 | 2024-03-30 11:15:14 - Your app is available at http://localhost:8000
106 | ```
107 | 
108 | Open two browser tabs, one to `http://localhost:8000` and one to `http://localhost:23334/v1.0/ui/traces/`.
109 | 
110 | You should be able to chat with the assistant in the first tab and see the traces in the other tab.
111 | 
112 | ### Sequence Diagram
113 | Here is a sequence diagram that illustrates the flow of an assistant interaction with the Sales Data Insights tool: the frontend asks for a line chart of sales data, and the Promptflow service passes the question to the assistant. The assistant calls the Sales Data Insights tool, which in turn queries a SQL database. The data is passed back to the assistant, which then calls the Code Interpreter tool to generate the line chart, and the chart is finally returned to the frontend:
114 | ```mermaid
115 | sequenceDiagram
116 | autonumber
117 | actor Frontend
118 | participant Promptflow
119 | participant SalesDataInsights
120 | box Azure OpenAI Service
121 | participant Assistant
122 | participant CodeInterpreter
123 | end
124 | 
125 | Frontend->>Promptflow: show me sales data in a line chart
126 | activate Promptflow
127 | Promptflow->>Assistant: show me sales data in a line chart
128 | activate Assistant
129 | loop
130 | Assistant->>Assistant: call LLM
131 | end
132 | Assistant-->>Promptflow: tool_call: SDI("get sales data")
133 | deactivate Assistant
134 | Promptflow-->Frontend: notify:calling SDI("get sales data")
135 | Promptflow->>SalesDataInsights: get sales data
136 | activate SalesDataInsights
137 | loop
138 | SalesDataInsights->>SalesDataInsights: call SQL
139 | end
140 | SalesDataInsights-->>Promptflow: ["sales", "data", "in", "json", "format"]
141 | deactivate SalesDataInsights
142 | Promptflow->>Assistant: tool_reply: ["sales", ...]
143 | activate Assistant
144 | loop
145 | Assistant->>Assistant: call LLM
146 | end
147 | Assistant->>CodeInterpreter: "import matplotlib ...."
148 | CodeInterpreter-->>Assistant: 
149 | Assistant-->>Promptflow: "Here is the requested plot",
150 | deactivate Assistant
151 | Promptflow-->>Frontend: "Here is the requested plot",
152 | deactivate Promptflow
153 | ```
154 | 
155 | ### Log traces to AI Studio
156 | 
157 | In addition to viewing the traces in the local promptflow traces view, you can also log the traces to Azure AI Studio. To do this, you need to set the trace destination to the Azure AI Studio workspace. You will need your Azure subscription ID, resource group, and project name.
158 | 
159 | ```bash
160 | pf config set trace.destination=azureml://subscriptions/15ae9cb6-95c1-483d-a0e3-b1a1a3b06324/resourceGroups/danielsc/providers/Microsoft.MachineLearningServices/workspaces/build-demo-project
161 | ```
162 | 
163 | On first run, the above should produce output like this:
164 | 
165 | ```log
166 | The workspace Cosmos DB is not initialized yet, will start initialization, which may take some minutes...
167 | Set config [{'trace.destination': 'azureml://subscriptions/15ae9cb6-95c1-483d-a0e3-b1a1a3b06324/resourceGroups/danielsc/providers/Microsoft.MachineLearningServices/workspaces/build-demo-project'}] successfully.
168 | ```
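The subscription ID, resource group, and project name in the `pf config set` command above are the author's. As a sketch, the general form with placeholders to substitute looks like this:

```bash
# general form -- replace the placeholders with your own subscription, resource group, and project
pf config set trace.destination=azureml://subscriptions/<subscription-id>/resourceGroups/<resource-group>/providers/Microsoft.MachineLearningServices/workspaces/<project-name>
```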
169 | 
170 | After setting the trace destination, you might need to restart the prompt flow service (pfs):
171 | 
172 | ```bash
173 | pf service stop
174 | pf service start
175 | ```
176 | 
177 | When you use the chat UI, you should see traces in the Azure AI Studio workspace. In the output logs of the app you should see the URLs to the trace views for local and Azure AI Studio:
178 | 
179 | ```log
180 | ...
181 | You can view the trace detail from the following URL:
182 | http://localhost:23334/v1.0/ui/traces/?#collection=assistant-test&uiTraceId=0x67a45d1c29d32e62f50eda806ff51a3b
183 | https://ai.azure.com/projecttrace/detail/0x67a45d1c29d32e62f50eda806ff51a3b?wsid=/subscriptions/15ae9cb6-95c1-483d-a0e3-b1a1a3b06324/resourceGroups/danielsc/providers/Microsoft.MachineLearningServices/workspaces/build-demo-project&flight=PFTrace
184 | ...
185 | ```
186 | ![](images/ai-studio-traces.png)
187 | 
188 | Read more about how to use Application Insights to monitor your application [here](README-Azure-Monitor.md).
189 | 
190 | ![](images/sad-puppy.png)
191 | 
192 | ## Contributing
193 | 
194 | This project welcomes contributions and suggestions. Most contributions require you to agree to a
195 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
196 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
197 | 
198 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide
199 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
200 | provided by the bot. You will only need to do this once across all repos using our CLA.
201 | 
202 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
203 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
204 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
205 | 
206 | ## Trademarks
207 | 
208 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
209 | trademarks or logos is subject to and must follow
210 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
211 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
212 | Any use of third-party trademarks or logos is subject to those third parties' policies.
213 | 
214 | ## Responsible AI Guidelines
215 | 
216 | This project follows the responsible AI guidelines and best practices below. Please review them before using this project:
217 | 
218 | - [Microsoft Responsible AI Guidelines](https://www.microsoft.com/en-us/ai/responsible-ai)
219 | - [Responsible AI practices for Azure OpenAI models](https://learn.microsoft.com/en-us/legal/cognitive-services/openai/overview)
220 | - [Safety evaluations transparency notes](https://learn.microsoft.com/en-us/azure/ai-studio/concepts/safety-evaluations-transparency-note)
221 | 
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | ## Security
4 | 
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
6 | 
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
8 | 
9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue
23 | * The location of the affected source code (tag/branch/commit or direct URL)
24 | * Any special configuration required to reproduce the issue
25 | * Step-by-step instructions to reproduce the issue
26 | * Proof-of-concept or exploit code (if possible)
27 | * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /chainlit.md: -------------------------------------------------------------------------------- 1 | # Assistant Demo! 2 | 3 | This is a simple demo of using the OpenAI Assistant API to create a chatbot. The assistant has access to two tools: 4 | 1. A function to query the 2023 sales data for Contoso. 5 | 1. A code interpreter, which it will use to make graphs of the sales data. 
6 | 
7 | Test it by asking the assistant some questions about the sales data, for instance:
8 | - show the 2023 sales by category in a bar chart
9 | - show the total sales revenue aggregated by year and month in a line chart
10 | - show the total sales revenue for May 2024
11 | - break it down by day and show it to me in a line chart
12 | - break it down by day and by category and show it in a multi-series bar chart
13 | - show the same in a multi-series line chart
14 | 
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: assistant2
2 | channels:
3 |   - anaconda
4 |   - pytorch
5 |   - conda-forge
6 | dependencies:
7 |   - python==3.11
8 |   - pip==24.0
9 |   - pip:
10 |     - openai
11 |     - azure-identity
12 |     - azure-ai-ml
13 |     - promptflow-azure>=1.12.0
14 |     - promptflow>=1.12.0
15 |     - promptflow-evals>=0.3.0
16 |     - promptflow-tools
17 |     - pytest
18 |     - jinja2
19 |     - chainlit
20 |     - tiktoken
21 |     - ipykernel
22 |     - ipython
23 |     - pandas
24 |     - numpy
25 |     - azure-monitor-query
26 |     - --pre  # pip option on its own line; allows the pre-release azure-monitor exporter below
27 |     - azure-monitor-opentelemetry-exporter
28 |     - azure-ai-inference
--------------------------------------------------------------------------------
/images/ade-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/ade-1.png
--------------------------------------------------------------------------------
/images/ade-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/ade-2.png
--------------------------------------------------------------------------------
/images/ade-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/ade-3.png
--------------------------------------------------------------------------------
/images/ai-studio-traces.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/ai-studio-traces.png
--------------------------------------------------------------------------------
/images/app-insights-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/app-insights-1.png
--------------------------------------------------------------------------------
/images/app-insights-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/app-insights-2.png
--------------------------------------------------------------------------------
/images/app-insights-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/app-insights-3.png
--------------------------------------------------------------------------------
/images/assistant-ai-studio.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/assistant-ai-studio.png -------------------------------------------------------------------------------- /images/dashboard-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/dashboard-0.png -------------------------------------------------------------------------------- /images/dashboard-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/dashboard-1.png -------------------------------------------------------------------------------- /images/dashboard-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/dashboard-2.png -------------------------------------------------------------------------------- /images/dashboard-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/dashboard-3.png -------------------------------------------------------------------------------- /images/dashboard-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/dashboard-4.png -------------------------------------------------------------------------------- /images/dashboard-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/dashboard-5.png -------------------------------------------------------------------------------- /images/grafana-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/grafana-1.png -------------------------------------------------------------------------------- /images/log-analytics-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/log-analytics-1.png -------------------------------------------------------------------------------- /images/log-analytics-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/log-analytics-2.png -------------------------------------------------------------------------------- /images/sad-puppy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/images/sad-puppy.png -------------------------------------------------------------------------------- /scripts/post_create.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | 3 | conda init 4 | conda init zsh 5 | echo $ZSH_CUSTOM 6 | git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions 7 | git clone https://github.com/zsh-users/zsh-syntax-highlighting.git ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting 8 | sed -i "s|plugins=(git)|plugins=(git zsh-autosuggestions zsh-syntax-highlighting)|g" ~/.zshrc -------------------------------------------------------------------------------- /scripts/start_chainlit.sh: -------------------------------------------------------------------------------- 1 | python -m app -------------------------------------------------------------------------------- /scripts/test_assistant_ui.sh: -------------------------------------------------------------------------------- 1 | pf flow test --flow assistant_flow.chat:chat_completion --ui -------------------------------------------------------------------------------- /scripts/test_sales_insights_ui.sh: -------------------------------------------------------------------------------- 1 | pf flow test --flow sales_data_insights.main:SalesDataInsights --ui 2 | -------------------------------------------------------------------------------- /src/.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = false 4 | 5 | 6 | # List of environment variables to be provided by each user to use the app. 7 | user_env = [] 8 | 9 | # Duration (in seconds) during which the session is saved when the connection is lost 10 | session_timeout = 3600 11 | 12 | # Enable third parties caching (e.g LangChain cache) 13 | cache = false 14 | 15 | # Authorized origins 16 | allow_origins = ["*"] 17 | 18 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 19 | # follow_symlink = false 20 | 21 | [features] 22 | # Show the prompt playground 23 | prompt_playground = true 24 | 25 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) 26 | unsafe_allow_html = false 27 | 28 | # Process and display mathematical expressions. This can clash with "$" characters in messages. 29 | latex = false 30 | 31 | # Automatically tag threads with the current chat profile (if a chat profile is used) 32 | auto_tag_thread = true 33 | 34 | # Authorize users to upload files with messages 35 | [features.multi_modal] 36 | enabled = true 37 | accept = ["*/*"] 38 | max_files = 20 39 | max_size_mb = 500 40 | 41 | # Allows user to use speech to text 42 | [features.speech_to_text] 43 | enabled = false 44 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 45 | # language = "en-US" 46 | 47 | [UI] 48 | # Name of the app and chatbot. 49 | name = "Chatbot" 50 | 51 | # Show the readme while the thread is empty. 52 | show_readme_as_default = true 53 | 54 | # Description of the app and chatbot. This is used for HTML tags. 55 | # description = "" 56 | 57 | # Large size content are by default collapsed for a cleaner ui 58 | default_collapse_content = true 59 | 60 | # The default value for the expand messages settings. 61 | default_expand_messages = false 62 | 63 | # Hide the chain of thought details from the user in the UI. 
64 | hide_cot = false 65 | 66 | # Link to your github repo. This will add a github button in the UI's header. 67 | # github = "" 68 | 69 | # Specify a CSS file that can be used to customize the user interface. 70 | # The CSS file can be served from the public directory or via an external link. 71 | # custom_css = "/public/test.css" 72 | 73 | # Specify a Javascript file that can be used to customize the user interface. 74 | # The Javascript file can be served from the public directory. 75 | # custom_js = "/public/test.js" 76 | 77 | # Specify a custom font url. 78 | # custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap" 79 | 80 | # Specify a custom build directory for the frontend. 81 | # This can be used to customize the frontend code. 82 | # Be careful: If this is a relative path, it should not start with a slash. 83 | # custom_build = "./public/build" 84 | 85 | # Override default MUI light theme. (Check theme.ts) 86 | [UI.theme] 87 | #font_family = "Inter, sans-serif" 88 | [UI.theme.light] 89 | #background = "#FAFAFA" 90 | #paper = "#FFFFFF" 91 | 92 | [UI.theme.light.primary] 93 | #main = "#F80061" 94 | #dark = "#980039" 95 | #light = "#FFE7EB" 96 | 97 | # Override default MUI dark theme. (Check theme.ts) 98 | [UI.theme.dark] 99 | #background = "#FAFAFA" 100 | #paper = "#FFFFFF" 101 | 102 | [UI.theme.dark.primary] 103 | #main = "#F80061" 104 | #dark = "#980039" 105 | #light = "#FFE7EB" 106 | 107 | 108 | [meta] 109 | generated_by = "1.0.506" 110 | -------------------------------------------------------------------------------- /src/.chainlit/translations/en-US.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Settings", 8 | "settingsKey": "S", 9 | "APIKeys": "API Keys", 10 | "logout": "Logout" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "New Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Task List", 22 | "loading": "Loading...", 23 | "error": "An error occured" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancel upload", 28 | "removeAttachment": "Remove attachment" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Create new chat?", 32 | "clearChat": "This will clear the current messages and start a new chat.", 33 | "cancel": "Cancel", 34 | "confirm": "Confirm" 35 | }, 36 | "settingsModal": { 37 | "settings": "Settings", 38 | "expandMessages": "Expand Messages", 39 | "hideChainOfThought": "Hide Chain of Thought", 40 | "darkMode": "Dark Mode" 41 | }, 42 | "detailsButton": { 43 | "using": "Using", 44 | "running": "Running", 45 | "took_one": "Took {{count}} step", 46 | "took_other": "Took {{count}} steps" 47 | }, 48 | "auth": { 49 | "authLogin": { 50 | "title": "Login to access the app.", 51 | "form": { 52 | "email": "Email address", 53 | "password": "Password", 54 | "noAccount": "Don't have an account?", 55 | "alreadyHaveAccount": "Already have an account?", 56 | "signup": "Sign Up", 57 | "signin": "Sign In", 58 | "or": "OR", 59 | "continue": "Continue", 60 | "forgotPassword": "Forgot password?", 61 | "passwordMustContain": "Your password must contain:", 62 | "emailRequired": "email is a required field", 63 | "passwordRequired": "password is a required field" 64 | }, 65 | "error": { 66 | "default": "Unable to sign in.", 67 | "signin": "Try signing in with a different account.", 68 | "oauthsignin": "Try 
signing in with a different account.", 69 | "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.", 70 | "oauthcallbackerror": "Try signing in with a different account.", 71 | "oauthcreateaccount": "Try signing in with a different account.", 72 | "emailcreateaccount": "Try signing in with a different account.", 73 | "callback": "Try signing in with a different account.", 74 | "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.", 75 | "emailsignin": "The e-mail could not be sent.", 76 | "emailverify": "Please verify your email, a new email has been sent.", 77 | "credentialssignin": "Sign in failed. Check the details you provided are correct.", 78 | "sessionrequired": "Please sign in to access this page." 79 | } 80 | }, 81 | "authVerifyEmail": { 82 | "almostThere": "You're almost there! We've sent an email to ", 83 | "verifyEmailLink": "Please click on the link in that email to complete your signup.", 84 | "didNotReceive": "Can't find the email?", 85 | "resendEmail": "Resend email", 86 | "goBack": "Go Back", 87 | "emailSent": "Email sent successfully.", 88 | "verifyEmail": "Verify your email address" 89 | }, 90 | "providerButton": { 91 | "continue": "Continue with {{provider}}", 92 | "signup": "Sign up with {{provider}}" 93 | }, 94 | "authResetPassword": { 95 | "newPasswordRequired": "New password is a required field", 96 | "passwordsMustMatch": "Passwords must match", 97 | "confirmPasswordRequired": "Confirm password is a required field", 98 | "newPassword": "New password", 99 | "confirmPassword": "Confirm password", 100 | "resetPassword": "Reset Password" 101 | }, 102 | "authForgotPassword": { 103 | "email": "Email address", 104 | "emailRequired": "email is a required field", 105 | "emailSent": "Please check the email address {{email}} for instructions to reset your password.", 106 | "enterEmail": "Enter your email address and we will send you instructions to reset your password.", 107 | "resendEmail": "Resend email", 108 | "continue": "Continue", 109 | "goBack": "Go Back" 110 | } 111 | } 112 | }, 113 | "organisms": { 114 | "chat": { 115 | "history": { 116 | "index": { 117 | "showHistory": "Show history", 118 | "lastInputs": "Last Inputs", 119 | "noInputs": "Such empty...", 120 | "loading": "Loading..." 121 | } 122 | }, 123 | "inputBox": { 124 | "input": { 125 | "placeholder": "Type your message here..." 
126 | }, 127 | "speechButton": { 128 | "start": "Start recording", 129 | "stop": "Stop recording" 130 | }, 131 | "SubmitButton": { 132 | "sendMessage": "Send message", 133 | "stopTask": "Stop Task" 134 | }, 135 | "UploadButton": { 136 | "attachFiles": "Attach files" 137 | }, 138 | "waterMark": { 139 | "text": "Built with" 140 | } 141 | }, 142 | "Messages": { 143 | "index": { 144 | "running": "Running", 145 | "executedSuccessfully": "executed successfully", 146 | "failed": "failed", 147 | "feedbackUpdated": "Feedback updated", 148 | "updating": "Updating" 149 | } 150 | }, 151 | "dropScreen": { 152 | "dropYourFilesHere": "Drop your files here" 153 | }, 154 | "index": { 155 | "failedToUpload": "Failed to upload", 156 | "cancelledUploadOf": "Cancelled upload of", 157 | "couldNotReachServer": "Could not reach the server", 158 | "continuingChat": "Continuing previous chat" 159 | }, 160 | "settings": { 161 | "settingsPanel": "Settings panel", 162 | "reset": "Reset", 163 | "cancel": "Cancel", 164 | "confirm": "Confirm" 165 | } 166 | }, 167 | "threadHistory": { 168 | "sidebar": { 169 | "filters": { 170 | "FeedbackSelect": { 171 | "feedbackAll": "Feedback: All", 172 | "feedbackPositive": "Feedback: Positive", 173 | "feedbackNegative": "Feedback: Negative" 174 | }, 175 | "SearchBar": { 176 | "search": "Search" 177 | } 178 | }, 179 | "DeleteThreadButton": { 180 | "confirmMessage": "This will delete the thread as well as it's messages and elements.", 181 | "cancel": "Cancel", 182 | "confirm": "Confirm", 183 | "deletingChat": "Deleting chat", 184 | "chatDeleted": "Chat deleted" 185 | }, 186 | "index": { 187 | "pastChats": "Past Chats" 188 | }, 189 | "ThreadList": { 190 | "empty": "Empty...", 191 | "today": "Today", 192 | "yesterday": "Yesterday", 193 | "previous7days": "Previous 7 days", 194 | "previous30days": "Previous 30 days" 195 | }, 196 | "TriggerButton": { 197 | "closeSidebar": "Close sidebar", 198 | "openSidebar": "Open sidebar" 199 | } 200 | }, 201 | "Thread": { 202 | "backToChat": "Go back to chat", 203 | "chatCreatedOn": "This chat was created on" 204 | } 205 | }, 206 | "header": { 207 | "chat": "Chat", 208 | "readme": "Readme" 209 | } 210 | } 211 | }, 212 | "hooks": { 213 | "useLLMProviders": { 214 | "failedToFetchProviders": "Failed to fetch providers:" 215 | } 216 | }, 217 | "pages": { 218 | "Design": {}, 219 | "Env": { 220 | "savedSuccessfully": "Saved successfully", 221 | "requiredApiKeys": "Required API Keys", 222 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage." 223 | }, 224 | "Page": { 225 | "notPartOfProject": "You are not part of this project." 226 | }, 227 | "ResumeButton": { 228 | "resumeChat": "Resume Chat" 229 | } 230 | } 231 | } -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) 
will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | !.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | .env 400 | spans.json 401 | .files/ 402 | **/.promptflow 403 | src/generate_data/*_batch_*.jsonl 404 | -------------------------------------------------------------------------------- /src/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import chainlit as cl 4 | import base64 5 | from time import time_ns 6 | 7 | from opentelemetry import trace 8 | from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator 9 | from opentelemetry.sdk.trace import TracerProvider 10 | from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor 11 | import opentelemetry 12 | from opentelemetry import _logs # _log is an unfortunate hack that will eventually be resolved on the OTel side with a new Event API 13 | from opentelemetry.sdk.trace import TracerProvider 14 | from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter 15 | from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator 16 | from opentelemetry.trace.span import TraceFlags 17 | from opentelemetry.sdk._logs import LoggerProvider 18 | from 
opentelemetry.sdk._logs.export import SimpleLogRecordProcessor, ConsoleLogExporter 19 | from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter, AzureMonitorTraceExporter 20 | 21 | from assistant_flow.chat import chat_completion 22 | 23 | from promptflow.tracing import start_trace 24 | from dotenv import load_dotenv 25 | import logging 26 | load_dotenv() 27 | 28 | def setup_app_insights(): 29 | from promptflow.tracing._integrations._openai_injector import inject_openai_api 30 | inject_openai_api() 31 | 32 | # dial down the logs for azure monitor -- it is so chatty 33 | azmon_logger = logging.getLogger('azure') 34 | azmon_logger.setLevel(logging.WARNING) 35 | 36 | # Set the Tracer Provider 37 | trace.set_tracer_provider(TracerProvider()) 38 | 39 | from azure.monitor.opentelemetry.exporter import AzureMonitorTraceExporter 40 | 41 | # Configure Azure Monitor as the Exporter 42 | print("using the following connection string", os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING')) 43 | trace_exporter = AzureMonitorTraceExporter( 44 | connection_string=os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING') 45 | ) 46 | 47 | # Add the Azure exporter to the tracer provider 48 | trace.get_tracer_provider().add_span_processor( 49 | SimpleSpanProcessor(trace_exporter) 50 | ) 51 | 52 | # Configure Console as the Exporter 53 | file = open('spans.json', 'w') 54 | 55 | console_exporter = ConsoleSpanExporter(out=file) 56 | trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(console_exporter)) 57 | 58 | provider = LoggerProvider() 59 | _logs.set_logger_provider(provider) 60 | console_exporter = ConsoleLogExporter(out=file) 61 | provider.add_log_record_processor(SimpleLogRecordProcessor(console_exporter)) 62 | provider.add_log_record_processor(SimpleLogRecordProcessor(AzureMonitorLogExporter(connection_string=os.getenv('APPLICATIONINSIGHTS_CONNECTION_STRING')))) 63 | 64 | # Get a tracer 65 | return trace.get_tracer(__name__) 66 | 67 | @cl.on_chat_start 68 | def start_chat(): 69 | print("starting chat") 70 | 71 | cl.user_session.set("last_message_context", None) 72 | cl.user_session.set("session_state", {}) 73 | 74 | 75 | @cl.action_callback("upvote") 76 | async def on_action(action): 77 | span_context = json.loads(action.value) 78 | log_evaluation_event(name="user_vote", scores={"vote": 1}, span_context=span_context, message="User upvoted the answer") 79 | 80 | @cl.action_callback("downvote") 81 | async def on_action(action): 82 | span_context = json.loads(action.value) 83 | log_evaluation_event(name="user_vote", scores={"vote": 0}, span_context=span_context, message="User downvoted the answer") 84 | 85 | 86 | def show_images(image): 87 | elements = [ 88 | cl.Image( 89 | content=image, 90 | name="generated image", 91 | display="inline", 92 | ) 93 | ] 94 | return elements 95 | 96 | 97 | async def call_promptflow(message): 98 | 99 | tracer = trace.get_tracer(__name__) 100 | with tracer.start_as_current_span("call_promptflow") as span: 101 | carrier = {} 102 | # Write the current context into the carrier. 
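        # Hedged editor's note: TraceContextTextMapPropagator serializes the active
        # span context into the W3C trace-context "traceparent" format,
        # "00-<trace_id>-<span_id>-<trace_flags>"; log_evaluation_event() below
        # splits that string on "-" to recover the ids when attaching the
        # upvote/downvote feedback events to this span.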
103 | TraceContextTextMapPropagator().inject(carrier) 104 | cl.user_session.set("last_message_context", carrier) 105 | 106 | span.set_attribute("inputs", json.dumps({"question": message.content})) 107 | span.set_attribute("span_type", "function") 108 | span.set_attribute("framework", "promptflow") 109 | span.set_attribute("function", "call_promptflow") 110 | 111 | session_state = cl.user_session.get("session_state") 112 | 113 | response = await cl.make_async(chat_completion)(question=message.content, 114 | session_state=session_state) 115 | 116 | try: 117 | span.set_attribute("output", json.dumps(response)) 118 | except Exception as e: 119 | span.set_attribute("output", str(e)) 120 | 121 | return response 122 | 123 | 124 | def log_evaluation_event(name: str, scores: dict, span_context: dict, message: str) -> None: 125 | trace_id = int(span_context["traceparent"].split("-")[1], 16) 126 | span_id = int(span_context["traceparent"].split("-")[2], 16) 127 | trace_flags = TraceFlags(int(span_context["traceparent"].split("-")[3], 16)) 128 | # print(trace_id, span_id, trace_flags) 129 | 130 | attributes = {"event.name": f"gen_ai.evaluation.{name}"} 131 | for key, value in scores.items(): 132 | attributes[f"gen_ai.evaluation.{key}"] = value 133 | 134 | event = opentelemetry.sdk._logs.LogRecord( 135 | timestamp=time_ns(), 136 | observed_timestamp=time_ns(), 137 | trace_id=trace_id, 138 | span_id=span_id, 139 | trace_flags=trace_flags, 140 | severity_text=None, 141 | severity_number=_logs.SeverityNumber.UNSPECIFIED, 142 | body=message, 143 | attributes=attributes 144 | ) 145 | 146 | _logs.get_logger(__name__).emit(event) 147 | 148 | async def feedback(feedback_type, trace_context): 149 | tracer = trace.get_tracer(__name__) 150 | last_message_context = cl.user_session.get("last_message_context") 151 | if last_message_context is None: 152 | await cl.Message(content=f"#### no last message set").send() 153 | return 154 | 155 | ctx = TraceContextTextMapPropagator().extract(carrier=last_message_context) 156 | 157 | with tracer.start_as_current_span("user_feedback", context=ctx) as span: 158 | span.set_attribute("evaluation", "user_feedback") 159 | span.set_attribute("output", json.dumps({"feedback": feedback_type})) 160 | 161 | 162 | await cl.Message(content=f"#### Feedback recorded: {feedback_type} for {last_message_context}").send() 163 | return 164 | 165 | @cl.on_message 166 | async def run_conversation(message: cl.Message): 167 | question = message.content 168 | 169 | from chainlit import make_async, run_sync 170 | 171 | msg = cl.Message(content="") 172 | await msg.send() 173 | 174 | reply = await call_promptflow(message) 175 | if "session_state" in reply: 176 | cl.user_session.set("session_state", reply["session_state"]) 177 | stream = reply["chat_output"] 178 | response = "" 179 | images = [] 180 | for thing in stream: 181 | 182 | if thing.strip().startswith("!["): 183 | image = parse_image(thing.strip()) 184 | images.append(image) 185 | msg.elements = images 186 | else: 187 | response += thing 188 | msg.content = response 189 | 190 | await msg.update() 191 | await msg.stream_token("🏁") 192 | await msg.update() 193 | 194 | last_message_context = cl.user_session.get("last_message_context") 195 | last_message_context_json = json.dumps(last_message_context) 196 | # Sending an action button within a chatbot message 197 | actions = [ 198 | cl.Action(name="upvote", value=last_message_context_json, description="Click me if you like the answer!"), 199 | cl.Action(name="downvote", 
value=last_message_context_json, description="Click me if you don't like the answer!")
200 |     ]
201 | 
202 |     await cl.Message(content="Rate the Chatbot's answer:", actions=actions).send()
203 | 
204 | 
205 | def parse_image(thing):
206 |     # parse the image data from this inline markdown image
207 |     # ![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAABjElEQVRIS+2VvUoDQRSGv)
208 |     image = thing.split("(data:image/png;base64,")[1].split(")")[0]
209 |     data = base64.b64decode(image)
210 |     return cl.Image(content=data, name="generated image", display="inline", size="large")
211 | 
212 | if __name__ == "__main__":
213 |     start_trace()
214 |     setup_app_insights()
215 | 
216 |     print("using the following chat_model", os.getenv("OPENAI_CHAT_MODEL"))
217 | 
218 |     from chainlit.cli import run_chainlit
219 |     run_chainlit(__file__)
220 | 
--------------------------------------------------------------------------------
/src/assistant_flow/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | __pycache__/
3 | .promptflow/*
4 | !.promptflow/flow.tools.json
5 | .runs/
--------------------------------------------------------------------------------
/src/assistant_flow/chat.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | 
4 | # local imports
5 | from assistant_flow.core import AssistantAPI
6 | from promptflow.tracing import start_trace, trace
7 | from openai import AzureOpenAI
8 | from sales_data_insights.main import SalesDataInsights
9 | from typing import TypedDict
10 | 
11 | 
12 | # You can get the same code with this link: https://aka.ms/2024-brk141
13 | 
14 | class AssistantStream(TypedDict):
15 | 
16 |     """
17 |     Assistant flow response. This is the output of the assistant flow and the chat_completion function.
18 |     It contains the chat_output and session_state.
19 | 
20 |     Attributes:
21 |         chat_output (str): The streamed output from the assistant.
22 |         session_state (dict): The assistant thread bookkeeping.
23 |     """
24 | 
25 |     chat_output: str
26 |     session_state: dict
27 | 
28 | 
29 | @trace
30 | def chat_completion(
31 |     question: str,
32 |     session_state: dict = None,
33 | ) -> AssistantStream:
34 | 
35 |     """
36 |     This is the entry point of the Assistant flow.
37 |     Args:
38 |         question (str): The question to ask the assistant.
39 |         session_state (dict, optional): The session state to resume from. Defaults to None.
40 | Returns: AssistantStream 41 | """ 42 | 43 | # verify all env vars are present 44 | required_env_vars = [ 45 | "OPENAI_API_BASE", 46 | "OPENAI_API_KEY", 47 | "OPENAI_API_VERSION", 48 | "OPENAI_ASSISTANT_ID", 49 | ] 50 | missing_env_vars = [] 51 | for env_var in required_env_vars: 52 | if env_var not in os.environ: 53 | missing_env_vars.append(env_var) 54 | 55 | assert ( 56 | not missing_env_vars 57 | ), f"Missing environment variables: {missing_env_vars}" 58 | 59 | global client 60 | client = AzureOpenAI( 61 | azure_endpoint=os.getenv("OPENAI_API_BASE"), 62 | api_key=os.getenv("OPENAI_API_KEY"), 63 | api_version=os.getenv("OPENAI_API_VERSION"), 64 | ) 65 | sales_data_insights = SalesDataInsights() 66 | 67 | handler = AssistantAPI(client=client, 68 | session_state=session_state, 69 | tools=dict(sales_data_insights=sales_data_insights)) 70 | return handler.start(question=question) 71 | 72 | def _test(): 73 | """Test the chat completion function.""" 74 | # try a functions combo (without RAG) 75 | response = chat_completion( 76 | question= "Plot the order numbers and USD revenue for 2023 by month in a bar chart?" 77 | ) 78 | 79 | return response 80 | 81 | 82 | if __name__ == "__main__": 83 | # we need those only for local testing 84 | from dotenv import load_dotenv 85 | import argparse 86 | 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument("--env", help="Path to .env file", default=".env") 89 | parser.add_argument( 90 | "--log", 91 | help="Logging level", 92 | default="INFO", 93 | choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], 94 | ) 95 | parser.add_argument("--output", help="Output file", default="output.log") 96 | args = parser.parse_args() 97 | 98 | # turn on logging 99 | logging.basicConfig( 100 | level=args.log, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 101 | ) 102 | 103 | # load environment variables 104 | logging.debug("Loading environment variables from {}".format(args.env)) 105 | load_dotenv(args.env, override=True) 106 | 107 | start_trace() 108 | 109 | # write tokens to output file 110 | with open(args.output, "w") as f: 111 | for token in _test()["chat_output"]: 112 | # write token to stream and flush 113 | f.write(str(token)) 114 | #f.write("\n") 115 | f.flush() 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /src/assistant_flow/requirements.txt: -------------------------------------------------------------------------------- 1 | openai==1.13.3 2 | promptflow==1.7.0 3 | promptflow-tracing==1.0.0 4 | -------------------------------------------------------------------------------- /src/assistant_flow/setup.py: -------------------------------------------------------------------------------- 1 | # this script creates an assistant with a code interpreter and a function tool 2 | # to do data analytics on sales data. 
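# Hedged editor's note: this script expects OPENAI_API_KEY, OPENAI_API_BASE,
# OPENAI_API_VERSION and OPENAI_ASSISTANT_MODEL in the environment (loaded from
# .env via load_dotenv below); it prints the id of the created assistant, which
# you then add to .env as OPENAI_ASSISTANT_ID.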
3 | 
4 | import json, yaml
5 | from dotenv import load_dotenv
6 | from openai import AzureOpenAI
7 | import os
8 | 
9 | load_dotenv(override=True)
10 | 
11 | def show_json(obj):
12 |     print(json.loads(obj.model_dump_json()))
13 | 
14 | def show_yaml(obj):
15 |     print(yaml.dump(json.loads(obj.model_dump_json()), indent=4))
16 | 
17 | print("OPENAI_API_KEY", os.getenv("OPENAI_API_KEY"))
18 | print("OPENAI_API_BASE", os.getenv("OPENAI_API_BASE"))
19 | print("OPENAI_API_VERSION", os.getenv("OPENAI_API_VERSION"))
20 | client = AzureOpenAI(
21 |     api_key = os.getenv("OPENAI_API_KEY"),
22 |     azure_endpoint = os.getenv("OPENAI_API_BASE"),
23 |     api_version = os.getenv("OPENAI_API_VERSION")
24 | )
25 | 
26 | tools = [
27 |     {
28 |         "type": "code_interpreter"
29 |     },
30 |     {
31 |         "type": "function",
32 |         "function": {
33 |             "name": "sales_data_insights",
34 |             "description": """
35 | get some data insights about the contoso sales data. This tool has aggregated information in the following structure:
36 | Number_of_Orders INTEGER "the number of orders processed"
37 | Sum_of_Order_Value_USD REAL "the total value of the orders processed in USD"
38 | Sum_of_Number_of_Items REAL "the sum of items in the orders processed"
39 | Number_of_Orders_with_Discount INTEGER "the number of orders that received a discount"
40 | Sum_of_Discount_Percentage REAL "the sum of discount percentage -- useful to calculate average discounts given"
41 | Sum_of_Shipping_Cost_USD REAL "the sum of shipping cost for the processed orders"
42 | Number_of_Orders_Returned INTEGER "the number of orders returned by the customers"
43 | Number_of_Orders_Cancelled INTEGER "the number of orders cancelled by the customers before they were sent out"
44 | Sum_of_Time_to_Fulfillment REAL "the sum of time to fulfillment"
45 | Number_of_Orders_Repeat_Customers INTEGER "number of orders that were placed by repeat customers"
46 | Year INTEGER
47 | Month INTEGER
48 | Day INTEGER
49 | Date TIMESTAMP
50 | Day_of_Week INTEGER in 0 based format, Monday is 0, Tuesday is 1, etc.
51 | main_category TEXT
52 | sub_category TEXT
53 | product_type TEXT
54 | Region TEXT
55 | you can ask questions like:
56 | - what was the total revenue in Q1 2024 by region
57 | - which day of month has the least sales in january
58 | - show the average value of orders by month
59 | - what is the average sale value for Tuesdays
60 | If you are unsure of the data available, you can ask for a list of categories, days, etc.
61 | - query for all the values for the main_category
62 | The data will be returned in a json format in the data property of the returned object with the query used
63 | to get the data in the query property.
64 | If a query cannot be answered, the tool will return a message in the error property of the returned object.
65 | """,
66 |             "parameters": {
67 |                 "type": "object",
68 |                 "properties": {
69 |                     "question": {
70 |                         "type": "string",
71 |                         "description": "The question you want to ask the tool in plain English. e.g. 'what is the average sale value for Tuesdays'",
72 |                     }
73 |                 },
74 |                 "required": ["question"],
75 |             },
76 |         },
77 |     }
78 | ]
79 | 
80 | instructions="""
81 | You are a helpful assistant that helps the user potentially with the help of some functions.
82 | 
83 | If you are using multiple tools to solve a user's task, make sure to communicate
84 | information learned from one tool to the next tool.
85 | First, make a plan of how you will use the tools to solve the user's task and communicate
86 | that plan to the user with the first response.
Then execute the plan making sure to communicate
87 | the required information between tools since tools only see the information passed to them;
88 | They do not have access to the chat history.
89 | If you think that tool use can be parallelized (e.g. to get weather data for multiple cities)
90 | make sure to use the multi_tool_use.parallel function to execute.
91 | 
92 | Only use a tool when it is necessary to solve the user's task.
93 | Don't use a tool if you can answer the user's question directly.
94 | Only use the tools provided in the tools list -- don't make up tools!!
95 | If you are not getting the right information from a tool, make sure to ask the user for clarification.
96 | Do not just return the wrong information. Do not make up information.
97 | 
98 | Anything that would benefit from a tabular presentation should be returned as a markdown table.
99 | """
100 | 
101 | assistant = client.beta.assistants.create(
102 | name="Contoso Assistant",
103 | instructions=instructions,
104 | model=os.environ["OPENAI_ASSISTANT_MODEL"],
105 | tools=tools
106 | )
107 | show_json(assistant)
108 | 
109 | print("Assistant created with id", assistant.id)
110 | print("add the following to your .env file")
111 | print(f'OPENAI_ASSISTANT_ID="{assistant.id}"') -------------------------------------------------------------------------------- /src/chainlit.md: -------------------------------------------------------------------------------- 1 | # Assistant Demo!
2 | 
3 | This is a simple demo of using the OpenAI Assistant API to create a chatbot. The assistant has access to two tools:
4 | 1. A function to query the 2023 sales data for Contoso.
5 | 1. A code interpreter, which it will use to make graphs of the sales data.
6 | 
7 | Test it by asking the assistant some questions about the sales data, for instance:
8 | - show the 2023 sales by category in a bar chart
9 | - show the total sales revenue aggregated by year and month in a line chart -------------------------------------------------------------------------------- /src/custom_evaluators/execution_time.py: -------------------------------------------------------------------------------- 1 | class ExecutionTimeEvaluator:
2 | def __init__(self):
3 | pass
4 | 
5 | def __call__(self, *, execution_time: float, **kwargs):
6 | return {"seconds": execution_time} -------------------------------------------------------------------------------- /src/custom_evaluators/in_domain_evaluator.prompty: -------------------------------------------------------------------------------- 1 | ---
2 | name: InDomainQuestion
3 | description: Determines whether a question is in-domain for the given sales data table
4 | model:
5 | api: chat
6 | configuration:
7 | type: azure_openai
8 | azure_deployment: gpt-4-turbo
9 | api_key: ${env:OPENAI_API_KEY}
10 | azure_endpoint: ${env:OPENAI_API_BASE}
11 | parameters:
12 | temperature: 0
13 | response_format: { "type": "json_object" }
14 | 
15 | sample:
16 | question: |
17 | when did we have the highest revenue in 2023?
18 | 
19 | inputs:
20 | question:
21 | type: string
22 | 
23 | outputs:
24 | score:
25 | type: int
26 | explanation:
27 | type: string
28 | ---
29 | 
30 | system:
31 | You are an AI tool that determines whether a question given to the sales data insights agent is in the domain.
32 | You will be given a question and you will give a score based on how related to the domain the question is.
33 | 
34 | ### SQLite table `order_data` with properties:
35 | #
36 | # Number_of_Orders INTEGER "the number of orders processed"
37 | # Sum_of_Order_Value_USD REAL "the total value of the orders processed in USD"
38 | # Sum_of_Number_of_Items REAL "the sum of items in the orders processed"
39 | # Number_of_Orders_with_Discount INTEGER "the number of orders that received a discount"
40 | # Sum_of_Discount_Percentage REAL "the sum of discount percentage -- useful to calculate average discounts given"
41 | # Sum_of_Shipping_Cost_USD REAL "the sum of shipping cost for the processed orders"
42 | # Number_of_Orders_Returned INTEGER "the number of orders returned by the customers"
43 | # Number_of_Orders_Cancelled INTEGER "the number of orders cancelled by the customers before they were sent out"
44 | # Sum_of_Time_to_Fulfillment REAL "the sum of time to fulfillment"
45 | # Number_of_Orders_Repeat_Customers INTEGER "number of orders that were placed by repeat customers"
46 | # Year INTEGER
47 | # Month INTEGER
48 | # Day INTEGER
49 | # Date TIMESTAMP
50 | # Day_of_Week INTEGER in 0 based format, Monday is 0, Tuesday is 1, etc.
51 | # main_category TEXT
52 | # sub_category TEXT
53 | # product_type TEXT
54 | # Region TEXT
55 | #
56 | 
57 | 
58 | This rating value should always be an integer between 1 and 5. So the rating produced should be 1 or 2 or 3 or 4 or 5.
59 | 
60 | Use the following criteria to determine whether the question is off-domain:
61 | First determine if the question is related to sales at all. If it is not, it should be rated 1.
62 | 
63 | Then, if you didn't fail the question based on the above, check if the question is related to the sales data table.
64 | If it is not, it should be rated 2.
65 | 
66 | If the question is related to the sales data table but is vague, ambiguous, or imprecise, it should be rated 3 or 4, depending on how vague it is.
67 | 
68 | If the question is sales data related, can be answered with tables, and is specific/precise, then it should be rated 5.
69 | 
70 | Provide a detailed assessment of how well the question fits the domain described above.
71 | 
72 | Provide response as valid JSON.
73 | 
74 | Here are some examples of questions with their ratings:
75 | 
76 | **Example 1**
77 | question:
78 | What is the total revenue for the month with the highest revenue in 2024?
79 | 
80 | result:
81 | {
82 | "explanation": "The question is related to sales data, as it asks for the total revenue for a specific month in a specific year. The question is also specific enough to be answered using the sales data table, as it requires finding the month with the highest revenue in 2024 and calculating the total revenue for that month. Therefore, the question is in-domain and should be rated 5.",
83 | "score": 5
84 | }
85 | 
86 | **Example 2**
87 | 
88 | question:
89 | How many orders were placed on a Holiday in 2023?
90 | 
91 | result:
92 | {
93 | "explanation": "The question is related to sales data, as it asks for the number of orders placed on specific days; however, the question cannot be answered using the sales data table provided, as the table does not have a column for holidays. Therefore, the question is off-domain and should be rated 2.",
94 | "score": 2
95 | }
96 | 
97 | **Example 3**
98 | question:
99 | How many swallow do make a summer?
100 | 
101 | result:
102 | {
103 | "explanation": "The question is not related to sales data, as it is asking about the number of swallows in a season, which is not relevant to the sales data table provided.
Therefore, the question is off-domain and should be rated 1.", 104 | "score": 1 105 | } 106 | 107 | **Example 4** 108 | question: 109 | What were our sales in the month of December 2023? 110 | 111 | result: 112 | { 113 | "explanation": "The question is related to the sales data, as it asks for the sales in a specific month and year. However, it is vague what is meant by sales, which could mean revenue, number of orders, or other sales metrics. Therefore, the question is related to the sales data table but is vague and imprecise, so it should be rated 4.", 114 | "score": 4 115 | } 116 | 117 | 118 | **Here the actual conversation to be scored:** 119 | question: 120 | {{question}} 121 | 122 | result: 123 | -------------------------------------------------------------------------------- /src/custom_evaluators/sql_similarity.prompty: -------------------------------------------------------------------------------- 1 | --- 2 | name: Sql Similarity Evaluator 3 | description: Sql Similarity Evaluator to compare two SQL queries and return a similarity score 4 | model: 5 | api: chat 6 | configuration: 7 | type: azure_openai 8 | azure_deployment: gpt-4-turbo 9 | api_key: ${env:OPENAI_API_KEY} 10 | azure_endpoint: ${env:OPENAI_API_BASE} 11 | parameters: 12 | temperature: 0 13 | response_format: { "type": "json_object" } 14 | 15 | sample: 16 | response: | 17 | SELECT Region 18 | FROM order_data 19 | 20 | ground_truth: | 21 | SELECT DISTINCT Region 22 | FROM order_data 23 | 24 | inputs: 25 | response: 26 | type: string 27 | 28 | ground_truth: 29 | type: string 30 | 31 | outputs: 32 | score: 33 | type: int 34 | explanation: 35 | type: string 36 | --- 37 | 38 | system: 39 | You are an AI tool that determines similarity of two SQL queries. 40 | You will be given two SQL queries against the below table and you need to determine how similar they are. 41 | 42 | ### SQLite table `order_data` with properties: 43 | # 44 | # Number_of_Orders INTEGER "the number of orders processed" 45 | # Sum_of_Order_Value_USD REAL "the total value of the orders processed in USD" 46 | # Sum_of_Number_of_Items REAL "the sum of items in the orders processed" 47 | # Number_of_Orders_with_Discount INTEGER "the number of orders that received a discount" 48 | # Sum_of_Discount_Percentage REAL "the sum of discount percentage -- useful to calculate average discounts given" 49 | # Sum_of_Shipping_Cost_USD REAL "the sum of shipping cost for the processed orders" 50 | # Number_of_Orders_Returned INTEGER "the number of orders returned by the customers" 51 | # Number_of_Orders_Cancelled INTEGER "the number or orders cancelled by the customers before they were sent out" 52 | # Sum_of_Time_to_Fulfillment REAL "the sum of time to fulfillment" 53 | # Number_of_Orders_Repeat_Customers INTEGER "number of orders that were placed by repeat customers" 54 | # Year INTEGER 55 | # Month INTEGER 56 | # Day INTEGER 57 | # Date TIMESTAMP 58 | # Day_of_Week INTEGER in 0 based format, Monday is 0, Tuesday is 1, etc. 59 | # main_category TEXT 60 | # sub_category TEXT 61 | # product_type TEXT 62 | # Region TEXT 63 | # 64 | 65 | 66 | This rating value should always be an integer between 1 and 5. So the rating produced should be 1 or 2 or 3 or 4 or 5. 67 | 68 | Use the following criteria to determine the similarity score of the response compared to the ground_truth query: 69 | First determine if the response query is correct or not. 70 | - Syntactical correctness: If the response query is syntactically incorrect, it should be rated 1. 
71 | - Use of fields from the above table: If the response query uses fields from the above table incorrectly, it should be rated 1.
72 | - Use of tables: If the response query uses any table other than order_data and common sqlite metadata/PRAGMA tables, it should be rated 1.
73 | 
74 | Then, if you didn't fail the response based on the above, compare the response with the ground_truth query based on the following criteria:
75 | - Cardinality: Will the results be the same number of rows as the ground_truth? If not, it should be rated no better than 3.
76 | - Will the response query return the same data structure as the ground_truth query? If not, it should be rated no better than 3.
77 | - The structure of each query: How is the response query constructed? Does it use similar clauses and operators as the ground_truth query? If not, it should be rated no better than 4.
78 | - The efficiency of each query: Is the response query about as efficient as the ground_truth query?
79 | 
80 | Provide a detailed assessment of the comparison of the response query to the ground_truth query.
81 | 
82 | Provide response as valid JSON.
83 | 
84 | Here are some examples of chat conversations and the correct response:
85 | 
86 | **Example 1**
87 | generated_query:
88 | SELECT SUM(Sum_of_Order_Value_USD) as Total_Revenue, Month
89 | FROM order_data
90 | GROUP BY Month
91 | ORDER BY Total_Revenue DESC
92 | LIMIT 1
93 | 
94 | ground_truth_query:
95 | SELECT SUM(Sum_of_Order_Value_USD) as Revenue, Month
96 | FROM order_data
97 | GROUP BY Month
98 | ORDER BY Revenue DESC
99 | LIMIT 1
100 | 
101 | result:
102 | {
103 | "explanation": "Both queries are functionally identical, aiming to retrieve the highest monthly total revenue from the order_data table. They both sum the order values, group the results by month, order them in descending order by the summed value, and limit the output to the top result. The only difference is the alias used for the summed value, which does not affect the query's functionality, efficiency, or readability.",
104 | "score": 5
105 | }
106 | 
107 | **Example 2**
108 | 
109 | generated_query:
110 | SELECT SUM(Number_of_Orders)
111 | FROM order_data
112 | WHERE Month = 5
113 | AND Year = 2024
114 | 
115 | ground_truth_query:
116 | SELECT SUM(Number_of_Orders)
117 | FROM order_data
118 | WHERE Year = 2024
119 | AND Month = 5
120 | GROUP BY Day
121 | 
122 | result:
123 | {
124 | "explanation": "The queries are similar in that they both aim to sum the number of orders from the order_data table for a specific month and year. However, the ground_truth_query includes a GROUP BY clause that groups the results by day, which the generated_query does not have.
This means the ground_truth_query will return a sum for each day, while the generated_query will return a single sum for the entire month.",
125 | "score": 2
126 | }
127 | 
128 | **Here is the actual conversation to be scored:**
129 | generated_query:
130 | {{response}}
131 | 
132 | ground_truth_query:
133 | {{ground_truth}}
134 | 
135 | result:
136 | -------------------------------------------------------------------------------- /src/custom_evaluators/sql_similarity_brief.prompty: -------------------------------------------------------------------------------- 1 | ---
2 | name: Sql Similarity Evaluator
3 | description: Sql Similarity Evaluator to compare two SQL queries and return a similarity score
4 | model:
5 | api: chat
6 | configuration:
7 | type: azure_openai
8 | azure_deployment: gpt-4-turbo
9 | api_key: ${env:OPENAI_API_KEY}
10 | azure_endpoint: ${env:OPENAI_API_BASE}
11 | parameters:
12 | temperature: 0
13 | response_format: { "type": "json_object" }
14 | inputs:
15 | response:
16 | type: string
17 | default: |
18 | SELECT Region
19 | FROM order_data
20 | 
21 | ground_truth:
22 | type: string
23 | default: |
24 | SELECT DISTINCT Region
25 | FROM order_data
26 | 
27 | outputs:
28 | score:
29 | type: int
30 | 
31 | ---
32 | 
33 | system:
34 | You are an AI tool that determines similarity of two SQL queries.
35 | You will be given two SQL queries against the `order_data` table and you need to determine how similar they are.
36 | 
37 | 
38 | 
39 | This rating value should always be an integer between 1 and 5. So the rating produced should be 1 or 2 or 3 or 4 or 5.
40 | 
41 | Use the following criteria to determine the similarity score:
42 | - The purpose of each query: What is each query designed to do?
43 | - The structure of each query: How is each query constructed? What clauses and operators are used?
44 | - The efficiency of each query: Which query is likely to be more efficient and why? Consider factors such as the number of records accessed, the use of indexes, and the complexity of the operations performed.
45 | - The readability and maintainability of each query: Which query is easier to understand and modify?
46 | - The accuracy of each query: Which query is more likely to return the correct results?
47 | 
48 | Conduct a detailed comparison of the two queries in terms of their structure, functionality, and efficiency and then provide a similarity score between 1 and 5.
49 | 
50 | Provide response as valid JSON.
51 | 52 | Here are some examples of chat conversations and the correct response: 53 | 54 | **Example 1** 55 | generated_query: 56 | SELECT SUM(Sum_of_Order_Value_USD) as Total_Revenue, Month 57 | FROM order_data 58 | GROUP BY Month 59 | ORDER BY Total_Revenue DESC 60 | LIMIT 1 61 | 62 | ground_truth_query: 63 | SELECT SUM(Sum_of_Order_Value_USD) as Revenue, Month 64 | FROM order_data 65 | GROUP BY Month 66 | ORDER BY Revenue DESC 67 | LIMIT 1 68 | 69 | result: 70 | {"score": 5} 71 | 72 | **Example 2** 73 | 74 | generated_query: 75 | SELECT SUM(Number_of_Orders) 76 | FROM order_data 77 | WHERE Month = 5 78 | AND Year = 2024 79 | 80 | ground_truth_query: 81 | SELECT SUM(Number_of_Orders) 82 | FROM order_data 83 | WHERE Year = 2024 84 | AND Month = 5 85 | GROUP BY Day 86 | 87 | result: 88 | {"score": 2} 89 | 90 | **Here the actual conversation to be scored:** 91 | generated_query: 92 | {{response}} 93 | 94 | ground_truth_query: 95 | {{ground_truth}} 96 | 97 | result: 98 | -------------------------------------------------------------------------------- /src/custom_evaluators/test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 14, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from dotenv import load_dotenv \n", 10 | "load_dotenv()\n", 11 | "\n", 12 | "from promptflow.core._model_configuration import AzureOpenAIModelConfiguration\n", 13 | "from promptflow.client import load_flow\n", 14 | "import os\n", 15 | "\n", 16 | "model_config = {\n", 17 | " \"api\": \"chat\",\n", 18 | " \"configuration\": AzureOpenAIModelConfiguration(\n", 19 | " api_key=os.environ[\"OPENAI_API_KEY\"],\n", 20 | " api_version=os.environ[\"OPENAI_API_VERSION\"],\n", 21 | " azure_endpoint=os.environ[\"OPENAI_API_BASE\"],\n", 22 | " azure_deployment=\"gpt-35-turbo-1106\",\n", 23 | " ),\n", 24 | " \"parameters\": {\n", 25 | " \"max_token\": 512\n", 26 | " }\n", 27 | "}\n", 28 | "sql_similarity_brief = load_flow('sql_similarity_brief.prompty')" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 15, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "from promptflow.client import load_flow\n", 38 | "\n", 39 | "sql_similarity = load_flow('sql_similarity.prompty')\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 16, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "generated_sql = \"\"\"\n", 49 | "SELECT SUM(Number_of_Orders)\n", 50 | "FROM order_data\n", 51 | "WHERE Month = 5\n", 52 | "AND Year = 2024\n", 53 | "\"\"\"\n", 54 | "\n", 55 | "ground_truth_sql = \"\"\"\n", 56 | "SELECT SUM(Number_of_Orders)\n", 57 | "FROM order_data\n", 58 | "WHERE Year = 2024\n", 59 | "AND Month = 5\n", 60 | "GROUP BY Day\n", 61 | "\"\"\"\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 17, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "{'score': 2,\n", 73 | " 'explanation': 'The queries are similar in that they both aim to sum the number of orders from the order_data table for a specific month and year. However, the ground_truth_query includes a GROUP BY clause that groups the results by day, which the generated_query does not have. This means the ground_truth_query will return a sum for each day, while the generated_query will return a single sum for the entire month. 
The difference in the GROUP BY clause affects the granularity of the results and thus the purpose and functionality of the queries are different.'}" 74 | ] 75 | }, 76 | "execution_count": 17, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "sql_similarity(response=generated_sql, ground_truth=ground_truth_sql)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 20, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "{'score': 2}" 94 | ] 95 | }, 96 | "execution_count": 20, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "sql_similarity_brief(response=generated_sql, ground_truth=ground_truth_sql)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "assistant-eval", 116 | "language": "python", 117 | "name": "python3" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.11.0" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 2 134 | } 135 | -------------------------------------------------------------------------------- /src/custom_evaluators/user_vote.prompty: -------------------------------------------------------------------------------- 1 | --- 2 | name: user_vote 3 | description: Simulates a user voting on the reply given by a chatbot to a question they asked 4 | model: 5 | api: chat 6 | configuration: 7 | type: azure_openai 8 | azure_deployment: gpt-4-turbo 9 | api_key: ${env:OPENAI_API_KEY} 10 | azure_endpoint: ${env:OPENAI_API_BASE} 11 | parameters: 12 | temperature: 0 13 | response_format: { "type": "json_object" } 14 | 15 | sample: 16 | question: | 17 | when did we have the highest revenue in 2023? 18 | response: | 19 | The highest revenue in 2023 was recorded in the month of August, with a total revenue of $2,345,678.90. 20 | 21 | inputs: 22 | question: 23 | type: string 24 | response: 25 | type: string 26 | 27 | outputs: 28 | vote: 29 | type: int 30 | explanation: 31 | type: string 32 | --- 33 | 34 | system: 35 | You are an AI tool simulates a user voting on the reply given by a chatbot to a question they asked. 36 | You will be given a question that was asked by a user and an response that was given by a chat bot, 37 | and you will give a vode based on the quality of the response and whether a user would like it. 38 | 39 | 40 | This rating vote should always be an integer 0 or 1 with 0 representing a negative vote and 1 representing a positive vote. 41 | 42 | Use the following criteria to determine whether the user would like the response or not: 43 | 44 | - If the response is relevant, appears to be accurate, and helpful, give a positive vote (1). 45 | - If the response is not relevant to the question, give a negative vote (0). 46 | - If the response is relevant but not helpful or accurate, give a negative vote (0). 47 | 48 | Provide response as valid JSON. 49 | 50 | Here are some examples of questions with their ratings: 51 | 52 | **Example 1** 53 | question: 54 | which is the worst performing main category by revenue? 
55 | 56 | response: 57 | The worst performing main category by revenue is the "Travel" category, with a total revenue of $1,177,601.43. 58 | 59 | result: 60 | { 61 | "explanation": "The response is relevant to the question, as it provides the worst performing main category by revenue and the corresponding revenue amount. The response is clear and accurate, providing the requested information. Therefore, the response is helpful and should be rated 1.", 62 | "vote": 1 63 | } 64 | 65 | **Example 2** 66 | 67 | question: 68 | list all the main categories 69 | 70 | response: 71 | Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 265064 tokens (264884 in the messages, 180 in the functions). Please reduce the length of the messages or functions.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}} 72 | 73 | result: 74 | { 75 | "explanation": "The response is not relevant to the question, as it does not list all the main categories but instead provides an error message. The error message is not helpful or accurate in the context of the question asked. Therefore, the response is not helpful and should be rated 0.", 76 | "vote": 0 77 | } 78 | 79 | **Example 3** 80 | question: 81 | how many orders in men's clothing did we have in Europe this month 82 | 83 | response: 84 | It seems that we've encountered a persistent syntax error while trying to retrieve the number of orders for men's clothing in Europe this month. Despite our efforts, we haven't been able to correct this issue. I apologize for the inconvenience. If there's anything else I can assist you with or if you have another request, please feel free to let me know., 85 | 86 | result: 87 | { 88 | "explanation": "The response is relevant to the question, as it attempts to provide the number of orders, but it fails due to a syntax error. The response acknowledges the issue and apologizes for the inconvenience, showing an attempt to be helpful. However, the response does not provide the requested information due to the error. 
Therefore, the response is not helpful and should be rated 0.",
89 | "vote": 0
90 | }
91 | 
92 | **Here is the actual conversation to be scored:**
93 | question:
94 | {{question}}
95 | 
96 | response:
97 | {{response}}
98 | 
99 | result:
100 | -------------------------------------------------------------------------------- /src/evaluate/azure_monitor/call_promptflow.kql: -------------------------------------------------------------------------------- 1 | AppDependencies
2 | | where Name == "call_promptflow"
3 | | extend inputs = parse_json(todynamic(tostring(Properties["inputs"])))
4 | | extend question = inputs["question"],
5 | hash = hash(OperationId, 2) // select 1 in 2 traces
6 | | where hash==0
7 | | join kind = innerunique AppDependencies on $left.OperationId == $right.OperationId
8 | | where Name1 == "stream"
9 | | extend output = parse_json(todynamic(tostring(Properties1["output"])))
10 | | extend response = output[array_length(output) - 1]
11 | | where response != ""
12 | | project question, response, trace_id = OperationId, span_id = Id, time_stamp = TimeGenerated
13 | | order by time_stamp asc -------------------------------------------------------------------------------- /src/evaluate/azure_monitor/sales_data_insights.kql: -------------------------------------------------------------------------------- 1 | AppDependencies
2 | | where Name == "SalesDataInsights"
3 | | extend inputs = parse_json(tostring(Properties.inputs)),
4 | output = parse_json(tostring(Properties.output)),
5 | hash = hash(OperationId, 2) // select 1 in 2 traces
6 | | where hash==0
7 | | project question = inputs.question, query = output.query, error = output.error, trace_id = OperationId, span_id = Id, time_stamp = TimeGenerated
8 | | order by time_stamp asc -------------------------------------------------------------------------------- /src/evaluate/eval_azure_monitor.py: -------------------------------------------------------------------------------- 1 | ## This script is responsible for evaluating the data from an Azure Monitor workspace.
2 | # reads last_timestamp from timestamp-file
3 | # executes KQL query to get the data from the Azure Monitor workspace for timestamp >= last_timestamp
4 | # note: the KQL query must return the fields trace_id, span_id, time_stamp
5 | # in addition to the fields that are required by the evaluator.
6 | # passes the data into the evaluator to get the evaluation results.
7 | # writes the evaluation results as events to the app insights instance.
8 | # writes the last_timestamp back to the timestamp-file 9 | 10 | import asyncio 11 | import pathlib 12 | import os, json 13 | import pandas as pd 14 | from datetime import datetime, timezone, timedelta 15 | from time import time_ns 16 | from azure.monitor.query.aio import LogsQueryClient 17 | from azure.monitor.query import LogsQueryStatus 18 | from azure.identity import DefaultAzureCredential 19 | from azure.core.exceptions import HttpResponseError 20 | import logging 21 | from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration 22 | 23 | import opentelemetry 24 | from opentelemetry import _logs # _log is unfortunate hack that will eventually be resolved on OTel side with new Event API 25 | from opentelemetry.sdk.trace import TracerProvider 26 | from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter 27 | from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator 28 | from opentelemetry.trace.span import TraceFlags 29 | from opentelemetry.sdk._logs import LoggerProvider 30 | from opentelemetry.sdk._logs.export import SimpleLogRecordProcessor, ConsoleLogExporter 31 | from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter, AzureMonitorTraceExporter 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | async def execute_kql_query(log_analytics_workspace, kql_query, last_timestamp): 37 | credential = DefaultAzureCredential() 38 | client = LogsQueryClient(credential) 39 | 40 | end_time=datetime.now(timezone.utc) 41 | start_time=last_timestamp 42 | 43 | logger.info(f"Executing KQL query: {kql_query}") 44 | logger.info(f"Start time: {start_time}") 45 | logger.info(f"End time: {end_time}") 46 | 47 | try: 48 | response = await client.query_workspace( 49 | workspace_id=log_analytics_workspace, 50 | query=kql_query, 51 | timespan=(start_time, end_time) 52 | ) 53 | if response.status == LogsQueryStatus.PARTIAL: 54 | error = response.partial_error 55 | data = response.partial_data 56 | print(error) 57 | elif response.status == LogsQueryStatus.SUCCESS: 58 | data = response.tables 59 | for table in data: 60 | df = pd.DataFrame(data=table.rows, columns=table.columns) 61 | 62 | except HttpResponseError as err: 63 | print("something fatal happened") 64 | print(err) 65 | finally: 66 | await client.close() 67 | 68 | # make sure it has the required fields 69 | required_fields = ["trace_id", "span_id", "time_stamp"] 70 | for field in required_fields: 71 | if field not in df.columns: 72 | raise ValueError(f"Required field {field} not found in the dataframe") 73 | 74 | # sort dataframes by time_stamp 75 | df.sort_values(by="time_stamp", inplace=True) 76 | return df 77 | 78 | def configure_logging(connection_string): 79 | provider = LoggerProvider() 80 | _logs.set_logger_provider(provider) 81 | 82 | #logger_provider.add_log_record_processor(SimpleLogRecordProcessor(OTLPLogExporter())) 83 | provider.add_log_record_processor(SimpleLogRecordProcessor(ConsoleLogExporter())) 84 | provider.add_log_record_processor(SimpleLogRecordProcessor(AzureMonitorLogExporter(connection_string=connection_string))) 85 | 86 | def log_evaluation_event(name: str, scores: dict, meta_data: dict, message: str, dry_run=False) -> None: 87 | trace_id = int(meta_data["trace_id"], 16) 88 | span_id = int(meta_data["span_id"], 16) 89 | trace_flags = TraceFlags(TraceFlags.SAMPLED) 90 | 91 | attributes = {"event.name": f"gen_ai.evaluation.{name}"} 92 | for key, value in scores.items(): 93 | attributes[f"gen_ai.evaluation.{key}"] = value 94 | 95 | 
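# Note: the record below is constructed by hand (rather than via the stdlib logging API)
# so the evaluation event carries the *original* trace_id/span_id; that is what lets
# App Insights correlate each score with the span that produced the question/response pair.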
event = opentelemetry.sdk._logs.LogRecord( 96 | timestamp=time_ns(), 97 | observed_timestamp=time_ns(), 98 | trace_id=trace_id, 99 | span_id=span_id, 100 | trace_flags=trace_flags, 101 | severity_text=None, 102 | severity_number=_logs.SeverityNumber.UNSPECIFIED, 103 | body=message, 104 | attributes=attributes 105 | ) 106 | 107 | if dry_run: 108 | event_dict = json.loads(event.to_json()) 109 | print(json.dumps(event_dict, indent=2)) 110 | else: 111 | _logs.get_logger(__name__).emit(event) 112 | 113 | async def execute_batch(prompty, batch): 114 | input_fields = prompty._get_input_signature().keys() 115 | output_fields = prompty._get_output_signature().keys() 116 | 117 | coros = [] 118 | meta_data = [] 119 | for _, row in batch.iterrows(): 120 | inputs = {field: row[field] for field in input_fields} 121 | coros.append(prompty(**inputs)) 122 | meta_data.append(dict(time_stamp=row["time_stamp"], trace_id=row["trace_id"], span_id=row["span_id"])) 123 | 124 | results = await asyncio.gather(*coros) 125 | logger.info(f"Executed batch of {len(batch)} records") 126 | return results, meta_data 127 | 128 | def log_batch(name, results, meta_data, timestamp_file, dry_run=False): 129 | for result, meta in zip(results, meta_data): 130 | log_evaluation_event(name, result, meta, f"Evaluation results: {name}", dry_run=dry_run) 131 | 132 | # update the timestamp file 133 | last_timestamp = meta["time_stamp"] 134 | last_timestamp += timedelta(milliseconds=1) 135 | if not dry_run: 136 | with open(timestamp_file, "w") as f: 137 | f.write(last_timestamp.isoformat()) 138 | 139 | async def evaluate_data(df, evaluator_path, timestamp_file, dry_run=False): 140 | # load the evaluator 141 | model_config = AzureOpenAIModelConfiguration( 142 | azure_endpoint=os.getenv("OPENAI_API_BASE"), 143 | api_key=os.getenv("OPENAI_API_KEY"), 144 | api_version=os.getenv("OPENAI_API_VERSION"), 145 | azure_deployment=os.getenv("OPENAI_EVAL_MODEL") 146 | ) 147 | 148 | prompty = AsyncPrompty.load(source=evaluator_path, model={"configuration": model_config}) 149 | input_fields = prompty._get_input_signature().keys() 150 | 151 | for field in input_fields: 152 | if field not in df.columns: 153 | raise ValueError(f"Required field {field} not found in the dataframe") 154 | 155 | 156 | # evaluate by batches of 25 157 | batch_size = 25 158 | for i in range(0, len(df), batch_size): 159 | batch = df.iloc[i:i+batch_size] 160 | results, meta_data = await execute_batch(prompty, batch) 161 | log_batch(name=prompty._name, 162 | results=results, 163 | meta_data=meta_data, 164 | timestamp_file=timestamp_file, 165 | dry_run=dry_run) 166 | 167 | async def main(kql_file, timestamp_file, log_analytics_workspace, app_insights_connection_string, evaluator_path, dry_run=False): 168 | configure_logging(connection_string=app_insights_connection_string) 169 | 170 | last_timestamp = datetime(1970, 1, 1, tzinfo=timezone.utc) 171 | try: 172 | with open(timestamp_file, "r") as f: 173 | last_timestamp = datetime.fromisoformat(f.read()) 174 | 175 | except FileNotFoundError: 176 | pass 177 | 178 | with open(kql_file, "r") as f: 179 | kql_query = f.read() 180 | 181 | # Execute KQL query 182 | df = await execute_kql_query(log_analytics_workspace, kql_query, last_timestamp) 183 | 184 | logger.info(f"Query returned {len(df)} records.") 185 | 186 | # Evaluate the data and log the results 187 | await evaluate_data(df, evaluator_path, timestamp_file, dry_run=dry_run) 188 | 189 | 190 | if __name__ == "__main__": 191 | import argparse 192 | from dotenv import load_dotenv 193 | 
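# Expected .env entries (all read via os.getenv below and in evaluate_data):
#   LOG_ANALYTICS_WORKSPACE_ID, APPLICATIONINSIGHTS_CONNECTION_STRING,
#   OPENAI_API_BASE, OPENAI_API_KEY, OPENAI_API_VERSION, OPENAI_EVAL_MODEL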
load_dotenv(override=True) 194 | 195 | # dial down the logs for azure monitor -- it is so chatty 196 | azmon_logger = logging.getLogger('azure') 197 | azmon_logger.setLevel(logging.WARNING) 198 | # configure logging to stdout 199 | logging.basicConfig(level=logging.INFO) 200 | 201 | parser = argparse.ArgumentParser(description="Evaluate Azure Monitor data") 202 | parser.add_argument("--kql-file", type=str, help="KQL query file. Default is sales_data_insights.kql") 203 | parser.add_argument("--timestamp-file", type=str, help="Timestamp file. Default is in_domain_evaluator_time_stamp.txt") 204 | parser.add_argument("--evaluator-path", type=str, help="Evaluator path. Currently only prompty is supported. Default is in_domain_evaluator.prompty") 205 | parser.add_argument("--dry-run", action="store_true", help="When set, the script will not write to App Insights. Default is False.") 206 | args = parser.parse_args() 207 | 208 | this_file = pathlib.Path(__file__).resolve() 209 | if not args.kql_file: 210 | args.kql_file = this_file.parent / "azure_monitor" / "sales_data_insights.kql" 211 | if not args.timestamp_file: 212 | args.timestamp_file = this_file.parent / "azure_monitor" / "in_domain_evaluator_time_stamp.txt" 213 | if not args.evaluator_path: 214 | args.evaluator_path = this_file.parent.parent / "custom_evaluators" / "in_domain_evaluator.prompty" 215 | 216 | if args.dry_run: 217 | print("\033[31m" + "Dry run mode is enabled. No data will be written to App Insights or time_stamp file." + "\033[0m") 218 | 219 | log_analytics_workspace = os.getenv("LOG_ANALYTICS_WORKSPACE_ID") 220 | app_insights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") 221 | 222 | print("Configuration:") 223 | print(f"KQL file: {args.kql_file}") 224 | print(f"Timestamp file: {args.timestamp_file}") 225 | print(f"Log Analytics Workspace: {log_analytics_workspace}") 226 | print(f"App Insights Key: {app_insights_connection_string}") 227 | 228 | asyncio.run(main(args.kql_file, args.timestamp_file, log_analytics_workspace, app_insights_connection_string, args.evaluator_path, args.dry_run)) 229 | -------------------------------------------------------------------------------- /src/evaluate/evaluate.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from dotenv import load_dotenv 3 | import os 4 | import pathlib 5 | import pandas as pd 6 | from pprint import pprint 7 | 8 | from promptflow.client import load_flow 9 | from sales_data_insights.main import SalesDataInsights 10 | 11 | load_dotenv(override=True) 12 | 13 | def extract_execution_time(execution_time: float): 14 | return {"seconds": execution_time} 15 | 16 | def error_to_number(error: str): 17 | # return 1 if error is not None 18 | numerical_error = 0 if not error or error == "None" else 1 19 | return {"error": numerical_error} 20 | 21 | def main(model="azure_openai", data="small"): 22 | # which test set to use 23 | if data == "small": 24 | data_set = "test_set_small.jsonl" 25 | data_file = os.path.join(pathlib.Path(__file__).parent.parent.resolve(), "generate_data", data_set) 26 | elif data == "large": 27 | data_set = "test_set_large.jsonl" 28 | data_file = os.path.join(pathlib.Path(__file__).parent.parent.resolve(), "generate_data", data_set) 29 | elif data == "mini": 30 | data_set = "test_set_mini.jsonl" 31 | data_file = os.path.join(pathlib.Path(__file__).parent.parent.resolve(), "generate_data", data_set) 32 | else: 33 | data_file = data 34 | 35 | prompty_path = 
os.path.join(pathlib.Path(__file__).parent.parent.resolve(), "custom_evaluators", "sql_similarity.prompty") 36 | 37 | # Initialize evaluators 38 | sql_similarity_evaluator = load_flow(prompty_path) 39 | execution_time_evaluator = extract_execution_time 40 | error_evaluator = error_to_number 41 | 42 | # Run evaluation 43 | with tempfile.TemporaryDirectory() as d: 44 | if model == "azure_openai": 45 | evaluation_name = f"SDI: {os.getenv('OPENAI_ANALYST_CHAT_MODEL')}, dataset: {data}" 46 | else: 47 | evaluation_name = f"SDI: {model}, dataset: {data}" 48 | 49 | print(f"Starting evaluation: {evaluation_name}") 50 | 51 | # You can get the same code with this link. https://aka.ms/2024-brk141​ 52 | 53 | from promptflow.evals.evaluate import evaluate 54 | from promptflow.evals.evaluators import ContentSafetyEvaluator 55 | 56 | response = evaluate( 57 | evaluation_name=evaluation_name, 58 | data=data_file, 59 | target=SalesDataInsights(model_type=model), 60 | evaluators={ 61 | # Check out promptflow-evals package for more built-in evaluators 62 | # like gpt-groundedness, gpt-similarity and content safety metrics. 63 | "content_safety": ContentSafetyEvaluator(project_scope={ 64 | "subscription_id": "15ae9cb6-95c1-483d-a0e3-b1a1a3b06324", 65 | "resource_group_name": "danielsc", 66 | "project_name": "build-demo-project" 67 | }), 68 | "execution_time": execution_time_evaluator, 69 | "error": error_evaluator, 70 | "sql_similarity": sql_similarity_evaluator, 71 | }, 72 | evaluator_config={ 73 | "sql_similarity": { 74 | "response": "${target.query}", 75 | "ground_truth": "${data.ground_truth_query}" 76 | }, 77 | "execution_time": { 78 | "execution_time": "${target.execution_time}" 79 | }, 80 | "error": { 81 | "error": "${target.error}" 82 | }, 83 | "content_safety": { 84 | "question": "${target.query}", 85 | "answer": "${target.data}" 86 | } 87 | } 88 | ) 89 | 90 | print("\n") 91 | pprint("-----Tabular Results-----") 92 | pprint(pd.DataFrame(response.get("rows"))) 93 | print("\n") 94 | pprint("-----Average of Scores-----") 95 | pprint(response.get("metrics")) 96 | print("\n") 97 | print("-----Studio URL-----") 98 | pprint(response["studio_url"]) 99 | 100 | import json 101 | with open("response.json", "w") as f: 102 | json.dump(response, f, indent=4) 103 | 104 | 105 | if __name__ == '__main__': 106 | # add argparse to load --model parameter which defaults to "azure_openai" 107 | # valid values are: ["azure_openai", "phi3_mini", "phi3_medium", "cohere_chat", "mistral_small", "mistral_large", "llama3"] 108 | # data parameter defaults to "small" and can be either "small", "large" or a path to a jsonl file 109 | import argparse 110 | parser = argparse.ArgumentParser() 111 | parser.add_argument("--model", help="Model to evaluate", default="azure_openai", choices=["azure_openai", "phi3_mini", "phi3_medium", "cohere_chat", "mistral_small", "mistral_large", "llama3"]) 112 | parser.add_argument("--data", help="Data to evaluate. 
Can be either 'mini', 'small', 'large', or a file name.", default="small") 113 | args = parser.parse_args() 114 | main(model=args.model, data=args.data) -------------------------------------------------------------------------------- /src/evaluate/sequence.sh: -------------------------------------------------------------------------------- 1 | python evaluate.py --data large --model llama3 2 | python evaluate.py --data large --model phi3_mini 3 | python evaluate.py --data large --model phi3_medium 4 | python evaluate.py --data large --model cohere_chat 5 | python evaluate.py --data large --model mistral_small 6 | python evaluate.py --data large --model mistral_large 7 | python evaluate.py --data large --model azure_openai 8 | -------------------------------------------------------------------------------- /src/evaluate/test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 49, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "with open('response.json') as f:\n", 11 | " data = json.load(f)\n", 12 | " " 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 50, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "outputs.data\n", 25 | "outputs.error\n", 26 | "outputs.query\n", 27 | "outputs.execution_time\n", 28 | "inputs.question\n", 29 | "inputs.ground_truth_query\n", 30 | "outputs.execution_time.seconds\n", 31 | "outputs.error.error\n", 32 | "outputs.sql_similarity.score\n", 33 | "outputs.sql_similarity.explanation\n", 34 | "line_number\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "import pandas as pd\n", 40 | "pd.set_option('display.max_colwidth', None)\n", 41 | "df = pd.DataFrame(data[\"rows\"])\n", 42 | "for field in df:\n", 43 | " print(field)\n", 44 | " \n", 45 | "df = df[[\"outputs.query\", \"inputs.question\", \"inputs.ground_truth_query\", \"outputs.sql_similarity.score\", \"outputs.sql_similarity.explanation\"]]\n", 46 | "\n", 47 | "df[\"better_query\"] = None\n", 48 | "df[\"better_query\"] = df[\"better_query\"].astype(str)\n", 49 | "\n", 50 | "import os\n", 51 | "def save_df(df):\n", 52 | " index = 1\n", 53 | " while True:\n", 54 | " file_name = f\"response_with_better_query_{str(index).zfill(3)}.jsonl\"\n", 55 | " if not os.path.exists(file_name):\n", 56 | " break\n", 57 | " index += 1\n", 58 | " # save the dataframe to a jsonl file\n", 59 | " df.to_json(file_name, orient=\"records\", lines=True)\n", 60 | " print(f\"saved to {file_name}\")" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 52, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "application/vnd.jupyter.widget-view+json": { 71 | "model_id": "7ed68f534c1947cc8a2558028e9203a0", 72 | "version_major": 2, 73 | "version_minor": 0 74 | }, 75 | "text/plain": [ 76 | "HBox(children=(Button(description='Previous', style=ButtonStyle()), Button(description='Next', style=ButtonSty…" 77 | ] 78 | }, 79 | "metadata": {}, 80 | "output_type": "display_data" 81 | }, 82 | { 83 | "data": { 84 | "application/vnd.jupyter.widget-view+json": { 85 | "model_id": "af749838314c441487b215886e774da6", 86 | "version_major": 2, 87 | "version_minor": 0 88 | }, 89 | "text/plain": [ 90 | "HTML(value='')" 91 | ] 92 | }, 93 | "metadata": {}, 94 | "output_type": "display_data" 95 | }, 96 | { 97 | "data": { 98 | "application/vnd.jupyter.widget-view+json": { 99 | "model_id": 
"1314212e07ca4f00ae61e5928bc47d26", 100 | "version_major": 2, 101 | "version_minor": 0 102 | }, 103 | "text/plain": [ 104 | "HTML(value='')" 105 | ] 106 | }, 107 | "metadata": {}, 108 | "output_type": "display_data" 109 | }, 110 | { 111 | "data": { 112 | "application/vnd.jupyter.widget-view+json": { 113 | "model_id": "a4bc6c0a3d8548e0899beffddc287a81", 114 | "version_major": 2, 115 | "version_minor": 0 116 | }, 117 | "text/plain": [ 118 | "HTML(value='')" 119 | ] 120 | }, 121 | "metadata": {}, 122 | "output_type": "display_data" 123 | }, 124 | { 125 | "data": { 126 | "application/vnd.jupyter.widget-view+json": { 127 | "model_id": "ba2a1247e8ad4e93ac5eb0410c316750", 128 | "version_major": 2, 129 | "version_minor": 0 130 | }, 131 | "text/plain": [ 132 | "HTML(value='')" 133 | ] 134 | }, 135 | "metadata": {}, 136 | "output_type": "display_data" 137 | }, 138 | { 139 | "data": { 140 | "application/vnd.jupyter.widget-view+json": { 141 | "model_id": "afa9f2b2bb4946bbad1d818cc224e4b7", 142 | "version_major": 2, 143 | "version_minor": 0 144 | }, 145 | "text/plain": [ 146 | "HTML(value='')" 147 | ] 148 | }, 149 | "metadata": {}, 150 | "output_type": "display_data" 151 | }, 152 | { 153 | "data": { 154 | "application/vnd.jupyter.widget-view+json": { 155 | "model_id": "efd32b0dd96b4a6f9aed0eee89dd4c0f", 156 | "version_major": 2, 157 | "version_minor": 0 158 | }, 159 | "text/plain": [ 160 | "RadioButtons(description='Better:', options=('outputs.query', 'inputs.ground_truth_query', 'None'), value='out…" 161 | ] 162 | }, 163 | "metadata": {}, 164 | "output_type": "display_data" 165 | }, 166 | { 167 | "data": { 168 | "application/vnd.jupyter.widget-view+json": { 169 | "model_id": "65c77fde8ac64dbf9186c05939827665", 170 | "version_major": 2, 171 | "version_minor": 0 172 | }, 173 | "text/plain": [ 174 | "Button(description='Save', style=ButtonStyle())" 175 | ] 176 | }, 177 | "metadata": {}, 178 | "output_type": "display_data" 179 | }, 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "saved to response_with_better_query_006.jsonl\n", 185 | "saved to response_with_better_query_007.jsonl\n", 186 | "saved to response_with_better_query_008.jsonl\n", 187 | "saved to response_with_better_query_009.jsonl\n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "import ipywidgets as widgets\n", 193 | "from IPython.display import display, HTML\n", 194 | "import pandas as pd\n", 195 | "\n", 196 | "# Create widgets\n", 197 | "record_number = widgets.HTML()\n", 198 | "prev_button = widgets.Button(description=\"Previous\")\n", 199 | "next_button = widgets.Button(description=\"Next\")\n", 200 | "user_question = widgets.HTML()\n", 201 | "output_query = widgets.HTML()\n", 202 | "ground_truth_query = widgets.HTML()\n", 203 | "sql_similarity_score = widgets.HTML()\n", 204 | "sql_similarity_explanation = widgets.HTML()\n", 205 | "user_input = widgets.RadioButtons(options=['outputs.query', 'inputs.ground_truth_query', 'None'], description='Better:')\n", 206 | "save_button = widgets.Button(description=\"Save\")\n", 207 | "\n", 208 | "# Display widgets\n", 209 | "display(widgets.HBox([prev_button, next_button, record_number]))\n", 210 | "display(user_question, output_query, ground_truth_query, sql_similarity_score, sql_similarity_explanation, user_input, save_button)\n", 211 | "\n", 212 | "# Initialize index\n", 213 | "index = 0\n", 214 | "\n", 215 | "# Function to update widgets\n", 216 | "def update_widgets(index):\n", 217 | " user_question.value = f\"
{df.loc[index, 'inputs.question']}\"\n",
218 | "    output_query.value = f\"{df.loc[index, 'outputs.query']}\"\n",
219 | "    ground_truth_query.value = f\"{df.loc[index, 'inputs.ground_truth_query']}\"\n",
220 | "    sql_similarity_score.value = f\"{df.loc[index, 'outputs.sql_similarity.score']}\"\n",
221 | "    sql_similarity_explanation.value = f\"{df.loc[index, 'outputs.sql_similarity.explanation']}
\"\n", 222 | " user_input.value = df.loc[index, 'better_query'] if 'better_query' in df.columns and pd.notna(df.loc[index, 'better_query']) else None\n", 223 | " record_number.value = f\"Record: {index+1}/{len(df)}\"\n", 224 | "\n", 225 | "# Function to handle button clicks\n", 226 | "def on_next_button_clicked(b):\n", 227 | " global index\n", 228 | " df.loc[index, 'better_query'] = user_input.value\n", 229 | " index = min(index + 1, len(df) - 1)\n", 230 | " update_widgets(index)\n", 231 | "\n", 232 | "def on_prev_button_clicked(b):\n", 233 | " global index\n", 234 | " df.loc[index, 'better_query'] = user_input.value\n", 235 | " index = max(index - 1, 0)\n", 236 | " update_widgets(index)\n", 237 | "\n", 238 | "def on_save_button_clicked(b):\n", 239 | " save_df(df)\n", 240 | "\n", 241 | "# Attach the function to the button\n", 242 | "next_button.on_click(on_next_button_clicked)\n", 243 | "prev_button.on_click(on_prev_button_clicked)\n", 244 | "save_button.on_click(on_save_button_clicked)\n", 245 | "\n", 246 | "# Initialize widgets\n", 247 | "update_widgets(index)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 53, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "#show rows of df where better_query is not 'outputs.query'\n", 257 | "df[df[\"better_query\"] != \"outputs.query\"]\n", 258 | "# set fiel ground_truth_query to whaterver is better_query says (data.ground_truth_query or outputs.query)\n", 259 | "df[\"ground_truth_query\"] = None\n", 260 | "# set question to data.question\n", 261 | "df[\"question\"] = df[\"inputs.question\"]\n", 262 | "for index, row in df.iterrows():\n", 263 | " if row[\"better_query\"] == \"outputs.query\":\n", 264 | " df.at[index, \"ground_truth_query\"] = row[\"outputs.query\"]\n", 265 | " else:\n", 266 | " df.at[index, \"ground_truth_query\"] = row[\"inputs.ground_truth_query\"]\n", 267 | "\n", 268 | "\n" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 55, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "saved to response_with_better_query_001.jsonl\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "save_df(df[[\"question\", \"ground_truth_query\"]])" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [] 294 | } 295 | ], 296 | "metadata": { 297 | "kernelspec": { 298 | "display_name": "assistant-eval2", 299 | "language": "python", 300 | "name": "python3" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 3 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython3", 312 | "version": "3.11.0" 313 | } 314 | }, 315 | "nbformat": 4, 316 | "nbformat_minor": 2 317 | } 318 | -------------------------------------------------------------------------------- /src/evaluation_readme.md: -------------------------------------------------------------------------------- 1 | # Evaluation 2 | 3 | ## Evaluators 4 | 5 | Evaluators can be found [here](./custom_evaluators/) 6 | 7 | | Evaluator | Description | 8 | |----------|----------| 9 | | [SQLSimilarityEvaluator](./custom_evaluators/sql_similarity/) | Compares two SQL queries for similarity using LLM | 10 | | [CompareEvaluator](./custom_evaluators/compare.py) | Compares two SQL queries to be strictly the same | 11 | 12 | ## 
Application to be Evaluated
13 | The application to be evaluated is [Sales Data Insights](./sales_data_insights/)
14 | 
15 | ## How to Evaluate?
16 | 
17 | To evaluate, run the following command:
18 | 
19 | ```bash
20 | python src/evaluate/evaluate.py
21 | ```
22 | This should output results in tabular form, average scores, and an `AI Studio URL` where evaluation results can be easily viewed for comparison
23 | 
24 | ```log
25 | '-----Tabular Results-----'
26 | outputs.data outputs.error ... outputs.execution_time.seconds line_number
27 | 0 [{'Day': 1, 'Total_Orders': 35}, {'Day': 2, 'T... None ... 3.06 0
28 | 1 [{'product_type': 'JACKETS & VESTS'}, {'produc... None ... 1.10 1
29 | 2 [{'Total_Revenue': 2517.6189987606}] None ... 2.52 2
30 | 3 (Failed) Execution failed on sql 'SELECT SUM(Sum_of_Ord... ... 3.40 3
31 | 4 [{'Day': 1, 'sub_category': 'MEN'S FOOTWEAR', ... None ... 2.87 4
32 | 
33 | [5 rows x 11 columns]
34 | 
35 | '-----Average of Scores-----'
36 | {'compare.score': 0.0,
37 | 'execution_time.seconds': 2.59,
38 | 'sql_similarity.score': 3.8}
39 | 
40 | -----Studio URL-----
41 | 'https://ai.azure.com/build/evaluation/assistant_pf_demo_variant_0_20240509_143418_546676?wsid=/subscriptions/e0fd569c-e34a-4249-8c24-e8d723c7f054/resourceGroups/rg-qunsongai/providers/Microsoft.MachineLearningServices/workspaces/qunsong-0951'
42 | ```
43 | 
44 | Clicking on the Studio URL will take you to `AI Studio`, where comparisons can be done easily
45 | 
46 | -------------------------------------------------------------------------------- /src/finetune/finetune.py: -------------------------------------------------------------------------------- 1 | from sales_data_insights.system_message import system_message
2 | import pandas as pd
3 | import os, pathlib, time, json
4 | from openai import AzureOpenAI
5 | from azure.identity import DefaultAzureCredential
6 | import json
7 | import os
8 | import requests
9 | 
10 | def create_datasets(data_set, test_size=100, validation_size=40):
11 | # Create training and validation datasets from the data set
12 | # each line of the data set is a JSON record with a question and its ground-truth query
13 | # a training record looks like this:
14 | # {
15 | # "custom_id":"task-237",
16 | # "question":"How many orders were placed on holidays last month?",
17 | # "ground_truth_query":"Error: Holiday data is not available in the table"
18 | # }
19 | data_set_df = pd.read_json(data_set, lines=True)
20 | formatted_df = []
21 | for i, row in data_set_df.iterrows():
22 | # {"messages":
23 | # [
24 | # {"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."},
25 | # {"role": "user", "content": "What's the capital of France?"},
26 | # {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}
27 | # ]
28 | # }
29 | 
30 | formatted_df.append(
31 | {
32 | "messages": [
33 | {"role": "system", "content": system_message},
34 | {"role": "user", "content": row["question"]},
35 | {"role": "assistant", "content": row["ground_truth_query"]}
36 | ]
37 | }
38 | )
39 | finetune_df = pd.DataFrame(formatted_df)
40 | finetune_df = finetune_df.sample(test_size + validation_size, random_state=42)
41 | finetune_df.reset_index(drop=True, inplace=True)
42 | validation_set = finetune_df.loc[:validation_size-1][["messages"]]
43 | training_set = finetune_df.loc[validation_size:][["messages"]]
44 | training_set.to_json("training_set.jsonl", orient="records", lines=True)
45 | validation_set.to_json("validation_set.jsonl", orient="records", lines=True)
46 | return ("training_set.jsonl", "validation_set.jsonl")
47 | 
48 | def 
wait_for_file(client, file_id): 49 | while True: 50 | f = client.files.retrieve(file_id) 51 | current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 52 | print(current_time, file_id, "file status:", f.status) 53 | if f.status.lower() == "processed": 54 | print("file is processed") 55 | break 56 | time.sleep(2) 57 | 58 | def submit(client, model, data_set, test_set, train_rows, validation_rows): 59 | 60 | training_file_name, validation_file_name = create_datasets(data_set=data_set, test_size=train_rows, validation_size=validation_rows) 61 | 62 | # Upload the training and validation dataset files to Azure OpenAI with the SDK. 63 | training_response = client.files.create( 64 | file=open(training_file_name, "rb"), purpose="fine-tune" 65 | ) 66 | training_file_id = training_response.id 67 | 68 | validation_response = client.files.create( 69 | file=open(validation_file_name, "rb"), purpose="fine-tune" 70 | ) 71 | validation_file_id = validation_response.id 72 | 73 | print("Training file ID:", training_file_id) 74 | wait_for_file(client, training_file_id) 75 | print("Validation file ID:", validation_file_id) 76 | wait_for_file(client, validation_file_id) 77 | 78 | # extract the file name from the data_set path 79 | data_set_name = os.path.basename(data_set) 80 | # replace . with - in the dataset name 81 | data_set_name = data_set_name.replace(".", "-") 82 | 83 | response = client.fine_tuning.jobs.create( 84 | training_file=training_file_id, 85 | validation_file=validation_file_id, 86 | model=model, # Enter base model name. Note that in Azure OpenAI the model name contains dashes and cannot contain dot/period characters. 87 | seed = 42, # seed parameter controls reproducibility of the fine-tuning job. If no seed is specified one will be generated automatically. 88 | suffix=f"{data_set_name}-{train_rows}-{validation_rows}" 89 | ) 90 | 91 | job_id = response.id 92 | 93 | # You can use the job ID to monitor the status of the fine-tuning job. 94 | # The fine-tuning job will take some time to start and complete. 
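# (monitor_job below polls client.fine_tuning.jobs.retrieve and list_events every
# 10 seconds until the job reports "succeeded".)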
95 | 96 | print("Job ID:", response.id) 97 | print("Status:", response.status) 98 | print(response.model_dump_json(indent=2)) 99 | return job_id 100 | def monitor_job(client, job_id): 101 | job = client.fine_tuning.jobs.retrieve(job_id) 102 | current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 103 | print(current_time, "Job status:", job.status) 104 | printed = 0 105 | while True: 106 | job = client.fine_tuning.jobs.retrieve(job_id) 107 | current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 108 | print(current_time, "Job status:", job.status) 109 | 110 | response = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id) 111 | events = response.data.copy() 112 | events.reverse() 113 | events = events[printed:] 114 | printed += len(events) 115 | for event in events: 116 | print(event) 117 | 118 | if job.status == "succeeded": 119 | print("Job completed") 120 | print(job) 121 | return job.fine_tuned_model 122 | if job.status in ("failed", "cancelled"): raise RuntimeError(f"fine-tuning job ended with status {job.status}") 123 | time.sleep(10) 124 | 125 | def deploy(fine_tuned_model): 126 | 127 | 128 | credential = DefaultAzureCredential() 129 | 130 | subscription = os.getenv("FT_SUBSCRIPTION") 131 | resource_group = os.getenv("FT_RESOURCE_GROUP") 132 | resource_name = os.getenv("FT_RESOURCE_NAME") 133 | # resource_id = f"/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.CognitiveServices/accounts/{resource_name}" 134 | token = credential.get_token("https://management.azure.com/.default").token 135 | # the deployment name is derived from the fine-tuned model name 136 | 137 | # clean up the deployment name -- the model name in the request body must stay exactly as returned by the fine-tuning job 138 | # Deployment names can only include alphanumeric characters, underscores and hyphens, and only allow 2 to 64 characters. 139 | model_deployment_name = fine_tuned_model.replace(".", "-")[0:64] 140 | 141 | deploy_params = {'api-version': "2023-05-01"} 142 | deploy_headers = {'Authorization': 'Bearer {}'.format(token), 'Content-Type': 'application/json'} 143 | 144 | deploy_data_obj = { 145 | "sku": {"name": "standard", "capacity": 1}, 146 | "properties": { 147 | "model": { 148 | "format": "OpenAI", 149 | "name": fine_tuned_model, # retrieve this value from the previous call, it will look like gpt-35-turbo-0613.ft-b044a9d3cf9c4228b5d393567f693b83 150 | "version": "1" 151 | } 152 | } 153 | } 154 | 155 | deploy_data = json.dumps(deploy_data_obj) 156 | 157 | request_url = f'https://management.azure.com/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.CognitiveServices/accounts/{resource_name}/deployments/{model_deployment_name}' 158 | 159 | print(f'Creating deployment: {model_deployment_name}') 160 | print(json.dumps(deploy_data_obj, indent=2)) 161 | 162 | r = requests.put(request_url, params=deploy_params, headers=deploy_headers, data=deploy_data) 163 | 164 | print(r) 165 | print(r.reason) 166 | print(r.json()) 167 | 168 | 169 | def main(model, data_set, test_set, train_rows, validation_rows, monitor): 170 | client = AzureOpenAI( 171 | azure_endpoint = os.getenv("FT_OPENAI_API_BASE"), 172 | api_key=os.getenv("FT_OPENAI_API_KEY"), 173 | api_version="2024-05-01-preview" # This API version or later is required to access seed/events/checkpoint capabilities 174 | ) 175 | 176 | if not monitor: 177 | job_id = submit(client, model, data_set, test_set, train_rows, validation_rows) 178 | else: 179 | job_id = monitor 180 | 181 | fine_tuned_model = monitor_job(client, job_id) 182 | 183 | print("Fine-tuned model:", fine_tuned_model) 184 | 185 | # Deploy the fine-tuned model 186 | 
deploy(fine_tuned_model) 187 | 188 | 189 | 190 | if __name__ == '__main__': 191 | import argparse 192 | test_set = os.path.join(pathlib.Path(__file__).parent.parent.resolve(), "generate_data", "test_set_small.jsonl") 193 | data_set = os.path.join(pathlib.Path(__file__).parent.parent.resolve(), "generate_data", "train_set_xxl.jsonl") 194 | 195 | parser = argparse.ArgumentParser() 196 | parser.add_argument("--model", help="Model to finetune", default="gpt-35-turbo-1106") 197 | parser.add_argument("--data_set", help="The data set to use", default=data_set) 198 | parser.add_argument("--test_set", help="The test set to use", default=test_set) 199 | parser.add_argument("--train_rows", help="Number of rows to finetune on", type=int, default=100) 200 | parser.add_argument("--validation_rows", help="Number of rows to use for validation", type=int, default=100) 201 | parser.add_argument("--monitor", help="Don't start, just monitor the job") 202 | parser.add_argument("--deploy", help="Don't train, just deploy the model and test it") 203 | args = parser.parse_args() 204 | main(args.model, args.data_set, args.test_set, args.train_rows, args.validation_rows, args.monitor) -------------------------------------------------------------------------------- /src/generate_data/batch_generate_sql.py: -------------------------------------------------------------------------------- 1 | from sales_data_insights.system_message import system_message 2 | from dotenv import load_dotenv 3 | from openai import AzureOpenAI 4 | import pandas as pd 5 | import os, json, time 6 | 7 | import tiktoken 8 | 9 | load_dotenv(override=True) 10 | 11 | 12 | def upload_input_file(file_client, batch_input): 13 | # upload to aoai using file client 14 | print("uploading batch input to Azure OpenAI") 15 | r = file_client.files.create( 16 | file=open(batch_input, "rb"), 17 | purpose="batch", 18 | ) 19 | file_id = r.id 20 | print("uploaded file id", file_id) 21 | 22 | print("waiting for file to be processed") 23 | while True: 24 | f = file_client.files.retrieve(file_id) 25 | if f.status.lower() == "processed": 26 | print("file is processed") 27 | break 28 | current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 29 | print(current_time, "file status:", f.status) 30 | time.sleep(2) 31 | 32 | return file_id 33 | 34 | def submit_batch_job(batch_client, file_id): 35 | print("submitting batch job") 36 | b = batch_client.batches.create( 37 | input_file_id=file_id, 38 | endpoint="/v1/chat/completions", 39 | completion_window="24h", 40 | ) 41 | print("submitted batch job with id", b.id) 42 | return b.id 43 | 44 | def monitor_and_download(batch_client, file_client, batch_id, batch_output): 45 | print("monitoring batch job", batch_id) 46 | while True: 47 | b = batch_client.batches.retrieve(batch_id) 48 | if b.status.lower() == "completed": 49 | print("batch job completed") 50 | break 51 | current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 52 | print(current_time, "batch job status:", b.status) 53 | time.sleep(10) 54 | 55 | 56 | print("downloading batch output") 57 | content = file_client.files.content(b.output_file_id) 58 | lines = content.content.decode("utf-8").strip().split("\n") 59 | print("writing batch output to", batch_output) 60 | with open(batch_output, "w") as f: 61 | for line in lines: 62 | f.write(line + "\n") 63 | 64 | def merge_output_write_result(questions, batch_output): 65 | # determine the merged output file name from the batch output file name 66 | base = os.path.splitext(batch_output)[0] 67 | 
output_jsonl = f"{base}_merged.jsonl" 68 | 69 | print("merging batch input and output") 70 | # load both with pandas 71 | df_input = pd.read_csv(questions) 72 | df_output = pd.read_json(batch_output, lines=True) 73 | 74 | # add "custom_id": f"task-{i}" to df_input 75 | df_input["custom_id"] = df_input.index.map(lambda x: f"task-{x}") 76 | 77 | # merge on custom_id 78 | df = pd.merge(df_input, df_output, on="custom_id") 79 | 80 | # extract the response from the output 81 | df["ground_truth_query"] = df["response"].map(lambda x: x["body"]["choices"][0]["message"]["content"]) 82 | 83 | 84 | # sum up the df["usage"]["total_tokens"], df["usage"]["completion_tokens"], df["usage"]["prompt_tokens"] 85 | total_tokens = df["response"].map(lambda x: x["body"]["usage"]["total_tokens"]).sum() 86 | completion_tokens = df["response"].map(lambda x: x["body"]["usage"]["completion_tokens"]).sum() 87 | prompt_tokens = df["response"].map(lambda x: x["body"]["usage"]["prompt_tokens"]).sum() 88 | 89 | # price for gpt-4-turbo is $0.01 per 1000 prompt tokens and $0.03 per 1000 completion tokens 90 | # batch costs 50% less than single requests 91 | print("total rows:", len(df)) 92 | print("completion tokens:", completion_tokens) 93 | print("prompt tokens:", prompt_tokens) 94 | print("total tokens:", total_tokens) 95 | # make sure the numbers are aligned to the right with 2 decimal places 96 | print("\nCost breakdown \n(assuming $0.005/$0.015 per 1000 prompt/completion tokens):") 97 | print("-------------------------------------") 98 | print(f"price for completion tokens: $ {completion_tokens * 0.03/2000:>6.2f}") 99 | print(f"price for prompt tokens: $ {prompt_tokens * 0.01/2000:>6.2f}") 100 | print(f"total price: $ {(completion_tokens * 0.03/2000 + prompt_tokens * 0.01/2000):>6.2f}") 101 | 102 | 103 | # if "ground_truth_query" starts with "```sql" and ends with "```", remove them 104 | df["ground_truth_query"] = df["ground_truth_query"].map(lambda x: x[6:-3].strip() if x.startswith("```sql") and x.endswith("```") else x) 105 | 106 | # write to jsonl 107 | print("\nwriting result to", output_jsonl) 108 | df[[ "custom_id","question", "ground_truth_query"]].to_json(output_jsonl, orient="records", lines=True) 109 | return output_jsonl 110 | 111 | def count_tokens(content): 112 | model = "gpt-4" 113 | encoding = tiktoken.encoding_for_model(model) 114 | encoded = encoding.encode(content) 115 | return len(encoded) 116 | 117 | def create_batches(questions, batch_tokens=2400000): 118 | # determine the output file name from questions 119 | base = os.path.splitext(questions)[0] 120 | 121 | # create a batch input file for each batch_tokens 122 | df = pd.read_csv(questions) 123 | batches = [] 124 | batch = [] 125 | prompt_tokens = 0 126 | 127 | for i, row in df.iterrows(): 128 | """ 129 | Create a row with full request data as required by the batch API 130 | example: 131 | { 132 | "custom_id": "task-0", # from iterating over the rows 133 | "method": "POST", 134 | "url": "/v1/chat/completions", 135 | "body": { 136 | "model": "gpt-4-1106-preview", 137 | "messages": [ 138 | {"role": "system", "content": "You are a helpful assistant."}, 139 | {"role": "user", "content": "List and describe the top five most influential sci-fi movies of the 21st century and how they've impacted pop culture."} 140 | ] 141 | } 142 | } 143 | """ 144 | question = row["question"] 145 | messages = [{"role": "system", "content": system_message}] 146 | messages.append({"role": "user", "content": f"{question}\nGive only the query in SQL format"}) 147 | 
batch.append({ 148 | "custom_id": f"task-{i}", 149 | "method": "POST", 150 | "url": "/v1/chat/completions", 151 | "body": { 152 | "model": os.environ["OPENAI_BATCH_MODEL"], 153 | "messages": messages 154 | } 155 | }) 156 | # not exactly accurate, but close enough 157 | prompt_tokens += count_tokens(str(messages)) 158 | 159 | if prompt_tokens > batch_tokens: 160 | batches.append(batch) 161 | print(f"batch {len(batches)} has {prompt_tokens} tokens and {len(batch)} questions") 162 | batch = [] 163 | prompt_tokens = 0 164 | 165 | if batch: 166 | batches.append(batch) 167 | print(f"batch {len(batches)} has {prompt_tokens} tokens and {len(batch)} questions") 168 | 169 | # save the batches to disk 170 | batch_input_files = [] 171 | for i, batch in enumerate(batches): 172 | batch_input = f"{base}_batch_{i}.jsonl" 173 | with open(batch_input, "w") as f: 174 | for line in batch: 175 | f.write(json.dumps(line) + "\n") 176 | batch_input_files.append(batch_input) 177 | print("wrote batch input to", batch_input) 178 | 179 | return batch_input_files 180 | 181 | 182 | def main(questions, file_id, batch_id): 183 | # note: the file_id and batch_id parameters are accepted but currently ignored -- batches are always created and submitted from scratch 184 | 185 | file_client = AzureOpenAI( 186 | api_key=os.environ["OPENAI_BATCH_API_KEY"], 187 | api_version=os.environ["OPENAI_BATCH_API_VERSION"], 188 | azure_endpoint=os.environ["OPENAI_BATCH_BASE"] 189 | ) 190 | 191 | batch_client = AzureOpenAI( 192 | api_key=os.environ["OPENAI_BATCH_API_KEY"], 193 | api_version=os.environ["OPENAI_BATCH_API_VERSION"], 194 | azure_endpoint=os.environ["OPENAI_BATCH_BASE"], 195 | azure_deployment=os.environ["OPENAI_BATCH_MODEL"] 196 | ) 197 | 198 | batches = create_batches(questions) 199 | merged_outputs = [] 200 | for batch_input in batches: 201 | base = os.path.splitext(batch_input)[0] 202 | batch_output = f"{base}_output.jsonl" 203 | 204 | file_id = upload_input_file(file_client=file_client, 205 | batch_input=batch_input) 206 | 207 | batch_id = submit_batch_job(batch_client, file_id) 208 | 209 | monitor_and_download(batch_client, file_client, batch_id, batch_output) 210 | 211 | merged_output = merge_output_write_result(questions, batch_output) 212 | merged_outputs.append(merged_output) 213 | 214 | # copy the batch outputs to a single file 215 | base = os.path.splitext(questions)[0] 216 | final_file = f"{base}.jsonl" 217 | with open(final_file, "w") as f: 218 | for output in merged_outputs: 219 | with open(output, "r") as g: 220 | for line in g: 221 | f.write(line) 222 | 223 | print("wrote final output to", final_file) 224 | 225 | 226 | 227 | if __name__ == "__main__": 228 | # we need those only for local testing 229 | import argparse 230 | 231 | parser = argparse.ArgumentParser() 232 | parser.add_argument("--questions", help="the csv file containing the questions", default="src/generate_data/test_set_small.csv") 233 | parser.add_argument("--file_id", help="the file id of the batch input file -- if present, will skip creating the input file and use this file id instead (note: not currently wired up in main)") 234 | # batch_5064389a-782c-4a38-a990-997bdd4784a2 235 | parser.add_argument("--batch_id", help="the batch id of the batch input file -- if present, will go straight to monitoring and downloading the output file (note: not currently wired up in main)") 236 | 237 | args = parser.parse_args() 238 | 239 | main(args.questions, args.file_id, args.batch_id) -------------------------------------------------------------------------------- /src/generate_data/generate.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | def generate_order_data(num_rows,
boost): 5 | # Generate 'Number_of_Orders' first to use as a base for constraints 6 | number_of_orders = np.int32(np.random.randint(0, 10, num_rows) * boost/10) 7 | number_of_orders = np.maximum(number_of_orders, 1) 8 | # for the rows that have number_of_orders == 1, introduce a random chance to increase the number of orders 9 | for i in range(num_rows): 10 | if number_of_orders[i] == 1: 11 | if np.random.random() < 0.1: 12 | number_of_orders[i] = 2 13 | # Define averages for scalability 14 | average_order_value = 30 * 1/boost # Average order value per order 15 | items_per_order = 3.5 # Average number of items per order 16 | shipping_cost_per_order = 7.5 # Average shipping cost per order 17 | time_per_order = 1.5 # Average fulfillment time per order 18 | 19 | # Generate other columns with normal distribution based on 'Number_of_Orders' 20 | data = { 21 | "Number_of_Orders": number_of_orders, 22 | "Sum_of_Order_Value_USD": np.abs(np.random.normal(average_order_value, 5, num_rows)) * number_of_orders, 23 | "Sum_of_Number_of_Items": np.abs(np.floor(np.random.normal(items_per_order, 3, num_rows))) * number_of_orders, 24 | "Number_of_Orders_with_Discount": np.random.randint(0, number_of_orders + 1), 25 | "Sum_of_Discount_Percentage": np.random.uniform(0.1, 1, num_rows) * 100, # Constant range for percentage 26 | "Sum_of_Shipping_Cost_USD": np.abs(np.random.normal(shipping_cost_per_order, 2, num_rows)) * number_of_orders, 27 | "Number_of_Orders_Returned": np.random.randint(0, number_of_orders + 1), 28 | "Number_of_Orders_Cancelled": np.random.randint(0, number_of_orders + 1), 29 | "Sum_of_Time_to_Fulfillment": np.random.normal(time_per_order, 0.5, num_rows) * number_of_orders, 30 | "Number_of_Orders_Repeat_Customers": np.random.randint(0, number_of_orders + 1) 31 | } 32 | 33 | return pd.DataFrame(data) 34 | 35 | def save_to_csv(df, filename="data/order_data.csv"): 36 | # Save the DataFrame to a CSV file 37 | df.to_csv(filename, index=False) 38 | print(f"Data saved to {filename}") 39 | 40 | def save_to_sql(df, filename="data/order_data.db"): 41 | import sqlite3 42 | conn = sqlite3.connect(filename) 43 | df.to_sql("order_data", conn, if_exists="replace", index=False) 44 | conn.close() 45 | print(f"Data saved to {filename}") 46 | 47 | # read in product categories 48 | import os 49 | current_dir = os.path.dirname(os.path.realpath(__file__)) 50 | product_categories = pd.read_csv(f"{current_dir}/product_categories.csv") 51 | num_categories = len(product_categories) 52 | 53 | regions = ["North America", "Europe", "Asia-Pacific", "Africa", "Middle East", "South America"] 54 | regions = [region.upper() for region in regions] 55 | 56 | num_regions = len(regions) 57 | 58 | start_date = pd.to_datetime('2023-01-01') 59 | end_date = pd.to_datetime('2024-05-21') 60 | # end_date = pd.to_datetime('2023-01-03') 61 | num_days = (end_date - start_date).days 62 | total_data = pd.DataFrame() 63 | 64 | for region_index in range(num_regions): 65 | # generate order data for each day 66 | for i in range(num_days): 67 | order_day = start_date + pd.DateOffset(days=i) 68 | boost1 = (10 + order_day.dayofweek + 10 * (i/num_days)) / 10 69 | boost2 = (0.2 * np.sin(i/90 * 2 * np.pi) + 1) 70 | boost3 = (10-region_index) / 10 71 | boost = boost1 * boost2 * boost3 72 | #print(f"boost: {boost:.2f}, {boost1:.2f}, {boost2:.2f}, {boost3:.2f} on {order_day.dayofweek}, i={i}, region {regions[region_index]}") 73 | order_data = generate_order_data(num_categories, boost) 74 | order_data['Year'] = order_day.year 75 | order_data['Month'] = 
order_day.month 76 | order_data['Day'] = order_day.day 77 | order_data['Date'] = order_day 78 | order_data['Day_of_Week'] = order_day.dayofweek 79 | 80 | # bring product categories and order data together 81 | # in the end we will have a table with product categories and order data 82 | for col in product_categories.columns: 83 | order_data[col] = product_categories[col] 84 | order_data['Region'] = regions[region_index] 85 | 86 | total_data = pd.concat([total_data, order_data], ignore_index=True) 87 | 88 | save_to_sql(total_data, filename=f"{current_dir}/../assistant_flow/sales_data_insights/data/order_data.db") 89 | -------------------------------------------------------------------------------- /src/generate_data/product_categories.csv: -------------------------------------------------------------------------------- 1 | main_category,sub_category,product_type 2 | APPAREL,MEN'S CLOTHING,JACKETS & VESTS 3 | APPAREL,MEN'S CLOTHING,SHIRTS 4 | APPAREL,MEN'S CLOTHING,PANTS & SHORTS 5 | APPAREL,MEN'S CLOTHING,UNDERWEAR & BASE LAYERS 6 | APPAREL,MEN'S CLOTHING,OTHER 7 | APPAREL,WOMEN'S CLOTHING,JACKETS & VESTS 8 | APPAREL,WOMEN'S CLOTHING,TOPS 9 | APPAREL,WOMEN'S CLOTHING,PANTS & SHORTS 10 | APPAREL,WOMEN'S CLOTHING,UNDERWEAR & BASE LAYERS 11 | APPAREL,WOMEN'S CLOTHING,OTHER 12 | APPAREL,CHILDREN'S CLOTHING,JACKETS & VESTS 13 | APPAREL,CHILDREN'S CLOTHING,TOPS 14 | APPAREL,CHILDREN'S CLOTHING,PANTS & SHORTS 15 | APPAREL,CHILDREN'S CLOTHING,UNDERWEAR & BASE LAYERS 16 | APPAREL,CHILDREN'S CLOTHING,OTHER 17 | APPAREL,OTHER,OTHER 18 | FOOTWEAR,MEN'S FOOTWEAR,HIKING BOOTS 19 | FOOTWEAR,MEN'S FOOTWEAR,TRAIL SHOES 20 | FOOTWEAR,MEN'S FOOTWEAR,SANDALS 21 | FOOTWEAR,MEN'S FOOTWEAR,WINTER BOOTS 22 | FOOTWEAR,MEN'S FOOTWEAR,OTHER 23 | FOOTWEAR,WOMEN'S FOOTWEAR,HIKING BOOTS 24 | FOOTWEAR,WOMEN'S FOOTWEAR,TRAIL SHOES 25 | FOOTWEAR,WOMEN'S FOOTWEAR,SANDALS 26 | FOOTWEAR,WOMEN'S FOOTWEAR,WINTER BOOTS 27 | FOOTWEAR,WOMEN'S FOOTWEAR,OTHER 28 | FOOTWEAR,CHILDREN'S FOOTWEAR,HIKING BOOTS 29 | FOOTWEAR,CHILDREN'S FOOTWEAR,TRAIL SHOES 30 | FOOTWEAR,CHILDREN'S FOOTWEAR,SANDALS 31 | FOOTWEAR,CHILDREN'S FOOTWEAR,WINTER BOOTS 32 | FOOTWEAR,CHILDREN'S FOOTWEAR,OTHER 33 | FOOTWEAR,OTHER,OTHER 34 | CAMPING & HIKING,TENTS & SHELTERS,BACKPACKING TENTS 35 | CAMPING & HIKING,TENTS & SHELTERS,FAMILY CAMPING TENTS 36 | CAMPING & HIKING,TENTS & SHELTERS,SHELTERS & TARPS 37 | CAMPING & HIKING,TENTS & SHELTERS,BIVYS 38 | CAMPING & HIKING,TENTS & SHELTERS,OTHER 39 | CAMPING & HIKING,SLEEPING GEAR,SLEEPING BAGS 40 | CAMPING & HIKING,SLEEPING GEAR,SLEEPING PADS 41 | CAMPING & HIKING,SLEEPING GEAR,HAMMOCKS 42 | CAMPING & HIKING,SLEEPING GEAR,LINERS 43 | CAMPING & HIKING,SLEEPING GEAR,OTHER 44 | CAMPING & HIKING,BACKPACKS,DAYPACKS 45 | CAMPING & HIKING,BACKPACKS,OVERNIGHT PACKS 46 | CAMPING & HIKING,BACKPACKS,EXTENDED TRIP PACKS 47 | CAMPING & HIKING,BACKPACKS,HYDRATION PACKS 48 | CAMPING & HIKING,BACKPACKS,OTHER 49 | CAMPING & HIKING,COOKING GEAR,STOVES 50 | CAMPING & HIKING,COOKING GEAR,COOKWARE 51 | CAMPING & HIKING,COOKING GEAR,UTENSILS & ACCESSORIES 52 | CAMPING & HIKING,COOKING GEAR,FOOD & NUTRITION 53 | CAMPING & HIKING,COOKING GEAR,OTHER 54 | CAMPING & HIKING,OTHER,OTHER 55 | CLIMBING,CLIMBING GEAR,HARNESSES 56 | CLIMBING,CLIMBING GEAR,HELMETS 57 | CLIMBING,CLIMBING GEAR,CARABINERS & QUICKDRAWS 58 | CLIMBING,CLIMBING GEAR,ROPES & SLINGS 59 | CLIMBING,CLIMBING GEAR,OTHER 60 | CLIMBING,BOULDERING & TRAINING,CLIMBING SHOES 61 | CLIMBING,BOULDERING & TRAINING,CHALK & CHALK BAGS 62 | CLIMBING,BOULDERING & TRAINING,TRAINING EQUIPMENT 
63 | CLIMBING,BOULDERING & TRAINING,OTHER 64 | CLIMBING,MOUNTAINEERING,ICE AXES 65 | CLIMBING,MOUNTAINEERING,CRAMPONS 66 | CLIMBING,MOUNTAINEERING,MOUNTAINEERING BOOTS 67 | CLIMBING,MOUNTAINEERING,AVALANCHE SAFETY 68 | CLIMBING,MOUNTAINEERING,OTHER 69 | CLIMBING,OTHER,OTHER 70 | WATER SPORTS,PADDLING,KAYAKS 71 | WATER SPORTS,PADDLING,CANOES 72 | WATER SPORTS,PADDLING,PADDLES 73 | WATER SPORTS,PADDLING,SAFETY GEAR 74 | WATER SPORTS,PADDLING,OTHER 75 | WATER SPORTS,SURFING,SURFBOARDS 76 | WATER SPORTS,SURFING,WETSUITS 77 | WATER SPORTS,SURFING,RASH GUARDS 78 | WATER SPORTS,SURFING,SURF ACCESSORIES 79 | WATER SPORTS,SURFING,OTHER 80 | WATER SPORTS,FISHING,RODS & REELS 81 | WATER SPORTS,FISHING,TACKLE 82 | WATER SPORTS,FISHING,WADERS 83 | WATER SPORTS,FISHING,ACCESSORIES 84 | WATER SPORTS,FISHING,OTHER 85 | WATER SPORTS,OTHER,OTHER 86 | WINTER SPORTS,SKIING,SKIS 87 | WINTER SPORTS,SKIING,SKI BOOTS 88 | WINTER SPORTS,SKIING,SKI POLES 89 | WINTER SPORTS,SKIING,SKI BINDINGS 90 | WINTER SPORTS,SKIING,OTHER 91 | WINTER SPORTS,SNOWBOARDING,SNOWBOARDS 92 | WINTER SPORTS,SNOWBOARDING,SNOWBOARD BOOTS 93 | WINTER SPORTS,SNOWBOARDING,BINDINGS 94 | WINTER SPORTS,SNOWBOARDING,HELMETS 95 | WINTER SPORTS,SNOWBOARDING,OTHER 96 | WINTER SPORTS,SNOWSHOEING,SNOWSHOES 97 | WINTER SPORTS,SNOWSHOEING,POLES 98 | WINTER SPORTS,SNOWSHOEING,ACCESSORIES 99 | WINTER SPORTS,SNOWSHOEING,OTHER 100 | WINTER SPORTS,OTHER,OTHER 101 | TRAVEL,LUGGAGE & BAGS,TRAVEL BACKPACKS 102 | TRAVEL,LUGGAGE & BAGS,DUFFEL BAGS 103 | TRAVEL,LUGGAGE & BAGS,CARRY-ONS 104 | TRAVEL,LUGGAGE & BAGS,TRAVEL ACCESSORIES 105 | TRAVEL,LUGGAGE & BAGS,OTHER 106 | TRAVEL,TRAVEL ACCESSORIES,TRAVEL PILLOWS 107 | TRAVEL,TRAVEL ACCESSORIES,EYE MASKS 108 | TRAVEL,TRAVEL ACCESSORIES,PACKING ORGANIZERS 109 | TRAVEL,TRAVEL ACCESSORIES,SECURITY 110 | TRAVEL,TRAVEL ACCESSORIES,OTHER 111 | TRAVEL,OTHER,OTHER 112 | -------------------------------------------------------------------------------- /src/generate_data/test_set_large.jsonl: -------------------------------------------------------------------------------- 1 | {"question":"how many orders did we have in women's clothing this month","ground_truth_query":"SELECT SUM(Number_of_Orders)\nFROM order_data\nWHERE main_category = \"APPAREL\" AND sub_category = \"WOMEN'S CLOTHING\" AND Month = strftime('%m', 'now') AND Year = strftime('%Y', 'now')"} 2 | {"question":"how many orders in the main category of \"apparel\" and the sub cateory of men's apparel were made in may 2024","ground_truth_query":"SELECT SUM(Number_of_Orders) FROM order_data WHERE main_category = \"APPAREL\" AND sub_category = \"MEN'S CLOTHING\" AND Month = 5 AND Year = 2024"} 3 | {"question":"how many orders of Travel pillows were processed this month","ground_truth_query":"SELECT SUM(Number_of_Orders) \nFROM order_data \nWHERE main_category = \"TRAVEL\" AND sub_category = \"TRAVEL ACCESSORIES\" AND product_type = \"TRAVEL PILLOWS\" AND Month = strftime('%m', 'now') AND Year = strftime('%Y', 'now')"} 4 | {"question":"how many orders were made for travel pillows this month by region","ground_truth_query":"SELECT SUM(Number_of_Orders), Region\nFROM order_data\nWHERE main_category = \"TRAVEL\" AND sub_category = \"TRAVEL ACCESSORIES\" AND product_type = \"TRAVEL PILLOWS\" AND Month = strftime('%m', 'now')\nGROUP BY Region"} 5 | {"question":"on which day in May 2024 did we sell the most product_type \"TRAVEL PILLOWS\"","ground_truth_query":"SELECT Day, SUM(Number_of_Orders) AS Total_Orders\nFROM order_data\nWHERE main_category = \"TRAVEL\" AND sub_category 
= \"TRAVEL ACCESSORIES\" AND product_type = \"TRAVEL PILLOWS\" AND Month = 5 AND Year = 2024\nGROUP BY Day\nORDER BY Total_Orders DESC\nLIMIT 1"} 6 | {"question":"query for all distinct values for main_category and sub_category","ground_truth_query":"SELECT DISTINCT main_category, sub_category\nFROM order_data"} 7 | {"question":"query for all the values for product_type where product_type LIKE '%Pillow%'","ground_truth_query":"SELECT DISTINCT product_type\nFROM order_data\nWHERE product_type LIKE '%PILLOW%'"} 8 | {"question":"query for all the values for product_type where sub_category = 'Travel Pillows'","ground_truth_query":"SELECT DISTINCT product_type\nFROM order_data\nWHERE sub_category = \"TRAVEL PILLOWS\""} 9 | {"question":"query for all the values for sub_category and product_type where main_category = 'Travel'","ground_truth_query":"SELECT DISTINCT sub_category, product_type \nFROM order_data \nWHERE main_category = \"TRAVEL\""} 10 | {"question":"query for all the values for the main_category","ground_truth_query":"SELECT DISTINCT main_category FROM order_data"} 11 | {"question":"query for all the values for the product_type where sub_category = 'TRAVEL ACCESSORIES'","ground_truth_query":"SELECT DISTINCT product_type\nFROM order_data\nWHERE sub_category = \"TRAVEL ACCESSORIES\""} 12 | {"question":"query for all the values for the product_type where sub_category = 'travel'","ground_truth_query":"SELECT DISTINCT product_type\nFROM order_data\nWHERE sub_category = \"TRAVEL\""} 13 | {"question":"query for all the values for the product_type","ground_truth_query":"SELECT DISTINCT product_type FROM order_data"} 14 | {"question":"query for all the values for the Region","ground_truth_query":"SELECT DISTINCT Region FROM order_data"} 15 | {"question":"query for all the values for the sub_category where main_category = 'TRAVEL'","ground_truth_query":"SELECT DISTINCT sub_category\nFROM order_data\nWHERE main_category = \"TRAVEL\""} 16 | {"question":"query for all the values for the sub_category","ground_truth_query":"SELECT DISTINCT sub_category FROM order_data"} 17 | {"question":"query for the month with the strongest revenue","ground_truth_query":"SELECT Month, SUM(Sum_of_Order_Value_USD) as Total_Revenue\nFROM order_data\nGROUP BY Month\nORDER BY Total_Revenue DESC\nLIMIT 1"} 18 | {"question":"query for the total revenue in January 2024 for Europe","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD)\nFROM order_data\nWHERE Month = 1 AND Year = 2024 AND Region = \"EUROPE\""} 19 | {"question":"show the 2023 sales by category","ground_truth_query":"SELECT main_category, \n SUM(Number_of_Orders), \n SUM(Sum_of_Order_Value_USD) \nFROM order_data \nWHERE Year = 2023 \nGROUP BY main_category"} 20 | {"question":"show the revenue grouped by sub_category and product type for main_category \"Camping & Hiking\"","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD), sub_category, product_type\nFROM order_data\nWHERE main_category = \"CAMPING & HIKING\"\nGROUP BY sub_category, product_type"} 21 | {"question":"show the sales revenue by main_category \"Camping & Hiking\"","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) as Total_Sales_Revenue\nFROM order_data\nWHERE main_category = \"CAMPING & HIKING\""} 22 | {"question":"show the total revenue by day and sub_category in May 2024 for 'HIKING BOOTS'","ground_truth_query":"SELECT Day, sub_category, SUM(Sum_of_Order_Value_USD) as Total_Revenue\nFROM order_data\nWHERE Month = 5 AND Year = 2024 AND product_type = \"HIKING BOOTS\"\nGROUP BY 
Day, sub_category"} 23 | {"question":"show the total sales of product_type \"TRAVEL PILLOWS\" by day in May 2024","ground_truth_query":"SELECT Day, SUM(Sum_of_Order_Value_USD) as Total_Sales\nFROM order_data\nWHERE main_category = \"TRAVEL\" AND sub_category = \"TRAVEL ACCESSORIES\" AND product_type = \"TRAVEL PILLOWS\" AND Month = 5 AND Year = 2024\nGROUP BY Day"} 24 | {"question":"show the total sales revenue by month","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) as Total_Sales_Revenue, \n Month\nFROM order_data \nGROUP BY Month"} 25 | {"question":"what are the orders for women's clothing by day in the current month","ground_truth_query":"SELECT Day, SUM(Number_of_Orders) as Total_Orders\nFROM order_data\nWHERE main_category = \"APPAREL\" AND sub_category = \"WOMEN'S CLOTHING\" AND Month = strftime('%m', 'now')\nGROUP BY Day"} 26 | {"question":"what is the average sale value for January","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) \/ SUM(Number_of_Orders) AS Avg_Sale_Value\nFROM order_data\nWHERE Month = 1"} 27 | {"question":"what is the sum of items by product_type where product_type = 'TRAVEL PILLOWS'","ground_truth_query":"SELECT SUM(Sum_of_Number_of_Items)\nFROM order_data\nWHERE main_category = \"TRAVEL\" AND sub_category = \"TRAVEL ACCESSORIES\" AND product_type = \"TRAVEL PILLOWS\""} 28 | {"question":"what is the sum of items by Region, product_type where product_type = 'TRAVEL PILLOWS'","ground_truth_query":"SELECT SUM(Sum_of_Number_of_Items), \n Region\nFROM order_data\nWHERE product_type = \"TRAVEL PILLOWS\"\nGROUP BY Region"} 29 | {"question":"what was the average daily revenue in europe by year","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) \/ COUNT(DISTINCT Date) as Avg_Daily_Revenue, Year\nFROM order_data\nWHERE Region = \"EUROPE\"\nGROUP BY Year"} 30 | {"question":"what was the total number of items sold for product_type = 'Travel Pillows' in 2023","ground_truth_query":"SELECT SUM(Sum_of_Number_of_Items) FROM order_data WHERE main_category = \"TRAVEL\" AND sub_category = \"TRAVEL ACCESSORIES\" AND product_type = \"TRAVEL PILLOWS\" AND Year = 2023"} 31 | {"question":"what was the total number of orders in May 2024 by day","ground_truth_query":"SELECT Day, SUM(Number_of_Orders) as Total_Orders\nFROM order_data\nWHERE Month = 5 AND Year = 2024\nGROUP BY Day"} 32 | {"question":"what was the total revenue for product_type = 'Travel Pillows' in 2023","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD)\nFROM order_data\nWHERE main_category = \"TRAVEL\" AND sub_category = \"TRAVEL ACCESSORIES\" AND product_type = \"TRAVEL PILLOWS\" AND Year = 2023"} 33 | {"question":"what was the total revenue in 2023 by main_category","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) as Total_Revenue,\n main_category\nFROM order_data\nWHERE Year = 2023\nGROUP BY main_category"} 34 | {"question":"what was the total revenue in 2023 by region","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) as Total_Revenue, \n Region \nFROM order_data \nWHERE Year = 2023 \nGROUP BY Region"} 35 | {"question":"what was the total revenue in EUROPE","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD)\nFROM order_data\nWHERE Region = \"EUROPE\""} 36 | {"question":"what was the total revenue in February 2024 by day","ground_truth_query":"SELECT Day, SUM(Sum_of_Order_Value_USD) as Total_Revenue\nFROM order_data\nWHERE Month = 2 AND Year = 2024\nGROUP BY Day"} 37 | {"question":"what was the total revenue in January 2024 by Region","ground_truth_query":"SELECT 
SUM(Sum_of_Order_Value_USD), Region\nFROM order_data\nWHERE Month = 1 AND Year = 2024\nGROUP BY Region"} 38 | {"question":"what was the total revenue in May 2024 by day","ground_truth_query":"SELECT Day, SUM(Sum_of_Order_Value_USD) as Total_Revenue\nFROM order_data\nWHERE Month = 5 AND Year = 2024\nGROUP BY Day"} 39 | {"question":"what was the total revenue in May 2024 by product_type for 'HIKING BOOTS'","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD), \n product_type\nFROM order_data \nWHERE Month = 5 AND Year = 2024 AND product_type = \"HIKING BOOTS\"\nGROUP BY product_type"} 40 | {"question":"what was the total revenue in May 2024 by sub_category for 'HIKING BOOTS'","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD), sub_category\nFROM order_data\nWHERE Month = 5 AND Year = 2024 AND product_type = \"HIKING BOOTS\"\nGROUP BY sub_category"} 41 | {"question":"what was the total revenue in Q1 2024 by region","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) as Total_Revenue, \n Region\nFROM order_data \nWHERE Year = 2024 AND Month IN (1, 2, 3)\nGROUP BY Region"} 42 | {"question":"what was the total revenue in the current month by product_type \"travel pillows\"","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) \nFROM order_data \nWHERE main_category = \"TRAVEL\" AND sub_category = \"TRAVEL ACCESSORIES\" AND product_type = \"TRAVEL PILLOWS\" AND Month = strftime('%m', 'now') AND Year = strftime('%Y', 'now')"} 43 | -------------------------------------------------------------------------------- /src/generate_data/test_set_mini.jsonl: -------------------------------------------------------------------------------- 1 | {"question":"what was the average daily revenue in europe by year","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) \/ COUNT(DISTINCT Date) as Avg_Daily_Revenue, Year\nFROM order_data\nWHERE Region = \"EUROPE\"\nGROUP BY Year"} 2 | -------------------------------------------------------------------------------- /src/generate_data/test_set_small.csv: -------------------------------------------------------------------------------- 1 | question 2 | "query for all the values for the product_type" 3 | "show the total revenue by day and sub_category in May 2024 for product type 'HIKING BOOTS'" 4 | "what are the orders for women's clothing by day in the current month" 5 | "what was the average daily revenue in europe by year" 6 | "what was the total revenue in May 2024 by product_type for 'HIKING BOOTS'" 7 | "what was the total revenue in May 2024 by sub_category for 'HIKING BOOTS'" -------------------------------------------------------------------------------- /src/generate_data/test_set_small.jsonl: -------------------------------------------------------------------------------- 1 | {"custom_id":"task-0","question":"query for all the values for the product_type","ground_truth_query":"SELECT DISTINCT product_type\nFROM order_data"} 2 | {"custom_id":"task-1","question":"show the total revenue by day and sub_category in May 2024 for product type 'HIKING BOOTS'","ground_truth_query":"SELECT Day, sub_category, SUM(Sum_of_Order_Value_USD) as Total_Revenue\nFROM order_data\nWHERE product_type = \"HIKING BOOTS\" AND Month = 5 AND Year = 2024\nGROUP BY Day, sub_category"} 3 | {"custom_id":"task-2","question":"what are the orders for women's clothing by day in the current month","ground_truth_query":"SELECT Day, SUM(Number_of_Orders) as Total_Orders\nFROM order_data\nWHERE main_category = \"APPAREL\" AND sub_category = \"WOMEN'S CLOTHING\" AND Month = 
strftime('%m', 'now') AND Year = strftime('%Y', 'now')\nGROUP BY Day"} 4 | {"custom_id":"task-3","question":"what was the average daily revenue in europe by year","ground_truth_query":"SELECT Year, SUM(Sum_of_Order_Value_USD) \/ COUNT(DISTINCT Date) as Avg_Daily_Revenue\nFROM order_data \nWHERE Region = \"EUROPE\"\nGROUP BY Year"} 5 | {"custom_id":"task-4","question":"what was the total revenue in May 2024 by product_type for 'HIKING BOOTS'","ground_truth_query":"SELECT SUM(Sum_of_Order_Value_USD) FROM order_data WHERE product_type = \"HIKING BOOTS\" AND Month = 5 AND Year = 2024"} 6 | {"custom_id":"task-5","question":"what was the total revenue in May 2024 by sub_category for 'HIKING BOOTS'","ground_truth_query":"SELECT sub_category, SUM(Sum_of_Order_Value_USD) \nFROM order_data \nWHERE main_category = \"FOOTWEAR\" \nAND product_type = \"HIKING BOOTS\" \nAND Month = 5 \nAND Year = 2024 \nGROUP BY sub_category"} 7 | -------------------------------------------------------------------------------- /src/sales_data_insights/data/order_data.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/assistant-pf-demo/15d87431ad2a0ed43e1a8541cbb219f207c7c01d/src/sales_data_insights/data/order_data.db -------------------------------------------------------------------------------- /src/sales_data_insights/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import sqlite3 4 | from openai import AzureOpenAI 5 | import pandas as pd 6 | from promptflow.tracing import trace 7 | import json 8 | from azure.ai.inference import ChatCompletionsClient 9 | from azure.ai.inference.models import SystemMessage, UserMessage 10 | from azure.core.credentials import AzureKeyCredential 11 | from .system_message import system_message, system_message_short 12 | 13 | from typing import TypedDict 14 | class Result(TypedDict): 15 | data: dict 16 | error: str 17 | query: str 18 | execution_time: float 19 | 20 | # Callable class with @trace decorator on the __call__ method 21 | class SalesDataInsights: 22 | 23 | """ 24 | SalesDataInsights tool. You can use this tool as a standalone flow to retrieve sales data 25 | with natural language queries. In this example, it's also called by the assistant API for a 26 | full end-to-end assistant experience. 
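    Example usage (a minimal sketch; assumes the OPENAI_* / AZUREAI_* environment
    variables referenced in __call__ below are set):

        sdi = SalesDataInsights()
        result = sdi(question="what was the total revenue in 2023 by region")
        print(result["query"])
        print(result["data"])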
27 | """ 28 | 29 | def __init__(self, data=None, model_type="azure_openai"): 30 | self.data = data if data else os.path.join( 31 | pathlib.Path(__file__).parent.resolve(), "data", "order_data.db" 32 | ) 33 | self.model_type = model_type 34 | 35 | @trace 36 | def __call__(self, *, question: str, **kwargs) -> Result: 37 | 38 | if self.model_type == "azure_openai": 39 | client = AzureOpenAI( 40 | api_key = os.getenv("OPENAI_API_KEY"), 41 | azure_endpoint = os.getenv("OPENAI_API_BASE"), 42 | api_version = os.getenv("OPENAI_API_VERSION") 43 | ) 44 | else: 45 | endpoint = os.getenv(f"AZUREAI_{self.model_type.upper()}_URL") 46 | key = os.getenv(f"AZUREAI_{self.model_type.upper()}_KEY") 47 | print("endpoint", endpoint) 48 | print("key is set:", key is not None) # avoid logging the secret value 49 | client = ChatCompletionsClient( 50 | endpoint=endpoint, 51 | credential=AzureKeyCredential(key), 52 | ) 53 | # measure the time it takes to execute the call 54 | import time 55 | start = time.time() 56 | 57 | print("getting sales data insights") 58 | print("question", question) 59 | 60 | if self.model_type == "azure_openai": 61 | messages = [{"role": "system", "content": system_message}] 62 | 63 | messages.append({"role": "user", "content": f"{question}\nGive only the query in SQL format"}) 64 | 65 | response = client.chat.completions.create( 66 | model=os.getenv("OPENAI_ANALYST_CHAT_MODEL"), 67 | messages=messages, 68 | ) 69 | elif self.model_type.lower() == "phi3_mini": 70 | combined_message = UserMessage(content=f"{system_message_short}\n\n{question}\nGive only the query in SQL format") 71 | messages = [combined_message] 72 | response = client.create(messages=messages, temperature=0, max_tokens=1000) 73 | elif self.model_type.lower() == "phi3_medium": 74 | combined_message = UserMessage(content=f"{system_message}\n\n{question}\nGive only the query in SQL format") 75 | messages = [combined_message] 76 | response = client.create(messages=messages, temperature=0, max_tokens=1000) 77 | else: 78 | system_message_obj = SystemMessage(content=system_message) 79 | user_message_obj = UserMessage(content=f"{question}\nGive only the query in SQL format") 80 | messages = [system_message_obj, user_message_obj] 81 | response = client.create(messages=messages, temperature=0, max_tokens=1000) 82 | 83 | message = response.choices[0].message 84 | 85 | query: str = message.content 86 | 87 | if query.startswith("```sql") and query.endswith("```"): 88 | query = query[6:-3].strip() 89 | 90 | if query.lower().startswith("error"): 91 | end = time.time() 92 | execution_time = round(end - start, 2) 93 | return {"data": None, "error": query, "query": query, "execution_time": execution_time} 94 | 95 | try: 96 | data = self.query_db(query) 97 | except Exception as e: 98 | end = time.time() 99 | execution_time = round(end - start, 2) 100 | print("Execution time:", execution_time) 101 | return {"data": None, "error": f"{e}", "query": query, "execution_time": execution_time} 102 | 103 | end = time.time() 104 | execution_time = round(end - start, 2) 105 | 106 | return {"data": data, "error": str(None), "query": query, "execution_time": execution_time} 107 | 108 | @trace 109 | def query_db(self, query: str) -> dict: 110 | sql_connection = sqlite3.connect(self.data) 111 | 112 | df = pd.read_sql(query, sql_connection) 113 | 114 | return df.to_dict(orient='records') 115 | 116 | if __name__ == "__main__": 117 | 118 | models = ["azure_openai", "phi3_mini", "phi3_medium", "cohere_chat", "mistral_small", "mistral_large", "llama3"] 119 | for model in models: 120 | print("="*50) 121 | print("model", model) 122 | 
sdi = SalesDataInsights(model_type=model) 123 | result = sdi(question="for 2024 Query the average number of orders per day grouped by Month") 124 | result["data"] = None 125 | print("execution_time:", result['execution_time']) 126 | print("query", result['query']) 127 | --------------------------------------------------------------------------------