├── .github └── workflows │ └── jekyll-ghpage.yml ├── .gitignore ├── 404.html ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DELIVERY.md ├── Gemfile ├── Gemfile.lock ├── LICENSE ├── LICENSE-CODE ├── README.md ├── SECURITY.md ├── _config.yml ├── index.md ├── labs ├── lesson_01 │ ├── images │ │ ├── 03.01.2024_09.24.25_REC.png │ │ ├── 06.02.2024_21.44.42_REC.png │ │ ├── 06.02.2024_21.48.36_REC.png │ │ ├── 06.02.2024_21.49.26_REC.png │ │ ├── 06.02.2024_23.10.43_REC.png │ │ ├── 06.02.2024_23.28.26_REC.png │ │ ├── 06.02.2024_23.31.19_REC.png │ │ ├── 08.04.2024_10.01.04_REC.png │ │ ├── 08.04.2024_10.48.45_REC.png │ │ ├── 08.04.2024_14.47.08_REC.png │ │ ├── 08.04.2024_14.47.41_REC.png │ │ ├── 08.04.2024_14.48.05_REC.png │ │ ├── 08.04.2024_14.48.46_REC.png │ │ ├── 08.04.2024_14.49.04_REC.png │ │ ├── 08.04.2024_14.57.15_REC.png │ │ ├── 08.04.2024_14.58.21_REC.png │ │ ├── 08.04.2024_14.59.17_REC.png │ │ ├── 08.04.2024_16.22.06_REC.png │ │ ├── 08.04.2024_16.27.10_REC.png │ │ ├── 08.04.2024_16.31.56_REC.png │ │ ├── 12.03.2024_16.22.33_REC.png │ │ ├── 12.03.2024_16.31.47_REC.png │ │ ├── 12.03.2024_16.54.24_REC.png │ │ ├── 12.03.2024_16.55.20_REC.png │ │ ├── 12.03.2024_16.57.31_REC.png │ │ ├── 12.03.2024_22.47.37_REC.png │ │ ├── 12.03.2024_22.48.11_REC.png │ │ ├── 16.12.2023_13.35.18_REC.png │ │ ├── 16.12.2023_13.43.27_REC.png │ │ ├── 16.12.2023_16.05.08_REC.png │ │ ├── 16.12.2023_16.20.05_REC.png │ │ ├── 16.12.2023_16.29.30_REC.png │ │ ├── 16.12.2023_16.39.52_REC.png │ │ ├── 16.12.2023_16.48.13_REC.png │ │ ├── 17.12.2023_18.37.17_REC.png │ │ ├── 17.12.2023_20.00.00_REC.png │ │ ├── 17.12.2023_20.20.01_REC.png │ │ ├── 17.12.2023_20.20.26_REC.png │ │ ├── 17.12.2023_20.21.07_REC.png │ │ ├── 17.12.2023_20.21.55_REC.png │ │ ├── 17.12.2023_20.23.02_REC.png │ │ ├── 17.12.2023_20.23.22_REC.png │ │ ├── 17.12.2023_20.32.33_REC.png │ │ ├── 17.12.2023_20.34.52_REC.png │ │ └── 17.12.2023_20.36.17_REC.png │ └── lab01.md ├── lesson_02 │ ├── files │ │ └── surface-pro-4-user-guide-EN.pdf │ ├── images │ │ ├── 04.01.2024_19.22.29_REC.png │ │ ├── 04.01.2024_19.35.49_REC.png │ │ ├── 07.02.2024_10.41.56_REC.png │ │ ├── 07.02.2024_10.42.40_REC.png │ │ ├── 07.02.2024_10.56.42_REC.png │ │ ├── 07.02.2024_10.57.15_REC.png │ │ ├── 07.02.2024_16.39.01_REC.png │ │ ├── 07.02.2024_19.14.16_REC.png │ │ ├── 07.02.2024_19.15.13_REC.png │ │ ├── 13.03.2024_01.22.07_REC.png │ │ ├── 13.03.2024_10.31.21_REC.png │ │ ├── 13.03.2024_10.47.05_REC.png │ │ ├── 13.03.2024_10.47.56_REC.png │ │ ├── 14.03.2024_15.00.08_REC.png │ │ ├── 17.12.2023_22.11.22_REC.png │ │ ├── 17.12.2023_22.12.07_REC.png │ │ ├── 17.12.2023_22.14.04_REC.png │ │ ├── 17.12.2023_22.55.51_REC.png │ │ ├── 17.12.2023_22.58.42_REC.png │ │ ├── 17.12.2023_23.49.29_REC.png │ │ ├── 17.12.2023_23.58.57_REC.png │ │ ├── 18.12.2023_00.02.08_REC.png │ │ ├── 18.12.2023_00.06.51_REC.png │ │ ├── 18.12.2023_00.13.52_REC.png │ │ ├── 26.02.2024_10.00.05_REC.png │ │ ├── 26.02.2024_10.29.13_REC.png │ │ ├── 26.02.2024_10.36.22_REC.png │ │ └── 26.02.2024_10.52.27_REC.png │ └── lab02.md ├── lesson_03 │ ├── data.csv │ ├── images │ │ ├── 05.01.2024_00.43.51_REC.png │ │ ├── 05.01.2024_01.31.10_REC.png │ │ ├── 05.01.2024_01.36.19_REC.png │ │ ├── 05.01.2024_01.44.01_REC.png │ │ ├── 07.02.2024_23.37.47_REC.png │ │ ├── 13.03.2024_10.31.21_REC.png │ │ ├── 14.03.2024_16.04.30_REC.png │ │ ├── 14.03.2024_16.05.01_REC.png │ │ ├── 14.03.2024_22.29.58_REC.png │ │ ├── 14.03.2024_23.08.34_REC.png │ │ ├── 14.03.2024_23.11.52_REC.png │ │ ├── 14.03.2024_23.12.25_REC.png │ │ ├── 15.03.2024_00.36.03_REC.png 
│ │ ├── 15.03.2024_00.52.20_REC.png │ │ ├── 15.03.2024_01.21.34_REC.png │ │ ├── 15.03.2024_01.28.00_REC.png │ │ ├── 26.02.2024_23.14.59_REC.png │ │ ├── 26.02.2024_23.18.12_REC.png │ │ ├── 26.02.2024_23.21.02_REC.png │ │ ├── 26.02.2024_23.24.46_REC.png │ │ ├── 26.02.2024_23.43.08_REC.png │ │ ├── 26.02.2024_23.51.33_REC.png │ │ └── 26.02.2024_23.54.35_REC.png │ └── lab03.md ├── lesson_04 │ └── lab04.md ├── lesson_05 │ ├── images │ │ ├── 01.01.2024_10.57.37_REC.png │ │ ├── 01.01.2024_11.05.21_REC.png │ │ ├── 01.01.2024_11.19.43_REC.png │ │ ├── 01.01.2024_11.25.08_REC.png │ │ ├── 01.01.2024_11.26.33_REC.png │ │ ├── 01.01.2024_11.27.15_REC.png │ │ ├── 01.01.2024_16.33.50_REC.png │ │ ├── 01.01.2024_16.37.50_REC.png │ │ ├── 01.01.2024_16.58.05_REC.png │ │ ├── 02.01.2024_11.52.51_REC.png │ │ ├── 02.01.2024_15.49.11_REC.png │ │ ├── 02.01.2024_16.08.03_REC.png │ │ ├── 02.01.2024_18.33.42_REC.png │ │ ├── 09.01.2024_23.54.37_REC.png │ │ ├── 09.01.2024_23.56.21_REC.png │ │ ├── 09.01.2024_23.58.24_REC.png │ │ ├── 10.01.2024_00.17.52_REC.png │ │ ├── 10.01.2024_00.27.51_REC.png │ │ ├── 10.01.2024_00.35.42_REC.png │ │ ├── 16.01.2024_23.13.29_REC.png │ │ ├── 31.12.2023_13.19.23_REC.png │ │ ├── 31.12.2023_13.23.18_REC.png │ │ ├── 31.12.2023_13.27.45_REC.png │ │ ├── 31.12.2023_13.30.59_REC.png │ │ ├── 31.12.2023_13.32.17_REC.png │ │ ├── git_workflow_branching.png │ │ ├── git_workflow_pipelines.png │ │ └── large-language-model-operations-prompt-flow-process.png │ └── lab05.md └── performance │ ├── README.md │ ├── benchmark.parameters.template │ ├── benchmark_analysis.ipynb │ ├── docs │ ├── AOAI_BENCH_TOOL.md │ └── PERFTEST_CONCEPTS.md │ ├── media │ ├── perftest-GPT-RAG-Basic-communication.png │ ├── perftest-GPT-RAG-Basic.png │ ├── perftest-analysis.png │ ├── perftest-aoai-response-time.png │ ├── perftest-app-component.png │ ├── perftest-architecture.png │ ├── perftest-azure-diagnostics.png │ ├── perftest-azure-load-testing.png │ ├── perftest-github-environment.png │ ├── perftest-github-var-secrets.png │ ├── perftest-llmlifecycle-with-tests.png │ ├── perftest-llmlifecycle.png │ ├── perftest-portal.png │ ├── perftest-response-time.png │ ├── perftest-running.png │ ├── perftest-sample-sequence-diagram.png │ ├── perftest-server-metrics.png │ ├── perftest-starting.png │ └── perftest-users-per-hour.png │ ├── requirements.txt │ └── runtest.sh └── requirements.txt /.github/workflows/jekyll-ghpage.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages 2 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["main"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment to GitHub Pages 19 | concurrency: 20 | group: "pages" 21 | cancel-in-progress: true 22 | 23 | jobs: 24 | # Build job 25 | build: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Checkout 29 | uses: actions/checkout@v4 30 | - name: Setup Ruby 31 | uses: ruby/setup-ruby@v1 32 | with: 33 | ruby-version: '3.1' # Not needed with a .ruby-version file 34 | bundler-cache: true # runs 'bundle install' and caches installed gems automatically 35 | cache-version: 0 # Increment this number if you need to 
re-download cached gems 36 | - name: Setup Pages 37 | id: pages 38 | uses: actions/configure-pages@v4 39 | - name: Build with Jekyll 40 | # Outputs to the './_site' directory by default 41 | run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" 42 | env: 43 | JEKYLL_ENV: production 44 | - name: Upload artifact 45 | uses: actions/upload-pages-artifact@v3 46 | 47 | # Deployment job 48 | deploy: 49 | environment: 50 | name: github-pages 51 | url: ${{ steps.deployment.outputs.page_url }} 52 | runs-on: ubuntu-latest 53 | needs: build 54 | steps: 55 | - name: Deploy to GitHub Pages 56 | id: deployment 57 | uses: actions/deploy-pages@v4 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | local/ 2 | azure-openai-benchmark/ 3 | *~$*.docx 4 | labs/lesson_05/llmops-project*/ 5 | labs/lesson_05/llmops-project*/* 6 | *.log 7 | error.err 8 | benchmark.parameters 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | pip-wheel-metadata/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject some infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webenv 75 | .env 76 | .venv 77 | env/ 78 | venv/ 79 | ENV/ 80 | env.bak/ 81 | venv.bak/ 82 | 83 | # Spyder project settings 84 | .spyderproject 85 | .spyder-py3 86 | 87 | # Rope project settings 88 | .ropeproject 89 | 90 | # mkdocs documentation 91 | /site 92 | 93 | # mypy 94 | .mypy_cache/ 95 | .dmypy.json 96 | dmypy.json 97 | 98 | # Pyre type checker 99 | .pyre/ 100 | 101 | # Cython debug symbols 102 | cython_debug/ 103 | 104 | # Jupyter Notebook 105 | .ipynb_checkpoints 106 | 107 | # IPython 108 | profile_default/ 109 | ipython_config.py 110 | 111 | # pyenv 112 | .python-version 113 | 114 | # celery beat schedule file 115 | celerybeat-schedule 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyder-py3 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | # pytype static type analyzer 148 | .pytype/ 149 | 150 | # Profiling data 151 | .prof 152 | 153 | # vscode settings 154 | .vscode/ 155 | 156 | # pycharm settings 157 | .idea/ 158 | 159 | # Jupyter 160 | .ipynb_checkpoints 161 | 162 | # Jekyll 163 | _site 164 | .sass-cache 
165 | .jekyll-cache
166 | .jekyll-metadata
167 | vendor
168 | 
--------------------------------------------------------------------------------
/404.html:
--------------------------------------------------------------------------------
1 | ---
2 | permalink: /404.html
3 | layout: default
4 | ---
5 | 
6 | <style type="text/css" media="screen">
7 |   .container {
8 |     margin: 10px auto;
9 |     max-width: 600px;
10 |     text-align: center;
11 |   }
12 | 
13 |   h1 {
14 |     margin: 30px 0;
15 |     font-size: 4em;
16 |     line-height: 1;
17 |     letter-spacing: -1px;
18 |   }
19 | </style>
20 | 
21 | <div class="container">
22 |   <h1>404</h1>
23 | 
24 |   <p><strong>Page not found :(</strong></p>
25 |   <p>The requested page could not be found.</p>
26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # LLMOps Workshop 2 | 3 | ## Changelog 4 | 5 | ### 2024-07-18 6 | - **Added**: Github.io page. 7 | - **Changed**: Removed Lab 05 Team Collaboration. 8 | - 9 | ### 2024-04-09 10 | - **Added**: LLM Performance Testing lab. 11 | 12 | ### 2024-04-08 13 | - **Changed**: Lab 01 - Added note to delete Llama2 deployment. 14 | - **Changed**: Lab 01 - Adjusted Llama2 model input to Json format in test window. 15 | - **Changed**: Lab 01 - Setup now includes Content Safety creation; wasn't needed before. 16 | - **Changed**: Lab 03 - Emphasized checking input values in evaluation flow. 17 | 18 | ### 2024-03-15 19 | - **Added**: Workshop delivery guide. 20 | - **Changed**: Some screenshots in Lab 01 due to UI updates in AI Studio. 21 | - **Changed**: Proofreading in Lab 01, 02, and 03 to make instructions clearer. 22 | - **Changed**: Lab 02 - Simplification by using registered index in the lookup node. 23 | - **Changed**: Lab 03 - Added additional evaluation flows: similarity and relevance. 24 | - **Added**: Prerequisites and setup for all labs. 25 | - **Added**: Changelog file. 26 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to 4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to, 5 | and actually do, grant us the rights to use your contribution. For details, visit 6 | https://cla.microsoft.com. 7 | 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need 9 | to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the 10 | instructions provided by the bot. You will only need to do this once across all repositories using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 14 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. -------------------------------------------------------------------------------- /DELIVERY.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Delivery Guide' 3 | layout: default 4 | nav_order: 8 5 | --- 6 | # LLMOps Workshop 7 | 8 | ## Delivery Guide 9 | 10 | This page is designed to assist **instructors** in effectively delivering the **LLMOps workshop**. 
11 | 12 | The workshop is structured into **six lessons**, each comprising conceptual slides and hands-on **lab exercises**. Instructors are encouraged to adapt the content based on their experience and the needs of the audience. 13 | 14 | The hands-on activities can be demonstrated by the instructor if the students prefer. For instance, we suggest that the **first lesson's lab** could be presented as a **demo** to introduce participants to the tools without disrupting the timeline of future lessons. 15 | 16 | ### Preparation 17 | 18 | Each lab has its **prerequisites** and **setup** section for the **hands-on** part, detailed at the beginning of the respective lab. If participants are expected to engage in hands-on activities rather than watch a demonstration, it's good practice to have them check the prerequisites and complete the setup **before** starting the practical exercises. 19 | 20 | 21 | ### Planning the Workshop 22 | 23 | When planning the workshop session, consider that each lesson has an **estimated duration**, including time for presenting slides and conducting hands-on exercises or a demonstration. 24 | 25 | > The duration may vary based on factors such as class size, level of interaction, internet speed, and the technical proficiency of the participants. 26 | 27 | This workshop is structured to span over an 8-hour duration, encompassing all lessons. However, the length can be tailored to the participants' needs, permitting shorter sessions or emphasis on particular topics. 28 | 29 | ### Delivery Scenarios 30 | 31 | Here are some suggestions for workshop configurations, which can be adjusted based on specific needs. 32 | 33 | 34 | | Content | Duration | Lessons | 35 | |-------------|----------|---------| 36 | | Full Workshop | Two Days (2x4h) | D1: 1,2 and 3; D2: 4 and 5| 37 | | Overview, Build, Evaluate and Deploy | Half Day (4h) | 1,2 and 3 | 38 | | Overview, Build, Evaluate and Deploy - Short Version * | 3h | 1,2,3 * | 39 | | Overview and Automation ** | 2h | 1,6 ** | 40 | 41 | 42 | 43 | > **Notes**

* In this condensed version, the first hour will be dedicated to presenting the `Applying LLMOps concepts to LLM Apps` slides from the Lesson 1 section, followed by the Lesson 2 and 3 sections from the full deck. The subsequent two hours will be allocated to conducting Labs 2 and 3. 44 |

45 | > ** For this option, use the `LLMOps Overview and Automation` deck followed by lesson 6 lab. 46 | 47 | 48 | 49 | ### Additional Recommendations 50 | 51 | Instructors should be prepared to either run the lab exercises as a demo or have a pre-recorded video available as a backup in case of any unforeseen circumstances. 52 | 53 | For virtual format, it's recommended to cap sessions at 4 hours, incorporating breaks between lessons to sustain participant engagement and productivity. 54 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem "jekyll", "~> 4.3.3" 4 | 5 | gem "just-the-docs", "0.4.2" 6 | 7 | # If you want to use GitHub Pages, remove the "gem "jekyll"" above and 8 | # uncomment the line below. To upgrade, run `bundle update github-pages`. 9 | # gem "github-pages", group: :jekyll_plugins 10 | # If you have any plugins, put them here! 11 | group :jekyll_plugins do 12 | gem "jekyll-feed", "~> 0.12" 13 | end 14 | 15 | # Windows and JRuby does not include zoneinfo files, so bundle the tzinfo-data gem 16 | # and associated library. 17 | platforms :mingw, :x64_mingw, :mswin, :jruby do 18 | gem "tzinfo", ">= 1", "< 3" 19 | gem "tzinfo-data" 20 | end 21 | 22 | # Performance-booster for watching directories on Windows 23 | #gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin] 24 | 25 | # Lock `http_parser.rb` gem to `v0.6.x` on JRuby builds since newer versions of the gem 26 | # do not have a Java counterpart. 27 | gem "http_parser.rb", "~> 0.6.0", :platforms => [:jruby] 28 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | addressable (2.8.7) 5 | public_suffix (>= 2.0.2, < 7.0) 6 | colorator (1.1.0) 7 | concurrent-ruby (1.3.3) 8 | em-websocket (0.5.3) 9 | eventmachine (>= 0.12.9) 10 | http_parser.rb (~> 0) 11 | eventmachine (1.2.7) 12 | eventmachine (1.2.7-x64-mingw32) 13 | ffi (1.17.0) 14 | ffi (1.17.0-x64-mingw32) 15 | forwardable-extended (2.6.0) 16 | google-protobuf (3.25.3-x64-mingw32) 17 | google-protobuf (3.25.3-x86_64-linux) 18 | http_parser.rb (0.8.0) 19 | i18n (1.14.5) 20 | concurrent-ruby (~> 1.0) 21 | jekyll (4.3.3) 22 | addressable (~> 2.4) 23 | colorator (~> 1.0) 24 | em-websocket (~> 0.5) 25 | i18n (~> 1.0) 26 | jekyll-sass-converter (>= 2.0, < 4.0) 27 | jekyll-watch (~> 2.0) 28 | kramdown (~> 2.3, >= 2.3.1) 29 | kramdown-parser-gfm (~> 1.0) 30 | liquid (~> 4.0) 31 | mercenary (>= 0.3.6, < 0.5) 32 | pathutil (~> 0.9) 33 | rouge (>= 3.0, < 5.0) 34 | safe_yaml (~> 1.0) 35 | terminal-table (>= 1.8, < 4.0) 36 | webrick (~> 1.7) 37 | jekyll-feed (0.17.0) 38 | jekyll (>= 3.7, < 5.0) 39 | jekyll-sass-converter (3.0.0) 40 | sass-embedded (~> 1.54) 41 | jekyll-seo-tag (2.8.0) 42 | jekyll (>= 3.8, < 5.0) 43 | jekyll-watch (2.2.1) 44 | listen (~> 3.0) 45 | just-the-docs (0.4.2) 46 | jekyll (>= 3.8.5) 47 | jekyll-seo-tag (>= 2.0) 48 | rake (>= 12.3.1) 49 | kramdown (2.4.0) 50 | rexml 51 | kramdown-parser-gfm (1.1.0) 52 | kramdown (~> 2.0) 53 | liquid (4.0.4) 54 | listen (3.9.0) 55 | rb-fsevent (~> 0.10, >= 0.10.3) 56 | rb-inotify (~> 0.9, >= 0.9.10) 57 | mercenary (0.4.0) 58 | pathutil (0.16.2) 59 | forwardable-extended (~> 2.6) 60 | public_suffix (6.0.0) 61 | rake (13.2.1) 62 | rb-fsevent (0.11.2) 63 | rb-inotify (0.11.1) 64 
| ffi (~> 1.0) 65 | rexml (3.3.1) 66 | strscan 67 | rouge (4.3.0) 68 | safe_yaml (1.0.5) 69 | sass-embedded (1.69.5) 70 | google-protobuf (~> 3.23) 71 | rake (>= 13.0.0) 72 | sass-embedded (1.69.5-x64-mingw32) 73 | google-protobuf (~> 3.23) 74 | strscan (3.1.0) 75 | terminal-table (3.0.2) 76 | unicode-display_width (>= 1.1.1, < 3) 77 | tzinfo (2.0.6) 78 | concurrent-ruby (~> 1.0) 79 | tzinfo-data (1.2024.1) 80 | tzinfo (>= 1.0.0) 81 | unicode-display_width (2.5.0) 82 | webrick (1.8.1) 83 | 84 | PLATFORMS 85 | x64-mingw32 86 | x86_64-linux 87 | 88 | DEPENDENCIES 89 | http_parser.rb (~> 0.6.0) 90 | jekyll (~> 4.3.3) 91 | jekyll-feed (~> 0.12) 92 | just-the-docs (= 0.4.2) 93 | tzinfo (>= 1, < 3) 94 | tzinfo-data 95 | 96 | BUNDLED WITH 97 | 2.5.15 98 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. 
Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. 
Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. 
Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. 
for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 
337 | 338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public 379 | licenses. Notwithstanding, Creative Commons may elect to apply one of 380 | its public licenses to material it publishes and in those instances 381 | will be considered the “Licensor.” The text of the Creative Commons 382 | public licenses is dedicated to the public domain under the CC0 Public 383 | Domain Dedication. Except for the limited purpose of indicating that 384 | material is shared under a Creative Commons public license or as 385 | otherwise permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the 393 | public licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. -------------------------------------------------------------------------------- /LICENSE-CODE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLMOps Workshop 2 | 3 | Welcome to the LLMOps workshop! This course will guide you through building, evaluating, monitoring, and deploying Large Language Model solutions efficiently using Azure AI, Azure Machine Learning Prompt Flow, Content Safety, and Azure OpenAI. Let's master LLMOps together! 4 | 5 | 6 | - [Workshop contents](https://microsoft.github.io/llmops-workshop/) 7 | - [Change log](CHANGELOG.md) 8 | 9 | 10 | 11 | ## Contributing 12 | 13 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 14 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 15 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 16 | 17 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 18 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 19 | provided by the bot. You will only need to do this once across all repos using our CLA. 20 | 21 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 22 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 23 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 24 | 25 | ## Trademarks 26 | 27 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 28 | trademarks or logos is subject to and must follow 29 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 30 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 31 | Any use of third-party trademarks or logos are subject to those third-party's policies. 32 | 33 | ## Code of Conduct 34 | 35 | This project has adopted the 36 | [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
37 | For more information see the 38 | [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 39 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) 40 | with any additional questions or comments. 41 | 42 | ## License 43 | 44 | Copyright (c) Microsoft Corporation. All rights reserved. 45 | 46 | Licensed under the [MIT](LICENSE) license. 47 | 48 | ### Reporting Security Issues 49 | 50 | [Reporting Security Issues](https://github.com/microsoft/repo-templates/blob/main/shared/SECURITY.md) 51 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | title: LLMOPs Workshop 2 | description: >- # this means to ignore newlines until "baseurl:" 3 | LLMOPs Workshop 4 | baseurl: "" 5 | url: "" # the base hostname & protocol for your site, e.g. http://example.com 6 | 7 | # Build settings 8 | theme: "just-the-docs" 9 | 10 | plugins: 11 | - jekyll-feed 12 | 13 | 14 | -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Introduction 3 | layout: home 4 | nav_order: 1 5 | --- 6 | # LLMOps Workshop 7 | 8 | Welcome to the LLMOps workshop! This course will guide you through building, evaluating, monitoring, and deploying Large Language Model solutions efficiently using Azure AI, Azure Machine Learning Prompt Flow, Content Safety, and Azure OpenAI. Let's master LLMOps together! 9 | 10 | ## Table of Contents 11 | 12 | The workshop is composed of the lessons below. They are sorted by coding levels: **no-code** for beginners, **low-code** for intermediates, and **full-code** for advanced learners, indicating the coding effort needed. 13 | 14 | Each lesson includes an **estimated time** that covers both the conceptual part and the execution of the labs. Please note that this is an estimate to assist with time planning and **may vary** slightly depending on the case. 15 | 16 | ### [Lesson 1](labs/lesson_01/lab01.html): Intro to LLMs and Azure AI Services 17 | *no-code. 1h duration* 18 | - Introduction to LLMs: GPTs and other models. 19 | - LLMOps: applying MLOps principles to LLM Solutions. 20 | - Azure AI Services Overview: 21 | - Azure OpenAI 22 | - Azure AI Studio 23 | - Azure AI PromptFlow 24 | - Azure Content Safety 25 | 26 | ### [Lesson 2](labs/lesson_02/lab02.html): Building LLMs Orchestration Flows 27 | *low-code. 1.5h duration* 28 | - LLM App Orchestration. 29 | - Prompt Flow Standard and Chat flows. 30 | 31 | ### [Lesson 3](labs/lesson_03/lab03.html): Evaluating and Deploying LLMs 32 | *low-code. 1.5h duration* 33 | - Prompt flow Evaluation flows to evaluate LLMs Solutions. 34 | - Generated content metrics: groundedness, relevance, etc. 35 | - Deploying LLMs Flows. 36 | 37 | ### [Lesson 4](labs/lesson_04/lab04.html): Monitoring and Responsible AI 38 | *low-code. 1h duration* 39 | - Monitoring LLMs orchestration flows. 40 | - Generated content performance metrics. 41 | - Operational metrics. 42 | - Content safety to protect your solution. 43 | 44 | ### [Lesson 5](labs/lesson_05/lab05.html): Automating Everything 45 | *full-code. 2h duration* 46 | - Github and Github Actions. 47 | - Evaluation and Deployment Automation. 48 | 49 | ## Extras 50 | 51 | ### [LLM Performance Testing](labs/performance/README.html) 52 | *low-code. 
2h duration* 53 | - Benchmarking Azure OpenAI Models 54 | - Load Testing LLM Apps -------------------------------------------------------------------------------- /labs/lesson_01/images/03.01.2024_09.24.25_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/03.01.2024_09.24.25_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/06.02.2024_21.44.42_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/06.02.2024_21.44.42_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/06.02.2024_21.48.36_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/06.02.2024_21.48.36_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/06.02.2024_21.49.26_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/06.02.2024_21.49.26_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/06.02.2024_23.10.43_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/06.02.2024_23.10.43_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/06.02.2024_23.28.26_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/06.02.2024_23.28.26_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/06.02.2024_23.31.19_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/06.02.2024_23.31.19_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_10.01.04_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_10.01.04_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_10.48.45_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_10.48.45_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_14.47.08_REC.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_14.47.08_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_14.47.41_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_14.47.41_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_14.48.05_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_14.48.05_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_14.48.46_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_14.48.46_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_14.49.04_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_14.49.04_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_14.57.15_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_14.57.15_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_14.58.21_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_14.58.21_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_14.59.17_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_14.59.17_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_16.22.06_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_16.22.06_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_16.27.10_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_16.27.10_REC.png 
-------------------------------------------------------------------------------- /labs/lesson_01/images/08.04.2024_16.31.56_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/08.04.2024_16.31.56_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/12.03.2024_16.22.33_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/12.03.2024_16.22.33_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/12.03.2024_16.31.47_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/12.03.2024_16.31.47_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/12.03.2024_16.54.24_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/12.03.2024_16.54.24_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/12.03.2024_16.55.20_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/12.03.2024_16.55.20_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/12.03.2024_16.57.31_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/12.03.2024_16.57.31_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/12.03.2024_22.47.37_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/12.03.2024_22.47.37_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/12.03.2024_22.48.11_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/12.03.2024_22.48.11_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/16.12.2023_13.35.18_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/16.12.2023_13.35.18_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/16.12.2023_13.43.27_REC.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/16.12.2023_13.43.27_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/16.12.2023_16.05.08_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/16.12.2023_16.05.08_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/16.12.2023_16.20.05_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/16.12.2023_16.20.05_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/16.12.2023_16.29.30_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/16.12.2023_16.29.30_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/16.12.2023_16.39.52_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/16.12.2023_16.39.52_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/16.12.2023_16.48.13_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/16.12.2023_16.48.13_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_18.37.17_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_18.37.17_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.00.00_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.00.00_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.20.01_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.20.01_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.20.26_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.20.26_REC.png -------------------------------------------------------------------------------- 
/labs/lesson_01/images/17.12.2023_20.21.07_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.21.07_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.21.55_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.21.55_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.23.02_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.23.02_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.23.22_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.23.22_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.32.33_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.32.33_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.34.52_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.34.52_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/images/17.12.2023_20.36.17_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_01/images/17.12.2023_20.36.17_REC.png -------------------------------------------------------------------------------- /labs/lesson_01/lab01.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Lab 01: Introduction to LLMs and Azure AI Services' 3 | layout: default 4 | nav_order: 1 5 | --- 6 | #### Introduction to LLMs and Azure AI Services 7 | 8 | In this lab, we will get an overview of how to use Azure AI to work with large language models. 9 | 10 | The focus is on an overview of the creation process; in the next lessons we will delve deeper into the build, evaluation, deployment, and monitoring processes. 11 | 12 | #### Prerequisites 13 | 14 | An Azure subscription is required, where you can create an AI Project along with its AI Hub Resource, a Content Safety service, and an AI Search service.
15 | 16 | #### Setup 17 | 18 | - [Create an AI Project and AI Hub Resources](#create-an-ai-project-and-ai-hub-resources) 19 | - [Deploy an Azure OpenAI model](#deploy-an-azure-openai-model) 20 | 21 | #### Lab Steps 22 | 23 | 1) Use the Azure AI Studio Playground. 24 | 2) Work with an Open Source LLM Model. 25 | 3) Test the prompt in Content Safety. 26 | 4) Create a flow in Prompt Flow. 27 | 28 | #### Setup 29 | 30 | ##### Create an AI Project and AI Hub Resources 31 | 32 | Let's start by creating a project in Azure AI Studio. 33 | 34 | Go to your browser and type: https://ai.azure.com 35 | 36 | After logging in with your Azure account, you will see the following screen: 37 | 38 | ![LLMOps Workshop](images/16.12.2023_13.35.18_REC.png) 39 | 40 | In the **Build** tab, select **New AI project** to create a project. 41 | 42 | Choose a unique name for your project. 43 | 44 | ![LLMOps Workshop](images/08.04.2024_14.47.08_REC.png) 45 | 46 | Select the **Create a new resource** link and choose a name for your AI hub where your project resources will be created. 47 | 48 | ![LLMOps Workshop](images/08.04.2024_14.47.41_REC.png) 49 | 50 | > Note: Choose a region where the GPT-4 models and text-embedding-ada-002 are available. 51 | 52 | Still on this screen, select the **Create a new Azure AI Search** option; this service will be used in the following lessons. 53 | 54 | ![LLMOps Workshop](images/08.04.2024_14.48.05_REC.png) 55 | 56 | Finally, select **Create a project** to create the resources that will be used in your project. 57 | 58 | ![LLMOps Workshop](images/08.04.2024_14.48.46_REC.png) 59 | 60 | ![LLMOps Workshop](images/08.04.2024_14.49.04_REC.png) 61 | 62 | ##### Deploy an Azure OpenAI model 63 | 64 | After creating your AI Project, the first step is to create a deployment of an OpenAI model so you can start experimenting with the prompts you will use in your application. 65 | 66 | To do this, access your newly created project in the **Build** tab of the AI Studio, select the **Deployments** option, and click on **Create (Real-time endpoint)**. 67 | 68 | ![LLMOps Workshop](images/06.02.2024_21.44.42_REC.png) 69 | 70 | From the list of models, select **gpt-4**. 71 | 72 | ![LLMOps Workshop](images/12.03.2024_16.22.33_REC.png) 73 | 74 | On the next screen, define the name of the deployment; in this case, you can use the same name as the model. In the version field, select the latest available version; in the example below we chose version **0125-Preview** (GPT-4 Turbo). 75 | 76 | ![LLMOps Workshop](images/12.03.2024_16.31.47_REC.png) 77 | 78 | > Click on **Advanced Options** and select a **Tokens per Minute Rate Limit** of at least 40K to ensure the flows run smoothly in the upcoming lessons. 79 | 80 | Now, just click on **Deploy** and your model deployment is created. You can now test it in the Playground. 81 | 82 | ##### Create a Content Safety Service 83 | 84 | Toward the end of this lab, you will test with Content Safety, so click the following link to create the service: [https://aka.ms/acs-create](https://aka.ms/acs-create). 85 | 86 | Select the resource group that you previously used for your AI Project. After that, follow the steps presented in the subsequent screens to continue with the creation process, starting by clicking the **Review + create** button. 87 | 88 | ![LLMOps Workshop](images/08.04.2024_14.57.15_REC.png) 89 | 90 | Then click on **Create** to create your service. 91 | 92 | ![LLMOps Workshop](images/08.04.2024_16.22.06_REC.png) 93 | 94 | Done! The Content Safety service is now created.
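Optionally, before moving on to the lab steps, you can sanity-check the gpt-4 deployment from code instead of the Playground. The following is a minimal sketch using the `openai` Python package (v1+); the endpoint, API key, and API version are placeholder assumptions that you must replace with the values from your own Azure OpenAI resource:

```python
# Minimal sketch: call the gpt-4 deployment created in Setup.
# Assumptions: `pip install openai` (v1+); the endpoint and key come from
# your Azure OpenAI resource; "gpt-4" is the deployment name chosen above.
from openai import AzureOpenAI

client = AzureOpenAI(
    azure_endpoint="https://<your-resource>.openai.azure.com",  # placeholder
    api_key="<your-api-key>",                                   # placeholder
    api_version="2024-02-01",
)

response = client.chat.completions.create(
    model="gpt-4",  # the deployment name, not the base model id
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one short sentence."},
    ],
    max_tokens=50,
)
print(response.choices[0].message.content)
```

If the call prints a greeting, the deployment is healthy. The screenshot below shows the Content Safety resource created in the previous step.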
95 | 96 | ![LLMOps Workshop](images/08.04.2024_14.58.21_REC.png) 97 | 98 | 99 | #### Lab Steps 100 | 101 | ##### 1) Use the Azure AI Studio Playground 102 | 103 | On the screen with the deployment information, select the **Open in playground** button. 104 | 105 | ![LLMOps Workshop](images/16.12.2023_16.29.30_REC.png) 106 | 107 | In this lab, we will run an example where the model will help us summarize and extract information from a conversation between a customer and a representative of a telco company. 108 | 109 | Copy the following prompt into the system message field of the playground: 110 | 111 | ``` 112 | You're an AI assistant that helps telco company to extract valuable information from their conversations by creating JSON files for each conversation transcription you receive. You always try to extract and format as a JSON: 113 | 1. Customer Name [name] 114 | 2. Customer Contact Phone [phone] 115 | 3. Main Topic of the Conversation [topic] 116 | 4. Customer Sentiment (Neutral, Positive, Negative)[sentiment] 117 | 5. How the Agent Handled the Conversation [agent_behavior] 118 | 6. What was the FINAL Outcome of the Conversation [outcome] 119 | 7. A really brief Summary of the Conversation [summary] 120 | 121 | Only extract information that you're sure. If you're unsure, write "Unknown/Not Found" in the JSON file. 122 | ``` 123 | 124 | After copying, select **Apply changes**. 125 | 126 | ![LLMOps Workshop](images/06.02.2024_21.48.36_REC.png) 127 | 128 | Then type the following text in the chat session and click the send button: 129 | 130 | ``` 131 | Agent: Hello, welcome to Telco's customer service. My name is Juan, how can I assist you? 132 | Client: Hello, Juan. I'm calling because I'm having issues with my mobile data plan. It's very slow and I can't browse the internet or use my apps. 133 | Agent: I'm very sorry for the inconvenience, sir. Could you please tell me your phone number and your full name? 134 | Client: Yes, sure. My number is 011-4567-8910 and my name is Martín Pérez. 135 | Agent: Thank you, Mr. Pérez. I'm going to check your plan and your data usage. One moment, please. 136 | Client: Okay, thank you. 137 | Agent: Mr. Pérez, I've reviewed your plan and I see that you have contracted the basic plan of 2 GB of data per month. Is that correct? 138 | Client: Yes, that's correct. 139 | Agent: Well, I inform you that you have consumed 90% of your data limit and you only have 200 MB available until the end of the month. That's why your browsing speed has been reduced. 140 | Client: What? How is that possible? I barely use the internet on my cell phone. I only check my email and my social networks from time to time. I don't watch videos or download large files. 141 | Agent: I understand, Mr. Pérez. But keep in mind that some applications consume data in the background, without you realizing it. For example, automatic updates, backups, GPS, etc. 142 | Client: Well, but they didn't explain that to me when I contracted the plan. They told me that with 2 GB I would have enough for the whole month. I feel cheated. 143 | Agent: I apologize, Mr. Pérez. It was not our intention to deceive you. I offer you a solution: if you want, you can change your plan to a higher one, with more GB of data and higher speed. This way you can enjoy a better browsing experience. 144 | Client: And how much would that cost me? 145 | Agent: We have a special offer for you. For only 10 pesos more per month, you can access the premium plan of 5 GB of data and 4G speed. Are you interested?
146 | Client: Mmm, I don't know. Isn't there another option? Can't you give me more speed without charging me more? 147 | Agent: I'm sorry, Mr. Pérez. That's the only option we have available. If you don't change your plan, you'll have to wait until next month to recover your normal speed. Or you can buy an additional data package, but it would be more expensive than changing plans. 148 | Client: Well, let me think about it. Can I call later to confirm? 149 | Agent: Of course, Mr. Pérez. You can call whenever you want. The number is the same one you dialed now. Is there anything else I can help you with? 150 | Client: No, that's all. Thank you for your attention. 151 | Agent: Thank you, Mr. Pérez. Have a good day. Goodbye. 152 | ``` 153 | 154 | ![LLMOps Workshop](images/06.02.2024_21.49.26_REC.png) 155 | 156 | You will see a result generated by the model similar to the one shown in the image below. 157 | 158 | Notice that the model correctly followed the instructions indicated in the System message field: 159 | 160 | ![LLMOps Workshop](images/16.12.2023_16.48.13_REC.png) 161 | 162 | ##### 2) Work with an Open Source LLM Model 163 | 164 | Now let's test an open-source Llama 2 model from Meta. 165 | 166 | For this, go to the **Deployments** section in the **Build** tab and click on **Create (Real-time endpoint)**. 167 | 168 | ![LLMOps Workshop](images/12.03.2024_16.55.20_REC.png) 169 | 170 | Select the model **Llama-2-13b-chat** and click on **Confirm**. 171 | 172 | ![LLMOps Workshop](images/12.03.2024_16.54.24_REC.png) 173 | 174 | Select the **Standard_NC24s_v3** compute for inference with the selected model; for this workshop, one instance is enough. 175 | 176 | If you do not have enough quota, you can access the **Quota** option in the **Manage** tab to request an increase for the selected resource. 177 | 178 | ![LLMOps Workshop](images/12.03.2024_16.57.31_REC.png) 179 | 180 | The creation of the deployment will take a few minutes; the time varies, but it is generally between 10 and 20 minutes. 181 | 182 | ![LLMOps Workshop](images/17.12.2023_18.37.17_REC.png) 183 | 184 | Done! Let's test this model by selecting the **Test** option on the deployment page. 185 | 186 | Adjust the ```max_new_tokens``` parameter to 1000 so we can test the same example we used with the gpt-4 model. 187 | 188 | ![LLMOps Workshop](images/12.03.2024_22.47.37_REC.png) 189 | 190 | Now just copy the text below into the "Start typing" text box and then send it to observe the response generated by the Llama 2 model. 191 | 192 | ``` 193 | { 194 | "input_data": { 195 | "input_string": [ 196 | { 197 | "role": "system", 198 | "content": "You're an AI assistant that helps telco company to extract valuable information from their conversations by creating JSON documents for each conversation transcription you receive. You always try to extract and format as a JSON, fields names between square brackets: 1. Customer Name [name] 2. Customer Contact Phone [phone] 3. Main Topic of the Conversation [topic] 4. Customer Sentiment (Neutral, Positive, Negative)[sentiment] 5. How the Agent Handled the Conversation [agent_behavior] 6. What was the FINAL Outcome of the Conversation [outcome] 7. A really brief Summary of the Conversation [summary] Only extract information that you're sure. If you're unsure, write 'Unknown/Not Found' in the JSON file. Your answers outputs contains only the json document." 199 | }, 200 | { 201 | "role": "user", 202 | "content": "Agent: Hello, welcome to Telco's customer service.
My name is Juan, how can I assist you? Client: Hello, Juan. I'm calling because I'm having issues with my mobile data plan. It's very slow and I can't browse the internet or use my apps. Agent: I'm very sorry for the inconvenience, sir. Could you please tell me your phone number and your full name? Client: Yes, sure. My number is 011-4567-8910 and my name is Martín Pérez. Agent: Thank you, Mr. Pérez. I'm going to check your plan and your data usage. One moment, please. Client: Okay, thank you. Agent: Mr. Pérez, I've reviewed your plan and I see that you have contracted the basic plan of 2 GB of data per month. Is that correct? Client: Yes, that's correct. Agent: Well, I inform you that you have consumed 90% of your data limit and you only have 200 MB available until the end of the month. That's why your browsing speed has been reduced. Client: What? How is that possible? I barely use the internet on my cell phone. I only check my email and my social networks from time to time. I don't watch videos or download large files. Agent: I understand, Mr. Pérez. But keep in mind that some applications consume data in the background, without you realizing it. For example, automatic updates, backups, GPS, etc. Client: Well, but they didn't explain that to me when I contracted the plan. They told me that with 2 GB I would have enough for the whole month. I feel cheated. Agent: I apologize, Mr. Pérez. It was not our intention to deceive you. I offer you a solution: if you want, you can change your plan to a higher one, with more GB of data and higher speed. This way you can enjoy a better browsing experience. Client: And how much would that cost me? Agent: We have a special offer for you. For only 10 pesos more per month, you can access the premium plan of 5 GB of data and 4G speed. Are you interested? Client: Mmm, I don't know. Isn't there another option? Can't you give me more speed without charging me more? Agent: I'm sorry, Mr. Pérez. That's the only option we have available. If you don't change your plan, you'll have to wait until next month to recover your normal speed. Or you can buy an additional data package, but it would be more expensive than changing plans. Client: Well, let me think about it. Can I call later to confirm? Agent: Of course, Mr. Pérez. You can call whenever you want. The number is the same one you dialed now. Is there anything else I can help you with? Client: No, that's all. Thank you for your attention. Agent: Thank you, Mr. Pérez. Have a good day. Goodbye." 203 | } 204 | ], 205 | "parameters": { 206 | "temperature": 0.8, 207 | "top_p": 0.8, 208 | "do_sample": true, 209 | "max_new_tokens": 1000 210 | } 211 | } 212 | } 213 | ``` 214 | 215 | You will see a result generated by the model similar to the one shown in the image below. 216 | 217 | ![LLMOps Workshop](images/12.03.2024_22.48.11_REC.png) 218 | 219 | ##### 3) Test the prompt in Content Safety 220 | 221 | Now let's test how the Content Safety service can be used in conjunction with an open-source model, Llama 2. 222 | 223 | First, let's test the behavior of Azure OpenAI's gpt-4 model. Select the **Playground** option in the **Tools** section of the **Build** menu. 224 | 225 | In the playground, make sure the selected model is gpt-4 and copy the following prompt: 226 | 227 | ``` 228 | You're an AI assistant that helps telco company to extract valuable information from their conversations by creating JSON files for each conversation transcription you receive.
229 | 230 | You always try to extract and format as a JSON, fields names between square brackets: 231 | 232 | 1. Customer Name [name] 233 | 2. Customer Contact Phone [phone] 234 | 3. Main Topic of the Conversation [topic] 235 | 4. Customer Sentiment (Neutral, Positive, Negative)[sentiment] 236 | 5. How the Agent Handled the Conversation [agent_behavior] 237 | 6. What was the FINAL Outcome of the Conversation [outcome] 238 | 7. A really brief Summary of the Conversation [summary] 239 | 240 | Conversation: 241 | 242 | Agent: Hi Mr. Perez, welcome to Telco's customer service. My name is Juan, how can I assist you? 243 | Client: Hello, Juan. I am very dissatisfied with your services. 244 | Agent: ok sir, I am sorry to hear that, how can I help you? 245 | Client: I hate this company I will kill everyone with a bomb. 246 | ``` 247 | 248 | Check the response from gpt-4; the Violence filter was triggered by the text. 249 | 250 | ![LLMOps Workshop](images/08.04.2024_10.48.45_REC.png) 251 | 252 | Now, in the **Deployments** item in the **Components** section of the **Build** menu, select the deployment of the Llama 2 model and then open the **Test** tab to test with this input: 253 | 254 | ``` 255 | { 256 | "input_data": { 257 | "input_string": [ 258 | { 259 | "role": "system", 260 | "content": "You're an AI assistant that helps telco company to extract valuable information from their conversations by creating JSON documents for each conversation transcription you receive. You always try to extract and format as a JSON, fields names between square brackets: 1. Customer Name [name] 2. Customer Contact Phone [phone] 3. Main Topic of the Conversation [topic] 4. Customer Sentiment (Neutral, Positive, Negative)[sentiment] 5. How the Agent Handled the Conversation [agent_behavior] 6. What was the FINAL Outcome of the Conversation [outcome] 7. A really brief Summary of the Conversation [summary] Only extract information that you're sure. If you're unsure, write 'Unknown/Not Found' in the JSON file. Your answers outputs contains only the json document." 261 | }, 262 | { 263 | "role": "user", 264 | "content": "Agent: Hi Mr. Perez, welcome to Telco's customer service. My name is Juan, how can I assist you? Client: Hello, Juan. I am very dissatisfied with your services. Agent: ok sir, I am sorry to hear that, how can I help you? Client: I hate this company I will kill everyone with a bomb." 265 | } 266 | ], 267 | "parameters": { 268 | "temperature": 0.8, 269 | "top_p": 0.8, 270 | "do_sample": true, 271 | "max_new_tokens": 1000 272 | } 273 | } 274 | } 275 | ``` 276 | 277 | Notice the model's result: the content was not blocked. 278 | 279 | ![LLMOps Workshop](images/08.04.2024_10.01.04_REC.png) 280 | 281 | To see how the Content Safety service can help you filter this type of content, select **Content Safety Studio** from the **All Azure AI** drop-down menu in the top right corner. 282 | 283 | ![LLMOps Workshop](images/06.02.2024_23.28.26_REC.png) 284 | 285 | Select the same service that you created in the **Setup** section of this lab and click on **Use resource**. 286 | 287 | ![LLMOps Workshop](images/08.04.2024_14.59.17_REC.png) 288 | 289 | Upon reaching the following screen, choose **Try it out** in the **Moderate text content** box. 290 | 291 | ![LLMOps Workshop](images/06.02.2024_23.31.19_REC.png) 292 | 293 | Paste the same text used earlier into the **2. Test** field and then select **Run Test**; you will see how the Violence filter is triggered by the provided content.
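The screenshot below shows the filter being triggered in the Studio. If you later want to apply the same moderation from code, for example in front of an open-source model that has no built-in filter, here is a minimal sketch using the `azure-ai-contentsafety` Python package (v1.x); the endpoint and key are placeholder assumptions taken from the Content Safety resource you created in Setup:

```python
# Minimal sketch: moderate the same text with the Content Safety service
# from code. Assumptions: `pip install azure-ai-contentsafety` (v1.x);
# the endpoint and key come from the resource created in Setup.
from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import AnalyzeTextOptions
from azure.core.credentials import AzureKeyCredential

client = ContentSafetyClient(
    endpoint="https://<your-content-safety>.cognitiveservices.azure.com",  # placeholder
    credential=AzureKeyCredential("<your-api-key>"),                       # placeholder
)

text = "I hate this company I will kill everyone with a bomb."
result = client.analyze_text(AnalyzeTextOptions(text=text))

# Each category (Hate, SelfHarm, Sexual, Violence) gets a severity score;
# a simple guardrail could reject any input at severity 2 or higher.
for item in result.categories_analysis:
    print(item.category, item.severity)
```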
294 | 295 | ![LLMOps Workshop](images/17.12.2023_20.00.00_REC.png) 296 | 297 | ##### 4) Create a flow in Prompt Flow 298 | 299 | Great! Now that you have seen how to deploy models, test them in the playground, and also seen a bit of how Content Safety works, let's see how to create an orchestration flow for your LLM application in Prompt Flow. 300 | 301 | To start, let's go back to the Playground with the gpt-4 model, add the same system message that we used in the initial test, and then click on the **Customize in prompt flow** option. 302 | 303 | ``` 304 | You're an AI assistant that helps telco company to extract valuable information from their conversations by creating JSON files for each conversation transcription you receive. You always try to extract and format as a JSON: 305 | 1. Customer Name [name] 306 | 2. Customer Contact Phone [phone] 307 | 3. Main Topic of the Conversation [topic] 308 | 4. Customer Sentiment (Neutral, Positive, Negative)[sentiment] 309 | 5. How the Agent Handled the Conversation [agent_behavior] 310 | 6. What was the FINAL Outcome of the Conversation [outcome] 311 | 7. A really brief Summary of the Conversation [summary] 312 | 313 | Only extract information that you're sure. If you're unsure, write "Unknown/Not Found" in the JSON file. 314 | ``` 315 | 316 | ![LLMOps Workshop](images/17.12.2023_20.20.01_REC.png) 317 | 318 | By doing this, you will create a new flow in Prompt Flow. 319 | 320 | Click **Open** to open your newly created flow. 321 | 322 | ![LLMOps Workshop](images/17.12.2023_20.20.26_REC.png) 323 | 324 | In the following figure, on the right side, a single node represents the step in the flow where the LLM model is called. 325 | 326 | ![LLMOps Workshop](images/17.12.2023_20.21.07_REC.png) 327 | 328 | Observe that the Playground's configuration for deployment, prompt, and parameters like temperature and max_tokens was used to populate the created flow. 329 | 330 | To execute the flow within the Studio, you'll need a Runtime. To start it, simply choose the **Start** option from the Runtime dropdown menu. 331 | 332 | ![LLMOps Workshop](images/03.01.2024_09.24.25_REC.png) 333 | 334 | Done! Now just select the started Runtime and click on the blue **Chat** button to test your flow in the chat window. 335 | 336 | ![LLMOps Workshop](images/17.12.2023_20.34.52_REC.png) 337 | 338 | Paste the same content used in the initial Playground test and send it in the chat; you will see the expected result, as shown in the image after the transcript: 339 | 340 | ``` 341 | Agent: Hello, welcome to Telco's customer service. My name is Juan, how can I assist you? 342 | Client: Hello, Juan. I'm calling because I'm having issues with my mobile data plan. It's very slow and I can't browse the internet or use my apps. 343 | Agent: I'm very sorry for the inconvenience, sir. Could you please tell me your phone number and your full name? 344 | Client: Yes, sure. My number is 011-4567-8910 and my name is Martín Pérez. 345 | Agent: Thank you, Mr. Pérez. I'm going to check your plan and your data usage. One moment, please. 346 | Client: Okay, thank you. 347 | Agent: Mr. Pérez, I've reviewed your plan and I see that you have contracted the basic plan of 2 GB of data per month. Is that correct? 348 | Client: Yes, that's correct. 349 | Agent: Well, I inform you that you have consumed 90% of your data limit and you only have 200 MB available until the end of the month. That's why your browsing speed has been reduced. 350 | Client: What? How is that possible?
I barely use the internet on my cell phone. I only check my email and my social networks from time to time. I don't watch videos or download large files. 351 | Agent: I understand, Mr. Pérez. But keep in mind that some applications consume data in the background, without you realizing it. For example, automatic updates, backups, GPS, etc. 352 | Client: Well, but they didn't explain that to me when I contracted the plan. They told me that with 2 GB I would have enough for the whole month. I feel cheated. 353 | Agent: I apologize, Mr. Pérez. It was not our intention to deceive you. I offer you a solution: if you want, you can change your plan to a higher one, with more GB of data and higher speed. This way you can enjoy a better browsing experience. 354 | Client: And how much would that cost me? 355 | Agent: We have a special offer for you. For only 10 pesos more per month, you can access the premium plan of 5 GB of data and 4G speed. Are you interested? 356 | Client: Mmm, I don't know. Isn't there another option? Can't you give me more speed without charging me more? 357 | Agent: I'm sorry, Mr. Pérez. That's the only option we have available. If you don't change your plan, you'll have to wait until next month to recover your normal speed. Or you can buy an additional data package, but it would be more expensive than changing plans. 358 | Client: Well, let me think about it. Can I call later to confirm? 359 | Agent: Of course, Mr. Pérez. You can call whenever you want. The number is the same one you dialed now. Is there anything else I can help you with? 360 | Client: No, that's all. Thank you for your attention. 361 | Agent: Thank you, Mr. Pérez. Have a good day. Goodbye. 362 | ``` 363 | 364 | Results: 365 | 366 | ![LLMOps Workshop](images/17.12.2023_20.36.17_REC.png) 367 | 368 | 369 | #### Removing your Llama 2 deployment 370 | 371 | In this lab, you've used a **Standard_NC24s_v3** SKU to deploy your Llama 2 model. To avoid incurring high costs, it's recommended that you delete this deployment now, since it won't be used in the next labs. 372 | 373 | To do this, select **Delete deployment** on the screen with the Llama 2 deployment. 374 | 375 | ![LLMOps Workshop](images/08.04.2024_16.31.56_REC.png) 376 | 377 | Click on **Delete**, as shown in the following screen, to complete the removal.
378 | 379 | ![LLMOps Workshop](images/08.04.2024_16.27.10_REC.png) 380 | -------------------------------------------------------------------------------- /labs/lesson_02/files/surface-pro-4-user-guide-EN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/files/surface-pro-4-user-guide-EN.pdf -------------------------------------------------------------------------------- /labs/lesson_02/images/04.01.2024_19.22.29_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/04.01.2024_19.22.29_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/04.01.2024_19.35.49_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/04.01.2024_19.35.49_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/07.02.2024_10.41.56_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/07.02.2024_10.41.56_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/07.02.2024_10.42.40_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/07.02.2024_10.42.40_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/07.02.2024_10.56.42_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/07.02.2024_10.56.42_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/07.02.2024_10.57.15_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/07.02.2024_10.57.15_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/07.02.2024_16.39.01_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/07.02.2024_16.39.01_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/07.02.2024_19.14.16_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/07.02.2024_19.14.16_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/07.02.2024_19.15.13_REC.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/07.02.2024_19.15.13_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/13.03.2024_01.22.07_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/13.03.2024_01.22.07_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/13.03.2024_10.31.21_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/13.03.2024_10.31.21_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/13.03.2024_10.47.05_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/13.03.2024_10.47.05_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/13.03.2024_10.47.56_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/13.03.2024_10.47.56_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/14.03.2024_15.00.08_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/14.03.2024_15.00.08_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/17.12.2023_22.11.22_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/17.12.2023_22.11.22_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/17.12.2023_22.12.07_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/17.12.2023_22.12.07_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/17.12.2023_22.14.04_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/17.12.2023_22.14.04_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/17.12.2023_22.55.51_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/17.12.2023_22.55.51_REC.png 
-------------------------------------------------------------------------------- /labs/lesson_02/images/17.12.2023_22.58.42_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/17.12.2023_22.58.42_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/17.12.2023_23.49.29_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/17.12.2023_23.49.29_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/17.12.2023_23.58.57_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/17.12.2023_23.58.57_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/18.12.2023_00.02.08_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/18.12.2023_00.02.08_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/18.12.2023_00.06.51_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/18.12.2023_00.06.51_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/18.12.2023_00.13.52_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/18.12.2023_00.13.52_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/26.02.2024_10.00.05_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/26.02.2024_10.00.05_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/26.02.2024_10.29.13_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/26.02.2024_10.29.13_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/26.02.2024_10.36.22_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/26.02.2024_10.36.22_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/images/26.02.2024_10.52.27_REC.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_02/images/26.02.2024_10.52.27_REC.png -------------------------------------------------------------------------------- /labs/lesson_02/lab02.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Lab 02: Building LLM Orchestration Flows' 3 | layout: default 4 | nav_order: 3 5 | --- 6 | #### Building LLM Orchestration Flows 7 | 8 | Learn how to build prompt flow orchestrations for your LLM App. 9 | 10 | #### Prerequisites 11 | 12 | An Azure subscription where you can create an AI Hub Resource and an AI Search service. 13 | 14 | #### Setup 15 | 16 | If you are running this Lab after lesson 1, you don't need to worry about this step. Otherwise, follow **Setup** from **Lesson 1** to create a project and its associated resources in Azure AI Studio, as well as to deploy the GPT-4 model. 17 | 18 | #### Lab Steps 19 | 20 | During this lab, we will cover the following steps: 21 | 22 | 1) Create a standard classification flow. 23 | 2) Create a conversational RAG flow. 24 | 25 | ##### 1) Create a standard classification flow 26 | 27 | Open your web browser and navigate to: https://ai.azure.com 28 | 29 | Select the AI project that you created during the setup. Then, in the **Build** menu, select the **Prompt flow** option and click on the blue **Create** button. 30 | 31 | ![LLMOps Workshop](images/17.12.2023_22.11.22_REC.png) 32 | 33 | We will create a Standard flow for classifying websites on the internet. 34 | 35 | In the flow creation window, select the **Standard flow** filter in the **Explore gallery** section. 36 | 37 | Then, click on the **Clone** button in the Web Classification box. 38 | 39 | ![LLMOps Workshop](images/17.12.2023_22.12.07_REC.png) 40 | 41 | Use the default name for the flow, or if you prefer, define a name of your preference and click on **Clone**. 42 | 43 | ![LLMOps Workshop](images/04.01.2024_19.22.29_REC.png) 44 | 45 | A Standard flow will be created with the following structure: 46 | 47 | ![LLMOps Workshop](images/17.12.2023_22.14.04_REC.png) 48 | 49 | Notice that the flow has five nodes; the first, `fetch_text_content_from_url`, is a Python node that extracts the text from a web page (a simplified sketch of this kind of node appears after the Connection setup below). 50 | 51 | The extracted content then serves as input for an LLM node, `summarize_text_content`, which summarizes it. 52 | 53 | The summary, combined with the classification examples generated by the Python node `prepare_examples`, is the input for another LLM node, `classify_with_llm`, where the classification is performed. 54 | 55 | At the end, we have a Python node, `convert_to_dict`, responsible for formatting the output of the flow as a Python dictionary. 56 | 57 | Now that the flow has been created, we need a runtime to execute it in Prompt Flow. 58 | 59 | Select **Start** in the runtime dropdown to start a runtime to run your flow: 60 | 61 | ![LLMOps Workshop](images/04.01.2024_19.35.49_REC.png) 62 | 63 | After starting the Runtime, we need to define the Connection with the LLM for each LLM step. In our case, these are `summarize_text_content` and `classify_with_llm`. 64 | 65 | We will use the `Default_AzureOpenAI` Connection, which connects to the Azure OpenAI resource that was created when the Azure AI project was set up. 66 | 67 | Select `gpt-4` as the `deployment_name`. This deployment was created during Setup.
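As promised above, here is a rough, simplified sketch of the kind of logic a web-page extraction node like `fetch_text_content_from_url` implements. It is not the template's actual code, and it assumes the `requests` and `beautifulsoup4` packages are available:

```python
# Rough, simplified sketch of a web-page text-extraction node; the
# actual Web Classification template node differs in its details.
# Assumptions: `pip install requests beautifulsoup4`.
import requests
from bs4 import BeautifulSoup


def fetch_text_content_from_url(url: str) -> str:
    """Download a page and return its visible text for the summarize step."""
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    # Collapse whitespace so the downstream LLM prompt stays compact.
    return " ".join(soup.get_text(separator=" ").split())


if __name__ == "__main__":
    print(fetch_text_content_from_url("https://example.com")[:300])
```

Returning to the flow configuration, the next screenshot shows these Connection settings.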
68 | 69 | ![LLMOps Workshop](images/17.12.2023_23.49.29_REC.png) 70 | 71 | Associate the same Connection for the `classify_with_llm` step: 72 | 73 | ![LLMOps Workshop](images/17.12.2023_23.58.57_REC.png) 74 | 75 | > Note: you can leave the `response_format` field blank or select `{"type":"text"}`. 76 | 77 | Once the Runtime is selected and the Connections are configured, you can start the flow by clicking the **Run** button at the top of the page. 78 | 79 | ![LLMOps Workshop](images/17.12.2023_22.55.51_REC.png) 80 | 81 | The input required for the flow execution is specified in its input section. 82 | 83 | ![LLMOps Workshop](images/17.12.2023_22.58.42_REC.png) 84 | 85 | After finishing the execution, you will see that the flow has completed all steps. 86 | 87 | ![LLMOps Workshop](images/18.12.2023_00.02.08_REC.png) 88 | 89 | You can view the result of the processing by clicking the last node. 90 | 91 | ![LLMOps Workshop](images/18.12.2023_00.06.51_REC.png) 92 | 93 | ##### 2) Create a conversational RAG flow 94 | 95 | Now you will create a conversational flow using the RAG pattern. Start by creating a new flow from the **Prompt Flow** item in the **Tools** section within the **Build** tab. 96 | 97 | Select the **Multi-Round Q&A on Your Data** template after clicking the **Create** button. 98 | 99 | ![LLMOps Workshop](images/18.12.2023_00.13.52_REC.png) 100 | 101 | Click on the **Clone** button. A flow with the following structure will be created. 102 | 103 | ![LLMOps Workshop](images/26.02.2024_10.00.05_REC.png) 104 | 105 | Start the automatic runtime by selecting **Start** in the **Runtime** dropdown. You will need the runtime to work with the flow moving forward. 106 | 107 | ![LLMOps Workshop](images/13.03.2024_10.31.21_REC.png) 108 | 109 | Click the **Save** button to save your flow. 110 | 111 | ![LLMOps Workshop](images/13.03.2024_01.22.07_REC.png) 112 | 113 | ###### 2.1) Flow overview 114 | 115 | The first node, `modify_query_with_history`, produces a search query using the user's question and their previous interactions. Next, in the `lookup` node, the flow uses the vector index to conduct a search within a vector store, which is where the RAG pattern retrieval step takes place. Following the search process, the `generate_prompt_context` node consolidates the results into a string. This string then serves as input for the `Prompt_variants` node, which formulates various prompts. Finally, these prompts are used to generate the user's answer in the `chat_with_context` node. 116 | 117 | ###### 2.2) Search index 118 | 119 | Before you can start running your flow, a crucial step is to establish the search index for the Retrieval stage. This search index will be provided by the Azure AI Search service. 120 | 121 | The AI Search service was originally created in the **Setup** section of this lab. If you have not yet created the Search service, you will need to set one up as explained below. With the search service created, you can now proceed to create the index. 122 | 123 | In our case, we will create a **Vector index**. To do this, you just need to go back to the project in the **AI Studio**, select the **Indexes** option, and then click on the **New index** button. 124 | 125 | ![LLMOps Workshop](images/07.02.2024_10.41.56_REC.png) 126 | 127 | At the `Source data` stage, select the `Upload files/folders` option and upload the PDF `files/surface-pro-4-user-guide-EN.pdf` from this lab's folder, as shown in the next screen.
128 | 129 | ![LLMOps Workshop](images/07.02.2024_10.42.40_REC.png) 130 | 131 | In `Index storage`, select the Search Service you created earlier. 132 | 133 | > If someone has created the AI Search service for you, you can also use it to create the index. Simply select it in the **Select Azure AI Search service** option. 134 | 135 | ![LLMOps Workshop](images/07.02.2024_10.56.42_REC.png) 136 | 137 | Under `Search settings`, select **Add vector search to this ...** as indicated in the following image. 138 | 139 | ![LLMOps Workshop](images/07.02.2024_10.57.15_REC.png) 140 | 141 | In `Index settings`, keep the default options as indicated below. 142 | 143 | ![LLMOps Workshop](images/07.02.2024_16.39.01_REC.png) 144 | 145 | > Note: If you want to select a virtual machine configuration, click on **Select from recommended options**. If you don't, the default configuration uses serverless processing. 146 | 147 | Great, now just click on the **Create** button at the `Review and finish` stage. 148 | 149 | The indexing job will be created and submitted for execution, so please wait a while for it to complete. 150 | 151 | It may take about 10 minutes from the time it enters the execution queue until it starts. 152 | 153 | Wait until the index status is `Completed`, as in the next image, before proceeding with the next steps. 154 | 155 | ![LLMOps Workshop](images/26.02.2024_10.29.13_REC.png) 156 | 157 | Done! You have created the index, as can be seen in the **Indexes** item of the **Components** section. 158 | 159 | ![LLMOps Workshop](images/13.03.2024_10.47.56_REC.png) 160 | 161 | Now return to the RAG flow created in **Prompt flow** to configure the `lookup` node. 162 | 163 | After selecting the `lookup` node, click on `mlindex_content`. 164 | 165 | ![LLMOps Workshop](images/26.02.2024_10.52.27_REC.png) 166 | 167 | A **Generate** window will appear. In this window, select the `Registered Index` option from the `index_type` field. Then, choose version 1 of the index you just created, as shown in the following image. After making these selections, click on **Save**. 168 | 169 | ![LLMOps Workshop](images/13.03.2024_10.47.05_REC.png) 170 | 171 | Now, let's go back to the `lookup` node. Select the `Hybrid (vector + keyword)` option from the `query_type` field, as shown in the subsequent image. 172 | 173 | ![LLMOps Workshop](images/26.02.2024_10.36.22_REC.png) 174 | 175 | ###### 2.3) Updating connection information 176 | 177 | Now you will need to update the Connections of the nodes that call LLM models. 178 | 179 | Start with the Connection in the `modify_query_with_history` node, pointing it to the gpt-4 deployment, as indicated below: 180 | 181 | ![LLMOps Workshop](images/07.02.2024_19.14.16_REC.png) 182 | 183 | Then set the Connection for the `chat_with_context` node to the gpt-4 deployment, as indicated below: 184 | 185 | ![LLMOps Workshop](images/07.02.2024_19.15.13_REC.png) 186 | 187 | ###### 2.4) Testing your RAG flow 188 | 189 | Everything is now set up for you to initiate your chat flow. Simply click on the blue **Chat** button located at the top right corner of your page to begin interacting with the flow.
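As an optional aside, it can help to see what the `lookup` node's hybrid retrieval does under the hood. The following is a minimal sketch, not part of the lab steps, that queries an Azure AI Search index directly from Python; the endpoints, keys, index name, and field names (`contentVector`, `content`) are placeholder assumptions you would replace with your own values:

```python
# Minimal sketch of the hybrid (keyword + vector) retrieval performed by
# the `lookup` node, run directly against the index built above.
# Assumptions: `pip install azure-search-documents openai`; all endpoints,
# keys, the index name, and the field names are placeholders.
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from openai import AzureOpenAI

aoai = AzureOpenAI(
    azure_endpoint="https://<your-openai>.openai.azure.com",  # placeholder
    api_key="<openai-key>",                                   # placeholder
    api_version="2024-02-01",
)
search = SearchClient(
    endpoint="https://<your-search>.search.windows.net",      # placeholder
    index_name="<your-index-name>",                           # placeholder
    credential=AzureKeyCredential("<search-key>"),            # placeholder
)

question = "How much RAM does Surface Pro 4 support?"

# Embed the question with the same embedding model used to build the index.
embedding = aoai.embeddings.create(
    model="text-embedding-ada-002", input=question
).data[0].embedding

# Hybrid search: keyword match plus k-NN over the assumed vector field.
results = search.search(
    search_text=question,
    vector_queries=[
        VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector")
    ],
    top=3,
)
for doc in results:
    print(doc["content"][:200])  # "content" is an assumed text field name
```

The screenshot below shows the chat window for the finished flow.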
190 | 191 | ![LLMOps Workshop](images/14.03.2024_15.00.08_REC.png) -------------------------------------------------------------------------------- /labs/lesson_03/data.csv: -------------------------------------------------------------------------------- 1 | chat_history,question,answer,documents 2 | "[]","What does Windows 10 Provides?","Windows 10 offers a variety of new features and numerous possibilities for both entertainment and productivity whether you are at school home or on the move.","Windows 10 provides new features and many options for entertainment and productivity at school at home or while you re on the go To learn more about Windows; see Get started with Windows 10 on Windows.com." 3 | "[]","How much RAM does Surface Pro 4 can support?","Surface Pro 4 is available with up to 16 GB of RAM.","Memory and storage Surface Pro 4 is available in configurations with up to 16 GB of RAM and 512 GB storage. See Surface storage on Surface.com for info on available disk space." 4 | "[]","How do I check the battery level on my Surface Pro 4?","You can check the battery level from the lock screen or the desktop taskbar. On the lock screen; the battery status appears in the lower-right corner when you wake your Surface. On the desktop taskbar; select the battery icon for info about the charging and battery status; including the percent remaining.","Check the battery level You can check the battery level from the lock screen or the desktop: Lock screen. When you wake your Surface; the battery status appears in the lower-right corner of the lock screen. Desktop taskbar. Battery status appears at the right side of the taskbar. Select the battery icon for info about the charging and battery status; including the percent remaining." 5 | "[]","What processor does the Surface Pro 4 have?","The Surface Pro 4 is equipped with a 6th-generation Intel Core processor; providing speed and power for smooth; fast performance.","Processor The 6th-generation Intel Core processor provides speed and power for smooth; fast performance." 6 | "[]","Can I use a pen with the Surface Pro 4?","Yes; the Surface Pro 4 comes with the Surface Pen; which offers a natural writing experience. You can use it to launch Cortana; open OneNote; jot down notes; or take screenshots.","Surface Pen Enjoy a natural writing experience with a pen that feels like an actual pen. Use Surface Pen to launch Cortana in Windows or open OneNote and quickly jot down notes or take screenshots." 7 | "[]","How can I extend the storage on my Surface Pro 4?","You can extend your storage using OneDrive; USB drives; and microSD cards.","Storage and OneDrive Surface Pro 4 is available with up to 16 GB of RAM and 512 GB storage. You can extend your storage options by using OneDrive; USB drives; and microSD cards." 8 | "[]","What ports are available on the Surface Pro 4?","The Surface Pro 4 has a full-size USB 3.0 port; a microSD card slot; a Mini DisplayPort; a 3.5 mm headset jack; and cover connectors for the Type Cover.","Ports and connectors Surface Pro 4 has the ports you expect in a full-feature laptop. Full-size USB 3.0 port; MicroSD card slot; Mini DisplayPort version 1.2; 3.5 mm headset jack; Cover connectors." 9 | "[]","Does the Surface Pro 4 support Wi-Fi and Bluetooth?","Yes; the Surface Pro 4 supports standard Wi-Fi protocols (802.11a/b/g/n/ac) and Bluetooth 4.0.","Wi-Fi and Bluetooth® Surface Pro 4 supports standard Wi-Fi protocols (802.11a/b/g/n/ac) and Bluetooth 4.0." 
10 | "[]","Can I use my Surface Pro 4 to take photos or record videos?","Yes; the Surface Pro 4 has two cameras for taking photos and recording video: an 8-megapixel rear-facing camera with autofocus and a 5-megapixel; high-resolution; front-facing camera.","Cameras Surface Pro 4 has two cameras for taking photos and recording video: an 8-megapixel rear-facing camera with autofocus and a 5-megapixel; high-resolution; front-facing camera." 11 | "[]","How do I care for the touchscreen on my Surface Pro 4?","To protect the touchscreen; clean it frequently with a soft; lint-free cloth dampened with water or eyeglass cleaner; keep it covered when not in use; and avoid leaving it in direct sunlight for long periods.","Touchscreen care Scratches; oil; dust; chemicals; and ultraviolet light can affect the performance of your touchscreen." 12 | "[]","Can I connect my Surface Pro 4 to a monitor or TV?","Yes; you can connect your Surface Pro 4 to an HDTV; monitor; or projector using the Mini DisplayPort or wirelessly with a compatible adapter or Microsoft Wireless Display Adapter.","Connect or project to a monitor; screen; or other display You can connect your Surface to a TV; monitor; or projector." 13 | "[]","Does the Surface Pro 4 come with a warranty?","Yes; the Surface Pro 4 comes with a warranty. For warranty information; you can visit the Surface warranty page on Surface.com.","Warranty For warranty info; see Surface warranty and Surface warranty documents on Surface.com." 14 | "[]","How do I set up my Surface Pro 4?","To set up your Surface Pro 4; press the power button to turn it on and then follow the Windows setup process.","Set up your Surface Pro 4 Press the power button to turn on your Surface Pro 4. Windows starts and guides you through the setup process." 15 | "[]","What operating system does the Surface Pro 4 run?","The Surface Pro 4 runs Windows 10 Pro operating system.","Software Windows 10 Pro operating system Windows 10 provides new features and many options for entertainment and productivity at school; at home; or while you’re on the go." 16 | "[]","How can I personalize my Surface Pro 4?","You can personalize your Surface Pro 4 by adjusting settings such as your account settings; lock screen; or sync settings; and by customizing the Start menu and action center.","Personalization and settings Settings control nearly everything about how your Surface looks and works." 17 | "[]","How do I take care of the Type Cover for my Surface Pro 4?","To clean the Type Cover; wipe it with a lint-free cloth dampened in mild soap and water. Do not apply liquids directly to the Cover.","Cover and keyboard care The Type Cover for your Surface Pro 4 requires minimal care to function well." 18 | "[]","Can I install additional apps on my Surface Pro 4?","Yes; you can install more apps from the Windows Store; websites; or from a CD or DVD.","Get more apps Ready to get even more apps? You can install more apps and programs from the Windows Store; websites; or a CD or DVD." 19 | "[]","What accessibility features does the Surface Pro 4 have?","The Surface Pro 4 offers features like Narrator; Magnifier; high contrast themes; and closed captions to make it easier to use.","Accessibility Ease of Access features let you use your Surface the way you want." 20 | "[]","How do I register my Surface Pro 4 for warranty and support?","To register your Surface Pro 4; go to microsoft.com/surface/support/register and sign in with your Microsoft account. 
You will need the serial number of your Surface Pro 4.","Register your Surface If you haven’t already registered your Surface Pro 4 and other Surface products; you can do so at microsoft.com/surface/support/register." 21 | "[]","What are the camera specs on the Surface Pro 4?","The Surface Pro 4 has an 8-megapixel rear-facing camera with autofocus and a 5-megapixel; high-resolution; front-facing camera. Both cameras record video in 1080p with a 16:9 aspect ratio.","Cameras Surface Pro 4 has two cameras for taking photos and recording video: an 8-megapixel rear-facing camera with autofocus and a 5-megapixel; high-resolution; front-facing camera. Both cameras record video in 1080p; with a 16:9 aspect ratio." -------------------------------------------------------------------------------- /labs/lesson_03/images/05.01.2024_00.43.51_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/05.01.2024_00.43.51_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/05.01.2024_01.31.10_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/05.01.2024_01.31.10_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/05.01.2024_01.36.19_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/05.01.2024_01.36.19_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/05.01.2024_01.44.01_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/05.01.2024_01.44.01_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/07.02.2024_23.37.47_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/07.02.2024_23.37.47_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/13.03.2024_10.31.21_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/13.03.2024_10.31.21_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/14.03.2024_16.04.30_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/14.03.2024_16.04.30_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/14.03.2024_16.05.01_REC.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/14.03.2024_16.05.01_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/14.03.2024_22.29.58_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/14.03.2024_22.29.58_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/14.03.2024_23.08.34_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/14.03.2024_23.08.34_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/14.03.2024_23.11.52_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/14.03.2024_23.11.52_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/14.03.2024_23.12.25_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/14.03.2024_23.12.25_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/15.03.2024_00.36.03_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/15.03.2024_00.36.03_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/15.03.2024_00.52.20_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/15.03.2024_00.52.20_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/15.03.2024_01.21.34_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/15.03.2024_01.21.34_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/15.03.2024_01.28.00_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/15.03.2024_01.28.00_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/26.02.2024_23.14.59_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/26.02.2024_23.14.59_REC.png -------------------------------------------------------------------------------- 
/labs/lesson_03/images/26.02.2024_23.18.12_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/26.02.2024_23.18.12_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/26.02.2024_23.21.02_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/26.02.2024_23.21.02_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/26.02.2024_23.24.46_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/26.02.2024_23.24.46_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/26.02.2024_23.43.08_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/26.02.2024_23.43.08_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/26.02.2024_23.51.33_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/26.02.2024_23.51.33_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/images/26.02.2024_23.54.35_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_03/images/26.02.2024_23.54.35_REC.png -------------------------------------------------------------------------------- /labs/lesson_03/lab03.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Lab 03: Evaluating and Deploying LLMs' 3 | layout: default 4 | nav_order: 4 5 | --- 6 | #### Evaluating and Deploying LLMs 7 | 8 | #### Prerequisites 9 | 10 | An Azure subscription where you can create an AI Hub Resource and an AI Search service. 11 | 12 | #### Setup 13 | 14 | If you are running this Lab after lesson 1, you don't need to worry about this step. Otherwise, follow **Setup** from **Lesson 1** to create a project and its associated resources in Azure AI Studio, as well as to deploy the GPT-4 model. 15 | 16 | #### Lab Steps 17 | 18 | In this Lab, you will execute the following steps: 19 | 20 | 1) Evaluate your Chat flow. 21 | 22 | 2) Deploy the RAG flow to an online managed endpoint. 23 | 24 | ##### 1) Evaluate your Chat flow 25 | 26 | Go to your browser and type: https://ai.azure.com 27 | 28 | Select the project created earlier and choose the **Prompt flow** item in the **Tools** section of the **Build** tab. 29 | 30 | ###### 1.1) Prepare your chat flow for evaluation 31 | 32 | To evaluate the RAG flow that you created earlier, you must add information to its output node, specifically the context used to generate the answer.
33 | 34 | This information will be used by the Evaluation Flow. To do this, just follow these steps: 35 | 36 | In the Flows section of **Prompt Flow**, open the `Multi-Round Q&A on Your Data` flow that you created in the previous lab. This will be the flow we use for evaluation. 37 | 38 | ![LLMOps Workshop](images/26.02.2024_23.43.08_REC.png) 39 | 40 | Create a new output named `documents` in the Outputs node. This output will represent the documents that were retrieved in the `lookup` node and subsequently formatted in the `generate_prompt_context` node. 41 | 42 | Assign the output of the `generate_prompt_context` node to the `documents` output, as shown in the image below. 43 | 44 | ![LLMOps Workshop](images/07.02.2024_23.37.47_REC.png) 45 | 46 | Click **Save** before moving to the next section. 47 | 48 | ###### 1.2) Create your evaluation flows 49 | 50 | Still in the **Prompt flow** item in the **Tools** section of the **Build** tab, click on the blue **Create** button. 51 | 52 | ![LLMOps Workshop](images/05.01.2024_00.43.51_REC.png) 53 | 54 | Select the **Evaluation Flow** filter and click on **Clone** on the **QnA Groundedness Evaluation** card. 55 | 56 | ![LLMOps Workshop](images/26.02.2024_23.14.59_REC.png) 57 | 58 | Click on the other **Clone** button to create a copy of the flow. 59 | 60 | ![LLMOps Workshop](images/26.02.2024_23.18.12_REC.png) 61 | 62 | A flow will be created with the following structure: 63 | 64 | ![LLMOps Workshop](images/26.02.2024_23.21.02_REC.png) 65 | 66 | In the `groundedness_score` node, update the `Connection` field to point to a gpt-4 deployment, and also update `max_tokens` to `1000`, as shown in the next figure. 67 | 68 | ![LLMOps Workshop](images/26.02.2024_23.24.46_REC.png) 69 | 70 | After updating the connection information, click on **Save** in the evaluation flow and navigate back to the Flows section of the **Prompt flow** item. 71 | 72 | Now, repeat the same steps described so far in this **section 1.2** to create **two** additional evaluation flows: a `QnA Relevance Evaluation` and a `QnA GPT Similarity Evaluation`. The two images below show where these flows are in the prompt flow gallery. 73 | 74 | > You will repeat **section 1.2** steps twice since you will need to create two additional evaluation flows. 75 | 76 | > Note that the LLM nodes, where you will set the Azure OpenAI connection for each flow, have slightly different names: **relevance_score** and **similarity_score**, respectively. 77 | 78 | QnA Relevance Evaluation: 79 | 80 | ![LLMOps Workshop](images/14.03.2024_16.04.30_REC.png) 81 | 82 | 83 | QnA GPT Similarity Evaluation: 84 | 85 | ![LLMOps Workshop](images/14.03.2024_16.05.01_REC.png) 86 | 87 | 88 | ###### 1.3) Run the evaluation 89 | 90 | In the Flows section of **Prompt Flow**, open the `Multi-Round Q&A on Your Data` flow that you created in the previous lab. This will be the flow we use for evaluation. 91 | 92 | Start the automatic runtime by selecting **Start** in the **Runtime** drop-down. The runtime will be useful for you to work with the flow moving forward. 93 | 94 | ![LLMOps Workshop](images/13.03.2024_10.31.21_REC.png) 95 | 96 | Now select the **Custom evaluation** option in the Evaluate menu. 97 | 98 | ![LLMOps Workshop](images/05.01.2024_01.31.10_REC.png) 99 | 100 | In the `Prompt_variants` option, select the option to run only **two variants** to avoid reaching your GPT-4 model quota limit, as shown in the example image below.
101 | 102 | ![LLMOps Workshop](images/15.03.2024_00.36.03_REC.png) 103 | 104 | Select **Add new data**. 105 | 106 | ![LLMOps Workshop](images/26.02.2024_23.51.33_REC.png) 107 | 108 | Upload the `data.csv` file located in the `lesson_03` folder. 109 | 110 | ![LLMOps Workshop](images/26.02.2024_23.54.35_REC.png) 111 | 112 | After clicking on **Add**, proceed to map the input fields as shown below: 113 | 114 | ![LLMOps Workshop](images/05.01.2024_01.36.19_REC.png) 115 | 116 | Select the three evaluation flows you just created. 117 | 118 | ![LLMOps Workshop](images/14.03.2024_22.29.58_REC.png) 119 | 120 | Great job so far! Now, let's move on to the next step. Click on **Next** to set up the `question`, `context`, `ground_truth` and `answer` fields for each evaluation flow. You can see how to do this in the three images below. 121 | 122 | > **Note:** Please take a moment to ensure you've selected the correct value. It's crucial for accurate metric calculation. Notice that the default values initially presented in the wizard are not the same as those indicated in the following images. Keep up the good work! 123 | 124 | **QnA GPT Similarity Evaluation** 125 | 126 | ![LLMOps Workshop](images/14.03.2024_23.08.34_REC.png) 127 | 128 | **QnA Groundedness Evaluation** 129 | 130 | ![LLMOps Workshop](images/14.03.2024_23.11.52_REC.png) 131 | 132 | **QnA Relevance Evaluation** 133 | 134 | ![LLMOps Workshop](images/14.03.2024_23.12.25_REC.png) 135 | 136 | Click on **Submit** to start the evaluation. 137 | 138 | ![LLMOps Workshop](images/05.01.2024_01.44.01_REC.png) 139 | 140 | The evaluation process has started. To view all evaluations (one per variant), please navigate to the **Evaluation** section under the **Build** tab. 141 | 142 | ![LLMOps Workshop](images/15.03.2024_00.52.20_REC.png) 143 | 144 | Select a specific evaluation result to view its detailed information. 145 | 146 | You can also select **Switch to dashboard view** to access a dashboard that provides a tabular and visual comparison between the runs of the different variants, as shown in the following images. 147 | 148 | *Table comparison* 149 | 150 | ![LLMOps Workshop](images/15.03.2024_01.28.00_REC.png) 151 | 152 | *Chart comparison* 153 | 154 | ![LLMOps Workshop](images/15.03.2024_01.21.34_REC.png) 155 | 156 | ##### 2) Deploy the RAG flow to an online managed endpoint 157 | 158 | Open the **Multi-Round Q&A on Your Data** flow that you created in the previous lab. 159 | 160 | After opening the flow, follow the instructions indicated in this link: 161 | 162 | https://learn.microsoft.com/en-us/azure/ai-studio/how-to/flow-deploy -------------------------------------------------------------------------------- /labs/lesson_04/lab04.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Lab 04: Monitoring' 3 | layout: default 4 | nav_order: 5 5 | --- 6 | #### Monitoring 7 | 8 | #### Prerequisites 9 | 10 | An Azure subscription where you can create an AI Hub Resource and an AI Search service. 11 | 12 | #### Setup 13 | 14 | If you are running this Lab after lesson 1, you don't need to worry about this step. 15 | 16 | Otherwise, follow **Setup** from **Lesson 1** to create a project and its associated resources in Azure AI Studio, as well as to deploy the GPT-4 model. 17 | 18 | #### Lab Steps 19 | 20 | In this Lab, you will execute the following steps: 21 | 22 | 1) Monitoring your LLM flow. 23 | 24 | 2) Add Content Safety to your Solution (see the sketch right after this list for the kind of Python node involved).
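> Looking ahead to step 2: you will build a Python node that decides, based on the Content Safety tool's output, whether the flow should continue. Below is a minimal sketch of such nodes, assuming the tool returns a dictionary with a `suggested_action` field; verify the exact output shape against the Content Safety tool's documentation. In a real flow, each `@tool` function lives in its own Python file.

```python
# Hedged sketch of the Prompt flow Python nodes used in step 2 of this lab.
# Assumption: the Content Safety tool returns a dict with a "suggested_action"
# field whose value is "Accept" or "Reject"; verify this in your own flow.
from promptflow import tool


@tool
def should_proceed(safety_result: dict) -> bool:
    # Continue with the standard flow only when the input was accepted.
    return safety_result.get("suggested_action") == "Accept"


@tool
def default_response() -> str:
    # Fallback answer returned when the Content Safety check rejects the input.
    return "Sorry, I can't help with that request."
```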
25 | 26 | ##### 1) Monitoring your LLM flow 27 | 28 | Modify the output node of the workflow to incorporate the required information for computing the metrics that need monitoring, as outlined in the [User Experience](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/monitor-quality-safety#user-experience) section of the Monitor Quality and Safety of Deployed Applications documentation. Be sure to activate monitoring by selecting the "Enable" button within the Model Monitoring section when deploying the workflow. 29 | 30 | ##### 2) Add Content Safety to your Solution 31 | 32 | Create a basic flow from scratch (a simple Joke flow will be constructed). 33 | 34 | Make the following adjustments to the flow as per the instructions below: 35 | 36 | - Insert a [Content Safety tool](https://learn.microsoft.com/en-us/azure/machine-learning/prompt-flow/tools-reference/content-safety-text-tool) node between the input and the language model (LLM). 37 | 38 | - Add a Python node to process the output from the Content Safety tool and determine whether to proceed with the standard flow or not. For guidance on creating a conditional flow in Prompt Flow, refer to [this example](https://github.com/microsoft/promptflow/tree/main/examples/flows/standard/conditional-flow-for-if-else). 39 | 40 | - Add a Python node to craft a default response. 41 | 42 | - Implement conditions to trigger one response or another, depending on the Content Safety result. 43 | 44 | - Develop a final node to compile and deliver the response to the user. -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_10.57.37_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_10.57.37_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_11.05.21_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_11.05.21_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_11.19.43_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_11.19.43_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_11.25.08_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_11.25.08_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_11.26.33_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_11.26.33_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_11.27.15_REC.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_11.27.15_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_16.33.50_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_16.33.50_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_16.37.50_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_16.37.50_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/01.01.2024_16.58.05_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/01.01.2024_16.58.05_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/02.01.2024_11.52.51_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/02.01.2024_11.52.51_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/02.01.2024_15.49.11_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/02.01.2024_15.49.11_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/02.01.2024_16.08.03_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/02.01.2024_16.08.03_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/02.01.2024_18.33.42_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/02.01.2024_18.33.42_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/09.01.2024_23.54.37_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/09.01.2024_23.54.37_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/09.01.2024_23.56.21_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/09.01.2024_23.56.21_REC.png 
-------------------------------------------------------------------------------- /labs/lesson_05/images/09.01.2024_23.58.24_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/09.01.2024_23.58.24_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/10.01.2024_00.17.52_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/10.01.2024_00.17.52_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/10.01.2024_00.27.51_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/10.01.2024_00.27.51_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/10.01.2024_00.35.42_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/10.01.2024_00.35.42_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/16.01.2024_23.13.29_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/16.01.2024_23.13.29_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/31.12.2023_13.19.23_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/31.12.2023_13.19.23_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/31.12.2023_13.23.18_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/31.12.2023_13.23.18_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/31.12.2023_13.27.45_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/31.12.2023_13.27.45_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/31.12.2023_13.30.59_REC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/31.12.2023_13.30.59_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/31.12.2023_13.32.17_REC.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/31.12.2023_13.32.17_REC.png -------------------------------------------------------------------------------- /labs/lesson_05/images/git_workflow_branching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/git_workflow_branching.png -------------------------------------------------------------------------------- /labs/lesson_05/images/git_workflow_pipelines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/git_workflow_pipelines.png -------------------------------------------------------------------------------- /labs/lesson_05/images/large-language-model-operations-prompt-flow-process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/lesson_05/images/large-language-model-operations-prompt-flow-process.png -------------------------------------------------------------------------------- /labs/lesson_05/lab05.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Lab 05: Automating Everything' 3 | layout: default 4 | nav_order: 6 5 | --- 6 | 7 | #### Lesson 5: Automating Everything 8 | 9 | ##### Scenario 10 | 11 | In this exercise, you'll learn to automate the build, evaluation, and deployment of your LLM orchestration flow. We'll use the [**LLMOps Accelerator**](https://github.com/azure/llmops) as a guide for deploying an LLM with **Prompt Flow** and **GitHub Actions**. This automation is vital for Telco Company, enabling efficient, error-free deployment processes for their 24/7 support virtual agent. 12 | 13 | By mastering these tasks, Telco's team will boost their ability to manage AI projects independently, ensuring smooth operations and maximizing the benefits of Azure AI services for their customer support solutions. 14 | 15 | ##### Exercise Steps 16 | 17 | 1. [Bootstrapping a New Project](https://github.com/Azure/llmops/blob/main/documentation/bootstrapping.md) 18 | 2. [Delivering a New Feature](https://github.com/Azure/llmops/blob/main/documentation/delivering_new_feature.md) 19 | 20 | Refer to the following sections for details on the Git Workflow and Pipelines used in this workshop: 21 | 22 | ###### Git Workflow 23 | 24 | The image below illustrates the workflow used in the workshop. We'll explore how to deliver a new feature based on this workflow. In this example, we are developing a feature called "Feature X," which will be included in the project's release 1.0.0. 25 | 26 | ![Git Workflow](images/git_workflow_branching.png) 27 | 28 | ###### Detailed Workflow Description: 29 | 30 | 1. **Feature Branch Creation:** 31 | 32 | The process starts when the development team creates a feature branch from the `develop` branch. This branch is dedicated to the development of the new feature X. 33 | 34 | 2. **Pull Request (PR):** 35 | 36 | Upon completing the feature, a Pull Request (PR) is initiated from the feature branch to the `develop` branch, which is the default branch where the team integrates changes.
37 | 38 | The creation of the PR triggers a *PR Evaluation Pipeline* to ensure that the code adheres to standards, passes unit tests, and the orchestration flow is evaluated by AI to ensure it meets quality metrics. 39 | 40 | 3. **Merge to develop:** 41 | 42 | Once the Pull Request is approved, it is merged into the `develop` branch. This merge triggers the *Continuous Integration (CI) Pipeline*, which builds the orchestration flow and conducts AI-assisted evaluations using a comprehensive test dataset based on the [Golden Dataset](https://aka.ms/copilot-golden-dataset-guide). Upon successful completion, the *Continuous Deployment (CD) Pipeline* is executed to deploy the flow to the **dev** environment. 43 | 44 | 4. **Release Branch Creation (Release/1.0.0):** 45 | 46 | After confirming the stability of the `develop` branch through testing in **dev**, a release branch is created from `develop`. This triggers a *Continuous Deployment (CD) Pipeline* to deploy the application to the **qa** environment. Before deployment, an AI-based evaluation assesses [quality](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/flow-evaluate-sdk), risk, and [safety](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/simulator-interaction-data) factors. The application in **qa** is then used for User Acceptance Testing (UAT) and [red-teaming](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/red-teaming) of the LLM App. 47 | 48 | 5. **Pull Request to main:** 49 | 50 | After UAT tests in the **qa** environment confirm that the application is ready for production, a Pull Request (PR) is created to merge the changes into the `main` branch. 51 | 52 | 6. **Merge to main:** 53 | 54 | Once the Pull Request (PR) to the `main` branch is manually approved, the release branch is merged into the `main` branch. This action triggers the Continuous Deployment (CD) Pipeline, which deploys the code to the **prod** environment. 55 | 56 | ##### CI/CD Pipelines 57 | 58 | The CI/CD (Continuous Integration/Continuous Deployment) pipelines automate integration, evaluation, and deployment processes, ensuring efficient delivery of high-quality applications. 59 | 60 | ![Pipelines](images/git_workflow_pipelines.png) 61 | 62 | - **The Pull Request Evaluation Pipeline** begins with unit tests, followed by a code review, and concludes with AI-assisted prompt evaluation to validate code changes before integration. 63 | 64 | - **The Continuous Integration Pipeline** starts with unit tests and code reviews, followed by AI-assisted flow evaluation to identify potential issues. The application is then built, and the flow image is registered for deployment. 65 | 66 | - **The Continuous Deployment Pipeline** operates across three environments: dev, qa, and prod. Provisioning of resources is performed when necessary, and the deployment of the application is executed in the respective environment. 67 | 68 | - **In the dev environment**, the latest code is pulled, and the application is deployed for the development team's testing. 69 | 70 | - **In the qa environment**, the code is retrieved, and AI-assisted evaluations for quality and safety are conducted, followed by integration testing. The application is then deployed and made available for User Acceptance Testing (UAT). 71 | 72 | - **In the prod environment**, the same image built in the Continuous Integration Pipeline is deployed, ensuring consistency and reliability.
Integration testing is conducted, and smoke testing ensures functionality post-deployment. 73 | 74 | This structured approach streamlines workflows, reduces errors, and guarantees the efficient delivery of applications to production. -------------------------------------------------------------------------------- /labs/performance/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Lab 06: Performance Evaluation' 3 | layout: default 4 | nav_order: 7 5 | has_children: true 6 | --- 7 | 8 | #### LLM Performance Testing 9 | 10 | Welcome to the practical part of the performance testing lesson. This lab is divided into two separate tasks. Each one is designed to give you practical experience with performance testing. Below, you'll find a brief overview of each task, along with links to detailed instructions. If you want to dive deeper into how to evaluate the performance of LLM models, check out the [Performance Evaluation](docs/PERFTEST_CONCEPTS.md) section. 11 | 12 | #### Task 1: Benchmarking Azure OpenAI Models 13 | 14 | In this task, you'll get hands-on experience with the [Azure OpenAI Benchmarking Tool](https://github.com/Azure/azure-openai-benchmark). This tool is an invaluable asset for gauging the performance of Azure OpenAI deployments. It proves particularly useful in the initial phases of a project, helping developers determine if the model deployment is appropriately scaled. Moreover, it enables comparisons between various Azure OpenAI deployments. 15 | 16 | For a step-by-step guide on how to proceed with this task, please follow the [**Task 1 Instructions**](docs/AOAI_BENCH_TOOL.md). 17 | 18 | #### Task 2: Load Testing LLM Apps 19 | 20 | In this task, you'll be working with a reference Language Model (LLM) application. This application, based on the RAG pattern, is available in a separate repository. Your objective is to deploy this application and then carry out load testing using Azure Load Testing. This hands-on task will provide you with practical experience in managing load testing for LLM applications, mirroring real-world scenarios. 21 | 22 | For a step-by-step guide on how to proceed with this task, please follow the [**Task 2 Instructions**](https://github.com/Azure/GPT-RAG/blob/main/docs/LOAD_TESTING.md). 
-------------------------------------------------------------------------------- /labs/performance/benchmark.parameters.template: -------------------------------------------------------------------------------- 1 | export OPENAI_API_KEY=[replace with your Azure OpenAI API key] 2 | AOAI_ENDPOINT=https://[replace with your Azure OpenAI resource name].openai.azure.com/ 3 | AOAI_DEPLOYMENT=[replace with your Azure OpenAI deployment name] 4 | TEST_NAME=[replace with your Test Run name, example: ptu-gpt4-eastus] 5 | ERROR_FILE=error.err 6 | CONTEXT_TOKENS=4500 7 | MAX_TOKENS=500 8 | RETRY=none 9 | RATE=4 10 | DURATION=180 -------------------------------------------------------------------------------- /labs/performance/benchmark_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 1) Load Test Results" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import pandas as pd\n", 18 | "import json\n", 19 | "\n", 20 | "# Get a list of all log files in the directory\n", 21 | "log_files = [f for f in os.listdir() if f.endswith('.log')]\n", 22 | "\n", 23 | "# Create a dictionary to hold all dataframes\n", 24 | "dfs = {}\n", 25 | "\n", 26 | "for file in log_files:\n", 27 | " data = []\n", 28 | " with open(file, 'r') as f:\n", 29 | " for line in f:\n", 30 | " data.append(json.loads(line))\n", 31 | " \n", 32 | " df = pd.json_normalize(data, sep='_')\n", 33 | "\n", 34 | " # Remove the .log extension and use the filename as the key in the dictionary\n", 35 | " dfs[file[:-4]] = df\n", 36 | "\n", 37 | "# Print the number of test runs\n", 38 | "print(f\"Number of test runs: {len(dfs)}\")\n", 39 | "\n", 40 | "# Print the name of each test run\n", 41 | "for name in dfs.keys():\n", 42 | " print(name)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### 2) Summarize Test Runs" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "from tabulate import tabulate\n", 59 | "\n", 60 | "# Create a list to hold the last row of each DataFrame\n", 61 | "summary_data = []\n", 62 | "\n", 63 | "for name, df in dfs.items():\n", 64 | " # Check if the DataFrame is empty\n", 65 | " if not df.empty:\n", 66 | " # Get the last row of the DataFrame\n", 67 | " last_row = df.iloc[-1:]\n", 68 | " \n", 69 | " # Add the name of the DataFrame as the first column\n", 70 | " last_row.insert(0, 'DataFrame', name)\n", 71 | " \n", 72 | " # Append the last row to the summary_data list\n", 73 | " summary_data.append(last_row)\n", 74 | "\n", 75 | "# Convert the list of last rows into a DataFrame\n", 76 | "summary_df = pd.concat(summary_data)\n", 77 | "\n", 78 | "# Print summary_df in a tabular format\n", 79 | "print('Execution Summary:')\n", 80 | "print(tabulate(summary_df, headers='keys', tablefmt='psql', showindex=False))" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### 3) Create some trend charts" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "import matplotlib.pyplot as plt\n", 97 | "import numpy as np\n", 98 | "\n", 99 | "# Increase the global font size\n", 100 | "plt.rcParams.update({'font.size': 16})\n", 101 | "\n", 
102 | "# List of fields to plot\n", 103 | "fields = ['requests', 'failures', 'e2e_avg', 'ttft_avg', 'tbt_avg', 'util_avg']\n", 104 | "\n", 105 | "# Determine the number of rows and columns for the subplots\n", 106 | "nrows = len(fields)\n", 107 | "ncols = len(dfs)\n", 108 | "\n", 109 | "# Increase the height of each chart to 10 inches\n", 110 | "fig, axs = plt.subplots(nrows, ncols, figsize=(2 * ncols * 6, nrows * 10))\n", 111 | "fig.subplots_adjust(hspace=0.5)\n", 112 | "\n", 113 | "# Loop over each field, each dataframe, and each axes\n", 114 | "for i, field in enumerate(fields):\n", 115 | " for j, (name, df) in enumerate(dfs.items()):\n", 116 | " # Check if 'run_seconds' and field exist in df\n", 117 | " if 'run_seconds' in df.columns and field in df.columns:\n", 118 | " # Convert 'run_seconds' and field to numeric type and drop rows with NaN values\n", 119 | " df['run_seconds'] = pd.to_numeric(df['run_seconds'], errors='coerce')\n", 120 | " df[field] = pd.to_numeric(df[field], errors='coerce')\n", 121 | " df = df.dropna(subset=['run_seconds', field])\n", 122 | "\n", 123 | " # Create a line plot on each axes\n", 124 | " if ncols > 1:\n", 125 | " ax = axs[i, j]\n", 126 | " else:\n", 127 | " ax = axs[i]\n", 128 | "\n", 129 | " ax.plot(df['run_seconds'], df[field])\n", 130 | "\n", 131 | " # Set the title of the plot to the name of the dataframe\n", 132 | " ax.set_title(name)\n", 133 | "\n", 134 | " # Set the labels for the x-axis and y-axis\n", 135 | " ax.set_xlabel('run_seconds')\n", 136 | " ax.set_ylabel(field)\n", 137 | " else:\n", 138 | " print(f\"'run_seconds' or '{field}' not found in DataFrame {name}\")\n", 139 | "\n", 140 | "# Display the plots\n", 141 | "plt.show()" 142 | ] 143 | } 144 | ], 145 | "metadata": { 146 | "kernelspec": { 147 | "display_name": "aoai_benchmarking", 148 | "language": "python", 149 | "name": "python3" 150 | }, 151 | "language_info": { 152 | "codemirror_mode": { 153 | "name": "ipython", 154 | "version": 3 155 | }, 156 | "file_extension": ".py", 157 | "mimetype": "text/x-python", 158 | "name": "python", 159 | "nbconvert_exporter": "python", 160 | "pygments_lexer": "ipython3", 161 | "version": "3.10.14" 162 | } 163 | }, 164 | "nbformat": 4, 165 | "nbformat_minor": 2 166 | } 167 | -------------------------------------------------------------------------------- /labs/performance/docs/AOAI_BENCH_TOOL.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Benchmarking Azure OpenAI Models' 3 | layout: default 4 | nav_order: 1 5 | parent: 'Lab 06: Performance Evaluation' 6 | --- 7 | #### Benchmarking Azure OpenAI Models 8 | 9 | The [Azure OpenAI Benchmarking Tool](https://github.com/Azure/azure-openai-benchmark) is a powerful resource for evaluating the performance of Azure OpenAI deployments. It proves especially beneficial during the early stages of a project, aiding developers in assessing whether the model deployment is correctly sized and facilitating comparisons between different Azure OpenAI deployments. 10 | 11 | The tool delivers comprehensive latency metrics and simulates various traffic patterns. If you're interested in learning more about the benchmarking tool and LLM Performance testing, please visit the [LLM Apps Performance Evaluation](PERFTEST_CONCEPTS.md) page. 12 | 13 | In this Lab, you will learn how to set up and run a load test on an Azure OpenAI model deployment using the Azure OpenAI Benchmarking Tool. 
You will also learn how to interpret the results of the test and adjust parameters for different testing scenarios. 14 | 15 | ##### Test Scope 16 | 17 | For simplicity and educational purposes, this lab will focus on the PayGo endpoint using a Standard deployment of the gpt-35-turbo-16k model. However, the same procedures learned here can also be applied to gpt-4 models and Provisioned Throughput Units (PTU) deployments. 18 | 19 | ##### Test Architecture 20 | 21 | In our testing setup, we primarily use the gpt-35-turbo-16k model in a standard deployment with a quota of 20K TPM (Tokens Per Minute). 22 | 23 | In the example provided in this guide, you will deploy the resource in the East US (eastus) Azure region. However, remember that you're free to choose any region that works best for you. 24 | 25 | For accurate testing, run the Azure OpenAI Benchmarking Tool in the same region as your final application deployment. This mirrors the typical scenario where the client and the Azure OpenAI model deployment are in the same region. 26 | 27 | Running the test from the same region as your deployment not only provides a more realistic scenario, but it also eliminates any variations in latency that might occur if you were to run the test from your home network. 28 | 29 | For the sake of simplicity in this lab, you'll operate the tool from your own workstation. 30 | 31 | ##### Test Scenario 32 | 33 | This test scenario involves running a load test on an Azure OpenAI model deployment using the AOAI Benchmarking tool. Initially, the test mimics a situation where 4 requests are sent every minute, following a custom shape profile. Subsequently, we increase the number of requests per minute to 50. 34 | 35 | Each request will contain 4500 context tokens and will generate a maximum of 500 tokens. No retries will be made for failed requests. The test runs for a total of 180 seconds (3 minutes). 36 | 37 | While this guide presents a simple scenario, you're encouraged to experiment with the parameters to simulate and test different scenarios after the initial run. 38 | 39 | ##### Prerequisites 40 | 41 | Before you begin, ensure you have the necessary tools and services installed as listed in this section. 42 | 43 | - VS Code to run the analysis notebook: [Download VS Code](https://code.visualstudio.com/Download) 44 | 45 | - Python 3.11 to run the benchmarking tool: [Download Python](https://www.python.org/downloads/release/python-3118/) 46 | 47 | - Git: [Download Git](https://git-scm.com/downloads) 48 | 49 | ##### Lab Setup 50 | 51 | After finishing the prerequisites, you can start setting up the lab. 52 | 53 | ###### Python environment setup 54 | 55 | 1. **Clone the Workshop Repository:** If you haven't already, clone the LLMOps workshop repository to your local machine using the following command: 56 | 57 | ```bash 58 | git clone git@github.com:microsoft/llmops-workshop.git 59 | ``` 60 | 61 | 2. **Navigate to the lab directory:** Open your terminal and navigate to the `llmops-workshop/labs/performance` directory, inside the repository you cloned in the previous step. 62 | 63 | Next, let's set up your Python environment. This includes the libraries you'll use for executing tests and analyzing results. If you're using a Python environment manager like [Conda](https://docs.anaconda.com/free/miniconda/), create a new environment with Python 3.11, as demonstrated in steps 3 and 4. If you're not using Conda, that's okay. Just make sure you have Python 3.11 installed on your machine and proceed to step 5.
64 | 65 | 3. Create the Python environment with Conda. 66 | 67 | ```bash 68 | conda create -n aoai_benchmarking python=3.11 69 | ``` 70 | 71 | 4. **Activate the Environment:** 72 | 73 | ```bash 74 | conda activate aoai_benchmarking 75 | ``` 76 | 77 | 5. **Install the Required Libraries:** Install the libraries that will be used in this lab. 78 | 79 | ```bash 80 | pip install -r requirements.txt 81 | ``` 82 | 83 | 6. **Clone the Benchmarking Tool repository:** 84 | 85 | ```bash 86 | git clone git@github.com:Azure/azure-openai-benchmark.git 87 | ``` 88 | 89 | 7. **Install the Libraries used by the Benchmarking Tool:** 90 | 91 | ```bash 92 | pip install -r azure-openai-benchmark/requirements.txt 93 | ``` 94 | 95 | Great job! Your Python environment is now ready for this lab. 96 | 97 | ###### Azure Resources Setup 98 | 99 | 1. **Deploy the gpt-35-turbo Model:** For this lab, you'll need a gpt-35-turbo model deployment. If you haven't already done so, follow the instructions in [Resource and Model Deployment](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource) to create one. Initially, deploy the model with a quota of 20K TPM. 100 | 101 | 2. **Configure Diagnostic Settings (Optional):** Configure Diagnostic Settings to send log data from your Azure OpenAI resource to Azure Monitor. This helps analyze server metrics post-test. Follow the steps in [this link](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/monitoring#configure-diagnostic-settings) to set it up. 102 | 103 | Great job! Your Azure resources are now ready for this lab. 104 | 105 | ##### Running the Test 106 | 107 | To run the test, follow these steps: 108 | 109 | 1. **Navigate to the lab directory:** Open your terminal and navigate to the `llmops-workshop/labs/performance` directory, inside the repository you cloned during setup. 110 | 111 | At this point, you can continue executing in the terminal or open this directory in VS Code for a more convenient experience: 112 | 113 | ```bash 114 | code . 115 | ``` 116 | 117 | 2. For the test execution, rename the file [`benchmark.parameters.template`](benchmark.parameters.template) to `benchmark.parameters`. 118 | 119 | This file contains the necessary parameters for the benchmarking tool to connect to the deployment endpoint, as well as information related to the scenario that will be reproduced. 120 | 121 | 3. Update the `benchmark.parameters` file: 122 | 123 | Open the `benchmark.parameters` file and update the first four parameters of the file: 124 | 125 | ``` 126 | OPENAI_API_KEY=[replace with your Azure OpenAI API key] 127 | AOAI_ENDPOINT=https://[replace with your Azure OpenAI resource name].openai.azure.com/ 128 | AOAI_DEPLOYMENT=[replace with your Azure OpenAI deployment name] 129 | TEST_NAME=[replace with your Test Run name] 130 | ``` 131 | 132 | In `TEST_NAME`, use **paygo-gpt35-eastus-4RPM**, for example: 133 | 134 | `TEST_NAME=paygo-gpt35-eastus-4RPM` 135 | 136 | For the other three parameters, replace the placeholders with the corresponding values for your case. 137 | 138 | 4. Run the test 139 | 140 | In the terminal, execute the following command: 141 | 142 | **bash** 143 | 144 | ``` 145 | ./runtest.sh 146 | ``` 147 | 148 | 5. Update the deployment quota to 200K TPM and repeat steps 3 and 4, but use **paygo-gpt35-eastus-50RPM** in the `TEST_NAME` field in step 3.
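Once a run finishes, you can take a quick look at the final aggregate line of its log before opening the full analysis notebook. Below is a minimal Python sketch, assuming `runtest.sh` wrote a JSON-lines log named after your `TEST_NAME` (for example, `paygo-gpt35-eastus-4RPM.log`) in the current folder:

```python
# Quick sanity check on a finished run: print the last aggregate stats line.
# Assumes the log file is JSON lines, as read by the analysis notebook.
import json
import pandas as pd

with open("paygo-gpt35-eastus-4RPM.log") as f:
    rows = [json.loads(line) for line in f if line.strip()]

# Flatten nested fields (e.g. e2e.avg -> e2e_avg), mirroring the notebook.
df = pd.json_normalize(rows, sep="_")
cols = [c for c in ("run_seconds", "requests", "failures",
                    "e2e_avg", "ttft_avg", "tbt_avg", "util_avg")
        if c in df.columns]
print(df.iloc[-1][cols])
```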
149 | 150 | ##### Analyzing the Results 151 | 152 | To analyze the results of your test, follow these two steps: 153 | 154 | 1. **Analyze Azure OpenAI Metrics:** Navigate to your Azure OpenAI resource in the Azure portal. In the Monitoring section, select the Metrics option. Here, you can analyze metrics such as Azure OpenAI Requests and Blocked errors. 155 | 156 | 2. **Run the Analysis Notebook:** Execute the [`benchmark_analysis.ipynb`](../benchmark_analysis.ipynb) file to analyze the test results further. Follow the instructions in the notebook to complete this step. -------------------------------------------------------------------------------- /labs/performance/docs/PERFTEST_CONCEPTS.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Performance Evaluation' 3 | layout: default 4 | nav_order: 1 5 | parent: 'Lab 06: Performance Evaluation' 6 | --- 7 | # Performance Evaluation 8 | 9 | When developing Language Model (LLM) applications, we typically invest a significant amount of time in both development and evaluation. This is to ensure that the application can generate high-quality responses that are grounded in reliable sources and pose no harm to the user. 10 | 11 | However, the effectiveness of an LLM application's user experience is determined not only by the quality of responses but also by how fast the user gets these responses. Therefore, our discussion centers on the evaluation of LLM applications designed for rapid response times. 12 | 13 | 14 | 15 | The aim of **performance evaluation** is to proactively test the application to identify and address performance issues before they impact end-users. In the subsequent sections, we will explore performance evaluation in detail. We will discuss building an effective strategy, mastering evaluation techniques, and provide practical guides. Here's what you can expect: 16 | 17 | - [Building an Effective Strategy](#building-an-effective-strategy) 18 | - [Mastering Evaluation Techniques](#mastering-evaluation-techniques) 19 | - [How-To Guides](#how-to-guides) 20 | 21 | ## Building an Effective Strategy 22 | 23 | Each application has unique characteristics, such as user count, transaction volume, and expected response time. Therefore, it's crucial for you to establish an effective evaluation strategy tailored to the specific application you're evaluating. 24 | 25 | Before initiating the tests, you need to outline your strategy, which includes determining the aspects to test and the methods to use. This section provides a detailed discussion on these considerations. 26 | 27 | ### Identifying What to Evaluate 28 | 29 | Let's start by defining what you are going to test. For example, if the application is fully implemented and running in an environment similar to production, you can conduct a comprehensive load test. This allows you to measure performance and anticipate the user experience before the application is released to end users. 30 | 31 | Testing the entire application is a good idea as it provides a measure of response times that closely mirrors what a user will experience when interacting with the application. However, a user's interaction with a Large Language Model (LLM) App involves several elements. These include the application frontend, backend, networking, the LLM model, and other cloud services like databases and AI services. 32 | 33 | This is particularly true for modern application architectures. 
With this, you have the opportunity to performance-test a specific service even before the entire application is completed and ready for deployment. For example, you can preemptively test the performance of the Large Language Model (LLM) that you will use in the application. In the [Mastering Evaluation Techniques](#mastering-evaluation-techniques) section, you will see how you can test the performance of a model deployed in the Azure OpenAI service. 34 | 35 | Now let's take a look at an example of an application architecture where multiple services work together to produce a response for the user. 36 | 37 | **Retrieval Augmented Generation** (RAG) is an architectural pattern frequently used in the development of Large Language Model (LLM) Applications, such as ChatGPT. Before making a call to the LLM to generate content, this architecture includes a retrieval step, which is crucial in providing grounding data. The [Enterprise RAG architecture](https://aka.ms/gpt-rag) offers a practical example of the RAG pattern implemented in an enterprise setting. In the [How-To Guides](#how-to-guides) section, you will see an example of how to perform load testing on an LLM application based on the RAG pattern. 38 | 39 | ![Architecture Overview](../media/perftest-GPT-RAG-Basic-communication.png) 40 |
Example of communication between the components of an LLM App based on the RAG pattern.
 41 | 42 | > Note: To simplify the diagram, we did not represent the return messages. 43 | 44 | This figure illustrates the orchestration flow within an LLM application based on RAG. 45 | 46 | Here's how it works: 47 | 48 | 1) The user interacts with the frontend UI to pose a question. 49 | 2) The frontend service forwards the user's question to the Orchestrator. 50 | 3) The Orchestrator retrieves the user's conversation history from the database. 51 | 4) The Orchestrator accesses the AI Search key stored in the Key Vault. 52 | 5) The Orchestrator retrieves relevant documents from the AI Search index. 53 | 6) The Orchestrator uses Azure OpenAI to generate a user response. 54 | 55 | Each step in the process involves data transfer and processing across various services, all contributing to the total response time. In such scenarios, you can evaluate not just the overall application response time, but also the performance of individual components, like the response times of the Azure OpenAI model deployment. 56 | 57 | Ultimately, the scope of testing depends on each application's specific requirements. For instance, an internal application and a public-facing application may have different performance needs. While a response time of 15 seconds might be acceptable in an internal HR application used to view paychecks, a contact center app with hundreds of users might need to respond much faster due to its high user demand and SLAs. Make sure you know the requirements of your application before starting your performance evaluation. 58 | 59 | ### Test Scenario 60 | 61 | Once you have defined what you will evaluate, it's crucial to define the test scenario accurately. This won't be as simple as timing how long completions take to appear in the model playground, how long a flow takes to execute in Prompt Flow, or the duration of code execution in VS Code. While these metrics, based on a single user's experience, can help pinpoint potential performance bottlenecks in specific components, they don't provide a complete picture. To truly understand performance under real-world conditions, your test scenario should mimic the actual usage load of the application. 62 | 63 | 64 | 65 | First, we need to determine the load that will be placed on the application. This load is defined in terms of throughput, which is the number of requests the application will receive within a specific time frame, such as **Requests per Minute** (RPM). 66 | 67 | There are multiple ways to estimate the expected throughput. If the application is already operational, you can use its current usage data, gathered from monitoring tools, as a reference. The subsequent figure illustrates this approach. If you foresee an increase in usage due to the integration of LLM into the solution, you should adjust your throughput estimation to accommodate this anticipated growth. 68 | 69 | ![Users per hour](../media/perftest-users-per-hour.png) 70 |
Example of a usage scenario; note the peak load between 10:00 and 13:00.
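If your application is instrumented with Application Insights, one way to pull this kind of usage data is with a Log Analytics query. The sketch below assumes the default Application Insights `requests` table; adapt the table and time range to your telemetry setup:

```
requests
| where timestamp > ago(7d)
| summarize rpm = count() / 60.0 by bin(timestamp, 1h)
| order by rpm desc
```

Sorting by the busiest one-hour buckets surfaces the peak load that your test scenario should reproduce.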
 71 | 72 | When dealing with a new application, estimating the expected throughput can be approached through benchmarking or usage modeling. Benchmarking involves comparing your application with similar ones that serve the same target audience. By studying their usage patterns, you can get a rough estimate of the expected throughput for your application. 73 | 74 | Usage modeling, on the other hand, requires you to create a model of the expected usage patterns of your application. This can be achieved by interacting with stakeholders or potential users in the specific field for which the application is being developed. Their insights can provide a better understanding of how the application might be used, which can assist in estimating the Requests Per Minute (RPM). 75 | 76 | One approach to modeling your application usage is to start by identifying the **total number of users**. This should encompass all registered or potential users of your application. Then, identify the number of these **users** who are active **during peak usage times**. Focusing on peak usage times is crucial, as off-peak data may not accurately reflect system performance, particularly for systems with distinct high-usage periods. 77 | 78 | Next, estimate the average number of times a user will use the application during peak times. This is referred to as **sessions**. Also, estimate the number of actions or **interactions** a user makes during a session. Each interaction corresponds to a request made to the application. 79 | 80 | For example, consider a mobile app for a shopping mall. Users are likely to have multiple interactions in a single session. They might first search for recommended restaurants, then ask about the operating hours of a specific restaurant. Each of these actions is an interaction. 81 | 82 |
 83 | Sample user session 84 | 85 | Example of a user session. 86 |
 87 | 88 | Once you have the number of users (**u**), the percentage (**p**) of them active during the peak usage period, the duration of that peak period in hours (**n**), the number of sessions per user (**s**), and the average number of interactions (**i**) per session, you can use the following formula to estimate the RPM. 89 | 90 | `RPM = (u * p * s * i) / n / 60` 91 | 92 | Taking the previous example of the Mall App, let's consider a set of **10,000 registered users** on the App. We expect that during the two-hour peak period, **10%** of the users will interact at least once with the application to obtain information, such as store locations or product details. 93 | 94 | In this case, we have: 95 | - **u=10000** (total users) 96 | - **p=0.1** (percentage of active users) 97 | - **s=1** (sessions per user) 98 | - **i=2** (interactions per session) 99 | - **n=2** (duration of the peak period, in hours) 100 | 101 | Therefore, the expected throughput for the peak hours is approximately **17 RPM** (10,000 x 0.1 x 1 x 2 = 2,000 requests spread over 2 hours, or about 16.7 requests per minute). 102 | 103 | > **Note:** During testing, you may want to reproduce a load that is about 10% higher than estimated to be more conservative. 104 | 105 | Defining the scenario for larger applications can become complex, requiring the identification of distinct user roles and their usage behavior. However, if exact modeling is challenging due to lack of information, just keep things simple, make an educated guess, and validate it with the application's stakeholders. 106 | 107 | Another factor to consider when defining your test scenario is that LLM response times depend on the sizes of prompts and completions. Accurate testing requires replicating real usage scenarios, matching these sizes. For instance, RAG prompts are typically larger due to context text, while proofreading apps usually have similar prompt and completion sizes. 108 | 109 | #### Test Data 110 | 111 | Performance testing heavily relies on the data used during the test execution. It's crucial to use data that closely mirrors real-world scenarios. For instance, if we're testing an application like a copilot, the test results would be more accurate if each user asks different questions. Even if users ask the same questions, they should phrase them differently. 112 | 113 | Consider a scenario where each virtual user asks the exact same question during the test. This could lead to results that don't accurately represent real-world usage. Certain components of the application might leverage caching mechanisms to deliver faster responses, skewing the results. Furthermore, the final metric, typically an average or a percentile, will be biased towards the repeated question. 114 | 115 | Having experts in the App domain contribute to the creation of the test data set can greatly enhance its quality and relevance. Their knowledge can help shape more realistic and relevant examples. Alternatively, a Large Language Model (LLM) can be utilized to generate a synthetic dataset. This approach can be particularly useful for running tests, as it allows for the creation of diverse and comprehensive data scenarios. This practice not only enhances the quality of the tests but also ensures that they cover a wide range of potential real-world situations. 116 | 117 | #### Test Measurements 118 | 119 | Performance testing requires identifying key metrics. A common one is **Response Time**, the total time from sending a request to receiving a response.
Performance requirements are typically expressed in terms of this metric, such as needing an average response time under ten seconds, or 95% of responses within ten seconds. 120 | 121 | However, response time is not the only metric of interest. To gain a holistic understanding of the application's performance and the factors affecting it, we can categorize the metrics into two groups. 122 | 123 | The first group comprises metrics that can be measured from the client's perspective - what the client can observe and capture. The second group consists of metrics that are measured by monitoring the server's performance. Let's explore each of these groups in detail: 124 | 125 | ##### Client metrics 126 | 127 | When testing an LLM App, we usually obtain the following client metrics: 128 | 129 | | Metric | Description | 130 | |----------------------------|----------------------------------------------------| 131 | | Number of Virtual Users | This metric shows the virtual user count during a load test, helping assess application performance under different user loads. | 132 | | Requests per Second | This is the rate at which requests are sent to the LLM App during the load test. It's a measure of the load your application can handle. | 133 | | Request Response Time | This is the time taken by the application to respond to a request. | 134 | | Number of Failed Requests | This is the count of requests that failed during the load test. It helps identify the reliability of your application under stress. | 135 | 136 | > **Note:** "Request response time" is the duration from when a client sends a request to when it receives the full response, often called "end-to-end response time". It includes all processing and communication time within the system, but it does not account for any client-side processing time, such as rendering a webpage or executing JavaScript in a web application. 137 | 138 | The diagram below illustrates how processing and communication times add up to the total response time. In the figure, each Tn marks a specific time during processing. T1 is when the user initiates a request through a client, such as a browser or MS Teams. T10 is when the user gets the full response. Note that the total response time (from T1 to T10) depends on the processing and response times of all components involved in the request. 139 | 140 | ![Response time breakdown](../media/perftest-response-time.png) 141 |
Simplified example of the breakdown of request response time.
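To make the decomposition concrete, here is a purely illustrative budget for a single RAG request; every number below is hypothetical and will differ per application:

```
end-to-end response time ≈ client <-> frontend network     0.2 s
                         + frontend + orchestrator logic   0.3 s
                         + history lookup (database)       0.2 s
                         + retrieval (AI Search)           0.5 s
                         + LLM generation (TTFT + tokens)  4.0 s
                         ≈ 5.2 s total
```

In budgets like this, the LLM generation term usually dominates, which is why the tuning guidance later in this document focuses heavily on token counts and model choice.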
 142 | 143 | 144 | ###### Performance Metrics for an LLM 145 | 146 | When conducting performance testing directly on a specific service, we can collect client-side metrics specific to that target service. In the context of performance testing a Large Language Model (LLM), we should consider metrics related to prompt tokens and response tokens. For instance, consider the deployment of an OpenAI model on Azure. The following table presents some of these metrics, which offer valuable insights into the client's interaction with the model deployment and its performance under load. 147 | 148 | | Metric | Description | 149 | |----------------------------------|--------------------------------------------------------------| 150 | | Number of Prompt Tokens per Minute | Rate at which the client sends prompt tokens to the OpenAI model. | 151 | | Number of Generated Tokens per Minute | Rate at which the OpenAI model generates response tokens. | 152 | | Time to First Token (TTFT) | Latency from the start of the client's request until the first response token is generated. | 153 | | Time Between Tokens (TBT) | Time interval between consecutive response tokens being generated. | 154 | 155 | > **Note:** To examine the time intervals between tokens in Azure OpenAI's responses, you can utilize its streaming feature. Unlike conventional API calls that deliver the entire response at once, streaming sends each token or a set of tokens to the client as soon as they are produced. This allows for real-time performance monitoring and detailed analysis of the dynamics of response generation. 156 | 157 | The diagram below provides a simplified view of a client's interaction with a model endpoint. The interaction commences at the moment (`T0`) when the client sends a request to the model's endpoint. The model responds in streaming mode, with `T1`, `T2`, and `TN` representing the moments when the first, second, and last tokens are received, respectively. 158 | 159 | In this scenario, we define several key metrics: **Time to First Token (TTFT)** is `T1 - T0`, **Time Between Tokens (TBT)** is `T2 - T1`, and the **end-to-end response time** is `TN - T0`. It's important to note that in streaming mode, the model's responses can arrive in multiple parts, each with several tokens. This makes both the diagram and the metrics an approximate representation of real-world scenarios. 160 | 161 | ![AOAI deployment response time](../media/perftest-aoai-response-time.png) 162 |
AOAI deployment response in streaming mode.
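In practice, you can capture these client-side metrics yourself. The following is a minimal sketch using the `openai` Python SDK (v1+) against an Azure OpenAI deployment in streaming mode; the environment variable names mirror this lab's `benchmark.parameters` file, and the `api_version` value is an assumption to adjust for your resource. As noted above, a streamed chunk may carry more than one token, so the numbers are approximations:

```python
import os
import time

from openai import AzureOpenAI  # assumes the openai>=1.x SDK

client = AzureOpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    azure_endpoint=os.environ["AOAI_ENDPOINT"],
    api_version="2024-02-01",  # assumption: use a version your resource supports
)

t0 = time.perf_counter()
arrivals = []  # arrival time of each content-bearing chunk

stream = client.chat.completions.create(
    model=os.environ["AOAI_DEPLOYMENT"],  # the deployment name, not the model family
    messages=[{"role": "user", "content": "Explain RAG in one paragraph."}],
    stream=True,
)
for chunk in stream:
    # Skip housekeeping chunks that carry no text content
    if chunk.choices and chunk.choices[0].delta.content:
        arrivals.append(time.perf_counter())

ttft = arrivals[0] - t0                                 # Time to First Token
tbts = [b - a for a, b in zip(arrivals, arrivals[1:])]  # Time Between Tokens
avg_tbt = sum(tbts) / len(tbts) if tbts else 0.0
e2e = arrivals[-1] - t0                                 # end-to-end response time
print(f"ttft={ttft:.3f}s  avg_tbt={avg_tbt:.3f}s  e2e={e2e:.3f}s")
```

Running this a few times at different request rates gives a rough client-side view before reaching for the full benchmarking tool described later.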
 163 | 164 | 165 | ##### Server metrics 166 | 167 | During performance testing, we focus on two types of metrics. The first type is client metrics, which directly affect the user experience. The second type is server metrics, which give us insights into the performance of server components. 168 | 169 | Server metrics encompass a wide range of measurements. For instance, we might look at the CPU and memory usage of the application service running the frontend. We could also monitor the utilization of resources like the Azure OpenAI PTU deployment. These are just a few examples; there are many other server metrics we could potentially examine. 170 | 171 | By collecting these measurements, we can create a detailed performance profile of the entire solution. This profile helps us identify any bottlenecks and tune any components that are not performing optimally. 172 | 173 | LLM Apps consist of various services, and the server metrics we utilize will vary based on these services. To give you an idea, here are some examples of the metrics we might gather, depending on the specific service in use: 174 | 175 | 176 | 177 | | Service Name | Metric | Description | 178 | |--------------------------|----------------------------------|-------------| 179 | | Azure OpenAI | Azure OpenAI Requests | Total calls to the Azure OpenAI API. | 180 | | Azure OpenAI | Generated Completion Tokens | Output tokens from the Azure OpenAI model. | 181 | | Azure OpenAI | Processed Inference Tokens | The number of input and output tokens that are processed by the Azure OpenAI model. | 182 | | Azure OpenAI | Provisioned-managed Utilization V2 | The percentage of the provisioned-managed deployment that is currently being used. | 183 | | Azure App Service | CPU Percentage | The percentage of CPU used by the App backend services. | 184 | | Azure App Service | Memory Percentage | The percentage of memory used by the App backend services. | 185 | | Azure Cosmos DB | Total Requests | Number of requests made to Cosmos DB. | 186 | | Azure Cosmos DB | Provisioned Throughput | The amount of throughput that has been provisioned for a container or database. | 187 | | Azure Cosmos DB | Normalized RU Consumption | The normalized request unit consumption based on the provisioned throughput. | 188 | | Azure API Management | Total Requests | Total number of requests made to APIM. | 189 | | Azure API Management | Capacity | Percentage of resource and network queue usage in an APIM instance. | 190 | 191 | 192 | ### When should I evaluate performance? 193 | 194 | You might be wondering when to execute performance tests. To help us in this discussion, let's take a look at the Enterprise LLM Lifecycle, illustrated in the following image. 195 | 196 | ![LLM Lifecycle](../media/perftest-llmlifecycle.png) 197 |
Enterprise LLM Lifecycle.
 198 | 199 | The Enterprise LLM Lifecycle with Azure AI involves ideating and exploring, building and augmenting, operationalizing, and managing loops to develop, enhance, deploy, and govern large language model (LLM) applications. You can learn more about the Enterprise LLM Lifecycle by reading this blog: [Building for the future: The enterprise generative AI application lifecycle with Azure AI](https://azure.microsoft.com/en-us/blog/building-for-the-future-the-enterprise-generative-ai-application-lifecycle-with-azure-ai/). 200 | 201 | Performance testing is crucial and should start as early as possible during the development process. This early start provides enough time for making necessary adjustments and optimizations. The exact timing, however, depends on what aspects of the application you're testing. 202 | 203 | If your goal is to evaluate the performance of the entire LLM App before it's used by end-users, the application must be fully developed and deployed to a staging environment. Typically, this load testing of the LLM App occurs during the initial iterations of the operationalizing loop in the Enterprise LLM Lifecycle. 204 | 205 | Keep in mind that there are scenarios where performance evaluations can be conducted before operationalization. For instance, during the ideating and exploring loop, you might be evaluating various LLMs for use. If you're considering using one of the models available on Azure OpenAI, this could be an excellent time to conduct a performance benchmark test using the Azure OpenAI benchmarking tool. 206 | 207 | The following figure illustrates the moments in the LLM lifecycle where the two types of performance tests mentioned earlier are usually conducted. 208 | 209 | ![LLM Lifecycle](../media/perftest-llmlifecycle-with-tests.png) 210 |
Performance tests in the LLM Lifecycle.
 211 | 212 | ## Mastering Evaluation Techniques 213 | 214 | OK, if you've reached this point, you already know what's important to consider in your testing strategy. Here, we will explore two evaluation techniques. One is aimed at performance testing of the entire LLM application, and the second is more focused on testing the deployed LLM. It's worth mentioning that these are two commonly used examples, but this is a non-exhaustive list. Depending on your performance requirements, it may be necessary to use other techniques in your testing strategy. 215 | 216 | #### LLM App Load Testing 217 | 218 | Azure Load Testing is a fully managed load-testing service that enables you to generate high-scale load against your LLM App. The service simulates traffic for your applications, regardless of where they're hosted. You can use it to test and optimize the performance, scalability, and capacity of your application. You have the flexibility to create and execute load tests either through the Azure portal or via the Azure Command Line Interface (CLI), managing and running your tests in the way that suits you best. 219 | 220 | Azure Load Testing helps you simulate a large number of users sending requests to a server to measure how well an application or service performs under heavy load. You can use Apache JMeter scripts to set up and run these tests. These scripts can act like real users, doing things like interacting with the service, waiting, and using data. In the [How-To Guides](#how-to-guides) section, you will find a guide on how you can test your LLM App with a practical example. 221 | 222 | The diagram below shows the high-level architecture of Azure Load Testing. It uses JMeter to simulate heavy server loads and provides detailed performance metrics. You can adjust the number of test engine instances to meet your load test requirements, making the system scalable and robust. 223 | 224 | ![Azure Load Testing architecture](../media/perftest-azure-load-testing.png) 225 |
Azure Load Testing.
 226 | 227 | LLM App load testing is crucial for identifying performance issues and ensuring that your application and its Azure dependencies (like the App Service, Function App, and Cosmos DB) can handle peak loads efficiently. 228 | 229 | The following table offers an explanation of important concepts associated with Azure Load Testing. Grasping these concepts is essential for effectively using Azure's load testing features to evaluate the performance of the LLM App under various load scenarios. 230 | 231 | | Concept | Description | 232 | |---------|-------------| 233 | | Test | Refers to a performance evaluation setup that assesses system behavior under simulated loads by configuring load parameters, test scripts, and target environments. | 234 | | Test Run | Represents the execution of a Test. | 235 | | Test Engine | Engine that runs the JMeter test scripts. Adjust load test scale by configuring test engine instances. | 236 | | Threads | Parallel threads in JMeter that represent virtual users. They are limited to a maximum of 250. | 237 | | Virtual Users (VUs) | Simulate concurrent users. Calculated as threads * engine instances. | 238 | | Ramp-up Time | The time required to reach the maximum number of VUs for the load test. | 239 | | Latency | The time from sending a request until the beginning of the response arrives. | 240 | | Response Time | The duration between sending a request and receiving the full response. It does not include any time spent on client-side response processing or rendering. | 241 | 242 | Azure Load Testing allows parameter definition, including environment variables, secrets, and certificates. It supports test scaling, failure criteria setting, and monitoring of application components and resource metrics. CSV files with test data and JMeter configurations can be uploaded for flexible, customizable test scripts. 243 | 244 | For [secure access](https://learn.microsoft.com/en-us/azure/load-testing/how-to-test-secured-endpoints) to Azure Key Vault [secrets](https://learn.microsoft.com/en-us/azure/load-testing/how-to-parameterize-load-tests#secrets), a managed identity can be used. When deployed within your [virtual network](https://learn.microsoft.com/en-us/azure/load-testing/how-to-test-private-endpoint), the service can generate load directed at your application's private endpoint. Authentication via access tokens, user credentials, or client certificates is also supported, depending on your application's requirements. 245 | 246 | ##### Monitoring Application Resources 247 | 248 | With Azure Load Testing, you can monitor your server-side performance during load tests. You can specify which Azure application components to monitor in the test configuration. You can view these server-side metrics both during the test and afterwards on the load testing dashboard. The following figure shows an example of server-side metrics obtained from an App Service after running a test. You can see the Azure services from which you can obtain server-side metrics [in this link](https://learn.microsoft.com/en-us/azure/load-testing/resource-supported-azure-resource-types). 249 | 250 | ![Server metrics](../media/perftest-server-metrics.png) 251 |
Azure Load Testing Server-side Performance Metrics.
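A test's configuration (engine instances, failure criteria, environment variables, and the JMeter script) can also be captured in a YAML file kept in source control, which is what the CI/CD commands in the next section consume. The sketch below follows the documented Azure Load Testing configuration format, but every value is a placeholder to adapt:

```yaml
version: v0.1
testId: llm-app-load-test            # placeholder test id
displayName: LLM App load test
testPlan: sample-llm-app.jmx         # placeholder JMeter script
description: Load test for the RAG chat endpoint
engineInstances: 2                   # scale out the load generators
failureCriteria:
  - avg(response_time_ms) > 10000    # fail if average response time exceeds 10 s
  - percentage(error) > 5            # fail if more than 5% of requests fail
env:
  - name: threads_per_engine
    value: 250
```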
 252 | 253 | ##### Load Testing Automation 254 | 255 | Integrating Azure Load Testing into your CI/CD pipeline is a key step in enhancing your organization's adoption of LLMOps practices. This integration enables automated load testing, ensuring consistent performance checks at crucial points in the development lifecycle. You can trigger Azure Load Testing directly from Azure Pipelines or GitHub Actions workflows, providing a simplified and efficient approach to performance testing. Below are some examples of commands to automate the creation and execution of a load test. 256 | 257 | ``` 258 | # Example command to create a load test (uses the Azure CLI 'load' extension; 259 | # the JMeter script is referenced from the YAML configuration file) 260 | az load test create \ 261 |   --load-test-resource $loadTestResource \ 262 |   --resource-group $resourceGroup \ 263 |   --test-id $testId \ 264 |   --load-test-config-file path-to-your-load-test-config.yaml 265 | ``` 266 | 267 | ``` 268 | # Example command to run the load test 269 | az load test-run create \ 270 |   --load-test-resource $loadTestResource \ 271 |   --resource-group $resourceGroup \ 272 |   --test-id $testId --test-run-id $testRunId 273 | ``` 274 | 275 | For more information on configuring a load test and automating these steps using the Azure Command Line Interface (CLI), refer to the [Azure Load Testing CI/CD configuration guide](https://learn.microsoft.com/en-us/azure/load-testing/how-to-configure-load-test-cicd) and the [Azure CLI reference for load testing](https://learn.microsoft.com/en-us/cli/azure/load). 276 | 277 | ##### Key Metrics to Monitor During Load Tests 278 | 279 | When conducting load tests, it's crucial to monitor certain key metrics to understand how your application performs under stress. These metrics will help you identify any potential bottlenecks or areas that need optimization. Here are some of the most important ones to keep an eye on: 280 | 281 | - **Request Rate**: Monitor the request rate during load testing. Ensure that the LLM application can handle the expected number of requests per second. 282 | - **Response Time**: Analyze response times under different loads. Identify bottlenecks and optimize slow components. 283 | - **Throughput**: Measure the number of successful requests per unit of time. Optimize for higher throughput. 284 | - **Resource Utilization**: Monitor CPU, memory, and disk usage. Ensure efficient resource utilization. 285 | 286 | ##### Best Practices for Executing Load Tests 287 | 288 | To ensure your load tests are effective and yield meaningful insights, it's worthwhile to review the following recommendations. Here are some key strategies to consider: 289 | 290 | - **Test Scenarios**: Create realistic test scenarios that mimic actual user behavior. 291 | - **Ramp-Up Strategy**: Gradually increase the load to simulate real-world traffic patterns. The warm-up period typically lasts between 20 and 60 seconds; after the warm-up, the actual load test begins. 292 | - **Think Time**: Include think time between requests to simulate user interactions. 293 | - **Geographical Distribution**: Test from different Azure regions to assess global performance. 294 | 295 | ##### Performance Tuning Strategies for LLM Apps 296 | 297 | This section discusses performance tuning for LLM Apps. Application performance is heavily influenced by design and architecture. Effective structures can manage high loads, while poor ones may struggle. We'll cover various performance tuning aspects, not all of which may be universally applicable.
 298 | 299 | ###### Application Design 300 | 301 | - **Optimize Application Code**: Examine and refine the algorithms and backend systems of your LLM application to increase efficiency. Utilize asynchronous processing methods, such as Python's async/await, to elevate application performance. This approach allows data processing without blocking other tasks. 302 | 303 | - **Batch Processing**: Batch LLM requests whenever possible to reduce overhead. Grouping multiple requests for simultaneous processing improves throughput and efficiency by allowing the model to better leverage parallel processing capabilities, thereby optimizing overall performance. 304 | 305 | - **Implement Caching**: Use caching for repetitive queries to lighten the application's load and speed up response times. This is particularly useful in LLM applications where similar questions are common. Caching answers to frequently asked or common questions reduces the need to run the model repeatedly for the same inputs, saving both time and computational resources. 306 | 307 | - **Revisit Your Retry Logic**: LLM model deployments might start to operate at capacity, which can lead to 429 errors. A well-designed retry mechanism can help maintain application responsiveness. With the OpenAI Python SDK, you can opt for an exponential backoff algorithm. This algorithm gradually increases the wait time between retries, helping to prevent service overload. Additionally, consider the option of falling back on another model deployment. For more information, refer to the load balancing item in the Solution Architecture section. 308 | 309 | ###### Prompt Design 310 | 311 | - **Generate Fewer Tokens**: To reduce model latency, create concise prompts and limit token output. [Cutting 50% of your output tokens may cut ~50% of your latency](https://platform.openai.com/docs/guides/latency-optimization). Setting `max_tokens` can also expedite the response time. 312 | 313 | - **Optimize Your Prompt**: If dealing with large amounts of context data, consider prompt compression methods. Approaches like those offered by [LLMLingua-2](https://llmlingua.com/llmlingua2.html), fine-tuning the model to reduce lengthy prompts, eliminating superfluous RAG responses, and removing extraneous HTML can be efficient. Trimming your prompt by 50% might only yield a latency reduction of 1-5%, but these strategies can lead to more substantial improvements in performance. 314 | 315 | - **Refine Your Prompt**: Optimize the shared prompt prefix by placing dynamic elements, such as RAG results or historical data, toward the end of your prompt. This enhances compatibility with the [KV cache system](https://arxiv.org/pdf/2211.05102) commonly used by most large language model providers. As a result, fewer input tokens need processing with each request, increasing efficiency. 316 | 317 | - **Use Smaller Models**: Whenever possible, pick smaller models because they are faster and more cost-effective. You can improve their responses by using detailed prompts, a few examples, or by fine-tuning. 318 | 319 | ###### Solution Architecture 320 | 321 | - **Provisioned Throughput Deployments**: Use these in scenarios requiring stable latency and predictable performance, avoiding the 'noisy neighbor' issue in regular pay-as-you-go setups. 322 | 323 | - **Load Balancing LLM Endpoints**: Implement [load balancing](https://github.com/Azure-Samples/openai-aca-lb/) for LLM deployment endpoints. Distribute the workload dynamically to enhance performance based on endpoint latency.
Establish suitable rate limits to prevent resource exhaustion and ensure stable latency. 324 | 325 | - **Resource Scaling**: If services show strain under increased load, consider scaling up resources. Azure allows seamless scaling of CPU, RAM, and storage to meet growing demands. 326 | 327 | - **Network Latency**: Position Azure resources, like the Azure OpenAI service, geographically near your users to minimize network latency during data transmission to and from the service. 328 | 329 | #### Azure OpenAI Benchmarking 330 | 331 | The [Azure OpenAI Benchmarking Tool](https://github.com/Azure/azure-openai-benchmark) enables you to assess the performance of Azure OpenAI deployments and choose the ideal model and deployment approach (PTU vs. pay-as-you-go) for your specific needs. It provides detailed latency metrics and simulates various traffic patterns. This tool is particularly useful during model selection and experimentation in the initial phases of a project, as it assists developers in determining whether the model deployment is appropriately sized or if it needs adjustments. It allows you to make data-driven decisions, ensuring your deployment is both efficient and tailored to your operational requirements. 332 | 333 | The benchmarking tool works by creating traffic patterns that mirror the expected test load. When conducting the test, make sure it runs long enough for the throughput to reach a stable state, especially when the utilization is close to or at 100%. The benchmark tool also generates synthetic requests with random words, matching the number of context tokens in the requested shape profile. To simulate a worst-case scenario and prevent unrealistically favorable results due to optimizations, each prompt includes a random prefix, forcing the engine to fully process each request. 334 | 335 | This type of test is especially beneficial for Provisioned-Managed deployments. By adjusting the number of provisioned throughput units (PTUs) deployed, you can optimize your solution's design. Based on the analysis, you might need to revise the number of PTU deployments or even consider a hybrid architecture with PTU and Pay-as-you-go deployments, using load balancing between two or more deployments. 336 | 337 | ###### Test Parameters 338 | 339 | You can configure the benchmarking tool to optimize your load testing experience with several adjustable parameters: 340 | 341 | With the `rate` parameter, you can control the frequency of requests in Requests Per Minute (RPM), allowing for detailed management of test intensity. 342 | 343 | The `clients` parameter enables you to specify the number of parallel clients that will send requests simultaneously, providing a way to simulate varying levels of user interaction. 344 | 345 | The `shape-profile` parameter, with options like "balanced", "context", "custom" or "generation", adjusts the request characteristics based on the number of context and generated tokens, enabling precise testing scenarios that reflect different usage patterns. 346 | 347 | When `shape-profile` is set to "custom", two additional parameters come into play: `context-tokens` and `max-tokens`. The `context-tokens` parameter allows you to specify the number of context tokens in the request, while `max-tokens` allows you to specify the maximum number of tokens that can be generated in the response. 348 | 349 | The `aggregation-window` parameter defines the duration, in seconds, that the data aggregation window spans.
Before the test reaches the aggregation-window duration, all stats are computed over a flexible window equivalent to the elapsed time. This ensures accurate RPM/TPM stats even if the test ends early due to hitting the request limit. 350 | 351 | ###### Retry Strategy 352 | 353 | The `retry` parameter allows you to set the retry strategy for requests, offering options such as "none" or "exponential", which can be crucial for handling API request failures effectively. When setting up a retry strategy for Azure OpenAI benchmarking, it's crucial to select an approach that carefully balances resource capacity to avoid skewing latency statistics. 354 | 355 | When running a test with `retry=none`, throttled requests are immediately retried with a reset start time, and latency metrics only reflect the final successful attempt, which may not represent the end user's experience. Use this setting for workloads within resource limits without throttling, or to assess how many requests need redirecting to a backup during peak loads that surpass the primary resource's capacity. 356 | 357 | Conversely, with `retry=exponential`, failed or throttled requests are retried with exponential backoff, up to 60 seconds. This approach, recommended when no backup resources are deployed, measures the total time from the first failed attempt to the successful completion, thus capturing the full potential wait time an end-user might experience. This method is ideal for understanding the impacts of throttling and retries on total request latency, especially in scenarios like chat applications where response times are critical. 358 | 359 | ###### Sample Scenario 360 | 361 | In the following example, taken from the tool's documentation, the benchmarking tool tests a traffic pattern that sends requests to the gpt-4 deployment in the 'myaccount' Azure OpenAI resource at a rate of 60 requests per minute, with the retry set to exponential. The default "balanced" traffic shape is used, where each request contains 500 context tokens and the response size is limited to 500 tokens. 362 | ``` 363 | $ python -m benchmark.bench load \ 364 | --deployment gpt-4 \ 365 | --rate 60 \ 366 | --retry exponential \ 367 | https://myaccount.openai.azure.com 368 | 369 | 2023-10-19 18:21:06 INFO using shape profile balanced: context tokens: 500, max tokens: 500 370 | 2023-10-19 18:21:06 INFO warming up prompt cache 371 | 2023-10-19 18:21:06 INFO starting load... 372 | 2023-10-19 18:21:06 rpm: 1.0 requests: 1 failures: 0 throttled: 0 ctx tpm: 501.0 gen tpm: 103.0 ttft avg: 0.736 ttft 95th: n/a tbt avg: 0.088 tbt 95th: n/a e2e avg: 1.845 e2e 95th: n/a util avg: 0.0% util 95th: n/a 373 | 2023-10-19 18:21:07 rpm: 5.0 requests: 5 failures: 0 throttled: 0 ctx tpm: 2505.0 gen tpm: 515.0 ttft avg: 0.937 ttft 95th: 1.321 tbt avg: 0.042 tbt 95th: 0.043 e2e avg: 1.223 e2e 95th: 1.658 util avg: 0.8% util 95th: 1.6% 374 | 2023-10-19 18:21:08 rpm: 8.0 requests: 8 failures: 0 throttled: 0 ctx tpm: 4008.0 gen tpm: 824.0 ttft avg: 0.913 ttft 95th: 1.304 tbt avg: 0.042 tbt 95th: 0.043 e2e avg: 1.241 e2e 95th: 1.663 util avg: 1.3% util 95th: 2.6% 375 | ``` 376 | 377 | When you run the test, you will obtain average and 95th percentile metrics for the following measures: 378 | 379 | |measure|description| 380 | |-|-| 381 | |`ttft`| Time to First Token. Time in seconds from the beginning of the request until the first token was received.| 382 | |`tbt`| Time Between Tokens.
Time in seconds between two consecutive generated tokens.| 383 | |`e2e`| End-to-end request time.| 384 | |`util`| Azure OpenAI deployment utilization percentage as reported by the service.| 385 | 386 | 387 | ##### Monitoring AOAI Resource 388 | 389 | To leverage Azure Monitor Log Analytics queries for log and metrics analysis, it's essential to [configure diagnostic settings](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/monitoring#configure-diagnostic-settings) for both your Azure OpenAI resource and Log Analytics workspace. While platform metrics and the Azure Monitor activity log are automatically collected and stored, Azure Monitor resource logs require a diagnostic setting to be created and routed to one or more locations. 390 | 391 | These settings enable the collection and analysis of metrics and log data from your Azure OpenAI resource. They allow you to track key metrics like the number of API calls, generated tokens, and training hours, and provide access to logs for insights into resource-specific activities. Proper configuration offers valuable insights into your Azure OpenAI resources' performance and usage, aiding in application optimization and troubleshooting. 392 | 393 | After configuring the diagnostic settings, you can start querying the generated logs. Simply access your Azure OpenAI resource in the portal and then select Logs in the Monitoring section. Next, click on the Log Analytics Workspace that you selected during the diagnostic settings configuration and select the workspace's Logs option. 394 | 395 | Below is a query example that retrieves logs from AzureDiagnostics for "ChatCompletions_Create" operations conducted between 3:30 PM and 4:30 PM on April 26, 2024. It selects details such as timestamp, resource, operation, duration, response code, and additional properties, enabling a detailed analysis of the operation's performance and outcomes during that hour. 396 | 397 | ``` 398 | AzureDiagnostics 399 | | where TimeGenerated between(datetime(2024-04-26T15:30:00) .. datetime(2024-04-26T16:30:00)) 400 | | where OperationName == "ChatCompletions_Create" 401 | | project TimeGenerated, _ResourceId, Category, OperationName, DurationMs, ResultSignature, properties_s 402 | ``` 403 | 404 | ![Azure Diagnostics query results](../media/perftest-azure-diagnostics.png) 405 |
Analyzing Azure OpenAI Metrics with Azure Monitor.
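Beyond inspecting individual log entries, you can aggregate the same data to track latency over time. The sketch below reuses the AzureDiagnostics columns from the query above to compute average and 95th percentile durations in five-minute buckets:

```
AzureDiagnostics
| where OperationName == "ChatCompletions_Create"
| summarize avg(DurationMs), percentile(DurationMs, 95) by bin(TimeGenerated, 5m)
| order by TimeGenerated asc
```

Comparing these server-side percentiles with the client-side `e2e` figures from the benchmarking tool helps you attribute latency to the service itself versus the network and client stack.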
406 | 407 | 408 | ## How-To Guides 409 | 410 | Now that you understand the concepts for conducting performance tests, you can refer to the following sections where we provide a detailed guide on how to use the tools mentioned in the text to test your LLM App or your Azure OpenAI model deployment. 411 | 412 | - [LLM RAG application testing with Azure Load Testing](https://github.com/Azure/GPT-RAG/blob/main/docs/LOAD_TESTING.md). 413 | 414 | - [Model deployment testing with AOAI Benchmarking Tool](AOAI_BENCH_TOOL.md). 415 | 416 | -------------------------------------------------------------------------------- /labs/performance/media/perftest-GPT-RAG-Basic-communication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-GPT-RAG-Basic-communication.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-GPT-RAG-Basic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-GPT-RAG-Basic.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-analysis.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-aoai-response-time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-aoai-response-time.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-app-component.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-app-component.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-architecture.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-azure-diagnostics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-azure-diagnostics.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-azure-load-testing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-azure-load-testing.png 
-------------------------------------------------------------------------------- /labs/performance/media/perftest-github-environment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-github-environment.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-github-var-secrets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-github-var-secrets.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-llmlifecycle-with-tests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-llmlifecycle-with-tests.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-llmlifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-llmlifecycle.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-portal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-portal.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-response-time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-response-time.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-running.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-running.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-sample-sequence-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-sample-sequence-diagram.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-server-metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-server-metrics.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-starting.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-starting.png -------------------------------------------------------------------------------- /labs/performance/media/perftest-users-per-hour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/llmops-workshop/976292190e4fab328af3b1aa56cac2b0e45b6c18/labs/performance/media/perftest-users-per-hour.png -------------------------------------------------------------------------------- /labs/performance/requirements.txt: -------------------------------------------------------------------------------- 1 | ipykernel 2 | matplotlib 3 | nbstripout 4 | pandas 5 | seaborn 6 | tabulate 7 | -------------------------------------------------------------------------------- /labs/performance/runtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Source the parameters file 4 | source benchmark.parameters 5 | 6 | # Run the command 7 | python -m azure-openai-benchmark.benchmark.bench load \ 8 | --deployment $AOAI_DEPLOYMENT \ 9 | --rate $RATE \ 10 | --shape-profile custom \ 11 | --context-tokens $CONTEXT_TOKENS \ 12 | --max-tokens $MAX_TOKENS \ 13 | --retry $RETRY \ 14 | $AOAI_ENDPOINT \ 15 | --duration $DURATION \ 16 | --output-format jsonl | tee $TEST_NAME.log 2> $ERROR_FILE 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ipykernel 2 | nbstripout 3 | nbconvert 4 | promptflow 5 | promptflow-tools --------------------------------------------------------------------------------