├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── 1_bug_template.yml │ ├── 2_feaure_template.yml │ └── config.yml └── workflows │ ├── check-online-doc-build.yml │ ├── daily_check_issue_and_pr.yml │ ├── pr-path-detection.yml │ └── scorecard.yml ├── .gitignore ├── .known-issues ├── README └── sphinx.conf ├── .pre-commit-config.yaml ├── 404.rst ├── CONTRIBUTING.md ├── Makefile ├── README.rst ├── blogs └── index.rst ├── community ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── SECURITY.md ├── TSC.rst ├── add_vectorDB.md ├── charter.md ├── codeowner.md ├── codeowner_emeritus.md ├── dataprep-fig1.png ├── index.rst ├── pull_request_template.txt ├── rfcs.rst └── rfcs │ ├── 24-05-16-GenAIExamples-001-Using_MicroService_to_implement_ChatQnA.md │ ├── 24-05-16-OPEA-001-Overall-Design.md │ ├── 24-05-17-OPEA-001-Deployment-Design.md │ ├── 24-05-24-OPEA-001-Code-Structure.md │ ├── 24-06-21-OPEA-001-DocSum_Video_Audio.md │ ├── 24-06-21-OPEA-001-Guardrails-Gateway.md │ ├── 24-07-11-OPEA-Agent.md │ ├── 24-08-02-OPEA-AIAvatarChatbot.md │ ├── 24-08-05-OPEA-Workflow-Executor-Example.md │ ├── 24-08-07-OPEA-GenAIStudio.md │ ├── 24-08-20-OPEA-001-AI_Gateway_API.md │ ├── 24-08-21-GenAIExample-002-Edge_Craft_RAG.md │ ├── 24-10-02-GenAIExamples-001-Image_and_Audio_Support_in_MultimodalQnA.md │ ├── 24-10-20-OPEA-001-Haystack-Integration.md │ ├── 24-11-25-GenAIExamples-Ollama_support_for_cpu_server.md │ ├── 25-01-10-OPEA-Benchmark.md │ ├── 25-03-03-GenAIComponents-001-Routing-Agent.md │ ├── 25-03-07-OPEA-001-OIM-Operator.md │ ├── 25-03-14-GenAIExample-001-CodeTrans-with-Agents.md │ ├── 25-05-06-GenAIComps-001-Add-Streaming-Support-for-ASR.md │ ├── 25-15-01-GenAIExamples-001-Code-Generation-Using-RAG-and-Agents.md │ ├── 25-19-03-GenAIExamples-HybridRag-personal_assistant.md │ ├── 25-23-04-GenAIComps-001-Air-Gap-Support.md │ ├── Edge_Craft_RAG.png │ ├── Edge_Craft_RAG_screenshot_1.png │ ├── Edge_Craft_RAG_screenshot_2.png │ ├── README.md │ ├── assets │ ├── Hybrid-rag-architecture.png │ ├── 
avatar1.jpg │ ├── avatar2.jpg │ ├── avatar3.png │ ├── avatar4.png │ ├── avatar6.png │ ├── avatar_design.png │ ├── avatars-chatbot.png │ ├── avatars-ui.png │ ├── image_wav2lipgfpgan_cut.gif │ ├── multimodal_enhanced_audio_ui.png │ ├── multimodal_enhanced_chat_ui.png │ ├── multimodal_enhanced_diagram.png │ ├── multimodal_enhanced_image_ui.png │ ├── multimodal_enhanced_pdf_ui.png │ ├── multimodal_enhanced_video_ui.png │ ├── oim-operator-flow.png │ ├── ui_latest_1.png │ ├── ui_latest_2.png │ ├── ui_latest_3.png │ └── video_wav2lipgfpgan_cut.gif │ ├── opea_architecture.png │ ├── opea_deploy_process_v0.png │ ├── opea_deploy_process_v1.png │ ├── opea_deploy_process_v2.png │ ├── opea_deploy_workflow.png │ ├── opea_workflow.png │ └── rfc_template.txt ├── conf.py ├── deploy └── index.rst ├── developer-guides ├── OPEA_API.rst ├── OPEA_API_Specification.docx ├── doc_guidelines.rst ├── docbuild.rst ├── graphviz.rst ├── images │ ├── boot-flow.dot │ ├── circle-square.dot │ ├── doc-gen-flow.dot │ ├── gaspump.dot │ ├── node-shape-edges.dot │ ├── opea-docs-fork.png │ ├── record.dot │ └── trusty-boot-flow.dot ├── index.rst ├── mdtable.txt ├── primary_readme_genai_examples_template.md ├── readme_guidelines_genai_examples.rst ├── secondary_readme_genai_examples_template.md └── tabbed-alternative.txt ├── eval └── index.rst ├── examples ├── examples.rst └── index.rst ├── faq.md ├── framework ├── OPEA - Rev05 PS - 4_15_2024.docx ├── framework.md └── images │ ├── framework-image1.png │ ├── framework-image10.png │ ├── framework-image11.png │ ├── framework-image12.png │ ├── framework-image13.png │ ├── framework-image14.png │ ├── framework-image15.png │ ├── framework-image16.png │ ├── framework-image17.png │ ├── framework-image2.png │ ├── framework-image3.png │ ├── framework-image4.png │ ├── framework-image5.png │ ├── framework-image6.png │ ├── framework-image7.png │ ├── framework-image8.png │ └── framework-image9.png ├── getting-started ├── README.md └── assets │ ├── chat_ui_response.png │ 
├── chat_ui_response_rag.png │ └── what_is_opea.pdf ├── glossary.rst ├── guide └── installation │ ├── gmc_install │ └── gmc_install.md │ ├── install_docker.sh │ └── k8s_install │ ├── README.md │ ├── k8s_instal_aws_eks.md │ ├── k8s_install_kubeadm.md │ └── k8s_install_kubespray.md ├── images ├── BrokenBlocks.png ├── OPEA-favicon-32x32.png ├── opea-horizontal-color-w200.png └── opea-horizontal-white-w200.png ├── index.rst ├── introduction └── index.rst ├── microservices └── index.rst ├── publish └── index.rst ├── release_notes ├── RELEASE.md ├── index.rst ├── v0.6.md ├── v0.7.md ├── v0.8.md ├── v0.9.md ├── v1.0.md ├── v1.1.md ├── v1.2.md └── v1.3.md ├── roadmap ├── 2024-2025.md └── CICD.md ├── scripts ├── build.sh ├── checkmd.sh ├── codeowners_to_md.py ├── filter-doc-log.sh ├── filter-known-issues.py ├── fix-github-md-refs.sh ├── hist_rel.sh ├── maketoc.sh ├── publish-README.md ├── publish-redirect.sh ├── publish-robots.txt ├── requirements.txt ├── rsync-include.txt ├── setup_env.sh ├── show-versions.py └── sync-all-repos.sh ├── sphinx ├── _static │ ├── images │ │ ├── BrokenBlocks.png │ │ ├── OPEA-favicon-32x32.png │ │ ├── opea-horizontal-color-w200.png │ │ ├── opea-horizontal-white-w200.png │ │ ├── opea-icon-color.svg │ │ └── opea-icon-white.svg │ ├── opea-custom.css │ └── opea-custom.js ├── _templates │ ├── aversions.html │ ├── breadcrumbs.html │ ├── footer.html │ ├── layout.html │ └── versions.html ├── extensions │ ├── html_redirects.py │ └── link_roles.py └── substitutions.txt └── tutorial ├── AgentQnA └── AgentQnA_Guide.rst ├── AudioQnA ├── AudioQnA_Guide.rst └── deploy │ ├── gaudi.md │ └── xeon.md ├── ChatQnA ├── ChatQnA_Guide.rst └── deploy │ ├── add_vector_db.md │ ├── aipc.md │ ├── gaudi.md │ ├── k8s_getting_started.md │ ├── k8s_helm.md │ ├── nvidia.md │ └── xeon.md ├── CodeGen ├── CodeGen_Guide.rst └── deploy │ ├── gaudi.md │ └── xeon.md ├── CodeTrans ├── CodeTrans_Guide.rst └── deploy │ ├── gaudi.md │ └── xeon.md ├── DocIndexRetriever ├── 
DocIndexRetriever_Guide.rst └── deploy │ ├── gaudi.md │ └── xeon.md ├── DocSum ├── DocSum_Guide.rst └── deploy │ ├── gaudi.md │ └── xeon.md ├── OpenTelemetry ├── OpenTelemetry_OPEA_Guide.rst ├── assets │ ├── Grafana_Node_Exporter.png │ ├── Grafana_chatqna_backend_server.png │ ├── Grafana_chatqna_backend_server_1.png │ ├── Grafana_chatqna_dataprep.png │ ├── Grafana_chatqna_retriever.png │ ├── Grafana_vLLM.png │ ├── Grafana_vLLM_2.png │ ├── Jaeger_agent_rag.png │ ├── Jaeger_agent_sql.png │ ├── agent_grafana_mega_list.png │ ├── agent_grafana_node.png │ ├── agent_grafana_react.png │ ├── agent_grafana_sql.png │ ├── agent_grafana_vllm.png │ ├── agent_grafana_vllm_2.png │ ├── agent_jaeger_4traces.png │ ├── agent_jaeger_4traces_web.png │ ├── agent_jaeger_init.png │ ├── agent_jaeger_react_2_spans.png │ ├── agent_jaeger_react_init.png │ ├── agent_jaeger_react_spans.png │ ├── agent_jaeger_react_spans_1_webq.png │ ├── agent_jaeger_react_spans_2_webq.png │ ├── agent_jaeger_sql_2_spans.png │ ├── agent_jaeger_sql_35_q2_spans.png │ ├── agent_jaeger_sql_spans.png │ ├── agent_questions.png │ ├── agent_questions_web.png │ ├── chatqna_16reqs.png │ ├── grafana_dashboard_init.png │ ├── grafana_init.png │ ├── jaeger_agent_init.png │ ├── jaeger_ui_init.png │ ├── jaeger_ui_opea.png │ ├── jaeger_ui_opea_chatqna_1req.png │ ├── jaeger_ui_opea_chatqna_cpu_breakdown.png │ ├── jaeger_ui_opea_chatqna_req_breakdown.png │ ├── jaeger_ui_opea_chatqna_req_breakdown_2.png │ ├── jaeger_ui_opea_chatqna_req_cpu.png │ ├── jaeger_ui_opea_chatqna_req_gaudi.png │ ├── jaeger_ui_opea_trace.png │ ├── opea_telemetry.jpg │ └── prometheus.png └── deploy │ ├── AgentQnA.md │ └── ChatQnA.md ├── VideoQnA └── VideoQnA_Guide.rst └── index.rst /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * david.b.kinder@intel.com suyue.chen@intel.com feng.tian@intel.com malini.bhandaru@intel.com preethi.venkatesh@intel.com rachel.roumeliotis@intel.com tom.f.lenth@intel.com 2 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/1_bug_template.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2024 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | name: Report Bug 5 | description: Used to report bug 6 | title: "[Bug]" 7 | labels: ["bug"] 8 | body: 9 | - type: dropdown 10 | id: version 11 | attributes: 12 | label: Version 13 | multiple: true 14 | options: 15 | - v0.9 16 | - v1.0 17 | default: 1 18 | validations: 19 | required: true 20 | 21 | - type: dropdown 22 | id: browsers 23 | attributes: 24 | label: What browsers are you seeing the problem on? 25 | multiple: true 26 | options: 27 | - Firefox 28 | - Chrome 29 | - Safari 30 | - Microsoft Edge 31 | - Others (Please let us know in description) 32 | 33 | - type: dropdown 34 | id: section 35 | attributes: 36 | label: What section of the docs is the problem on? 37 | multiple: true 38 | options: 39 | - GenAI Examples 40 | - GenAI Microservices 41 | - Deploying GenAI 42 | - Evaluating GenAI 43 | - Others (Please let us know in description) 44 | 45 | - type: textarea 46 | id: description 47 | attributes: 48 | label: Bug Description 49 | description: What is the problem? 50 | value: | 51 | Thanks for taking the time to fill out this bug report. If you have a new idea, please fill a feature request instead. 52 | validations: 53 | required: true 54 | 55 | - type: textarea 56 | id: reproduce 57 | attributes: 58 | label: Reproduction steps 59 | description: "How do you view this bug? Please walk us through it step by step." 60 | value: | 61 | 1. Go to https://opea-project.github.io/ 62 | 2. 63 | 3. 64 | ...
65 | render: bash 66 | validations: 67 | required: true -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/2_feaure_template.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2024 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | name: Request Feature 5 | description: Used to request feature 6 | title: "[Feature]" 7 | body: 8 | - type: textarea 9 | id: description 10 | attributes: 11 | label: Feature Description 12 | description: Describe the current problem? And why is this important? 13 | value: | 14 | Thanks for taking the time to fill out this feature request. 15 | validations: 16 | required: true 17 | 18 | - type: dropdown 19 | id: current-issue 20 | attributes: 21 | label: Is this an issue in the latest version? 22 | options: 23 | - "Yes" 24 | - "No" 25 | - Unknown 26 | 27 | - type: dropdown 28 | id: section 29 | attributes: 30 | label: What section of the docs is the problem on?
31 | multiple: true 32 | options: 33 | - GenAI Examples 34 | - GenAI Microservices 35 | - Deploying GenAI 36 | - Evaluating GenAI 37 | - Others (Please let us know in description) -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2024 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | blank_issues_enabled: true 5 | -------------------------------------------------------------------------------- /.github/workflows/check-online-doc-build.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2024 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | name: Check Online Document Building 5 | permissions: {} 6 | 7 | on: 8 | pull_request: 9 | branches: [main] 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-22.04 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Build Online Document 17 | shell: bash 18 | run: | 19 | git config --local --get remote.origin.url 20 | echo "build online doc" 21 | bash scripts/build.sh 22 | -------------------------------------------------------------------------------- /.github/workflows/daily_check_issue_and_pr.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2025 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | name: Check stale issue and pr 5 | 6 | on: 7 | schedule: 8 | - cron: "30 22 * * *" 9 | 10 | jobs: 11 | close-issues: 12 | runs-on: ubuntu-latest 13 | permissions: 14 | issues: write 15 | pull-requests: write 16 | steps: 17 | - uses: actions/stale@v9 18 | with: 19 | days-before-issue-stale: 30 20 | days-before-pr-stale: 30 21 | days-before-issue-close: 7 22 | days-before-pr-close: 7 23 | stale-issue-message: "This issue is stale because it has been open 30 days with no activity. 
Remove stale label or comment or this will be closed in 7 days." 24 | stale-pr-message: "This PR is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 7 days." 25 | close-issue-message: "This issue was closed because it has been stalled for 7 days with no activity." 26 | close-pr-message: "This PR was closed because it has been stalled for 7 days with no activity." 27 | repo-token: ${{ secrets.ACTION_TOKEN }} 28 | start-date: "2025-03-01T00:00:00Z" 29 | exempt-issue-labels: "Backlog" 30 | -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2025 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # This workflow uses actions that are not certified by GitHub. They are provided 5 | # by a third-party and are governed by separate terms of service, privacy 6 | # policy, and support documentation. 7 | 8 | name: Scorecard supply-chain security 9 | on: 10 | # For Branch-Protection check. Only the default branch is supported. See 11 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 12 | branch_protection_rule: 13 | # To guarantee Maintained check is occasionally updated. See 14 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained 15 | schedule: 16 | - cron: '31 14 * * 5' 17 | push: 18 | branches: [ "main" ] 19 | 20 | # Declare default permissions as read only. 21 | permissions: read-all 22 | 23 | jobs: 24 | analysis: 25 | name: Scorecard analysis 26 | runs-on: ubuntu-latest 27 | # `publish_results: true` only works when run from the default branch. conditional can be removed if disabled. 28 | if: github.event.repository.default_branch == github.ref_name || github.event_name == 'pull_request' 29 | permissions: 30 | # Needed to upload the results to code-scanning dashboard. 
31 | security-events: write 32 | # Needed to publish results and get a badge (see publish_results below). 33 | id-token: write 34 | # Uncomment the permissions below if installing in a private repository. 35 | # contents: read 36 | # actions: read 37 | 38 | steps: 39 | - name: "Checkout code" 40 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 41 | with: 42 | persist-credentials: false 43 | 44 | - name: "Run analysis" 45 | uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1 46 | with: 47 | results_file: results.sarif 48 | results_format: sarif 49 | # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: 50 | # - you want to enable the Branch-Protection check on a *public* repository, or 51 | # - you are installing Scorecard on a *private* repository 52 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. 53 | repo_token: ${{ secrets.ACTION_TOKEN }} 54 | 55 | # Public repositories: 56 | # - Publish results to OpenSSF REST API for easy access by consumers 57 | # - Allows the repository to include the Scorecard badge. 58 | # - See https://github.com/ossf/scorecard-action#publishing-results. 59 | # For private repositories: 60 | # - `publish_results` will always be set to `false`, regardless 61 | # of the value entered here. 62 | publish_results: true 63 | 64 | # (Optional) Uncomment file_mode if you have a .gitattributes with files marked export-ignore 65 | # file_mode: git 66 | 67 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF 68 | # format to the repository Actions tab. 69 | - name: "Upload artifact" 70 | uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 71 | with: 72 | name: SARIF file 73 | path: results.sarif 74 | retention-days: 5 75 | 76 | # Upload the results to GitHub's code scanning dashboard (optional). 
77 | # Commenting out will disable upload of results to your repo's Code Scanning dashboard 78 | - name: "Upload to code-scanning" 79 | uses: github/codeql-action/upload-sarif@v3 80 | with: 81 | sarif_file: results.sarif 82 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | .vscode 3 | .venv 4 | *.DS_Store 5 | 6 | -------------------------------------------------------------------------------- /.known-issues/README: -------------------------------------------------------------------------------- 1 | This directory contains configuration files to ignore errors found in 2 | the build and test process which are known to the developers and for 3 | now can be safely ignored. 4 | 5 | To use: 6 | 7 | $ cd <docs repo root> 8 | $ make SOMETHING >& result 9 | $ scripts/filter-known-issues.py result 10 | 11 | It is included in the source tree so if anyone has to submit anything 12 | that triggers some kind of error that is a false positive, it can 13 | include the "ignore me" file, properly documented. 14 | 15 | Each file can contain one or more multiline Python regular expressions 16 | (https://docs.python.org/3/library/re.html#regular-expression-syntax) 17 | that match an error message. Multiple regular expressions are 18 | separated by comment blocks (that start with #). Note that an empty 19 | line still is considered part of the multiline regular expression. 20 | 21 | For example 22 | 23 | ---beginning--- 24 | # 25 | # This testcase always fails, pending fix ZEP-1234 26 | # 27 | .*/tests/kernel/grumpy .* FAIL 28 | # 29 | # Documentation issue, masks: 30 | # 31 | # /home/e/inaky/z/kernel.git/doc/api/io_interfaces.rst:28: WARNING: Invalid definition: Expected identifier in nested name.
[error at 19] 32 | # struct dev_config::@65 dev_config::bits 33 | # -------------------^ 34 | # 35 | ^(?P<filename>.+/doc/api/io_interfaces.rst):(?P<lineno>[0-9]+): WARNING: Invalid definition: Expected identifier in nested name. \[error at [0-9]+] 36 | ^\s+struct dev_config::@[0-9]+ dev_config::bits.* 37 | ^\s+-+\^ 38 | ---end--- 39 | 40 | Note you want to: 41 | 42 | - use relative paths; instead of 43 | /home/me/mydir/zephyr/something/somewhere.c you will want 44 | ^.*/something/somewhere.c (as they will depend on where it is being 45 | built) 46 | 47 | - Replace line numbers with [0-9]+, as they will change 48 | 49 | - (?P<filename>[-._/\w]+/something/somewhere.c) saves the match on 50 | that file path in a "variable" called 'filename' that later you can 51 | match with (?P=filename) if you want to match multiple lines of the 52 | same error message. 53 | 54 | Can get really twisted and interesting in terms of regexps; they are 55 | powerful, so start small :) 56 | -------------------------------------------------------------------------------- /.known-issues/sphinx.conf: -------------------------------------------------------------------------------- 1 | # Known Sphinx generation messages to ignore 2 | # 3 | # 4 | # for toctree glob, we give patterns that go down many levels that might not 5 | # contain any documents along the way 6 | ^.*WARNING: toctree glob pattern '.*\/\*' didn't match any documents$ 7 | # 8 | ^.*from sphinx.util import.*$ 9 | # ignore warnings about deprecated features in future Sphinx versions 10 | ^.*RemovedInSphinx80Warning.*$ 11 | # 12 | ^WARNING: while setting up extension myst_parser: Failed to convert typing.Any to a set or tuple$ 13 | # 14 | ^.*WARNING: toctree contains reference to document .*that doesn't have a title: no link will be generated$ 15 | # ignore intradoc targets 16 | # let's ignore all missing targets for now 17 | ^.*WARNING: 'myst' cross-reference target not found: '[^\']*'.*$ 18 | # 19 | ^.*WARNING: local id not found in doc .*$ 20 | #
ignore .md files not in a toctree (used this temporarily) 21 | #^.*md: WARNING: document isn't included in any toctree$ 22 | # 23 | # Mermaid config options 24 | ^.*WARNING: 'mermaid': Unknown option keys: .*$ 25 | # Ignore unknown pygments lexer names 26 | ^.*WARNING: Pygments lexer name .* is not known$ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_prs: true 3 | autoupdate_schedule: quarterly 4 | 5 | repos: 6 | - repo: https://github.com/codespell-project/codespell 7 | rev: v2.2.6 8 | hooks: 9 | - id: codespell 10 | args: [-w] 11 | -------------------------------------------------------------------------------- /404.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. _page-not-found: 4 | 5 | Page Not Found 6 | ############## 7 | 8 | .. rst-class:: rst-columns2 9 | 10 | .. image:: images/BrokenBlocks.png 11 | :align: left 12 | :width: 320px 13 | 14 | Sorry. The page you requested was not found on this site. 15 | 16 | Check the address for misspellings. 17 | It's also possible we've removed or renamed the page you're looking for. 18 | 19 | Try using the navigation links on the left of this page to navigate 20 | the major sections of our site, or use the document search box. 21 | 22 | .. raw:: html 23 | 24 |
25 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to OPEA 2 | 3 | Welcome to the OPEA open-source community! We are thrilled to have you here and excited about the potential contributions you can bring to the OPEA platform. Whether you are fixing bugs, adding new GenAI components, improving documentation, or sharing your unique use cases, your contributions are invaluable. 4 | 5 | Together, we can make OPEA the go-to platform for enterprise AI solutions. Let's work together to push the boundaries of what's possible and create a future where AI is accessible, efficient, and impactful for everyone. 6 | 7 | Please check the [Contributing guidelines](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md) for a detailed guide on how to contribute a GenAI component and all the ways you can contribute! 8 | 9 | Thank you for being a part of this journey. We can't wait to see what we can achieve together! 10 | 11 | # Additional Content 12 | 13 | - [Code of Conduct](https://github.com/opea-project/docs/tree/main/community/CODE_OF_CONDUCT.md) 14 | - [Security Policy](https://github.com/opea-project/docs/tree/main/community/SECURITY.md) 15 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # makefile for Sphinx documentation 2 | # 3 | 4 | ifeq ($(VERBOSE),1) 5 | Q = 6 | else 7 | Q = @ 8 | endif 9 | 10 | # You can set these variables from the command line. 
11 | SPHINXOPTS ?= -q -j auto 12 | SPHINXBUILD = sphinx-build 13 | SPHINXPROJ = "OPEA Project" 14 | BUILDDIR ?= _build 15 | SOURCEDIR = $(BUILDDIR)/rst 16 | LATEXMKOPTS = "-silent -f" 17 | 18 | # Document publication assumes the folder structure is setup with the 19 | # opea-project repos: GenAIExamples (etc), docs, and opea.github.io repos as 20 | # sibling folders. make is run inside the opea-project/docs folder. Content from 21 | # other sibling repos is copied to a build folder to get all content in one 22 | # tree. 23 | 24 | 25 | OPEA_BASE = $(CURDIR)/.. 26 | DOC_TAG ?= development 27 | RELEASE ?= latest 28 | PUBLISHDIR = $(OPEA_BASE)/opea-project.github.io/$(RELEASE) 29 | # scripts/rsync-include.txt lists file extensions to look for and copy 30 | RSYNC_OPTS = -am --exclude='.github/pull_request_template.md' --include='*/' --include-from=scripts/rsync-include.txt --exclude='*' 31 | RSYNC_DIRS = GenAIComps GenAIEval GenAIExamples GenAIInfra 32 | 33 | # Put it first so that "make" without argument is like "make help". 34 | help: 35 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(OPTS) 36 | @echo "" 37 | @echo "make publish" 38 | @echo " publish generated html to opea.github.io site:" 39 | @echo " specify RELEASE=name to publish as a tagged release version" 40 | @echo " and placed in a version subfolder. Requires repo merge permission." 41 | 42 | .PHONY: help Makefile content html singlehtml clean publish 43 | 44 | # Copy all the rst and md content (and images, etc) into the _build/rst folder 45 | # including rst and md content 46 | 47 | # GenAIComps GenAIEval GenAIExamples GenAIInfra 48 | content: 49 | $(Q)mkdir -p $(SOURCEDIR) 50 | $(Q)rsync -a --exclude=$(BUILDDIR) . 
$(SOURCEDIR) 51 | $(Q)for dir in $(RSYNC_DIRS); do\ 52 | rsync $(RSYNC_OPTS) ../$$dir $(SOURCEDIR); \ 53 | done 54 | # temporarily, copy docs content too (were in the docs-work) 55 | # $(Q)rsync $(RSYNC_OPTS) ../docs/* $(SOURCEDIR) 56 | $(Q)find $(SOURCEDIR) -type f -empty -name "README.md" -delete 57 | $(Q)scripts/fix-github-md-refs.sh $(SOURCEDIR) 58 | $(Q)scripts/maketoc.sh $(SOURCEDIR) 59 | 60 | 61 | html: content 62 | @echo making HTML content 63 | $(Q)./scripts/show-versions.py 64 | -$(Q)$(SPHINXBUILD) -t $(DOC_TAG) -b html -d $(BUILDDIR)/doctrees $(SOURCEDIR) $(BUILDDIR)/html $(SPHINXOPTS) $(OPTS) > $(BUILDDIR)/doc.log 2>&1 65 | $(Q)./scripts/filter-doc-log.sh $(BUILDDIR)/doc.log 66 | 67 | singlehtml: content 68 | -$(Q)$(SPHINXBUILD) -t $(DOC_TAG) -b singlehtml -d $(BUILDDIR)/doctrees $(SOURCEDIR) $(BUILDDIR)/html $(SPHINXOPTS) $(OPTS) > $(BUILDDIR)/doc.log 2>&1 69 | $(Q)./scripts/filter-doc-log.sh $(BUILDDIR)/doc.log 70 | 71 | 72 | # Remove generated content 73 | 74 | clean: 75 | rm -fr $(BUILDDIR) 76 | 77 | # Copy material over to the GitHub pages staging repo 78 | # along with a README, index.html redirect to latest/index.html, robots.txt (for 79 | # search exclusions), and tweak the Sphinx-generated 404.html to work as the 80 | # site-wide 404 response page. (We generate the 404.html with Sphinx so it has 81 | # the current left navigation contents and overall style.) 
82 | 83 | publish: 84 | mkdir -p $(PUBLISHDIR) 85 | cd $(PUBLISHDIR)/..; git pull origin main 86 | rm -fr $(PUBLISHDIR)/* 87 | cp -r $(BUILDDIR)/html/* $(PUBLISHDIR) 88 | ifeq ($(RELEASE),latest) 89 | cp scripts/publish-README.md $(PUBLISHDIR)/../README.md 90 | scripts/publish-redirect.sh $(PUBLISHDIR)/../index.html latest/index.html 91 | sed 's//\n /' $(BUILDDIR)/html/404.html > $(PUBLISHDIR)/../404.html 92 | endif 93 | cd $(PUBLISHDIR)/..; git add -A; git commit -s -m "publish $(RELEASE)"; git push origin main; 94 | 95 | server: 96 | cd _build/html; python3 -m http.server 97 | 98 | 99 | # Catch-all target: route all unknown targets to Sphinx using the new 100 | # "make mode" option. $(OPTS) is meant as a shortcut for $(SPHINXOPTS). 101 | %: Makefile 102 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(OPTS) 103 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | OPEA Project Documentation 4 | ########################## 5 | 6 | This repository holds the source and configuration files used to generate the 7 | `OPEA Project documentation web site`_ from all the documentation maintained in 8 | this docs repo and all the GenAI\* repos. 9 | 10 | .. _OPEA Project documentation web site: https://opea-project.github.io 11 | -------------------------------------------------------------------------------- /blogs/index.rst: -------------------------------------------------------------------------------- 1 | .. _opea_blogs: 2 | 3 | OPEA Blogs 4 | ########## 5 | 6 | Come learn, try, develop, and share your stories! Please submit PRs, organizing content in reverse chronological order. 7 | 8 | .. 
list-table:: Trending Blogs 9 | :widths: 10 50 40 10 | :header-rows: 1 11 | 12 | * - Date 13 | - URL 14 | - Authors 15 | * - 04/18/2025 16 | - `省钱还高效!OPEA+Dify在Intel® Arc™ GPU上构建基于DeepSeek的RAG工作流 `_ 17 | - Qiang Ren 18 | * - 04/15/2025 19 | - `Build Your First Chatbot with OPEA In Minutes `_ 20 | - Wang Xigui, Bhandaru Malini, Sin Alex, Du dolpher, Yao Yi, Hu Ying 21 | * - 04/11/2025 22 | - `OPEA驱动的Deepseek解决方案:助力企业智能化转型 `_ 23 | - Theresa Shan 24 | * - 04/03/2025 25 | - `打造接地气的DeepSeek一体机,Xeon+Arc显卡+OPEA平台重塑大模型落地新范式 `_ 26 | - Ren Qiaowei 27 | * - 04/03/2025 28 | - `OPEA开源项目通过RAG技术重塑企业AI应用 The OPEA Open-Source Project is Reshaping Enterprise AI Applications Through RAG Technology `_ 29 | - Shane Wang 30 | * - 04/03/2025 31 | - `解锁边缘AI新潜能, 加速智能场景落地 Unlock the Power of Edge AI and Speed Up the Landing of Generative AI `_ 32 | - Ruoyu Ying 33 | * - 03/14/2025 34 | - `Document Summarization: A Step-by-Step Guide with OPEA™ 1.2 and Intel® Gaudi® 2 `_ 35 | - Mustafa S. Cetin, Sihan Chen, Xinyao Wang and Omar Khleif 36 | * - 03/12/2025 37 | - `AMD Advances Enterprise AI Through OPEA Integration `_ 38 | - Yu Wang, Alex He 39 | * - 02/28/2025 40 | - `极速启航:利用OPEA一键完成DeepSeek探索 Rapid Launch: Explore DeepSeek with One-Click Deployment via OPEA `_ 41 | - Ruoyu Ying 42 | * - 02/26/2025 43 | - `Multimodal Q&A: A Step-by-Step Guide `_ 44 | - Melanie Hart Buehler, Mustafa Cetin, Dina Suehiro Jones 45 | * - 02/18/2025 46 | - `Moving from Open AI to Open Source with OPEA `_ 47 | - William Fowler, Joshua Segovia, Chris Ah-Siong 48 | * - 02/12/2025 49 | - `Deploy a DeepSeek-R1 Distill Chatbot on AWS Xeon `_ 50 | - Alex Sin 51 | * - 10/14/2024 52 | - `Harness Enterprise GenAI Using OPEA `_ 53 | - Iris Ding, Malini Bhandaru 54 | * - 09/01/2024 55 | - 企业人工智能开源软件项目OPEA深度解析(软件和集成电路2024年第9期) Deep Dive into the project Open Platform for Enterprise AI (OPEA) (Software and Integrated Circuit, ISSN 2096-062X) 56 | - Shane Wang, Qiaowei Ren, Ruoyu Ying 57 | 58 | 59 | 
-------------------------------------------------------------------------------- /community/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | - Demonstrating empathy and kindness toward other people 21 | - Being respectful of differing opinions, viewpoints, and experiences 22 | - Giving and gracefully accepting constructive feedback 23 | - Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | - Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | - The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | - Trolling, insulting or derogatory comments, and personal or political attacks 33 | - Public or private harassment 34 | - Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | - Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and 
enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 
86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.1, available at 118 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 119 | 120 | Community Impact Guidelines were inspired by 121 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 122 | 123 | For answers to common questions about this code of conduct, see the FAQ at 124 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 125 | [https://www.contributor-covenant.org/translations][translations]. 
126 | 127 | [homepage]: https://www.contributor-covenant.org 128 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 129 | [Mozilla CoC]: https://github.com/mozilla/diversity 130 | [FAQ]: https://www.contributor-covenant.org/faq 131 | -------------------------------------------------------------------------------- /community/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting a Vulnerability 2 | 3 | Report any security vulnerabilities in this project by following these [Linux Foundation security guidelines](https://www.linuxfoundation.org/security). 4 | 5 | ## Script Usage Notice 6 | 7 | SCRIPT USAGE NOTICE: By downloading and using any script file included with the associated software package (such as files with .bat, .cmd, or .JS extensions, Dockerfiles, or any other type of file that, when executed, automatically downloads and/or installs files onto your system) 8 | (the “Script File”), it is your obligation to review the Script File to understand what files (e.g., other software, AI models, AI Datasets) the Script File will download to your system (“Downloaded Files”). 9 | Furthermore, by downloading and using the Downloaded Files, even if they are installed through a silent install, you agree to any and all terms and conditions associated with such files, including but not limited to, license terms, notices, or disclaimers. 
10 | -------------------------------------------------------------------------------- /community/TSC.rst: -------------------------------------------------------------------------------- 1 | Technical Steering Committee (TSC) 2 | ################################## 3 | 4 | As defined in the :doc:`OPEA Charter `, the Technical Steering 5 | Committee is responsible for all technical oversight of the OPEA project 6 | including: 7 | 8 | * coordinating the technical direction of the OPEA project; 9 | * approving project or system proposals 10 | * creating committees or working groups (for example, an executive or 11 | architectural committee or end-user advisory committee) to support the 12 | OPEA project; 13 | * appointing representatives to work with other open source or open standards 14 | communities; 15 | * establishing community norms, workflows, issuing releases, and security issue 16 | reporting policies; 17 | * approving and implementing policies and processes for contributing 18 | * discussions, seeking consensus, and where necessary, voting on technical 19 | matters relating to the code base that affect multiple sub-projects; 20 | * coordinating any marketing, events, or communications regarding the OPEA project. 21 | 22 | Refer to the :doc:`OPEA Charter ` for more details. 23 | 24 | Technical Steering Committee Members 25 | ************************************ 26 | 27 | .. 
list-table:: TSC Members (as of September 6, 2024) 28 | :header-rows: 1 29 | 30 | * - TSC Member Name 31 | - Member's Title and Company 32 | * - `Malini Bhandaru `_ (Chair) 33 | - Senior Principal Engineer, Intel 34 | * - `Amr Abdelhalem `_ 35 | - SVP, Head of Cloud Platforms, Fidelity 36 | * - `Nathan Cartwright `_ 37 | - Chief Architect - AI, CDW 38 | * - `Justin Cormack `_ 39 | - CTO, Docker 40 | * - `Ke Ding `_ 41 | - Senior Principal AI Engineer, Intel 42 | * - Steve Grubb 43 | - Senior Principal Engineer, Red Hat 44 | * - `Robert Hafner `_ 45 | - Senior Principal Architect, Comcast 46 | * - `Melissa Mckay `_ 47 | - Head of Developer Relations, JFrog 48 | * - `Logan Markewich `_ 49 | - Founding Software Developer, LlamaIndex 50 | * - `Nick Ni `_ (Interim) 51 | - Senior Director, AI Product Management, AMD 52 | -------------------------------------------------------------------------------- /community/codeowner.md: -------------------------------------------------------------------------------- 1 | # OPEA Project Code Owners 2 | 3 | These tables list the GitHub code owners, as found in the CODEOWNERS file in the 4 | corresponding OPEA repository. Code owners are responsible for code and 5 | documentation in a repository. They are automatically requested for 6 | review when someone opens a pull request (PR) that modifies code or 7 | documentation that they own. 8 | 9 | Select or contact the corresponding area owner for a PR review or questions 10 | about content within a repository. 
11 | 12 | 13 | ```{include} codeowners.txt 14 | ``` 15 | 16 | ----- 17 | 18 | ## Continuous Integration (CICD) owners 19 | 20 | CI/CD processing is defined and managed by these owners: 21 | 22 | * chensuyue, daisy-ycguo, ashahba, preethivenkatesh 23 | 24 | 25 | -------------------------------------------------------------------------------- /community/codeowner_emeritus.md: -------------------------------------------------------------------------------- 1 | # OPEA Emeritus Code Owners 2 | 3 | The table below acknowledges the OPEA code owners who have left, and whose valuable contributions were crucial to the project's success. We are deeply grateful! 4 | 5 | 6 | | Repo | Path | Emeritus Owner | 7 | |-----------------------------|-------------------------------|-------------------------------------| 8 | | docs | `*` | david.b.kinder@intel.com | 9 | | GenAIInfra | `*` | iris.ding@intel.com | 10 | | GenAIInfra | `/microservices-connector/` | kefei.zhang@intel.com | 11 | | GenAIInfra | `/microservices-connector/` | huailong.zhang@intel.com | -------------------------------------------------------------------------------- /community/dataprep-fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/dataprep-fig1.png -------------------------------------------------------------------------------- /community/index.rst: -------------------------------------------------------------------------------- 1 | .. _OPEA_community: 2 | 3 | OPEA Community 4 | ############## 5 | 6 | Community Support 7 | ***************** 8 | 9 | Welcome to the OPEA project community! 10 | 11 | The OPEA Community includes developers from member organizations and the general 12 | community all joining in the development of the project. Members contribute and 13 | discuss ideas, submit bugs and bug fixes, and improve documentation. 
They also 14 | help those in need through the community's forums and mailing lists. Anyone can 15 | join the developer community and the community is always willing to help its 16 | members and the User Community to get the most out of OPEA. 17 | 18 | Resources 19 | ********* 20 | 21 | Here's a quick summary of resources to find your way around the OPEA Project 22 | support systems: 23 | 24 | * **OPEA Project Website**: The https://opea.dev website is the 25 | central source of information about what's going on with OPEA. 26 | On this site, you'll 27 | find background and current information about the project as well as 28 | relevant links to project material. 29 | 30 | * **Source Code in GitHub**: OPEA Project source code is maintained on a 31 | public GitHub repository at https://github.com/opea-project. 32 | You'll find information about getting access to the repository and how to 33 | contribute to the project in this :doc:`Contribution Guide `. 34 | 35 | * **Documentation**: Project technical documentation is developed 36 | along with the project's code, and can be found at 37 | https://opea-project.github.io. 38 | 39 | * **Issue Reporting and Tracking**: Requirements and Issue tracking is done in 40 | the Github issues system within each of the major repositories such as: https://github.com/opea-project/GenAIComps/issues. 41 | You can browse through the reported issues and submit issues of your own. 42 | 43 | * **Mailing List**: TBD 44 | 45 | 46 | Contributing Guides 47 | ******************* 48 | 49 | .. toctree:: 50 | :maxdepth: 1 51 | 52 | CONTRIBUTING 53 | add_vectorDB 54 | codeowner 55 | codeowner_emeritus 56 | SECURITY 57 | 58 | Roadmaps 59 | ******** 60 | 61 | .. toctree:: 62 | :maxdepth: 1 63 | :glob: 64 | 65 | ../roadmap/* 66 | 67 | 68 | Project Governance 69 | ****************** 70 | 71 | .. toctree:: 72 | :maxdepth: 1 73 | 74 | charter 75 | TSC 76 | CODE_OF_CONDUCT 77 | SECURITY 78 | 79 | RFC Proposals 80 | ************* 81 | 82 | .. 
toctree:: 83 | :maxdepth: 1 84 | 85 | rfcs 86 | -------------------------------------------------------------------------------- /community/pull_request_template.txt: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | The summary of the proposed changes as long as the relevant motivation and context. 4 | 5 | ## Issues 6 | 7 | List the issue or RFC link this PR is working on. If there is no such link, please mark it as `n/a`. 8 | 9 | ## Type of change 10 | 11 | List the type of change like below. Please delete options that are not relevant. 12 | 13 | - [ ] Bug fix (non-breaking change which fixes an issue) 14 | - [ ] New feature (non-breaking change which adds new functionality) 15 | - [ ] Breaking change (fix or feature that would break existing design and interface) 16 | - [ ] Others (enhancement, documentation, validation, etc.) 17 | 18 | ## Dependencies 19 | 20 | List the newly introduced 3rd party dependency if exists. 21 | 22 | ## Tests 23 | 24 | Describe the tests that you ran to verify your changes. 25 | -------------------------------------------------------------------------------- /community/rfcs.rst: -------------------------------------------------------------------------------- 1 | .. _rfcs: 2 | 3 | Request for Comments (RFCs) 4 | ########################### 5 | 6 | .. include:: rfcs/README.md 7 | :parser: myst_parser.sphinx_ 8 | :start-after: # RFC Archive 9 | 10 | _____ 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | :glob: 15 | 16 | rfcs/2* 17 | 18 | ----- 19 | 20 | RFC Template 21 | ************ 22 | 23 | When creating a new RFC, follow the existing RFCs as an example, or use this 24 | template: 25 | 26 | .. 
include:: rfcs/rfc_template.txt 27 | :code: 28 | 29 | 30 | -------------------------------------------------------------------------------- /community/rfcs/24-05-16-OPEA-001-Overall-Design.md: -------------------------------------------------------------------------------- 1 | # 24-05-16 OPEA-001 Overall Design 2 | 3 | ## Author 4 | 5 | [ftian1](https://github.com/ftian1), [lvliang-intel](https://github.com/lvliang-intel), [hshen14](https://github.com/hshen14) 6 | 7 | ## Status 8 | 9 | Under Review 10 | 11 | ## Objective 12 | 13 | Have a stable, extensible, secure, and easy-to-use orchestration framework design for OPEA users to quickly build their own GenAI applications. 14 | 15 | The requirements include but are not limited to: 16 | 17 | 1. orchestration planner 18 | 19 | have the ability to offer config-based definition or low-code for constructing complex LLM applications. 20 | 21 | 2. component registry 22 | 23 | allow users to register new services for building complex GenAI applications 24 | 25 | 3. monitoring 26 | 27 | allow users to trace the working flow, including logging, execution status, execution time, and so on. 28 | 29 | 4. scalability 30 | 31 | easily scale within K8S or other deployment techs in on-premises and cloud environments. 32 | 33 | ## Motivation 34 | 35 | This RFC is used to present the OPEA overall design philosophy, including overall architecture, working flow, and component design, for community discussion. 36 | 37 | ## Design Proposal 38 | 39 | The proposed overall architecture is 40 | 41 | ![OPEA Architecture](opea_architecture.png) 42 | 43 | 1. GenAIComps 44 | 45 | The suite of microservices, leveraging a service composer to assemble a mega-service tailored for real-world Enterprise AI applications. 46 | 47 | 2. GenAIExamples 48 | 49 | The collective list of Generative AI (GenAI) and Retrieval-Augmented Generation (RAG) examples, targeting for demonstrating the whole orchestration pipeline. 50 | 51 | 3. 
GenAIInfra 52 | 53 | The containerization and cloud native suite for OPEA, including artifacts to deploy GenAIExamples in a cloud native way, which can be used by enterprise users to deploy to their own cloud. 54 | 55 | 4. GenAIEval 56 | 57 | The evaluation, benchmark, and scorecard suite for OPEA, targeting for performance on throughput and latency, accuracy on popular evaluation harness, safety, and hallucination. 58 | 59 | The proposed OPEA workflow is 60 | 61 | ![OPEA Workflow](opea_workflow.png) 62 | 63 | 1. Microservice 64 | 65 | Microservices are akin to building blocks, offering the fundamental services for constructing RAG (Retrieval-Augmented Generation) applications. Each microservice is designed to perform a specific function or task within the application architecture. By breaking down the system into smaller, self-contained services, microservices promote modularity, flexibility, and scalability. This modular approach allows developers to independently develop, deploy, and scale individual components of the application, making it easier to maintain and evolve over time. Additionally, microservices facilitate fault isolation, as issues in one service are less likely to impact the entire system. 66 | 67 | 2. Megaservice 68 | 69 | A megaservice is a higher-level architectural construct composed of one or more microservices, providing the capability to assemble end-to-end applications. Unlike individual microservices, which focus on specific tasks or functions, a megaservice orchestrates multiple microservices to deliver a comprehensive solution. Megaservices encapsulate complex business logic and workflow orchestration, coordinating the interactions between various microservices to fulfill specific application requirements. This approach enables the creation of modular yet integrated applications, where each microservice contributes to the overall functionality of the megaservice. 70 | 71 | 3. 
Gateway 72 | 73 | The Gateway serves as the interface for users to access the megaservice, providing customized access based on user requirements. It acts as the entry point for incoming requests, routing them to the appropriate microservices within the megaservice architecture. Gateways support API definition, API versioning, rate limiting, and request transformation, allowing for fine-grained control over how users interact with the underlying microservices. By abstracting the complexity of the underlying infrastructure, gateways provide a seamless and user-friendly experience for interacting with the megaservice. 74 | 75 | ## Alternatives Considered 76 | 77 | n/a 78 | 79 | ## Compatibility 80 | 81 | n/a 82 | 83 | ## Miscs 84 | 85 | - TODO List: 86 | 87 | - [ ] Micro Service specification 88 | - [ ] Mega Service specification 89 | - [ ] static cloud resource allocator vs dynamic cloud resource allocator 90 | - [ ] open telemetry support 91 | - [ ] authentication and trusted env 92 | 93 | 94 | -------------------------------------------------------------------------------- /community/rfcs/24-05-24-OPEA-001-Code-Structure.md: -------------------------------------------------------------------------------- 1 | # 24-05-24 OPEA-001 Code Structure 2 | 3 | ## Author 4 | 5 | [ftian1](https://github.com/ftian1), [lvliang-intel](https://github.com/lvliang-intel), [hshen14](https://github.com/hshen14) 6 | 7 | ## Status 8 | 9 | Under Review 10 | 11 | ## Objective 12 | 13 | Define clear criteria and rules for adding new code into OPEA projects. 14 | 15 | ## Motivation 16 | 17 | The OPEA project consists of several repos, including GenAIExamples, GenAIInfra, GenAIComps, and so on. We need a clear definition of where the new code for a given feature should be put for a consistent and well-organized code structure. 
18 | 19 | 20 | ## Design Proposal 21 | 22 | The proposed code structure of GenAIInfra is: 23 | 24 | ``` 25 | GenAIInfra/ 26 | ├── kubernetes-addon/ # the folder implementing additional operational capabilities to Kubernetes applications 27 | ├── microservices-connector/ # the folder containing the implementation of microservice connector on Kubernetes 28 | └── scripts/ 29 | ``` 30 | 31 | The proposed code structure of GenAIExamples is: 32 | 33 | ``` 34 | GenAIExamples/ 35 | └── ChatQnA/ 36 | ├── kubernetes/ 37 | │ ├── manifests 38 | │ └── microservices-connector 39 | ├── docker/ 40 | │ ├── docker_compose.yaml 41 | │ ├── dockerfile 42 | │ └── chatqna.py 43 | ├── chatqna.yaml # The MegaService Yaml 44 | └── README.md 45 | ``` 46 | 47 | The proposed code structure of GenAIComps is: 48 | 49 | ``` 50 | GenAIComps/ 51 | └── comps/ 52 | └── llms/ 53 | ├── text-generation/ 54 | │ ├── tgi-gaudi/ 55 | │ │ ├── dockerfile 56 | │ │ └── llm.py 57 | │ ├── tgi-xeon/ 58 | │ │ ├── dockerfile 59 | │ │ └── llm.py 60 | │ ├── vllm-gaudi 61 | │ ├── ray 62 | │ └── langchain 63 | └── text-summarization/ 64 | ``` 65 | 66 | ## Miscs 67 | 68 | n/a 69 | -------------------------------------------------------------------------------- /community/rfcs/24-08-20-OPEA-001-AI_Gateway_API.md: -------------------------------------------------------------------------------- 1 | # 24-08-20-OPEA-001-AI Gateway API 2 | 3 | AI Gateway API 4 | 5 | ## Author 6 | 7 | [daixiang0](https://github.com/daixiang0), [zhixie](https://github.com/zhxie), [gyohuangxin](https://github.com/gyohuangxin), [Forrest-zhao](https://github.com/Forrest-zhao), [ruijin-intel](https://github.com/ruijin-intel) 8 | 9 | ## Status 10 | 11 | Under Review 12 | 13 | ## Objective 14 | 15 | Design the API for AI Gateway. 16 | 17 | ## Motivation 18 | 19 | - Introduce gateway to do mTLS, traffic control, observability and so on 20 | - Introduce AI Gateway API to use existing gateway sloutions rather than implement our own one. 
21 | 22 | ## Design Proposal 23 | 24 | The AI gateway is at the front of all microservices: 25 | 26 | ```mermaid 27 | graph TD; 28 | A(AI Gateway)-->Retrival; 29 | A-->Rerank; 30 | A-->LLM; 31 | A-->Guardrails; 32 | A-->B(Any microservice); 33 | ``` 34 | 35 | ### API overall 36 | 37 | To make the most of current resources, we choose to follow [Kubernetes Gateway API](https://gateway-api.sigs.k8s.io/) since it is the gateway API standard that all gateways support. 38 | 39 | Since AI specific features of Kubernetes Gateway API are still [under discussion](https://docs.google.com/document/d/1FQN_hGhTNeoTgV5Jj16ialzaSiAxC0ozxH1D9ngCVew/edit), We design AI Gateway API including following two parts: 40 | 41 | - **Kubernetes Gateway API** for features it already supports 42 | - **Extension API for** all other features 43 | 44 | ### API workflow 45 | 46 | ```mermaid 47 | graph LR; 48 | A(Config using AI Gateway API)-->B(Convert to specific gateway API) 49 | ``` 50 | 51 | AI Gateway is not a brand-new gateway implementation, only does one thing: Convert. 
52 | 53 | ### Extension API 54 | 55 | ```yaml 56 | apiVersion: extension.gateway.opea.dev/v1 57 | kind: Gateway 58 | metadata: 59 | name: extension-exmaple 60 | spec: 61 | gatewayClassName: envoy 62 | extensions: 63 | - name: extension-1 64 | config: 65 | extension-1-config: aaa 66 | - name: extension-2 67 | config: 68 | extension-2-config: bbb 69 | ``` 70 | 71 | - gatewayClassName: specific gateway implement 72 | - name: the name of extension feature, support multiple extensions 73 | - config: the content of extension config, following specified gateway API 74 | 75 | ### Extension API example 76 | 77 | ```yaml 78 | 79 | apiVersion: extension.gateway.opea.dev/v1 80 | kind: Gateway 81 | metadata: 82 | name: envoy-extension-exmaple 83 | spec: 84 | gatewayClassName: envoy 85 | extensions: 86 | - name: token-ratelimit 87 | config: 88 | name: envoy.filters.http.guardrails 89 | typed_config: 90 | "@type": type.googleapis.com/envoy.extensions.filters.http.guardrails.v3.Guardrails 91 | inference: 92 | runtime: envoy.inference_runtime.openvino 93 | typed_config: 94 | "@type": type.googleapis.com/envoy.extensions.inference_runtime.openvino.v3.OpenvinoConfig 95 | backend: CPU 96 | plugins: 97 | - /usr/lib/libopenvino_tokenizers.so 98 | model_path: /home/zhihao/envoy/.project/openvino/models/OTIS-Official-Spam-Model.xml 99 | source: RESPONSE 100 | action: ALLOW 101 | ``` 102 | 103 | **Guardrail** is AI specific feature, here we use Extension API to config Envoy to use CPU to inference with specified model to do response check. 104 | 105 | The config field follows the Envoy API. 
106 | -------------------------------------------------------------------------------- /community/rfcs/24-10-20-OPEA-001-Haystack-Integration.md: -------------------------------------------------------------------------------- 1 | # 24-10-20-OPEA-001-Haystack-Integration 2 | 3 | ## Author 4 | 5 | [gadmarkovits](https://github.com/gadmarkovits) 6 | 7 | ## Status 8 | 9 | Under Review 10 | 11 | ## Objective 12 | 13 | Create a Haystack integration for OPEA that will enable the use of OPEA components within a Haystack pipeline. 14 | 15 | ## Motivation 16 | 17 | Haystack is a production-ready open source AI framework that is used by many AI practitioners. It has over 70 integrations with various GenAI components such as document stores, model providers and evaluation frameworks from companies such as Amazon, Microsoft, Nvidia and more. Creating an integration for OPEA will allow Haystack customers to use OPEA components in their pipelines. This RFC is used to present a high-level overview of the Haystack integration. 18 | 19 | ## Design Proposal 20 | 21 | The idea is to create thin wrappers for OPEA components that will enable communicating with them using the existing REST API. The wrappers will match Haystack's API so that they could be used within Haystack pipelines. This will allow developers to seamlessly use OPEA components alongside other Haystack components. 22 | 23 | The integration will be implemented as a Python package (similar to other Haystack integrations). The source code will be hosted in OPEA's GenAIComps repo under a new directory called Integrations. The package itself will be uploaded to [PyPi](https://pypi.org/) to allow for easy installation. 24 | 25 | Following a discussion with Haystack's technical team, it was agreed that a ChatQnA example, using this OPEA integration, would be a good way to showcase its capabilities. 
To support this, several component wrappers need to be implemented in the first version of the integration (other wrappers will be added gradually): 26 | 27 | 1. OPEA Document Embedder 28 | 29 | This component will receive a Haystack Document and embed it using an OPEA embedding microservice. 30 | 31 | 2. OPEA Text Embedder 32 | 33 | This component will receive text input and embed it using an OPEA embedding microservice. 34 | 35 | 3. OPEA Generator 36 | 37 | This component will receive a text prompt and generate a response using an OPEA LLM microservice. 38 | 39 | 4. OPEA Retriever 40 | 41 | This component will receive an embedding and retrieve documents with similar embeddings using an OPEA retrieval microservice. 42 | 43 | ## Alternatives Considered 44 | 45 | n/a 46 | 47 | ## Compatibility 48 | 49 | n/a 50 | 51 | ## Miscs 52 | 53 | Once implemented, the Haystack team will list the OPEA integration on their [integrations page](https://haystack.deepset.ai/integrations) which will allow for easier discovery. Haystack, in collaboration with Intel, will also publish a technical blog post showcasing a ChatQnA example using this integration (similar to this [NVidia NIM post](https://haystack.deepset.ai/blog/haystack-nvidia-nim-rag-guide)). 54 | 55 | 56 | -------------------------------------------------------------------------------- /community/rfcs/25-01-10-OPEA-Benchmark.md: -------------------------------------------------------------------------------- 1 | # Purpose 2 | 3 | This RFC is used to describe the behavior of the unified benchmark script for GenAIExamples users. 4 | 5 | In v1.1, those benchmark scripts are per example. This causes much duplicated code and a bad user experience. 6 | 7 | That is why we have motivation to improve this tool to have a unified entry point for perf benchmarking. 
8 | 9 | ## Original benchmark script layout 10 | 11 | ``` 12 | GenAIExamples/ 13 | ├── ChatQnA/ 14 | │ ├── benchmark/ 15 | │ │ ├── benchmark.sh # each example has its own script 16 | │ │ └── deploy.py 17 | │ ├── kubernetes/ 18 | │ │ ├── charts.yaml 19 | │ │ └── ... 20 | │ ├── docker-compose/ 21 | │ │ └── compose.yaml 22 | │ └── chatqna.py 23 | └── ... 24 | ``` 25 | 26 | ## Proposed benchmark script layout 27 | 28 | ``` 29 | GenAIExamples/ 30 | ├── deploy_and_benchmark.py # main entry of GenAIExamples 31 | ├── ChatQnA/ 32 | │ ├── chatqna.yaml # default deploy and benchmark config for deploy_and_benchmark.py 33 | │ ├── kubernetes/ 34 | │ │ ├── charts.yaml 35 | │ │ └── ... 36 | │ |── docker-compose/ 37 | │ | └── compose.yaml 38 | | └── chatqna.py 39 | └── ... 40 | ``` 41 | 42 | 43 | # Design 44 | 45 | The pesudo code of deploy_and_benchmark.py is listed at below for your reference. 46 | 47 | ``` 48 | # deploy_and_benchmark.py 49 | # below is the pesudo code to demostrate its behavior 50 | # 51 | # def main(yaml_file): 52 | # # extract all deployment combinations from chatqna.yaml deploy section 53 | # deploy_traverse_list = extract_deploy_cfg(yaml_file) 54 | # # for example, deploy_traverse_list = [{'node': 2, 'device': gaudi, 'cards_per_node': 8, ...}, 55 | # {'node': 4, 'device': gaudi, 'cards_per_node': 8, ...}, 56 | # ...] 57 | # 58 | # benchmark_traverse_list = extract_benchmark_cfg(yaml_file) 59 | # # for example, benchmark_traverse_list = [{'concurrency': 128, , 'totoal_query_num': 4096, ...}, 60 | # {'concurrency': 128, , 'totoal_query_num': 4096, ...}, 61 | # ...] 
62 | # for deploy_cfg in deploy_traverse_list: 63 | # start_k8s_service(deploy_cfg) 64 | # for benchmark_cfg in benchmark_traverse_list: 65 | # if service_ready: 66 | # ingest_dataset(benchmark_cfg.dataset) 67 | # send_http_request(benchmark_cfg) # will call stresscli.py in GenAIEval 68 | ``` 69 | 70 | Taking chatqna as an example, the configurable fields are listed at below 71 | 72 | ``` 73 | # chatqna.yaml 74 | # 75 | # usage: 76 | # 1) deploy_and_benchmark.py --workload chatqna [overrided parameters] 77 | # 2) or deploy_and_benchmark.py ./chatqna/benchmark/chatqna.yaml [overrided parameters] 78 | # 79 | # for example, deploy_and_benchmark.sh ./chatqna/benchmark/chatqna.yaml --node=2 80 | # 81 | deploy: 82 | # hardware related config 83 | device: [xeon, gaudi, ...] # AMD and other h/ws could be extended into here 84 | node: [1, 2, 4] 85 | cards_per_node: [4, 8] 86 | 87 | # components related config, by default is for OOB, if overrided, then it is for tuned version 88 | embedding: 89 | model_id: bge_large_v1.5 90 | instance_num: [2, 4, 8] 91 | cores_per_instance: 4 92 | memory_capacity: 20 # unit: G 93 | retrieval: 94 | instance_num: [2, 4, 8] 95 | cores_per_instance: 4 96 | memory_capacity: 20 # unit: G 97 | rerank: 98 | enable: True 99 | model_id: bge_rerank_v1.5 100 | instance_num: 1 101 | cards_per_instance: 1 # if cpu is specified, this field is ignored and will check cores_per_instance field 102 | llm: 103 | model_id: llama2-7b 104 | instance_num: 7 105 | cards_per_instance: 1 # if cpu is specified, this field is ignored and will check cores_per_instance field 106 | # serving related config, dynamic batching 107 | max_batch_size: [1, 2, 8, 16, 32] # the query number to construct a single batch in serving 108 | max_latency: 20 # time to wait before combining incoming requests into a batch, unit milliseconds 109 | 110 | benchmark: 111 | # http request behavior related fields 112 | concurrency: [1, 2, 4] 113 | totoal_query_num: [2048, 4096] 114 | duration: [5, 
10] # unit minutes 115 | query_num_per_concurrency: [4, 8, 16] 116 | possion: True 117 | possion_arrival_rate: 1.0 118 | warmup_iterations: 10 119 | seed: 1024 120 | 121 | # dataset relted fields 122 | dataset: [dummy_english, dummy_chinese, pub_med100, ...] # predefined keywords for supported dataset 123 | user_query: [dummy_english_qlist, dummy_chinese_qlist, pub_med100_qlist, ...] 124 | query_token_size: 128 # if specified, means fixed query token size will be sent out 125 | data_ratio: [10%, 20%, ..., 100%] # optional, ratio from query dataset 126 | 127 | #advance settings in each component which will impact perf. 128 | data_prep: # not target this time 129 | chunk_size: [1024] 130 | chunk_overlap: [1000] 131 | retriver: # not target this time 132 | algo: IVF 133 | fetch_k: 2 134 | k: 1 135 | rerank: 136 | top_n: 2 137 | llm: 138 | max_token_size: 1024 # specify the output token size 139 | ``` 140 | -------------------------------------------------------------------------------- /community/rfcs/25-03-03-GenAIComponents-001-Routing-Agent.md: -------------------------------------------------------------------------------- 1 | # Routing AgentRFC 2 | 3 | A dynamic routing agent for optimal model selection and orchestration 4 | 5 | ## Author(s) 6 | Haim Barad 7 | Madison Evans 8 | 9 | ## Status 10 | Proposed 11 | 12 | ## Objective 13 | Create an intelligent routing layer that: 14 | - Analyzes text-based input queries in real-time. 15 | - Selects optimal model based on criteria like cost, latency, and capability requirements 16 | - Supports multiple cloud providers and self-hosted models 17 | 18 | ## Motivation 19 | - Growing complexity of multi-LLM environments 20 | - Need for cost-efficient inference without sacrificing quality 21 | - Lack of standardized orchestration patterns 22 | - Increasing demand for hybrid cloud/on-prem deployments 23 | 24 | ## Design Proposal 25 | ### Core Components: 26 | 1. 
Query Analyzer: Supports several known classifiers (matrix factorization, BERT, etc) and Semantic understanding and intent classification 27 | 2. Routing Engine: Provides dynamic model selection based on query complexity 28 | 3. Monitoring: Real-time metrics collection (latency, cost, accuracy) 29 | 4. This code is based on RouteLLM, which is available at https://github.com/lm-sys/RouteLLM 30 | 31 | ### Key Features: 32 | - Dynamic model selection based on query complexity 33 | - Returns the selected model endpoint so that developer can call proper model, or does actual routing to the chosen model so this process is invisible to the developer 34 | - Cost-aware routing policies 35 | 36 | ## Miscellaneous 37 | - Performance: <5ms overhead per request 38 | - Security: Zero-trust authentication between components 39 | - Staging Plan: 40 | 1. Phase 1: Basic routing MVP 41 | 2. Phase 2: Advanced analytics dashboard 42 | 3. Phase 3: Auto-scaling integration 43 | -------------------------------------------------------------------------------- /community/rfcs/25-03-07-OPEA-001-OIM-Operator.md: -------------------------------------------------------------------------------- 1 | # RFC: OPEA Inference Microservices (OIM) 2 | 3 | This RFC talks about creating OPEA inference microservices (OIM) similar to NVIDIA NIMs. 4 | 5 | ## Author(s) 6 | 7 | [Poussa, Sakari](https://github.com/poussa), 8 | [Bhandaru, Malini](https://github.com/mkbhanda), 9 | [Lehtonen, Markus](https://github.com/marquiz), 10 | [Kanevsky, Alexander](https://github.com/kad), 11 | [Lu, Lianhao](https://github.com/lianhao), 12 | [Yang, Yuhan](https://github.com/PeterYang12) 13 | 14 | ## Status 15 | 16 | `Accepted` 17 | 18 | ## Objective 19 | 20 | OPEA seeks to ease enterprise GenAI adoption in a landscape that is fast moving, has skills gaps, and is cost conscious. 
A look at OPEA's GenAIExamples illustrates the choices a user must make: whether to use Docker or Kubernetes, the model serving framework, whether to optimize for latency or throughput or quantization, in addition to the hardware they want to run on. Can we abstract some of this complexity for inference services? 21 | 22 | Nvidia's NIM does just this. It expects a user to only specify a model and what they would like to optimize for, their key performance indicator (KPI) type, be it low latency, high throughput or quantization. Nvidia abstracts away both the need to choose a model serving framework and determining its optimum parameters for the same to achieve the KPIs on provided hardware. 23 | 24 | ## Motivation 25 | 26 | Nvidia delivers this ease of use through tooling that first detects the runtime hardware, which is then used to filter for profiles matching the desired model and KPIs. A profile is a recipe specifying how to launch an inference service. It specifies not only which model server framework (such as TGI or vLLM or Ollama ) to use but also number of GPU cards, amount of memory, degree of tensor parallelism, batch size etc. Lastly the profile is used to launch the inference service on the given hardware. 27 | 28 | The profiles are established through offline experimentation using tools like MLPerf. For a model, one or more model frameworks are explored on different hardware to determine settings providing the best performance for a given KPIs. Constructing profiles is non-trivial, resource intensive work. 29 | 30 | ## Design Proposal 31 | 32 | We seek to offer a similar ease of use experience in OPEA for the inference services. Nvidia’s open source NIM Operator is a tool that eases launching inference services. 
OPEA plans to extend NIM operator, or similar, to deliver an Operator that provides similar functionality but also supports multi-vendor hardware and shall endeavor to work with Kubernetes and its flavors such as Red Hat OpenShift and SUSE Rancher, and their AI variants by working with our partners and potentially integrating with platforms such as KServe and RayAI. The first release will provide minimal functionality and over time with community contributions grow richer. We anticipate providers of AI platforms that operate on a multitude of hardware platforms will find the HW optimized profiles valuable in providing cost-effective performant inference services. 33 | 34 | With regard to profiles we shall start with supporting vLLM as the model framework and build profiles for a few popular models. Profiles may also contain more accurate resource usage information for Kubernetes deployments (allowing better Kubernetes scheduling decisions and preventing failures on resource contention). We do anticipate constant enhancements and new offerings in the model serving space and a steady influx of new models. 35 | 36 | The following diagram shows example of the operator main use case interactions; model caching, inference service creation and usage. 37 | 38 | ![OIM operator interactions](assets/oim-operator-flow.png) 39 | 40 | The work will entail 41 | 42 | 1) Auto-detecting hardware type and what is allocated. 43 | 44 | 2) Taking the user specified model and profile (if not specified using a default profile) to retrieve configurations that are possible on the allocated hardware type and quantity. For instance, a configuration that leverages tensor parallelism is infeasible if only one GPU card is allocated. 45 | 46 | 3) Automating performance studies to identify optimum settings for a given hardware with various resource allocations for a given model and constructing profile specific configurations. 
These studies will be repeated to take advantage of new models, frameworks, and other enhancements. 47 | 48 | 4) Providing an open-source tool, such as [ORAS](https://oras.land/), to discover and list available profiles/configurations for a model. 49 | 50 | 5) Publishing the profiles and other artifacts to an OCI compatible registry, such as Docker Hub or ghcr.io. 51 | 52 | ## Alternatives Considered 53 | 54 | Creating everything from scratch was rejected because an open-source alternative exists and provides most of the features OPEA project needs. 55 | 56 | ## Compatibility 57 | 58 | OPEA project will try to stay as close as possible with the original APIs and behaviors. 59 | -------------------------------------------------------------------------------- /community/rfcs/25-19-03-GenAIExamples-HybridRag-personal_assistant.md: -------------------------------------------------------------------------------- 1 | # HybridRAG 2 | This RFC introduces the HybridRAG framework, a novel approach to building advanced question-answering (Q&A) systems. HybridRAG combines two key techniques: 3 | 1. GraphRAG: Utilizes Knowledge Graphs (KGs) for information retrieval 4 | 2. VectorRAG: Employs vector-based retrieval methods 5 | 6 | 7 | The integration of these techniques has demonstrated superior performance in generating accurate and contextually relevant answers compared to using either method alone. 8 | 9 | Key Features of HybridRAG 10 | - Enhances information extraction in Q&A systems 11 | - Combines the strengths of graph-based and vector-based retrieval 12 | - Improves both retrieval accuracy and answer generation quality 13 | 14 | Some examples highlight the versatility and potential impact of the HybridRAG framework across different industries and use cases: Personal assistant, Insurance claim processing, Transaction processing, Fraud detection and risk management etc. to name a few. 
15 | 16 | 17 | ## Author(s) 18 | [Sharath Raghava](https://github.com/intelsharath) 19 | 20 | [Jean Yu](https://github.com/jeanyu-habana) 21 | 22 | [Siddhi Velankar](https://github.com/siddhivelankar23) 23 | 24 | ## Status 25 | 26 | Under Review 27 | 28 | ## Objective 29 | This RFC proposes a Hybrid RAG architecture framework that enhances RAG technology. The input data can be unstructured data or structured data sources. Structured data sources such as CSVs, SQL databases can be used to integrate graph sources. Structured information can also be extracted from unstructured data by extracting nodes and relationships. HybridRAG effectively handles both unstructured and structured data adopting a flexible microservice approach for enterprise AI applications. 30 | 31 | We are building an example to showcase a Medical Bot personal assistant application with: 32 | 33 | - Preloaded database with detailed information on ~100 diseases 34 | - Augmented with structured data on medicines, symptoms, home remedies, and care 35 | - Demonstrates the architecture's capability in handling complex, multi-modal information 36 | 37 | This Hybrid RAG approach offers a versatile solution for enterprise applications requiring advanced data retrieval and generation capabilities integrating unstructured and structured data from various sources. 38 | 39 | 40 | ## Motivation 41 | 42 | Enterprise AI systems require solutions that handle both structured data (databases, transactions, CSVs, JSON) and unstructured data (documents, images, audio). While traditional VectorRAG excels at semantic search across documents, it struggles with complex queries requiring global context or relationship-aware reasoning. HybridRAG addresses these gaps by combining GraphRAG (knowledge graph-based retrieval) and VectorRAG (vector database retrieval) for enhanced accuracy and contextual relevance. 
43 | 44 | GraphRAG: 45 | 46 | - Uses knowledge graphs to model entities, relationships, and hierarchical clusters from data. 47 | - Enables global context understanding (e.g., identifying indirect connections between entities). 48 | 49 | VectorRAG: 50 | 51 | - Leverages semantic search for unstructured data retrieval. 52 | 53 | Hybrid Integration: 54 | 55 | - Simultaneously retrieves context from graph and vector databases during queries. 56 | - Outperforms individual approaches in retrieval accuracy and answer quality. 57 | 58 | HybridRAG’s hybrid architecture also supports secure on-premise/cloud deployments, critical for sensitive sectors like healthcare. This approach represents a strategic evolution in enterprise AI, enabling systems to reason across data types while maintaining scalability and compliance. 59 | 60 | ## Design Proposal 61 | This RFC proposes a Hybrid Retrieval-Augmented Generation (Hybrid-RAG) architecture detailed in the following figure. 62 | 63 | ![Hybrid-RAG Architecture](assets/Hybrid-rag-architecture.png) 64 | 65 | The proposed architecture involves the creation of a Hybrid rag mega services. The megaservice functions as the core pipeline, comprising of the following microservices: 66 | Indexing: 67 | 68 | - Unstructured data into semantic chunk and loading into vector db. 69 | - Structured data load CSV/SQLdb into graph DB. 70 | - Optional unstructured data processing to extract and generate triplets to load into graphdb. 71 | - Retrieve for vector DB: take query, generate embeddings and look up and extract context 72 | - Retriever for graph DB: take in query, generate triplets (text2cypher) and extract context in graph db. 73 | - Merge this and generate final results. 74 | 75 | Proof of Concept: Medical Assistant 76 | - Focused on a subset of common diseases 77 | - Built using Wikidata for unstructured medical knowledge. 
78 | - Extracted and built structured dataset from unstructured medical knowledge (diseases, symptoms, treatments, home remedies). 79 | - Designed to provide more precise answers to medical queries 80 | 81 | How It Works 82 | - When a query is received, HybridRAG retrieves relevant context from both vector and graph databases 83 | - The system then generates a final answer based on the combined retrieved information 84 | - The indexing for vector and graph 85 | 86 | Performance Evaluation 87 | HybridRAG outperforms traditional VectorRAG and GraphRAG techniques when used individually. This superior performance is observed in both: 88 | 89 | - Retrieval accuracy 90 | - Answer generation quality 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /community/rfcs/Edge_Craft_RAG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/Edge_Craft_RAG.png -------------------------------------------------------------------------------- /community/rfcs/Edge_Craft_RAG_screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/Edge_Craft_RAG_screenshot_1.png -------------------------------------------------------------------------------- /community/rfcs/Edge_Craft_RAG_screenshot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/Edge_Craft_RAG_screenshot_2.png -------------------------------------------------------------------------------- /community/rfcs/README.md: -------------------------------------------------------------------------------- 1 | # RFC Archive 2 | 3 | This folder is used to 
archive all RFCs contributed by OPEA community. Either users directly contribute RFC to this folder or submit to each OPEA repository's `Issues` page with the `[RFC]: xxx` string pattern in title. The latter will be automatically stored here by an archive tool. 4 | 5 | The file naming convention follows this rule: yy-mm-dd-[OPEA Project Name]-[index]-title.md 6 | 7 | For example, 24-04-29-GenAIExamples-001-Using_MicroService_to_implement_ChatQnA.md 8 | -------------------------------------------------------------------------------- /community/rfcs/assets/Hybrid-rag-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/Hybrid-rag-architecture.png -------------------------------------------------------------------------------- /community/rfcs/assets/avatar1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/avatar1.jpg -------------------------------------------------------------------------------- /community/rfcs/assets/avatar2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/avatar2.jpg -------------------------------------------------------------------------------- /community/rfcs/assets/avatar3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/avatar3.png -------------------------------------------------------------------------------- /community/rfcs/assets/avatar4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/avatar4.png -------------------------------------------------------------------------------- /community/rfcs/assets/avatar6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/avatar6.png -------------------------------------------------------------------------------- /community/rfcs/assets/avatar_design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/avatar_design.png -------------------------------------------------------------------------------- /community/rfcs/assets/avatars-chatbot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/avatars-chatbot.png -------------------------------------------------------------------------------- /community/rfcs/assets/avatars-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/avatars-ui.png -------------------------------------------------------------------------------- /community/rfcs/assets/image_wav2lipgfpgan_cut.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/image_wav2lipgfpgan_cut.gif -------------------------------------------------------------------------------- /community/rfcs/assets/multimodal_enhanced_audio_ui.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/multimodal_enhanced_audio_ui.png -------------------------------------------------------------------------------- /community/rfcs/assets/multimodal_enhanced_chat_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/multimodal_enhanced_chat_ui.png -------------------------------------------------------------------------------- /community/rfcs/assets/multimodal_enhanced_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/multimodal_enhanced_diagram.png -------------------------------------------------------------------------------- /community/rfcs/assets/multimodal_enhanced_image_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/multimodal_enhanced_image_ui.png -------------------------------------------------------------------------------- /community/rfcs/assets/multimodal_enhanced_pdf_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/multimodal_enhanced_pdf_ui.png -------------------------------------------------------------------------------- /community/rfcs/assets/multimodal_enhanced_video_ui.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/multimodal_enhanced_video_ui.png -------------------------------------------------------------------------------- /community/rfcs/assets/oim-operator-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/oim-operator-flow.png -------------------------------------------------------------------------------- /community/rfcs/assets/ui_latest_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/ui_latest_1.png -------------------------------------------------------------------------------- /community/rfcs/assets/ui_latest_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/ui_latest_2.png -------------------------------------------------------------------------------- /community/rfcs/assets/ui_latest_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/ui_latest_3.png -------------------------------------------------------------------------------- /community/rfcs/assets/video_wav2lipgfpgan_cut.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/assets/video_wav2lipgfpgan_cut.gif -------------------------------------------------------------------------------- /community/rfcs/opea_architecture.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/opea_architecture.png -------------------------------------------------------------------------------- /community/rfcs/opea_deploy_process_v0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/opea_deploy_process_v0.png -------------------------------------------------------------------------------- /community/rfcs/opea_deploy_process_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/opea_deploy_process_v1.png -------------------------------------------------------------------------------- /community/rfcs/opea_deploy_process_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/opea_deploy_process_v2.png -------------------------------------------------------------------------------- /community/rfcs/opea_deploy_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/opea_deploy_workflow.png -------------------------------------------------------------------------------- /community/rfcs/opea_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/community/rfcs/opea_workflow.png -------------------------------------------------------------------------------- 
/community/rfcs/rfc_template.txt: -------------------------------------------------------------------------------- 1 | # RFC Template 2 | 3 | Replace the "RFC Template" heading with your RFC Title, followed by 4 | the short description of the feature you want to contribute 5 | 6 | ## Author(s) 7 | 8 | List all contributors of this RFC. 9 | 10 | ## Status 11 | 12 | Change the PR status to `Under Review` | `Rejected` | `Accepted`. 13 | 14 | ## Objective 15 | 16 | List what problem will this solve? What are the goals and non-goals of this RFC? 17 | 18 | ## Motivation 19 | 20 | List why this problem is valuable to solve? Whether some related work exists? 21 | 22 | ## Design Proposal 23 | 24 | This is the heart of the document, used to elaborate the design philosophy and detail proposal. 25 | 26 | ## Alternatives Considered 27 | 28 | List other alternatives if have, and corresponding pros/cons to each proposal. 29 | 30 | ## Compatibility 31 | 32 | list possible incompatible interface or workflow changes if exists. 33 | 34 | ## Miscellaneous 35 | 36 | List other information user and developer may care about, such as: 37 | 38 | - Performance Impact, such as speed, memory, accuracy. 39 | - Engineering Impact, such as binary size, startup time, build time, test times. 40 | - Security Impact, such as code vulnerability. 41 | - TODO List or staging plan. 
42 | 43 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | # OPEA Project documentation build configuration file 2 | # 3 | # 4 | import os 5 | import sys 6 | from datetime import datetime 7 | import glob 8 | import shutil 9 | 10 | 11 | sys.path.insert(0, os.path.abspath('.')) 12 | 13 | # Get OPEA version from GenAIComps/comps/version.py 14 | sys.path.insert(0, os.path.abspath("../../../GenAIComps/comps")) 15 | from version import __version__ 16 | 17 | RELEASE = "" 18 | if "RELEASE" in os.environ: 19 | RELEASE = os.environ["RELEASE"] 20 | 21 | # we've got some project sphinx extensions (link_roles) 22 | sys.path.insert(0, os.path.join(os.path.abspath('.'), 'sphinx/extensions')) 23 | extensions = [ 24 | 'sphinx.ext.graphviz', 25 | 'sphinxcontrib.jquery', 26 | 'sphinx_tabs.tabs', 27 | 'myst_parser', 28 | 'sphinxcontrib.mermaid', 29 | 'link_roles', 30 | 'sphinx_design', 31 | #'sphinx_md', 32 | ] 33 | 34 | myst_enable_extensions = {"colon_fence"} 35 | myst_fence_as_directive = {"mermaid"} 36 | 37 | # sphinx_md config 38 | sphinx_md_useGitHubURL = True 39 | 40 | graphviz_output_format='png' 41 | graphviz_dot_args=[ 42 | '-Nfontname="verdana"', 43 | '-Gfontname="verdana"', 44 | '-Efontname="verdana"'] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['sphinx/_templates'] 48 | 49 | # The suffix(es) of source filenames. 50 | # You can specify multiple suffix as a list of string: 51 | # 52 | source_suffix = ['.rst', '.md', 53 | ] 54 | 55 | # The master toctree document. 56 | master_doc = 'index' 57 | 58 | # General information about the project. 
59 | project = u'OPEA™' 60 | this_year=str(datetime.now().year); 61 | copyright = u'2024' + ('' if this_year == '2024' else ('-' + this_year)) + ' ' + project + ', a Series of LF Projects, LLC' 62 | author = u'OPEA Project developers' 63 | 64 | version = release = __version__ 65 | if not version: 66 | sys.stderr.write('Warning: Could not extract OPEA version from version.py\n') 67 | version = release = "unknown" 68 | 69 | 70 | # files and directories to ignore when looking for source files. 71 | exclude_patterns = [ 72 | 'scripts/*', 73 | 'examples/AgentQnA/deploy/index.rst', 74 | 'examples/AgentQnA/deploy/xeon.md' 75 | ] 76 | try: 77 | import sphinx_rtd_theme 78 | except ImportError: 79 | sys.stderr.write('Warning: sphinx_rtd_theme missing. Use pip to install it.\n') 80 | else: 81 | html_theme = "sphinx_rtd_theme" 82 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 83 | html_theme_options = { 84 | 'canonical_url': '', 85 | 'analytics_id': 'G-3QH5804YP8', 86 | 'logo_only': False, 87 | 'display_version': True, 88 | #'prev_next_buttons_location': 'None', 89 | # Toc options 90 | 'collapse_navigation': False, 91 | 'sticky_navigation': True, 92 | 'navigation_depth': 4, 93 | } 94 | 95 | 96 | # Here's where we (manually) list the document versions maintained on 97 | # the published doc website. 
On a regular basis we publish to the 98 | # /latest folder but when releases are made, we publish to a / 99 | # folder (specified via the version) 100 | 101 | if tags.has('release'): 102 | is_release = True 103 | docs_title = '%s' %(version) 104 | current_version = version 105 | if RELEASE: 106 | version = release = current_version = RELEASE 107 | docs_title = '%s' %(version) 108 | else: 109 | version = current_version = "latest" 110 | is_release = False 111 | docs_title = 'Latest' 112 | 113 | html_context = { 114 | 'current_version': current_version, 115 | 'docs_title': docs_title, 116 | 'is_release': is_release, 117 | 'versions': ( ("latest", "/latest/"), 118 | ("1.0", "/1.0/"), 119 | ("1.1", "/1.1/"), 120 | ("1.2", "/1.2/"), 121 | ("1.3", "/1.3/") 122 | ) 123 | } 124 | 125 | show_warning_types = True 126 | 127 | # Theme options are theme-specific and customize the look and feel of a theme 128 | # further. For a list of options available for each theme, see the 129 | # documentation. 130 | # 131 | # html_theme_options = {} 132 | 133 | html_logo = 'images/opea-horizontal-white-w200.png' 134 | html_favicon = 'images/OPEA-favicon-32x32.png' 135 | 136 | numfig = True 137 | numfig_format = {'figure': 'Figure %s', 'table': 'Table %s', 'code-block': 'Code Block %s'} 138 | 139 | # paths that contain custom static files (such as style sheets) 140 | html_static_path = ['sphinx/_static'] 141 | 142 | def copy_images(src ,dst): 143 | image_types = ["png", "svg"] 144 | for image_type in image_types: 145 | pattern = "{}/**/*.{}".format(src, image_type) 146 | files = glob.glob(pattern, recursive = True) 147 | for file in files: 148 | sub_name = file.replace(src, '') 149 | dst_filename = "{}{}".format(dst, sub_name) 150 | folder = os.path.dirname(dst_filename) 151 | if not os.path.exists(folder): 152 | os.makedirs(folder) 153 | shutil.copy(file, dst_filename) 154 | 155 | def copy_image_to_html(app, docname): 156 | if app.builder.name == 'html': 157 | if os.path.exists(app.srcdir) 
and os.path.exists(app.outdir): 158 | copy_images(str(app.srcdir) ,str(app.outdir)) 159 | else: 160 | print("No existed {} or {}".format(app.srcdir ,app.outdir)) 161 | 162 | def setup(app): 163 | 164 | app.add_css_file("opea-custom.css") 165 | app.add_js_file("opea-custom.js") 166 | app.connect('build-finished', copy_image_to_html) 167 | 168 | # Disable "Created using Sphinx" in the HTML footer. Default is True. 169 | html_show_sphinx = False 170 | 171 | # If true, links to the reST sources are added to the pages. 172 | html_show_sourcelink = True 173 | 174 | # If not '', a 'Last updated on:' timestamp is inserted at every page 175 | # bottom, 176 | # using the given strftime format. 177 | html_last_updated_fmt = '%b %d, %Y' 178 | 179 | 180 | rst_epilog = """ 181 | .. include:: /sphinx/substitutions.txt 182 | """ 183 | -------------------------------------------------------------------------------- /deploy/index.rst: -------------------------------------------------------------------------------- 1 | .. _GenAIInfra: 2 | 3 | Deploying GenAI 4 | ############### 5 | 6 | GenAIInfra is the containerization and cloud native suite for OPEA, including 7 | artifacts to deploy :ref:`GenAIExamples` in a cloud native way so enterprise users 8 | can deploy to their own cloud. 9 | 10 | We're building this documentation from content in the 11 | :GenAIInfra_blob:`GenAIInfra` GitHub repository. 12 | 13 | .. rst-class:: rst-columns 14 | 15 | .. toctree:: 16 | :maxdepth: 1 17 | :glob: 18 | 19 | /GenAIInfra/README 20 | /GenAIInfra/* 21 | 22 | Installation Guides 23 | ******************* 24 | 25 | .. rst-class:: rst-columns 26 | 27 | .. toctree:: 28 | :maxdepth: 1 29 | :glob: 30 | 31 | ../guide/installation/* 32 | ../guide/installation/**/* 33 | 34 | Cloud Service Provider 35 | ********************** 36 | 37 | 38 | .. rst-class:: rst-columns 39 | 40 | .. 
toctree:: 41 | :maxdepth: 1 42 | :glob: 43 | 44 | /GenAIInfra/cloud-service-provider/* 45 | /GenAIInfra/cloud-service-provider/**/* 46 | 47 | Authentication and Authorization 48 | ******************************** 49 | 50 | .. rst-class:: rst-columns 51 | 52 | .. toctree:: 53 | :maxdepth: 1 54 | :glob: 55 | 56 | /GenAIInfra/authN-authZ/* 57 | /GenAIInfra/authN-authZ/**/* 58 | 59 | Helm Charts 60 | *********** 61 | 62 | .. rst-class:: rst-columns 63 | 64 | .. toctree:: 65 | :maxdepth: 1 66 | :glob: 67 | 68 | /GenAIInfra/helm-charts/README 69 | /GenAIInfra/helm-charts/* 70 | /GenAIInfra/helm-charts/common/* 71 | /GenAIInfra/helm-charts/common/**/* 72 | /GenAIInfra/helm-charts/**/* 73 | 74 | KubeAI Operator 75 | *************** 76 | 77 | .. rst-class:: rst-columns 78 | 79 | .. toctree:: 80 | :maxdepth: 1 81 | :glob: 82 | 83 | /GenAIInfra/kubeai/* 84 | 85 | Kubernetes Addons 86 | ***************** 87 | 88 | .. rst-class:: rst-columns 89 | 90 | .. toctree:: 91 | :maxdepth: 1 92 | :glob: 93 | 94 | /GenAIInfra/kubernetes-addons/README 95 | /GenAIInfra/kubernetes-addons/* 96 | /GenAIInfra/kubernetes-addons/**/* 97 | 98 | Microservices Connector 99 | *********************** 100 | 101 | .. rst-class:: rst-columns 102 | 103 | .. toctree:: 104 | :maxdepth: 1 105 | :glob: 106 | 107 | /GenAIInfra/microservices-connector/README 108 | /GenAIInfra/microservices-connector/* 109 | /GenAIInfra/microservices-connector/**/* 110 | 111 | Pipeline Proxy 112 | ************** 113 | 114 | .. rst-class:: rst-columns 115 | 116 | .. toctree:: 117 | :maxdepth: 1 118 | :glob: 119 | 120 | /GenAIInfra/proxy/README 121 | /GenAIInfra/proxy/* 122 | /GenAIInfra/proxy/**/* 123 | 124 | Scripts 125 | ******* 126 | 127 | .. rst-class:: rst-columns 128 | 129 | .. toctree:: 130 | :maxdepth: 1 131 | :glob: 132 | 133 | /GenAIInfra/scripts/README 134 | /GenAIInfra/scripts/* 135 | /GenAIInfra/scripts/**/* 136 | 137 | Contributing 138 | ************ 139 | 140 | .. rst-class:: rst-columns 141 | 142 | .. 
toctree:: 143 | :maxdepth: 1 144 | :glob: 145 | 146 | /GenAIInfra/dev/* 147 | /GenAIInfra/dev/**/* 148 | -------------------------------------------------------------------------------- /developer-guides/OPEA_API_Specification.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/developer-guides/OPEA_API_Specification.docx -------------------------------------------------------------------------------- /developer-guides/graphviz.rst: -------------------------------------------------------------------------------- 1 | .. _graphviz-examples: 2 | 3 | Drawings Using Graphviz 4 | ####################### 5 | 6 | We support using the Sphinx `Graphviz extension`_ for creating simple 7 | graphs and line drawings using the dot language. The advantage of using 8 | Graphviz for drawings is that the source for a drawing is a text file that 9 | can be edited and maintained in the repo along with the documentation. 10 | 11 | .. _graphviz extension: http://graphviz.gitlab.io 12 | 13 | These source ``.dot`` files are generally kept separate from the document 14 | itself, and included by using a Graphviz directive: 15 | 16 | .. code-block:: none 17 | 18 | .. graphviz:: images/boot-flow.dot 19 | :name: boot-flow-example 20 | :align: center 21 | :caption: Hypervisor Boot Flow 22 | 23 | where the boot-flow.dot file contains the drawing commands: 24 | 25 | .. literalinclude:: images/boot-flow.dot 26 | 27 | and the generated output would appear as: 28 | 29 | .. graphviz:: images/boot-flow.dot 30 | :name: boot-flow-example 31 | :align: center 32 | :caption: Hypervisor Boot Flow 33 | 34 | 35 | Let's look at some more examples and then we'll get into more details about 36 | the dot language and drawing options. 
37 | 38 | Simple Directed Graph 39 | ********************* 40 | 41 | For simple drawings with shapes and lines, you can put the Graphviz commands 42 | in the content block for the directive. For example, for a simple directed 43 | graph (digraph) with two nodes connected by an arrow, you can write: 44 | 45 | 46 | .. code-block:: none 47 | 48 | .. graphviz:: 49 | 50 | digraph { 51 | "a" -> "b" 52 | } 53 | 54 | and get this drawing: 55 | 56 | .. graphviz:: 57 | 58 | digraph { 59 | "a" -> "b" 60 | } 61 | 62 | 63 | You can change the following attributes: 64 | 65 | * Graph layout (from top-to-bottom to left-to-right) 66 | * Node shapes (rectangles, circles, houses, stars, etc.) 67 | * Style (filled, rounded) 68 | * Colors 69 | * Text displayed in the node 70 | * Placement of the resulting image on the page (centered) 71 | 72 | Example: 73 | 74 | .. literalinclude:: images/circle-square.dot 75 | 76 | .. graphviz:: images/circle-square.dot 77 | :align: center 78 | 79 | You can use the `standard HTML color names`_ or use RGB values for colors, 80 | as shown. 81 | 82 | .. _standard HTML color names: 83 | https://www.w3schools.com/colors/colors_hex.asp 84 | 85 | Adding Edge Labels 86 | ****************** 87 | 88 | Here's an example of a drawing with labels on the edges (arrows) between 89 | nodes. We also show how to change the default attributes for all nodes and 90 | edges within this graph: 91 | 92 | .. literalinclude:: images/node-shape-edges.dot 93 | 94 | .. graphviz:: images/node-shape-edges.dot 95 | :align: center 96 | 97 | Tables 98 | ****** 99 | 100 | For nodes with a ``record`` shape attribute, the text of the label is 101 | presented in a table format: a vertical bar ``|`` starts a new row or 102 | column and curly braces ``{ ... }`` specify a new row (if you're in a 103 | column) or a new column (if you're in a row). For example: 104 | 105 | .. literalinclude:: images/record.dot 106 | 107 | .. 
graphviz:: images/record.dot 108 | :align: center 109 | 110 | Note that you can also specify the horizontal alignment of text using escape 111 | sequences ``\n``, ``\l``, and ``\r``, which divide the label into lines that 112 | are centered, left-justified, and right-justified, respectively. 113 | 114 | Finite-State Machine 115 | ******************** 116 | 117 | Here's an example of using Graphviz for defining a finite-state machine 118 | for pumping gas: 119 | 120 | .. literalinclude:: images/gaspump.dot 121 | 122 | .. graphviz:: images/gaspump.dot 123 | :align: center 124 | -------------------------------------------------------------------------------- /developer-guides/images/boot-flow.dot: -------------------------------------------------------------------------------- 1 | digraph G { 2 | rankdir=LR; 3 | bgcolor="transparent"; 4 | UEFI -> "acrn.efi" -> "OS\nBootloader" -> 5 | "SOS\nKernel" -> "ACRN\nDevice Model" -> "Virtual\nBootloader"; 6 | } 7 | -------------------------------------------------------------------------------- /developer-guides/images/circle-square.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | bgcolor="transparent"; rankdir=LR; 3 | { a [shape=circle height="1" style=filled color=AntiqueWhite 4 | label="Circle\nLabel"] 5 | b [shape=box height="1" width="1" style="rounded,filled" 6 | color="#F080F0" label="Square\nLabel"] 7 | } 8 | a -> b 9 | } 10 | -------------------------------------------------------------------------------- /developer-guides/images/doc-gen-flow.dot: -------------------------------------------------------------------------------- 1 | # Doc Generation flow 2 | # dot -Tpng -odoc-gen-flow.png doc-gen-flow.dot 3 | 4 | digraph docgen { 5 | node [ fontname="verdana"] 6 | bgcolor=transparent; rankdir=LR; 7 | rst [shape="rectangle" label="restructuredText\nfiles"] 8 | md [shape="rectangle" label="markdown\nfiles"] 9 | images [shape="rectangle" label=".png, .jpg\nimages"] 10 
| conf [shape="rectangle" label="conf.py\nconfiguration"] 11 | rtd [shape="rectangle" label="read-the-docs\ntheme"] 12 | html [shape="rectangle" label="HTML\nweb site"] 13 | includes [shape="rectangle" label="includes"] 14 | sphinx[shape="ellipse" label="sphinx +\nMyst Parser +\ndocutils"] 15 | prepare[shape="ellipse" label="fix\ncross-repo\nlinks"] 16 | scan[shape="ellipse" label="scan\nrepo dirs"] 17 | images -> sphinx 18 | scan -> includes 19 | includes -> sphinx 20 | rst -> sphinx 21 | md -> prepare 22 | prepare -> sphinx 23 | conf -> sphinx 24 | rtd -> sphinx 25 | sphinx -> html 26 | } 27 | -------------------------------------------------------------------------------- /developer-guides/images/gaspump.dot: -------------------------------------------------------------------------------- 1 | digraph gaspump { 2 | rankdir=LR; 3 | node [shape = circle;]; 4 | edge [color = grey; fontsize=10]; 5 | S0 -> S1 [ label = "Lift Nozzle" ] 6 | S1 -> S0 [ label = "Replace Nozzle" ] 7 | S1 -> S2 [ label = "Authorize Pump" ] 8 | S2 -> S0 [ label = "Replace Nozzle" ] 9 | S2 -> S3 [ label = "Pull Trigger" ] 10 | S3 -> S2 [ label = "Release Trigger" ] 11 | } 12 | -------------------------------------------------------------------------------- /developer-guides/images/node-shape-edges.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | bgcolor=transparent; rankdir=LR; 3 | node [shape="rectangle" style="filled" color="lightblue"] 4 | edge [fontsize="12" fontcolor="grey"] 5 | 6 | "acrnprobe" -> "telemetrics-client" [label="crashlog\npath"] 7 | "telemetrics-client" -> "backend" [label="log\ncontent"] 8 | } 9 | -------------------------------------------------------------------------------- /developer-guides/images/opea-docs-fork.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/developer-guides/images/opea-docs-fork.png -------------------------------------------------------------------------------- /developer-guides/images/record.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | a [shape=record label="left | {above|middle|below} | right"] 3 | b [shape=record label="{row1\l|row2\r|{row3\nleft|row3\nright}|row4}"] 4 | } 5 | -------------------------------------------------------------------------------- /developer-guides/images/trusty-boot-flow.dot: -------------------------------------------------------------------------------- 1 | digraph G { 2 | rankdir=LR; 3 | rank=same; 4 | bgcolor="transparent"; 5 | uservml1 [label="User VM OS\nBootloader"] 6 | acrn_init [shape=box style="rounded,filled" label="ACRN"] 7 | acrn_switch [shape=box style="rounded,filled" label="ACRN"] 8 | uservml2 [label="User VM OS\nBootloader"] 9 | uservml1 -> acrn_init -> "Trusty" -> acrn_switch -> uservml2; 10 | } 11 | 12 | -------------------------------------------------------------------------------- /developer-guides/index.rst: -------------------------------------------------------------------------------- 1 | .. _developer_guides: 2 | 3 | Developer Guides 4 | ################ 5 | 6 | Coding Guides 7 | ************* 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | OPEA_API 13 | 14 | 15 | 16 | Documentation Guides 17 | ******************** 18 | 19 | .. 
toctree:: 20 | :maxdepth: 1 21 | 22 | doc_guidelines 23 | readme_guidelines_genai_examples 24 | graphviz 25 | docbuild 26 | 27 | -------------------------------------------------------------------------------- /developer-guides/mdtable.txt: -------------------------------------------------------------------------------- 1 | | heading 1 | heading 2 | heading 3 | 2 | |---|---|---| 3 | |row 1 column 1 | row 1 column 2 | yes, it's row 1 column 3| 4 | |row 2 col 1 | row 2 column 2 | row 2 col 3 | 5 | -------------------------------------------------------------------------------- /developer-guides/primary_readme_genai_examples_template.md: -------------------------------------------------------------------------------- 1 | # Template for Primary README File 2 | 3 | ** Application** 4 | 5 | ## Contents 6 | - Overview 7 | - Architecture 8 | - Deployment 9 | 10 | ## Overview 11 | 12 | 13 | 14 | 15 | ## Architecture 16 | 17 | 18 | 19 | 20 | ## Deployment 21 | This table describes options to deploy the application. See relevant secondary README files to learn more about these options. 22 | You can also find information about implementing this sample on specific hardware. 23 | 24 | | Hardware | Deployment Mode | Description | 25 | | ------------------------------------| ------------------------------ | ----------------------------------------------------------------------------- | 26 | | Intel | Single Node | Hardware A | 27 | | Intel | Single Node with Benchmarking | Hardware B | 28 | | ... | HELM | | 29 | 30 | This table describes how you deploy the application on cloud service providers (CSP) using Terraform: 31 | 32 | | CSP | Hardware | Cloud Module for Terraform | Notes | 33 | | ------------------------------------| ------------------------------ | ----------------------------------------------------------------------------- | 34 | | ... | | 35 | | ... | | | | 36 | | ... 
| | | | 37 | 38 | -------------------------------------------------------------------------------- /developer-guides/readme_guidelines_genai_examples.rst: -------------------------------------------------------------------------------- 1 | .. _readme_guidelines_genai_examples: 2 | 3 | README Guidelines for GenAIExamples 4 | =================================== 5 | 6 | When you contribute a new sample to GenAI Examples, make sure to prepare a README file for the contribution. The README file explains the sample and describes how other developers can integrate it into their application. 7 | 8 | You should be able to deploy your sample through Docker Compose. If your sample also supports other deployment options such as Kubernetes, or can target multiple hardware platforms, you must prepare additional (secondary) README files for those purposes. 9 | 10 | Use these guidelines and templates to create primary and secondary README files for your sample. 11 | 12 | *Primary README File For a Sample:* 13 | * Structure 14 | * Guidelines 15 | * Template 16 | 17 | *Secondary README File(s) For Deployment Options/Target Hardware:* | 18 | * Structure 19 | * Guidelines 20 | * Template 21 | 22 | 23 | Prepare the Primary README File 24 | ############################### 25 | 26 | The primary README file for a sample describes an overview of the sample and clarifies its usefulness in solving a practical task. 27 | Application developers use this information to decide if a sample in GenAI Examples is immediately relevant to their application, so make sure to clarify the value that your sample delivers. 28 | 29 | Structure 30 | ^^^^^^^^^ 31 | 32 | Your primary README file should contain these sections: 33 | 34 | * *Overview* – Describe the purpose of the sample and the task(s) it can solve. 35 | * *Architecture* – Explain how the sample works. List the building blocks you use from the Generative AI Components (GenAI Comps) folder. 
36 | * *Deployment* – Inform how you deploy this sample on all supported hardware platforms. 37 | 38 | For an example, open the `primary README file for the ChatQnA application <https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md>`_. 39 | 40 | Guidelines 41 | ^^^^^^^^^^ 42 | 43 | * The primary README file is the first README file in the sample that application developers use. Keep its content clear and concise so that developers understand the purpose of the sample and know how to learn more about it. 44 | * Do not describe detailed steps in the primary README or insert code blocks for deployment options or targeting specific hardware. Create secondary README files for those purposes and link to them from the primary file. 45 | 46 | Template 47 | ^^^^^^^^ 48 | .. toctree:: 49 | :maxdepth: 1 50 | 51 | Primary README Guidelines for GenAI Examples 52 | 53 | ---- 54 | 55 | 56 | Prepare the Secondary README File 57 | ################################# 58 | 59 | The secondary README file explains how you deploy a sample or target it for specific hardware. For deployment, make sure to specify any artifacts that may be necessary. These artifacts can include: 60 | * Compose files or setup scripts to run with Docker Compose 61 | * YAML or HELM charts to deploy with Kubernetes 62 | 63 | Structure 64 | ^^^^^^^^^ 65 | 66 | Your secondary README file should contain these sections: 67 | 68 | * *Overview* – Describe the contents of the README file. Avoid repeating information that is available in the primary README. 69 | * *Deployment* – Explain how you deploy this sample on a specific hardware platform. Mention prerequisites as well as any optional methods. 70 | * *Validation* - Describe how you check the health of the microservices in this sample. 71 | * *Termination* - Describe how you stop the microservices in this sample. 72 | * *Profiling* - Give information on procedures to set up and monitor profiling dashboards. 
73 | * *Troubleshooting* - Describe common problems with this deployment and their solutions. 74 | 75 | For an example, open the `Docker Compose file for the ChatQnA application <https://opea-project.github.io/latest/GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/README.html#chatqna-docker-compose-files>`_. 76 | 77 | Guidelines 78 | ^^^^^^^^^^ 79 | 80 | * Create the secondary README file to help developers set up and run a use case in the fewest steps possible. 81 | * Each target hardware must have only one secondary README file for a specific deployment method. 82 | * To customize a microservice, use the appropriate guide available in the GenAI Components folder. 83 | * Only include profiling or debugging information that applies to your specific use case. Include general troubleshooting information in the primary README file. 84 | * Always validate the contents of a secondary README file when you create or update it. 85 | 86 | Template 87 | ^^^^^^^^ 88 | .. toctree:: 89 | :maxdepth: 1 90 | 91 | Secondary README Guidelines for GenAI Examples 92 | 93 | ---- 94 | 95 | Validate a Secondary README File 96 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 97 | 98 | When you create or update the information in a secondary README file, you must complete a validation process to ensure its quality. 99 | 100 | When you create a new README file, create a new test for validation. When you update a README file, update the corresponding or related test(s) for validation. 101 | 102 | All tests are available in the tests folder. 
103 | -------------------------------------------------------------------------------- /developer-guides/secondary_readme_genai_examples_template.md: -------------------------------------------------------------------------------- 1 | # Template for Secondary README File 2 | 3 | **Deploy Application on ** 4 | 5 | ## Contents 6 | - Overview 7 | - Deployment 8 | - Additional Options for Deployment 9 | - Validation 10 | - Profiling 11 | - Termination 12 | - Troubleshooting 13 | 14 | 15 | ## Overview 16 | 17 | 18 | ## Deployment 19 | 20 | 21 | 22 | | Environment Variable | Description | Default Value | 23 | | ------------------------------------| ---------------------------------------------------------------------| 24 | | | | | 25 | | | | | 26 | 27 | 28 | 29 | 30 | *Additional Options for Deployment* 31 | 32 | 33 | | File | Description | 34 | | ------------------------------------| ---------------------------------------------------------------------| 35 | | | | 36 | 37 | ## Validation 38 | 39 | 40 | 41 | - 42 | - 43 | - 44 | - 45 | 46 | 47 | - 48 | - 49 | - 50 | - 51 | - 52 | 53 | 54 | ## Profiling 55 | 56 | 57 | 58 | ## Termination 59 | 60 | 61 | ## Troubleshooting 62 | 63 | 64 | ## Related Information 65 | 66 | - 67 | - 68 | - -------------------------------------------------------------------------------- /developer-guides/tabbed-alternative.txt: -------------------------------------------------------------------------------- 1 | Here's an example of markdown with two tab sets that are synchronized when 2 | chosen by the viewer: 3 | 4 | ::::{tab-set} 5 | 6 | :::{tab-item} Linux 7 | :sync: Linux 8 | 9 | This content will show on the Linux tab 10 | ::: 11 | :::{tab-item} macOS 12 | :sync: macOS 13 | 14 | This content will show on the macOS tab 15 | ::: 16 | :::{tab-item} Windows 17 | :sync: Windows 18 | 19 | This content will show on the Windows tab 20 | ::: 21 | :::: 22 | 23 | Some more content here followed by another tab set: 24 | 25 | ::::{tab-set} 26 | 27 | 
:::{tab-item} Linux 28 | :sync: Linux 29 | 30 | This is more content that will show on this Linux tab 31 | ::: 32 | :::{tab-item} macOS 33 | :sync: macOS 34 | 35 | This is more content that will show on this macOS tab 36 | ::: 37 | :::{tab-item} Windows 38 | :sync: Windows 39 | 40 | This is more content that will show on this Windows tab 41 | ::: 42 | :::: 43 | 44 | -------------------------------------------------------------------------------- /eval/index.rst: -------------------------------------------------------------------------------- 1 | .. _GenAIEval: 2 | 3 | Evaluating GenAI 4 | ################ 5 | 6 | GenAIEval provides evaluation, benchmark, scorecard, and targeting for performance on throughput and latency, accuracy on popular evaluation harnesses, safety, and hallucination. 7 | 8 | We're building this documentation from content in the 9 | :GenAIEval_blob:`GenAIEval` GitHub repository. 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | :glob: 14 | 15 | /GenAIEval/README 16 | /GenAIEval/* 17 | /GenAIEval/**/* 18 | -------------------------------------------------------------------------------- /examples/examples.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ########## 3 | 4 | .. rst-class:: rst-columns 5 | 6 | .. contents:: 7 | :local: 8 | :depth: 1 9 | 10 | ---- 11 | 12 | .. comment This include file is generated in the Makefile during doc build 13 | time from all the directories found in the GenAIExamples top level directory 14 | 15 | .. include:: examples.txt -------------------------------------------------------------------------------- /examples/index.rst: -------------------------------------------------------------------------------- 1 | .. 
_GenAIExamples: 2 | 3 | GenAI Examples 4 | ############## 5 | 6 | GenAIExamples are designed to give developers an easy entry into generative AI, featuring microservice-based samples that simplify the processes of deploying, testing, and scaling GenAI applications. All examples are fully compatible with Docker and Kubernetes, supporting a wide range of hardware platforms such as Gaudi, Xeon, and NVIDIA GPU, and other hardware, ensuring flexibility and efficiency for your GenAI adoption. 7 | 8 | We're building this documentation from content in the 9 | :GenAIExamples_blob:`GenAIExamples` GitHub repository. 10 | 11 | .. rst-class:: rst-columns 12 | 13 | .. toctree:: 14 | :maxdepth: 1 15 | :glob: 16 | 17 | /GenAIExamples/README 18 | examples.rst 19 | /GenAIExamples/* 20 | 21 | -------------------------------------------------------------------------------- /framework/OPEA - Rev05 PS - 4_15_2024.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/OPEA - Rev05 PS - 4_15_2024.docx -------------------------------------------------------------------------------- /framework/images/framework-image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image1.png -------------------------------------------------------------------------------- /framework/images/framework-image10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image10.png -------------------------------------------------------------------------------- /framework/images/framework-image11.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image11.png -------------------------------------------------------------------------------- /framework/images/framework-image12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image12.png -------------------------------------------------------------------------------- /framework/images/framework-image13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image13.png -------------------------------------------------------------------------------- /framework/images/framework-image14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image14.png -------------------------------------------------------------------------------- /framework/images/framework-image15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image15.png -------------------------------------------------------------------------------- /framework/images/framework-image16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image16.png -------------------------------------------------------------------------------- /framework/images/framework-image17.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image17.png -------------------------------------------------------------------------------- /framework/images/framework-image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image2.png -------------------------------------------------------------------------------- /framework/images/framework-image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image3.png -------------------------------------------------------------------------------- /framework/images/framework-image4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image4.png -------------------------------------------------------------------------------- /framework/images/framework-image5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image5.png -------------------------------------------------------------------------------- /framework/images/framework-image6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image6.png -------------------------------------------------------------------------------- /framework/images/framework-image7.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image7.png -------------------------------------------------------------------------------- /framework/images/framework-image8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image8.png -------------------------------------------------------------------------------- /framework/images/framework-image9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/framework/images/framework-image9.png -------------------------------------------------------------------------------- /getting-started/assets/chat_ui_response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/getting-started/assets/chat_ui_response.png -------------------------------------------------------------------------------- /getting-started/assets/chat_ui_response_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/getting-started/assets/chat_ui_response_rag.png -------------------------------------------------------------------------------- /getting-started/assets/what_is_opea.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/getting-started/assets/what_is_opea.pdf -------------------------------------------------------------------------------- 
/glossary.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. _glossary: 4 | 5 | Glossary of Terms 6 | ################# 7 | 8 | .. glossary:: 9 | :sorted: 10 | 11 | -------------------------------------------------------------------------------- /guide/installation/gmc_install/gmc_install.md: -------------------------------------------------------------------------------- 1 | # GenAI-microservices-connector(GMC) Installation 2 | 3 | This document will introduce the GenAI Microservices Connector (GMC) and its installation. It will then use the ChatQnA pipeline as a use case to demonstrate GMC's functionalities. 4 | 5 | ## GenAI-microservices-connector(GMC) 6 | 7 | GMC can be used to compose and adjust GenAI pipelines dynamically on Kubernetes. It can leverage the microservices provided by GenAIComps and external services to compose GenAI pipelines. External services might be running in a public cloud or on-prem. Just provide an URL and access details such as an API key and ensure there is network connectivity. It also allows users to adjust the pipeline on the fly like switching to a different Large language Model(LLM), adding new functions into the chain(like adding guardrails), etc. GMC supports different types of steps in the pipeline, like sequential, parallel and conditional. For more information: https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector 8 | 9 | ## Install GMC 10 | 11 | **Prerequisites** 12 | 13 | - For the ChatQnA example ensure your cluster has a running Kubernetes cluster with at least 16 CPUs, 32GB of memory, and 100GB of disk space. 
To install a Kubernetes cluster refer to: 14 | ["Kubernetes installation"](../k8s_install/README.md) 15 | 16 | **Download the GMC github repository** 17 | 18 | ```sh 19 | git clone https://github.com/opea-project/GenAIInfra.git && cd GenAIInfra/microservices-connector 20 | ``` 21 | 22 | **Build and push your image to the location specified by `CTR_IMG`:** 23 | 24 | ```sh 25 | make docker.build docker.push CTR_IMG=/gmcmanager: 26 | ``` 27 | 28 | **NOTE:** This image will be published in the personal registry you specified. 29 | And it is required to have access to pull the image from the working environment. 30 | Make sure you have the proper permissions to the registry if the above commands don’t work. 31 | 32 | **Install GMC CRD** 33 | 34 | ```sh 35 | kubectl apply -f config/crd/bases/gmc.opea.io_gmconnectors.yaml 36 | ``` 37 | 38 | **Get related manifests for GenAI Components** 39 | 40 | ```sh 41 | mkdir -p $(pwd)/config/manifests 42 | cp $(dirname $(pwd))/manifests/ChatQnA/*.yaml -p $(pwd)/config/manifests/ 43 | ``` 44 | 45 | **Copy GMC router manifest** 46 | 47 | ```sh 48 | cp $(pwd)/config/gmcrouter/gmc-router.yaml -p $(pwd)/config/manifests/ 49 | ``` 50 | 51 | **Create Namespace for gmcmanager deployment** 52 | 53 | ```sh 54 | export SYSTEM_NAMESPACE=system 55 | kubectl create namespace $SYSTEM_NAMESPACE 56 | ``` 57 | 58 | **NOTE:** Please use the exact same `SYSTEM_NAMESPACE` value setting you used while deploying gmc-manager.yaml and gmc-manager-rbac.yaml. 59 | 60 | **Create ConfigMap for GMC to hold GenAI Components and GMC Router manifests** 61 | 62 | ```sh 63 | kubectl create configmap gmcyaml -n $SYSTEM_NAMESPACE --from-file $(pwd)/config/manifests 64 | ``` 65 | 66 | **NOTE:** The configmap name `gmcyaml' is defined in gmcmanager deployment Spec. Please modify accordingly if you want use a different name for the configmap. 
67 | 68 | **Install GMC manager** 69 | 70 | ```sh 71 | kubectl apply -f $(pwd)/config/rbac/gmc-manager-rbac.yaml 72 | kubectl apply -f $(pwd)/config/manager/gmc-manager.yaml 73 | ``` 74 | 75 | **Check the installation result** 76 | 77 | ```sh 78 | kubectl get pods -n system 79 | NAME READY STATUS RESTARTS AGE 80 | gmc-controller-78f9c748cb-ltcdv 1/1 Running 0 3m 81 | ``` 82 | 83 | ## Use GMC to compose a chatQnA Pipeline 84 | A sample for chatQnA can be found at config/samples/chatQnA_xeon.yaml 85 | 86 | **Deploy chatQnA GMC custom resource** 87 | 88 | ```sh 89 | kubectl create ns chatqa 90 | kubectl apply -f $(pwd)/config/samples/chatQnA_xeon.yaml 91 | ``` 92 | 93 | **GMC will reconcile chatQnA custom resource and get all related components/services ready** 94 | 95 | ```sh 96 | kubectl get service -n chatqa 97 | ``` 98 | 99 | **Check GMC chatQnA custom resource to get access URL for the pipeline** 100 | 101 | ```bash 102 | $kubectl get gmconnectors.gmc.opea.io -n chatqa 103 | NAME URL READY AGE 104 | chatqa http://router-service.chatqa.svc.cluster.local:8080 8/0/8 3m 105 | ``` 106 | 107 | **Deploy one client pod for testing the chatQnA application** 108 | 109 | ```bash 110 | kubectl create deployment client-test -n chatqa --image=python:3.8.13 -- sleep infinity 111 | ``` 112 | 113 | **Access the pipeline using the above URL from the client pod** 114 | 115 | ```bash 116 | export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name}) 117 | export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}") 118 | kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' 119 | ``` 120 | -------------------------------------------------------------------------------- /guide/installation/install_docker.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | # Update the package index 7 | sudo apt-get -y update 8 | 9 | # Install prerequisites 10 | sudo apt-get -y install ca-certificates curl 11 | 12 | # Create the directory for the Docker GPG key 13 | sudo install -m 0755 -d /etc/apt/keyrings 14 | 15 | # Add Docker's official GPG key 16 | sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc 17 | 18 | # Set permissions for the GPG key 19 | sudo chmod a+r /etc/apt/keyrings/docker.asc 20 | 21 | # Add Docker repository to the sources list 22 | echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ 23 | $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 24 | 25 | # Update the package index with Docker packages 26 | sudo apt-get -y update 27 | 28 | # Install Docker packages 29 | sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin 30 | 31 | # add existing user to docker group and log in 32 | sudo groupadd docker 33 | sudo usermod -aG docker $USER 34 | newgrp docker 35 | 36 | # Optional: Verify that Docker is installed correctly 37 | sudo docker --version 38 | -------------------------------------------------------------------------------- /guide/installation/k8s_install/README.md: -------------------------------------------------------------------------------- 1 | # Kubernetes Installation Options 2 | 3 | Here are a variety of ways to install Kubernetes: 4 | 5 | * [Using AWS EKS Cluster](k8s_instal_aws_eks.md) 6 | * [Using kubeadm](k8s_install_kubeadm.md) 7 | * [Using Kubespray](k8s_install_kubespray.md) 8 | 9 | -------------------------------------------------------------------------------- 
/guide/installation/k8s_install/k8s_instal_aws_eks.md: -------------------------------------------------------------------------------- 1 | # Kubernetes Installation using AWS EKS Cluster 2 | 3 | In this document, we'll install Kubernetes v1.30 using [AWS EKS Cluster](https://docs.aws.amazon.com/eks/latest/userguide/clusters.html). 4 | 5 | 6 | There are two ways to create a new Kubernetes cluster with nodes in AWS EKS: 7 | - ["eksctl"](https://docs.aws.amazon.com/eks/latest/userguide/getting-started-eksctl.html) 8 | - ["AWS Management Console and AWS CLI"](https://docs.aws.amazon.com/eks/latest/userguide/getting-started-console.html). 9 | 10 | In this document, we'll introduce the "AWS Management Console and AWS CLI" method. 11 | 12 | ## Prerequisites 13 | 14 | Before starting this tutorial, you must install and configure the following tools and resources that you need to create and manage an Amazon EKS cluster. 15 | 16 | - AWS CLI – A command line tool for working with AWS services, including Amazon EKS. For more information, see ["Installing, updating, and uninstalling the AWS CLI"](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) in the AWS Command Line Interface User Guide. After installing the AWS CLI, we recommend that you also configure it. For more information, see ["Quick configuration with aws configure"](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html#cli-configure-quickstart-config) in the AWS Command Line Interface User Guide. 17 | 18 | - kubectl – A command line tool for working with Kubernetes clusters. For more information, see ["Installing or updating kubectl"](https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html). 19 | 20 | - Required IAM permissions – The IAM security principal that you're using must have permissions to work with Amazon EKS IAM roles, service linked roles, AWS CloudFormation, a VPC, and related resources. 
For more information, see ["Actions, resources, and condition keys for Amazon Elastic Kubernetes Service"](https://docs.aws.amazon.com/service-authorization/latest/reference/list_amazonelastickubernetesservice.html) and ["Using service-linked roles"](https://docs.aws.amazon.com/IAM/latest/UserGuide/using-service-linked-roles.html) in the IAM User Guide. You must complete all steps in this guide as the same user. To check the current user, run the following command: 21 | 22 | ``` 23 | aws sts get-caller-identity 24 | ``` 25 | 26 | ## Create AWS EKS Cluster in AWS Console 27 | 28 | You can refer to the YouTube video that demonstrates the steps to create an EKS cluster in the AWS console: 29 | https://www.youtube.com/watch?v=KxxgF-DAGWc 30 | 31 | Alternatively, you can refer to the AWS documentation directly: ["AWS Management Console and AWS CLI"](https://docs.aws.amazon.com/eks/latest/userguide/getting-started-console.html) or the AWS workshop to learn how to create an EKS cluster at https://www.eksworkshop.com/ 32 | 33 | ## Uploading images to an AWS Private Registry 34 | 35 | There are several reasons why your images might not be uploaded to a public image repository like Docker Hub. 36 | You can upload your image to an AWS private registry using the following steps: 37 | 38 | 1. Create a new ECR repository (if not already created): 39 | 40 | An Amazon ECR private repository contains your Docker images, Open Container Initiative (OCI) images, and OCI compatible artifacts. More information about Amazon ECR private repository: https://docs.aws.amazon.com/AmazonECR/latest/userguide/Repositories.html 41 | 42 | ``` 43 | aws ecr create-repository --repository-name my-app-repo --region <region> 44 | ``` 45 | 46 | Replace my-app-repo with your desired repository name and <region> with your AWS region (e.g., us-west-1). 47 | 48 | 2. 
Authenticate Docker to Your ECR Registry: 49 | 50 | ``` 51 | aws ecr get-login-password --region <region> | docker login --username AWS --password-stdin <aws_account_id>.dkr.ecr.<region>.amazonaws.com 52 | ``` 53 | 54 | Replace <region> with your AWS region and <aws_account_id> with your AWS account ID. 55 | 56 | 3. Build Your Docker Image: 57 | 58 | ``` 59 | docker build -t my-app:<version> . 60 | ``` 61 | 62 | 4. Tag your Docker image so that it can be pushed to your ECR repository: 63 | 64 | ``` 65 | docker tag my-app:<version> <aws_account_id>.dkr.ecr.<region>.amazonaws.com/my-app-repo:<version> 66 | ``` 67 | 68 | Replace <aws_account_id> with your AWS account ID, <region> with your AWS region, and my-app-repo with your repository name. 69 | 70 | 5. Push your Docker image to the ECR repository with this command: 71 | 72 | ``` 73 | docker push <aws_account_id>.dkr.ecr.<region>.amazonaws.com/my-app-repo:latest 74 | ``` 75 | -------------------------------------------------------------------------------- /images/BrokenBlocks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/images/BrokenBlocks.png -------------------------------------------------------------------------------- /images/OPEA-favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/images/OPEA-favicon-32x32.png -------------------------------------------------------------------------------- /images/opea-horizontal-color-w200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/images/opea-horizontal-color-w200.png -------------------------------------------------------------------------------- /images/opea-horizontal-white-w200.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/images/opea-horizontal-white-w200.png -------------------------------------------------------------------------------- /index.rst: -------------------------------------------------------------------------------- 1 | .. _opea_home: 2 | 3 | OPEA Project Documentation 4 | ########################## 5 | 6 | Welcome to the OPEA Project (|version|) documentation published |today|. 7 | OPEA streamlines the implementation of enterprise-grade Generative AI by efficiently 8 | integrating secure, performant, and cost-effective Generative AI workflows into business processes. 9 | 10 | Source code for the OPEA Project is maintained in the 11 | `OPEA Project GitHub repository`_. 12 | 13 | .. comment The links in this grid display can't use :ref: because we're 14 | using raw html. There's a risk of broken links if referenced content is 15 | moved. 16 | 17 | .. raw:: html 18 | 19 | 63 | 64 | 65 | .. toctree:: 66 | :maxdepth: 1 67 | :hidden: 68 | 69 | Documentation Home 70 | introduction/index 71 | getting-started/README 72 | tutorial/index 73 | examples/index 74 | microservices/index 75 | deploy/index 76 | eval/index 77 | developer-guides/index 78 | community/index 79 | publish/index 80 | release_notes/index 81 | CONTRIBUTING 82 | faq 83 | blogs/index 84 | 85 | .. _OPEA Project GitHub repository: https://github.com/opea-project 86 | -------------------------------------------------------------------------------- /introduction/index.rst: -------------------------------------------------------------------------------- 1 | .. _OPEA_intro: 2 | 3 | OPEA Overview 4 | ############# 5 | 6 | OPEA (Open Platform for Enterprise AI) is a framework that enables the creation 7 | and evaluation of open, multi-provider, robust, and composable generative AI 8 | (GenAI) solutions. It harnesses the best innovations across the ecosystem while 9 | keeping enterprise-level needs front and center. 
10 | 11 | OPEA simplifies the implementation of enterprise-grade composite GenAI 12 | solutions, starting with a focus on Retrieval Augmented Generative AI (RAG). 13 | The platform is designed to facilitate efficient integration of secure, 14 | performant, and cost-effective GenAI workflows into business systems and manage 15 | its deployments, leading to quicker GenAI adoption and business value. 16 | 17 | The OPEA platform includes: 18 | 19 | * Detailed framework of composable microservices building blocks for 20 | state-of-the-art GenAI systems including LLMs, data stores, and prompt engines 21 | 22 | * Architectural blueprints of retrieval-augmented GenAI component stack 23 | structure and end-to-end workflows 24 | 25 | * Multiple micro- and megaservices to get your GenAI into production and 26 | deployed 27 | 28 | * A four-step assessment for grading GenAI systems around performance, features, 29 | trustworthiness and enterprise-grade readiness 30 | 31 | OPEA Project Architecture 32 | ************************* 33 | 34 | OPEA uses microservices to create high-quality GenAI applications for 35 | enterprises, simplifying the scaling and deployment process for production. 36 | These microservices leverage a service composer that assembles them into a 37 | megaservice thereby creating real-world Enterprise AI applications. 38 | 39 | Microservices: Flexible and Scalable Architecture 40 | ================================================= 41 | 42 | The :ref:`GenAIComps` documentation describes 43 | a suite of microservices. Each microservice is designed to perform a specific 44 | function or task within the application architecture. By breaking down the 45 | system into these smaller, self-contained services, microservices promote 46 | modularity, flexibility, and scalability. This modular approach allows 47 | developers to independently develop, deploy, and scale individual components of 48 | the application, making it easier to maintain and evolve over time. 
All of the 49 | microservices are containerized, allowing cloud native deployment. 50 | 51 | Megaservices: A Comprehensive Solution 52 | ====================================== 53 | 54 | Megaservices are higher-level architectural constructs composed of one or more 55 | microservices. Unlike individual microservices, which focus on specific tasks or 56 | functions, a megaservice orchestrates multiple microservices to deliver a 57 | comprehensive solution. Megaservices encapsulate complex business logic and 58 | workflow orchestration, coordinating the interactions between various 59 | microservices to fulfill specific application requirements. This approach 60 | enables the creation of modular yet integrated applications. You can find a 61 | collection of use case-based applications in the :ref:`GenAIExamples` 62 | documentation 63 | 64 | Gateways: Customized Access to Mega- and Microservices 65 | ====================================================== 66 | 67 | The Gateway serves as the interface for users to access a megaservice, providing 68 | customized access based on user requirements. It acts as the entry point for 69 | incoming requests, routing them to the appropriate microservices within the 70 | megaservice architecture. 71 | 72 | Gateways support API definition, API versioning, rate limiting, and request 73 | transformation, allowing for fine-grained control over how users interact with 74 | the underlying Microservices. By abstracting the complexity of the underlying 75 | infrastructure, Gateways provide a seamless and user-friendly experience for 76 | interacting with the Megaservice. 77 | 78 | Next Step 79 | ********* 80 | 81 | Links to: 82 | 83 | * Getting Started Guide 84 | * Get Involved with the OPEA Open Source Community 85 | * Browse the OPEA wiki, mailing lists, and working groups: https://wiki.lfaidata.foundation/display/DL/OPEA+Home 86 | 87 | .. 
toctree:: 88 | :maxdepth: 1 89 | 90 | ../framework/framework 91 | -------------------------------------------------------------------------------- /microservices/index.rst: -------------------------------------------------------------------------------- 1 | .. _GenAIComps: 2 | .. _GenAIMicroservices: 3 | 4 | GenAI Microservices 5 | ################### 6 | 7 | GenAI microservices leverage a service composer to 8 | assemble a mega-service tailored for real-world Enterprise AI applications. All 9 | the microservices are containerized, allowing cloud native deployment. Checkout 10 | how the microservices are used in :ref:`GenAIExamples`. 11 | 12 | We're building this microservices documentation from content in the 13 | :GenAIComps_blob:`GenAIComps` GitHub repository. 14 | 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | :glob: 19 | 20 | /GenAIComps/README 21 | /GenAIComps/* 22 | 23 | **Microservices Table of Contents** 24 | 25 | .. rst-class:: rst-columns 26 | 27 | .. contents:: 28 | :local: 29 | :depth: 1 30 | 31 | ---- 32 | 33 | .. comment This include file is generated in the Makefile during doc build 34 | time from all the directories found in the GenAIComps/comps directory 35 | 36 | .. include:: microservices.txt 37 | -------------------------------------------------------------------------------- /publish/index.rst: -------------------------------------------------------------------------------- 1 | .. _Publications: 2 | 3 | Publications 4 | ############## 5 | 6 | News 7 | ***************** 8 | https://opea.dev/news/ 9 | 10 | Events 11 | ***************** 12 | https://opea.dev/community-events/ 13 | 14 | Blogs 15 | ***************** 16 | 17 | .. 
list-table:: Blogs 18 | :widths: 5 65 30 19 | :header-rows: 1 20 | 21 | * - Date 22 | - Link 23 | - Authors 24 | * - [02/26/2025] 25 | - `Multimodal Question and Answer - A Step-by-Step Guide with OPEA 1.2 and Intel Gaudi 2 `_ 26 | - Melanie Hart Buehler, Mustafa Cetin, Dina Suehiro Jones 27 | * - [02/12/2025] 28 | - `Deploy a DeepSeek-R1-Distill Chatbot in Minutes on Low-Cost AWS Xeon Instances `_ 29 | - Alex Sin 30 | * - [01/24/2025] 31 | - `OPEA Blogs `_ 32 | - OPEA Team 33 | * - [10/14/2024] 34 | - `Harness Enterprise GenAI Using OPEA `_ 35 | - Iris Ding, Malini Bhandaru 36 | 37 | Demos and Videos 38 | ***************** 39 | Todo. 40 | 41 | -------------------------------------------------------------------------------- /release_notes/RELEASE.md: -------------------------------------------------------------------------------- 1 | # OPEA Release Guide 2 | 3 | ## Release Cadence 4 | 5 | The following release cadence is for year 2024/2025. Please note that the dates listed below may not reflect the most up-to-date information. 6 | 7 | | Version | Release Date | 8 | |---------|--------------| 9 | | 0.1 | Apr 2024 | 10 | | 0.6 | May 2024 | 11 | | 0.7 | Jun 2024 | 12 | | 0.8 | Jul 2024 | 13 | | 0.9 | Aug 2024 | 14 | | 1.0 | Sep 2024 | 15 | | 1.1 | Nov 2024 | 16 | | 1.2 | Jan 2025 | 17 | | 1.3 | Apr 2025 | 18 | | 1.4 | Jul 2025 | 19 | | 1.5 | Oct 2025 | 20 | | 1.6 | Jan 2026 | 21 | | 1.7 | Apr 2026 | 22 | 23 | ## General Overview 24 | 25 | Releasing a new version of OPEA generally involves the following key steps: 26 | 27 | 1. Feature freeze (2 weeks before the release) 28 | 2. Code/Doc freeze, and creating the RC(Release Candidate) branch (1 week before the release) 29 | 3. Cherry Pick critical Code/Doc fix from main branch to the RC branch 30 | 4. Create release tag from RC branch 31 | 5. 
Deliver docker images, helm charts, and pypi binaries 32 | 33 | ## Feature Freeze 34 | 35 | Generally, this marks a point in the OPEA release process where no new features are added to the `main` branch of OPEA projects. It typically occurs two weeks before the scheduled OPEA release. After this point, first round release test will be triggered. 36 | 37 | ## Code/Doc Freeze, and Creating the RC Branch 38 | 39 | This is the point in the OPEA release cycle to create the Release Candidate (RC) branch. It typically occurs one week before the scheduled OPEA release. After this point, final round release test will be triggered. 40 | 41 | ### Preparing Creating RC Branch 42 | Following requirements needs to be met prior to creating the RC branch: 43 | - Implement all features and functionalities targeting this release. 44 | - Resolve all the known outstanding issues targeting this release. 45 | - Fix all the bugs found in the release test. 46 | 47 | ### Creating RC Branch 48 | The RC branch are typically created from the `main` branch. The branch name must follow the following format: 49 | ``` 50 | v{MAJOR}.{MINOR}rc 51 | ``` 52 | An example of this would look like: 53 | ``` 54 | v1.1rc 55 | ``` 56 | 57 | ## Cherry Pick Critical Code/Doc Fix 58 | Fixes for critical issues after code freeze must cherry-pick into the RC branch. 59 | 60 | ### How to do Cherry Picking 61 | Critical issues found in the RC branch must be fixed in the `main` branch and then cherry-picked into the RC branch. Cherry-picking will be done manually by the CI/CD owner. 62 | 63 | ## Creating Tag from RC Branch 64 | The following requirements need to be met prior to creating final Release Candidate: 65 | - No outstanding issues in the milestone. 66 | - No open issues/PRs marked with the milestone of this release(e.g. v1.1). 67 | - All the closed milestone PRs should be contained in the release branch. 
68 | - Create tags with [GHA job](https://github.com/opea-project/Validation/actions/workflows/manual-create-tag.yaml). 69 | 70 | ## Deliver Docker Images, Helm Charts, and PyPi Binaries 71 | After the release tag is created, the following artifacts need to be delivered: 72 | - Docker images, [GHA job](https://github.com/opea-project/GenAIExamples/actions/workflows/manual-docker-publish.yml). 73 | - Helm charts, [GHA job](https://github.com/opea-project/GenAIInfra/actions/workflows/manual-release-charts.yaml). 74 | - PyPi binaries, [GHA job](https://github.com/opea-project/Validation/actions/workflows/manual-pypi-publish.yml). 75 | -------------------------------------------------------------------------------- /release_notes/index.rst: -------------------------------------------------------------------------------- 1 | .. _release_notes: 2 | 3 | Release Notes 4 | ############# 5 | 6 | Release plan & guide. 7 | 8 | .. rst-class:: rst-columns3 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | RELEASE.md 14 | 15 | Release notes for the current and previous releases are archived here. 16 | 17 | .. rst-class:: rst-columns3 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | :glob: 22 | :reversed: 23 | 24 | v* 25 | -------------------------------------------------------------------------------- /release_notes/v0.6.md: -------------------------------------------------------------------------------- 1 | # OPEA Release Notes v0.6 2 | 3 | ## OPEA Highlight 4 | 5 | * Add 4 MegaService examples: CodeGen, ChatQnA, CodeTrans and Docsum, you can deploy them on Kubernetes 6 | * Enable 10 microservices for LLM, RAG, security...etc 7 | * Support text generation, code generation and end-to-end evaluation 8 | 9 | ## GenAIExamples 10 | 11 | * Build 4 reference solutions for some classic GenAI applications, like code generation, chat Q&A, code translation and document summarization, through orchestration interface in GenAIComps. 
12 | * Support seamlessly deployment on Intel Xeon and Gaudi platform through Kubernetes and Docker Compose. 13 | 14 | ## GenAIComps 15 | 16 | * Activate a suite of microservices including ASR, LLMS, Rerank, Embedding, Guardrails, TTS, Telemetry, DataPrep, Retrieval, and VectorDB. ASR functionality is fully operational on Xeon architecture, pending readiness on Gaudi. Retrieval capabilities are functional on LangChain, awaiting readiness on LlamaIndex. VectorDB functionality is supported on Redis, Chroma, and Qdrant, with readiness pending on SVS. 17 | * Added 14 file formats support in data preparation microservices and enabled Safeguard of conversation in guardrails. 18 | * Added the Ray Gaudi Supported for LLM Service. 19 | 20 | ## GenAIEvals 21 | 22 | * Add evaluating the models on text-generation tasks(lm-evaluation-harness) and coding tasks (bigcode-evaluation-harness) 23 | * Add end-to-end evaluation with microservice 24 | 25 | ## GenAIInfra 26 | 27 | * Add Helm Charts redis-vector-db, TEI, TGI and CodeGen for deploying GenAIExamples on Kubernetes 28 | * Add Manifests for deploying GenAIExamples CodeGen, ChatQnA and Docsum on Kubernetes and on Docker Compose 29 | -------------------------------------------------------------------------------- /roadmap/CICD.md: -------------------------------------------------------------------------------- 1 | # OPEA CI/CD Roadmap 2 | 3 | ## Milestone 1 (May, Done) 4 | - Format scan for GenAIExamples/GenAIComps/GenAIInfra/GenAIEval 5 | - Security scan for GenAIExamples/GenAIComps/GenAIInfra/GenAIEval 6 | - Unit test for GenAIComps/GenAIInfra/GenAIEval 7 | - E2E test for GenAIExamples/GenAIComps/GenAIInfra milestone1 related scope 8 | 9 | ## Milestone 2 (June) 10 | - CI infrastructure optimization 11 | - k8s multi-node cluster on 2 Xeon node for CI 12 | - k8s multi-node cluster on 2 Gaudi node for CI 13 | - Set up image repository for CI 14 | - UT coverage measurement 15 | - Cross-projects impact monitor 16 | - E2E test for 
GenAIExamples/GenAIComps/GenAIInfra milestone2 related scope 17 | - RAG benchmark with GenAIEval 18 | 19 | ## Milestone 3 (July) 20 | - Enhance code coverage 21 | - E2E test for GenAIExamples/GenAIComps/GenAIInfra milestone3 related scope 22 | - GMC test for k8s 23 | - k8s scalability test 24 | 25 | ## Milestone 4 (Aug) 26 | - Enhance code coverage 27 | - E2E test for GenAIExamples/GenAIComps/GenAIInfra milestone4 related scope 28 | - Enhance k8s scalability test 29 | - Auto CD workflow setup 30 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd 4 | cd scripts 5 | 6 | #add "f" to force create env 7 | bash setup_env.sh $1 8 | cd ../.. 9 | 10 | ENV_NAME=env_sphinx 11 | pwd 12 | source $ENV_NAME/bin/activate 13 | 14 | #clone repos 15 | for repo_name in docs GenAIComps GenAIEval GenAIExamples GenAIInfra opea-project.github.io; do 16 | echo "prepare for $repo_name" 17 | 18 | if [[ "$1" == "f" ]]; then 19 | echo "force to clone rep ${repo_name}" 20 | rm -rf ${repo_name} 21 | fi 22 | 23 | if [ ! -d ${repo_name} ]; then 24 | URL=https://github.com/opea-project/${repo_name}.git 25 | echo "git clone $URL" 26 | git clone $URL 27 | retval=$? 28 | if [ $retval -ne 0 ]; then 29 | echo "git clone ${repo_name} is wrong, try again!" 30 | rm -rf ${repo_name} 31 | exit 1 32 | fi 33 | sleep 10 34 | else 35 | echo "repo ${repo_name} exists, skipping cloning" 36 | fi 37 | done 38 | 39 | echo "Build HTML" 40 | cd docs 41 | make clean 42 | make html 43 | retval=$? 44 | echo "result = $retval" 45 | if [ $retval -ne 0 ]; then 46 | echo "make html is error" 47 | exit 1 48 | else 49 | echo "Done" 50 | fi 51 | 52 | if [ ! -d _build/html ]; then 53 | echo "Build online doc is wrong!" 54 | exit 1 55 | else 56 | echo "Build online doc done!" 
57 | fi 58 | 59 | echo "Update github.io" 60 | 61 | RELEASE_FOLDER=../opea-project.github.io 62 | BUILDDIR=_build 63 | PUBLISHDIR=${RELEASE_FOLDER}/latest 64 | 65 | echo "Clear all content in ${PUBLISHDIR}" 66 | rm -rf ${PUBLISHDIR}/* 67 | 68 | echo "Copy html content to ${PUBLISHDIR}" 69 | cp -r ${BUILDDIR}/html/* ${PUBLISHDIR} 70 | cp scripts/publish-README.md ${PUBLISHDIR}/../README.md 71 | bash scripts/publish-redirect.sh ${PUBLISHDIR}/../index.html latest/index.html 72 | sed 's//\n /' ${BUILDDIR}/html/404.html > ${PUBLISHDIR}/../404.html 73 | 74 | echo "Copied html content to ${PUBLISHDIR}" 75 | 76 | -------------------------------------------------------------------------------- /scripts/checkmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2024 Intel Corporation. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | # Use pymarkdown to recursively scan all markdown files for problems 6 | # Disable rules we don't care to check. If you find others that you'd like to 7 | # ignore, simply add them to this list 8 | 9 | drules=line-length,no-bare-urls,no-multiple-blanks,blanks-around-fences,no-hard-tabs,blanks-around-headings 10 | drules=$drules,fenced-code-language,no-duplicate-heading,no-emphasis-as-heading,no-trailing-spaces 11 | drules=$drules,code-block-style 12 | 13 | pymarkdown --disable-rules $drules scan -r . 
14 | 15 | -------------------------------------------------------------------------------- /scripts/codeowners_to_md.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | # Convert CODEOWNERS files provided as command line arguments into markdown with 7 | # H2 heading titles followed by a table with path and owners 8 | 9 | import sys 10 | 11 | def parse_codeowners(file_path): 12 | codeowners = [] 13 | 14 | with open(file_path, 'r') as file: 15 | for line in file: 16 | line = line.strip() 17 | # Skip comments and empty lines 18 | if not line or line.startswith('#'): 19 | continue 20 | 21 | parts = line.split() 22 | if len(parts) >= 2: 23 | path = parts[0] 24 | owners = ', '.join(parts[1:]) 25 | codeowners.append((path, owners)) 26 | 27 | return codeowners 28 | 29 | 30 | def convert_to_markdown_table(codeowners, file_name): 31 | # ./.github/CODEOWNERS ./GenAIComps/.github/CODEOWNERS ./GenAIExamples/.github/CODEOWNERS 32 | parts = file_name.split('/') 33 | # if the repo name is missing, it's the docs repo. Also handle case when 34 | # CODEOWNERS is in the root of the docs repo instead of in a .github directory. 
35 | repo=parts[1] 36 | if (repo == '.github'): 37 | repo="docs" 38 | elif (repo == "CODEOWNERS"): 39 | repo="docs" 40 | 41 | table = f"\n## {repo} Repository Code Owners\n\n" 42 | table += "| Path | Owners |\n" 43 | table += "|------|--------|\n" 44 | 45 | for path, owners in codeowners: 46 | table += f"| `{path}` | {owners} |\n" 47 | 48 | return table 49 | 50 | 51 | def main(): 52 | if len(sys.argv) < 2: 53 | print("Usage: python codeowners_to_md.py ...") 54 | sys.exit(1) 55 | 56 | markdown_output = "" 57 | 58 | for file_path in sys.argv[1:]: 59 | try: 60 | codeowners = parse_codeowners(file_path) 61 | markdown_table = convert_to_markdown_table(codeowners, file_path) 62 | markdown_output += markdown_table + "\n" 63 | except FileNotFoundError: 64 | print(f"Error: File '{file_path}' not found.") 65 | sys.exit(1) 66 | 67 | print(markdown_output) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | 73 | -------------------------------------------------------------------------------- /scripts/filter-doc-log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2019-2024 Intel Corporation. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | # run the filter-known-issues.py script to remove "expected" warning 6 | # messages from the output of the document build process and write 7 | # the filtered output to stdout 8 | # 9 | # Only argument is the name of the log file saved by the build. 10 | 11 | KI_SCRIPT=scripts/filter-known-issues.py 12 | CONFIG_DIR=.known-issues/ 13 | 14 | LOG_FILE=$1 15 | BUILDDIR=$(dirname $LOG_FILE) 16 | 17 | if [ -z "${LOG_FILE}" ]; then 18 | echo "Error in $0: missing input parameter " 19 | exit 1 20 | fi 21 | 22 | # When running in background, detached from terminal jobs, tput will 23 | # fail; we usually can tell because there is no TERM env variable. 
24 | if [ -z "${TERM:-}" -o "${TERM:-}" = dumb ]; then 25 | TPUT="true" 26 | red='' 27 | green='' 28 | else 29 | TPUT="tput" 30 | red='\E[31m' 31 | green='\e[32m' 32 | fi 33 | 34 | if [ -s "${LOG_FILE}" ]; then 35 | python3 $KI_SCRIPT --config-dir ${CONFIG_DIR} ${LOG_FILE} > ${BUILDDIR}/doc.warnings 2>&1 36 | if [ -s ${BUILDDIR}/doc.warnings ]; then 37 | echo 38 | echo -e "${red}New errors/warnings found, please fix them:" 39 | echo -e "==============================================" 40 | $TPUT sgr0 41 | echo 42 | cat ${BUILDDIR}/doc.warnings 43 | echo 44 | exit 2 45 | else 46 | echo -e "${green}No new errors/warnings." 47 | $TPUT sgr0 48 | fi 49 | 50 | else 51 | echo "Error in $0: logfile \"${LOG_FILE}\" not found." 52 | exit 3 53 | fi 54 | -------------------------------------------------------------------------------- /scripts/fix-github-md-refs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2024 Intel Corporation. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | # We'll post process the markdown files copied to the _build/rst folder to look 6 | # for hard references to the github.com markdown file, for example 7 | # (https://github.com/opea-project/.../blob/.../README.md) and make them 8 | # relative to the _build/rst directory structure where docs are being built 9 | 10 | # Work on the current directory or the directory passed as the first argument 11 | # (as done in the makefile). Normally is _build/rst 12 | 13 | cd ${1:-.} 14 | 15 | # look for markdown files containing a hard github.com/opea-project/... 
16 | # reference to a markdown file 17 | 18 | mdfiles=`grep -ril --include="*.md" 'github.com/opea-project.*\/[^\)]*'` 19 | 20 | # fix references to opea-project/tree/main/.../*.md or blob/.../*.md to be to the repo name and 21 | # subsequent path to the md file \1 is repo \3 is file path \4 is an optional #xxx target 22 | 23 | sed -i 's/(https:\/\/github.com\/opea-project\/\([^\/]*\)\/\(blob\|tree\)\/main\/\([^#)]*\.md\)\(#[^)]*\)*)/(\/\1\/\3\4)/g' $mdfiles 24 | 25 | # After that, links to the docs repo such as [blah](docs/...) should have the repo name removed since docs repo is the build root 26 | 27 | sed -i 's/](\/docs\//](\//g' $mdfiles 28 | 29 | # Fix relative links with a heading target with numbers or leading non-alpha 30 | # characters such as [section](#222-heading-title). On github.com, that link to 31 | # a heading such as "### 2.2.2 Heading Title" would work, but the id built with 32 | # Sphinx does not have leading numbers on is and would instead be to 33 | # #heading-title 34 | 35 | mdfiles=`find -name '*.md'` 36 | sed -i -E 's/(]\([^#]*#)[-0-9]+/\1/g' $mdfiles 37 | 38 | # links to a folder should instead be to the folder's README.md 39 | # Not automating this for now since there are valid folder references 40 | # The markdown sources need to be explicit about this: 41 | # if the link it to a folder, leave it and the link will open in github.com 42 | # if the link is to a .md file, the previous sed will catch it. 43 | 44 | # fix references to opea-project/blob/main/... to use the special role :repo_raw:`{path to file}` 45 | # alas, using sphinx roles doesn't work in markdown files, so leave them alone 46 | # mdfiles=`grep -ril --include="*.md" '(https:\/\/github.com\/opea-project\/[^\/]*\/blob\/main\/[^\)]*)'` 47 | # sed -i # 's/(https:\/\/github.com\/opea-project\/\([^\/]*\)\/blob\/main\/\([^)]*\)/(:\1_blob:`\2`/g' $mdfiles 48 | 49 | # find CODEOWNERS files and generate a rst table for each one found. 
This file 50 | # will is included by the codeowners.md file during the doc build so we keep 51 | # these lists up-to-date. 52 | 53 | cfiles=`find -name CODEOWNERS | sort` 54 | scripts/codeowners_to_md.py $cfiles > community/codeowners.txt 55 | -------------------------------------------------------------------------------- /scripts/hist_rel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# < 1 ]]; then 4 | echo "Miss parameter" 5 | echo "$0 [version]" 6 | echo " like: 1.2, which is defined in html_context.versions of conf.py" 7 | echo "" 8 | echo "How to build online doc for history release?" 9 | echo "" 10 | echo " Prepare: add tag in all repos with format 'v*.*', like v1.2" 11 | echo "" 12 | echo " 1. Add history release version (like 1.2) in html_context.versions of conf.py." 13 | echo " 2. Execute this script with release version (like $0 1.2). Build the history release document and output to release folder, like 1.2." 14 | echo " 3. Execute scripts\build.sh. Update the 'latest' to add new release link in 'Document Versions'." 15 | echo " 4. Git push the content of opea-project.github.io." 16 | exit 1 17 | fi 18 | 19 | version=$1 20 | TAG="v${version}" 21 | 22 | echo "TAG=${TAG}" 23 | pwd 24 | cd scripts 25 | 26 | #add "f" to force create env 27 | bash setup_env.sh $1 28 | cd ../.. 29 | 30 | ENV_NAME=env_sphinx 31 | pwd 32 | source $ENV_NAME/bin/activate 33 | 34 | #clone repos 35 | for repo_name in docs GenAIComps GenAIEval GenAIExamples GenAIInfra opea-project.github.io; do 36 | echo "prepare for $repo_name" 37 | 38 | if [[ "$1" == "f" ]]; then 39 | echo "force to clone rep ${repo_name}" 40 | rm -rf ${repo_name} 41 | fi 42 | 43 | if [ ! -d ${repo_name} ]; then 44 | URL=https://github.com/opea-project/${repo_name}.git 45 | echo "git clone $URL" 46 | git clone $URL 47 | retval=$? 48 | if [ $retval -ne 0 ]; then 49 | echo "git clone ${repo_name} is wrong, try again!" 
50 | rm -rf ${repo_name} 51 | exit 1 52 | fi 53 | sleep 10 54 | else 55 | echo "repo ${repo_name} exists, skipping cloning" 56 | fi 57 | cd ${repo_name} 58 | echo "checkout ${TAG} in ${repo_name}" 59 | pwd 60 | git checkout ${TAG} 61 | cd .. 62 | done 63 | 64 | echo "Build HTML" 65 | cd docs 66 | make clean 67 | make DOC_TAG=release RELEASE=${version} html 68 | #make DOC_TAG=release RELEASE=${version} publish 69 | retval=$? 70 | echo "result = $retval" 71 | if [ $retval -ne 0 ]; then 72 | echo "make html is error" 73 | exit 1 74 | else 75 | echo "Done" 76 | fi 77 | 78 | if [ ! -d _build/html ]; then 79 | echo "Build online doc is wrong!" 80 | exit 1 81 | else 82 | echo "Build online doc done!" 83 | fi 84 | 85 | echo "Update github.io" 86 | 87 | RELEASE_FOLDER=../opea-project.github.io 88 | BUILDDIR=_build 89 | PUBLISHDIR=${RELEASE_FOLDER}/${version} 90 | 91 | echo "Clear all content in ${PUBLISHDIR}" 92 | 93 | mkdir -p ${PUBLISHDIR} 94 | rm -rf ${PUBLISHDIR}/* 95 | echo "Copy html content to ${PUBLISHDIR}" 96 | cp -r ${BUILDDIR}/html/* ${PUBLISHDIR} 97 | 98 | echo "Copied html content to ${PUBLISHDIR}" 99 | -------------------------------------------------------------------------------- /scripts/maketoc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2024 Intel Corporation. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | # For all directories, create a H2 title with the directory name and a 6 | # .toctree directive with the repo name and 7 | # those directory names, something like this: 8 | # 9 | # AudioQnA Application 10 | # ******************** 11 | # 12 | # .. 
toctree:: 13 | # :maxdepth: 1 14 | # :glob: 15 | # 16 | # /GenAIExamples/AudioQnA/* 17 | # /GenAIExamples/AudioQnA/**/* 18 | # 19 | # ls -d1 */ returns something like this: 20 | # 21 | # AgentQnA/ 22 | # AudioQnA/ 23 | # ChatQnA/ 24 | # CodeGen/ 25 | # CodeTrans/ 26 | # DocIndexRetriever/ 27 | # 28 | # 29 | # Create a title based on the directory name and print it. 30 | # Use a gsub to turn every character in the title into an * for the 31 | # heading underline and print it. 32 | 33 | cd ../GenAIExamples 34 | 35 | ls -d1 */ | \ 36 | awk \ 37 | -v repo="GenAIExamples" \ 38 | -e '{dirname=substr($0,1,length($0)-1); if (dirname==repo) {\ 39 | print("\n.. comment maketoc: ignoring " dirname " directory\n\n"); next}\ 40 | title=dirname " Application"; \ 41 | print title;gsub(/./,"*",title); print title; \ 42 | print "\n.. rst-class:: rst-columns\n\n.. toctree::\n :maxdepth: 1\n :glob:\n\n /" \ 43 | repo "/" dirname "/*\n /" \ 44 | repo "/" dirname "/**/*\n";}' > ../docs/_build/rst/examples/examples.txt 45 | 46 | # 47 | # The components directory names in GenAIComps/comps are all lowercase, so uppercase 48 | # just the first character for the title. 49 | # 50 | 51 | cd ../GenAIComps/comps 52 | 53 | ls -d1 [a-zA-Z0-9]*/ | \ 54 | awk \ 55 | -v repo="GenAIComps" \ 56 | -e '{dirname=substr($0,1,length($0)-1); title=toupper(substr(dirname,1,1)) substr(dirname,2) " Microservice"; \ 57 | print title;gsub(/./,"*",title); print title; \ 58 | print "\n.. rst-class:: rst-columns\n\n.. toctree::\n :maxdepth: 1\n :glob:\n\n /" \ 59 | repo "/comps/" dirname "/*\n /" \ 60 | repo "/comps/" dirname "/**/*\n";}' > ../../docs/_build/rst/microservices/microservices.txt 61 | 62 | 63 | -------------------------------------------------------------------------------- /scripts/publish-README.md: -------------------------------------------------------------------------------- 1 | # opea-project.github.io 2 | 3 | This is the OPEA Project Documentation Publishing site for GitHub Pages. 
4 | Content changes are not made directly in this repo. Instead, edit content in 5 | the opea-project/docs and other repos, re-generate the HTML with Sphinx (make 6 | html), and with the right permissions, push the updated content here for 7 | publishing (make publish). 8 | 9 | Read the published documentation on 10 | [https://opea-project.github.io](https://opea-project.github.io). 11 | -------------------------------------------------------------------------------- /scripts/publish-redirect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2021 Intel Corporation. 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | # Create top-level website redirect to a doc (conf.py redirect script can only 6 | # create redirects within the published folder output, e.g. latest/ or 2.6/) 7 | # 8 | # publish-redirect docname.html destpath/docname.html 9 | 10 | if [[ $# -ne 2 ]]; then 11 | echo "Error: $0 expects two parameters: docname.html destpath/docname.html" >&2 12 | exit 1 13 | fi 14 | 15 | cat>"$1"< 17 | 18 | OPEA Project documentation Redirect 19 | 20 | 23 | 24 | 25 |

Please visit the <a href="$2">latest OPEA Project documentation</a>

26 | 27 | 28 | EOF 29 | -------------------------------------------------------------------------------- /scripts/publish-robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==7.3 2 | docutils==0.20 3 | sphinx_rtd_theme==2.0 4 | sphinx-tabs==3.4.5 5 | myst-parser>=3.0 6 | sphinx-md==0.0.3 7 | sphinxcontrib-mermaid==0.9.2 8 | pymarkdownlnt==0.9.24 9 | sphinx-design==0.6.1 10 | requirements-parser==0.11.0 11 | -------------------------------------------------------------------------------- /scripts/rsync-include.txt: -------------------------------------------------------------------------------- 1 | *.jpg 2 | *.png 3 | *.gif 4 | *.svg 5 | *.rst 6 | *.md 7 | *.rst 8 | *.txt 9 | CODEOWNERS 10 | LICENSE 11 | -------------------------------------------------------------------------------- /scripts/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo apt install git graphviz -y 4 | 5 | ENV_NAME=env_sphinx 6 | deactivate 7 | 8 | pwd 9 | cd ../.. 10 | 11 | if [[ "$1" == "f" ]]; then 12 | echo "force to create env by rm existed env folder $ENV_NAME" 13 | rm -rf $ENV_NAME 14 | fi 15 | 16 | if [ -d $ENV_NAME ]; then 17 | echo "found existed env $ENV_NAME, skip create. 
Use "f" to force create" 18 | exit 0 19 | fi 20 | 21 | 22 | python -m venv $ENV_NAME 23 | source $ENV_NAME/bin/activate 24 | pip install --upgrade pip 25 | pip install -r docs/scripts/requirements.txt 26 | 27 | echo "build env $ENV_NAME is done" 28 | exit 0 29 | -------------------------------------------------------------------------------- /scripts/show-versions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2018-2024, Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | # Show installed versions of doc building tools (per requirements.txt) 7 | 8 | import os.path 9 | import sys 10 | import requirements 11 | import subprocess 12 | 13 | class color: 14 | PURPLE = '\033[95m' 15 | CYAN = '\033[96m' 16 | DARKCYAN = '\033[36m' 17 | BLUE = '\033[94m' 18 | GREEN = '\033[92m' 19 | YELLOW = '\033[93m' 20 | RED = '\033[91m' 21 | BOLD = '\033[1m' 22 | UNDERLINE = '\033[4m' 23 | END = '\033[0m' 24 | 25 | # Check all requirements listed in requirements.txt and print out version # installed (if any) 26 | reqfile = os.path.join(sys.path[0], "requirements.txt") 27 | print ("doc build tool versions found on your system per " + reqfile + "...\n") 28 | 29 | rf = open(reqfile, "r") 30 | 31 | for req in requirements.parse(rf): 32 | try: 33 | print(" {} version: {}".format(req.name.ljust(25," "), req.specs)) 34 | if len(req.specs) == 0: 35 | print (color.RED + color.BOLD + " >>> Warning: Expected version " + 36 | req.name + " Python module from scripts/requirements.text." + color.END) 37 | except: 38 | print (color.RED + color.BOLD + req.name + " is missing." 
+ color.END + 39 | " (Hint: install all dependencies with " + color.YELLOW + 40 | "\"pip3 install --user -r scripts/requirements.txt\"" + color.END + ")") 41 | 42 | rf.close() 43 | 44 | # Print out the version of relevent packages not installed via pip 45 | # print (" " + "doxygen".ljust(25," ") + " version: " + subprocess.check_output(["doxygen", "-v"]).decode("utf-8")) 46 | -------------------------------------------------------------------------------- /scripts/sync-all-repos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # synch local copy and origin with what's in upstream repo 4 | # assumes there's an origin and upstream remote defined in each of the repos 5 | 6 | # optionally, you can give a branch name as a first parameter and it will 7 | # checkout and sync all the repos to that branchname 8 | 9 | branch=${1:-main} 10 | 11 | for d in GenAIComps GenAIExamples GenAIEval GenAIInfra docs opea-project.github.io ; do 12 | cd ~/opea-project/"$d" 13 | echo "====" $d 14 | git checkout $branch 15 | git fetch upstream 16 | git merge upstream/$branch 17 | git push origin $branch 18 | done 19 | -------------------------------------------------------------------------------- /sphinx/_static/images/BrokenBlocks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/sphinx/_static/images/BrokenBlocks.png -------------------------------------------------------------------------------- /sphinx/_static/images/OPEA-favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/sphinx/_static/images/OPEA-favicon-32x32.png -------------------------------------------------------------------------------- /sphinx/_static/images/opea-horizontal-color-w200.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/sphinx/_static/images/opea-horizontal-color-w200.png -------------------------------------------------------------------------------- /sphinx/_static/images/opea-horizontal-white-w200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/sphinx/_static/images/opea-horizontal-white-w200.png -------------------------------------------------------------------------------- /sphinx/_static/images/opea-icon-color.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 26 | 28 | 29 | 31 | 32 | 33 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /sphinx/_static/images/opea-icon-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 26 | 28 | 29 | 31 | 32 | 33 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /sphinx/_static/opea-custom.js: -------------------------------------------------------------------------------- 1 | /* Extra acrn-specific javascript */ 2 | 3 | $(document).ready(function(){ 4 | /* tweak logo link to the marketing site instead of doc site */ 5 | $( ".icon-home" ).attr({href: "https://opea-project.github.io/latest/index.html", target: "_blank"}); 6 | 7 | /* open external links in a new tab */ 8 | $('a[class*=external]').attr({target: '_blank', rel: 'noopener'}); 9 | 10 | }); 11 | -------------------------------------------------------------------------------- /sphinx/_templates/aversions.html: 
-------------------------------------------------------------------------------- 1 | {# Add rst-badge after rst-versions for small badge style. #} 2 |
3 | 4 | OPEA Project 5 | v: {{ current_version }} 6 | 7 | 8 |
9 |
10 |
{{ _('Document Versions') }}
11 | {% for slug, url in versions %} 12 |
{{ slug }}
13 | {% endfor %} 14 |
15 |
16 |
{{ _('OPEA Project links') }}
17 |
18 | Project Home 19 |
20 |
21 | Wiki 22 |
23 |
24 |
25 |
26 | -------------------------------------------------------------------------------- /sphinx/_templates/breadcrumbs.html: -------------------------------------------------------------------------------- 1 | {% extends "!breadcrumbs.html" %} 2 | {% block breadcrumbs %} 3 | 4 | {# parameterize default name "Docs" in breadcrumb via docs_title in conf.py #} 5 | {% if not docs_title %} 6 | {% set docs_title = "Docs" %} 7 | {% endif %} 8 | 9 |
  • {{ docs_title }} »
  • 10 | {% for doc in parents %} 11 |
  • {{ doc.title }} »
  • 12 | {% endfor %} 13 |
  • {{ title }}
  • 14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /sphinx/_templates/footer.html: -------------------------------------------------------------------------------- 1 | {% extends "!footer.html" %} 2 | {% block contentinfo %} 3 |

    4 | {%- if show_copyright %} 5 | {%- if hasdoc('copyright') %} 6 | {%- trans path=pathto('copyright'), copyright=copyright|e %}© Copyright {{ copyright }}.{% endtrans %} 7 | {%- else %} 8 | {%- trans copyright=copyright|e %}© Copyright {{ copyright }}.{% endtrans %} 9 | {%- endif %} 10 | {%- endif %} 11 | 12 | Published on {{last_updated}}. 13 | {% endblock %} 14 | -------------------------------------------------------------------------------- /sphinx/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block document %} 3 | {% if not is_release %} 4 | 10 | {% endif %} 11 | {{ super() }} 12 | {% endblock %} 13 | {% block menu %} 14 | {% include "aversions.html" %} 15 | {{ super() }} 16 | {% endblock %} 17 | -------------------------------------------------------------------------------- /sphinx/_templates/versions.html: -------------------------------------------------------------------------------- 1 | {# wipe out the versions.html from the rtd theme #} 2 | -------------------------------------------------------------------------------- /sphinx/extensions/html_redirects.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | 16 | 17 | # Mechanism to generate static HTML redirect pages in the output 18 | # 19 | # Uses redirect_template.html and the list of pages given in 20 | # conf.html_redirect_pages 21 | # 22 | # Adapted from ideas in https://tech.signavio.com/2017/managing-sphinx-redirects 23 | import os.path 24 | 25 | from sphinx.builders.html import StandaloneHTMLBuilder 26 | 27 | REDIRECT_TEMPLATE = """ 28 | 29 | 30 | 31 | 34 | 35 | 36 |

    <p>Page has moved <a href="$NEWURL">here</a>.</p>

    37 | 38 | 39 | """ 40 | 41 | 42 | def setup(app): 43 | app.add_config_value('html_redirect_pages', [], 'html') 44 | app.connect('build-finished', create_redirect_pages) 45 | 46 | # Since we're just setting up a build-finished hook, which runs 47 | # after both reading and writing, this extension is safe for both. 48 | return { 49 | 'parallel_read_safe': True, 50 | 'parallel_write_safe': True, 51 | } 52 | 53 | 54 | def create_redirect_pages(app, docname): 55 | if not isinstance(app.builder, StandaloneHTMLBuilder): 56 | return # only relevant for standalone HTML output 57 | 58 | for (old_url, new_url) in app.config.html_redirect_pages: 59 | if old_url.startswith('/'): 60 | old_url = old_url[1:] 61 | print("Creating redirect: %s.html to %s.html" % (old_url, new_url)) 62 | 63 | new_url = app.builder.get_relative_uri(old_url, new_url) 64 | out_file = app.builder.get_outfilename(old_url) 65 | 66 | out_dir = os.path.dirname(out_file) 67 | if not os.path.exists(out_dir): 68 | os.makedirs(out_dir) 69 | 70 | content = REDIRECT_TEMPLATE.replace("$NEWURL", new_url) 71 | 72 | if not os.path.exists(out_file): 73 | with open(out_file, "w") as rp: 74 | rp.write(content) 75 | -------------------------------------------------------------------------------- /sphinx/extensions/link_roles.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2024 Intel Corporation. 
2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | # based on http://protips.readthedocs.io/link-roles.html 6 | 7 | from __future__ import print_function 8 | from __future__ import unicode_literals 9 | import re 10 | import os 11 | import os.path 12 | from os import path 13 | import subprocess 14 | from docutils import nodes 15 | 16 | 17 | def run_cmd_get_output(cmd): 18 | try: 19 | with open(os.devnull, 'w') as devnull: 20 | output = subprocess.check_output(cmd, stderr=devnull, shell=True).strip() 21 | except subprocess.CalledProcessError as e: 22 | output = e.output.decode('ascii') 23 | 24 | return output 25 | 26 | def get_github_rev(): 27 | tag = run_cmd_get_output('git describe --tags --exact-match') 28 | if tag: 29 | return tag.decode("utf-8") 30 | else: 31 | return 'main' 32 | 33 | 34 | def setup(app): 35 | rev = get_github_rev() 36 | 37 | baseurl = 'https://github.com/opea-project/' 38 | repos = ["GenAIComps", "GenAIEval", "GenAIExamples", "GenAIInfra", "Governance", "docs"] 39 | 40 | for r in repos: 41 | app.add_role('{}_blob'.format(r), autolink('{}{}/blob/{}/%s'.format(baseurl, r, rev))) 42 | app.add_role('{}_raw'.format(r), autolink('{}{}/raw/{}/%s'.format(baseurl, r, rev))) 43 | 44 | # The role just creates new nodes based on information in the 45 | # arguments; its behavior doesn't depend on any other documents. 
46 | return { 47 | 'parallel_read_safe': True, 48 | 'parallel_write_safe': True, 49 | } 50 | 51 | 52 | def autolink(pattern): 53 | def role(name, rawtext, text, lineno, inliner, options={}, content=[]): 54 | m = re.search(r'(.*)\s*<(.*)>', text) 55 | if m: 56 | link_text = m.group(1) 57 | link = m.group(2) 58 | else: 59 | link_text = text 60 | link = text 61 | url = pattern % (link,) 62 | node = nodes.reference(rawtext, link_text, refuri=url, **options) 63 | return [node], [] 64 | return role 65 | -------------------------------------------------------------------------------- /sphinx/substitutions.txt: -------------------------------------------------------------------------------- 1 | .. |br| raw:: html .. force a line break in HTML output (blank lines needed here) 2 | 3 |
    4 | 5 | .. These are replacement strings for non-ASCII characters used within the project 6 | using the same name as the html entity names (e.g., ©) for that character 7 | 8 | .. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN 9 | :ltrim: 10 | .. |trade| unicode:: U+02122 .. TRADEMARK SIGN 11 | :ltrim: 12 | .. |reg| unicode:: U+000AE .. REGISTERED TRADEMARK SIGN 13 | :ltrim: 14 | .. |deg| unicode:: U+000B0 .. DEGREE SIGN 15 | :ltrim: 16 | .. |plusminus| unicode:: U+000B1 .. PLUS-MINUS SIGN 17 | :rtrim: 18 | .. |micro| unicode:: U+000B5 .. MICRO SIGN 19 | :rtrim: 20 | .. |check| unicode:: U+02714 .. HEAVY CHECK MARK 21 | :rtrim: 22 | .. |oplus| unicode:: U+02295 .. CIRCLED PLUS SIGN 23 | .. |rarr| unicode:: U+02192 .. RIGHTWARDS ARROW 24 | -------------------------------------------------------------------------------- /tutorial/AgentQnA/AgentQnA_Guide.rst: -------------------------------------------------------------------------------- 1 | .. _AgentQnA_Guide: 2 | 3 | AgentQnA 4 | ##################### 5 | 6 | .. note:: This guide is in its early development and is a work-in-progress with 7 | placeholder content. 8 | 9 | Overview 10 | ******** 11 | 12 | This example showcases a hierarchical multi-agent system for question-answering applications. 13 | 14 | Purpose 15 | ******* 16 | * Improve relevancy of retrieved context. Agent can rephrase user queries, decompose user queries, and iterate to get the most relevant context for answering user’s questions. Compared to conventional RAG, RAG agent can significantly improve the correctness and relevancy of the answer. 17 | * Use tools to get additional knowledge. For example, knowledge graphs and SQL databases can be exposed as APIs for Agents to gather knowledge that may be missing in the retrieval vector database. 18 | * Hierarchical agent can further improve performance. 
Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer. 19 | 20 | How It Works 21 | ************ 22 | 23 | The supervisor agent interfaces with the user and dispatch tasks to the worker agent and other tools to gather information and come up with answers. 24 | The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. 25 | 26 | 27 | .. mermaid:: 28 | 29 | graph LR; 30 | U[User]-->SA[Supervisor Agent]; 31 | SA-->WA[Worker Agent]; 32 | WA-->RT[Retrieval Tool]; 33 | SA-->T1[Tool 1]; 34 | SA-->T2[Tool 2]; 35 | SA-->TN[Tool N]; 36 | SA-->U; 37 | WA-->SA; 38 | RT-->WA; 39 | T1-->SA; 40 | T2-->SA; 41 | TN-->SA; 42 | 43 | 44 | Deployment 45 | ********** 46 | Here are some deployment options, depending on your hardware and environment: 47 | 48 | Single Node 49 | +++++++++++++++ 50 | .. toctree:: 51 | :maxdepth: 1 52 | 53 | Xeon Scalable Processor 54 | Gaudi 55 | -------------------------------------------------------------------------------- /tutorial/AudioQnA/AudioQnA_Guide.rst: -------------------------------------------------------------------------------- 1 | .. _AudioQnA_Guide: 2 | 3 | AudioQnA 4 | #################### 5 | 6 | .. note:: This guide is in its early development and is a work-in-progress with 7 | placeholder content. 
8 | 9 | Overview 10 | ******** 11 | 12 | AudioQnA is an example that demonstrates the integration of Generative AI 13 | (GenAI) models for performing question-answering (QnA) on audio files, with 14 | the added functionality of Text-to-Speech (TTS) for generating spoken 15 | responses.The example showcases how to convert audio input to text using 16 | Automatic Speech Recognition (ASR), generate answers to user queries using 17 | a language model, and then convert those answers back to speech using 18 | Text-to-Speech (TTS). 19 | 20 | Purpose 21 | ******* 22 | 23 | * **Enable audio conversation with LLMs**: AudioAnA is to develop an innovative voice-to-text-to-LLM-to-text-to-voice conversational system that leverages advanced language models to facilitate seamless and natural communication between humans and machines. 24 | 25 | Key Implementation Details 26 | ************************** 27 | 28 | User Interface: 29 | The interface that interactivates with users, gets inputs from users and 30 | serves responses to users. 31 | AudioQnA GateWay: 32 | The agent that maintains the connections between user-end and service-end, 33 | forwards requests and responses to apropriate nodes. 34 | AudioQnA MegaService: 35 | The central component that converts audio input to text using Automatic 36 | Speech Recognition (ASR), generates answers to user queries using a language 37 | model, and then converts those answers back to speech using Text-to-Speech (TTS). 38 | 39 | How It Works 40 | ************ 41 | 42 | The AudioQnA example is implemented using the component-level microservices defined 43 | in `GenAI Components `. The flow chart below 44 | shows the information flow between different microservices for this example. 45 | 46 | .. 
mermaid:: 47 | 48 | --- 49 | config: 50 | flowchart: 51 | nodeSpacing: 400 52 | rankSpacing: 100 53 | curve: linear 54 | themeVariables: 55 | fontSize: 50px 56 | --- 57 | flowchart LR 58 | %% Colors %% 59 | classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 60 | classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 61 | classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 62 | classDef invisible fill:transparent,stroke:transparent; 63 | style AudioQnA-MegaService stroke:#000000 64 | 65 | %% Subgraphs %% 66 | subgraph AudioQnA-MegaService["AudioQnA MegaService "] 67 | direction LR 68 | ASR([ASR MicroService]):::blue 69 | LLM([LLM MicroService]):::blue 70 | TTS([TTS MicroService]):::blue 71 | end 72 | subgraph UserInterface[" User Interface "] 73 | direction LR 74 | a([User Input Query]):::orchid 75 | UI([UI server
    <br>]):::orchid 76 | end 77 | 78 | WSP_SRV{{whisper service<br>
    }} 79 | SPC_SRV{{speecht5 service<br>
    }} 80 | LLM_gen{{LLM Service<br>
    }} 81 | GW([AudioQnA GateWay<br>
    ]):::orange 82 | 83 | %% Questions interaction 84 | direction LR 85 | a[User Audio Query] --> UI 86 | UI --> GW 87 | GW <==> AudioQnA-MegaService 88 | ASR ==> LLM 89 | LLM ==> TTS 90 | 91 | %% Embedding service flow 92 | direction LR 93 | ASR <-.-> WSP_SRV 94 | LLM <-.-> LLM_gen 95 | TTS <-.-> SPC_SRV 96 | 97 | 98 | This diagram illustrates the flow of information in the voice chatbot system, 99 | starting from the user input and going through the Audio2Text, response 100 | generations, and Text2Audio components, ultimately resulting in the bot's output. 101 | 102 | The architecture follows a series of steps to process user queries and generate 103 | responses: 104 | 105 | 1. **Automatic Speech Recognition (ASR)**: ASR is used to accurately recognize and 106 | transcribe human speech, which is crucial for LLMs. The ASR system receives audio 107 | input, extracts features, maps features to sequences of phonemes and predict the 108 | sequence of words corresponding to the phoneme sequence. 109 | #. **Large Language Models (LLM)**: LLMs are used to generate text-based outputs 110 | for specific text-based inputs, which are provided by ASR. 111 | #. **Text-to-Speech (TTS)**: TTS is used to read text aloud in a way that sounds 112 | natural, similar to human speech. The TTS system analyzes the input text, converts 113 | the text into a standard form and converts normalized text into final speech. 114 | 115 | Deployment 116 | ********** 117 | 118 | Here are some deployment options depending on your hardware and environment. 119 | 120 | Single Node 121 | +++++++++++++++ 122 | .. toctree:: 123 | :maxdepth: 1 124 | 125 | Xeon Scalable Processor 126 | Gaudi -------------------------------------------------------------------------------- /tutorial/CodeGen/CodeGen_Guide.rst: -------------------------------------------------------------------------------- 1 | .. 
_Codegen_Guide: 2 | 3 | CodeGen 4 | ##################### 5 | 6 | Overview 7 | ******** 8 | 9 | The CodeGen example uses specialized AI models that went through training with datasets that encompass repositories, documentation, programming code, and web data. With an understanding 10 | of various programming languages, coding patterns, and software development concepts, CodeGen LLMs assist developers and programmers. The LLMs can be integrated into the developers' 11 | Integrated Development Environments (IDEs) to have more contextual awareness to write more 12 | refined and relevant code based on suggestions. 13 | 14 | Purpose 15 | ******* 16 | * Code Generation: Streamline coding through Code Generation, enabling non-programmers to describe tasks for code creation. 17 | * Code Completion: Accelerate coding by suggesting contextually relevant snippets as developers type. 18 | * Code Translation and Modernization: Translate and modernize code across multiple programming languages, aiding interoperability and updating legacy projects. 19 | * Code Summarization: Extract key insights from codebases, improving readability and developer productivity. 20 | * Code Refactoring: Offer suggestions for code refactoring, enhancing code performance and efficiency. 21 | * AI-Assisted Testing: Assist in creating test cases, ensuring code robustness and accelerating development cycles. 22 | * Error Detection and Debugging: Detect errors in code and provide detailed descriptions and potential fixes, expediting debugging processes. 23 | 24 | How It Works 25 | ************ 26 | 27 | The CodeGen example uses an open-source code generation model with Text Generation Inference (TGI) 28 | for serving deployment. It is presented as a Code Copilot application as shown in the diagram below. 29 | 30 | .. 
figure:: /GenAIExamples/CodeGen/assets/img/codegen_architecture.png 31 | :alt: CodeGen Architecture Diagram 32 | 33 | Deployment 34 | ********** 35 | Here are some deployment options, depending on the hardware and environment: 36 | 37 | .. toctree:: 38 | :maxdepth: 1 39 | 40 | Intel® Xeon® Scalable processor 41 | Gaudi AI Accelerator 42 | -------------------------------------------------------------------------------- /tutorial/CodeTrans/CodeTrans_Guide.rst: -------------------------------------------------------------------------------- 1 | .. _CodeTrans_Guide: 2 | 3 | Code Translation 4 | ############################## 5 | 6 | Overview 7 | ******** 8 | 9 | This example showcases a code translation system that converts code from one programming language to another while preserving the original logic and functionality. The primary component is the CodeTrans MegaService, which encompasses an LLM microservice that performs the actual translation. 10 | A lightweight gateway service and a user interface allow users to submit their source code in a given language and receive the translated output in another language. 11 | 12 | Purpose 13 | ******* 14 | * **Enable code conversion and modernization**: Developers can seamlessly migrate legacy code to newer languages or frameworks, leveraging modern best practices without having to rewrite large code bases from scratch. 15 | 16 | * **Facilitate multi-language support**: By providing a system that understands multiple programming languages, organizations can unify their development approaches and reduce the barrier to adopting new languages. 17 | 18 | * **Improve developer productivity**: Automated code translation drastically reduces manual, time-consuming porting efforts, allowing developers to focus on higher-level tasks like feature design and optimization. 19 | 20 | How It Works 21 | ************ 22 | 23 | .. 
figure:: /GenAIExamples/CodeTrans/assets/img/code_trans_architecture.png 24 | :alt: CodeTrans Architecture Diagram 25 | 26 | 1. A user specifies the source language, the target language, and the snippet of code to be translated. This request is handled by the front-end UI or via a direct API call. 27 | 28 | 2. The user’s request is sent to the CodeTrans gateway, which orchestrates the call to the LLM microservice. The gateway handles details like constructing prompts and managing responses. 29 | 30 | 3. The large language model processes the user’s code snippet by analyzing syntax and semantics before generating an equivalent snippet in the target language. 31 | 32 | 4. The gateway formats the model’s output and returns the translated code to the user, via an API response or rendered within the UI. 33 | 34 | 35 | Deployment 36 | ********** 37 | Here are some deployment options, depending on the hardware and environment: 38 | 39 | Single Node 40 | +++++++++++++++ 41 | .. toctree:: 42 | :maxdepth: 1 43 | 44 | Xeon Scalable Processor 45 | Gaudi 46 | -------------------------------------------------------------------------------- /tutorial/DocSum/DocSum_Guide.rst: -------------------------------------------------------------------------------- 1 | .. _DocSum_Guide: 2 | 3 | DocSum 4 | ##################### 5 | 6 | Overview 7 | ******** 8 | 9 | The DocSum example is designed to process diverse content types including text documents, spoken language (audio), and visual media (video) to generate concise summaries that capture the essence of the original material. 10 | This pipeline integrates ASR (automatic speech recognition) with an LLM to summarize the content. 11 | This example can be used to create summaries of news articles, research papers, technical documents, legal documents, multimedia documents, and other types of documents. 
12 | 13 | Purpose 14 | ******* 15 | * Quick Content Understanding: Saves time by providing concise overviews of lengthy documents, enabling users to focus on essential information. 16 | * Knowledge Management: Organizes and indexes large repositories of documents, making information retrieval and navigation more efficient. 17 | * Research and Analysis: Simplifies the synthesis of insights from multiple reports or articles, accelerating data-driven decision-making. 18 | * Content Creation and Editing: Generates drafts or summaries for presentations, briefs, or automated reports, streamlining content workflows. 19 | * Legal and Compliance: Extracts key clauses and obligations from contracts or guidelines, ensuring compliance while reducing review effort. 20 | 21 | 22 | How It Works 23 | ************ 24 | 25 | The Docsum example uses an open-source model served using a framework such as Text Generation Inference (TGI) or vLLM to construct a summary 26 | of the input provided. It can process textual, audio, and video input from a variety of sources as shown in the diagram below. 27 | 28 | .. figure:: /GenAIExamples/DocSum/assets/img/docsum_architecture.png 29 | :alt: DocSum Architecture Diagram 30 | 31 | Deployment 32 | ********** 33 | Here are some deployment options, depending on the hardware and environment: 34 | 35 | .. 
toctree:: 36 | :maxdepth: 1 37 | 38 | Intel® Xeon® Scalable processor 39 | Gaudi AI Accelerator 40 | -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Grafana_Node_Exporter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Grafana_Node_Exporter.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Grafana_chatqna_backend_server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Grafana_chatqna_backend_server.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Grafana_chatqna_backend_server_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Grafana_chatqna_backend_server_1.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Grafana_chatqna_dataprep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Grafana_chatqna_dataprep.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Grafana_chatqna_retriever.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Grafana_chatqna_retriever.png 
-------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Grafana_vLLM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Grafana_vLLM.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Grafana_vLLM_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Grafana_vLLM_2.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Jaeger_agent_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Jaeger_agent_rag.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/Jaeger_agent_sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/Jaeger_agent_sql.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_grafana_mega_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_grafana_mega_list.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_grafana_node.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_grafana_node.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_grafana_react.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_grafana_react.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_grafana_sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_grafana_sql.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_grafana_vllm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_grafana_vllm.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_grafana_vllm_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_grafana_vllm_2.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_4traces.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_4traces.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_4traces_web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_4traces_web.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_init.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_react_2_spans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_react_2_spans.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_react_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_react_init.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_react_spans.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_react_spans.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_react_spans_1_webq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_react_spans_1_webq.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_react_spans_2_webq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_react_spans_2_webq.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_sql_2_spans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_sql_2_spans.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_sql_35_q2_spans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_sql_35_q2_spans.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_jaeger_sql_spans.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_jaeger_sql_spans.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_questions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_questions.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/agent_questions_web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/agent_questions_web.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/chatqna_16reqs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/chatqna_16reqs.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/grafana_dashboard_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/grafana_dashboard_init.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/grafana_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/grafana_init.png 
-------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_agent_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_agent_init.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_ui_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_init.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_ui_opea.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_opea.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_1req.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_1req.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_cpu_breakdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_cpu_breakdown.png -------------------------------------------------------------------------------- 
/tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_req_breakdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_req_breakdown.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_req_breakdown_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_req_breakdown_2.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_req_cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_req_cpu.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_req_gaudi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_opea_chatqna_req_gaudi.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/jaeger_ui_opea_trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/jaeger_ui_opea_trace.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/opea_telemetry.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/opea_telemetry.jpg -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/assets/prometheus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opea-project/docs/eef95b7a2070f2ff1ab9b5760017381d792e2845/tutorial/OpenTelemetry/assets/prometheus.png -------------------------------------------------------------------------------- /tutorial/OpenTelemetry/deploy/ChatQnA.md: -------------------------------------------------------------------------------- 1 | # OpenTelemetry on ChatQnA Application 2 | 3 | Each microservice in ChatQnA is instrumented with opea_telemetry, enabling Jaeger to provide a detailed time breakdown across microservices for each request. 4 | Additionally, ChatQnA features a pre-defined Grafana dashboard for its megaservice, alongside a vLLM Grafana dashboard. 5 | A dashboard for monitoring CPU statistics is also available, offering comprehensive insights into system performance and resource utilization. 6 | 7 | ## Table of contents 8 | 9 | 1. [Telemetry Tracing with Jaeger on Gaudi](#telemetry-tracing-with-jaeger-on-gaudi) 10 | 2. [Telemetry Metrics with Grafana on Gaudi](#telemetry-metrics-with-grafana-on-gaudi) 11 | 12 | 13 | ## Telemetry Tracing with Jaeger on Gaudi 14 | 15 | After ChatQnA processes a question, two traces should appear along the timeline. 16 | The trace for opea: ServiceOrchestrator.schedule runs on the CPU and includes seven spans, one of which represents the LLM service running on CPU. 17 | For LLM functions executed on Gaudi, stream requests are displayed under opea: llm_generate_stream. 18 | This trace contains two spans: one for the first token and another for all subsequent tokens. 
19 | 20 | ![chatqna_1req](../assets/jaeger_ui_opea_chatqna_1req.png) 21 | 22 | The first trace along the timeline is opea: ServiceOrchestrator.schedule, which runs on the CPU. 23 | 24 | It provides insights into the orchestration and scheduling of services within the ChatQnA megaservice, highlighting the execution flow during the process. 25 | 26 | 27 | ![chatqna_cpu_req](../assets/jaeger_ui_opea_chatqna_req_cpu.png) 28 | 29 | Clicking on the opea: ServiceOrchestrator.schedule trace will expand to reveal seven spans along the timeline. 30 | The first span represents the main schedule function, which has minimal self-execution time, indicated in black. 31 | The second span corresponds to the embedding microservice execution time, taking 33.72 ms as shown in the diagram. 32 | Following the embedding is the retriever span, which took only 3.13 ms. 33 | The last span captures the LLM functions on the CPU, with an execution time of 41.99 ms. 34 | These spans provide a detailed breakdown of the execution flow and timing for each component within the service orchestration. 35 | 36 | ![chatqna_cpu_breakdown](../assets/jaeger_ui_opea_chatqna_cpu_breakdown.png) 37 | 38 | The second trace following the schedule trace is opea: llm_generate_stream, which operates on Gaudi, as depicted in the diagram. 39 | This trace provides insights into the execution of LLM functions on Gaudi, 40 | highlighting the processing of stream requests and the associated spans for token generation. 41 | 42 | ![chatqna_gaudi_req](../assets/jaeger_ui_opea_chatqna_req_gaudi.png) 43 | 44 | Clicking on the opea: llm_generate_stream trace will expand to reveal two spans along the timeline. 45 | The first span represents the execution time for the first token, which took 15.12 ms in this run. 46 | The second span captures the execution time for all subsequent tokens, taking 920 ms as shown in the diagram. 
47 | 48 | ![chatqna_gaudi_breakdown](../assets/jaeger_ui_opea_chatqna_req_breakdown_2.png) 49 | 50 | Overall, the traces on the CPU consist of seven spans and are represented as larger circles. 51 | In contrast, the traces on Gaudi have two spans and are depicted as smaller circles. 52 | The diagrams below illustrate a run with 16 user requests, resulting in a total of 32 traces. 53 | In this scenario, the larger circles, representing CPU traces, took less time than the smaller circles, 54 | indicating that the requests required more processing time on Gaudi compared to the CPU. 55 | 56 | ![chatqna_gaudi_16reqs](../assets/chatqna_16reqs.png) 57 | 58 | ## Telemetry Metrics with Grafana on Gaudi 59 | 60 | The ChatQnA application offers several useful dashboards that provide valuable insights into its performance and operations. 61 | These dashboards are designed to help monitor various aspects of the application, such as service execution times, resource utilization, and system health, 62 | enabling users to effectively manage and optimize the application. 63 | 64 | ### ChatQnA MegaService Dashboard 65 | 66 | This dashboard provides metrics for services within the ChatQnA megaservice. 67 | The chatqna-backend-server service, which functions as the megaservice, 68 | is highlighted with its average response time displayed across multiple runs. 69 | Additionally, the dashboard presents CPU and memory usage statistics for the megaservice, 70 | offering a comprehensive view of its performance and resource consumption. 71 | 72 | ![chatqna_backend_server](../assets/Grafana_chatqna_backend_server_1.png) 73 | 74 | The dashboard can also display metrics for the dataprep-redis-service and the retriever service. 75 | These metrics provide insights into the performance and resource utilization of these services, 76 | allowing for a more comprehensive understanding of the ChatQnA application's overall operation. 
77 | 78 | ![chatqna_1req](../assets/Grafana_chatqna_dataprep.png) 79 | 80 | ![chatqna_1req](../assets/Grafana_chatqna_retriever.png) 81 | 82 | ### LLM Dashboard 83 | 84 | This dashboard presents metrics for the LLM service, including key performance indicators such as request latency, time per output token latency, 85 | and time to first token latency, among others. 86 | These metrics offer valuable insights into the efficiency and responsiveness of the LLM service, 87 | helping to identify areas for optimization and ensuring smooth operation. 88 | 89 | ![chatqna_1req](../assets/Grafana_vLLM.png) 90 | 91 | The dashboard also displays metrics for request prompt length and output length. 92 | 93 | ![chatqna_1req](../assets/Grafana_vLLM_2.png) 94 | -------------------------------------------------------------------------------- /tutorial/VideoQnA/VideoQnA_Guide.rst: -------------------------------------------------------------------------------- 1 | .. _VideoQnA_Guide: 2 | 3 | VideoQnA 4 | ################# 5 | 6 | .. note:: This guide is in its early development and is a work-in-progress with 7 | placeholder content. 8 | 9 | Overview 10 | ******** 11 | 12 | VideoQnA is a framework that retrieves video based on provided user prompt. It uses only the video embeddings to perform vector similarity search in Intel's VDMS vector database and performs all operations on Intel Xeon CPU. The pipeline supports long form videos and time-based search. 13 | 14 | Purpose 15 | ******* 16 | 17 | * Efficient Search: Utilizes video embeddings for accurate and efficient retrieval. 18 | * Long-form Video Support: Capable of handling extensive video archives and time-based searches. 19 | * Microservice Architecture: Built on GenAIComps, incorporating microservices for embedding, retrieval, reranking, and language model integration. 20 | 21 | How It Works 22 | ************ 23 | 24 | It utilizes the `GenAIComps `_ microservice pipeline on Intel Xeon server. 
The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as embedding, retriever, rerank, and lvm. Videos are converted into feature vectors using mean aggregation and stored in the VDMS vector store. When a user submits a query, the system performs a similarity search in the vector store to retrieve the best-matching videos. Contextual Inference: The retrieved videos are then sent to the Large Vision Model (LVM) for inference, providing supplemental context for the query. 25 | 26 | .. mermaid:: 27 | 28 | --- 29 | config: 30 | flowchart: 31 | nodeSpacing: 400 32 | rankSpacing: 100 33 | curve: linear 34 | themeVariables: 35 | fontSize: 50px 36 | --- 37 | flowchart LR 38 | %% Colors %% 39 | classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 40 | classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 41 | classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 42 | classDef invisible fill:transparent,stroke:transparent; 43 | style VideoQnA-MegaService stroke:#000000 44 | %% Subgraphs %% 45 | subgraph VideoQnA-MegaService["VideoQnA-MegaService"] 46 | direction LR 47 | EM([Embedding MicroService]):::blue 48 | RET([Retrieval MicroService]):::blue 49 | RER([Rerank MicroService]):::blue 50 | LVM([LVM MicroService]):::blue 51 | end 52 | subgraph User Interface 53 | direction LR 54 | a([User Input Query]):::orchid 55 | UI([UI server
    ]):::orchid 56 | Ingest([Ingest
    ]):::orchid 57 | end 58 | 59 | LOCAL_RER{{Reranking service
    }} 60 | CLIP_EM{{Embedding service
    }} 61 | VDB{{Vector DB

    }} 62 | V_RET{{Retriever service
    }} 63 | Ingest{{Ingest data
    }} 64 | DP([Data Preparation
    ]):::blue 65 | LVM_gen{{LVM Service
    }} 66 | GW([VideoQnA GateWay
    ]):::orange 67 | 68 | %% Data Preparation flow 69 | %% Ingest data flow 70 | direction LR 71 | Ingest[Ingest data] --> UI 72 | UI --> DP 73 | DP <-.-> CLIP_EM 74 | 75 | %% Questions interaction 76 | direction LR 77 | a[User Input Query] --> UI 78 | UI --> GW 79 | GW <==> VideoQnA-MegaService 80 | EM ==> RET 81 | RET ==> RER 82 | RER ==> LVM 83 | 84 | %% Embedding service flow 85 | direction LR 86 | EM <-.-> CLIP_EM 87 | RET <-.-> V_RET 88 | RER <-.-> LOCAL_RER 89 | LVM <-.-> LVM_gen 90 | 91 | direction TB 92 | %% Vector DB interaction 93 | V_RET <-.->VDB 94 | DP <-.->VDB 95 | 96 | Deployment 97 | ********** 98 | 99 | To deploy on Xeon, please check guide `here `_ 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /tutorial/index.rst: -------------------------------------------------------------------------------- 1 | OPEA Tutorial 2 | ########################## 3 | 4 | This tutorial will help users learn to deploy and use OPEA quickly. 5 | 6 | The following tutorials cover common use cases: 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | AgentQnA/AgentQnA_Guide 12 | AudioQnA/AudioQnA_Guide.rst 13 | ChatQnA/ChatQnA_Guide 14 | CodeGen/CodeGen_Guide 15 | CodeTrans/CodeTrans_Guide 16 | DocSum/DocSum_Guide 17 | DocIndexRetriever/DocIndexRetriever_Guide 18 | VideoQnA/VideoQnA_Guide 19 | 20 | The following tutorials cover more advanced features like OPEA Open Telemetry: 21 | 22 | .. toctree:: 23 | :maxdepth: 1 24 | 25 | OpenTelemetry/OpenTelemetry_OPEA_Guide 26 | 27 | ----- 28 | 29 | 30 | If you want to learn more, please refer to :doc:`/GenAIExamples/README`. 31 | --------------------------------------------------------------------------------