├── .github ├── CODEOWNERS └── workflows │ └── stale.yml ├── LICENSE ├── README.md ├── architecture-decision-records ├── ODH-ADR-0000-template.md ├── ODH-ADR-0001-use-architecture-decision-records-for-open-data-hub.md ├── ODH-ADR-0002-data-science-pipelines-multi-user-approach.md ├── ODH-ADR-0003-use-apache-2-0-licence.md ├── ODH-ADR-0005-github-labels-standards.md ├── ODH-ADR-0006-organization-membership-automation.md ├── README.md ├── data-science-pipelines │ └── ODH-ADR-DSP-0001-data-science-pipelines-upgrade-testing-strategy.md ├── distributed-workloads │ └── ODH-ADR-DW-0001-determine-codeflare-deployment-strategy.md ├── explainability │ └── ODH-ADR-XAI-0001-trustyaiservice-database-configuration.md ├── model-serving │ ├── ODH-ADR-MS-0001-kserve-private-network-in-cluster.md │ └── images │ │ ├── ODH-ADR-MS-0001-kserve-private-network-in-cluster-img-1.png │ │ └── ODH-ADR-MS-0001-kserve-private-network-in-cluster-img-2.png └── operator │ ├── ODH-ADR-0004-odh-trusted-ca-configmap.md │ ├── ODH-ADR-Operator-0001-distributed-manifests.md │ ├── ODH-ADR-Operator-0002-operator-scope.md │ ├── ODH-ADR-Operator-0003-component-integration.md │ ├── ODH-ADR-Operator-0005-configure-resources.md │ ├── ODH-ADR-Operator-0006-internal-api.md │ ├── ODH-ADR-Operator-0007-auth-crd.md │ ├── ODH-ADR-Operator-0007-components-version-mapping.md │ ├── ODH-ADR-Operator-0008-resources-lifecycle.md │ └── assets │ └── ODH-ADR-Operator-0006 │ ├── odh-operator-current.png │ └── odh-operator-next.png └── documentation ├── README.md ├── arch-overview.md ├── components ├── dashboard │ ├── .gitkeep │ ├── README.md │ ├── assets │ │ ├── Dashboard Arch Diagrams.drawio │ │ ├── featureFlags.drawio │ │ └── featureFlags.png │ ├── configuringDashboard.md │ ├── dashboardStorage.md │ ├── features │ │ ├── README.md │ │ ├── connections.md │ │ ├── modelCatalog.md │ │ └── modelRegistry.md │ └── k8sLabelsAndAnnotations.md ├── devops │ └── .gitkeep ├── distributed-workload │ └── .gitkeep ├── edge │ └── .gitkeep ├── explainability │ ├── .gitkeep │ ├── README.md │ ├── diagram.drawio │ └── diagram.png ├── feature_store │ ├── README.md │ └── images │ │ ├── feature-store-overview.drawio │ │ └── feature-store-overview.jpg ├── model-registry │ ├── .gitkeep │ ├── README.md │ ├── images │ │ ├── model-registry-connections.png │ │ ├── model-registry-deployment-model.png │ │ ├── model-registry-logical-model.png │ │ ├── model-registry-overview.jpg │ │ └── model-registry-tenancy-model.png │ └── model-registry-tenancy.md ├── pipelines │ ├── .gitkeep │ ├── README.md │ ├── dsp v2 architecture.drawio │ ├── dsp-v2-architecture.drawio.png │ └── dsp-v2-high-level-architecture.png ├── platform │ ├── .gitkeep │ ├── Authorization in Service Mesh.png │ ├── Platform Architecture Overview.png │ └── README.md ├── serving │ ├── .gitkeep │ ├── README.md │ ├── modelserving-architecture-High-Level Components Architecture.jpg │ └── modelserving-kserve-architecture.drawio └── workbenches │ ├── .gitkeep │ ├── README.md │ ├── high-level-workbench-arch.drawio.png │ ├── notebook-controller.drawio.png │ ├── rstudio-imagestream.drawio.png │ └── workbenches-imagestreams.drawio.png ├── diagram ├── README.MD ├── RHOAI Architecture.drawio └── RHOAI_Network_Architecture.drawio ├── enhancements └── .gitkeep └── images ├── RHOAI Architecture - D1 - Operator.png ├── RHOAI Architecture - D2 - DSP.png ├── RHOAI Architecture - D3 - Workbenches.png ├── RHOAI Architecture - D4 - Dashboard.png ├── RHOAI Architecture - D5 - Distr Workloads.png ├── RHOAI Architecture - D6a - Model Serving.png 
├── RHOAI Architecture - D6b - Model Serving.png ├── RHOAI Architecture - D6c - Model Serving.png ├── RHOAI Architecture - D7 - Trusty.png ├── RHOAI Architecture - D9 - Feature Store.png ├── RHOAI Architecture-Overview.drawio.png ├── RHODS Architecture - Network Diagram.png └── network ├── Dashboard.png ├── DataScienePipelines.png ├── DistributedWorkloads_KubeFlow_Training_Operator.png ├── DistributedWorkloads_KubeRay.png ├── ModelRegistry.png ├── ModelServing.png ├── TrustyAI.png └── Workbenches.png /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Assigning ownership for specific files or directories 2 | # Syntax: 3 | 4 | # Assigning ownership for all files in a directory 5 | # Syntax: /* 6 | 7 | # Assigning ownership for all files in a repository 8 | # Syntax: * 9 | 10 | * @opendatahub-io/architects 11 | 12 | /documentation/components/dashboard/ @opendatahub-io/architects @opendatahub-io/exploring-team 13 | /documentation/components/devops/ @opendatahub-io/architects @opendatahub-io/platform 14 | /documentation/components/distributed-workloads/ @opendatahub-io/architects @opendatahub-io/training-experimentation 15 | /documentation/components/edge/ @opendatahub-io/architects @opendatahub-io/platform 16 | /documentation/components/explainability/ @opendatahub-io/architects @opendatahub-io/model-serving 17 | /documentation/components/model-registry/ @opendatahub-io/architects @opendatahub-io/model-serving 18 | /documentation/components/pipelines/ @opendatahub-io/architects @opendatahub-io/training-experimentation 19 | /documentation/components/platform/ @opendatahub-io/architects @opendatahub-io/platform 20 | /documentation/components/serving/ @opendatahub-io/architects @opendatahub-io/model-serving 21 | /documentation/components/workbenches/ @opendatahub-io/architects @opendatahub-io/exploring-team 22 | 23 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. 2 | # 3 | # You can adjust the behavior by modifying this file. 4 | # For more information, see: 5 | # https://github.com/actions/stale 6 | name: Mark stale issues and pull requests 7 | 8 | on: 9 | schedule: 10 | - cron: '00 03 * * *' 11 | workflow_dispatch: 12 | 13 | jobs: 14 | stale: 15 | 16 | runs-on: ubuntu-latest 17 | permissions: 18 | issues: write 19 | pull-requests: write 20 | 21 | steps: 22 | - uses: actions/stale@v5 23 | with: 24 | # for now, we will only label PRs as stale, not issues 25 | days-before-issue-stale: -1 26 | days-before-pr-stale: 21 27 | days-before-pr-close: 7 28 | stale-pr-message: 'This PR is stale because it has been open 21 days with no activity. Remove stale label or comment or this will be closed in 7 days.' 29 | close-pr-message: 'This PR was closed because it has been stale for 21+7 days with no activity.' 30 | stale-pr-label: 'Stale' 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub Architecture Documents 2 | 3 | This repository contains all the Architecture Decision Records as well as the Architecture Documentation for Open Data Hub and OpenShift AI 4 | 5 | ## Architecture Decision Records 6 | [Architecture Decision Records](architecture-decision-records) 7 | 8 | ## Architecture Documentation 9 | [Documentation](documentation) 10 | -------------------------------------------------------------------------------- /architecture-decision-records/ODH-ADR-0000-template.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Architecture Decision Record template 2 | 3 | 4 | 5 | 6 | 7 | | | | 8 | | -------------- | ---------- | 9 | | Date | insert data | 10 | | Scope | | 11 | | Status | Approved | 12 | | Authors | [name](@github-username) | 13 | | Supersedes | N/A | 14 | | Superseded by: | N/A | 15 | | Tickets | | 16 | | Other docs: | none | 17 | 18 | ## What 19 | 20 | A couple sentences describing what this ADR is about. 21 | ## Why 22 | 23 | A couple sentences describing why we need an ADR for this. 24 | 25 | ## Goals 26 | 27 | * Bulleted list of goals 28 | 29 | ## Non-Goals 30 | 31 | * Bulleted list of non-goals 32 | * 33 | ## How 34 | 35 | A couple sentences describing the high level approach that this ADR captures. 36 | 37 | ## Open Questions 38 | 39 | Optional section, hopefully removed before transitioning from Draft/Proposed to Accepted. 40 | 41 | ## Alternatives 42 | 43 | Carefully describe the alternatives considered, and specifically document what the tradeoffs of each approach are. 44 | 45 | ## Security and Privacy Considerations 46 | 47 | Optional section. Talk about any security and privacy concerns here. 48 | 49 | ## Risks 50 | 51 | Optional section. Talk about any risks here. 52 | 53 | ## Stakeholder Impacts 54 | 55 | | Group | Key Contacts | Date | Impacted? | 56 | | ----------------------------- | ---------------- | ---------- | --------- | 57 | | group or team name | key contact name | date | ? 
| 58 | 59 | 60 | ## References 61 | 62 | * optional bulleted list 63 | 64 | ## Reviews 65 | 66 | | Reviewed by | Date | Notes | 67 | | ----------------------------- | --------- | ------| 68 | | name | date | ? | 69 | -------------------------------------------------------------------------------- /architecture-decision-records/ODH-ADR-0001-use-architecture-decision-records-for-open-data-hub.md: -------------------------------------------------------------------------------- 1 | # Use Architecture Decision Records for Open Data Hub 2 | 3 | | | | 4 | | -------------- | ---------- | 5 | | Date | 2023-02-20 | 6 | | Scope | Open Data Hub | 7 | | Status | Draft | 8 | | Authors | [Greg Sheremeta](@gregsheremeta) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | # Open Data Hub Architecture Decision Records 15 | 16 | "Documenting architectural decisions helps a project succeed by helping current and future contributors understand the reasons for doing things a certain way." [1] 17 | 18 | ## What is an ADR? 19 | 20 | An architecture decision record is a short text file in a Markdown format. Each record describes a set of forces and a single decision in response to those forces. [2] 21 | 22 | An ADR is not a technical design, a team-level internal procedure, or a roadmap. An ADR does not replace detailed technical design documents or good commit messages. 23 | 24 | ## How 25 | 26 | We will keep each ADR in a short text file in Markdown format. 27 | 28 | We will keep ADRs in this repository, https://github.com/opendatahub-io/adr. 29 | 30 | ADRs will be numbered sequentially and monotonically. Numbers will not be reused. 31 | 32 | If a decision is reversed, we will keep the old one around, but mark it as superseded. (It's still relevant to know that it was the decision, but is no longer the decision.) 33 | 34 | We will use a format with just a few parts, so each document is easy to digest. 35 | 36 | ## Approval Process / Flow 37 | 38 | TBD 39 | 40 | ## Reviews 41 | 42 | | Reviewed by | Date | Notes | 43 | | ----------------------------- | --------- | ------| 44 | 45 | ## References 46 | 47 | * https://www.redhat.com/architect/architecture-decision-records 48 | * https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions 49 | * https://github.com/operate-first/blueprint/tree/main/adr 50 | * https://adr.github.io/ 51 | * https://docs.aws.amazon.com/prescriptive-guidance/latest/architectural-decision-records/adr-process.html 52 | 53 | ## Citations 54 | 55 | * [1] Heiko W. 
Rupp, https://www.redhat.com/architect/architecture-decision-records 56 | * [2] Michael Nygard, https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions 57 | -------------------------------------------------------------------------------- /architecture-decision-records/ODH-ADR-0002-data-science-pipelines-multi-user-approach.md: -------------------------------------------------------------------------------- 1 | # Data Science Pipelines Multi-User Approach 2 | 3 | | | | 4 | | -------------- | ---------- | 5 | | Date | 2023-02-20 | 6 | | Scope | Data Science Pipelines Project, within Open Data Hub | 7 | | Status | Draft | 8 | | Authors | [Greg Sheremeta](@gregsheremeta) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This decision document is proposing an approach for integrating [Data Science Pipelines](https://github.com/opendatahub-io/data-science-pipelines) (upstream = [Kubeflow Pipelines](https://github.com/kubeflow/kfp-tekton)) in a multi-user environment. 17 | 18 | ## Why 19 | 20 | Out of the box, [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) comes with [multi-user isolation](https://www.kubeflow.org/docs/components/pipelines/v1/overview/multi-user/) that utilizes the Kubeflow concept of [Profiles](https://www.kubeflow.org/docs/components/multi-tenancy/overview/). We’re not using Profiles in Open Data Hub. Additionally, Kubeflow Pipelines [technical implementation](https://www.kubeflow.org/docs/components/multi-tenancy/design/) for multi-user isolation requires the installation of Istio and Dex. Istio in particular is a heavy dependency that we’d rather not require either. 21 | 22 | ## Goals 23 | 24 | * Time pressure – implement a working solution for multi-user isolation and have it production ready by the beginning of April 2023. 25 | * Implement a solution that takes into account supportability, operability, and SLAs in a managed services environment. We prefer a solution that will page SREs less over one that could potentially page SREs more. 26 | * Utilize well-known Kubernetes concepts (RBAC, namespaces) to implement multi-user isolation. 27 | * Ensure that our solution doesn’t make it difficult to migrate to Kubeflow Pipelines V2 when it’s released. 28 | * Ensure that our solution stays consistent enough with upstream such that we can rebase on upstream frequently and that we can eventually make meaningful contributions to upstream. 29 | 30 | ## Non-Goals 31 | 32 | * No plans to make Data Science Pipelines concepts work in a cross-namespace fashion. If a user has access to namespaces Namespace1 and Namespace2, a Component in Namespace1 cannot work with a Component in Namespace2 to comprise a Pipeline. 33 | 34 | ## How 35 | 36 | Instead of using Kubeflow Pipelines’ out of the box [Multi-user isolation](https://www.kubeflow.org/docs/components/pipelines/v1/overview/multi-user/), we propose to roll out multiple individual single-user Kubeflow Pipelines stack into multiple namespaces, one for each Data Science Project. We’ll create a new operator called [data-science-pipelines-operator](https://github.com/opendatahub-io/data-science-pipelines-operator) to roll out and maintain these multiple stacks. (The rest of the ADR will refer to this approach as the “multi-stack approach”.) 37 | 38 | ## Open Questions 39 | 40 | The Data Science Pipelines stack requires access to object storage (default is Minio) and a relational database (default is MySQL). 
For both Open Data Hub running on-prem and as a managed service, it’s unclear whether we’re better off using one central multi-tenant database and object store, or whether each stack should have its own individual single-tenant database and object store. Our chosen design allows for either approach. 41 | 42 | ## Alternatives 43 | 44 | 1. Use the Kubeflow Pipelines [multi-user isolation](https://www.kubeflow.org/docs/components/pipelines/v1/overview/multi-user/) as it exists, out of the box, including Istio and Dex and Profiles. 45 | * This is less work upfront for us to implement, but Istio is a complex and heavy dependency that we don’t want to require. It’s possible the Open Data Hub will require Istio in the future, but we don’t want to force that inclusion now if we don’t have to. We also don’t have the concept of Profiles in Open Data Hub, and it would be difficult to shoehorn that concept in. We dismissed this option early on. 46 | 2. Use the Kubeflow Pipelines [multi-user isolation](https://www.kubeflow.org/docs/components/pipelines/v1/overview/multi-user/) as it exists, out of the box, but **remove** the requirement for Istio and Dex. Replace these authz components with an oauth proxy that sets the HTTP header that the Kubeflow Pipelines components use to authorize users. 47 | * We have a [proof of concept](https://github.com/HumairAK/odh-manifests/blob/rhods-auth/data-science-pipelines/base/auth/AUTH_NOTES.md) of this working, but after weighing the tradeoffs with the multi-stack approach, we decided that the multi-stack approach fits our goals and requirements better: 48 | * supportability, operability, and SLAs in a managed services environment – while there is more surface area to monitor and potentially more things that can break with multiple stacks running, the impact of one single-tenant stack breaking is much less than the impact of the only multi-user stack breaking. In other words, we believe that the multi-stack approach will make it easier for us to meet SLOs and SLAs in a managed environment. 49 | * Ensure that our solution stays consistent with upstream – we felt that utilizing the upstream multi-user solution but modifying it by removing Istio left us farther away from upstream than our multi-stack approach. In the multi-stack approach we’re deploying multiple single-tenant stacks and not modifying them – we’re wrapping them with an operator. 50 | * Time pressure – we were originally concerned that writing a custom operator to maintain the multi-stack approach would cause a risk to our deadline, but we were able to create a working proof of concept of our operator within a week. It is true, though, that our multi-stack approach requires us to maintain custom code (an operator written in go) whereas using Alternative 2 would have merely required us to maintain custom manifests. 51 | 52 | ## Security and Privacy Considerations 53 | 54 | * Because the multi-stack approach relies on Kubernetes RBAC and namespaces, we inherit the security and privacy benefits around multi-user authorization inherent in Kubernetes itself. 55 | * We still have an open question around the object storage and relational database requirements, and our decision there has security and privacy considerations. If we use a single shared database, we would specify separate databases within the provider/database instance for each stack, so users’ Data Science Project data would not be stored alongside each other in the same tables. 
This is in contrast to the single-stack solution, which would have one provider and one shared database within the provider, so users’ data would be stored within the same tables. If we use multiple databases (one per stack), then the data is a little more separated, but we don’t see this as advantageous over multiple databases within one provider/database instance. 56 | 57 | ## Risks 58 | 59 | * The implementing team as a whole doesn’t have deep experience with building operators. 60 | 61 | ## Stakeholder Impacts 62 | 63 | | Group | Key Contact | Date | Impacted? | 64 | | ----------------------------- | --------------- | ---------- | --------- | 65 | | Open Data Hub UI Console team | Andrew Ballantyne| 2023-02-20 | ? | 66 | 67 | 68 | ## References 69 | 70 | * https://github.com/opendatahub-io/data-science-pipelines 71 | * https://www.kubeflow.org/docs/components/pipelines/v1/overview/multi-user/ 72 | * https://www.kubeflow.org/docs/components/pipelines/ 73 | 74 | ## Reviews 75 | 76 | | Reviewed by | Date | Notes | 77 | | ----------------------------- | --------- | ------| 78 | | Anish Asthana | 2023-02-13 | lgtm | 79 | | Humai Khan | 2023-02-10 | lgtm | 80 | 81 | ## Accept / Reject 82 | -------------------------------------------------------------------------------- /architecture-decision-records/ODH-ADR-0003-use-apache-2-0-licence.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - ODH-ADR-0003 - Open Data Hub default licence 2 | 3 | | | | 4 | | -------------- | ---------- | 5 | | Date | 11-April-2023 6 | | Scope | Open Data Hub default licence | 7 | | Status | Accepted | 8 | | Authors | [Greg Sheremeta](@gregsheremeta) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | N/A | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This ADR captures our decision to license Open Data Hub under the Apache 2.0 license going forward. 17 | 18 | ## Why 19 | 20 | Historically, Open Data Hub had standardized on using the GPLv3 license for all new code repositories. Over time, we lost track of the original reasons for selecting GPLv3. When the original decision was made, Open Data Hub was structured differently and was more focused on providing an open source reference architecture, so the reasons for the previous decision no longer apply. 21 | 22 | The current engineers and project team members who build Open Data Hub are in the best place to select the best license for the project. 23 | 24 | While GPLv3 and Apache 2.0 are both acceptable choices for Open Data Hub, we believe that selecting the Apache 2.0 license will better align Open Data Hub with other projects in the Machine Learning domain. We also believe that selecting the Apache 2.0 license will encourage more open source contributions to Open Data Hub. 
25 | 26 | We did an inventory of the licenses in use in the ODH-included projects and also some popular peer projects, and we found that 100% of those we looked at use a permissive licence, and the vast majority of those use Apache 2.0: 27 | 28 | | | | 29 | | -------------- | ---------- | 30 | | Jupyter | permissive, 3-clause BSD | 31 | | Kubeflow | permissive, Apache 2.0 | 32 | | KFP-Tekton | permissive, Apache 2.0 | 33 | | Tekton | permissive, Apache 2.0 | 34 | | CodeFlare | permissive, Apache 2.0 | 35 | | Ray | permissive, Apache 2.0 | 36 | | Elyra | permissive, Apache 2.0 | 37 | | Modelmesh | permissive, Apache 2.0 | 38 | | Pytorch | permissive, Apache-like, mostly Apache 2.0 headers | 39 | | Tensorflow | permissive, Apache 2.0 | 40 | | Keras | permissive, Apache 2.0 | 41 | | Spark | permissive, Apache 2.0 | 42 | | AutoML | permissive, Apache 2.0 | 43 | | Scikit-image | permissive, mostly 3-clause BSD | 44 | | Scikit-learn | permissive, 3-clause BSD | 45 | | Pandas | permissive, 3-clause BSD | 46 | | MXNet | permissive, Apache 2.0 | 47 | 48 | ## Goals 49 | 50 | * Recognize that the Machine Learning community uses permissive licenses (mostly Apache 2.0) as the de facto standard, and strive to align Open Data Hub to match that de facto standard. 51 | * Capture the decision that Open Data Hub will be licensed under the Apache 2.0 license going forward. 52 | * Relicense any existing Open Data Hub-specfic repositories that are currently GPLv3 (such as Data Science Pipelines Operator) to Apache 2.0. 53 | 54 | ## Non-Goals 55 | 56 | * We are not changing the license of any repositories included in Open Data Hub that are direct copies or forks of some other repository outside of Open Data Hub. Those must retain their existing licenses. 57 | 58 | ## How 59 | 60 | * Publish this ADR as a proposed ADR. 61 | * Have a week of commentary period for the Open Data Hub Community to ask questions and provide feedback. 62 | * Assuming there is no Community dissent, we will move this ADR to accepted, change the affected repositories, and announce the change as being completed. 63 | 64 | ## Open Questions 65 | 66 | We're leaving this ADR as "proposed" for a period of time so that the Open Data Hub Community can comment. 67 | 68 | ## Alternatives 69 | 70 | The primary alternative is to not do anything and leave the default license as GPLv3. However, this has caused some confusion recently because the Machine Learning community has mostly adopted Apache 2.0 as discussed above. 71 | 72 | Another alternative is to use a permissive license other than Apache 2.0. However, also as stated above, our goal is to be consistent with the Machine Learning community, and the community has mostly adopted Apache 2.0. 73 | 74 | ## Security and Privacy Considerations 75 | 76 | n/a 77 | 78 | ## Risks 79 | 80 | n/a for technical risks. 81 | 82 | If there have been any non-trivial contributions to Open Data Hub that were made with the author's understanding that they were contributing under GPLv3, we need to get their permission to change the license on their contributions. We're not currently aware of any such contributions where the auther would not approve the relicensing to Apache 2.0. 
83 | 84 | ## Stakeholder Impacts 85 | 86 | n/a 87 | 88 | ## References 89 | 90 | * n/a 91 | 92 | ## Reviews 93 | 94 | | Reviewed by | Date | Notes | 95 | | ----------------------------- | --------- | ------| 96 | | Open Data Hub Community | 2023-04-22 | Accepted | 97 | -------------------------------------------------------------------------------- /architecture-decision-records/ODH-ADR-0005-github-labels-standards.md: -------------------------------------------------------------------------------- 1 | # GitHub Label Standard for opendatahub-io organization 2 | 3 | | | | 4 | | -------------- | ---------- | 5 | | Date | 2023-04-14 | 6 | | Scope | | 7 | | Status | Accepted | 8 | | Authors | [Landon LaSmith](@lavlas) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This document will set the organization standards for the core set of GitHub [Issue Labels](https://docs.github.com/en/issues/using-labels-and-milestones-to-track-work/managing-labels) that should be supported by every repository in the opendatahub-io organization. 17 | 18 | ## Why 19 | 20 | [opendatahub-io](https://github.com/opendatahub-io) needs a unified Issue workflow that can support common [queries](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) for all Issue and Pull Request metadata from any repository in the opendatahub-io organization. To support this unified workflow, the required label names must match across all repositories. 21 | 22 | ## Goals 23 | 24 | * Standardize an issue workflow that will be used to show the Issue lifecycle, ownership and category 25 | * Define the label name standard that covers the universal states and metadata that is relevant to the entire opendatahub-io organization 26 | * Type: Bug, Feature, Documentation, tracker 27 | * Status: To Do, In Progress, In Review, Closed 28 | * Priority 29 | * ODH Component: ODH Operator, Notebook Controller, Notebooks, Manifests, Data Science Pipelines, Model Serving, AI Explainability, ... 30 | * SIG: Platform, ML Ops, Developer Experience 31 | * Extra: Good First Issue 32 | * Use the `opendatahub-community` repository that will become the centralized Issue triaging location for ODH Component owners to triage new issues and/or transfer to the appropriate component repository 33 | 34 | ## Non-Goals 35 | 36 | * This ADR will only define the core set of labels that will be supported across all repositories in opendatahub-io.
37 | The labels outlined in this document will only be a subset of the available labels in any given repo and will not contain any labels that are isolatedj to an individual repostory workflow 38 | * This is not a mandate that every component or SIG must use the centralized `opendatahub-community` repository to manage the lifecyle of issues relevant to their workflow 39 | 40 | ## How 41 | 42 | Across all repositories in the opendatahub-io organization, we will create the labels below with the expectation that the workflows they outline will be universal across all opendatahub-io repositories 43 | 44 | | Label | Description | 45 | | ----------------------------- | ---------------- | 46 | | `tracker` | Non-completable ticket; used for tracking work - akin to a Jira Epic | 47 | | `untriaged` | Indicates the newly created issue has not been triaged yet | 48 | | `kind/bug` | Indicates an unexpected problem or unintended behavior | 49 | | `kind/enhancement` | New functionality request (existing augments or new additions) | 50 | | `kind/documentation` | Improvements or additions to documentation | 51 | | `kind/security` | Indicates that this is a security issue that should be addressed | 52 | | `needinfo` | Further information is requested to unblock any progress on the issue | 53 | | `priority/normal` | An issue with the product; fix when possible | 54 | | `priority/blocker` | Critical issue that needs to be fixed asap; blocks up coming releases | 55 | | `priority/low` | An issue with the product that doesn't impact the user much or not at all (ie tech debt) | 56 | | `priority/high` | Important issue that needs to be resolved asap. Releases should not have too many of these. | 57 | | `good-first-issue` | Good for newcomers | 58 | | `odh-component/*` | Name of the odh-component that owns this issue. This should be used as the indicator for which component should own this issue if located in the centralized `opendatahub-community` repository | 59 | | `sig/*` | Name of the Special Interest Group in charge of this subject matter| 60 | | `wg/*` | Name of the Working Group that should be assigned to this issue | 61 | 62 | Additional labels maybe reserved depending on certain bots or apps running in the organization. 63 | 64 | It is assumed that all new issues will have the `untriaged` label until it is reviewed. Once an issue is triaged, a `kind/*` and `priority/*` label should be added and the issue will follow the issue worklow outlined in the component repo where it is located. 65 | 66 | ## Alternatives 67 | 68 | Alternative is to allow each SIG, WG, Maintainer, ... to use their own label naming system which would complicate attempts when querying or filtering on labels that have the same purpose but different name structure: `kind/bug` vs `kind::bug` vs `bug` 69 | 70 | ## Stakeholder Impacts 71 | 72 | | Group | Key Contacts | Date | Impacted? 
| 73 | | ----------------------------- | ---------------- | ---------- | --------- | 74 | | Platform SIG | | 2023-05-31 | yes | 75 | | MLOps SIG | | 2023-05-31 | yes | 76 | | Developer Experience SIG | | 2023-05-31 | yes | 77 | 78 | ## References 79 | 80 | * Kubernetes Contributors Documentation 81 | * [Issue Triage Guidelines](https://www.kubernetes.dev/docs/guide/issue-triage/) 82 | * [Help Wanted and Good First Issue Labels](https://www.kubernetes.dev/docs/guide/help-wanted/#good-first-issue) 83 | 84 | ## Reviews 85 | GitHub Approvals will function as reviews 86 | -------------------------------------------------------------------------------- /architecture-decision-records/ODH-ADR-0006-organization-membership-automation.md: -------------------------------------------------------------------------------- 1 | # Codification of Open Data Hub GitHub organization membership 2 | 3 | 4 | 5 | 6 | 7 | | | | 8 | | -------------- | ---------- | 9 | | Date | 2024-08-12 | 10 | | Scope | | 11 | | Status | Approved | 12 | | Authors | [Alex Corvin](@accorvin) | 13 | | Supersedes | N/A | 14 | | Superseded by: | N/A | 15 | | Tickets | | 16 | | Other docs: | none | 17 | 18 | ## What 19 | 20 | We will use codify membership in the OpenDataHub-io GitHub organization using 21 | [Peribolos](https://docs.prow.k8s.io/docs/components/cli-tools/peribolos/). We will 22 | automate the process of applying this membership using GitHub actions. 23 | 24 | ## Why 25 | 26 | This change is being made in the context of broader changes to ensure that the OpenDataHub code 27 | base is secure and minimally vulnerable to malicious actors. As part of this effort, we plan to 28 | reduce the set of organization owners to a very small set of individuals. We do not want this 29 | change to result in a bottleneck for managing organization membership, and thus want to enable 30 | individual teams to manage membership themselves. 31 | 32 | ## Goals 33 | 34 | * Create an easy method to add and remove organizational members in a way that is self-service to teams 35 | * Ensure that we can have a small set of organizational owners 36 | * Implement a reliable process which won't have a high administrative burden 37 | 38 | ## Non-Goals 39 | 40 | * We will not automate GitHub team membership or permissions on individual repositories. 41 | We want this automation to be as minimal as possible so that teams can continue using the 42 | GitHub native interface for as much as possible 43 | 44 | ## How 45 | 46 | We will revive the [org-management](https://github.com/opendatahub-io/org-management) repository 47 | which will contain a streamlined configuration file listing organization owners and members. We will 48 | not use this file to list Organization teams, repositories, or teams' permissions on individual repositories. 49 | Management of these items will continue to be manual. 50 | 51 | We will create a new GitHub team in the opendatahub-io org called `Org Membership Maintainers` which 52 | will include all managers and development leads. This team will be used in a CODEOWNERS file in the 53 | org-management repo and have permission to approve and merge pull requests to the membership configuration file. 54 | 55 | With this change in place, going forward we will use a pull request flow to add and remove 56 | members to the organization. 
Individuals (either the individual requesting membership or somone acting 57 | on their behalf) will open a pull request modifying the membership, which a member of the the 58 | `Org Membership Maintainers` team will then approve. 59 | 60 | ## Open Questions 61 | 62 | N/A 63 | 64 | ## Alternatives 65 | 66 | * Continue with the current process of organization owners manually making membership changes - this 67 | is seen as not a viable alternative as our current set of org owners is seen as too large and therefore 68 | exposes us to security vulnerabilities 69 | * Implement a custom organization role that enables only management or org members - this is hypothetically 70 | possible, but we have not tested this. Custom organization roles require GitHub enterprise which we 71 | do not currently have funding to adopt. 72 | 73 | ## Security and Privacy Considerations 74 | 75 | * With this change we make the full set or organization owners and members public in the config file 76 | * We will need to use a GitHub personal access tokken for us in the automation. This will need 77 | to be periodically renewed. 78 | 79 | ## Risks 80 | 81 | * The automation will need to be maintained. 82 | * We expect the DevOps team to own this automation as part of their long term plans to automate 83 | membership in the Red-Hat-Data-Services org as required for Konflux. 84 | * When we previously attempted to use Peribolos the automation was flaky and a constant point 85 | of frustration. To mitigate this: 86 | * We will use Peribolos only for managing organization membership, not teams or repository permissions. 87 | We feel that these latter items are more impactful to individual teams, and we'll leave manual control 88 | of these with the teams. 89 | * The previous implementation of Peribolos was not clearly communicated and therefore 90 | never fully adopted. This ADR is an attempt to communicate this more fully. 91 | 92 | ## Stakeholder Impacts 93 | 94 | | Group | Key Contacts | Date | Impacted? | 95 | | -------------------------------- | ---------------------------------------- | ---------- | --------- | 96 | | Architects Team | @opendatahub-io/architects | | y | 97 | | Documentation Team | @opendatahub-io/documentation | | y | 98 | | Exploring Team | @opendatahub-io/exploring-team | | y | 99 | | Model Serving Team | @opendatahub-io/model-serving | | y | 100 | | Training & Experimentation Team | @opendatahub-io/training-experimentation | | y | 101 | | Platform Team | @opendatahub-io/platform | | y | 102 | 103 | ## References 104 | 105 | * [Peribolos](https://docs.prow.k8s.io/docs/components/cli-tools/peribolos/ 106 | 107 | ## Reviews 108 | 109 | | Reviewed by | Date | Notes | 110 | | ----------------------------- | --------- | ------| 111 | | name | date | ? | 112 | 113 | We will not use this table for reviews. Instead, approval on the pull request 114 | adding this ADR will be used as reviews. -------------------------------------------------------------------------------- /architecture-decision-records/README.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub Architecture Decision Records 2 | 3 | "Documenting architectural decisions helps a project succeed by helping current and future contributors understand the reasons for doing things a certain way." [1] 4 | 5 | ## What is an ADR? 6 | 7 | An architecture decision record is a short text file in a Markdown format. Each record describes a set of forces and a single decision in response to those forces. 
[2] 8 | 9 | An ADR is not a technical design, a team-level internal procedure, or a roadmap. An ADR does not replace detailed technical design documents or good commit messages. 10 | 11 | ## How 12 | 13 | We will keep each ADR in a short text file in Markdown format. 14 | 15 | We will keep ADRs in this repository, https://github.com/opendatahub-io/architecture-decision-records . 16 | 17 | ADRs will be numbered sequentially and monotonically. Numbers will not be reused. 18 | 19 | If a decision is reversed, we will keep the old one around, but mark it as superseded. (It's still relevant to know that it was the decision, but is no longer the decision.) 20 | 21 | We will use a format with just a few parts, so each document is easy to digest. 22 | 23 | ## References 24 | 25 | * https://www.redhat.com/architect/architecture-decision-records 26 | * https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions 27 | * https://github.com/operate-first/blueprint/tree/main/adr 28 | * https://adr.github.io/ 29 | * https://docs.aws.amazon.com/prescriptive-guidance/latest/architectural-decision-records/adr-process.html 30 | 31 | ## Citations 32 | 33 | * [1] Heiko W. Rupp, https://www.redhat.com/architect/architecture-decision-records 34 | * [2] Michael Nygard, https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions 35 | -------------------------------------------------------------------------------- /architecture-decision-records/data-science-pipelines/ODH-ADR-DSP-0001-data-science-pipelines-upgrade-testing-strategy.md: -------------------------------------------------------------------------------- 1 | # Upgrade Testing Process for Data Science Pipelines (DSP) 2 | 3 | | | | 4 | | -------------- | -------------------------- | 5 | | Date | 2023-07-05 | 6 | | Scope | | 7 | | Status | Accepted | 8 | | Authors | [Dharmit Dalvi](@DharmitD) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This document outlines the upgrade testing process for Data Science Pipelines (DSP). The process involves upgrading DSP from a latest released version to a version with the newest commit in main (or a tag version). 17 | 18 | ## Why 19 | 20 | The upgrade testing process is crucial to ensure the successful upgrade of DSP versions and to identify any issues early on. By following a standardized testing approach, we can automate the process and provide continuous feedback on the stability and functionality of the upgraded DSP versions. 21 | 22 | ## Goals 23 | 24 | * Regularly test the success of DSP version upgrades using the most recent commit or tag. 25 | * Detect issues early and provide continuous feedback on the stability and functionality of the upgraded DSP versions. 26 | * Incorporate upgrade testing into the regular release strategy to ensure smooth transitions between released and unreleased DSP versions. 27 | 28 | ## Non-Goals 29 | 30 | * This ADR does not cover the implementation details such as tooling that ought to be used. Those aspects can be tailored according to specific requirements. 31 | 32 | ## How 33 | 34 | The update testing strategy should account for all pre-requisites for deploying DSP, then perform an upgrade, and any follow up tests to confirm the upgrade was successful. The following outlines the design for such a process: 35 | 36 | 1. Set up a Test Environment: 37 | - Provision a test Kubernetes cluster or an OpenShift cluster. 
38 | - Ensure cluster admin privileges are available to perform the following steps. 39 | 40 | 2. Install Open Data Hub (ODH) Operator. 41 | 42 | 3. Deploy KfDef Core for the most recently released DSPO Version: 43 | - Use the KfDef manifest to deploy the core components of the most recently released DSP version. 44 | - Deploy a DSPA instance 45 | 46 | 4. Prepare for Upgrade Testing: 47 | - Determine the candidate version for the DSP upgrade. It could be a tag, branch or a commit. 48 | - Update the KfDef manifest for the candidate DSP version by configuring DSPO to point to the latest commit or a tag for that version. 49 | 50 | 5. Deploy KfDef Core for the candidate DSP Version: 51 | - Use the updated KfDef manifest to deploy the core components for the candidate DSPO version. 52 | - Deploy a DSPA instance 53 | 54 | 6. Run Upgrade Tests: 55 | - Execute tests specific to testing the success of the DSP version upgrade. Examples of test cases could include checking if resources such as DSPO and DSPA deployments, ServiceMonitors, etc., come up correctly. 56 | - Customize the tests according to specific requirements and use cases. 57 | 58 | ## Automation Considerations 59 | 60 | - Set up a workflow to automate the upgrade testing process. 61 | - Configure the workflow to trigger every night asynchronously using the most recent commit of the DSP upgrade branch. 62 | - Within the workflow, deploy the updated KfDef Core for the candidate DSP version and run the upgrade tests. 63 | - Collect the test results, including logs, error messages, and any relevant information. 64 | 65 | ## Open Questions 66 | 67 | 1. Impact on Running Pipelines: 68 | - Consider the impact of upgrading DSP on currently running pipelines. 69 | - Assess how the upgrade process might affect pipeline execution, ongoing workflows, and pipeline outcomes. 70 | - Plan for any necessary adjustments or mitigations to ensure the smooth functioning of ongoing pipelines during the upgrade process. 71 | - Efforts to address this question to be tracked in [this issue.](https://github.com/opendatahub-io/data-science-pipelines-operator/issues/217) 72 | 73 | ## Security and Privacy Considerations 74 | 75 | No security and privacy considerations identified for the upgrade testing process. 76 | 77 | ## Risks 78 | 79 | 1. ODH Operator Redesign: 80 | - As per the input from the operator team, updating manifests will not be officially supported in the ODH Operator. 81 | - In development mode, users may have the option to update the manifest URI, which can enable testing of upgrades. However, this approach may not be officially supported and could introduce potential inconsistencies or issues. 82 | - There is a risk that relying on the manifest update capability in development mode may not align with the desired upgrade testing process or may not be reliable in a production environment. 83 | 84 | 2. The implementing team may face challenges in building and maintaining the automated upgrade testing process. 85 | 86 | ## Reviews 87 | 88 | | Reviewed by | Date | Notes | 89 | | ----------------------------- | --------------- | ------| 90 | | Achyut M. | July 19th, 2023 | -- | 91 | | Greg S. | July 20th, 2023 | -- | 92 | | Giulio F. | July 21st, 2023 | -- | 93 | | Humair K. 
| July 18th, 2023 | -- | 94 | -------------------------------------------------------------------------------- /architecture-decision-records/distributed-workloads/ODH-ADR-DW-0001-determine-codeflare-deployment-strategy.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Determine CodeFlare Deployment Strategy 2 | 3 | | | | 4 | | -------------- | ------------------------------------------------------------------------------------ | 5 | | Date | September 22, 2023 | 6 | | Scope | Distributed Workloads | 7 | | Status | Review | 8 | | Authors | [Anish Asthana](@anishasthana) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | [Tracking Issue](https://github.com/project-codeflare/codeflare-operator/issues/314) | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | We will no longer use an OLM installed CodeFlare operator to provide distributed workloads capabilities in ODH. 17 | 18 | ## Why 19 | 20 | The Operator of Operators pattern followed by the ODH has ODH responsible for deploying CRDs and controllers for components under its umbrella. CodeFlare has taken a different approach, where we have: 21 | 22 | 1. A published CodeFlare Operator (CFO) is available in community operator hub 23 | 1. Users need to manually subscribe to this operator 24 | 2. ODH creates configurations for CFO as part of the Data Science Cluster custom resource 25 | 1. If the CFO does not exist on the cluster, ODH Operator will expose a status requiring users to manually subscribe to the CFO. 26 | 27 | The above flow results in confusion for users – we have seen multiple instances of folks not subscribing to the CFO and coming to community channels with “issues”. Additionally, this results in CodeFlare diverging from other ODH components. 28 | 29 | ## Goals 30 | 31 | * Simplify CodeFlare usage experience for users 32 | 33 | ## Non-Goals 34 | 35 | * Changing installation path for upstream CodeFlare project 36 | 37 | ## How 38 | 39 | 1. We will create a fork of the CodeFlare Operator repository in the ODH organization 40 | 1. This fork will be kept in sync with upstream CodeFlare. There are no plans for code to diverge 41 | 2. This repository will now serve as the home for CodeFlare Operator code, manifests, and CRDs in ODH. 42 | 1. Having a fork allows us to have an ODH-controlled repository for new image builds and manifest references, allowing us to better version the CodeFlare stack in ODH. 43 | 2. CodeFlare CRDs will initially be included alongside the CFO manifests 44 | 1. They will eventually be moved into the ODH Operator bundle. 45 | 2. An implication of this is that ODH and community olm CFO can not be installed on the same cluster. 46 | 3. This fork will be synced with upstream every time there is an upstream release 47 | 4. As we also have a fork of KubeRay in the ODH organization, we can follow a similar process for KubeRay. Given this, we can probably delete all manifests from the distributed workloads repository. 48 | 49 | This would simplify the user experience greatly as users simply need to enable CodeFlare in the DSC, with ODH taking care of the rest. The above flow also brings CodeFlare closer in line with other ODH components. From a testing perspective, all of our existing test cases will still be useful. The only ones that won’t carry over without updates are the existing olm upgrade tests in the upstream CFO repository. 
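As an illustration of the simplified flow described above, a minimal sketch of what enabling the distributed workloads stack through the DataScienceCluster resource could look like is shown below. The API version, component keys, and `managementState` values are assumptions drawn from the ODH operator's DataScienceCluster API and may differ from what a particular ODH release accepts.

```yaml
# Hypothetical sketch: enabling CodeFlare and KubeRay via the DataScienceCluster
# CR so that the ODH operator reconciles the forked CodeFlare operator manifests
# itself, with no manual OLM subscription. Field names are assumptions and may
# differ from the CRD shipped in a given ODH release.
apiVersion: datasciencecluster.opendatahub.io/v1
kind: DataScienceCluster
metadata:
  name: default-dsc
spec:
  components:
    codeflare:
      managementState: Managed   # ODH deploys and manages the CodeFlare operator
    ray:
      managementState: Managed   # KubeRay follows the same pattern
```

With a resource along these lines applied, the ODH operator, rather than the user, would be responsible for installing the CodeFlare stack, which is the user-experience simplification this ADR argues for.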
50 | 51 | Another benefit to the above approach is better controls around the versioning of CodeFlare. In the event of a CodeFlare community operator release, existing ODH users are not at risk of being auto-updated before the ODH changes are ready. 52 | 53 | ## Questions 54 | 55 | 1. Should we delete the distributed workloads repository altogether? 56 | 1. This requires us to have a landing place for distributed workloads documentation. 57 | 2. [GitHub Issue](https://github.com/red-hat-data-services/distributed-workloads/issues/25) 58 | 59 | ## Alternatives 60 | 61 | 1. Continue to use OLM for CodeFlare 62 | 1. This is not a great user experience as users need to manually subscribe to dependent operators. The ODH operator is currently not planning to include manage of subscription to dependent operators 63 | 2. This will result in us continuing to differ from other ODH components, which could result in other unforeseen issues popping up in the future. 64 | 65 | ## Stakeholder Impacts 66 | 67 | | Group | Key Contacts | Date | Impacted? | 68 | | --------------------- | -------------- | ------------- | --------- | 69 | | Distributed Workloads | Anish Asthana | Sept 22, 2023 | Yes | 70 | | ODH Operator | Vaishnavi Hire | Oct 4, 2023 | Yes | 71 | 72 | ## Reviews 73 | 74 | | Reviewed by | Date | Approval | Notes | 75 | | ------------------ | ------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 76 | | Antonin Stefanutti | Sept 22, 2023 | Approved | | 77 | | Dimitri Saridakis | Sept 29, 2023 | Approved | | 78 | | Karel Suta | Sept 26, 2023 | Approved | | 79 | | Jessica Forrester | Oct 02, 2023 | Approved | If the team feels comfortable with the overhead of carrying the community operator and the slightly different install path downstream, then this is the right customer experience from my perspective. Whether the CRDs are included in the operator bundle needs to be settled, but I’d recommended sticking with whatever pattern the operator has already established for now. | 80 | | Daniele Zonca | Oct 02, 2023 | Approved | I think we need to revisit how we integrate components into ODH to limit the need of having a fork “just” to simplify user experience. But this requires work that is not in the scope of this ADR so I’m fine to proceed with this proposal assuming DW team is fine maintaining this “fork” | 81 | | Greg Sheremata | Sept 27, 2023 | Neutral | +1 to not making users click extra buttons in operatorhub. I’m agnostic to the implementation details described here, and thus not explicitly marking my review as an “approval”. | 82 | | Edson Tirelli | Sept 26, 2023 | Approved | Ideally this change is also coordinated with changes in the ODH operator, to support cases where the codeflare operator is pre-installed by the user (“managementState: unmanaged”). Also, please ensure the “fork” is only a snapshot for supporting/image building purposes and not an actual fork with diverging code. 
| 83 | | Vaishnavi Hire | Oct 4, 2023 | Approved | No additional notes | 84 | -------------------------------------------------------------------------------- /architecture-decision-records/explainability/ODH-ADR-XAI-0001-trustyaiservice-database-configuration.md: -------------------------------------------------------------------------------- 1 | # TrustyAI service database configuration 2 | 3 | | | | 4 | | -------------- |------------------------------------------------------------------| 5 | | Date | 8 Jun 2024 | 6 | | Scope | Explainability | 7 | | Status | Approved | 8 | | Authors | [Rui Vieira](https://github.com/ruivieira) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | [Tracking Issue](https://issues.redhat.com/browse/RHOAIENG-8178) | 12 | | Other docs: | none | 13 | 14 | 15 | ## What 16 | 17 | This document describes the decision to add configuration options to the `TrustyAIService` (TAS) Custom Resource Definition (CRD) in order to support database backends. 18 | 19 | ## Why 20 | 21 | TASs require a way to store data. Currently, TASs store data in files in a Persistent Volume Claim (PVC). This is a simple and effective way to store data, but it has limitations. 22 | Implementing a database backend will allow TASs to store data in a more efficient, secure and scalable way. 23 | 24 | In order to configure the TAS/Database communication new fields need to be added to the TAS CRD. 25 | 26 | We will distinguish the following modes throughout this document: 27 | 28 | * **PVC-mode**: the currently supported mode. Data is stored in files in a PVC 29 | * **DB-mode**: future mode where data is stored in a database. 30 | 31 | ## Goals 32 | 33 | This ADR aims to: 34 | 35 | * Define required fields to configure a database connection for TASs 36 | * Provide backwards compatibility with previous CRs (PVC-mode) 37 | * Define custom DB connections using the TAS CRD 38 | 39 | ## Non-Goals 40 | 41 | This ADR does **not** aim to: 42 | 43 | * Define database details or architecture beyond the connection level (e.g. schemas) and CRD configuration fields 44 | * Document how to deploy or manage the database backend. A running database is assumed to be available. 45 | 46 | ## How 47 | 48 | The type of database is abstracted by using Hibernate's ORM. This means that, technically, all Hibernate supported databases should also be transparently supported by the TrustyAI service (although at this stage, only the `mysql` family will be supported). We will focus on the required fields to configure the database connection. 49 | 50 | The current TAS CRD is typically defined as follows: 51 | 52 | ```yaml 53 | apiVersion: trustyai.opendatahub.io/v1alpha1 54 | kind: TrustyAIService 55 | metadata: 56 | name: trustyai-service-example 57 | spec: 58 | storage: 59 | format: "PVC" 60 | folder: "/inputs" 61 | size: "1Gi" 62 | data: 63 | filename: "data.csv" 64 | format: "CSV" 65 | metrics: 66 | schedule: "5s" 67 | batchSize: 5000 # Optional, defaults to 5000 68 | ``` 69 | 70 | The proposed configuration introduces the following fields: 71 | 72 | _TAS CR_ 73 | 74 | | Field | Mandatory | Note | 75 | |-------|-----------|------| 76 | |`databaseConfigurations`|No|Secret name with the connection credentials, service and port. 
If omitted, operator will look in `$(metadata.name)-db-credentials`| 77 | 78 | _Secret:_ 79 | 80 | | Field | Mandatory | Note | 81 | |------------------|-----------|------| 82 | | `databaseKind` |Yes|The type of the database (only `mysql` supported at this stage)| 83 | | `databaseUsername` |Yes|Username| 84 | | `databasePassword` |Yes|Password| 85 | | `databaseService` |Yes|The Kubernetes service to communicate with the database| 86 | |`databasePort`|Yes|The port over which communication is made| 87 | 88 | ### Example 89 | 90 | ```yaml 91 | apiVersion: trustyai.opendatahub.io/v1alpha1 92 | kind: TrustyAIService 93 | metadata: 94 | name: trustyai-service-example 95 | spec: 96 | storage: 97 | format: "DATABASE" 98 | databaseConfigurations: db-credentials # Optional 99 | metrics: 100 | schedule: "5s" 101 | batchSize: 5000 # Optional, defaults to 5000 102 | ``` 103 | 104 | ```yaml 105 | apiVersion: v1 106 | kind: Secret 107 | metadata: 108 | name: db-credentials 109 | type: Opaque 110 | data: 111 | databaseKind: mysql 112 | databaseUsername: foo 113 | databasePassword: bar 114 | databaseService: mariadb-service 115 | databasePort: 3306 116 | ``` 117 | 118 | From the above example, the operator will: 119 | 120 | * Read the provided `Secret` with the connection credentials 121 | * If not provided, it will look for a secret named `$(metadata.name)-db-credentials` 122 | * Use the secret’s `databaseService`, `databasePort`, `databaseKind` and credentials to connect to the database 123 | 124 | _Following the Principle Of Least Astonishment:_ 125 | 126 | * An invalid database configuration (_e.g._ invalid DB location) will not default to PVC, but instead prevent the TAS to be deployed, with respective feedback information (_e.g_ Kubernetes error Event + condition in the status of the CRD) 127 | * If databaseCredentials is omitted, the operator will look for a Kubernetes Secret in the same namespace with name _$(metadata.name)-db-credentials_. 128 | * If no such secret is found, the TAS will not be created 129 | 130 | > **Note** 131 | > _TBD: If the databaseService is of a file type DB (e.g. H2), a PVC will still be created to store the DB._ 132 | > _This means that the fields (spec.storage.folder and spec.storage.size still need to be provided._ 133 | 134 | The values in the CRD will be passed to the TAS as environment variables, which will be used to configure the Hibernate connection. 135 | 136 | ### Scenarios 137 | 138 | #### PVC-mode 139 | 140 | The PVC-mode is the only mode currently supported. As such, for backwards compatibility, with the current typical CR 141 | ```yaml 142 | apiVersion: trustyai.opendatahub.io/v1alpha1 143 | kind: TrustyAIService 144 | metadata: 145 | name: trustyai-service-example 146 | spec: 147 | storage: 148 | format: "PVC" 149 | folder: "/inputs" 150 | size: "1Gi" 151 | data: 152 | filename: "data.csv" 153 | format: "CSV" 154 | metrics: 155 | schedule: "5s" 156 | batchSize: 5000 # Optional, defaults to 5000 157 | ``` 158 | 159 | **PVC will take precedence over other types**. This means that if other DB related fields are added erroneously, the operator will ignore them and proceed with the PVC-mode. 
160 | 161 | ```yaml 162 | apiVersion: trustyai.opendatahub.io/v1alpha1 163 | kind: TrustyAIService 164 | metadata: 165 | name: trustyai-service-example 166 | spec: 167 | storage: 168 | format: "PVC" 169 | databaseConfigurations: my-secret 170 | folder: "/inputs" 171 | size: "1Gi" 172 | data: 173 | filename: "data.csv" 174 | format: "CSV" 175 | metrics: 176 | schedule: "5s" 177 | batchSize: 5000 # Optional, defaults to 5000 178 | ``` 179 | 180 | The operator will display a warning in the logs, but proceed with the PVC-mode as previously. 181 | 182 | #### DB-mode 183 | 184 | ##### Example: MariaDB 185 | 186 | ```yaml 187 | apiVersion: trustyai.opendatahub.io/v1alpha1 188 | kind: TrustyAIService 189 | metadata: 190 | name: trustyai-service-example 191 | spec: 192 | storage: 193 | format: "DATABASE" 194 | databaseConfigurations: db-credentials # Optional 195 | metrics: 196 | schedule: "5s" 197 | batchSize: 5000 # Optional, defaults to 5000 198 | ``` 199 | 200 | ```yaml 201 | apiVersion: v1 202 | kind: Secret 203 | metadata: 204 | name: db-credentials 205 | type: Opaque 206 | data: 207 | databaseKind: mysql 208 | databaseUsername: foo 209 | databasePassword: bar 210 | databaseService: mariadb-service 211 | databasePort: 3306 212 | 213 | ``` 214 | 215 | ### Migration 216 | 217 | Options for migration. 218 | 219 | #### Migration with an existing PVC-mode 220 | 221 | **If** 222 | 223 | * `storage.format` is `DATABASE` 224 | 225 | **And** 226 | 227 | * Both `storage.folder` and `data.filename` are set 228 | 229 | Then the operator will pass the information to the service, so that it migrates from the PVC files to the DB. 230 | The details of this process are outside the scope of this ADR. 231 | 232 | #### Migration direction 233 | 234 | * Migrating from PVC-mode to DB-mode will be supported. 235 | * Migrating from DB-mode to PVC will **not** be supported. 236 | 237 | ### Co-existence 238 | 239 | Different TASs can co-exist in different modes or the same mode, since they are isolated. 240 | 241 | ```mermaid 242 | flowchart TD 243 | TA[TrustyAI\nService A] --> PVCA[PVC A] 244 | TB[TrustyAI\nService B] --> PVCB[PVC B] 245 | TC[TrustyAI\nService C] --> DBA[MariaDB C] 246 | TD[TrustyAI\nService D] --> DBD[MariaDB D] 247 | ``` 248 | 249 | However, this does not prevent deployment mistakes, such as two different services sharing the same database. 250 | 251 | 252 | ## Alternatives 253 | 254 | Since the TAS configuration and deployment is managed by the operator, there are currently no alternatives to configure the database connection from the CR. 255 | 256 | An alternative would be to provide a separate ConfigMap with the database connection details, but this would require additional manual intervention and there would be no way to specify a database connection instead of a PVC directly in the CR. 257 | 258 | ## Stakeholder Impacts 259 | 260 | ## References 261 | 262 | * [Hibernate supported databases](https://github.com/hibernate/hibernate-orm/blob/main/dialects.adoc) 263 | 264 | Reviews 265 | 266 | Reviewed by 267 | Date 268 | Notes 269 | name 270 | date 271 | ? 
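As a usage note, the credentials Secret shown in the examples above can also be written with `stringData`, which lets the values be given in plain text and leaves the base64 encoding of `data` to Kubernetes. This is a minimal illustrative sketch, not an additional requirement of this ADR:

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: db-credentials
type: Opaque
stringData:
  databaseKind: mysql
  databaseUsername: foo
  databasePassword: bar
  databaseService: mariadb-service
  databasePort: "3306"   # stringData values are strings; the operator is assumed to parse the port
```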
272 | 273 | 274 | 275 | -------------------------------------------------------------------------------- /architecture-decision-records/model-serving/ODH-ADR-MS-0001-kserve-private-network-in-cluster.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - KServe Enable Private Endpoint 2 | 3 | | | | 4 | | -------------- | ---------------------------------------------------------------- | 5 | | Date | May 5, 2024 | 6 | | Scope | Model Serving(KServe Serverless mode) | 7 | | Status | Review | 8 | | Authors | [Jooho Lee](@JOOHO) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | [Tracking Issue](https://issues.redhat.com/browse/RHOAIENG-7918) | 12 | | Other docs: | none | 13 | 14 | 15 | ## What 16 | This ADR addresses the issue with Kserve Serverless using Istio and mTLS, which requires client certificates between pods. Applications accessing the isvc endpoint via service hostname in the same cluster must include an Istio sidecar, or they will fail. 17 | 18 | ## Why 19 | Customers deploying ODH/RHOAI on OpenShift Cluster do not expect to include their services in the mesh to access deployed models. From their perspective, models deployed through KServe are just another service, and changing the network layer for access is burdensome. Therefore, we need to provide an out-of-the-box solution allowing access to inference services via internal hostnames, similar to other services. 20 | 21 | ## Goals 22 | - Pods without an Istio sidecar should have access to the isvc endpoint. 23 | - Ensure a seamless transition for users from ModelMesh to KServe. 24 | - Implement a stable and tested solution. 25 | 26 | ## Non-Goals 27 | - Immediate overhaul of the existing Knative infrastructure. 28 | - Long-term commitment to a specific certificate management solution. 29 | 30 | 31 | ## How 32 | It involves adding an Ingress Gateway and integrating with OpenShift Certificates services. 33 | The new Istio ingress gateway would only be used for internal requests. 34 | - KServe would no longer share the knative-local-gateway, with regards to configurations. 35 | - The new Istio ingress gateway would forward requests to knative-local-gateway 36 | - i.e. we build on top of Knative routing needs (or we chain KServe routing needs with Knative ones) 37 | - The ISTIO_MUTUAL configuration on knative-local-gateway is no longer an issue. 38 | - Since both KServe and Knative gateways belong to the mesh, they can use mTLS. 39 | - KServe gateway would be capable of a TLS-simple configuration. 40 | 41 | ![proposed architecture](./images/ODH-ADR-MS-0001-kserve-private-network-in-cluster-img-1.png) 42 | 43 | ## Alternatives 44 | Knative team members designed the solution below for consideration. The benefit of the solution being in Knative layer is sharing tests coverage within the component and have lighter the ODH/KServe layer on top of it. This alternative solution is not available in Knative yet and it requires a new component `cert-manager`. Therefore, it may take a time to be done, so it can be considerable in the future once available. In contrast, the main solution proposed in this document is for the short term in ODH releases. 45 | 46 | ![alternative architecture](./images/ODH-ADR-MS-0001-kserve-private-network-in-cluster-img-2.png) 47 | 48 | ## Security and Privacy Considerations 49 | - Ensuring that the chosen solution maintains or enhances the current security posture. 
50 | - Managing the secure transition and implementation of certificate management. 51 | 52 | ## Risks 53 | - Delays in implementation affecting user migration. 54 | - Limited testing of this method possibly leads to unforeseen issues. 55 | 56 | ## Stakeholder Impacts 57 | 58 | | Group | Key Contacts | Date | Impacted? | 59 | | ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | --------- | 60 | | ModelServing | [Jooho Lee](jlee@redhat.com), [Edgar Hernandez](ehernand@redhat.com), [Alessandro Lazarotti](lazarotti@redhat.com) | May 5, 2024 | Yes | 61 | | Serverless | [Reto Lehmann](rlehmann@redhat.com), [Lukas Berk](lberk@redhat.com), [Stavros Kontopoulos](skontopo@redhat.com), [Roland Huß](rhuss@redhat.com) | May 5, 2024 | Yes | 62 | | ServiceMesh | [Rob Cernich](rcernich@redhat.com), [Bartosz Majsak](bmajsak@redhat.com), [Aslak Knutsen](aknutsen@redhat.com) | May 5, 2024 | Yes | 63 | | Dashboard | [Andrew Ballantyne](aballant@redhat.com), [Lucas Fernadez aragon](lferrnan@redhat.com), [Vince Conzola](vconzola@redhat.com) | May 5, 2024 | Yes | 64 | | ODH Operator | [Vaishnavi Hire](vhire@redhat.com) | May 5, 2024 | Yes | 65 | 66 | # Reviews 67 | 68 | | Reviewed by | Date | Approval | Notes | 69 | | --------------------- | ----------- | -------- | ----- | 70 | | Jooho Lee | Jun 3, 2024 | Approved | | 71 | | Edgar Hernandez | Jun 3, 2024 | Approved | | 72 | | Alessandro Lazarotti | Jun 3, 2024 | Approved | | 73 | | Daniele Zonca | Jun 3, 2024 | Approved | | 74 | | Reto Lehmann | Jun 3, 2024 | Approved | | 75 | | Rob Cernich | Jun 18, 2024 | Approved | | 76 | | Lucas Fernadez aragon | Jun 12, 2024 | Approved | | 77 | | Wen Zhou | Jun 27, 2024 | Approved | | 78 | -------------------------------------------------------------------------------- /architecture-decision-records/model-serving/images/ODH-ADR-MS-0001-kserve-private-network-in-cluster-img-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/architecture-decision-records/model-serving/images/ODH-ADR-MS-0001-kserve-private-network-in-cluster-img-1.png -------------------------------------------------------------------------------- /architecture-decision-records/model-serving/images/ODH-ADR-MS-0001-kserve-private-network-in-cluster-img-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/architecture-decision-records/model-serving/images/ODH-ADR-MS-0001-kserve-private-network-in-cluster-img-2.png -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-0004-odh-trusted-ca-configmap.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Make Trusted Bundle Configmap available 2 | 3 | 4 | | | | 5 | | -------------- |-------------------------------------------------------------| 6 | | Date | 2024-02-12 | 7 | | Scope | Open Data Hub | 8 | | Status | Draft | 9 | | Authors | [Landon LaSmith](@LaVLaS), [Vaishnavi Hire](@VaishnaviHire) | 10 | | Supersedes | N/A | 11 | | Superseded by: | N/A | 12 | | Tickets | | 13 | | Other docs: | none | 14 | 15 | ## What 16 | 17 | Add trusted-cabundle configmap to 
all non-openshift namespaces on ODH operator installation. 18 | 19 | ## Why 20 | 21 | The first step to support self-signed certificates in ODH deployments is to make trusted-cabundle available in all ODH namespaces. 22 | 23 | This allows ODH components to mount the certs as part of their deployment VolumeMounts. 24 | 25 | ## Goals 26 | 27 | * Make trusted-cabundle configmap available in all non-openshift namespaces 28 | * Users can opt-out of configmap injection by explicitly adding `security.opendatahub.io/inject-trusted-ca-bundle=false` annotation to a given namespace. 29 | 30 | ## Non-Goals 31 | 32 | * Modification or management of OpenShift-specific or default namespaces concerning the trusted-cabundle configmap. 33 | * Removal of injected trusted-cabundle configmap 34 | 35 | ## How 36 | 37 | * We are introducing a controller that will be responsible for creating trusted-cabundle configmap in all new and existing non-openshift namespaces. 38 | * For trusted-cabundle configmap, we are standardizing on `odh-trusted-ca-bundle` as the configmap name with a label of `app.kubernetes.io/part-of=opendatahub-operator`. 39 | * A namespace is considered non-openshift if - 40 | * It doesn't start with `openshift-` 41 | * It doesn't start with `kube-` 42 | * It is not `openshift` 43 | * It is not `default` 44 | * The configmap injection is triggered using an api field in DSCI `.spec.trustedCABundle.managementState`. When set to a `Managed` state this will inject the cert configmap in 45 | all non-openshift namespaces. Users can opt-out of cert injection by setting the managementState to `Removed`. 46 | 47 | ## Alternatives 48 | 49 | 1. We are proposing a longer term solution that involves adding cert-cabundle configmap to only the namespaces that have 50 | ODH resources. This approach is contingent upon the successful implementation of [DataScienceProjects controller ADR](https://github.com/opendatahub-io/architecture-decision-records/pull/25). 51 | 52 | 53 | ## Stakeholder Impacts 54 | 55 | | Group | Key Contacts | Date | Impacted? | 56 | |-----------------------|-------------------| ---------- | --------- | 57 | | ODH Dashboard Team | @andrewballantyne | date | ? | 58 | | IDE Team | @harshad16 | date | ? | 59 | | DS Pipelines Team | @HumairAK | date | ? | 60 | | Serving Team | @Jooho | date | ? | 61 | | TrustyAI Team | @RobGeada | date | ? | 62 | | Docs Team | Manuela Ansaldo | date | ? | 63 | | Distributed Workloads | @anishasthana | date | ? | 64 | 65 | ## Reviews 66 | 67 | | Reviewed by | Date | Notes | 68 | |---------------|------------| ------| 69 | | Edson Tirelli | 2024-02-12 | ? | 70 | -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-Operator-0001-distributed-manifests.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - odh-manifests git repository transition 2 | 3 | | | | 4 | | -------------- | ---------- | 5 | | Date | 2023-08-28 | 6 | | Scope | | 7 | | Status | Approved | 8 | | Authors | [Wen Zhou](@zdtsw) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This document outlines a solution to transform the current setup of hosting manifests within a centralized `odh-manifests` git repository into separate repositories for each individual component. 
17 | 18 | ## Why 19 | 20 | The existing structure of having a singular `odh-manifests` git repository for hosting manifests across all components presents several drawbacks: 21 | 22 | - Manifests Duplication: The presence of duplicated manifests can lead to confusion among community users. 23 | - Synchronization Challenges: Ensuring timely updates from component repositories to `odh-manifests` becomes problematic. 24 | - Scalability Concerns: Extending the scope of Open Data Hub to include new tier-0/1 components presents complexities, despite its current status as the solution for tier-0/1. 25 | 26 | ## Goals 27 | 28 | - Reduced Human Error: Streamlining the release cycle to minimize human errors. 29 | - Enhanced Product Quality: Increasing confidence in the overall product quality. 30 | - Developer Support: Assisting developers in validating and troubleshooting changes within their respective domains. 31 | - This approach aims to enhance the organization and efficiency of manifest management within Open Data Hub's ecosystem. 32 | 33 | ## Non-Goals 34 | 35 | This ADR is specifically intendeded for tier-0/1 components, which are supported across all repositories within the opendatahub-io organization. 36 | 37 | ## How 38 | 39 | To achieve this transition, a multi-step approach is proposed: 40 | 41 | - Component Repositories: Each component's git repository will independently host its manifests. Progress towards this transition is tracked in the checklist below: 42 | 43 | | Component | Default Git Repo | Default Git branch/tag| Transition finished ? | 44 | | ----------------------- | ----------------------------------------------- |----------------- | ----------- | 45 | | Platform | opendatahub-io/opendatahub-operator | main | Yes | 46 | | Dashboard | opendatahub-io/odh-dashboard | incubation | Yes | 47 | | Data Science Pipelines | opendatahub-io/data-science-pipelines-operator | main | Yes | 48 | | Kserve | opendatahub-io/kserve | release-v0.11 | No | 49 | | Modelmesh | opendatahub-io/modelmesh-serving | - | No | 50 | | Workbenches | opendatahub-io/notebooks | main | Yes | 51 | | Workbenches | opendatahub-io/kubeflow | v1.7-branch | Yes | 52 | | Ray | opendatahub-io/distributed-workloads | main | Yes | 53 | | Codeflare | opendatahub-io/distributed-workloads | main | Yes | 54 | 55 | - Operator Integration: Within the operator, a function will be introduced to fetch manifests from individual component repositories during image build processes. 56 | - Archival of odh-manifests: During the image build process, the current `odh-manifests` git repository is downloaded into a tarball file, which is subsequently decompressed into the final operator image. This approach is currently in use. However, as part of this transition, the `odh-manifests` git repository will be archived into read-only mode to align with the evolving workflow. 57 | - Development Mode API: A new API field in `DataScienceCluster` will be integrated into the operator for development mode, enabling the fetching of manifests from component repositories during runtime. 58 | 59 | ## Open Questions 60 | 61 | - Quality Assurance for Manifests: the component team is responsible for ensuring the quality of its manifests. Any changes should undergo thorough verification before merging into the default branch, as it serves as a crucial integration point. ODH nightly build will be generated and subjected to essential verification steps, enabling a rapid feedback loop. 
62 | - Flexible Component Repository Proposal: the proposal is to use the component repository within the "opendatahub-io" organization. However, if this doesn't align with most use cases, we are open to making it more configurable to accommodate different components. 63 | - Verification Process Proposal: In the current workflow, changes made to the odh-manifests Git repository undergo comprehensive testing steps as a pre-merge CI process. However, a defined process for post-transition verification remains pending. This raises the question of whether verification should be conducted within the Operator CI system or if we should rely on the ODH nightly build solely for post-merge verification purposes. Further clarification and decision-making are needed in this regard. 64 | - During the transition period, the operator continues to utilize the `odh-manifests` git repository to retrieve manifests for any components that are not yet ready to host manifests. 65 | 66 | ## Alternatives 67 | 68 | N/A 69 | 70 | ## Security and Privacy Considerations 71 | 72 | N/A 73 | 74 | ## Risks 75 | 76 | There are a couple of potential risks associated with the proposed approach: 77 | 78 | - Operator Logic Updates: The component teams will be required to update the logic within the operator to align with the newly updated manifests. This transition may demand additional effort and coordination to ensure a seamless integration. 79 | - Non-Production Manifests in Runtime: As a consequence of the new setup, there's a possibility that users might unintentionally use non-production manifests to build image or during runtime. This could potentially lead to unexpected behavior or issues. 80 | - Monitoring Maintenance: The platform team will take primary responsibility for maintaining any modifications made to downstream monitoring manifests. 81 | - ODH Release Cycle: Each component must provide a valid release tag from its git repository for the release coordinator to update the configuration in the operator. Otherwise, it falls back to using the default branch or tag as specified in the table above. 82 | 83 | Addressing these risks through careful planning and communication will be crucial to the success of the manifest repository transition. 84 | 85 | ## Stakeholder Impacts 86 | 87 | | Group | Key Contacts | Impacted? 
| 88 | | ----------------------- | ----------------------------------------- | --------- | 89 | | Platform | [Landon LaSmith](@LaVLas) | Yes | 90 | | Open Data Hub UI | [Andrew Ballantyne](@andrewballantyne) | Yes | 91 | | Data Science Pipelines | [Giulio Frasca](@gmfrasca) | Yes | 92 | | Model Serving | [Daniel Zonca](@danielezonca) | Yes | 93 | | Workbenches | [Harshad Reddy Nalla](@harshad16) | Yes | 94 | | Distributed Workloads | [Anish Asthana](@anishasthana) | Yes | 95 | 96 | ## References 97 | 98 | N/A 99 | 100 | ## Reviews 101 | 102 | | Reviewed by | Date | Notes | 103 | | ------------------------------ | ------------ | ----- | 104 | |[Humair Khan](@HumairAK) | 2023-09-08 | ----- | 105 | |[Anish Asthana](@anishasthana) | 2023-09-10 | ----- | 106 | |[Vaishnavi Hire](@VaishnaviHire)| 2023-09-11 | ----- | 107 | |[Harshad Reddy Nalla](@harshad16)|2023-09-11 | ----- | 108 | |[Joohoo Lee](@Jooho) | 2023-09-20 | ----- | 109 | |[Andrew Ballantyne](@andrewballantyne)| 2023-09-25|-----| 110 | -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-Operator-0002-operator-scope.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Operator Scope 2 | 3 | | | | 4 | | -------------- | ---------- | 5 | | Date | Sep 5th, 2023 | 6 | | Scope | Open Data Hub Operator | 7 | | Status | Approved | 8 | | Authors | [Edson Tirelli](@etirelli), [Vaishnavi Hire](@VaishnaviHire) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | [Tracker issue](https://github.com/opendatahub-io/opendatahub-operator/issues/158) | 12 | | Other docs: | [TODO: add link to ODH Operator Design](http://) | 13 | 14 | ## What 15 | 16 | OpenShift and Kubernetes operators can be Namespace scoped, Multi-namespace scoped or Cluster scoped. The choice of the 17 | scope has impacts both on required permissions as well as operator capabilities. 18 | 19 | The ODH Operator v2.x is a meta-operator that manages a number of resources and other operators in the OpenShift cluster. 20 | As such, the operator was implemented as a Cluster Scoped Operator. This ADR captures the reasoning and impact of the 21 | decision. 22 | 23 | ## Why 24 | 25 | The ODH operator acts like a meta operator, installing and managing other operators and resources. In particular the operator manages the following types of resources, among possibly others: 26 | 27 | - Namespaces 28 | - Deployment 29 | - Role 30 | - ClusterRole 31 | - RoleBindings 32 | - ClusterRoleBindings 33 | - ConfigMaps 34 | - Secrets 35 | - Service 36 | - NetworkPolicy 37 | - Route 38 | 39 | The operator deploys the following Custom Resource Definitions: 40 | 41 | - DSCInitialization 42 | - DataScienceCluster 43 | - FeatureTracker 44 | 45 | The operator (optionally) requires and/or manages the following dependent operators, among others: 46 | 47 | - KServe 48 | - ModelMesh 49 | - Data Science Pipeline Operator 50 | - KubeRay 51 | - Codeflare Operator 52 | - Kueue 53 | - Training Operator 54 | - TrustyAI Operator 55 | - Model Registry Operator 56 | 57 | In order to properly manage these resources, the operator requires access and permissions across namespaces. 
In particular, the operator creates and/or manages the following namespace (the namespace name can be changed via configuration): 58 | 59 | - opendatahub 60 | - odh-model-registries 61 | 62 | The operator also leverages the platform’s [Owner References](https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/) capability that ensures all dependent resources are owned by the operator and their life cycles are tracked and managed cleanly. Kubernetes however restricts the use of owner references in a way that only cluster scoped resources can own other cluster scoped resources, leading to the requirement of the operator being cluster scoped. 63 | 64 | ## Goals 65 | 66 | * The operator needs to support management of resources across namespaces 67 | * The operator must track all resources it creates and manages 68 | * The operator must remove resources created by a managed custom resource when that custom resource is removed 69 | 70 | ## Non-Goals 71 | 72 | * 73 | 74 | ## How 75 | 76 | The ODH operator v2.x is set to Cluster scope. 77 | 78 | ## Alternatives 79 | 80 | We considered the use of a namespace scoped operator, but the following limitations were determinant to choose the Cluster scope instead: 81 | 82 | ### 1. Some dependent resources are cluster scoped 83 | Some of the resources/dependencies that OpenShift AI requires are cluster scoped (e.g. Serverless, ModelMesh, etc). Managing these resources/dependencies requires the operator to also be cluster scoped. 84 | The operator also introduces two new CRDs (DSCInitialization) and (DataScienceCluster) that are cluster scoped. 85 | 86 | ### 2. Tracking and management of dependent resources 87 | One of the drivers for the design and development of the ODH v2 operator was the need to increase the resilience and reliability of the tracking and management of dependent resources. In particular the operator should provide accurate, clear and reliable diagnostics in case of problems with dependencies, and be able to properly maintain a clean cluster in case of upgrades or uninstalls. 88 | The operator leverages platform capabilities for that task, like owner references. Namespaced operators cannot own resources outside their own namespace (as per Kubernetes design). Scoping the operator to a namespace would require us to stop using owner references and implement from scratch the management of resources, using a different mechanism like labels. That would be complex, brittle, and duplicate functionality available in the platform itself. 89 | 90 | ### 3. Contradictory requirements with other users and best practices 91 | The use of namespaced operator would prevent us from meeting the requirements of other customers and users (e.g. automated install and upgrades of resources across namespaces). 92 | 93 | ## Tradeoffs 94 | 95 | The main drawback of using a cluster scoped operator is the impossibility of running multiple instances and/or versions of RHOAI in the same cluster. Users that need multiple instances/versions of RHOAI are required to use one cluster for each. For cases where cost is a concern, alternatives like Hosted Control Plane can be considered. 96 | 97 | ## Stakeholder Impacts 98 | 99 | | Group | Key Contacts | Date | Impacted? | 100 | | ----------------------------- | ---------------- | ---------- | --------- | 101 | | ODH Platform Team | Vaishnavi Hire | 2023/09/05 | ? 
| 102 | 103 | ## References 104 | 105 | * ODH Operator Design document 106 | 107 | ## Reviews 108 | 109 | | Reviewed by | Date | Notes | 110 | | ----------------------------- | --------- | ------| 111 | | [Vaishnavi Hire](https://github.com/VaishnaviHire) | Sep 15, 2023 | | 112 | | [Wen Zhou](https://github.com/zdtsw) | Sep 15, 2023 | | 113 | | [Trevor Royer](https://github.com/strangiato) | Sep 15, 2023 | | 114 | 115 | -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-Operator-0003-component-integration.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - ODH component Integration with DataScienceCluster 2 | 3 | | | | 4 | | -------------- |----------------------------------| 5 | | Date | 2023-09-18 | 6 | | Scope | | 7 | | Status | Draft | 8 | | Authors | [Vaishnavi Hire](@VaishnaviHire) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This document outlines design decision to integrate ODH components with DataScienceCluster CRD. The document also defines the rationale behind fostering a close-knit integration between the individual components and the operator API. 17 | 18 | ## Why 19 | 20 | The [KfDef](https://github.com/opendatahub-io/opendatahub-operator/blob/master/config/crd/bases/kfdef.apps.kubeflow.org_kfdefs.yaml) CRD, defined by the v1.x of ODH operator allowed any valid kustomize manifests to be deployed to the OpenShift cluster. 21 | However, this design introduced following drawbacks: 22 | 23 | - **Managing Resources:** Monitoring all resources for custom components became strenuous, coupled with the hindrance in utilizing owner references for a clean-up process. 24 | - **Limited Customization:** The existing structure restricted the customization of components, offering limited accessibility to component-specific fields through the API. 25 | - **Duplication of GitOps Workflow/ kustomize-build:** The KfDef CRD replicated the kustomize build functions, presenting no supplemental features post the deployment of the components. 26 | 27 | ## Goals 28 | 29 | - **Increased Component Customization:** Allowing the [DataScienceCluster](https://github.com/opendatahub-io/opendatahub-operator/blob/main/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml) API to expose every integrated component will accord users the flexibility to directly configure component-specific fields via the CRD, thereby expanding customization scope. 30 | - **Improved Component Management:** As every component is tightly coupled with the operator, the controller is aware of the resources being deployed and only has the permissions to watch and manage those specific resources. This also allows operator to manage component lifecycle and upgrades. 31 | - **Informed Approach:** The goal is to ensure that the operator has knowledge of the components being deployed, and make intelligent decisions based on that knowledge. 32 | ## Non-Goals 33 | 34 | - This ADR will not define transition of Tier 2 components into Tier 0/1. 35 | 36 | ## How 37 | 38 | - To achieve this transition, any new component should be integrated with ODH Operator by following steps given [here](https://github.com/opendatahub-io/opendatahub-operator/blob/main/components/README.md). As a result update the DataScieceCluster API to expose new component fields. 
39 | - Ensure any components that are integrated follow the requirements for Tier 0/1 components. 40 | 41 | ## Open Questions 42 | 43 | - **Quality Assurance for Components:** The component team is responsible for ensuring unit tests are added to any new component specific code and for update operator [e2e tests](https://github.com/opendatahub-io/opendatahub-operator/blob/main/tests/e2e/helper_test.go#L55) to include testing of the 44 | new component. 45 | 46 | 47 | ## Alternatives 48 | 49 | - Any valid kustomize manifests that users want to deploy alongside ODH integrated components, can be deployed using kustomize build 50 | or GitOps workflow. 51 | 52 | ## References 53 | 54 | N/A 55 | 56 | -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-Operator-0005-configure-resources.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Whitelist some component fields for user customizations 2 | 3 | | | | 4 | |----------------|--------------------------------------------------------------------------| 5 | | Date | 2024-03-07 | 6 | | Scope | | 7 | | Status | Draft | 8 | | Authors | [Vaishnavi Hire](@VaishnaviHire) | 9 | | Supersedes | https://github.com/opendatahub-io/architecture-decision-records/pull/23 | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This document outlines design decision to introduce ability for user customizations of fields like resources and replicas. 17 | 18 | ## Why 19 | 20 | ODH component deployments lack a mechanism for customizing resource limits, requests and deployment replicas. Deployment fields are hard-coded in the manifests with no means to adjust them according to user requirements. 21 | We need a mechanism for users to configure resources when available resources are limited. 22 | 23 | ## Goals 24 | 25 | * Enable the configuration of resource limits, requests and replicas for individual components. 26 | * Introduce an internal kustomize plugin that will whitelist fields like `replicas` and `resources`. This means any changes to these 27 | fields will not be overwritten by the operator. 28 | * We will not update or expose any fields 29 | 30 | ## Non-Goals 31 | 32 | - This ADR will not define customization parameters other than `resources` and `replicas`. 33 | 34 | ## How 35 | 36 | * Implementation for this will be done in two phases 37 | * Introduce customizations only in **Kserve** component. This is to address resource utilization [issues](https://github.com/kserve/kserve/issues/3467) in kserve. 38 | * Replicate the functionality in other components that require customization. 39 | * Kustomize [plugins](https://github.com/kubernetes-sigs/kustomize/tree/master/plugin/builtin) can be used to patch resources once deployed. 40 | 41 | 42 | ## Alternatives 43 | 44 | - N/A 45 | 46 | 47 | ## Stakeholder Impacts 48 | 49 | | Group | Key Contacts | Impacted? | 50 | | ----------------------- |----------------------------------------------------------------|-----------| 51 | | Platform | [Landon LaSmith](@LaVLas), [Edson Tirelli](@etirelli) | Yes | 52 | | Model Serving | [Daniel Zonca](@danielezonca), [Edgar Hernández](@israel-hdez) | Yes | 53 | | ODH Dashboard Team | @andrewballantyne | ? | 54 | | IDE Team | @harshad16 | ? | 55 | | DS Pipelines Team | @HumairAK | ? | 56 | | Serving Team | @Jooho | ? | 57 | | TrustyAI Team | @RobGeada | ? | 58 | | Distributed Workloads | @dimakis | ? 
| 59 | 60 | ## References 61 | 62 | N/A 63 | 64 | -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-Operator-0006-internal-api.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Architecture Decision Record template 2 | 3 | 4 | 5 | 6 | 7 | | | | 8 | | -------------- | ---------- | 9 | | Date | Sep 13th, 2024 | 10 | | Scope | Open Data Hub Operator | 11 | | Status | Approved | 12 | | Authors | [Luca Burgazzoli](@lburgazzoli), [Vaishnavi Hire](@VaishnaviHire) | 13 | | Supersedes | N/A | 14 | | Superseded by: | N/A | 15 | | Tickets | | 16 | | Other docs: | none | 17 | 18 | ## What 19 | 20 | The Open Data Hub Operator is a meta-operator that manages a number of resources and other operators in the OpenShift cluster leveragin a number of CRDs: 21 | - **DSCInitialization (DSCI)**: a cluster scope user-facing API that the operator creates to perform initial setup common for all components. 22 | - **DataScienceCluster (DSC)**: a cluster scope user-facing API that the ODH Operator watches to enable and configure various data science components. 23 | - **FeatureTracker**: an internal API that represents a cluster-scoped resource in the Data Science Cluster specifically designed for monitoring and managing component related resources created by the operator. 24 | 25 | This document outlines design decision to introduce additional, internal only, components specific CRDs. 26 | 27 | ## Why 28 | 29 | The deployment of components is handled by a single reconcile loop that is in charge to deploy all the enabled components within the platform. 30 | This means that one centralized loop handles the configuration, updates, and error handling for all components. 31 | 32 | ![architecture](assets/ODH-ADR-Operator-0006/odh-operator-current.png) 33 | 34 | However, this design introduced following drawbacks: 35 | - **Scalability**: as the ODH Operator evolves and new features or components are added, the single reconcile loop can become a significant bottleneck. The centralized nature of the loop means that any update or change in one component triggers a reconciliation for all components, even those unaffected by the change. This can lead to delays in processing, reduced responsiveness, and a potential decrease in the overall performance of the operator. 36 | - **Granularity**: the single reconcile loop lacks granularity, meaning that any update or error in one component forces the loop to reconcile all components. This can cause unnecessary workload and resource consumption, particularly in large deployments where only a subset of components may need updating. 37 | - **Error handling and failure isolation**: One of the most significant challenges with this model is error handling and failure isolation. Since all components are managed within the same loop, a failure in one part of the reconciliation process can affect the entire loop, potentially disrupting the deployment or management of other components. 38 | 39 | ## Goals 40 | 41 | - Improve efficiency and scalability of the ODH Operator. 42 | - Improve developer productivity and ability to add more features/components/subsystems over time. 43 | - Improve visibility of the state of the platform. 44 | - Reduce resource ustilisation, having a more fine grained reconciliation and impacting only components that have changed, so less resources spent rendering not impacted resources and less call to the API server. 
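To make these goals concrete, the sketch below shows what one of the internal, component-specific resources introduced in the How section further down might look like. The group, kind, and fields are hypothetical placeholders for illustration; they are not a published or user-facing API.

```yaml
apiVersion: components.platform.opendatahub.io/v1alpha1   # hypothetical internal group/version
kind: Dashboard                                           # hypothetical per-component kind
metadata:
  name: default-dashboard                                 # created and owned by the DSC reconciler
spec: {}                                                  # configuration propagated from the DataScienceCluster
status:
  conditions:
    - type: Ready
      status: "True"
  url: https://odh-dashboard.example.com                  # component-specific detail surfaced back to the DSC status
```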
45 | 46 | ## Non-Goals 47 | 48 | * Provide additional, user facing CRDs. 49 | 50 | ## How 51 | 52 | Move from a monolithic reconciliation loop to a more modular approach that manages components individually by: 53 | 54 | - Introducing more granular reconciliation. 55 | - Introducing a set of internal APIs/CRDs. 56 | - These CRDs will be used exclusively by the operator for internal management and will not be exposed to end-users and marked as [internal objects](https://docs.openshift.com/container-platform/4.16/operators/operator_sdk/osdk-generating-csvs.html#osdk-hiding-internal-objects_osdk-generating-csvs) so the OCP console hides them in the Operator UIs. 57 | - Since CRDs are public, the ODH Operator will exclusively own them and it will revert any changes applied outside its control. 58 | - Each internal API/CRD will have its own reconciliation loop, which: 59 | - Is responsible for managing the lifecycle and state of the associated components. 60 | - Surfaces any relevant information as part of the component specific status (versions, namespace, routes, service, etc). 61 | - The DSC reconciler would be in charge: 62 | - To create/update/delete internal CRs depending on the management state and configuration of the components. 63 | - To updates status of the DSC based on individual component CR statuses. 64 | 65 | 66 | ![architecture](assets/ODH-ADR-Operator-0006/odh-operator-next.png) 67 | 68 | ## Alternatives 69 | 70 | N/A 71 | 72 | ## Stakeholder Impacts 73 | 74 | | Group | Key Contacts | Date | Impacted? | 75 | | ------------------ | ---------------- | ---------- | --------- | 76 | | ODH Platform Team | [Luca Burgazzoli](@lburgazzoli), [Vaishnavi Hire](@VaishnaviHire) | 2024/09/13 | YES | 77 | 78 | ## Reviews 79 | 80 | | Reviewed by | Date | Notes | 81 | | ----------------------------- | --------- | ------| 82 | | name | date | ? | 83 | -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-Operator-0007-auth-crd.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Addition of an Auth CR 2 | 3 | 4 | 5 | 6 | 7 | | | | 8 | | -------------- | ---------- | 9 | | Date | 22-10-2024 | 10 | | Scope | Open Data Hub Operator | 11 | | Status | Accepted | 12 | | Authors | [Steven Tobin](@StevenTobin) | 13 | | Supersedes | N/A | 14 | | Superseded by: | N/A | 15 | | Tickets | [RHAOIENG-14807](https://issues.redhat.com/browse/RHOAIENG-14807)| 16 | | Other docs: | none | 17 | 18 | ## What 19 | 20 | This document outlines the decision to implement an auth CRD in ODH. 21 | 22 | ## Why 23 | 24 | There is a growing need for the ODH operator to provide centralized authentication and authorization services for the platform. The near term needs are : 25 | 26 | - Add support for managing user groups. Currently this is managed by the Dashboard component and needs to be centralized. 27 | - Prepare for upcoming for changes to handle OCP Platform support for external OIDC authentication 28 | 29 | Longer term, this API and its controller will be enhanced to handle additional RBAC configuration currently handled in the Dashboard component, such as creating roles and role bindings. 30 | 31 | Centralising auth concerns will simplify the architecture of auth features, security and ease of use of these features for users. 32 | 33 | 34 | ## Goals 35 | 36 | * Define a new auth CRD. 
37 | 38 | ## Non-Goals 39 | 40 | 41 | ## How 42 | 43 | A new API is added to the operator in the form of an auth CRD in the services api group. This will initially handle a list of adminGroups and allowedGroups (migrated from the groupsConfig field of the dashboardConfig) which the operator will reconcile for access to dashboard UIs and applying requisite openshift permissions. This is intended to be the canonical place for auth configuration for future auth initiatives. 44 | 45 | The Auth CR is a singleton like the DSC and DSCi CRs. 46 | 47 | An example of the CR: 48 | ``` 49 | apiVersion: services.platform.opendatahub.io/v1alpha1 50 | kind: Auth 51 | metadata: 52 | name: odhAuth 53 | spec: 54 | adminGroups: [] 55 | allowedGroups: [] 56 | ``` 57 | 58 | ## Open Questions 59 | * Migration Path: 60 | * We will use [CEL](https://kubernetes.io/blog/2022/09/29/enforce-immutability-using-cel/#immutability-upon-object-creation) to make the groupsConfig field in the current OdhDashboardConfig CRD immutable. The operator will manage copying the content of the field over to the new CRD and the Auth CRD will be the new source of truth for adminGroups and allowedGroups. 61 | 62 | ## Alternatives 63 | 64 | * Continue to not have a central API for auth. 65 | 66 | ## Security and Privacy Considerations 67 | 68 | 69 | ## Risks 70 | 71 | 72 | ## Stakeholder Impacts 73 | 74 | | Group | Key Contacts | Date | Impacted? | 75 | | ----------------------------- | ---------------- | ---------- | --------- | 76 | | ODH platform | [Luca Burgazzoli](@lburgazzoli), [Chris Sams](@), [Lindani Phiri](@), [Steven Tobin](@StevenTobin) | 22-10-2024 | ? | 77 | 78 | 79 | ## References 80 | 81 | * optional bulleted list 82 | 83 | ## Reviews 84 | 85 | | Reviewed by | Date | Notes | 86 | | ----------------------------- | --------- | ------| 87 | | name | date | ? | 88 | -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-Operator-0007-components-version-mapping.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Map component upstream versions to ODH releases 2 | 3 | | | | 4 | | -------------- |----------------------------------| 5 | | Date | 2024-10-31 | 6 | | Scope | Open Data Hub Operator | 7 | | Status | Approved | 8 | | Authors | [Saravana Srinivasan](@sasriniv) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This document is intended to outline the design decisions made to map upstream versions of components that are supported by Data Science Cluster to ODH releases. 17 | 18 | ## Why 19 | 20 | Users are expecting to know the list of upstream components and their versions that are being shipped with the product. There were already several requests from users wanting to know the component versions during releases. 21 | 22 | ## Goals 23 | 24 | - To maintain a standard to capture the list of upstreams of the components along with their version and repository url that are supported by Data Science Cluster. 25 | - Have it displayed in the Data Science Cluster's components status. 26 | 27 | ## How 28 | 29 | - Component teams are expected to create and maintain a "component_metadata.yaml" file in their repositories, in the root of the directory from where the manifests are retrieved by the ODH operator at build time. The yaml can contain release details of the upstreams in the format specified below. 
30 | ``` 31 | releases: 32 | - name: 33 | version: 34 | repositoryurl: 35 | - name: 36 | version: 37 | repositoryurl: 38 | ``` 39 | - Develop code logic on the ODH operator to read through the component_metadata.yaml file. 40 | - Update the component status section in Data Science Cluster with the information read from the yaml file 41 | 42 | ## Open Questions 43 | 44 | N/A 45 | 46 | ## Responsibility 47 | 48 | - **Updating version details:** The component teams are responsible for creating and maintaining the "component_metadata.yaml" file containing details of the upstreams and also, to promptly update the same when there are new additions to the upstreams. 49 | 50 | 51 | ## Alternatives 52 | 53 | - Initially, it was proposed to maintain this version information in an .env file. But, with further discussions and understanding, certain components have multiple upstreams with them. Maintaining and fetching information about multiple upstream releases from an env file would be cumbersome, this pushed us to choose a yaml file where we can group details of each of the upstreams. 54 | 55 | ## Stakeholder Impacts 56 | 57 | | Group | Key Contacts | Date | Impacted? | 58 | | ------------------------- | --------------------------------------------------------------- | ---------- | --------- | 59 | | ODH Platform Team | @lburgazzoli @lphiri | | y | 60 | | Model Serving | [Daniel Zonca](@danielezonca), [Edgar Hernández](@israel-hdez) | | y | 61 | | Model Serving Runtimes | [Sean Pryor](@Xaenalt), [Vaibhav Jain](@vaibhavjainwiz) | | y | 62 | | ODH Dashboard Team | @andrewballantyne | | y | 63 | | IDE Team | @harshad16 | | y | 64 | | DS Pipelines Team | @HumairAK @gmfrasca | | y | 65 | | Serving Team | @Jooho | | y | 66 | | TrustyAI Team | @RobGeada @ruivieira | | y | 67 | | Distributed Workloads | @dimakis @amsharma3 | | y | 68 | 69 | ## References 70 | 71 | RHOAISTRAT-327 [Refinement Document](https://docs.google.com/document/d/1nbQB-uA48x79Ci3xrpMHGfdo3XjTR4xirtWk0we0kl8) 72 | -------------------------------------------------------------------------------- /architecture-decision-records/operator/ODH-ADR-Operator-0008-resources-lifecycle.md: -------------------------------------------------------------------------------- 1 | # Open Data Hub - Resource Lifecycle Management in opendatahub-operator 2 | 3 | | | | 4 | | -------------- |----------------------------------| 5 | | Date | 2025-04-05 | 6 | | Scope | Open Data Hub Operator | 7 | | Status | TBD | 8 | | Authors | [Luca Burgazzoli](@burgazzoli) | 9 | | Supersedes | N/A | 10 | | Superseded by: | N/A | 11 | | Tickets | | 12 | | Other docs: | none | 13 | 14 | ## What 15 | 16 | This ADR defines how the opendatahub-operator manages the lifecycle of the Kubernetes resources it provisions using component-defined manifests and the `opendatahub.io/managed` annotation. 17 | 18 | ## Why 19 | 20 | The operator provisions and reconciles resources defined by components. 21 | To support a variety of use cases—including user-customizable objects and create-only behavior. There needs to be a clear contract regarding how resources are managed over time, especially in the presence of manual modifications or evolving manifests. 22 | 23 | ## Goals 24 | 25 | - Define consistent resource management semantics. 26 | - Allow component developers to declare create-only resources. 27 | - Allow end users to take ownership of specific resources post-deployment. 28 | - Prevent unintended reconciliation or overwriting of user-managed resources. 
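As an illustration of the create-only goal, a component manifest could mark a resource that should be seeded once and then left to users as shown below. The annotation is the one this ADR defines; the ConfigMap name and contents are hypothetical.

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: example-component-defaults      # hypothetical resource shipped in a component's manifests
  annotations:
    opendatahub.io/managed: "false"     # create-only: applied once, then not reconciled afterwards
data:
  logLevel: info                        # users may edit this value without it being overwritten
```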
29 | 30 | ## Terminology 31 | To ensure clarity throughout this document, we define the following terms: 32 | 33 | - Kustomized Manifest: The YAML configuration that has passed through the kustomization process but has not yet been applied to the cluster. This represents the intended state of resources as defined by component/services manifests and processed by kustomize. 34 | - Kubernetes Object: The actual resource that exists in the Kubernetes cluster after manifests have been applied. These are the live entities that the operator manages and users can interact with. 35 | 36 | ## How 37 | 38 | The operator evaluates the presence and value of the `opendatahub.io/managed` annotation on both the **Kustomized Manifest** and the **Kubernetes Object**: 39 | 40 | | Annotation Location | Value | Behavior | 41 | |--------------------------|------------------------------|--------------------------------------------------------------------------| 42 | | **Kustomized Manifest** | `"false"` | Resource is created once, not reconciled afterward (create-only). | 43 | | **Kustomized Manifest** | _Missing_ or `"true"` | Operator fully manages and reconciles the resource. | 44 | | **Kubernetes Object** | `"false"` | Operator skips reconciliation and treats the object as user-owned. | 45 | | **Kubernetes Object** | _Missing_ or `"true"` | Operator enforces manifest state and overwrites any manual modifications.| 46 | 47 | ### Additional Behavior 48 | 49 | - The `opendatahub.io/managed` annotation defined in Kustomized Manifests is **not propagated** to the resulting Kubernetes Objects to avoid misleading users into thinking they can control lifecycle via the cluster object. 50 | - For Kustomized Manifests with `opendatahub.io/managed: "false"`, the operator does not set an owner reference on the created Kubernetes Objects. This means that these objects will remain in the cluster rather than being garbage-collected when the component/service the objects are part of get removed/disabled. 51 | - **In all cases**, if the Kubernetes Object is deleted from the cluster (either accidentally or manually), the operator will **recreate it** during the next reconciliation loop. This ensures declared state is always realized, regardless of whether the object is fully managed or marked as create-only. 52 | 53 | 54 | ## Open Questions 55 | 56 | N/A 57 | 58 | ## Responsibility 59 | 60 | The ODH Platform team is responsible for implementing and maintaining the behavior described in this ADR within the `opendatahub-operator`. 61 | 62 | ## Alternatives 63 | 64 | N/A 65 | 66 | ## Stakeholder Impacts 67 | 68 | | Group | Key Contacts | Date | Impacted? 
| 69 | | ------------------------- | --------------------------------------------------------------- | ---------- | --------- | 70 | | ODH Platform Team | @lphiri | | y | 71 | | Model Serving | | | y | 72 | | Model Serving Runtimes | | | y | 73 | | Model Registry | | | y | 74 | | ODH Dashboard Team | @andrewballantyne | | y | 75 | | IDE Team | | | y | 76 | | DS Pipelines Team | | | y | 77 | | Serving Team | | | y | 78 | | TrustyAI Team | | | y | 79 | | Distributed Workloads | | | y | 80 | 81 | ## References -------------------------------------------------------------------------------- /architecture-decision-records/operator/assets/ODH-ADR-Operator-0006/odh-operator-current.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/architecture-decision-records/operator/assets/ODH-ADR-Operator-0006/odh-operator-current.png -------------------------------------------------------------------------------- /architecture-decision-records/operator/assets/ODH-ADR-Operator-0006/odh-operator-next.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/architecture-decision-records/operator/assets/ODH-ADR-Operator-0006/odh-operator-next.png -------------------------------------------------------------------------------- /documentation/README.md: -------------------------------------------------------------------------------- 1 | # RHOAI Architecture 2 | 3 | ## Architecture Overview 4 | [Architecture Overview](arch-overview.md) 5 | 6 | ## Component Architecture Details 7 | [Components](components) 8 | -------------------------------------------------------------------------------- /documentation/components/dashboard/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/dashboard/.gitkeep -------------------------------------------------------------------------------- /documentation/components/dashboard/assets/featureFlags.drawio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /documentation/components/dashboard/assets/featureFlags.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/dashboard/assets/featureFlags.png -------------------------------------------------------------------------------- /documentation/components/dashboard/configuringDashboard.md: -------------------------------------------------------------------------------- 1 | [OdhDashboardConfig]: ./README.md#odhdashboardconfig-singleton 2 | 3 | # Configuring the Dashboard 4 | 5 | * [The Big Picture](#the-big-picture) 6 | * [Configuring Features On/Off](#configuring-features-onoff) 7 | * [UI-K8s Features](#ui-k8s-feature-eg-ds-projects-feature) 8 | * [UI-Backend Component Features](#ui-backend-component-feature-eg-ds-pipelines-feature) 9 | * [Configuring Aspects of Features](#configuring-aspects-of-features) 10 | 11 | ## The Big Picture 12 | 13 | ![featureFlags.png](assets%2FfeatureFlags.png) 14 | 15 | - (1) The user accesses the dashboard from their computer 16 | - (1a) They'll access the Dashboard route url 17 | - (1b) The client is served resources specifically from one pod (irrespective of replica counts) 18 | - (1c) This pod, for all intents and purposes, is the only thing the client sees & operates with 19 | - (2) All features use our "areas" concept (explained in more detail [below](#configuring-features-onoff)) 20 | - (3) "Areas" fetches all needed settings from the cluster 21 | - (3a) From the DSC & DSCI we get only `.status` values (this is typically only for features backed by an [OpenShift AI served backend](#ui-backend-component-feature-eg-ds-pipelines-feature)) 22 | - (3b) From our own OdhDashboardConfig, we will get our feature flags 23 | - (4) Every 2 minutes, each pod refreshes the internal cached state of the DashboardConfig (this delays any changes you make in this area) 24 | 25 | ## Configuring Features On/Off 26 | 27 | The Dashboard uses a concept we call "areas" to control the flow of features being visible in the Dashboard UI. It is effectively a 2-flag system around every feature we add. 28 | 29 | 1. A flag for the API installation 30 | 2. A flag for the UI installation 31 | * You may want to disable a UI flow, and interact via an API-driven flow, so you want the backend, but not the frontend 32 | 33 | "Areas" are a combination of system settings which can be simplified to these questions: 34 | 35 | * Does the Operator, specifically the DataScienceCluster (DSC), have the particular backend component installed? 36 | * Does the [OdhDashboardConfig] have the feature flag enabled? 37 | * Are other areas relied on (as a foundation) by this feature & are they installed? 38 | 39 | If the "area" is configured with any combination of the 3 questions above, every part of that combination must be true for the feature to be visible in the UI. 40 | 41 | A couple of examples are visible below, using snippets of the Dashboard configuration. 42 | 43 | ### UI-K8s Feature (eg. DS Projects feature) 44 | 45 | Our configuration would look something like this: 46 | ```javascript 47 | const configurations = { 48 | [SupportedArea.DS_PROJECTS_VIEW]: { 49 | featureFlags: ['disableProjects'], 50 | }, 51 | [SupportedArea.DS_PROJECTS_PERMISSIONS]: { 52 | featureFlags: ['disableProjectSharing'], 53 | reliantAreas: [SupportedArea.DS_PROJECTS_VIEW], 54 | }, 55 | // ...
56 | } 57 | ``` 58 | 59 | * The block above simply says "our feature flag `disableProjects` needs to be enabled to show the DS Projects View (the navigation-based view / list page)" 60 | * The second configuration `DS_PROJECTS_PERMISSIONS` allows us to configure a sub-portion of the DS Projects feature so that you can disable our Project sharing feature (if you're an admin of a project, you can share your project with another user / group on the cluster; aka invite them to join your project) -- this feature is reliant on the Project View being visible (as it's a sub feature) -- but this allows us to toggle the sub feature independently 61 | 62 | As you can see, there are no backend components listed here. This is because this feature is built on the K8s backend itself -- we are interacting with the K8s resources without needing a dedicated OpenShift AI backend. 63 | 64 | Note: this would effectively mean there is only 1 active layer of our 2-layered flag system for features like this. 65 | 66 | > Tip: To know what features are designed this way, consider the output of each feature. eg. [Creating a DS Project is just an OpenShift Project](#projects---openshift-console-vs-data-science-differences), creating Custom Images just creates ImageStreams on the cluster, etc 67 | 68 | ### UI-Backend Component Feature (eg. DS Pipelines feature) 69 | 70 | Our configuration would look something like this: 71 | ```javascript 72 | const configurations = { 73 | [SupportedArea.DS_PIPELINES]: { 74 | featureFlags: ['disablePipelines'], 75 | requiredComponents: [StackComponent.DS_PIPELINES], 76 | }, 77 | // ... 78 | } 79 | ``` 80 | 81 | * Similar to DS Projects, we have a feature flag 82 | * But now we see `requiredComponents`; this indicates that the DSC needs to provide a successful install of this component, otherwise the Dashboard feature flag means nothing -- no backend, can't use the UI anyway 83 | 84 | > Tip: To know what features are designed this way, the simple question is... is the feature in the DSC? 85 | 86 | ## Configuring Aspects of Features 87 | 88 | Many features have various configurations that allow them to be slightly adjusted to fit the needs of the customer; these usually are small 'nudges' in the way a feature works -- dropdown values, ordering of importance, display information. These aspects are almost exclusively UI flavouring and not something for the OpenShift AI stack to consume. 89 | 90 | See the comprehensive list below for Dashboard-specific aspects. 91 | 92 | > Note: This is not a comprehensive list of what each backend feature can do, just what we configure in the Dashboard for our needs. 93 | 94 | > Note: For this list, each item will be flagged with **(UI)** or **(API)**. UI features have a UI flow and can be configured through the API as needed (aka outside of the UI). API features are not able to be configured inside the UI and can only be configured outside of the UI.
95 | 96 | * Workbench Container Sizes **(API)** 97 | > Visible during creation of a Workbench & Jupyter tile's Notebooks 98 | * Configured through the [OdhDashboardConfig] `.spec.notebookSizes` an array of resource objects (memory & cpu limits/requests) 99 | * We have a fallback default if not provided 100 | * Model Serving Container Sizes **(API)** 101 | > Visible during the creation of a Model Server (or KServe model) 102 | * Configured through the [OdhDashboardConfig] `.spec.modelServingSizes` an array of resource objects (memory & cpu limits/requests) 103 | * We have a fallback default if not provided 104 | * Jupyter Tile configurations **(UI)** 105 | * PVC Size through the [OdhDashboardConfig] `spec.notebookController.pvcSize` 106 | * Telemetry 107 | -------------------------------------------------------------------------------- /documentation/components/dashboard/dashboardStorage.md: -------------------------------------------------------------------------------- 1 | [Workbench component documentation]: ../workbenches 2 | [AcceleratorProfiles]: ./README.md#acceleratorprofiles 3 | 4 | # Dashboard Storage Mechanisms 5 | 6 | There are only two types of storages we have in the Dashboard. Local to the user's browser & on-cluster storage. 7 | 8 | * [Browser Storage](#browser-storage) 9 | * [On-Cluster Storage](#on-cluster-storage) 10 | * [Admin / Dashboard Configurations](#admin--dashboard-configurations) 11 | * [Non-Admin flows](#non-admin-flows) 12 | 13 | ## Browser Storage 14 | 15 | User-specific choices are currently stored in the browser's storages (local & session storages). 16 | 17 | Such as: 18 | * "remember my choice" settings 19 | * Stop notebook on toggle modal not showing up 20 | * Remember to open Jupyter tile Notebooks in new tab without asking 21 | * Active QuickStart 22 | * Some technical infrastructure around detecting token expiry and auto-handling an auto logout 23 | 24 | ## On-Cluster Storage 25 | 26 | ### Admin / Dashboard Configurations 27 | 28 | Features that impact all users or the Dashboard itself. These are only available to those considered admin. 29 | 30 | * Cluster Settings 31 | * Such as: 32 | * Model serving platforms 33 | * PVC size (Notebook tile only) 34 | * Notebook pod tolerations 35 | * Telemetry 36 | * are stored in the [OdhDashboardConfig] 37 | * Cluster Settings' Notebook Culler 38 | * This is configured by the Notebook Controller feature 39 | * Stored today as `notebook-controller-culler-config` ConfigMap in the deployment namespace (see [Workbench component documentation] for more information) 40 | * Accelerator profiles 41 | * These are stored as [AcceleratorProfiles] 42 | * Notebook images 43 | * These are stored as ImageStreams in the deployment namespace 44 | * Serving Runtimes 45 | * These are stored as OpenShift Templates in the deployment namespace 46 | > Note: OpenShift Templates was an idea of future feature expansion and are not executed as OpenShift Templates today. 47 | * Connection Types 48 | * These are stored as ConfigMaps in the deployment namespace 49 | 50 | ### Non-Admin Flows 51 | 52 | Flows that can be performed by any user, provided the [feature is enabled](./configuringDashboard.md#configuring-features-onoff). 53 | 54 | These are all stored as K8s resources using OCP or OpenShift AI backing CRDs. 55 | 56 | #### Connections 57 | 58 | Connections is a concept created by the Dashboard to store information and enable users to connect to various data sources. This information is stored in a K8s secret. 
The data within these secrets conform to the schema defined within connection types. Connection types are predefined OOTB and can also be defined by an admin. 59 | 60 | ```yaml 61 | kind: Secret 62 | apiVersion: v1 63 | metadata: 64 | name: aws-connection- 65 | namespace: 66 | labels: 67 | opendatahub.io/dashboard: 'true' 68 | opendatahub.io/managed: 'true' 69 | annotations: 70 | opendatahub.io/connection-type: s3 71 | openshift.io/display-name: 72 | data: 73 | AWS_ACCESS_KEY_ID: 74 | AWS_DEFAULT_REGION: 75 | AWS_S3_BUCKET: 76 | AWS_S3_ENDPOINT: 77 | AWS_SECRET_ACCESS_KEY: 78 | type: Opaque 79 | ``` 80 | 81 | See more information on the labels & annotations in the [Connection section of the K8s Labels & Annotations](./k8sLabelsAndAnnotations.md#connections) 82 | 83 | See more information on how the Connection feature works by reading more in the [Feature Connections section](./features/connections.md). 84 | -------------------------------------------------------------------------------- /documentation/components/dashboard/features/README.md: -------------------------------------------------------------------------------- 1 | # Features In-Depth 2 | 3 | This area is intended to shed some light on how Dashboard features work. We don't typically have a lot of features that do not rely on some backend team, but for those that we do, questions are often asked about "how do they work". 4 | 5 | It is worth noting, some of this information will talk about other backend features when appropriate. This will not be an in-depth knowledge on how _those_ features work. Just how Dashboard interacts with them. 6 | 7 | ## Features 8 | 9 | 1. [Data Connections - Connection Types - Connections](./connections.md) 10 | 11 | Additional specific feature details coming soon... 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /documentation/components/dashboard/features/connections.md: -------------------------------------------------------------------------------- 1 | # Connections 2 | 3 | > Introduced in 2.16 -- Encompasses & replaces Data Connections 4 | 5 | * [Introduction](#introduction) 6 | * [Connection Types & Corresponding Connections](#connection-types--corresponding-connections) 7 | * [How Editing Impacts Things](#how-editing-impacts-things) 8 | * [Out of the Box (ootb) Offerings](#out-of-the-box-ootb-offerings) 9 | * [Connectivity](#connectivity) 10 | * [Workbench Connections](#workbench-connections) 11 | * [Model Serving Connections](#model-serving-connections) 12 | 13 | ## Introduction 14 | 15 | There are three terms we are working with here: 16 | 17 | * **Data Connections** -- The "old" (no longer used) term. This indicated the S3-compatible Data Connection that we had prior to 2.16; moving forward these are _Connections_ based on the S3 _Connection Type_ (which comes [ootb](#out-of-the-box-ootb-offerings)) 18 | * **Connection Types** -- These are like templates for new _Connections_. These are crafted & managed by the RHOAI admins and stored inside the deployment namespace; some come [ootb](#out-of-the-box-ootb-offerings) 19 | * **Connections** -- These are the instances inside a Data Science Project that can be connected to Workbenches & Model Serving Models, they are always based off a _Connection Type_ 20 | 21 | ## Connection Types & Corresponding Connections 22 | 23 | Connection Types are a form-driven way of adding a structured object that details the fields and structure of a Connection interface. 
These provide the RHOAI admin with some flexibility to how to structure Connections for their users. 24 | 25 | Connection Types are a configmap in the deployment namespace. They are managed in the Admin Settings page. There is a preview button to see what the Connection will look like built into the form builder and should allow good coordination on how it will work for users. 26 | 27 | Connections are project-based and always built off of one of the Connection Types that are accessible (created and enabled) to the user at time of creation. They are saved as Secrets inside the project. 28 | 29 | Connection types are of this structure: 30 | ```typescript 31 | type ConnectionTypeConfigMap = K8sConfigMapResource & { 32 | metadata: { 33 | annotations?: DisplayNameAnnotations & { 34 | 'opendatahub.io/disabled'?: 'true' | 'false'; 35 | 'opendatahub.io/username'?: string; 36 | }; 37 | labels: DashboardLabels & { 38 | 'opendatahub.io/connection-type': 'true'; 39 | }; 40 | }; 41 | data?: { 42 | category?: string; 43 | // JSON array of ConnectionTypeFields 44 | fields?: string; 45 | }; 46 | }; 47 | ``` 48 | 49 | Each `ConnectionTypeField` is a configuration of a type of field. Read more about the [Dashboard Labels & Annotations over here](../k8sLabelsAndAnnotations.md). 50 | 51 | The supported field types today are: 52 | * BooleanField 53 | * DropdownField 54 | * FileField 55 | * HiddenField 56 | * NumericField 57 | * SectionField 58 | * ShortTextField 59 | * TextField 60 | * UriField 61 | 62 | > Note: Each of these have fields to configure read-only, required, and varying configurations based on the type. There is quite a bit of variability here, so the details can be added if that kind of granularity is needed. 63 | 64 | ### How Editing Impacts Things 65 | 66 | Editing an existing connection attempts to re-present the same look and feel at the time of creation with a few exceptions: 67 | 68 | * If the Connection Type has since been modified 69 | * & new fields added 70 | * Then the new fields will be accessible 71 | * There should be limited impact, with exceptions 72 | * If the new fields are required, they will prevent users from resaving changes in their Connections until they are updated with new values 73 | * If the new fields have defaults, it will require the users to edit and resave the Connections 74 | * & existing fields removed 75 | * If they were not used, it will be pretty seamless to the form experience 76 | * If they were used, the field will be marked with very little information [1] & will just be the environment variable name to its value 77 | * & changes an existing field's type 78 | * The field should remain as-is in the old type/value until otherwise modified 79 | * If the Connection Type has since been deleted 80 | * The Connection edit screen will have very little information [1] and will entirely just be a listing of environment variable names to their value 81 | 82 | > [1] All the metadata comes from the Connection Type not the Connection itself; metadata such as: section information, the display name of the field, defaults, readonly, required, etc 83 | 84 | It is worth noting that if the Connection Type is just disabled, we will still pull the configuration details during edits, but it cannot be used in future creations until it is re-enabled. This gives an avenue to use this functionality as a way to version existing Connection Types without encountering issues with existing Connections. 
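To make the `ConnectionTypeConfigMap` structure shown earlier in this section more concrete, a minimal Connection Type ConfigMap could look roughly like the sketch below. The resource name, namespace, username, and the exact JSON schema inside `data.fields` are illustrative assumptions; the TypeScript definition above remains the authoritative shape.

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: example-connection-type          # hypothetical name
  namespace: opendatahub                 # the deployment namespace
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/connection-type: 'true'
  annotations:
    openshift.io/display-name: Example connection type
    opendatahub.io/disabled: 'false'
    opendatahub.io/username: admin-user  # who created the connection type
data:
  category: example
  # JSON array of ConnectionTypeFields (abbreviated and illustrative only)
  fields: '[{"type": "short-text", "name": "Endpoint", "envVar": "EXAMPLE_ENDPOINT", "required": true}]'
```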
85 | 86 | ## Out of the Box (ootb) Offerings 87 | 88 | Admins can disable this offering and/or duplicate it to provide defaults or read-only aspects to help their users with some information. 89 | 90 | > Note: It is important to note that since the Connection Types are stored as ConfigMaps, passwords and other credential information are exposed in plain text if stored in the Connection Type. We do not recommend storing this kind of information in Connection Types at this time. 91 | 92 | #### S3 compatible object storage - v1 93 | To help with existing usages before the upgrade 2.16, we naturally continue to have support for S3. 94 | 95 | #### URI - v1 96 | We also provide a URI ootb variant to help with connecting public [2] URL models to model serving. 97 | 98 | > [2] At this time, private connections are not supported 99 | 100 | #### OCI compliant registry - v1 101 | With the OCI compliant registry ootb connection type, users are able to connect to a private container registry by providing a pull secret. Due to the presence of the `.dockerconfigjson` env variable, the created connection becomes a secret of type `kubernetes.io/dockerconfigjson` which can be used the same way traditional pull secrets are used in kubernetes, with the additional fields from the connection type. 102 | 103 | To connect to a public container registry, a user can use the "URI - v1" connection type and provide the URI to the image tag and prepending it with `oci://` 104 | 105 | ## Connectivity 106 | 107 | Connections don't do a lot by themselves; they effectively store configurations about how to connect to another source [3]. They reside inside projects and can connect to a few other resources that share the same project. Each interacts slightly different, so lets cover what those scenario details are. 108 | 109 | > [3] Technically speaking, a Connection Type can template any type of information which does not need to reflect relationships with another storage. They can store reusable variable values so you can share them with multiple Workbenches -- [more details below](#workbench-connections). 110 | 111 | ### Workbench Connections 112 | 113 | Workbenches are by far the most flexible of Connection consumers. All Connections are connected via `envFrom` (see below for an example), which injects all the keys of the Secret as environment variables. Consumption of the data can be done through the Workbenches' standard access to the environment variables (in Python that's `os.environ["ENV_NAME_HERE"]`). 114 | 115 | Under the hood -- the connectivity between a Connection and a Workbench exists as such: 116 | 117 | ```yaml 118 | apiVersion: kubeflow.org/v1 119 | kind: Notebook 120 | metadata: 121 | name: example-workbench 122 | # ...other properties 123 | spec: 124 | template: 125 | spec: 126 | # ...other properties 127 | containers: 128 | - name: the-notebook-container 129 | # ...other properties 130 | envFrom: 131 | - secretRef: 132 | name: my-s3-connection 133 | - secretRef: 134 | name: my-uri-connection 135 | ``` 136 | 137 | The `my-s3-connection` (using the ootb S3 Connection Type) & `my-uri-connection` (using the ootb URI Connection Type) are connected via the `envFrom` section on the notebook container. Since all Connections are secrets & are injected the same way as environment variables, it will always be mounted from `envFrom.secretRef.name` for each Connection irrespective of their structure. 
138 | 139 | > Note: It is important to note that since they are injected as environment variables, two Connections sharing the same variable will clobber each other and the "last one" wins. The UI will note this concern when you have two Connections overlapping. 140 | 141 | ### Model Serving Connections 142 | 143 | > Note: Due to the complexities of how Connections integrate with Model Serving, limited use-cases are available to Model Serving. 144 | 145 | Essentially we only have support for these types: 146 | * [S3-compatible](#s3-compatible-connection) 147 | * [URI](#uri-connection) 148 | * [OCI model cars](#oci-model-cars-connection) 149 | 150 | > Note: At this time there is not much else that can be done as it requires specific integration logic in order to connect a specific set of fields from the Connection to align it with the implementation of the Serving feature (KServe, Model Mesh, etc). 151 | 152 | #### S3-compatible Connection 153 | 154 | > Pulling a model from an S3-compatible Bucket 155 | 156 | Like _Data Connections_ in the previous world, these operate identically through the storage property. 157 | 158 | ```yaml 159 | apiVersion: serving.kserve.io/v1beta1 160 | kind: InferenceService 161 | metadata: 162 | name: model-example-using-s3 163 | # ...other properties 164 | spec: 165 | predictor: 166 | # ...other properties 167 | model: 168 | # ...other properties 169 | storage: 170 | key: my-s3-connection 171 | path: the/path/in/my/bucket 172 | ``` 173 | 174 | The `storage.key` is the Connection Secret. Note the `path` value is still used to qualify where in your S3 Connection bucket the model will be. 175 | 176 | #### URI Connection 177 | 178 | > Pulling a model from a public URI 179 | 180 | A new feature with the initial release of the Connection Types. 181 | 182 | ```yaml 183 | apiVersion: serving.kserve.io/v1beta1 184 | kind: InferenceService 185 | metadata: 186 | name: model-example-using-uri 187 | # ...other properties 188 | spec: 189 | predictor: 190 | # ...other properties 191 | model: 192 | # ...other properties 193 | storageUri: 'https://the-url-to-my-model.com/path' 194 | ``` 195 | 196 | The `storageUri` path is queried for the model and installed into the pod that is associated to your deployment. 197 | 198 | > Note: The `storageUri` field is an overloaded one in the KServe documentation and can have wider implications for usage. Anything that 199 | 200 | #### OCI Model Cars Connection 201 | 202 | > Pulling a model from an authenticated OCI container registry 203 | 204 | OCI is only supported on KServe single model serving deployments. Additionally, the image must be in a Modelcar[^Modelcar] format specified by KServe. 205 | 206 | ```yaml 207 | apiVersion: serving.kserve.io/v1beta1 208 | kind: InferenceService 209 | metadata: 210 | name: model-example-using-oci 211 | # ...other properties 212 | spec: 213 | predictor: 214 | imagePullSecrets: 215 | - name: oci-connection 216 | # ...other properties 217 | model: 218 | # ...other properties 219 | storageUri: 'oci://quay.io/someregistry/image:tag' 220 | ``` 221 | 222 | The `imagePullSecrets` points to the OCI connection. 223 | 224 | The `storageUri` path starts with `oci://` and points to an image. 
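For reference, the `oci-connection` Secret referenced above is an ordinary Connection Secret. A hedged sketch is shown below; the names, namespace placeholder, and the `connection-type-ref` value are illustrative, while the `kubernetes.io/dockerconfigjson` type follows from the `.dockerconfigjson` field described in the OCI ootb section.

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: oci-connection
  namespace: <project-namespace>                 # same project as the InferenceService
  labels:
    opendatahub.io/dashboard: 'true'
  annotations:
    openshift.io/display-name: My OCI registry
    opendatahub.io/connection-type-ref: oci-v1   # illustrative reference to the ootb OCI connection type
type: kubernetes.io/dockerconfigjson             # because the connection carries a .dockerconfigjson field
data:
  .dockerconfigjson: <base64-encoded pull secret>
```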
225 | 226 | [^Modelcar]: https://kserve.github.io/website/latest/modelserving/storage/oci/#prepare-an-oci-image-with-model-data -------------------------------------------------------------------------------- /documentation/components/dashboard/k8sLabelsAndAnnotations.md: -------------------------------------------------------------------------------- 1 | [AcceleratorProfile]: ./README.md#acceleratorprofiles 2 | 3 | [`openshift.io/display-name`]: #openshiftiodisplay-name 4 | [`openshift.io/description`]: #openshiftiodescription 5 | [`opendatahub.io/recommended-accelerators`]: #opendatahubiorecommended-accelerators 6 | [`opendatahub.io/accelerator-name`]: #opendatahubioaccelerator-name 7 | [`opendatahub.io/sc-config`]: #opendatahubiosc-config 8 | 9 | # Dashboard K8s Labels & Annotations 10 | 11 | Dashboard has a reputation of using a lot of annotations and labels on various resources. This document should help serve to explain the use-cases behind each. 12 | 13 | > Note: Not all resources shown in the Dashboard are K8s driven resources. For those that are not, this page does not have any impact on them. 14 | 15 | > Note: This is not a comprehensive list of all labels & annotations used in OpenShift AI, just the ones managed and created by the Dashboard. Specific components may have ever-changing needs, so you should seek out those component's documentation for more information. 16 | 17 | * [Labels](#common-labels) 18 | * [`opendatahub.io/dashboard`](#opendatahubiodashboard) 19 | * [Annotations](#common-annotations) 20 | * [`openshift.io/display-name`] 21 | * [`openshift.io/description`] 22 | * [`opendatahub.io/recommended-accelerators`] 23 | * [`opendatahub.io/accelerator-name`] 24 | * [`opendatahub.io/sc-config`] 25 | * [Specific Use-Cases](#specific-use-cases) 26 | * [DS Projects](#data-science-projects) 27 | * [Connection Types](#connection-types) 28 | * [Connections](#connections) 29 | * [ImageStreams](#imagestreams) 30 | * [Notebooks](#notebooks) 31 | * [ServingRuntime Templates](#servingruntime-templates) 32 | * [Storage Classes](#storage-classes) 33 | * [Model Registry](#model-registry) 34 | 35 | ## Common Labels 36 | 37 | Common reused labels in the Dashboard. Key features of labels: 38 | 39 | * Is able to be used as a filter in a k8s request 40 | * Must be a restrictive k8s naming structure 41 | 42 | ### opendatahub.io/dashboard 43 | 44 | The most common dashboard label. The initial goal here was to mark all things created by the Dashboard, so we could reverse lookup said resources. This has proven to be a bit over aggressive, adding friction in customers making use of external of the Dashboard flows work with Dashboard flows (eg. gitops). 45 | 46 | This is a highly contentious label and will be seeing changes in the near future. 47 | 48 | > Note: This concept is deprecated for DS Projects, it is adding no value and is adding confusion to the concept of ["what is a DS Project?"](./README.md#projects---openshift-console-vs-data-science-differences) 49 | 50 | > Note: This concept is not entirely deprecated for some resources that have multiple uses, including those outside of OpenShift AI. But all OpenShift AI CRDs should not need this soon. 51 | 52 | ## Common Annotations 53 | 54 | Common reused annotations in the Dashboard. 
Key features of annotations 55 | 56 | * Can be a flexible field to be used for complex metadata or flexible usage of characters that are not K8s-safe (vs Labels) 57 | 58 | ### openshift.io/display-name 59 | 60 | Used heavily by Dashboard UI flows to allow OpenShift AI users to craft a readable & flexible name. 61 | 62 | > Note: This is optional, so we fall back on the resource's k8s name. 63 | 64 | ### openshift.io/description 65 | 66 | Used almost as heavily as the display-name annotation. This allows for a description of the resource the user is creating. This usually is shown next to the display-name once the resource is created. 67 | 68 | > Note: Some resources do not have the use for this, but that's more of an oversight than an intent. 69 | 70 | > Note: This annotation is not required, nor tied to the use of the display-name annotation. 71 | 72 | ### opendatahub.io/recommended-accelerators 73 | 74 | > Type: string array of [AcceleratorProfile] k8s names. 75 | 76 | This annotation is what we use to suggest a recommended connection between a resource's usage by the user & an accelerator profile created on the cluster. This appears as a tag next to the accelerator dropdown item in the UI. 77 | 78 | ### opendatahub.io/accelerator-name 79 | 80 | > Type: a string value of the [AcceleratorProfile] k8s name. 81 | 82 | This annotation is what we use to relate back to an accelerator profile. This is metadata to help with reselection of the right accelerator profile in read & edit modes. This is needed as a way to convert back from the resource values used to the proper profile the user selected. We have a fallback for legacy support for Nvidia GPU, but everything else will fail to locate a profile and show an intermediary custom profile that cannot be mutated in edit modes. 83 | 84 | If no accelerator was selected, this value should not appear. 85 | 86 | ### opendatahub.io/sc-config 87 | 88 | > Type: object 89 | ```js 90 | { 91 | displayName: string; 92 | isEnabled: boolean; 93 | isDefault: boolean; 94 | lastModified: string; 95 | description?: string; 96 | } 97 | ``` 98 | 99 | This annotation is used as internal Dashboard metadata to describe, enable, and set which storage class is the default. This annotation does not affect Openshift default storage classes. 100 | 101 | ## Specific Use-Cases 102 | 103 | ### Data Science Projects 104 | 105 | * Labels 106 | * `modelmesh-enabled` - required by Model Mesh to say the project is using model mesh configurations 107 | > Note: When this is `true`, the project is Model Mesh. When this is `false`, we key off it to say this project is KServe 108 | * Annotations 109 | * [`openshift.io/display-name`] 110 | * [`openshift.io/description`] 111 | 112 | For the Project Sharing feature specifically: 113 | * Label `opendatahub.io/project-sharing` is used to denote permissions crafted by Dashboard flows & thus show up in the Dashboard UI 114 | 115 | ### Connection Types 116 | 117 | * Labels 118 | * `opendatahub.io/connection-type` - a value of `true` indicates that the `ConfigMap` represents a connection type 119 | * Annotations 120 | * [`openshift.io/display-name`] 121 | * [`openshift.io/description`] 122 | * `opendatahub.io/disabled` - a `true` or `false` value indicates whether the connection type is disabled 123 | * `opendatahub.io/username` - the name of the user who created the connection type 124 | 125 | ### Connections 126 | 127 | * Labels 128 | * `opendatahub.io/managed` - Legacy value. 
Identifies data connections which are watched by the model mesh controller for the purpose of populating the model serving `storage-config` 129 | * Annotations 130 | * [`openshift.io/display-name`] 131 | * [`openshift.io/description`] 132 | * `opendatahub.io/connection-type` - Legacy value. Used to identify S3-compatible data connections; `s3` is the only supported value 133 | * `opendatahub.io/connection-type-ref` - a reference to the connection type that is used to create the connection 134 | 135 | ### ImageStreams 136 | 137 | > Note: Out-of-the-box variants of ImageStreams is a Workbench backed feature. 138 | 139 | These are configured by the admin in the UI and are provided as out-of-the-box examples. 140 | 141 | * General Annotations 142 | * [`opendatahub.io/recommended-accelerators`] 143 | * `opendatahub.io/notebook-python-dependencies` - the python dependencies that are included in the image to list to the user 144 | * `opendatahub.io/notebook-software` - the software that is included in the image to list to the user 145 | * Annotations used primarily by the out-of-the-box images provided by the workbench component 146 | * (tag) `opendatahub.io/image-tag-outdated` - a `true` or `false` value to say if the image is present for lookup, but not intended for selection 147 | * (tag) `opendatahub.io/workbench-image-recommended` - the recommended tag to suggest to the user 148 | * `opendatahub.io/notebook-image-order` - a weighed value to help with organization of the images in display lists for the user 149 | * Annotations used primarily by the Admin UI when created custom Notebook Images 150 | * `opendatahub.io/notebook-image-desc` - description provided by the user 151 | * `opendatahub.io/notebook-image-name` - a display name provided by the user 152 | * `opendatahub.io/notebook-image-url` - the original image value from the user before it's processed for the ImageStream 153 | 154 | ### Notebooks 155 | 156 | > Note: This is a Workbench backed feature. 157 | 158 | * Labels 159 | * `opendatahub.io/odh-managed` - (unknown, potential legacy without value) 160 | * `opendatahub.io/user` - a translated username; the Dashboard k8s-ifies the user's username so we can compare or look up by user in the future 161 | * Annotations 162 | * [`openshift.io/display-name`] 163 | * [`openshift.io/description`] 164 | * `opendatahub.io/username` - the actual username (related to the Label `opendatahub.io/user`) 165 | * [`opendatahub.io/accelerator-name`] 166 | * `opendatahub.io/workbench-image-namespace` - This annotation is used to indicate the scope of a workbench image. If the workbench image is project-scoped, this annotation is added with the workbench image’s namespace. If it’s global-scoped, the annotation is omitted. 167 | * `opendatahub.io/hardware-profile-namespace` - This annotation is used to indicate the scope of a hardware profile. If the hardware profile is project-scoped, this annotation is added with the hardware profile’s namespace. If it’s global-scoped, the annotation is omitted. 168 | * `opendatahub.io/accelerator-profile-namespace` - This annotation is used to indicate the scope of a accelerator profile. If the accelerator profile is project-scoped, this annotation is added with the accelerator profile’s namespace. If it’s global-scoped, the annotation is omitted. 169 | 170 | ### ServingRuntime Templates 171 | 172 | > Note: This is a Serving backed feature. 173 | 174 | These are configured by the admin in the UI and are provided as out-of-the-box examples. 
These are stored as OpenShift Templates under the hood, but the admin only ever sees a ServingRuntime when configuring. 175 | 176 | * Annotations (when configuring in the admin page) 177 | * [`openshift.io/display-name`] 178 | * `opendatahub.io/modelServingSupport` - (managed by the UI) an JSON Array of supported platforms; options: 'single', 'multi' 179 | * `opendatahub.io/apiProtocol` - (managed by the UI) the api protocols available; options (one of): 'REST', 'gRPC' 180 | * `opendatahub.io/disable-gpu` - (optional, typed in) if the ServingRuntime should not be used with GPUs (aka accelerators) 181 | * [`opendatahub.io/recommended-accelerators`] - (optional, typed in) 182 | 183 | * Annotations (when deploying in projects) 184 | * [`opendatahub.io/accelerator-name`] 185 | * `opendatahub.io/template-name` - the runtime used 186 | * `opendatahub.io/template-display-name` - the display name shown for the runtime 187 | * `opendatahub.io/serving-runtime-scope` - This annotation is used to identify whether a serving runtime template is project-scoped or global-scoped. 188 | * `opendatahub.io/hardware-profile-namespace` - This annotation is used to indicate the scope of a hardware profile. If the hardware profile is project-scoped, this annotation is added with the hardware profile’s namespace. If it’s global-scoped, the annotation is omitted. 189 | * `opendatahub.io/accelerator-profile-namespace` - This annotation is used to indicate the scope of a accelerator profile. If the accelerator profile is project-scoped, this annotation is added with the accelerator profile’s namespace. If it’s global-scoped, the annotation is omitted. 190 | 191 | ### Storage Classes 192 | 193 | * Annotations 194 | * [`opendatahub.io/sc-config`] - (managed by the UI) a JSON Blob of storage class metadata 195 | 196 | ### Model Registry 197 | 198 | * Labels 199 | * `opendatahub.io/rb-project-subject` - This label is used to distinguish RoleBindings with the group subject `system:serviceaccounts:{projectName}`, identifying them as specific to project service accounts. This allows us to use group RoleBindings separately for groups and projects, making sure they always appear in the view where they were created without relying on filtering by a string prefix. 200 | 201 | * `modelregistry.opendatahub.io/registered-model-id` and `modelregistry.opendatahub.io/model-version-id` - These labels identify InferenceServices deployed via the model registry UI and get the Model Registry Controller to sync the deployment. They are also used to filter InferenceServices when viewing the list of deployments for a specific model version. 202 | 203 | * `modelregistry.opendatahub.io/name` - This label provides a unique reference to InferenceServices deployed via a model registry. It ensures that models will be listed in the deployments tab of that specific registry, preventing incorrect listing across multiple registries with overlapping model IDs. 
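As a closing illustration of how several of the labels and annotations above come together on a single resource, a Workbench `Notebook` created through the Dashboard might carry metadata roughly like the sketch below. All values are hypothetical; refer to the Notebooks section above for what each key means.

```yaml
apiVersion: kubeflow.org/v1
kind: Notebook
metadata:
  name: my-workbench
  namespace: my-data-science-project
  labels:
    opendatahub.io/dashboard: 'true'                # legacy "created by the Dashboard" marker
    opendatahub.io/user: jane-doe                   # k8s-safe translation of the username
  annotations:
    openshift.io/display-name: My Workbench
    openshift.io/description: Experimentation workbench for the fraud model
    opendatahub.io/username: jane.doe@example.com   # the actual username
    opendatahub.io/accelerator-name: example-gpu-profile   # only present if an accelerator profile was selected
```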
204 | -------------------------------------------------------------------------------- /documentation/components/devops/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/devops/.gitkeep -------------------------------------------------------------------------------- /documentation/components/distributed-workload/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/distributed-workload/.gitkeep -------------------------------------------------------------------------------- /documentation/components/edge/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/edge/.gitkeep -------------------------------------------------------------------------------- /documentation/components/explainability/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/explainability/.gitkeep -------------------------------------------------------------------------------- /documentation/components/explainability/README.md: -------------------------------------------------------------------------------- 1 | # Model Explainability Architecture 2 | 3 | ![Model Explainability Diagram](diagram.png) 4 | 5 | The TrustyAI operator[^operator] is responsible for managing the lifecycle of `TrustyAIService` (TAS) Custom Resources (CR). 6 | 7 | ## `TrustyAIService` 8 | 9 | TrustyAI is designed to support a single `TrustyAIService` per namespace/project. Although multiple TASs can be created in the same namespace, and indeed work, due to the architecture this will not bring any additional benefit, and will only duplicate the computations performed by a single TAS. 10 | 11 | In the following sections, we will always assume a single TAS per namespace. 12 | 13 | The general syntax of the `TrustyAIService` CR is as follows: 14 | 15 | ```yaml 16 | apiVersion: trustyai.opendatahub.io/v1alpha1 17 | kind: TrustyAIService 18 | metadata: 19 | name: trustyai-service 20 | spec: 21 | storage: 22 | format: "PVC" 23 | folder: "/inputs" 24 | size: "1Gi" 25 | data: 26 | filename: "data.csv" 27 | format: "CSV" 28 | metrics: 29 | schedule: "5s" 30 | ``` 31 | 32 | - `metadata.name` specifies the name of the `TrustyAIService`. 33 | - `spec.storage.format` specifies the storage format. Currently, only `PVC` is supported. 34 | - `spec.storage.folder` specifies the folder where the input data is stored. 35 | - `spec.storage.size` specifies the size of the PVC to be used for storage. 36 | - `spec.data.filename` specifies the suffix of the storage file. 37 | - `spec.data.format` specifies the format of the data file (only `CSV` supported at the moment). 38 | - `metrics.schedule` specifies the interval at which the metrics are calculated, when a a calculation request is register with the service. 
39 | 40 | The default behaviour when installing a CR in a namespace is for the operator to provision the following resources: 41 | 42 | | Type | Name | Description | 43 | |-----------------------|--------------------------|----------------------------------------------------| 44 | | Deployment | `$(metadata.name)` | Deploys a pod with two containers (service and OAuth). | 45 | | PersistentVolumeClaim | `$(metadata.name)-pvc` | Claims a volume for the storage of the inference data. | 46 | | Service | `$(metadata.name)-service`| Internal service to the TrustyAI REST server. | 47 | | Service | `$(metadata.name)-tls` | Service to expose the TrustyAI OAuth server. | 48 | | Route | `$(metadata.name)` | Route exposing the `$(metadata.name)-tls`. | 49 | 50 | ## Payload consumption 51 | 52 | When a `InferenceService` (IS), either ModelMesh or KServe is detected by the operator in the same namespace as a `TrustyAIService`, the operator will try to configure the `InferenceService` to send the inference data to the `TrustyAIService` for processing. 53 | 54 | ### ModelMesh 55 | 56 | When a ModelMesh IS is detected, the operator will set the `PAYLOAD_PROCESSOR` environment to the internal `$(metadata.name)-service`. `PAYLOAD_PROCESSOR` is interpreted by ModelMesh as a space-delimited list of endpoints. If additional endpoints are present, the operator will append the `$(metadata.name)-service` to the list. If the processor is already present, the operator will not modify the list. 57 | 58 | ### KServe 59 | 60 | In the case of KServe, the operator will either add (if not present) or replace the `spec.logger` field with the internal `$(metadata.name)-service`. As example, the final[^kserveis] IS will look similar to: 61 | 62 | ```yaml 63 | apiVersion: serving.kserve.io/v1beta1 64 | kind: InferenceService 65 | metadata: 66 | name: sklearn-iris 67 | spec: 68 | predictor: 69 | logger: # Added by the TrustyAI operator 70 | mode: all 71 | url: http://$(metadata.name)-service.$namespace.svc.cluster 72 | model: 73 | modelFormat: 74 | name: sklearn 75 | storageUri: gs://kfserving-examples/models/sklearn/1.0/model 76 | ``` 77 | 78 | ## Authentication 79 | 80 | Each TAS will have two associated `Services`: 81 | 82 | - `$(metadata.name)-service` (1) 83 | - `$(metadata.name)-tls` (2) 84 | 85 | (1) will have no route associated with it, and will be used for internal communication between the IS and the TAS. This service does not support authentication or TLS at the moment (2) will be exposed via a `Route`, supports TLS and will be used for OAuth authentication. 86 | 87 | Request to (2) will be authenticated using a bearer token in the request header, `Authorization: Bearer `. These requests will be forwarded to the OAuth container (running `oauth-proxy`[^oauth-proxy]) for authentication. If the token is valid, the request will be forwarded to the TrustyAI service container. 88 | 89 | Requests to (1) will not be authenticated, and will be forwarded directly to the TrustyAI service container. 90 | 91 | 92 | 93 | [^operator]: [TrustyAI Operator repository](https://github.com/trustyai-explainability/trustyai-service-operator). 94 | [^kserveis]: Example IS taken from [KServe's documentation](https://kserve.github.io/website/0.11/modelserving/logger/logger/#create-message-dumper). 
95 | [^oauth-proxy]: [OAuth Proxy repository](https://github.com/openshift/oauth-proxy) 96 | -------------------------------------------------------------------------------- /documentation/components/explainability/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/explainability/diagram.png -------------------------------------------------------------------------------- /documentation/components/feature_store/README.md: -------------------------------------------------------------------------------- 1 | # Feast Feature Store Architecture 2 | 3 | 7 | 8 | ## Introduction 9 | 10 | Feast (Feature Store) is an open-source feature store that helps teams operate production ML systems at scale by allowing them to define, manage, validate, and serve features for production AI/ML. 11 | 12 | ## Feast Feature Store High Level Architecture 13 | 14 | The architecture diagram presents a high level overview of OpenShift AI environment that integrates Feast for feature store, and other component like Data Science Pipelines / Notebooks for Model training, a Model Registry for using models, and Model Serving for model inference/serving. 15 | 16 | ![feature-store-overview](images/feature-store-overview.jpg) 17 | 18 | ## Goals 19 | 20 | - Integrate Feast Feature Store as a centralized repository for feature management within OpenShift AI. 21 | - Provide a unified view of features used during model training and serving stages. 22 | 23 | 24 | ## Components 25 | 26 | Feast's feature store is composed of below foundational components: 27 | 31 | 32 | 1. [**Offline Store**](https://docs.feast.dev/getting-started/components/offline-store) : Used for historical feature extraction used in model training. 33 | 2. [**Online Store**](https://docs.feast.dev/getting-started/components/online-store): Used for serving features at low-latency for inference requests in production. 34 | 3. [**Registry**](https://docs.feast.dev/getting-started/components/registry): Used to keep track of feature store definitions and state (optionally backed by GCS or S3 based persistent storage). The Feast Registry acts as the backbone for storing feature definitions, which are typically written in Python, stored in feature repositories, and shared across environments such as staging and production. 35 | 4. [**Feast Python SDK/CLI**](https://docs.feast.dev/reference/feast-cli-commands): The primary user facing SDK 36 | - Manage version controlled feature definitions. 37 | - Materialize (load) feature values into the online store. 38 | - Build and retrieve training datasets from the offline store. 39 | - Retrieve online features. 40 | 5. [**Batch Materialization Engine**](https://docs.feast.dev/getting-started/components/batch-materialization-engine) : A batch materialization engine is a component of Feast that's responsible for moving data from the offline store into the online store. 41 | 6. [**Feature Repository**](https://docs.feast.dev/reference/feature-repository/feature-store-yaml) : Contains Feature definitions files written in Python , and the `feature_store.yaml` file to configure the feature store, including data sources with Feast project. 42 | 7. 
[**Feature Server**](https://docs.feast.dev/reference/feature-servers/python-feature-server): The Feature Server is a core architectural component in Feast, designed to provide low-latency feature retrieval and updates for machine learning applications. 43 | It is a REST API server built using [FastAPI](https://fastapi.tiangolo.com/) and exposes a limited set of endpoints to serve features, push data, and support materialization operations. The server is scalable, flexible, and designed to work seamlessly with various deployment environments, including local setups and cloud-based systems. 44 | 8. [**Feature Store Controller/Operator**](https://github.com/feast-dev/feast/tree/master/infra/feast-operator): The Feature Store Controller/Operator is responsible for the deployment and management of the Feast servers ([Offline Server](https://docs.feast.dev/reference/feature-servers/offline-feature-server), [Online Server](https://docs.feast.dev/reference/feature-servers/python-feature-server), [Registry Server](https://github.com/feast-dev/feast/blob/master/docs/reference/feature-servers/registry-server.md)) in Kubernetes/OpenShift environments: 45 | 46 | ## Feature Store Flow with OpenShift AI. 47 | 48 | **1. Feature Store Initialization** 49 | The UI Dashboard initializes/Creates the FeatureStore CR and sets the config for feature services used for data ingestion, transformation, and storage. 50 | 51 | **2. Data Ingestion into Feature Store** 52 | Data can be sent to Feast either pre-processed (e.g., via batch or streaming data pipelines) or raw and transformed by the Feast feature server during data ingestion (i.e., transformed prior to being written to the online store). Feast simplifies the integration with various data sources by providing an opinionated yet flexible API. 53 | 54 | **3. Data Storage in Feature Store** 55 | 56 | **Offline Store**: A lower-cost, persistent storage system (e.g., data warehouse) optimized for storing large volumes of historical feature data used in model training and batch scoring. It prioritizes storage efficiency over low-latency access. 57 | **Online Store**: A higher-cost, low-latency storage system (e.g., in-memory database or cache) designed to provide rapid access to frequently used features during real-time inference, optimizing for speed and responsiveness. 58 | **Feature Registry:** Metadata storage to track feature definitions, feature transformations, and feature metadata. 59 | 60 | **4. Data Retrieval for Model Training** 61 | Data Science Pipelines retrieve historical features from the Offline Store. Feature views in the Feature Registry define how features are joined and retrieved. 62 | 63 | **5. Model Training** 64 | The retrieved data is split into train, test, and hold-out sets. The model is iteratively trained and evaluated on these datasets. 65 | 66 | **6. Model Evaluation** 67 | The trained model is validated on a hold-out dataset to assess its accuracy, generalizability, and impact to its domain. 68 | **Workflow:** Data retrieved → Train/Test split → Train Model → Evaluate Performance. 69 | 70 | **7. Model Registration** 71 | The trained model is registered in the Model Registry. 72 | 73 | **8. Model Deployment** 74 | The registered model is deployed to the Model Server. 75 | 76 | **9. Inference Request** 77 | There are three ways to orchestrate inference requests. 78 | 1. 
The application sends an inference request to the Model Server and the Model Server retrieves real-time features from the Feature Server and generates predictions using the retrieved features and deployed model. 79 | 2. The application sends an inference request to the Feature Server and the Feature Server sends the features to the Model Server and generates predictions using the deployed model. 80 | 3. The application sends a request to the Feature Server and sends the features to the Model Server which generates predictions using the deployed model. 81 | 82 | 83 | ```mermaid 84 | sequenceDiagram 85 | actor U as UI Dashboard 86 | participant NB as Notebook Controller 87 | box Feature Store 88 | participant FS as Feature Store 89 | participant OS as Offline Store 90 | participant IS as Online Store 91 | participant REG as Feature Registry 92 | end 93 | participant DS as Data Science Pipelines 94 | participant MR as Model Registry 95 | participant MS as Model Serving 96 | participant K as Kubernetes/OpenShift 97 | 98 | %% Step 1: Notebook Controller Initialization 99 | U->>+NB: Launch Notebook Controller 100 | 101 | %% Step 2: Feature Store Creation 102 | U->>+FS: Create Feature Store 103 | FS-->>U: Feature Store ready 104 | 105 | %% Step 3: Data Ingestion into Feature Store 106 | U->>+FS: Send pre-processed data (Batch/Streaming) 107 | FS->>REG: Store/apply feature metadata 108 | FS->>OS: Store historical features (Offline Store) 109 | FS->>IS: Store real-time features (Online Store) 110 | Note right of FS: Features are stored in Offline/Online Stores and
Metadata about the features is registered in the Feature Registry. 111 | 112 | %% Step 4: Model Training 113 | U->>+DS: Data Science Pipeline 114 | DS->>+FS: Retrieve features for model training 115 | FS->>OS: Pull historical features 116 | OS-->>DS: Return historical features 117 | DS->>DS: Train and evaluate ML model 118 | 119 | %% Step 5: Model Registration 120 | DS->>+MR: Register trained model with metadata 121 | 122 | %% Step 6: Model Deployment 123 | DS->>+K: Deploy model to OpenShift AI (KServe) 124 | K->>+MS: Create InferenceService (ISVC) 125 | MS-->>K: ISVC deployed successfully 126 | 127 | %% Step 7: Real-Time Inference 128 | U->>+MS: Send inference request with new data 129 | MS->>+FS: Fetch real-time features (Online Store) 130 | FS->>IS: Retrieve real-time features 131 | IS-->>MS: Return real-time features 132 | MS-->>U: Return predictions 133 | 134 | ``` 135 | 136 | -------------------------------------------------------------------------------- /documentation/components/feature_store/images/feature-store-overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/feature_store/images/feature-store-overview.jpg -------------------------------------------------------------------------------- /documentation/components/model-registry/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/model-registry/.gitkeep -------------------------------------------------------------------------------- /documentation/components/model-registry/README.md: -------------------------------------------------------------------------------- 1 | # Model Registry architecture 2 | 3 | 7 | 8 | ## Introduction 9 | 10 | A model registry plays a pivotal role in the lifecycle of AI/ML models, serving as the central repository holding metadata pertaining to machine learning models from inception to deployment. This encompasses both high-level details like deployment environment and project origins, as well as intricate information like training hyperparameters, performance metrics, and deployment events. Acting as a bridge between model experimentation and serving, it offers a secure, collaborative interface of a metadata store for stakeholders involved in the ML lifecycle. 11 | 12 | ## Model Registry High Level Architecture 13 | ![Model Registry High Level Architecture](./images/model-registry-overview.jpg) 14 | 15 | > [!NOTE] 16 | > The Model Registry is a passive repository for metadata and is not meant to be a Control Plane. It does not perform any orchestration or expose APIs to perform actions on underlying OpenShift AI components. 17 | 18 | The model registry is a backing store for various stages of MLOps that can log user flow of a model development and deployment. The model registry meets a data scientist's need to be able to visualize a model’s lineage and trace back the training executions, parameters, metrics, etc. It also help deployment engineers visualize model pipeline events, actions, progress through deployment stages, etc. 
19 | 20 | ## Goals 21 | - Associate metadata from training, experimentation, studies and their metrics, with a model 22 | - Build a catalog of models and manage model versions for deployment 23 | - Manage models for multiple deployment environments 24 | - Build a Kube Native solution 25 | 26 | ## Architecture 27 | 28 | The Google community project [ML-Metadata](https://github.com/google/ml-metadata) is used as the core component to build the Model Registry. ML-Metadata provides a very extensible schema that is generic, similar to a key-value store, but also allows for the creation of logical schemas that can be queried as if they were physical schemas. Those can be manipulated using their bindings in the Python library. We use this model to extend and provide metadata storage services for model serving, also known as Model Registry. 29 | 30 | The model registry uses the ml-metadata project’s C++ server as-is to handle the storing of the metadata, while domain-specific Model Registry features are added as extensions (aka microservices). As part of these extensions, Model Registry provides: 31 | - Python/Go extensions to support the Model Registry interaction 32 | - an OpenAPI interface to expose the Model Registry API to the clients 33 | 34 | ![Model Registry Connections](./images/model-registry-connections.png) 35 | 36 | Enforcement of RBAC policies can be handled at the REST API layer using service accounts with Authorino; details are described in [RBAC and Tenancy](model-registry-tenancy.md). 37 | 38 | ## Components 39 | - *[MLMD C++ Server](https://github.com/google/ml-metadata)* 40 | - This is the metadata server from Google's ml-metadata project. This component is hosted to communicate with a backend relational database that stores the actual metadata about the models. This server exposes a “gRPC” interface for its clients to communicate with. It provides a very flexible schema model that can be used to define logical data models fitting the needs of different MLOps operations, for example metadata captured during training and experimentation, metadata about metrics, or model versioning. 41 | 42 | - *[OpenAPI/REST Server](https://github.com/kubeflow/model-registry)* 43 | - This component exposes a higher-level REST API for the Model Registry. Whereas the MLMD server exposes a lower-level, generic API over gRPC, this REST server exposes a higher-level API that is much closer to the domain model of the Model Registry, with operations like: 44 | - Register a Model 45 | - Version a Model 46 | - Get a catalog of models 47 | - Manage the deployment statuses of a model 48 | 49 | - The REST API server converts its requests into one or more underlying gRPC requests on the MLMD Server. This layer is mainly designed to be used with the UI. 50 | 51 | - *[Model Registry Controller](https://github.com/opendatahub-io/model-registry-operator)* 52 | - The Model Registry controller is also called the Model Registry Operator. The main purpose of this component is to install/deploy the components of the Model Registry stack on OpenShift. Once the components are installed, the reconciler in the controller will continuously run and monitor these components to keep them healthy and alive. 53 | 54 | - *[CLI (Python client, SDK)](https://github.com/kubeflow/model-registry/tree/main/clients/python)* 55 | - The CLI, also called the MR Python client/SDK, is a command line tool for interacting with the Model Registry. 
This tool can be used to execute operations such as retrieving registered models, getting a model’s deployment status, listing a model’s versions, etc. 56 | 57 | - The model registry provides logical mappings from the high level [logical model](https://github.com/kubeflow/model-registry/blob/main/docs/logical_model.md) available through the OpenAPI/REST Server, to the underlying ml-metadata entities. 58 | 59 | ## Integration with Model Serving Components 60 | 61 | 66 | 67 | In a typical ML workflow, an ML model is registered on the Model Registry as a `RegisteredModel` logical entity, along with its versions and its associated `ModelArtifacts` resources. 68 | 69 | The model serving controller then advertises itself to the Model Registry by creating a `ServingEnvironment` entity. 70 | 71 | Next, the Model Controller reconciler monitors `InferenceService` CRs having pre-defined `labels`, and based on those `labels` it syncs the model registry, keeping track of every deployment that occurs in the cluster. 72 | Finally, the Model Controller reconciler updates the `InferenceService` CR by linking it to the Model Registry logical entity using a specific `label`. 73 | 74 | ```mermaid 75 | sequenceDiagram 76 | actor U as UI Dashboard 77 | participant K as Kubernetes 78 | participant MC as ODH Model Controller 79 | participant MR as Model Registry 80 | U->>+MR: Retrieve indexed model version 81 | MR-->>-U: Indexed model version 82 | U->>K: Create InferenceService (ISVC) 83 | Note right of U: Annotate/Label the ISVC with indexed
model information, like RegisteredModel and
ModelVersion IDs. 84 | Note right of K: Here all operators/controllers in charge of
deploying the model take their actions,
e.g., KServe or ModelMesh. 85 | loop Every ISVC creation/deletion/update 86 | K-->>+MC: Send notification 87 | MC->>+K: Retrieve affected ISVC in the cluster 88 | K-->>-MC: ISVC resource 89 | MC->>+MR: Create/Update InferenceService in Model Registry 90 | Note left of MR: InferenceService records in Model Registry
are used to keep track of every deployment that
occurred in the monitored Kubernetes cluster. 91 | MR-->>-MC: InferenceService record 92 | MC-->>-K: Update ISVC with Model Registry record ID 93 | end 94 | ``` 95 | 96 | In this way, the Model Controller reconciler syncs those occurrences into the Model Registry to keep track of every deployment that occurred in the cluster for indexed models. 97 | 98 | -------------------------------------------------------------------------------- /documentation/components/model-registry/images/model-registry-connections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/model-registry/images/model-registry-connections.png -------------------------------------------------------------------------------- /documentation/components/model-registry/images/model-registry-deployment-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/model-registry/images/model-registry-deployment-model.png -------------------------------------------------------------------------------- /documentation/components/model-registry/images/model-registry-logical-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/model-registry/images/model-registry-logical-model.png -------------------------------------------------------------------------------- /documentation/components/model-registry/images/model-registry-overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/model-registry/images/model-registry-overview.jpg -------------------------------------------------------------------------------- /documentation/components/model-registry/images/model-registry-tenancy-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/model-registry/images/model-registry-tenancy-model.png -------------------------------------------------------------------------------- /documentation/components/model-registry/model-registry-tenancy.md: -------------------------------------------------------------------------------- 1 | # Model Registry Tenancy 2 | 3 | OpenShift AI deploys components per Red Hat OpenShift AI Project. In this model, all OpenShift AI components are deployed per Project (Kubernetes namespace) (except for Dashboard, which is per-cluster). 4 | 5 | Model Registry needs to support sharing ML Model metadata across multiple projects/environments, etc. The current RHOAI per-namespace deployment model uses “namespaces” as the tenant, i.e., any users or service accounts that are members of that namespace share all data as a single tenant. Ideally, there should be a _logical_ tenancy model that allows users to group ML Model development based on teams, groups, or organizations, independent of the underlying Kubernetes deployment architecture. 
6 | 7 | 8 | This document lays out the tenancy architecture behind the Model Registry, which gives cluster administrators complete control over the kind of deployment model they choose. 9 | 10 | 11 | ## Model Registry Logical Architecture 12 | 13 | The diagram below shows the core components at a logical level without going into authentication and authorization implementation details. 14 | 15 | 16 | ![alt_text](images/model-registry-logical-model.png "image_tooltip") 17 | 18 | 19 | At a high level, the Model Registry architecture consists of the following: 20 | 21 | * Clients, which can be users or service accounts for client processes 22 | * A Kubernetes Deployment that provides a Model Registry service. This service exposes the following API ports: 23 | * An MLMD CPP Server that implements a gRPC API 24 | * A Golang Server that implements an HTTP REST API 25 | * An external, user-provided database service to store ML model metadata 26 | 27 | Clients can be either human users who have authenticated with their user credentials and make API calls through the dashboard or CLI, or other pods/clients using service accounts or non-human credentials such as secrets and tokens. 28 | 29 | 30 | ## Deployment Architecture - Use Service Mesh and Authorino for RBAC 31 | 32 | This architecture calls for deploying the Model Registry deployment(s) in a known namespace such as “odh-model-registries”, where OpenShift AI administrators can provision any number of Model Registries, each with a unique name. For example, the administrator can install a single Model Registry called “public” and configure its permissions such that any user authenticated to the Kubernetes cluster has access to its APIs. Similarly, another instance can be configured to be accessible to a set of known users/groups. 33 | 34 | The namespace “odh-model-registries” will be automatically enrolled as a member of the service mesh, and all the model registry deployments will be configured to run an Envoy proxy as a sidecar component. 35 | 36 | For all inter-service communication, mTLS will be configured using a “DestinationRule”. For exposing the model registry’s service endpoints, a “_Virtual Service_” will be configured. A “_Gateway_” for each “_Virtual Service_” is configured if the service needs to be exposed externally or to a non-mesh-member component. 37 | 38 | The Service Mesh will also be configured with a CUSTOM “AuthorizationPolicy” to delegate authorization decisions to Authorino. An “_AuthPolicy_” to support Authorino will be configured to enforce the “_Role_”-based RBAC rules for accessing the Model Registry instances that the administrator has created. 39 | 40 | **Note:** This proposal follows the [Principle of Least Privilege](https://en.wikipedia.org/wiki/Principle_of_least_privilege) for RBAC rules and resources. This ensures that any access granted is to a very specific resource to avoid accidentally granting access to other resources. 41 | 42 | The diagram below shows the RBAC proxy deployment model: 43 | 44 | 45 | ![alt_text](images/model-registry-deployment-model.png "image_tooltip") 46 | 47 | The Model Registry Operator will create the following: 48 | 49 | * Kubernetes Role `registry-user-` - allows the verb `GET` on the Kubernetes service created for `registry-name`. This role will make it convenient for users, groups, and service accounts to be granted access to a specific Model Registry service. 
50 | * OpenShift User Group `-users` - with role binding to role `registry-user-` to help registry administrators easily add users to this group and grant them access to the registry. 51 | 52 | Using the above two resources to handle the access permissions, Model Registry creates the tenancy model. 53 | 54 | ### Multiple “Model Registry” deployments in OpenShift AI 55 | 56 | This is the prescribed deployment model for multiple Model Registries in the MVP release with tenancy model. The tenancy is enforced with RBAC access to given instance of the Model Registry. 57 | 58 | ![alt_text](images/model-registry-tenancy-model.png "image_tooltip") 59 | 60 | 61 | -------------------------------------------------------------------------------- /documentation/components/pipelines/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/pipelines/.gitkeep -------------------------------------------------------------------------------- /documentation/components/pipelines/README.md: -------------------------------------------------------------------------------- 1 | # Data Science Pipelines 2 | 3 | Data Science Pipelines is a platform for building and deploying portable, scalable machine learning (ML) workflows based on containers. It is based on Kubeflow Pipelines and relies on Argo Workflows to run the pipelines. Additionally, Data Science Pipelines includes a custom "control plane" on top of Kubeflow Pipelines -- an operator we refer to as Data Science Pipelines Operator (DSPO). DSPO manages the "data planes", the individual "Data Science Pipelines Applications" (aka "stacks") that are deployed in each Data Science Project (kubernetes namespace). 4 | 5 | ## Data Science Pipelines Operator APIs 6 | 7 | ### DataSciencePipelinesApplication (DSPA) 8 | 9 | * [API Reference](https://github.com/opendatahub-io/data-science-pipelines-operator/blob/main/api/v1alpha1/dspipeline_types.go) 10 | * This CRD is responsible for defining the configuration of the Data Science Pipelines stack. 11 | 12 | ## DSP High Level Architecture 13 | ![DSP High Level Architecture](./dsp-v2-high-level-architecture.png) 14 | 15 | ## DSP Detailed Architecture 16 | ![DSP Detailed Architecture](./dsp-v2-architecture.drawio.png) 17 | 18 | ## Kubeflow Pipelines Architecture references 19 | 20 | Note: you must join https://groups.google.com/g/kubeflow-discuss to access these documents 21 | 22 | [Kubeflow Pipelines v2 System Design](https://docs.google.com/document/d/1fHU29oScMEKPttDA1Th1ibImAKsFVVt2Ynr4ZME05i0/edit) -- goes into deeper detail for the data plane design. The Orchestration section in that document is particularly helpful. 
23 | 24 | [KFP v2 control flow](https://docs.google.com/document/d/1TZeZtxwPzAImIu8Jk_e-4otSx467Ckf0smNe7JbPReE/edit) 25 | -------------------------------------------------------------------------------- /documentation/components/pipelines/dsp-v2-architecture.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/pipelines/dsp-v2-architecture.drawio.png -------------------------------------------------------------------------------- /documentation/components/pipelines/dsp-v2-high-level-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/pipelines/dsp-v2-high-level-architecture.png -------------------------------------------------------------------------------- /documentation/components/platform/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/platform/.gitkeep -------------------------------------------------------------------------------- /documentation/components/platform/Authorization in Service Mesh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/platform/Authorization in Service Mesh.png -------------------------------------------------------------------------------- /documentation/components/platform/Platform Architecture Overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/platform/Platform Architecture Overview.png -------------------------------------------------------------------------------- /documentation/components/platform/README.md: -------------------------------------------------------------------------------- 1 | # Platform Architecture 2 | 3 | Platform component is responsible for maintaining the core ODH Operator and establishing standards for component 4 | deployments, monitoring, security and ecosystem integration. 5 | 6 | ## ODH Operator APIs 7 | 8 | ### DSCInitialization API 9 | 10 | * [API Reference](https://github.com/opendatahub-io/opendatahub-operator/blob/incubation/docs/api-overview.md#dscinitializationopendatahubiov1) 11 | * This CRD is responsible for defining config required by the ODH platform before the applications are deployed. 12 | * This includes creation of applications and monitoring namespaces, component wide configurations like Authorization, 13 | monitoring etc 14 | 15 | 16 | ### DataScienceCluster API 17 | 18 | * [API Reference](https://github.com/opendatahub-io/opendatahub-operator/blob/incubation/docs/api-overview.md#datascienceclusteropendatahubiov1) 19 | * This CRD will be created by the end user to enable various data science components. 
20 | * It is responsible for enabling support for Notebooks, DataSciencePipelinesApplication, InferenceService etc based on 21 | the configuration 22 | 23 | 24 | ## Platform Architecture Overview 25 | ![Platform Architecture Overview](./Platform%20Architecture%20Overview.png) 26 | 27 | ## Authorization in ServiceMesh 28 | ![Authorization in ServiceMesh](./Authorization%20in%20Service%20Mesh.png) 29 | -------------------------------------------------------------------------------- /documentation/components/serving/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/serving/.gitkeep -------------------------------------------------------------------------------- /documentation/components/serving/README.md: -------------------------------------------------------------------------------- 1 | # ModelServing architecture 2 | 3 | ## Components 4 | - *[KSERVE](https://github.com/opendatahub-io/kserve)* 5 | - This supports a single model serving platform. For deploying large models such as large language models (LLMs), OpenShift AI includes a single model serving platform that is based on the KServe component. Because each model is deployed from its own model server, the single model serving platform helps you deploy, monitor, scale, and maintain large models that require increased resources. 6 | - *[MODEL MESH](https://github.com/opendatahub-io/modelmesh-serving)* 7 | - This supports a multi-model serving platform. For deploying small and medium-sized models, OpenShift AI includes a multi-model serving platform that is based on the ModelMesh component. On the multi-model serving platform, you can deploy multiple models on the same model server. Each of the deployed models shares the server resources. This approach can be advantageous on OpenShift clusters that have finite compute resources or pods. 8 | - *[ODH-MODEL-CONTROLLER](https://github.com/opendatahub-io/odh-model-controller)* 9 | - This component facilitates seamless integration between RHOAI's various components and model serving components, enhancing the interoperability and synergy within the RHOAI ecosystem. 
It streamlines the integration process, enabling smoother communication and interaction between different modules and services, thereby optimizing the overall performance and functionality of the RHOAI platform. 10 | 11 | 12 | ## ModelServing Components Architecture Diagram 13 | ![ModelServing Components Architecture Diagram](./modelserving-architecture-High-Level%20Components%20Architecture.jpg) 14 | -------------------------------------------------------------------------------- /documentation/components/serving/modelserving-architecture-High-Level Components Architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/serving/modelserving-architecture-High-Level Components Architecture.jpg -------------------------------------------------------------------------------- /documentation/components/workbenches/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/workbenches/.gitkeep -------------------------------------------------------------------------------- /documentation/components/workbenches/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Workbenches architecture 3 | 4 | 8 | 9 | The Workbenches component provides a platform to run web-based development environments inside the OpenShift cluster. In the ML lifecycle, workbenches are utilized as the platform for the Model Development stage, providing an avenue for data scientists to explore and experiment during model development. 10 | 11 | Key features include: 12 | 13 | - Native support for [JupyterLab](https://github.com/jupyterlab/jupyterlab), [RStudio](https://github.com/rstudio/rstudio), and [code-server](https://github.com/coder/code-server). 14 | - Tailored integrated environments equipped with the latest tools and libraries. 15 | - Users can create notebook containers directly in the cluster. 16 | - Admins can provide standard notebook images for their organization with required packages pre-installed. 17 | - Access control is managed by Admins, enabling easier notebook management in the organization. 18 | 19 | Components: 20 | 21 | - *[Notebooks/workbenches](https://github.com/opendatahub-io/notebooks/wiki/Workbenches)* 22 | - A collection of notebooks tailored for data analysis, machine learning, research, and coding within the OpenShift ecosystem. Designed to streamline data science workflows, these notebooks offer an integrated environment equipped with the latest tools and libraries. These notebooks were created to be used on OpenShift with the Notebook Controller as the launcher. 
The following are the out-of-the-box notebook images, supported with a one-year cadence: 23 | 24 | - Minimal (includes: jupyterlab) 25 | - Data-science (includes: jupyterlab, numpy, scipy, pandas, etc.) 26 | - PyTorch (includes: jupyterlab, torch, etc.) 27 | - TensorFlow (includes: jupyterlab, tensorflow, etc.) 28 | - TrustyAI (includes: jupyterlab, trustyai, etc.) 29 | - GPU support: Nvidia (CUDA drivers), Intel (Habana-Gaudi drivers) 30 | 31 | - *[Notebook Controller](https://github.com/opendatahub-io/kubeflow/tree/v1.7-branch/components/odh-notebook-controller)* 32 | - The combination of two controllers that act as the backend for this component. It is based on the upstream Kubeflow notebook controller and is responsible for watching Notebook custom resource events to start the notebook environment, along with the following capabilities: 33 | - OpenShift ingress controller integration. 34 | - OpenShift OAuth sidecar injection. 35 | - OpenShift certs injection 36 | 37 | 43 | 44 | 45 | ## High Level architecture 46 | 47 | ![Workbenches High level Architecture Diagram](./high-level-workbench-arch.drawio.png) 48 | 49 | ## Workbenches 50 | 51 | ### Architecture 52 | 53 | The structure of the notebook's build chain is derived from the parent image. To better comprehend this concept, refer to the following graph. 54 | 55 | ![workbenches Architecture](./workbenches-imagestreams.drawio.png) 56 | 57 | Each notebook inherits the properties of its parent. For instance, the TrustyAI notebook inherits all the installed packages from the Standard Data Science notebook, which in turn inherits the characteristics from its parent, the Minimal notebook. 58 | 59 | The RStudio architecture is a little different from the others: the component is not shipped as an image 60 | but as a BuildConfig, so users can build RStudio on their cluster as per their needs, 61 | because RStudio is in Dev Preview. 62 | 63 | ![rstudio Architecture](./rstudio-imagestream.drawio.png) 64 | 65 | 66 | ## Notebook Controller 67 | 68 | ### Architecture 69 | 70 | ![Notebook Controller](./notebook-controller.drawio.png) 71 | 72 | ### Spec 73 | 74 | The user needs to specify the PodSpec for the Workbenches. Based on the selection made by the user, the Dashboard component submits the Custom Resource to the cluster. 75 | For example: 76 | 77 | ```yaml 78 | apiVersion: kubeflow.org/v1 79 | kind: Notebook 80 | metadata: 81 | name: my-notebook 82 | spec: 83 | template: 84 | spec: 85 | containers: 86 | - name: my-notebook 87 | image: standard-data-science:ubi9-python3.9 88 | args: 89 | [ 90 | "start.sh", 91 | "lab", 92 | "--LabApp.token=''", 93 | "--LabApp.allow_remote_access='True'", 94 | "--LabApp.allow_root='True'", 95 | "--LabApp.ip='*'", 96 | "--LabApp.base_url=/test/my-notebook/", 97 | "--port=8888", 98 | "--no-browser", 99 | ] 100 | ``` 101 | 102 | The required fields are `containers[0].image` and (`containers[0].command` and/or `containers[0].args`). 103 | That is, the user should specify what and how to run. 104 | 105 | All other fields will be filled in with default values if not specified. 106 | 107 | By default, when the ODH notebook controller is deployed along with the 108 | Kubeflow notebook controller, it will expose the notebook in the OpenShift 109 | ingress by creating a TLS `Route` object. 
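As a rough illustration of that default behaviour, a Route for the `my-notebook` example above might look something like the sketch below. The service name, target port, and TLS termination shown are assumptions for the sketch, not the controller's literal output.

```yaml
# Hypothetical sketch of the Route the controller could create for the example Notebook.
# Service name, target port, and TLS termination are assumptions, not the exact values
# generated by the ODH notebook controller.
apiVersion: route.openshift.io/v1
kind: Route
metadata:
  name: my-notebook
  namespace: test
spec:
  to:
    kind: Service
    name: my-notebook          # service fronting the notebook pod (assumed name)
  port:
    targetPort: 8888           # matches the --port argument in the example spec
  tls:
    termination: edge          # a TLS-terminated Route, as described above
```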
110 | 111 | If the notebook annotation `notebooks.opendatahub.io/inject-oauth` is set to 112 | true, the OAuth proxy will be injected as a sidecar proxy in the notebook 113 | deployment to provide authN and authZ capabilities: 114 | 115 | ```yaml 116 | apiVersion: kubeflow.org/v1 117 | kind: Notebook 118 | metadata: 119 | name: example 120 | annotations: 121 | notebooks.opendatahub.io/inject-oauth: "true" 122 | ``` 123 | 124 | A [mutating webhook](./controllers/notebook_webhook.go) is part of the ODH 125 | notebook controller; it will add the sidecar to the notebook deployment. The 126 | controller will create all the objects needed by the proxy as explained in the 127 | architecture diagram. 128 | 129 | When accessing the notebook, you will have to authenticate with your OpenShift 130 | user, and you will only be able to access it if you have the necessary 131 | permissions. 132 | 133 | The authorization is delegated to OpenShift RBAC through the `--openshift-sar` 134 | flag in the OAuth proxy: 135 | 136 | ```json 137 | --openshift-sar= 138 | { 139 | "verb":"get", 140 | "resource":"notebooks", 141 | "resourceAPIGroup":"kubeflow.org", 142 | "resourceName":"example", 143 | "namespace":"opendatahub" 144 | } 145 | ``` 146 | 147 | That is, you will only be able to access the notebook if you can perform a `GET` 148 | notebook operation on the cluster: 149 | 150 | ```shell 151 | oc get notebook example -n 152 | ``` 153 | 154 | -------------------------------------------------------------------------------- /documentation/components/workbenches/high-level-workbench-arch.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/workbenches/high-level-workbench-arch.drawio.png -------------------------------------------------------------------------------- /documentation/components/workbenches/notebook-controller.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/workbenches/notebook-controller.drawio.png -------------------------------------------------------------------------------- /documentation/components/workbenches/rstudio-imagestream.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/workbenches/rstudio-imagestream.drawio.png -------------------------------------------------------------------------------- /documentation/components/workbenches/workbenches-imagestreams.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/components/workbenches/workbenches-imagestreams.drawio.png -------------------------------------------------------------------------------- /documentation/diagram/README.MD: -------------------------------------------------------------------------------- 1 | Folder with architecture diagrams 2 | Use https://www.drawio.com/ (client) to open and edit it --------------------------------------------------------------------------------
/documentation/enhancements/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/enhancements/.gitkeep -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D1 - Operator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D1 - Operator.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D2 - DSP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D2 - DSP.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D3 - Workbenches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D3 - Workbenches.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D4 - Dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D4 - Dashboard.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D5 - Distr Workloads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D5 - Distr Workloads.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D6a - Model Serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D6a - Model Serving.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D6b - Model Serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D6b - Model Serving.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D6c - Model Serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D6c - Model Serving.png 
-------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D7 - Trusty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D7 - Trusty.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture - D9 - Feature Store.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture - D9 - Feature Store.png -------------------------------------------------------------------------------- /documentation/images/RHOAI Architecture-Overview.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHOAI Architecture-Overview.drawio.png -------------------------------------------------------------------------------- /documentation/images/RHODS Architecture - Network Diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/RHODS Architecture - Network Diagram.png -------------------------------------------------------------------------------- /documentation/images/network/Dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/network/Dashboard.png -------------------------------------------------------------------------------- /documentation/images/network/DataScienePipelines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/network/DataScienePipelines.png -------------------------------------------------------------------------------- /documentation/images/network/DistributedWorkloads_KubeFlow_Training_Operator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/network/DistributedWorkloads_KubeFlow_Training_Operator.png -------------------------------------------------------------------------------- /documentation/images/network/DistributedWorkloads_KubeRay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/network/DistributedWorkloads_KubeRay.png -------------------------------------------------------------------------------- /documentation/images/network/ModelRegistry.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/network/ModelRegistry.png -------------------------------------------------------------------------------- /documentation/images/network/ModelServing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/network/ModelServing.png -------------------------------------------------------------------------------- /documentation/images/network/TrustyAI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/network/TrustyAI.png -------------------------------------------------------------------------------- /documentation/images/network/Workbenches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatahub-io/architecture-decision-records/8063037c513ce9d51bc9f6f2d4a637a46ab536b9/documentation/images/network/Workbenches.png --------------------------------------------------------------------------------